{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999390119943078, "eval_steps": 500, "global_step": 4918, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004065867046147591, "grad_norm": 0.22144322097301483, "learning_rate": 0.0, "loss": 1.3598, "step": 1 }, { "epoch": 0.0008131734092295182, "grad_norm": 0.199473574757576, "learning_rate": 4e-05, "loss": 1.405, "step": 2 }, { "epoch": 0.0012197601138442774, "grad_norm": 0.20758001506328583, "learning_rate": 8e-05, "loss": 1.2815, "step": 3 }, { "epoch": 0.0016263468184590363, "grad_norm": 0.21362783014774323, "learning_rate": 0.00012, "loss": 1.245, "step": 4 }, { "epoch": 0.0020329335230737954, "grad_norm": 0.24631692469120026, "learning_rate": 0.00016, "loss": 1.3086, "step": 5 }, { "epoch": 0.002439520227688555, "grad_norm": 0.20009225606918335, "learning_rate": 0.0002, "loss": 1.2443, "step": 6 }, { "epoch": 0.0028461069323033137, "grad_norm": 0.1735246330499649, "learning_rate": 0.00019995929167514756, "loss": 1.1878, "step": 7 }, { "epoch": 0.0032526936369180726, "grad_norm": 0.18904437124729156, "learning_rate": 0.00019991858335029514, "loss": 1.2478, "step": 8 }, { "epoch": 0.003659280341532832, "grad_norm": 0.1645248979330063, "learning_rate": 0.0001998778750254427, "loss": 1.2098, "step": 9 }, { "epoch": 0.004065867046147591, "grad_norm": 0.22034819424152374, "learning_rate": 0.00019983716670059028, "loss": 1.1183, "step": 10 }, { "epoch": 0.00447245375076235, "grad_norm": 0.3233634829521179, "learning_rate": 0.00019979645837573783, "loss": 1.0974, "step": 11 }, { "epoch": 0.00487904045537711, "grad_norm": 0.2592090368270874, "learning_rate": 0.00019975575005088542, "loss": 1.1611, "step": 12 }, { "epoch": 0.005285627159991868, "grad_norm": 0.14754348993301392, "learning_rate": 0.000199715041726033, "loss": 1.1932, "step": 13 }, { "epoch": 0.005692213864606627, "grad_norm": 
0.09341374039649963, "learning_rate": 0.00019967433340118055, "loss": 1.348, "step": 14 }, { "epoch": 0.006098800569221387, "grad_norm": 0.10229193419218063, "learning_rate": 0.00019963362507632813, "loss": 1.0927, "step": 15 }, { "epoch": 0.006505387273836145, "grad_norm": 0.14015386998653412, "learning_rate": 0.00019959291675147569, "loss": 1.2263, "step": 16 }, { "epoch": 0.006911973978450905, "grad_norm": 0.17507047951221466, "learning_rate": 0.00019955220842662327, "loss": 1.1951, "step": 17 }, { "epoch": 0.007318560683065664, "grad_norm": 0.17176274955272675, "learning_rate": 0.00019951150010177082, "loss": 1.1895, "step": 18 }, { "epoch": 0.007725147387680423, "grad_norm": 0.13839803636074066, "learning_rate": 0.00019947079177691838, "loss": 0.9549, "step": 19 }, { "epoch": 0.008131734092295182, "grad_norm": 0.0970696285367012, "learning_rate": 0.00019943008345206596, "loss": 1.0867, "step": 20 }, { "epoch": 0.008538320796909941, "grad_norm": 0.08836886286735535, "learning_rate": 0.0001993893751272135, "loss": 1.155, "step": 21 }, { "epoch": 0.0089449075015247, "grad_norm": 0.11885025352239609, "learning_rate": 0.0001993486668023611, "loss": 1.1231, "step": 22 }, { "epoch": 0.00935149420613946, "grad_norm": 0.15120816230773926, "learning_rate": 0.00019930795847750865, "loss": 1.1078, "step": 23 }, { "epoch": 0.00975808091075422, "grad_norm": 0.16326424479484558, "learning_rate": 0.00019926725015265623, "loss": 1.079, "step": 24 }, { "epoch": 0.010164667615368977, "grad_norm": 0.1179085448384285, "learning_rate": 0.0001992265418278038, "loss": 0.932, "step": 25 }, { "epoch": 0.010571254319983736, "grad_norm": 0.10621985793113708, "learning_rate": 0.00019918583350295136, "loss": 1.1386, "step": 26 }, { "epoch": 0.010977841024598495, "grad_norm": 0.08408638089895248, "learning_rate": 0.00019914512517809894, "loss": 1.0987, "step": 27 }, { "epoch": 0.011384427729213255, "grad_norm": 0.08222135156393051, "learning_rate": 0.0001991044168532465, "loss": 1.0378, 
"step": 28 }, { "epoch": 0.011791014433828014, "grad_norm": 0.08763129264116287, "learning_rate": 0.00019906370852839408, "loss": 0.983, "step": 29 }, { "epoch": 0.012197601138442773, "grad_norm": 0.10638878494501114, "learning_rate": 0.00019902300020354163, "loss": 1.0258, "step": 30 }, { "epoch": 0.012604187843057533, "grad_norm": 0.10155023634433746, "learning_rate": 0.0001989822918786892, "loss": 0.9579, "step": 31 }, { "epoch": 0.01301077454767229, "grad_norm": 0.08844579011201859, "learning_rate": 0.00019894158355383677, "loss": 1.1007, "step": 32 }, { "epoch": 0.01341736125228705, "grad_norm": 0.10394158959388733, "learning_rate": 0.00019890087522898432, "loss": 1.0459, "step": 33 }, { "epoch": 0.01382394795690181, "grad_norm": 0.08938682824373245, "learning_rate": 0.0001988601669041319, "loss": 1.0985, "step": 34 }, { "epoch": 0.014230534661516568, "grad_norm": 0.08639086782932281, "learning_rate": 0.00019881945857927948, "loss": 1.0712, "step": 35 }, { "epoch": 0.014637121366131328, "grad_norm": 0.08568435162305832, "learning_rate": 0.00019877875025442704, "loss": 1.0549, "step": 36 }, { "epoch": 0.015043708070746087, "grad_norm": 0.0859316885471344, "learning_rate": 0.00019873804192957462, "loss": 1.1042, "step": 37 }, { "epoch": 0.015450294775360847, "grad_norm": 0.09534381330013275, "learning_rate": 0.00019869733360472217, "loss": 1.0127, "step": 38 }, { "epoch": 0.015856881479975604, "grad_norm": 0.09103580564260483, "learning_rate": 0.00019865662527986976, "loss": 0.9347, "step": 39 }, { "epoch": 0.016263468184590364, "grad_norm": 0.0928095132112503, "learning_rate": 0.0001986159169550173, "loss": 1.0559, "step": 40 }, { "epoch": 0.016670054889205123, "grad_norm": 0.09370871633291245, "learning_rate": 0.0001985752086301649, "loss": 1.1473, "step": 41 }, { "epoch": 0.017076641593819882, "grad_norm": 0.07691123336553574, "learning_rate": 0.00019853450030531244, "loss": 1.0128, "step": 42 }, { "epoch": 0.01748322829843464, "grad_norm": 
0.09201047569513321, "learning_rate": 0.00019849379198046, "loss": 1.1296, "step": 43 }, { "epoch": 0.0178898150030494, "grad_norm": 0.08490074425935745, "learning_rate": 0.00019845308365560758, "loss": 1.0444, "step": 44 }, { "epoch": 0.01829640170766416, "grad_norm": 0.08623114228248596, "learning_rate": 0.00019841237533075513, "loss": 1.066, "step": 45 }, { "epoch": 0.01870298841227892, "grad_norm": 0.09486474096775055, "learning_rate": 0.00019837166700590271, "loss": 1.0788, "step": 46 }, { "epoch": 0.01910957511689368, "grad_norm": 0.08024484664201736, "learning_rate": 0.0001983309586810503, "loss": 1.0262, "step": 47 }, { "epoch": 0.01951616182150844, "grad_norm": 0.09256327897310257, "learning_rate": 0.00019829025035619785, "loss": 1.107, "step": 48 }, { "epoch": 0.019922748526123194, "grad_norm": 0.09877921640872955, "learning_rate": 0.00019824954203134543, "loss": 1.1731, "step": 49 }, { "epoch": 0.020329335230737954, "grad_norm": 0.08699575811624527, "learning_rate": 0.00019820883370649299, "loss": 1.0809, "step": 50 }, { "epoch": 0.020735921935352713, "grad_norm": 0.089649498462677, "learning_rate": 0.00019816812538164057, "loss": 1.1564, "step": 51 }, { "epoch": 0.021142508639967472, "grad_norm": 0.08757214993238449, "learning_rate": 0.00019812741705678812, "loss": 1.0272, "step": 52 }, { "epoch": 0.02154909534458223, "grad_norm": 0.08320939540863037, "learning_rate": 0.0001980867087319357, "loss": 0.9931, "step": 53 }, { "epoch": 0.02195568204919699, "grad_norm": 0.08898070454597473, "learning_rate": 0.00019804600040708326, "loss": 0.9421, "step": 54 }, { "epoch": 0.02236226875381175, "grad_norm": 0.08072236180305481, "learning_rate": 0.0001980052920822308, "loss": 1.0304, "step": 55 }, { "epoch": 0.02276885545842651, "grad_norm": 0.09354112297296524, "learning_rate": 0.0001979645837573784, "loss": 1.1041, "step": 56 }, { "epoch": 0.02317544216304127, "grad_norm": 0.09214304387569427, "learning_rate": 0.00019792387543252595, "loss": 1.0666, "step": 57 
}, { "epoch": 0.02358202886765603, "grad_norm": 0.08546210825443268, "learning_rate": 0.00019788316710767353, "loss": 1.0795, "step": 58 }, { "epoch": 0.023988615572270788, "grad_norm": 0.09029046446084976, "learning_rate": 0.0001978424587828211, "loss": 1.199, "step": 59 }, { "epoch": 0.024395202276885547, "grad_norm": 0.08200937509536743, "learning_rate": 0.00019780175045796866, "loss": 0.9853, "step": 60 }, { "epoch": 0.024801788981500306, "grad_norm": 0.08928566426038742, "learning_rate": 0.00019776104213311624, "loss": 0.9948, "step": 61 }, { "epoch": 0.025208375686115066, "grad_norm": 0.08067034929990768, "learning_rate": 0.0001977203338082638, "loss": 0.9824, "step": 62 }, { "epoch": 0.02561496239072982, "grad_norm": 0.07509499788284302, "learning_rate": 0.00019767962548341138, "loss": 0.9166, "step": 63 }, { "epoch": 0.02602154909534458, "grad_norm": 0.10127029567956924, "learning_rate": 0.00019763891715855893, "loss": 0.978, "step": 64 }, { "epoch": 0.02642813579995934, "grad_norm": 0.08480218052864075, "learning_rate": 0.0001975982088337065, "loss": 1.0019, "step": 65 }, { "epoch": 0.0268347225045741, "grad_norm": 0.0922696441411972, "learning_rate": 0.00019755750050885407, "loss": 1.0213, "step": 66 }, { "epoch": 0.02724130920918886, "grad_norm": 0.0819278433918953, "learning_rate": 0.00019751679218400162, "loss": 0.9792, "step": 67 }, { "epoch": 0.02764789591380362, "grad_norm": 0.09971120208501816, "learning_rate": 0.0001974760838591492, "loss": 0.9605, "step": 68 }, { "epoch": 0.028054482618418378, "grad_norm": 0.09195531904697418, "learning_rate": 0.00019743537553429676, "loss": 1.1203, "step": 69 }, { "epoch": 0.028461069323033137, "grad_norm": 0.09179981052875519, "learning_rate": 0.00019739466720944434, "loss": 1.0586, "step": 70 }, { "epoch": 0.028867656027647896, "grad_norm": 0.0866156816482544, "learning_rate": 0.00019735395888459192, "loss": 1.0558, "step": 71 }, { "epoch": 0.029274242732262656, "grad_norm": 0.09198956191539764, 
"learning_rate": 0.00019731325055973947, "loss": 1.117, "step": 72 }, { "epoch": 0.029680829436877415, "grad_norm": 0.0912180244922638, "learning_rate": 0.00019727254223488705, "loss": 1.0235, "step": 73 }, { "epoch": 0.030087416141492174, "grad_norm": 0.092186838388443, "learning_rate": 0.0001972318339100346, "loss": 1.0119, "step": 74 }, { "epoch": 0.030494002846106934, "grad_norm": 0.091013602912426, "learning_rate": 0.0001971911255851822, "loss": 1.0523, "step": 75 }, { "epoch": 0.030900589550721693, "grad_norm": 0.0932595282793045, "learning_rate": 0.00019715041726032974, "loss": 1.0471, "step": 76 }, { "epoch": 0.03130717625533645, "grad_norm": 0.089345782995224, "learning_rate": 0.0001971097089354773, "loss": 1.0214, "step": 77 }, { "epoch": 0.03171376295995121, "grad_norm": 0.09476006776094437, "learning_rate": 0.00019706900061062488, "loss": 0.9888, "step": 78 }, { "epoch": 0.03212034966456597, "grad_norm": 0.09379832446575165, "learning_rate": 0.00019702829228577243, "loss": 1.1039, "step": 79 }, { "epoch": 0.03252693636918073, "grad_norm": 0.10659569501876831, "learning_rate": 0.00019698758396092001, "loss": 1.1377, "step": 80 }, { "epoch": 0.03293352307379549, "grad_norm": 0.09652398526668549, "learning_rate": 0.0001969468756360676, "loss": 1.0194, "step": 81 }, { "epoch": 0.033340109778410246, "grad_norm": 0.08641666918992996, "learning_rate": 0.00019690616731121515, "loss": 1.0239, "step": 82 }, { "epoch": 0.03374669648302501, "grad_norm": 0.0956072062253952, "learning_rate": 0.00019686545898636273, "loss": 1.032, "step": 83 }, { "epoch": 0.034153283187639764, "grad_norm": 0.08402691036462784, "learning_rate": 0.00019682475066151029, "loss": 0.9802, "step": 84 }, { "epoch": 0.03455986989225452, "grad_norm": 0.08827648311853409, "learning_rate": 0.00019678404233665787, "loss": 1.1805, "step": 85 }, { "epoch": 0.03496645659686928, "grad_norm": 0.08757660537958145, "learning_rate": 0.00019674333401180542, "loss": 0.952, "step": 86 }, { "epoch": 
0.03537304330148404, "grad_norm": 0.09728538244962692, "learning_rate": 0.000196702625686953, "loss": 1.0875, "step": 87 }, { "epoch": 0.0357796300060988, "grad_norm": 0.08561044931411743, "learning_rate": 0.00019666191736210056, "loss": 0.9818, "step": 88 }, { "epoch": 0.03618621671071356, "grad_norm": 0.08389468491077423, "learning_rate": 0.0001966212090372481, "loss": 0.9962, "step": 89 }, { "epoch": 0.03659280341532832, "grad_norm": 0.08847957849502563, "learning_rate": 0.0001965805007123957, "loss": 1.0138, "step": 90 }, { "epoch": 0.036999390119943076, "grad_norm": 0.08515489101409912, "learning_rate": 0.00019653979238754324, "loss": 1.0119, "step": 91 }, { "epoch": 0.03740597682455784, "grad_norm": 0.09340325742959976, "learning_rate": 0.00019649908406269083, "loss": 1.0635, "step": 92 }, { "epoch": 0.037812563529172595, "grad_norm": 0.09383916854858398, "learning_rate": 0.0001964583757378384, "loss": 1.0999, "step": 93 }, { "epoch": 0.03821915023378736, "grad_norm": 0.09956547617912292, "learning_rate": 0.00019641766741298596, "loss": 1.0186, "step": 94 }, { "epoch": 0.038625736938402114, "grad_norm": 0.09809234738349915, "learning_rate": 0.00019637695908813354, "loss": 1.0641, "step": 95 }, { "epoch": 0.03903232364301688, "grad_norm": 0.08520065993070602, "learning_rate": 0.0001963362507632811, "loss": 0.9255, "step": 96 }, { "epoch": 0.03943891034763163, "grad_norm": 0.09007880836725235, "learning_rate": 0.00019629554243842868, "loss": 1.0963, "step": 97 }, { "epoch": 0.03984549705224639, "grad_norm": 0.08900373429059982, "learning_rate": 0.00019625483411357623, "loss": 0.9908, "step": 98 }, { "epoch": 0.04025208375686115, "grad_norm": 0.09613076597452164, "learning_rate": 0.0001962141257887238, "loss": 0.9729, "step": 99 }, { "epoch": 0.04065867046147591, "grad_norm": 0.09987878054380417, "learning_rate": 0.00019617341746387137, "loss": 1.0554, "step": 100 }, { "epoch": 0.04106525716609067, "grad_norm": 0.10209144651889801, "learning_rate": 
0.00019613270913901892, "loss": 1.1162, "step": 101 }, { "epoch": 0.041471843870705426, "grad_norm": 0.10085388273000717, "learning_rate": 0.0001960920008141665, "loss": 1.1355, "step": 102 }, { "epoch": 0.04187843057532019, "grad_norm": 0.08966121822595596, "learning_rate": 0.00019605129248931406, "loss": 0.9275, "step": 103 }, { "epoch": 0.042285017279934944, "grad_norm": 0.10507562756538391, "learning_rate": 0.00019601058416446166, "loss": 1.081, "step": 104 }, { "epoch": 0.04269160398454971, "grad_norm": 0.09719648957252502, "learning_rate": 0.00019596987583960922, "loss": 1.0884, "step": 105 }, { "epoch": 0.04309819068916446, "grad_norm": 0.09457529336214066, "learning_rate": 0.00019592916751475677, "loss": 1.0413, "step": 106 }, { "epoch": 0.043504777393779226, "grad_norm": 0.11330179125070572, "learning_rate": 0.00019588845918990435, "loss": 1.0937, "step": 107 }, { "epoch": 0.04391136409839398, "grad_norm": 0.09778840839862823, "learning_rate": 0.0001958477508650519, "loss": 1.1316, "step": 108 }, { "epoch": 0.044317950803008745, "grad_norm": 0.09848835319280624, "learning_rate": 0.0001958070425401995, "loss": 1.1244, "step": 109 }, { "epoch": 0.0447245375076235, "grad_norm": 0.0965428277850151, "learning_rate": 0.00019576633421534704, "loss": 0.9952, "step": 110 }, { "epoch": 0.045131124212238256, "grad_norm": 0.0857444629073143, "learning_rate": 0.00019572562589049462, "loss": 0.9822, "step": 111 }, { "epoch": 0.04553771091685302, "grad_norm": 0.10461942851543427, "learning_rate": 0.00019568491756564218, "loss": 1.1463, "step": 112 }, { "epoch": 0.045944297621467775, "grad_norm": 0.08575154095888138, "learning_rate": 0.00019564420924078973, "loss": 0.8976, "step": 113 }, { "epoch": 0.04635088432608254, "grad_norm": 0.0948256254196167, "learning_rate": 0.00019560350091593731, "loss": 1.1205, "step": 114 }, { "epoch": 0.046757471030697294, "grad_norm": 0.09214090555906296, "learning_rate": 0.00019556279259108487, "loss": 1.1416, "step": 115 }, { "epoch": 
0.04716405773531206, "grad_norm": 0.09885852038860321, "learning_rate": 0.00019552208426623248, "loss": 1.079, "step": 116 }, { "epoch": 0.04757064443992681, "grad_norm": 0.09071148931980133, "learning_rate": 0.00019548137594138003, "loss": 1.0128, "step": 117 }, { "epoch": 0.047977231144541575, "grad_norm": 0.09190430492162704, "learning_rate": 0.00019544066761652758, "loss": 0.9631, "step": 118 }, { "epoch": 0.04838381784915633, "grad_norm": 0.08024870604276657, "learning_rate": 0.00019539995929167517, "loss": 0.9086, "step": 119 }, { "epoch": 0.048790404553771094, "grad_norm": 0.09223239868879318, "learning_rate": 0.00019535925096682272, "loss": 1.0255, "step": 120 }, { "epoch": 0.04919699125838585, "grad_norm": 0.09259685128927231, "learning_rate": 0.0001953185426419703, "loss": 1.0221, "step": 121 }, { "epoch": 0.04960357796300061, "grad_norm": 0.08371948450803757, "learning_rate": 0.00019527783431711786, "loss": 0.966, "step": 122 }, { "epoch": 0.05001016466761537, "grad_norm": 0.0957912728190422, "learning_rate": 0.00019523712599226544, "loss": 1.0919, "step": 123 }, { "epoch": 0.05041675137223013, "grad_norm": 0.09397678077220917, "learning_rate": 0.000195196417667413, "loss": 0.9666, "step": 124 }, { "epoch": 0.05082333807684489, "grad_norm": 0.1014254167675972, "learning_rate": 0.00019515570934256054, "loss": 0.9321, "step": 125 }, { "epoch": 0.05122992478145964, "grad_norm": 0.09339801222085953, "learning_rate": 0.00019511500101770813, "loss": 1.0487, "step": 126 }, { "epoch": 0.051636511486074406, "grad_norm": 0.08642175793647766, "learning_rate": 0.0001950742926928557, "loss": 1.0606, "step": 127 }, { "epoch": 0.05204309819068916, "grad_norm": 0.09092641621828079, "learning_rate": 0.0001950335843680033, "loss": 0.904, "step": 128 }, { "epoch": 0.052449684895303925, "grad_norm": 0.09896791726350784, "learning_rate": 0.00019499287604315084, "loss": 1.0325, "step": 129 }, { "epoch": 0.05285627159991868, "grad_norm": 0.08731307834386826, "learning_rate": 
0.0001949521677182984, "loss": 0.9258, "step": 130 }, { "epoch": 0.05326285830453344, "grad_norm": 0.09673330187797546, "learning_rate": 0.00019491145939344598, "loss": 1.1198, "step": 131 }, { "epoch": 0.0536694450091482, "grad_norm": 0.09038975089788437, "learning_rate": 0.00019487075106859353, "loss": 1.0295, "step": 132 }, { "epoch": 0.05407603171376296, "grad_norm": 0.0918399840593338, "learning_rate": 0.0001948300427437411, "loss": 1.0127, "step": 133 }, { "epoch": 0.05448261841837772, "grad_norm": 0.08970967680215836, "learning_rate": 0.00019478933441888867, "loss": 1.0238, "step": 134 }, { "epoch": 0.05488920512299248, "grad_norm": 0.09728217124938965, "learning_rate": 0.00019474862609403625, "loss": 1.069, "step": 135 }, { "epoch": 0.05529579182760724, "grad_norm": 0.10240956395864487, "learning_rate": 0.0001947079177691838, "loss": 1.1467, "step": 136 }, { "epoch": 0.055702378532222, "grad_norm": 0.10397852212190628, "learning_rate": 0.00019466720944433136, "loss": 1.0415, "step": 137 }, { "epoch": 0.056108965236836755, "grad_norm": 0.10451675951480865, "learning_rate": 0.00019462650111947894, "loss": 1.0309, "step": 138 }, { "epoch": 0.05651555194145151, "grad_norm": 0.09685720503330231, "learning_rate": 0.00019458579279462652, "loss": 1.11, "step": 139 }, { "epoch": 0.056922138646066274, "grad_norm": 0.09885822236537933, "learning_rate": 0.00019454508446977407, "loss": 0.993, "step": 140 }, { "epoch": 0.05732872535068103, "grad_norm": 0.10943586379289627, "learning_rate": 0.00019450437614492165, "loss": 0.9749, "step": 141 }, { "epoch": 0.05773531205529579, "grad_norm": 0.10964591801166534, "learning_rate": 0.0001944636678200692, "loss": 1.1108, "step": 142 }, { "epoch": 0.05814189875991055, "grad_norm": 0.10109028965234756, "learning_rate": 0.0001944229594952168, "loss": 1.0897, "step": 143 }, { "epoch": 0.05854848546452531, "grad_norm": 0.11243695765733719, "learning_rate": 0.00019438225117036434, "loss": 1.0338, "step": 144 }, { "epoch": 
0.05895507216914007, "grad_norm": 0.1047658622264862, "learning_rate": 0.00019434154284551192, "loss": 0.9566, "step": 145 }, { "epoch": 0.05936165887375483, "grad_norm": 0.09534204006195068, "learning_rate": 0.00019430083452065948, "loss": 1.0313, "step": 146 }, { "epoch": 0.059768245578369586, "grad_norm": 0.10418044775724411, "learning_rate": 0.00019426012619580706, "loss": 0.9759, "step": 147 }, { "epoch": 0.06017483228298435, "grad_norm": 0.10020595043897629, "learning_rate": 0.00019421941787095461, "loss": 0.9368, "step": 148 }, { "epoch": 0.060581418987599105, "grad_norm": 0.09832129627466202, "learning_rate": 0.00019417870954610217, "loss": 1.0494, "step": 149 }, { "epoch": 0.06098800569221387, "grad_norm": 0.09458506107330322, "learning_rate": 0.00019413800122124978, "loss": 0.9631, "step": 150 }, { "epoch": 0.06139459239682862, "grad_norm": 0.10380101203918457, "learning_rate": 0.00019409729289639733, "loss": 1.1003, "step": 151 }, { "epoch": 0.061801179101443386, "grad_norm": 0.107131227850914, "learning_rate": 0.00019405658457154488, "loss": 1.0819, "step": 152 }, { "epoch": 0.06220776580605814, "grad_norm": 0.10330741852521896, "learning_rate": 0.00019401587624669247, "loss": 1.128, "step": 153 }, { "epoch": 0.0626143525106729, "grad_norm": 0.08829359710216522, "learning_rate": 0.00019397516792184002, "loss": 0.8754, "step": 154 }, { "epoch": 0.06302093921528766, "grad_norm": 0.10422427207231522, "learning_rate": 0.0001939344595969876, "loss": 0.9633, "step": 155 }, { "epoch": 0.06342752591990242, "grad_norm": 0.11499015986919403, "learning_rate": 0.00019389375127213515, "loss": 0.9735, "step": 156 }, { "epoch": 0.06383411262451717, "grad_norm": 0.0938427522778511, "learning_rate": 0.00019385304294728274, "loss": 0.9219, "step": 157 }, { "epoch": 0.06424069932913194, "grad_norm": 0.1080261766910553, "learning_rate": 0.0001938123346224303, "loss": 0.9678, "step": 158 }, { "epoch": 0.0646472860337467, "grad_norm": 0.10001271218061447, "learning_rate": 
0.00019377162629757784, "loss": 1.0854, "step": 159 }, { "epoch": 0.06505387273836145, "grad_norm": 0.10731212794780731, "learning_rate": 0.00019373091797272543, "loss": 1.0108, "step": 160 }, { "epoch": 0.06546045944297621, "grad_norm": 0.10019373893737793, "learning_rate": 0.00019369020964787298, "loss": 1.0315, "step": 161 }, { "epoch": 0.06586704614759098, "grad_norm": 0.0947297066450119, "learning_rate": 0.0001936495013230206, "loss": 1.0634, "step": 162 }, { "epoch": 0.06627363285220574, "grad_norm": 0.12204254418611526, "learning_rate": 0.00019360879299816814, "loss": 1.0635, "step": 163 }, { "epoch": 0.06668021955682049, "grad_norm": 0.10462553054094315, "learning_rate": 0.0001935680846733157, "loss": 1.0248, "step": 164 }, { "epoch": 0.06708680626143525, "grad_norm": 0.09576130658388138, "learning_rate": 0.00019352737634846328, "loss": 0.9671, "step": 165 }, { "epoch": 0.06749339296605002, "grad_norm": 0.10027123987674713, "learning_rate": 0.00019348666802361083, "loss": 0.9317, "step": 166 }, { "epoch": 0.06789997967066477, "grad_norm": 0.10674256086349487, "learning_rate": 0.0001934459596987584, "loss": 1.0058, "step": 167 }, { "epoch": 0.06830656637527953, "grad_norm": 0.12352320551872253, "learning_rate": 0.00019340525137390597, "loss": 1.0926, "step": 168 }, { "epoch": 0.06871315307989428, "grad_norm": 0.09426864236593246, "learning_rate": 0.00019336454304905355, "loss": 1.0876, "step": 169 }, { "epoch": 0.06911973978450904, "grad_norm": 0.09280996024608612, "learning_rate": 0.0001933238347242011, "loss": 0.977, "step": 170 }, { "epoch": 0.06952632648912381, "grad_norm": 0.11547420918941498, "learning_rate": 0.00019328312639934866, "loss": 1.0598, "step": 171 }, { "epoch": 0.06993291319373857, "grad_norm": 0.12538915872573853, "learning_rate": 0.00019324241807449624, "loss": 1.0996, "step": 172 }, { "epoch": 0.07033949989835332, "grad_norm": 0.08110898733139038, "learning_rate": 0.00019320170974964382, "loss": 0.8776, "step": 173 }, { "epoch": 
0.07074608660296808, "grad_norm": 0.10475198924541473, "learning_rate": 0.0001931610014247914, "loss": 1.0876, "step": 174 }, { "epoch": 0.07115267330758285, "grad_norm": 0.1095360517501831, "learning_rate": 0.00019312029309993895, "loss": 1.054, "step": 175 }, { "epoch": 0.0715592600121976, "grad_norm": 0.09516473114490509, "learning_rate": 0.0001930795847750865, "loss": 1.0558, "step": 176 }, { "epoch": 0.07196584671681236, "grad_norm": 0.09316466003656387, "learning_rate": 0.0001930388764502341, "loss": 0.9467, "step": 177 }, { "epoch": 0.07237243342142712, "grad_norm": 0.11777061969041824, "learning_rate": 0.00019299816812538164, "loss": 1.1441, "step": 178 }, { "epoch": 0.07277902012604189, "grad_norm": 0.09438811987638474, "learning_rate": 0.00019295745980052922, "loss": 0.9521, "step": 179 }, { "epoch": 0.07318560683065664, "grad_norm": 0.08892639726400375, "learning_rate": 0.00019291675147567678, "loss": 0.9804, "step": 180 }, { "epoch": 0.0735921935352714, "grad_norm": 0.08963356912136078, "learning_rate": 0.00019287604315082436, "loss": 1.0427, "step": 181 }, { "epoch": 0.07399878023988615, "grad_norm": 0.09870661795139313, "learning_rate": 0.0001928353348259719, "loss": 1.051, "step": 182 }, { "epoch": 0.07440536694450091, "grad_norm": 0.11843609809875488, "learning_rate": 0.00019279462650111947, "loss": 1.0109, "step": 183 }, { "epoch": 0.07481195364911568, "grad_norm": 0.08860404789447784, "learning_rate": 0.00019275391817626705, "loss": 1.0035, "step": 184 }, { "epoch": 0.07521854035373043, "grad_norm": 0.09085170924663544, "learning_rate": 0.00019271320985141463, "loss": 0.9461, "step": 185 }, { "epoch": 0.07562512705834519, "grad_norm": 0.09071815758943558, "learning_rate": 0.0001926725015265622, "loss": 0.9542, "step": 186 }, { "epoch": 0.07603171376295995, "grad_norm": 0.09566846489906311, "learning_rate": 0.00019263179320170976, "loss": 0.9958, "step": 187 }, { "epoch": 0.07643830046757472, "grad_norm": 0.11846338212490082, "learning_rate": 
0.00019259108487685732, "loss": 1.0737, "step": 188 }, { "epoch": 0.07684488717218947, "grad_norm": 0.09295649081468582, "learning_rate": 0.0001925503765520049, "loss": 1.0162, "step": 189 }, { "epoch": 0.07725147387680423, "grad_norm": 0.0917876660823822, "learning_rate": 0.00019250966822715245, "loss": 1.0432, "step": 190 }, { "epoch": 0.07765806058141898, "grad_norm": 0.10864109545946121, "learning_rate": 0.00019246895990230004, "loss": 1.1107, "step": 191 }, { "epoch": 0.07806464728603375, "grad_norm": 0.09689877927303314, "learning_rate": 0.0001924282515774476, "loss": 1.0421, "step": 192 }, { "epoch": 0.07847123399064851, "grad_norm": 0.09406042098999023, "learning_rate": 0.00019238754325259517, "loss": 1.1042, "step": 193 }, { "epoch": 0.07887782069526326, "grad_norm": 0.08346063643693924, "learning_rate": 0.00019234683492774272, "loss": 0.9554, "step": 194 }, { "epoch": 0.07928440739987802, "grad_norm": 0.10317754745483398, "learning_rate": 0.00019230612660289028, "loss": 1.0835, "step": 195 }, { "epoch": 0.07969099410449278, "grad_norm": 0.08712919056415558, "learning_rate": 0.0001922654182780379, "loss": 0.9799, "step": 196 }, { "epoch": 0.08009758080910755, "grad_norm": 0.0860556811094284, "learning_rate": 0.00019222470995318544, "loss": 0.8661, "step": 197 }, { "epoch": 0.0805041675137223, "grad_norm": 0.07940655201673508, "learning_rate": 0.00019218400162833302, "loss": 0.8305, "step": 198 }, { "epoch": 0.08091075421833706, "grad_norm": 0.09200199693441391, "learning_rate": 0.00019214329330348058, "loss": 0.9774, "step": 199 }, { "epoch": 0.08131734092295181, "grad_norm": 0.09980164468288422, "learning_rate": 0.00019210258497862813, "loss": 0.9791, "step": 200 }, { "epoch": 0.08172392762756658, "grad_norm": 0.09660688042640686, "learning_rate": 0.0001920618766537757, "loss": 1.027, "step": 201 }, { "epoch": 0.08213051433218134, "grad_norm": 0.09518909454345703, "learning_rate": 0.00019202116832892327, "loss": 0.9939, "step": 202 }, { "epoch": 
0.0825371010367961, "grad_norm": 0.0886114165186882, "learning_rate": 0.00019198046000407085, "loss": 0.985, "step": 203 }, { "epoch": 0.08294368774141085, "grad_norm": 0.09820783883333206, "learning_rate": 0.0001919397516792184, "loss": 1.0064, "step": 204 }, { "epoch": 0.08335027444602562, "grad_norm": 0.0957496389746666, "learning_rate": 0.00019189904335436598, "loss": 1.1126, "step": 205 }, { "epoch": 0.08375686115064038, "grad_norm": 0.09990067780017853, "learning_rate": 0.00019185833502951354, "loss": 1.1517, "step": 206 }, { "epoch": 0.08416344785525513, "grad_norm": 0.0953991562128067, "learning_rate": 0.0001918176267046611, "loss": 1.087, "step": 207 }, { "epoch": 0.08457003455986989, "grad_norm": 0.10291532427072525, "learning_rate": 0.0001917769183798087, "loss": 1.0366, "step": 208 }, { "epoch": 0.08497662126448464, "grad_norm": 0.09986121207475662, "learning_rate": 0.00019173621005495625, "loss": 0.9581, "step": 209 }, { "epoch": 0.08538320796909941, "grad_norm": 0.09369988739490509, "learning_rate": 0.00019169550173010383, "loss": 1.0048, "step": 210 }, { "epoch": 0.08578979467371417, "grad_norm": 0.0968063622713089, "learning_rate": 0.0001916547934052514, "loss": 1.0005, "step": 211 }, { "epoch": 0.08619638137832893, "grad_norm": 0.11241315305233002, "learning_rate": 0.00019161408508039894, "loss": 1.0316, "step": 212 }, { "epoch": 0.08660296808294368, "grad_norm": 0.09230878949165344, "learning_rate": 0.00019157337675554652, "loss": 0.917, "step": 213 }, { "epoch": 0.08700955478755845, "grad_norm": 0.08461520820856094, "learning_rate": 0.00019153266843069408, "loss": 0.9144, "step": 214 }, { "epoch": 0.08741614149217321, "grad_norm": 0.09011861681938171, "learning_rate": 0.00019149196010584166, "loss": 1.0092, "step": 215 }, { "epoch": 0.08782272819678796, "grad_norm": 0.09200841188430786, "learning_rate": 0.0001914512517809892, "loss": 1.0552, "step": 216 }, { "epoch": 0.08822931490140272, "grad_norm": 0.09052886068820953, "learning_rate": 
0.0001914105434561368, "loss": 0.9067, "step": 217 }, { "epoch": 0.08863590160601749, "grad_norm": 0.08740741014480591, "learning_rate": 0.00019136983513128435, "loss": 0.9182, "step": 218 }, { "epoch": 0.08904248831063225, "grad_norm": 0.08494284749031067, "learning_rate": 0.00019132912680643193, "loss": 0.8321, "step": 219 }, { "epoch": 0.089449075015247, "grad_norm": 0.0890796035528183, "learning_rate": 0.0001912884184815795, "loss": 0.9801, "step": 220 }, { "epoch": 0.08985566171986176, "grad_norm": 0.094822458922863, "learning_rate": 0.00019124771015672706, "loss": 0.9779, "step": 221 }, { "epoch": 0.09026224842447651, "grad_norm": 0.09756983071565628, "learning_rate": 0.00019120700183187465, "loss": 1.0385, "step": 222 }, { "epoch": 0.09066883512909128, "grad_norm": 0.09434107691049576, "learning_rate": 0.0001911662935070222, "loss": 1.063, "step": 223 }, { "epoch": 0.09107542183370604, "grad_norm": 0.0925639271736145, "learning_rate": 0.00019112558518216975, "loss": 0.9061, "step": 224 }, { "epoch": 0.0914820085383208, "grad_norm": 0.10531201958656311, "learning_rate": 0.00019108487685731734, "loss": 1.1593, "step": 225 }, { "epoch": 0.09188859524293555, "grad_norm": 0.08259832113981247, "learning_rate": 0.0001910441685324649, "loss": 0.8463, "step": 226 }, { "epoch": 0.09229518194755032, "grad_norm": 431.5063171386719, "learning_rate": 0.00019100346020761247, "loss": 1.0632, "step": 227 }, { "epoch": 0.09270176865216508, "grad_norm": 0.10764740407466888, "learning_rate": 0.00019096275188276002, "loss": 1.0083, "step": 228 }, { "epoch": 0.09310835535677983, "grad_norm": 0.08872029185295105, "learning_rate": 0.0001909220435579076, "loss": 0.9301, "step": 229 }, { "epoch": 0.09351494206139459, "grad_norm": 0.1006346270442009, "learning_rate": 0.00019088133523305516, "loss": 1.0103, "step": 230 }, { "epoch": 0.09392152876600936, "grad_norm": 0.0970514565706253, "learning_rate": 0.00019084062690820274, "loss": 1.0522, "step": 231 }, { "epoch": 
0.09432811547062411, "grad_norm": 0.09807727485895157, "learning_rate": 0.00019079991858335032, "loss": 1.0498, "step": 232 }, { "epoch": 0.09473470217523887, "grad_norm": 0.09828022867441177, "learning_rate": 0.00019075921025849788, "loss": 0.9871, "step": 233 }, { "epoch": 0.09514128887985362, "grad_norm": 0.10089042782783508, "learning_rate": 0.00019071850193364543, "loss": 0.977, "step": 234 }, { "epoch": 0.0955478755844684, "grad_norm": 0.09905245155096054, "learning_rate": 0.000190677793608793, "loss": 1.0135, "step": 235 }, { "epoch": 0.09595446228908315, "grad_norm": 0.1002473533153534, "learning_rate": 0.00019063708528394057, "loss": 1.0219, "step": 236 }, { "epoch": 0.0963610489936979, "grad_norm": 0.09028339385986328, "learning_rate": 0.00019059637695908815, "loss": 0.909, "step": 237 }, { "epoch": 0.09676763569831266, "grad_norm": 0.0950377881526947, "learning_rate": 0.0001905556686342357, "loss": 0.9749, "step": 238 }, { "epoch": 0.09717422240292742, "grad_norm": 0.09866049885749817, "learning_rate": 0.00019051496030938328, "loss": 1.0927, "step": 239 }, { "epoch": 0.09758080910754219, "grad_norm": 0.09754758328199387, "learning_rate": 0.00019047425198453084, "loss": 1.059, "step": 240 }, { "epoch": 0.09798739581215694, "grad_norm": 0.09261766821146011, "learning_rate": 0.00019043354365967842, "loss": 1.0912, "step": 241 }, { "epoch": 0.0983939825167717, "grad_norm": 0.08637125045061111, "learning_rate": 0.000190392835334826, "loss": 0.8925, "step": 242 }, { "epoch": 0.09880056922138646, "grad_norm": 0.0962812602519989, "learning_rate": 0.00019035212700997355, "loss": 1.0435, "step": 243 }, { "epoch": 0.09920715592600123, "grad_norm": 0.09047430753707886, "learning_rate": 0.00019031141868512113, "loss": 1.0787, "step": 244 }, { "epoch": 0.09961374263061598, "grad_norm": 0.09183438867330551, "learning_rate": 0.0001902707103602687, "loss": 0.9338, "step": 245 }, { "epoch": 0.10002032933523074, "grad_norm": 0.09977632761001587, "learning_rate": 
0.00019023000203541624, "loss": 1.1605, "step": 246 }, { "epoch": 0.10042691603984549, "grad_norm": 0.10386580228805542, "learning_rate": 0.00019018929371056382, "loss": 1.0493, "step": 247 }, { "epoch": 0.10083350274446026, "grad_norm": 0.09106533974409103, "learning_rate": 0.00019014858538571138, "loss": 0.9891, "step": 248 }, { "epoch": 0.10124008944907502, "grad_norm": 0.09407884627580643, "learning_rate": 0.00019010787706085896, "loss": 1.0367, "step": 249 }, { "epoch": 0.10164667615368977, "grad_norm": 0.10133463889360428, "learning_rate": 0.0001900671687360065, "loss": 1.0743, "step": 250 }, { "epoch": 0.10205326285830453, "grad_norm": 0.11877205967903137, "learning_rate": 0.0001900264604111541, "loss": 1.1572, "step": 251 }, { "epoch": 0.10245984956291929, "grad_norm": 0.10216309130191803, "learning_rate": 0.00018998575208630165, "loss": 1.0687, "step": 252 }, { "epoch": 0.10286643626753406, "grad_norm": 0.09023922681808472, "learning_rate": 0.0001899450437614492, "loss": 0.9153, "step": 253 }, { "epoch": 0.10327302297214881, "grad_norm": 0.09972742944955826, "learning_rate": 0.0001899043354365968, "loss": 0.9059, "step": 254 }, { "epoch": 0.10367960967676357, "grad_norm": 0.1175752505660057, "learning_rate": 0.00018986362711174436, "loss": 1.0659, "step": 255 }, { "epoch": 0.10408619638137832, "grad_norm": 0.09030337631702423, "learning_rate": 0.00018982291878689195, "loss": 0.9577, "step": 256 }, { "epoch": 0.1044927830859931, "grad_norm": 0.08850797265768051, "learning_rate": 0.0001897822104620395, "loss": 0.9193, "step": 257 }, { "epoch": 0.10489936979060785, "grad_norm": 1767.7669677734375, "learning_rate": 0.00018974150213718705, "loss": 0.9977, "step": 258 }, { "epoch": 0.1053059564952226, "grad_norm": 0.11435185372829437, "learning_rate": 0.00018970079381233463, "loss": 1.0468, "step": 259 }, { "epoch": 0.10571254319983736, "grad_norm": 0.10342080891132355, "learning_rate": 0.0001896600854874822, "loss": 1.0119, "step": 260 }, { "epoch": 
0.10611912990445213, "grad_norm": 0.11568263173103333, "learning_rate": 0.00018961937716262977, "loss": 1.025, "step": 261 }, { "epoch": 0.10652571660906689, "grad_norm": 0.12752321362495422, "learning_rate": 0.00018957866883777732, "loss": 1.1283, "step": 262 }, { "epoch": 0.10693230331368164, "grad_norm": 0.10688795894384384, "learning_rate": 0.0001895379605129249, "loss": 0.9052, "step": 263 }, { "epoch": 0.1073388900182964, "grad_norm": 0.10426552593708038, "learning_rate": 0.00018949725218807246, "loss": 0.9556, "step": 264 }, { "epoch": 0.10774547672291115, "grad_norm": 0.09953362494707108, "learning_rate": 0.00018945654386322004, "loss": 1.0734, "step": 265 }, { "epoch": 0.10815206342752592, "grad_norm": 0.09143470227718353, "learning_rate": 0.00018941583553836762, "loss": 1.0063, "step": 266 }, { "epoch": 0.10855865013214068, "grad_norm": 0.10831563919782639, "learning_rate": 0.00018937512721351518, "loss": 1.011, "step": 267 }, { "epoch": 0.10896523683675544, "grad_norm": 0.10352573543787003, "learning_rate": 0.00018933441888866276, "loss": 1.0625, "step": 268 }, { "epoch": 0.10937182354137019, "grad_norm": 0.09499429166316986, "learning_rate": 0.0001892937105638103, "loss": 0.8775, "step": 269 }, { "epoch": 0.10977841024598496, "grad_norm": 0.10296636819839478, "learning_rate": 0.00018925300223895787, "loss": 0.985, "step": 270 }, { "epoch": 0.11018499695059972, "grad_norm": 0.10464894771575928, "learning_rate": 0.00018921229391410545, "loss": 1.0051, "step": 271 }, { "epoch": 0.11059158365521447, "grad_norm": 0.09429532289505005, "learning_rate": 0.000189171585589253, "loss": 0.9793, "step": 272 }, { "epoch": 0.11099817035982923, "grad_norm": 0.09751992672681808, "learning_rate": 0.00018913087726440058, "loss": 1.0756, "step": 273 }, { "epoch": 0.111404757064444, "grad_norm": 0.11418993026018143, "learning_rate": 0.00018909016893954814, "loss": 1.0742, "step": 274 }, { "epoch": 0.11181134376905875, "grad_norm": 0.10320629924535751, "learning_rate": 
0.00018904946061469572, "loss": 1.036, "step": 275 }, { "epoch": 0.11221793047367351, "grad_norm": 0.09697311371564865, "learning_rate": 0.00018900875228984327, "loss": 1.0317, "step": 276 }, { "epoch": 0.11262451717828827, "grad_norm": 0.09579788893461227, "learning_rate": 0.00018896804396499085, "loss": 0.9621, "step": 277 }, { "epoch": 0.11303110388290302, "grad_norm": 0.09918879717588425, "learning_rate": 0.00018892733564013843, "loss": 1.0292, "step": 278 }, { "epoch": 0.11343769058751779, "grad_norm": 0.0923212468624115, "learning_rate": 0.000188886627315286, "loss": 1.0611, "step": 279 }, { "epoch": 0.11384427729213255, "grad_norm": 0.09480055421590805, "learning_rate": 0.00018884591899043357, "loss": 0.9809, "step": 280 }, { "epoch": 0.1142508639967473, "grad_norm": 0.09431526064872742, "learning_rate": 0.00018880521066558112, "loss": 1.0326, "step": 281 }, { "epoch": 0.11465745070136206, "grad_norm": 0.09080514311790466, "learning_rate": 0.00018876450234072868, "loss": 0.9115, "step": 282 }, { "epoch": 0.11506403740597683, "grad_norm": 0.10855970531702042, "learning_rate": 0.00018872379401587626, "loss": 1.0422, "step": 283 }, { "epoch": 0.11547062411059159, "grad_norm": 0.0941060334444046, "learning_rate": 0.0001886830856910238, "loss": 1.0352, "step": 284 }, { "epoch": 0.11587721081520634, "grad_norm": 0.08903583139181137, "learning_rate": 0.0001886423773661714, "loss": 0.964, "step": 285 }, { "epoch": 0.1162837975198211, "grad_norm": 0.08521820604801178, "learning_rate": 0.00018860166904131895, "loss": 0.917, "step": 286 }, { "epoch": 0.11669038422443587, "grad_norm": 0.1058691143989563, "learning_rate": 0.00018856096071646653, "loss": 1.0375, "step": 287 }, { "epoch": 0.11709697092905062, "grad_norm": 0.09435714781284332, "learning_rate": 0.0001885202523916141, "loss": 0.9766, "step": 288 }, { "epoch": 0.11750355763366538, "grad_norm": 0.09868729114532471, "learning_rate": 0.00018847954406676166, "loss": 1.1059, "step": 289 }, { "epoch": 
0.11791014433828013, "grad_norm": 0.08855635672807693, "learning_rate": 0.00018843883574190924, "loss": 0.9424, "step": 290 }, { "epoch": 0.11831673104289489, "grad_norm": 0.09142837673425674, "learning_rate": 0.0001883981274170568, "loss": 1.0425, "step": 291 }, { "epoch": 0.11872331774750966, "grad_norm": 0.0971277505159378, "learning_rate": 0.00018835741909220438, "loss": 1.108, "step": 292 }, { "epoch": 0.11912990445212442, "grad_norm": 0.09940122812986374, "learning_rate": 0.00018831671076735193, "loss": 1.0172, "step": 293 }, { "epoch": 0.11953649115673917, "grad_norm": 0.10263317078351974, "learning_rate": 0.0001882760024424995, "loss": 1.0956, "step": 294 }, { "epoch": 0.11994307786135393, "grad_norm": 0.1092846542596817, "learning_rate": 0.00018823529411764707, "loss": 0.9454, "step": 295 }, { "epoch": 0.1203496645659687, "grad_norm": 0.10364726930856705, "learning_rate": 0.00018819458579279462, "loss": 0.8884, "step": 296 }, { "epoch": 0.12075625127058345, "grad_norm": 0.0889100730419159, "learning_rate": 0.0001881538774679422, "loss": 0.9922, "step": 297 }, { "epoch": 0.12116283797519821, "grad_norm": 0.09209653735160828, "learning_rate": 0.00018811316914308976, "loss": 0.977, "step": 298 }, { "epoch": 0.12156942467981297, "grad_norm": 0.11542046815156937, "learning_rate": 0.00018807246081823734, "loss": 1.0694, "step": 299 }, { "epoch": 0.12197601138442773, "grad_norm": 0.10896503180265427, "learning_rate": 0.00018803175249338492, "loss": 1.0508, "step": 300 }, { "epoch": 0.12238259808904249, "grad_norm": 0.09302002936601639, "learning_rate": 0.00018799104416853248, "loss": 1.0512, "step": 301 }, { "epoch": 0.12278918479365725, "grad_norm": 0.09081271290779114, "learning_rate": 0.00018795033584368006, "loss": 0.9688, "step": 302 }, { "epoch": 0.123195771498272, "grad_norm": 0.1059931218624115, "learning_rate": 0.0001879096275188276, "loss": 1.0483, "step": 303 }, { "epoch": 0.12360235820288677, "grad_norm": 0.1018669605255127, "learning_rate": 
0.0001878689191939752, "loss": 1.019, "step": 304 }, { "epoch": 0.12400894490750153, "grad_norm": 0.1040007546544075, "learning_rate": 0.00018782821086912275, "loss": 1.037, "step": 305 }, { "epoch": 0.12441553161211628, "grad_norm": 0.10204601287841797, "learning_rate": 0.0001877875025442703, "loss": 0.9816, "step": 306 }, { "epoch": 0.12482211831673104, "grad_norm": 0.10591764748096466, "learning_rate": 0.00018774679421941788, "loss": 1.0939, "step": 307 }, { "epoch": 0.1252287050213458, "grad_norm": 0.09306305646896362, "learning_rate": 0.00018770608589456544, "loss": 1.0476, "step": 308 }, { "epoch": 0.12563529172596055, "grad_norm": 11.22681713104248, "learning_rate": 0.00018766537756971302, "loss": 1.0573, "step": 309 }, { "epoch": 0.12604187843057532, "grad_norm": 0.09422402083873749, "learning_rate": 0.00018762466924486057, "loss": 0.9993, "step": 310 }, { "epoch": 0.1264484651351901, "grad_norm": 0.0982229933142662, "learning_rate": 0.00018758396092000815, "loss": 0.9159, "step": 311 }, { "epoch": 0.12685505183980483, "grad_norm": 0.12579265236854553, "learning_rate": 0.00018754325259515573, "loss": 1.0935, "step": 312 }, { "epoch": 0.1272616385444196, "grad_norm": 0.10069390386343002, "learning_rate": 0.0001875025442703033, "loss": 1.0127, "step": 313 }, { "epoch": 0.12766822524903434, "grad_norm": 0.10948827862739563, "learning_rate": 0.00018746183594545087, "loss": 1.0576, "step": 314 }, { "epoch": 0.12807481195364911, "grad_norm": 0.09232445061206818, "learning_rate": 0.00018742112762059842, "loss": 0.9856, "step": 315 }, { "epoch": 0.12848139865826388, "grad_norm": 0.08319563418626785, "learning_rate": 0.000187380419295746, "loss": 0.9172, "step": 316 }, { "epoch": 0.12888798536287863, "grad_norm": 0.09697309136390686, "learning_rate": 0.00018733971097089356, "loss": 1.0567, "step": 317 }, { "epoch": 0.1292945720674934, "grad_norm": 0.09254255145788193, "learning_rate": 0.0001872990026460411, "loss": 1.0177, "step": 318 }, { "epoch": 
0.12970115877210814, "grad_norm": 0.09254108369350433, "learning_rate": 0.0001872582943211887, "loss": 1.0079, "step": 319 }, { "epoch": 0.1301077454767229, "grad_norm": 0.09095866233110428, "learning_rate": 0.00018721758599633625, "loss": 1.0633, "step": 320 }, { "epoch": 0.13051433218133768, "grad_norm": 0.09073010087013245, "learning_rate": 0.00018717687767148383, "loss": 0.9059, "step": 321 }, { "epoch": 0.13092091888595242, "grad_norm": 0.09842764586210251, "learning_rate": 0.00018713616934663138, "loss": 1.0766, "step": 322 }, { "epoch": 0.1313275055905672, "grad_norm": 0.09325529634952545, "learning_rate": 0.00018709546102177896, "loss": 1.066, "step": 323 }, { "epoch": 0.13173409229518196, "grad_norm": 0.09692969918251038, "learning_rate": 0.00018705475269692654, "loss": 0.9743, "step": 324 }, { "epoch": 0.1321406789997967, "grad_norm": 0.09432708472013474, "learning_rate": 0.0001870140443720741, "loss": 1.0141, "step": 325 }, { "epoch": 0.13254726570441147, "grad_norm": 0.09226994961500168, "learning_rate": 0.00018697333604722168, "loss": 0.9837, "step": 326 }, { "epoch": 0.1329538524090262, "grad_norm": 0.10843974351882935, "learning_rate": 0.00018693262772236923, "loss": 1.0248, "step": 327 }, { "epoch": 0.13336043911364098, "grad_norm": 0.09324774891138077, "learning_rate": 0.00018689191939751681, "loss": 1.0642, "step": 328 }, { "epoch": 0.13376702581825575, "grad_norm": 0.08934729546308517, "learning_rate": 0.00018685121107266437, "loss": 0.9792, "step": 329 }, { "epoch": 0.1341736125228705, "grad_norm": 0.09125274419784546, "learning_rate": 0.00018681050274781192, "loss": 1.0093, "step": 330 }, { "epoch": 0.13458019922748526, "grad_norm": 0.09645108133554459, "learning_rate": 0.0001867697944229595, "loss": 0.9503, "step": 331 }, { "epoch": 0.13498678593210003, "grad_norm": 0.09900861978530884, "learning_rate": 0.00018672908609810706, "loss": 0.9966, "step": 332 }, { "epoch": 0.13539337263671478, "grad_norm": 0.09018311649560928, "learning_rate": 
0.00018668837777325464, "loss": 0.965, "step": 333 }, { "epoch": 0.13579995934132955, "grad_norm": 0.10296136885881424, "learning_rate": 0.00018664766944840222, "loss": 1.1011, "step": 334 }, { "epoch": 0.1362065460459443, "grad_norm": 0.09104129672050476, "learning_rate": 0.00018660696112354977, "loss": 0.9814, "step": 335 }, { "epoch": 0.13661313275055906, "grad_norm": 0.09881450235843658, "learning_rate": 0.00018656625279869736, "loss": 1.0989, "step": 336 }, { "epoch": 0.13701971945517383, "grad_norm": 0.09691241383552551, "learning_rate": 0.0001865255444738449, "loss": 1.0967, "step": 337 }, { "epoch": 0.13742630615978857, "grad_norm": 0.10152243077754974, "learning_rate": 0.0001864848361489925, "loss": 1.0951, "step": 338 }, { "epoch": 0.13783289286440334, "grad_norm": 0.10802541673183441, "learning_rate": 0.00018644412782414005, "loss": 0.8742, "step": 339 }, { "epoch": 0.13823947956901808, "grad_norm": 0.09942565858364105, "learning_rate": 0.0001864034194992876, "loss": 0.9961, "step": 340 }, { "epoch": 0.13864606627363285, "grad_norm": 0.08618199825286865, "learning_rate": 0.00018636271117443518, "loss": 0.9645, "step": 341 }, { "epoch": 0.13905265297824762, "grad_norm": 0.1056099608540535, "learning_rate": 0.00018632200284958273, "loss": 0.9885, "step": 342 }, { "epoch": 0.13945923968286236, "grad_norm": 0.08862382173538208, "learning_rate": 0.00018628129452473032, "loss": 0.9316, "step": 343 }, { "epoch": 0.13986582638747713, "grad_norm": 0.09923135489225388, "learning_rate": 0.00018624058619987787, "loss": 0.9959, "step": 344 }, { "epoch": 0.1402724130920919, "grad_norm": 0.09120538830757141, "learning_rate": 0.00018619987787502545, "loss": 0.968, "step": 345 }, { "epoch": 0.14067899979670664, "grad_norm": 0.09669141471385956, "learning_rate": 0.00018615916955017303, "loss": 1.085, "step": 346 }, { "epoch": 0.1410855865013214, "grad_norm": 0.08598754554986954, "learning_rate": 0.00018611846122532059, "loss": 0.9504, "step": 347 }, { "epoch": 
0.14149217320593616, "grad_norm": 0.09238371253013611, "learning_rate": 0.00018607775290046817, "loss": 0.9742, "step": 348 }, { "epoch": 0.14189875991055093, "grad_norm": 0.091258205473423, "learning_rate": 0.00018603704457561572, "loss": 0.9341, "step": 349 }, { "epoch": 0.1423053466151657, "grad_norm": 0.10129548609256744, "learning_rate": 0.0001859963362507633, "loss": 1.0814, "step": 350 }, { "epoch": 0.14271193331978044, "grad_norm": 0.09523019194602966, "learning_rate": 0.00018595562792591086, "loss": 0.9848, "step": 351 }, { "epoch": 0.1431185200243952, "grad_norm": 0.09485248476266861, "learning_rate": 0.0001859149196010584, "loss": 0.9828, "step": 352 }, { "epoch": 0.14352510672900995, "grad_norm": 0.09963666647672653, "learning_rate": 0.000185874211276206, "loss": 1.1075, "step": 353 }, { "epoch": 0.14393169343362472, "grad_norm": 0.09067155420780182, "learning_rate": 0.00018583350295135355, "loss": 0.971, "step": 354 }, { "epoch": 0.1443382801382395, "grad_norm": 0.09153544157743454, "learning_rate": 0.00018579279462650113, "loss": 0.9405, "step": 355 }, { "epoch": 0.14474486684285423, "grad_norm": 0.1024472787976265, "learning_rate": 0.00018575208630164868, "loss": 0.9967, "step": 356 }, { "epoch": 0.145151453547469, "grad_norm": 0.09804495424032211, "learning_rate": 0.00018571137797679626, "loss": 0.9578, "step": 357 }, { "epoch": 0.14555804025208377, "grad_norm": 0.099054716527462, "learning_rate": 0.00018567066965194384, "loss": 0.9999, "step": 358 }, { "epoch": 0.1459646269566985, "grad_norm": 0.09781336784362793, "learning_rate": 0.0001856299613270914, "loss": 1.09, "step": 359 }, { "epoch": 0.14637121366131328, "grad_norm": 0.08993211388587952, "learning_rate": 0.00018558925300223898, "loss": 1.0719, "step": 360 }, { "epoch": 0.14677780036592802, "grad_norm": 0.09146003425121307, "learning_rate": 0.00018554854467738653, "loss": 1.0008, "step": 361 }, { "epoch": 0.1471843870705428, "grad_norm": 0.09643495827913284, "learning_rate": 
0.00018550783635253411, "loss": 1.0791, "step": 362 }, { "epoch": 0.14759097377515756, "grad_norm": 0.09078676998615265, "learning_rate": 0.00018546712802768167, "loss": 0.8641, "step": 363 }, { "epoch": 0.1479975604797723, "grad_norm": 0.08719085901975632, "learning_rate": 0.00018542641970282922, "loss": 0.985, "step": 364 }, { "epoch": 0.14840414718438708, "grad_norm": 0.09189736843109131, "learning_rate": 0.0001853857113779768, "loss": 0.9638, "step": 365 }, { "epoch": 0.14881073388900182, "grad_norm": 0.09381456673145294, "learning_rate": 0.00018534500305312436, "loss": 1.0036, "step": 366 }, { "epoch": 0.1492173205936166, "grad_norm": 0.0922684445977211, "learning_rate": 0.00018530429472827194, "loss": 1.0391, "step": 367 }, { "epoch": 0.14962390729823136, "grad_norm": 0.09465248882770538, "learning_rate": 0.0001852635864034195, "loss": 0.8874, "step": 368 }, { "epoch": 0.1500304940028461, "grad_norm": 0.0938408225774765, "learning_rate": 0.00018522287807856707, "loss": 1.0269, "step": 369 }, { "epoch": 0.15043708070746087, "grad_norm": 0.09377933293581009, "learning_rate": 0.00018518216975371466, "loss": 1.0142, "step": 370 }, { "epoch": 0.15084366741207564, "grad_norm": 0.1117277517914772, "learning_rate": 0.0001851414614288622, "loss": 1.0371, "step": 371 }, { "epoch": 0.15125025411669038, "grad_norm": 0.10293183475732803, "learning_rate": 0.0001851007531040098, "loss": 1.0, "step": 372 }, { "epoch": 0.15165684082130515, "grad_norm": 0.09216313809156418, "learning_rate": 0.00018506004477915734, "loss": 0.9703, "step": 373 }, { "epoch": 0.1520634275259199, "grad_norm": 0.09088669717311859, "learning_rate": 0.00018501933645430493, "loss": 0.8766, "step": 374 }, { "epoch": 0.15247001423053466, "grad_norm": 0.09916643798351288, "learning_rate": 0.00018497862812945248, "loss": 1.0958, "step": 375 }, { "epoch": 0.15287660093514943, "grad_norm": 0.08404985070228577, "learning_rate": 0.00018493791980460003, "loss": 0.9602, "step": 376 }, { "epoch": 
0.15328318763976417, "grad_norm": 0.10011377185583115, "learning_rate": 0.00018489721147974762, "loss": 1.0377, "step": 377 }, { "epoch": 0.15368977434437894, "grad_norm": 0.09958089143037796, "learning_rate": 0.00018485650315489517, "loss": 1.0213, "step": 378 }, { "epoch": 0.15409636104899369, "grad_norm": 0.09488838911056519, "learning_rate": 0.00018481579483004275, "loss": 0.941, "step": 379 }, { "epoch": 0.15450294775360846, "grad_norm": 0.09099314361810684, "learning_rate": 0.00018477508650519033, "loss": 0.8913, "step": 380 }, { "epoch": 0.15490953445822322, "grad_norm": 0.0956854447722435, "learning_rate": 0.00018473437818033789, "loss": 1.1478, "step": 381 }, { "epoch": 0.15531612116283797, "grad_norm": 0.11225584149360657, "learning_rate": 0.00018469366985548547, "loss": 1.0795, "step": 382 }, { "epoch": 0.15572270786745274, "grad_norm": 0.11592987924814224, "learning_rate": 0.00018465296153063302, "loss": 1.0863, "step": 383 }, { "epoch": 0.1561292945720675, "grad_norm": 0.09232570976018906, "learning_rate": 0.0001846122532057806, "loss": 0.9551, "step": 384 }, { "epoch": 0.15653588127668225, "grad_norm": 0.08860056847333908, "learning_rate": 0.00018457154488092816, "loss": 1.0206, "step": 385 }, { "epoch": 0.15694246798129702, "grad_norm": 0.10788331180810928, "learning_rate": 0.00018453083655607574, "loss": 0.9378, "step": 386 }, { "epoch": 0.15734905468591176, "grad_norm": 0.10758615285158157, "learning_rate": 0.0001844901282312233, "loss": 1.1149, "step": 387 }, { "epoch": 0.15775564139052653, "grad_norm": 0.10551386326551437, "learning_rate": 0.00018444941990637085, "loss": 1.0729, "step": 388 }, { "epoch": 0.1581622280951413, "grad_norm": 0.08733198046684265, "learning_rate": 0.00018440871158151843, "loss": 1.0058, "step": 389 }, { "epoch": 0.15856881479975604, "grad_norm": 0.1095399409532547, "learning_rate": 0.00018436800325666598, "loss": 1.0566, "step": 390 }, { "epoch": 0.1589754015043708, "grad_norm": 0.12356330454349518, "learning_rate": 
0.00018432729493181356, "loss": 1.0173, "step": 391 }, { "epoch": 0.15938198820898555, "grad_norm": 0.09934639930725098, "learning_rate": 0.00018428658660696114, "loss": 1.1237, "step": 392 }, { "epoch": 0.15978857491360032, "grad_norm": 0.09402013570070267, "learning_rate": 0.0001842458782821087, "loss": 1.0018, "step": 393 }, { "epoch": 0.1601951616182151, "grad_norm": 0.10511749237775803, "learning_rate": 0.00018420516995725628, "loss": 0.9844, "step": 394 }, { "epoch": 0.16060174832282983, "grad_norm": 0.11193688213825226, "learning_rate": 0.00018416446163240383, "loss": 0.9888, "step": 395 }, { "epoch": 0.1610083350274446, "grad_norm": 0.09895443916320801, "learning_rate": 0.00018412375330755141, "loss": 1.1045, "step": 396 }, { "epoch": 0.16141492173205937, "grad_norm": 0.09660319238901138, "learning_rate": 0.00018408304498269897, "loss": 1.0457, "step": 397 }, { "epoch": 0.16182150843667412, "grad_norm": 0.1339186728000641, "learning_rate": 0.00018404233665784655, "loss": 1.1266, "step": 398 }, { "epoch": 0.16222809514128889, "grad_norm": 0.1154564693570137, "learning_rate": 0.0001840016283329941, "loss": 1.0299, "step": 399 }, { "epoch": 0.16263468184590363, "grad_norm": 0.09698904305696487, "learning_rate": 0.00018396092000814166, "loss": 1.1101, "step": 400 }, { "epoch": 0.1630412685505184, "grad_norm": 0.09455164521932602, "learning_rate": 0.00018392021168328924, "loss": 0.9928, "step": 401 }, { "epoch": 0.16344785525513317, "grad_norm": 0.09728690981864929, "learning_rate": 0.0001838795033584368, "loss": 1.0603, "step": 402 }, { "epoch": 0.1638544419597479, "grad_norm": 0.10577269643545151, "learning_rate": 0.0001838387950335844, "loss": 0.9922, "step": 403 }, { "epoch": 0.16426102866436268, "grad_norm": 0.08850935101509094, "learning_rate": 0.00018379808670873196, "loss": 0.9758, "step": 404 }, { "epoch": 0.16466761536897742, "grad_norm": 0.09496256709098816, "learning_rate": 0.0001837573783838795, "loss": 1.0949, "step": 405 }, { "epoch": 
0.1650742020735922, "grad_norm": 0.09768050909042358, "learning_rate": 0.0001837166700590271, "loss": 1.0054, "step": 406 }, { "epoch": 0.16548078877820696, "grad_norm": 0.09913921356201172, "learning_rate": 0.00018367596173417464, "loss": 1.0272, "step": 407 }, { "epoch": 0.1658873754828217, "grad_norm": 0.0901927724480629, "learning_rate": 0.00018363525340932223, "loss": 1.0264, "step": 408 }, { "epoch": 0.16629396218743647, "grad_norm": 0.09796515852212906, "learning_rate": 0.00018359454508446978, "loss": 1.0338, "step": 409 }, { "epoch": 0.16670054889205124, "grad_norm": 0.1018638014793396, "learning_rate": 0.00018355383675961736, "loss": 1.0409, "step": 410 }, { "epoch": 0.16710713559666598, "grad_norm": 0.10666611790657043, "learning_rate": 0.00018351312843476492, "loss": 1.0924, "step": 411 }, { "epoch": 0.16751372230128075, "grad_norm": 0.0986141785979271, "learning_rate": 0.00018347242010991247, "loss": 0.9468, "step": 412 }, { "epoch": 0.1679203090058955, "grad_norm": 0.09429168701171875, "learning_rate": 0.00018343171178506005, "loss": 0.9706, "step": 413 }, { "epoch": 0.16832689571051027, "grad_norm": 0.09704872965812683, "learning_rate": 0.0001833910034602076, "loss": 1.0692, "step": 414 }, { "epoch": 0.16873348241512504, "grad_norm": 0.0980519950389862, "learning_rate": 0.00018335029513535519, "loss": 1.0218, "step": 415 }, { "epoch": 0.16914006911973978, "grad_norm": 0.08980212360620499, "learning_rate": 0.00018330958681050277, "loss": 0.9243, "step": 416 }, { "epoch": 0.16954665582435455, "grad_norm": 0.09630506485700607, "learning_rate": 0.00018326887848565032, "loss": 0.9599, "step": 417 }, { "epoch": 0.1699532425289693, "grad_norm": 0.08608522266149521, "learning_rate": 0.0001832281701607979, "loss": 0.9577, "step": 418 }, { "epoch": 0.17035982923358406, "grad_norm": 0.09151248633861542, "learning_rate": 0.00018318746183594546, "loss": 0.9956, "step": 419 }, { "epoch": 0.17076641593819883, "grad_norm": 0.09689094871282578, "learning_rate": 
0.00018314675351109304, "loss": 1.0999, "step": 420 }, { "epoch": 0.17117300264281357, "grad_norm": 0.09316612035036087, "learning_rate": 0.0001831060451862406, "loss": 0.8572, "step": 421 }, { "epoch": 0.17157958934742834, "grad_norm": 0.11449979990720749, "learning_rate": 0.00018306533686138817, "loss": 1.0328, "step": 422 }, { "epoch": 0.1719861760520431, "grad_norm": 0.10802194476127625, "learning_rate": 0.00018302462853653573, "loss": 0.9785, "step": 423 }, { "epoch": 0.17239276275665785, "grad_norm": 0.09997294098138809, "learning_rate": 0.00018298392021168328, "loss": 0.9778, "step": 424 }, { "epoch": 0.17279934946127262, "grad_norm": 0.10244690626859665, "learning_rate": 0.00018294321188683086, "loss": 1.0874, "step": 425 }, { "epoch": 0.17320593616588736, "grad_norm": 0.10659472644329071, "learning_rate": 0.00018290250356197844, "loss": 1.0196, "step": 426 }, { "epoch": 0.17361252287050213, "grad_norm": 0.09812036156654358, "learning_rate": 0.000182861795237126, "loss": 0.9051, "step": 427 }, { "epoch": 0.1740191095751169, "grad_norm": 0.845235288143158, "learning_rate": 0.00018282108691227358, "loss": 1.0531, "step": 428 }, { "epoch": 0.17442569627973165, "grad_norm": 0.109995998442173, "learning_rate": 0.00018278037858742113, "loss": 1.001, "step": 429 }, { "epoch": 0.17483228298434642, "grad_norm": 0.12578758597373962, "learning_rate": 0.00018273967026256871, "loss": 0.9513, "step": 430 }, { "epoch": 0.17523886968896116, "grad_norm": 0.1585826873779297, "learning_rate": 0.00018269896193771627, "loss": 1.0091, "step": 431 }, { "epoch": 0.17564545639357593, "grad_norm": 0.15150819718837738, "learning_rate": 0.00018265825361286385, "loss": 1.1045, "step": 432 }, { "epoch": 0.1760520430981907, "grad_norm": 0.1110219806432724, "learning_rate": 0.0001826175452880114, "loss": 0.9877, "step": 433 }, { "epoch": 0.17645862980280544, "grad_norm": 0.11296675354242325, "learning_rate": 0.00018257683696315896, "loss": 1.1317, "step": 434 }, { "epoch": 
0.1768652165074202, "grad_norm": 0.11464451253414154, "learning_rate": 0.00018253612863830654, "loss": 0.9485, "step": 435 }, { "epoch": 0.17727180321203498, "grad_norm": 0.08836513012647629, "learning_rate": 0.0001824954203134541, "loss": 0.8667, "step": 436 }, { "epoch": 0.17767838991664972, "grad_norm": 0.10697431862354279, "learning_rate": 0.00018245471198860167, "loss": 1.0692, "step": 437 }, { "epoch": 0.1780849766212645, "grad_norm": 0.10565032064914703, "learning_rate": 0.00018241400366374925, "loss": 1.0723, "step": 438 }, { "epoch": 0.17849156332587923, "grad_norm": 0.11343531310558319, "learning_rate": 0.0001823732953388968, "loss": 1.1038, "step": 439 }, { "epoch": 0.178898150030494, "grad_norm": 0.10002034902572632, "learning_rate": 0.0001823325870140444, "loss": 0.9859, "step": 440 }, { "epoch": 0.17930473673510877, "grad_norm": 0.10602378845214844, "learning_rate": 0.00018229187868919194, "loss": 1.1091, "step": 441 }, { "epoch": 0.1797113234397235, "grad_norm": 0.09775001555681229, "learning_rate": 0.00018225117036433953, "loss": 1.0473, "step": 442 }, { "epoch": 0.18011791014433828, "grad_norm": 0.09872320294380188, "learning_rate": 0.00018221046203948708, "loss": 1.0657, "step": 443 }, { "epoch": 0.18052449684895303, "grad_norm": 0.0893816128373146, "learning_rate": 0.00018216975371463466, "loss": 0.915, "step": 444 }, { "epoch": 0.1809310835535678, "grad_norm": 0.09870447218418121, "learning_rate": 0.00018212904538978221, "loss": 0.8847, "step": 445 }, { "epoch": 0.18133767025818257, "grad_norm": 0.09775330871343613, "learning_rate": 0.00018208833706492977, "loss": 0.841, "step": 446 }, { "epoch": 0.1817442569627973, "grad_norm": 0.10025996714830399, "learning_rate": 0.00018204762874007735, "loss": 0.9965, "step": 447 }, { "epoch": 0.18215084366741208, "grad_norm": 0.09369905292987823, "learning_rate": 0.0001820069204152249, "loss": 0.9998, "step": 448 }, { "epoch": 0.18255743037202685, "grad_norm": 0.09244808554649353, "learning_rate": 
0.0001819662120903725, "loss": 0.9938, "step": 449 }, { "epoch": 0.1829640170766416, "grad_norm": 0.12163155525922775, "learning_rate": 0.00018192550376552007, "loss": 1.1384, "step": 450 }, { "epoch": 0.18337060378125636, "grad_norm": 0.08755457401275635, "learning_rate": 0.00018188479544066762, "loss": 0.9002, "step": 451 }, { "epoch": 0.1837771904858711, "grad_norm": 0.0917607769370079, "learning_rate": 0.0001818440871158152, "loss": 0.9874, "step": 452 }, { "epoch": 0.18418377719048587, "grad_norm": 0.09113719314336777, "learning_rate": 0.00018180337879096276, "loss": 1.0187, "step": 453 }, { "epoch": 0.18459036389510064, "grad_norm": 0.08795943111181259, "learning_rate": 0.00018176267046611034, "loss": 0.902, "step": 454 }, { "epoch": 0.18499695059971538, "grad_norm": 0.1016731783747673, "learning_rate": 0.0001817219621412579, "loss": 0.9933, "step": 455 }, { "epoch": 0.18540353730433015, "grad_norm": 0.09413068741559982, "learning_rate": 0.00018168125381640547, "loss": 0.9448, "step": 456 }, { "epoch": 0.18581012400894492, "grad_norm": 0.10015012323856354, "learning_rate": 0.00018164054549155303, "loss": 1.1458, "step": 457 }, { "epoch": 0.18621671071355966, "grad_norm": 0.09086768329143524, "learning_rate": 0.00018159983716670058, "loss": 1.0543, "step": 458 }, { "epoch": 0.18662329741817443, "grad_norm": 0.10910352319478989, "learning_rate": 0.00018155912884184816, "loss": 1.0078, "step": 459 }, { "epoch": 0.18702988412278918, "grad_norm": 0.09674135595560074, "learning_rate": 0.00018151842051699572, "loss": 0.9758, "step": 460 }, { "epoch": 0.18743647082740394, "grad_norm": 0.09108126163482666, "learning_rate": 0.00018147771219214332, "loss": 1.0038, "step": 461 }, { "epoch": 0.18784305753201871, "grad_norm": 0.09710326045751572, "learning_rate": 0.00018143700386729088, "loss": 0.9693, "step": 462 }, { "epoch": 0.18824964423663346, "grad_norm": 0.10069318860769272, "learning_rate": 0.00018139629554243843, "loss": 1.1005, "step": 463 }, { "epoch": 
0.18865623094124823, "grad_norm": 0.09434141218662262, "learning_rate": 0.000181355587217586, "loss": 1.0359, "step": 464 }, { "epoch": 0.18906281764586297, "grad_norm": 0.09208261221647263, "learning_rate": 0.00018131487889273357, "loss": 1.0374, "step": 465 }, { "epoch": 0.18946940435047774, "grad_norm": 0.09581121802330017, "learning_rate": 0.00018127417056788115, "loss": 1.0267, "step": 466 }, { "epoch": 0.1898759910550925, "grad_norm": 0.09809669107198715, "learning_rate": 0.0001812334622430287, "loss": 1.0652, "step": 467 }, { "epoch": 0.19028257775970725, "grad_norm": 0.08496394008398056, "learning_rate": 0.00018119275391817628, "loss": 0.9468, "step": 468 }, { "epoch": 0.19068916446432202, "grad_norm": 0.09247399121522903, "learning_rate": 0.00018115204559332384, "loss": 1.0247, "step": 469 }, { "epoch": 0.1910957511689368, "grad_norm": 0.10010971128940582, "learning_rate": 0.0001811113372684714, "loss": 0.9674, "step": 470 }, { "epoch": 0.19150233787355153, "grad_norm": 0.09562191367149353, "learning_rate": 0.00018107062894361897, "loss": 0.9819, "step": 471 }, { "epoch": 0.1919089245781663, "grad_norm": 0.09223975241184235, "learning_rate": 0.00018102992061876655, "loss": 1.0051, "step": 472 }, { "epoch": 0.19231551128278104, "grad_norm": 0.09564565122127533, "learning_rate": 0.00018098921229391414, "loss": 0.908, "step": 473 }, { "epoch": 0.1927220979873958, "grad_norm": 0.09371364116668701, "learning_rate": 0.0001809485039690617, "loss": 1.0195, "step": 474 }, { "epoch": 0.19312868469201058, "grad_norm": 0.0895533412694931, "learning_rate": 0.00018090779564420924, "loss": 0.8912, "step": 475 }, { "epoch": 0.19353527139662532, "grad_norm": 0.08874888718128204, "learning_rate": 0.00018086708731935682, "loss": 0.9941, "step": 476 }, { "epoch": 0.1939418581012401, "grad_norm": 8989.1748046875, "learning_rate": 0.00018082637899450438, "loss": 1.0191, "step": 477 }, { "epoch": 0.19434844480585484, "grad_norm": 0.09893982112407684, "learning_rate": 
0.00018078567066965196, "loss": 1.1682, "step": 478 }, { "epoch": 0.1947550315104696, "grad_norm": 0.09100797772407532, "learning_rate": 0.00018074496234479951, "loss": 0.9466, "step": 479 }, { "epoch": 0.19516161821508438, "grad_norm": 0.10540256649255753, "learning_rate": 0.0001807042540199471, "loss": 1.0735, "step": 480 }, { "epoch": 0.19556820491969912, "grad_norm": 0.09110235422849655, "learning_rate": 0.00018066354569509465, "loss": 1.0097, "step": 481 }, { "epoch": 0.1959747916243139, "grad_norm": 0.10651825368404388, "learning_rate": 0.0001806228373702422, "loss": 1.014, "step": 482 }, { "epoch": 0.19638137832892866, "grad_norm": 0.08685674518346786, "learning_rate": 0.00018058212904538978, "loss": 0.9755, "step": 483 }, { "epoch": 0.1967879650335434, "grad_norm": 0.10092045366764069, "learning_rate": 0.00018054142072053737, "loss": 0.9397, "step": 484 }, { "epoch": 0.19719455173815817, "grad_norm": 0.1056622639298439, "learning_rate": 0.00018050071239568495, "loss": 0.9864, "step": 485 }, { "epoch": 0.1976011384427729, "grad_norm": 0.10525202006101608, "learning_rate": 0.0001804600040708325, "loss": 1.1085, "step": 486 }, { "epoch": 0.19800772514738768, "grad_norm": 0.10073073953390121, "learning_rate": 0.00018041929574598006, "loss": 1.1264, "step": 487 }, { "epoch": 0.19841431185200245, "grad_norm": 0.09659091383218765, "learning_rate": 0.00018037858742112764, "loss": 0.9848, "step": 488 }, { "epoch": 0.1988208985566172, "grad_norm": 0.09986629337072372, "learning_rate": 0.0001803378790962752, "loss": 1.0732, "step": 489 }, { "epoch": 0.19922748526123196, "grad_norm": 0.11215290427207947, "learning_rate": 0.00018029717077142277, "loss": 1.1259, "step": 490 }, { "epoch": 0.1996340719658467, "grad_norm": 0.11136343330144882, "learning_rate": 0.00018025646244657033, "loss": 1.0857, "step": 491 }, { "epoch": 0.20004065867046147, "grad_norm": 0.10452030599117279, "learning_rate": 0.0001802157541217179, "loss": 0.9997, "step": 492 }, { "epoch": 
0.20044724537507624, "grad_norm": 0.10394178330898285, "learning_rate": 0.00018017504579686546, "loss": 1.0852, "step": 493 }, { "epoch": 0.20085383207969099, "grad_norm": 0.10206598043441772, "learning_rate": 0.00018013433747201302, "loss": 0.9629, "step": 494 }, { "epoch": 0.20126041878430576, "grad_norm": 0.09365608543157578, "learning_rate": 0.00018009362914716062, "loss": 0.9504, "step": 495 }, { "epoch": 0.20166700548892053, "grad_norm": 0.09425178170204163, "learning_rate": 0.00018005292082230818, "loss": 1.0038, "step": 496 }, { "epoch": 0.20207359219353527, "grad_norm": 0.09562011808156967, "learning_rate": 0.00018001221249745576, "loss": 1.0877, "step": 497 }, { "epoch": 0.20248017889815004, "grad_norm": 0.11452426016330719, "learning_rate": 0.0001799715041726033, "loss": 1.0688, "step": 498 }, { "epoch": 0.20288676560276478, "grad_norm": 0.0930696651339531, "learning_rate": 0.00017993079584775087, "loss": 1.0255, "step": 499 }, { "epoch": 0.20329335230737955, "grad_norm": 0.10522327572107315, "learning_rate": 0.00017989008752289845, "loss": 1.085, "step": 500 }, { "epoch": 0.20369993901199432, "grad_norm": 0.08499190211296082, "learning_rate": 0.000179849379198046, "loss": 0.9235, "step": 501 }, { "epoch": 0.20410652571660906, "grad_norm": 0.09169955551624298, "learning_rate": 0.00017980867087319358, "loss": 0.9836, "step": 502 }, { "epoch": 0.20451311242122383, "grad_norm": 0.10331466048955917, "learning_rate": 0.00017976796254834114, "loss": 1.0255, "step": 503 }, { "epoch": 0.20491969912583857, "grad_norm": 0.0900363028049469, "learning_rate": 0.00017972725422348872, "loss": 0.9691, "step": 504 }, { "epoch": 0.20532628583045334, "grad_norm": 0.10095544904470444, "learning_rate": 0.00017968654589863627, "loss": 1.0289, "step": 505 }, { "epoch": 0.2057328725350681, "grad_norm": 0.0992627814412117, "learning_rate": 0.00017964583757378383, "loss": 0.9785, "step": 506 }, { "epoch": 0.20613945923968285, "grad_norm": 0.0954422652721405, "learning_rate": 
0.00017960512924893144, "loss": 1.0105, "step": 507 }, { "epoch": 0.20654604594429762, "grad_norm": 0.0994410440325737, "learning_rate": 0.000179564420924079, "loss": 1.0894, "step": 508 }, { "epoch": 0.2069526326489124, "grad_norm": 0.08866444230079651, "learning_rate": 0.00017952371259922654, "loss": 0.9725, "step": 509 }, { "epoch": 0.20735921935352714, "grad_norm": 0.09361348301172256, "learning_rate": 0.00017948300427437412, "loss": 1.0441, "step": 510 }, { "epoch": 0.2077658060581419, "grad_norm": 0.08215323090553284, "learning_rate": 0.00017944229594952168, "loss": 0.9214, "step": 511 }, { "epoch": 0.20817239276275665, "grad_norm": 0.09752262383699417, "learning_rate": 0.00017940158762466926, "loss": 0.9456, "step": 512 }, { "epoch": 0.20857897946737142, "grad_norm": 0.10021419823169708, "learning_rate": 0.00017936087929981681, "loss": 1.1158, "step": 513 }, { "epoch": 0.2089855661719862, "grad_norm": 0.09550227969884872, "learning_rate": 0.0001793201709749644, "loss": 0.9789, "step": 514 }, { "epoch": 0.20939215287660093, "grad_norm": 0.09059977531433105, "learning_rate": 0.00017927946265011195, "loss": 0.9649, "step": 515 }, { "epoch": 0.2097987395812157, "grad_norm": 0.09227627515792847, "learning_rate": 0.00017923875432525953, "loss": 0.9779, "step": 516 }, { "epoch": 0.21020532628583044, "grad_norm": 0.09919798374176025, "learning_rate": 0.00017919804600040708, "loss": 1.0155, "step": 517 }, { "epoch": 0.2106119129904452, "grad_norm": 0.09044051915407181, "learning_rate": 0.00017915733767555464, "loss": 0.9428, "step": 518 }, { "epoch": 0.21101849969505998, "grad_norm": 0.09017504006624222, "learning_rate": 0.00017911662935070225, "loss": 0.9244, "step": 519 }, { "epoch": 0.21142508639967472, "grad_norm": 0.09257036447525024, "learning_rate": 0.0001790759210258498, "loss": 1.0168, "step": 520 }, { "epoch": 0.2118316731042895, "grad_norm": 0.0926235020160675, "learning_rate": 0.00017903521270099735, "loss": 0.9363, "step": 521 }, { "epoch": 
0.21223825980890426, "grad_norm": 0.08785069733858109, "learning_rate": 0.00017899450437614494, "loss": 0.9428, "step": 522 }, { "epoch": 0.212644846513519, "grad_norm": 0.09824348986148834, "learning_rate": 0.0001789537960512925, "loss": 1.0378, "step": 523 }, { "epoch": 0.21305143321813377, "grad_norm": 0.0915142148733139, "learning_rate": 0.00017891308772644007, "loss": 0.9603, "step": 524 }, { "epoch": 0.21345801992274852, "grad_norm": 0.09466978907585144, "learning_rate": 0.00017887237940158763, "loss": 1.013, "step": 525 }, { "epoch": 0.21386460662736329, "grad_norm": 0.09305880963802338, "learning_rate": 0.0001788316710767352, "loss": 0.9386, "step": 526 }, { "epoch": 0.21427119333197805, "grad_norm": 0.09210691601037979, "learning_rate": 0.00017879096275188276, "loss": 0.9797, "step": 527 }, { "epoch": 0.2146777800365928, "grad_norm": 0.10415366291999817, "learning_rate": 0.00017875025442703031, "loss": 1.0125, "step": 528 }, { "epoch": 0.21508436674120757, "grad_norm": 0.10259640216827393, "learning_rate": 0.0001787095461021779, "loss": 1.0473, "step": 529 }, { "epoch": 0.2154909534458223, "grad_norm": 0.09523239731788635, "learning_rate": 0.00017866883777732548, "loss": 0.9603, "step": 530 }, { "epoch": 0.21589754015043708, "grad_norm": 0.10005185008049011, "learning_rate": 0.00017862812945247306, "loss": 1.0768, "step": 531 }, { "epoch": 0.21630412685505185, "grad_norm": 0.09643250703811646, "learning_rate": 0.0001785874211276206, "loss": 1.0799, "step": 532 }, { "epoch": 0.2167107135596666, "grad_norm": 0.09473159909248352, "learning_rate": 0.00017854671280276817, "loss": 1.0657, "step": 533 }, { "epoch": 0.21711730026428136, "grad_norm": 0.09550385922193527, "learning_rate": 0.00017850600447791575, "loss": 1.0389, "step": 534 }, { "epoch": 0.21752388696889613, "grad_norm": 0.09414463490247726, "learning_rate": 0.0001784652961530633, "loss": 1.0317, "step": 535 }, { "epoch": 0.21793047367351087, "grad_norm": 0.090250164270401, "learning_rate": 
0.00017842458782821088, "loss": 1.0212, "step": 536 }, { "epoch": 0.21833706037812564, "grad_norm": 0.09635050594806671, "learning_rate": 0.00017838387950335844, "loss": 0.9473, "step": 537 }, { "epoch": 0.21874364708274038, "grad_norm": 0.0985347330570221, "learning_rate": 0.00017834317117850602, "loss": 1.1372, "step": 538 }, { "epoch": 0.21915023378735515, "grad_norm": 0.09789203107357025, "learning_rate": 0.00017830246285365357, "loss": 1.0369, "step": 539 }, { "epoch": 0.21955682049196992, "grad_norm": 0.09777568280696869, "learning_rate": 0.00017826175452880113, "loss": 1.0746, "step": 540 }, { "epoch": 0.21996340719658466, "grad_norm": 0.09013503789901733, "learning_rate": 0.0001782210462039487, "loss": 1.0124, "step": 541 }, { "epoch": 0.22036999390119943, "grad_norm": 0.10604355484247208, "learning_rate": 0.0001781803378790963, "loss": 1.0158, "step": 542 }, { "epoch": 0.22077658060581418, "grad_norm": 0.09194648265838623, "learning_rate": 0.00017813962955424387, "loss": 0.9544, "step": 543 }, { "epoch": 0.22118316731042895, "grad_norm": 0.09223110228776932, "learning_rate": 0.00017809892122939142, "loss": 1.0094, "step": 544 }, { "epoch": 0.22158975401504372, "grad_norm": 0.09049870073795319, "learning_rate": 0.00017805821290453898, "loss": 0.8829, "step": 545 }, { "epoch": 0.22199634071965846, "grad_norm": 0.10157813131809235, "learning_rate": 0.00017801750457968656, "loss": 1.0904, "step": 546 }, { "epoch": 0.22240292742427323, "grad_norm": 0.09934356063604355, "learning_rate": 0.0001779767962548341, "loss": 1.0708, "step": 547 }, { "epoch": 0.222809514128888, "grad_norm": 0.09037156403064728, "learning_rate": 0.0001779360879299817, "loss": 0.916, "step": 548 }, { "epoch": 0.22321610083350274, "grad_norm": 0.09347829967737198, "learning_rate": 0.00017789537960512925, "loss": 1.0328, "step": 549 }, { "epoch": 0.2236226875381175, "grad_norm": 0.087796151638031, "learning_rate": 0.00017785467128027683, "loss": 0.9961, "step": 550 }, { "epoch": 
0.22402927424273225, "grad_norm": 0.09518422931432724, "learning_rate": 0.00017781396295542438, "loss": 0.9855, "step": 551 }, { "epoch": 0.22443586094734702, "grad_norm": 0.09606748074293137, "learning_rate": 0.00017777325463057194, "loss": 0.954, "step": 552 }, { "epoch": 0.2248424476519618, "grad_norm": 0.09338165074586868, "learning_rate": 0.00017773254630571955, "loss": 1.0876, "step": 553 }, { "epoch": 0.22524903435657653, "grad_norm": 0.09242440015077591, "learning_rate": 0.0001776918379808671, "loss": 0.9418, "step": 554 }, { "epoch": 0.2256556210611913, "grad_norm": 0.0990302637219429, "learning_rate": 0.00017765112965601468, "loss": 1.0641, "step": 555 }, { "epoch": 0.22606220776580604, "grad_norm": 0.09444238990545273, "learning_rate": 0.00017761042133116224, "loss": 1.0315, "step": 556 }, { "epoch": 0.22646879447042081, "grad_norm": 0.08771083503961563, "learning_rate": 0.0001775697130063098, "loss": 0.9898, "step": 557 }, { "epoch": 0.22687538117503558, "grad_norm": 0.10041147470474243, "learning_rate": 0.00017752900468145737, "loss": 1.0478, "step": 558 }, { "epoch": 0.22728196787965033, "grad_norm": 0.0933571383357048, "learning_rate": 0.00017748829635660492, "loss": 1.0002, "step": 559 }, { "epoch": 0.2276885545842651, "grad_norm": 0.0912991389632225, "learning_rate": 0.0001774475880317525, "loss": 1.0807, "step": 560 }, { "epoch": 0.22809514128887987, "grad_norm": 0.09350984543561935, "learning_rate": 0.00017740687970690006, "loss": 0.8962, "step": 561 }, { "epoch": 0.2285017279934946, "grad_norm": 0.0978541299700737, "learning_rate": 0.00017736617138204764, "loss": 1.0339, "step": 562 }, { "epoch": 0.22890831469810938, "grad_norm": 0.08964958041906357, "learning_rate": 0.0001773254630571952, "loss": 1.051, "step": 563 }, { "epoch": 0.22931490140272412, "grad_norm": 0.09241898357868195, "learning_rate": 0.00017728475473234275, "loss": 0.903, "step": 564 }, { "epoch": 0.2297214881073389, "grad_norm": 0.09366483986377716, "learning_rate": 
0.00017724404640749036, "loss": 1.0055, "step": 565 }, { "epoch": 0.23012807481195366, "grad_norm": 0.10184673964977264, "learning_rate": 0.0001772033380826379, "loss": 1.004, "step": 566 }, { "epoch": 0.2305346615165684, "grad_norm": 0.09287306666374207, "learning_rate": 0.0001771626297577855, "loss": 0.9667, "step": 567 }, { "epoch": 0.23094124822118317, "grad_norm": 0.08905091136693954, "learning_rate": 0.00017712192143293305, "loss": 0.9295, "step": 568 }, { "epoch": 0.2313478349257979, "grad_norm": 0.0908786877989769, "learning_rate": 0.0001770812131080806, "loss": 0.8957, "step": 569 }, { "epoch": 0.23175442163041268, "grad_norm": 0.10284281522035599, "learning_rate": 0.00017704050478322818, "loss": 1.1311, "step": 570 }, { "epoch": 0.23216100833502745, "grad_norm": 0.09007006883621216, "learning_rate": 0.00017699979645837574, "loss": 0.9919, "step": 571 }, { "epoch": 0.2325675950396422, "grad_norm": 0.09025272727012634, "learning_rate": 0.00017695908813352332, "loss": 0.9057, "step": 572 }, { "epoch": 0.23297418174425696, "grad_norm": 0.0994710698723793, "learning_rate": 0.00017691837980867087, "loss": 1.1472, "step": 573 }, { "epoch": 0.23338076844887173, "grad_norm": 0.09117428958415985, "learning_rate": 0.00017687767148381845, "loss": 0.9665, "step": 574 }, { "epoch": 0.23378735515348648, "grad_norm": 0.0893009826540947, "learning_rate": 0.000176836963158966, "loss": 0.951, "step": 575 }, { "epoch": 0.23419394185810125, "grad_norm": 0.08649599552154541, "learning_rate": 0.0001767962548341136, "loss": 0.925, "step": 576 }, { "epoch": 0.234600528562716, "grad_norm": 0.0928448736667633, "learning_rate": 0.00017675554650926117, "loss": 0.9253, "step": 577 }, { "epoch": 0.23500711526733076, "grad_norm": 0.10335158556699753, "learning_rate": 0.00017671483818440872, "loss": 1.1171, "step": 578 }, { "epoch": 0.23541370197194553, "grad_norm": 0.09889842569828033, "learning_rate": 0.0001766741298595563, "loss": 1.0005, "step": 579 }, { "epoch": 0.23582028867656027, 
"grad_norm": 0.09655506163835526, "learning_rate": 0.00017663342153470386, "loss": 1.0273, "step": 580 }, { "epoch": 0.23622687538117504, "grad_norm": 0.09516560286283493, "learning_rate": 0.0001765927132098514, "loss": 1.024, "step": 581 }, { "epoch": 0.23663346208578978, "grad_norm": 0.10024843364953995, "learning_rate": 0.000176552004884999, "loss": 1.0299, "step": 582 }, { "epoch": 0.23704004879040455, "grad_norm": 0.10152596235275269, "learning_rate": 0.00017651129656014655, "loss": 0.9658, "step": 583 }, { "epoch": 0.23744663549501932, "grad_norm": 0.09654249995946884, "learning_rate": 0.00017647058823529413, "loss": 1.0722, "step": 584 }, { "epoch": 0.23785322219963406, "grad_norm": 0.09112072736024857, "learning_rate": 0.00017642987991044168, "loss": 0.9846, "step": 585 }, { "epoch": 0.23825980890424883, "grad_norm": 0.09640034288167953, "learning_rate": 0.00017638917158558926, "loss": 1.0501, "step": 586 }, { "epoch": 0.2386663956088636, "grad_norm": 0.09564584493637085, "learning_rate": 0.00017634846326073682, "loss": 0.955, "step": 587 }, { "epoch": 0.23907298231347834, "grad_norm": 0.10815359652042389, "learning_rate": 0.0001763077549358844, "loss": 1.203, "step": 588 }, { "epoch": 0.2394795690180931, "grad_norm": 0.09078256040811539, "learning_rate": 0.00017626704661103198, "loss": 0.9881, "step": 589 }, { "epoch": 0.23988615572270786, "grad_norm": 0.09075487405061722, "learning_rate": 0.00017622633828617954, "loss": 0.984, "step": 590 }, { "epoch": 0.24029274242732263, "grad_norm": 0.09048381447792053, "learning_rate": 0.00017618562996132712, "loss": 1.0235, "step": 591 }, { "epoch": 0.2406993291319374, "grad_norm": 0.09820905327796936, "learning_rate": 0.00017614492163647467, "loss": 0.9763, "step": 592 }, { "epoch": 0.24110591583655214, "grad_norm": 0.0961097925901413, "learning_rate": 0.00017610421331162222, "loss": 1.1035, "step": 593 }, { "epoch": 0.2415125025411669, "grad_norm": 0.0877358540892601, "learning_rate": 0.0001760635049867698, "loss": 
0.8962, "step": 594 }, { "epoch": 0.24191908924578168, "grad_norm": 0.09730017930269241, "learning_rate": 0.00017602279666191736, "loss": 1.1232, "step": 595 }, { "epoch": 0.24232567595039642, "grad_norm": 0.09486240148544312, "learning_rate": 0.00017598208833706494, "loss": 1.0566, "step": 596 }, { "epoch": 0.2427322626550112, "grad_norm": 0.09367606788873672, "learning_rate": 0.0001759413800122125, "loss": 0.9934, "step": 597 }, { "epoch": 0.24313884935962593, "grad_norm": 0.09046703577041626, "learning_rate": 0.00017590067168736008, "loss": 0.9137, "step": 598 }, { "epoch": 0.2435454360642407, "grad_norm": 0.09512536972761154, "learning_rate": 0.00017585996336250766, "loss": 0.9733, "step": 599 }, { "epoch": 0.24395202276885547, "grad_norm": 0.08619649708271027, "learning_rate": 0.0001758192550376552, "loss": 0.8777, "step": 600 }, { "epoch": 0.2443586094734702, "grad_norm": 0.09386080503463745, "learning_rate": 0.0001757785467128028, "loss": 1.0171, "step": 601 }, { "epoch": 0.24476519617808498, "grad_norm": 0.098211869597435, "learning_rate": 0.00017573783838795035, "loss": 1.0283, "step": 602 }, { "epoch": 0.24517178288269972, "grad_norm": 0.08785215020179749, "learning_rate": 0.00017569713006309793, "loss": 0.9256, "step": 603 }, { "epoch": 0.2455783695873145, "grad_norm": 0.09419196099042892, "learning_rate": 0.00017565642173824548, "loss": 1.0727, "step": 604 }, { "epoch": 0.24598495629192926, "grad_norm": 0.08359744399785995, "learning_rate": 0.00017561571341339304, "loss": 0.8128, "step": 605 }, { "epoch": 0.246391542996544, "grad_norm": 0.09521903842687607, "learning_rate": 0.00017557500508854062, "loss": 1.0039, "step": 606 }, { "epoch": 0.24679812970115877, "grad_norm": 0.09249220043420792, "learning_rate": 0.00017553429676368817, "loss": 0.9793, "step": 607 }, { "epoch": 0.24720471640577354, "grad_norm": 0.09042320400476456, "learning_rate": 0.00017549358843883575, "loss": 0.9552, "step": 608 }, { "epoch": 0.2476113031103883, "grad_norm": 
0.09921760857105255, "learning_rate": 0.0001754528801139833, "loss": 1.0438, "step": 609 }, { "epoch": 0.24801788981500306, "grad_norm": 0.09381897002458572, "learning_rate": 0.0001754121717891309, "loss": 0.9345, "step": 610 }, { "epoch": 0.2484244765196178, "grad_norm": 0.10531708598136902, "learning_rate": 0.00017537146346427847, "loss": 1.0504, "step": 611 }, { "epoch": 0.24883106322423257, "grad_norm": 0.08689824491739273, "learning_rate": 0.00017533075513942602, "loss": 0.9314, "step": 612 }, { "epoch": 0.24923764992884734, "grad_norm": 0.0852714404463768, "learning_rate": 0.0001752900468145736, "loss": 0.9555, "step": 613 }, { "epoch": 0.24964423663346208, "grad_norm": 0.09126532077789307, "learning_rate": 0.00017524933848972116, "loss": 1.0025, "step": 614 }, { "epoch": 0.2500508233380768, "grad_norm": 0.09271440654993057, "learning_rate": 0.0001752086301648687, "loss": 0.9753, "step": 615 }, { "epoch": 0.2504574100426916, "grad_norm": 0.09532623738050461, "learning_rate": 0.0001751679218400163, "loss": 1.0158, "step": 616 }, { "epoch": 0.25086399674730636, "grad_norm": 0.09074733406305313, "learning_rate": 0.00017512721351516385, "loss": 0.9335, "step": 617 }, { "epoch": 0.2512705834519211, "grad_norm": 0.08851765096187592, "learning_rate": 0.00017508650519031143, "loss": 0.9356, "step": 618 }, { "epoch": 0.2516771701565359, "grad_norm": 0.09362735599279404, "learning_rate": 0.00017504579686545898, "loss": 0.9671, "step": 619 }, { "epoch": 0.25208375686115064, "grad_norm": 0.09651681780815125, "learning_rate": 0.00017500508854060656, "loss": 1.0424, "step": 620 }, { "epoch": 0.2524903435657654, "grad_norm": 0.0909847766160965, "learning_rate": 0.00017496438021575412, "loss": 0.9606, "step": 621 }, { "epoch": 0.2528969302703802, "grad_norm": 0.09061215072870255, "learning_rate": 0.0001749236718909017, "loss": 1.0444, "step": 622 }, { "epoch": 0.2533035169749949, "grad_norm": 0.10378221422433853, "learning_rate": 0.00017488296356604928, "loss": 0.991, 
"step": 623 }, { "epoch": 0.25371010367960967, "grad_norm": 0.09829584509134293, "learning_rate": 0.00017484225524119683, "loss": 0.8911, "step": 624 }, { "epoch": 0.2541166903842244, "grad_norm": 0.0915360227227211, "learning_rate": 0.00017480154691634442, "loss": 0.9616, "step": 625 }, { "epoch": 0.2545232770888392, "grad_norm": 0.09093396365642548, "learning_rate": 0.00017476083859149197, "loss": 0.886, "step": 626 }, { "epoch": 0.25492986379345395, "grad_norm": 0.09621252864599228, "learning_rate": 0.00017472013026663952, "loss": 1.0167, "step": 627 }, { "epoch": 0.2553364504980687, "grad_norm": 0.10381032526493073, "learning_rate": 0.0001746794219417871, "loss": 1.0008, "step": 628 }, { "epoch": 0.2557430372026835, "grad_norm": 0.10300707817077637, "learning_rate": 0.00017463871361693466, "loss": 1.0176, "step": 629 }, { "epoch": 0.25614962390729823, "grad_norm": 0.09527314454317093, "learning_rate": 0.00017459800529208224, "loss": 0.9698, "step": 630 }, { "epoch": 0.25655621061191297, "grad_norm": 0.09126346558332443, "learning_rate": 0.0001745572969672298, "loss": 0.9575, "step": 631 }, { "epoch": 0.25696279731652777, "grad_norm": 0.10338298231363297, "learning_rate": 0.00017451658864237738, "loss": 0.9666, "step": 632 }, { "epoch": 0.2573693840211425, "grad_norm": 0.10729333758354187, "learning_rate": 0.00017447588031752493, "loss": 1.0857, "step": 633 }, { "epoch": 0.25777597072575725, "grad_norm": 0.09935057163238525, "learning_rate": 0.0001744351719926725, "loss": 1.0387, "step": 634 }, { "epoch": 0.25818255743037205, "grad_norm": 0.10013054311275482, "learning_rate": 0.0001743944636678201, "loss": 1.0938, "step": 635 }, { "epoch": 0.2585891441349868, "grad_norm": 0.1010405421257019, "learning_rate": 0.00017435375534296765, "loss": 0.9507, "step": 636 }, { "epoch": 0.25899573083960153, "grad_norm": 0.09375156462192535, "learning_rate": 0.00017431304701811523, "loss": 0.965, "step": 637 }, { "epoch": 0.2594023175442163, "grad_norm": 0.09231811761856079, 
"learning_rate": 0.00017427233869326278, "loss": 1.0549, "step": 638 }, { "epoch": 0.2598089042488311, "grad_norm": 0.09252883493900299, "learning_rate": 0.00017423163036841034, "loss": 0.9562, "step": 639 }, { "epoch": 0.2602154909534458, "grad_norm": 0.08758233487606049, "learning_rate": 0.00017419092204355792, "loss": 0.8267, "step": 640 }, { "epoch": 0.26062207765806056, "grad_norm": 0.11551918089389801, "learning_rate": 0.00017415021371870547, "loss": 1.1932, "step": 641 }, { "epoch": 0.26102866436267536, "grad_norm": 0.09874440729618073, "learning_rate": 0.00017410950539385305, "loss": 0.994, "step": 642 }, { "epoch": 0.2614352510672901, "grad_norm": 0.0956895500421524, "learning_rate": 0.0001740687970690006, "loss": 1.0509, "step": 643 }, { "epoch": 0.26184183777190484, "grad_norm": 0.09503257274627686, "learning_rate": 0.0001740280887441482, "loss": 1.0729, "step": 644 }, { "epoch": 0.26224842447651964, "grad_norm": 0.09010849893093109, "learning_rate": 0.00017398738041929577, "loss": 1.0029, "step": 645 }, { "epoch": 0.2626550111811344, "grad_norm": 0.1030508279800415, "learning_rate": 0.00017394667209444332, "loss": 1.0345, "step": 646 }, { "epoch": 0.2630615978857491, "grad_norm": 0.09376908838748932, "learning_rate": 0.0001739059637695909, "loss": 0.9222, "step": 647 }, { "epoch": 0.2634681845903639, "grad_norm": 0.09775326400995255, "learning_rate": 0.00017386525544473846, "loss": 1.0516, "step": 648 }, { "epoch": 0.26387477129497866, "grad_norm": 0.08575090020895004, "learning_rate": 0.00017382454711988604, "loss": 0.9617, "step": 649 }, { "epoch": 0.2642813579995934, "grad_norm": 0.10131283849477768, "learning_rate": 0.0001737838387950336, "loss": 0.9831, "step": 650 }, { "epoch": 0.2646879447042082, "grad_norm": 0.10559401661157608, "learning_rate": 0.00017374313047018115, "loss": 1.1504, "step": 651 }, { "epoch": 0.26509453140882294, "grad_norm": 0.10393818467855453, "learning_rate": 0.00017370242214532873, "loss": 0.9879, "step": 652 }, { "epoch": 
0.2655011181134377, "grad_norm": 0.0909782201051712, "learning_rate": 0.00017366171382047628, "loss": 0.9822, "step": 653 }, { "epoch": 0.2659077048180524, "grad_norm": 0.07904715836048126, "learning_rate": 0.00017362100549562386, "loss": 0.9603, "step": 654 }, { "epoch": 0.2663142915226672, "grad_norm": 0.1078203096985817, "learning_rate": 0.00017358029717077142, "loss": 0.9873, "step": 655 }, { "epoch": 0.26672087822728197, "grad_norm": 0.09209142625331879, "learning_rate": 0.000173539588845919, "loss": 0.9763, "step": 656 }, { "epoch": 0.2671274649318967, "grad_norm": 0.10040608793497086, "learning_rate": 0.00017349888052106658, "loss": 1.0362, "step": 657 }, { "epoch": 0.2675340516365115, "grad_norm": 0.1019914448261261, "learning_rate": 0.00017345817219621413, "loss": 0.9715, "step": 658 }, { "epoch": 0.26794063834112625, "grad_norm": 0.09650052338838577, "learning_rate": 0.00017341746387136172, "loss": 0.9212, "step": 659 }, { "epoch": 0.268347225045741, "grad_norm": 0.10352949053049088, "learning_rate": 0.00017337675554650927, "loss": 0.9535, "step": 660 }, { "epoch": 0.2687538117503558, "grad_norm": 0.0910978689789772, "learning_rate": 0.00017333604722165685, "loss": 0.92, "step": 661 }, { "epoch": 0.26916039845497053, "grad_norm": 0.0917704775929451, "learning_rate": 0.0001732953388968044, "loss": 0.9926, "step": 662 }, { "epoch": 0.26956698515958527, "grad_norm": 0.09258386492729187, "learning_rate": 0.00017325463057195196, "loss": 1.0495, "step": 663 }, { "epoch": 0.26997357186420007, "grad_norm": 0.08842117339372635, "learning_rate": 0.00017321392224709954, "loss": 0.8509, "step": 664 }, { "epoch": 0.2703801585688148, "grad_norm": 0.10442432016134262, "learning_rate": 0.0001731732139222471, "loss": 1.0109, "step": 665 }, { "epoch": 0.27078674527342955, "grad_norm": 0.0932939201593399, "learning_rate": 0.00017313250559739468, "loss": 0.979, "step": 666 }, { "epoch": 0.2711933319780443, "grad_norm": 0.09750665724277496, "learning_rate": 
0.00017309179727254223, "loss": 1.0937, "step": 667 }, { "epoch": 0.2715999186826591, "grad_norm": 0.0916363000869751, "learning_rate": 0.0001730510889476898, "loss": 0.9497, "step": 668 }, { "epoch": 0.27200650538727383, "grad_norm": 0.08987271040678024, "learning_rate": 0.0001730103806228374, "loss": 0.9528, "step": 669 }, { "epoch": 0.2724130920918886, "grad_norm": 0.10026190429925919, "learning_rate": 0.00017296967229798495, "loss": 0.9323, "step": 670 }, { "epoch": 0.2728196787965034, "grad_norm": 0.09267815947532654, "learning_rate": 0.00017292896397313253, "loss": 0.9469, "step": 671 }, { "epoch": 0.2732262655011181, "grad_norm": 0.0934181734919548, "learning_rate": 0.00017288825564828008, "loss": 0.9217, "step": 672 }, { "epoch": 0.27363285220573286, "grad_norm": 0.09324284642934799, "learning_rate": 0.00017284754732342766, "loss": 0.941, "step": 673 }, { "epoch": 0.27403943891034765, "grad_norm": 0.09280567616224289, "learning_rate": 0.00017280683899857522, "loss": 0.9792, "step": 674 }, { "epoch": 0.2744460256149624, "grad_norm": 0.09610418230295181, "learning_rate": 0.00017276613067372277, "loss": 1.0206, "step": 675 }, { "epoch": 0.27485261231957714, "grad_norm": 0.09881840646266937, "learning_rate": 0.00017272542234887035, "loss": 1.027, "step": 676 }, { "epoch": 0.27525919902419194, "grad_norm": 0.08691172301769257, "learning_rate": 0.0001726847140240179, "loss": 0.9146, "step": 677 }, { "epoch": 0.2756657857288067, "grad_norm": 0.09320187568664551, "learning_rate": 0.0001726440056991655, "loss": 0.9386, "step": 678 }, { "epoch": 0.2760723724334214, "grad_norm": 0.10148660838603973, "learning_rate": 0.00017260329737431304, "loss": 1.0792, "step": 679 }, { "epoch": 0.27647895913803616, "grad_norm": 0.09676487743854523, "learning_rate": 0.00017256258904946062, "loss": 0.9966, "step": 680 }, { "epoch": 0.27688554584265096, "grad_norm": 0.10451057553291321, "learning_rate": 0.0001725218807246082, "loss": 1.0894, "step": 681 }, { "epoch": 
0.2772921325472657, "grad_norm": 0.0993044376373291, "learning_rate": 0.00017248117239975576, "loss": 0.9681, "step": 682 }, { "epoch": 0.27769871925188044, "grad_norm": 0.10459738969802856, "learning_rate": 0.00017244046407490334, "loss": 1.0805, "step": 683 }, { "epoch": 0.27810530595649524, "grad_norm": 0.09404733031988144, "learning_rate": 0.0001723997557500509, "loss": 1.019, "step": 684 }, { "epoch": 0.27851189266111, "grad_norm": 0.09058643132448196, "learning_rate": 0.00017235904742519847, "loss": 1.0262, "step": 685 }, { "epoch": 0.2789184793657247, "grad_norm": 0.08885390311479568, "learning_rate": 0.00017231833910034603, "loss": 0.9735, "step": 686 }, { "epoch": 0.2793250660703395, "grad_norm": 0.09429844468832016, "learning_rate": 0.00017227763077549358, "loss": 0.9265, "step": 687 }, { "epoch": 0.27973165277495426, "grad_norm": 0.09403959661722183, "learning_rate": 0.00017223692245064116, "loss": 1.105, "step": 688 }, { "epoch": 0.280138239479569, "grad_norm": 0.0966695174574852, "learning_rate": 0.00017219621412578872, "loss": 1.1486, "step": 689 }, { "epoch": 0.2805448261841838, "grad_norm": 0.08807272464036942, "learning_rate": 0.0001721555058009363, "loss": 0.7858, "step": 690 }, { "epoch": 0.28095141288879855, "grad_norm": 0.09138672798871994, "learning_rate": 0.00017211479747608388, "loss": 1.0746, "step": 691 }, { "epoch": 0.2813579995934133, "grad_norm": 0.1015787124633789, "learning_rate": 0.00017207408915123143, "loss": 1.0996, "step": 692 }, { "epoch": 0.28176458629802803, "grad_norm": 0.1018923744559288, "learning_rate": 0.00017203338082637901, "loss": 0.9986, "step": 693 }, { "epoch": 0.2821711730026428, "grad_norm": 0.10809756815433502, "learning_rate": 0.00017199267250152657, "loss": 1.0796, "step": 694 }, { "epoch": 0.28257775970725757, "grad_norm": 0.08786237984895706, "learning_rate": 0.00017195196417667415, "loss": 0.9383, "step": 695 }, { "epoch": 0.2829843464118723, "grad_norm": 0.12186282873153687, "learning_rate": 
0.0001719112558518217, "loss": 0.9624, "step": 696 }, { "epoch": 0.2833909331164871, "grad_norm": 0.09969845414161682, "learning_rate": 0.00017187054752696929, "loss": 0.9895, "step": 697 }, { "epoch": 0.28379751982110185, "grad_norm": 0.09089677035808563, "learning_rate": 0.00017182983920211684, "loss": 1.0026, "step": 698 }, { "epoch": 0.2842041065257166, "grad_norm": 0.10096369683742523, "learning_rate": 0.0001717891308772644, "loss": 1.0723, "step": 699 }, { "epoch": 0.2846106932303314, "grad_norm": 0.09782766550779343, "learning_rate": 0.00017174842255241197, "loss": 1.0235, "step": 700 }, { "epoch": 0.28501727993494613, "grad_norm": 0.10204874724149704, "learning_rate": 0.00017170771422755953, "loss": 1.0794, "step": 701 }, { "epoch": 0.2854238666395609, "grad_norm": 0.08970806747674942, "learning_rate": 0.0001716670059027071, "loss": 1.0124, "step": 702 }, { "epoch": 0.2858304533441757, "grad_norm": 0.10221361368894577, "learning_rate": 0.0001716262975778547, "loss": 1.0053, "step": 703 }, { "epoch": 0.2862370400487904, "grad_norm": 0.09354016184806824, "learning_rate": 0.00017158558925300225, "loss": 1.0673, "step": 704 }, { "epoch": 0.28664362675340516, "grad_norm": 0.10699865967035294, "learning_rate": 0.00017154488092814983, "loss": 1.1191, "step": 705 }, { "epoch": 0.2870502134580199, "grad_norm": 0.10488536953926086, "learning_rate": 0.00017150417260329738, "loss": 1.0351, "step": 706 }, { "epoch": 0.2874568001626347, "grad_norm": 0.09427926689386368, "learning_rate": 0.00017146346427844496, "loss": 0.9539, "step": 707 }, { "epoch": 0.28786338686724944, "grad_norm": 0.1017470583319664, "learning_rate": 0.00017142275595359252, "loss": 1.0699, "step": 708 }, { "epoch": 0.2882699735718642, "grad_norm": 0.09857328236103058, "learning_rate": 0.00017138204762874007, "loss": 1.1037, "step": 709 }, { "epoch": 0.288676560276479, "grad_norm": 0.08667341619729996, "learning_rate": 0.00017134133930388765, "loss": 0.9774, "step": 710 }, { "epoch": 
0.2890831469810937, "grad_norm": 0.09951958060264587, "learning_rate": 0.0001713006309790352, "loss": 0.9343, "step": 711 }, { "epoch": 0.28948973368570846, "grad_norm": 0.11067653447389603, "learning_rate": 0.0001712599226541828, "loss": 1.004, "step": 712 }, { "epoch": 0.28989632039032326, "grad_norm": 0.09549311548471451, "learning_rate": 0.00017121921432933034, "loss": 0.9841, "step": 713 }, { "epoch": 0.290302907094938, "grad_norm": 0.08448553830385208, "learning_rate": 0.00017117850600447792, "loss": 0.8772, "step": 714 }, { "epoch": 0.29070949379955274, "grad_norm": 0.10418415814638138, "learning_rate": 0.0001711377976796255, "loss": 1.0146, "step": 715 }, { "epoch": 0.29111608050416754, "grad_norm": 0.0994696393609047, "learning_rate": 0.00017109708935477306, "loss": 1.0321, "step": 716 }, { "epoch": 0.2915226672087823, "grad_norm": 0.10770169645547867, "learning_rate": 0.00017105638102992064, "loss": 0.9757, "step": 717 }, { "epoch": 0.291929253913397, "grad_norm": 0.09281262755393982, "learning_rate": 0.0001710156727050682, "loss": 0.9315, "step": 718 }, { "epoch": 0.29233584061801177, "grad_norm": 0.09385684132575989, "learning_rate": 0.00017097496438021577, "loss": 0.984, "step": 719 }, { "epoch": 0.29274242732262656, "grad_norm": 0.09654410928487778, "learning_rate": 0.00017093425605536333, "loss": 1.0594, "step": 720 }, { "epoch": 0.2931490140272413, "grad_norm": 0.09549154341220856, "learning_rate": 0.00017089354773051088, "loss": 0.8973, "step": 721 }, { "epoch": 0.29355560073185605, "grad_norm": 0.10645559430122375, "learning_rate": 0.00017085283940565846, "loss": 1.0742, "step": 722 }, { "epoch": 0.29396218743647085, "grad_norm": 0.10242202132940292, "learning_rate": 0.00017081213108080602, "loss": 1.0859, "step": 723 }, { "epoch": 0.2943687741410856, "grad_norm": 0.09050785005092621, "learning_rate": 0.0001707714227559536, "loss": 0.9552, "step": 724 }, { "epoch": 0.29477536084570033, "grad_norm": 0.09605402499437332, "learning_rate": 
0.00017073071443110115, "loss": 1.0419, "step": 725 }, { "epoch": 0.2951819475503151, "grad_norm": 0.10179407894611359, "learning_rate": 0.00017069000610624873, "loss": 1.018, "step": 726 }, { "epoch": 0.29558853425492987, "grad_norm": 0.10907282680273056, "learning_rate": 0.00017064929778139631, "loss": 0.9834, "step": 727 }, { "epoch": 0.2959951209595446, "grad_norm": 0.09972859919071198, "learning_rate": 0.00017060858945654387, "loss": 0.9138, "step": 728 }, { "epoch": 0.2964017076641594, "grad_norm": 0.10035282373428345, "learning_rate": 0.00017056788113169145, "loss": 1.0388, "step": 729 }, { "epoch": 0.29680829436877415, "grad_norm": 0.09865434467792511, "learning_rate": 0.000170527172806839, "loss": 1.088, "step": 730 }, { "epoch": 0.2972148810733889, "grad_norm": 0.09653953462839127, "learning_rate": 0.00017048646448198659, "loss": 1.0562, "step": 731 }, { "epoch": 0.29762146777800363, "grad_norm": 0.09930591285228729, "learning_rate": 0.00017044575615713414, "loss": 1.0312, "step": 732 }, { "epoch": 0.29802805448261843, "grad_norm": 0.10804631561040878, "learning_rate": 0.0001704050478322817, "loss": 0.9913, "step": 733 }, { "epoch": 0.2984346411872332, "grad_norm": 0.09772603958845139, "learning_rate": 0.00017036433950742927, "loss": 1.0463, "step": 734 }, { "epoch": 0.2988412278918479, "grad_norm": 0.09799183160066605, "learning_rate": 0.00017032363118257683, "loss": 1.0048, "step": 735 }, { "epoch": 0.2992478145964627, "grad_norm": 0.09203165769577026, "learning_rate": 0.0001702829228577244, "loss": 0.9581, "step": 736 }, { "epoch": 0.29965440130107746, "grad_norm": 0.1009124219417572, "learning_rate": 0.000170242214532872, "loss": 1.0613, "step": 737 }, { "epoch": 0.3000609880056922, "grad_norm": 0.10120426118373871, "learning_rate": 0.00017020150620801954, "loss": 0.9948, "step": 738 }, { "epoch": 0.300467574710307, "grad_norm": 0.09432508796453476, "learning_rate": 0.00017016079788316713, "loss": 1.0334, "step": 739 }, { "epoch": 0.30087416141492174, 
"grad_norm": 0.09501038491725922, "learning_rate": 0.00017012008955831468, "loss": 1.0526, "step": 740 }, { "epoch": 0.3012807481195365, "grad_norm": 0.08357376605272293, "learning_rate": 0.00017007938123346226, "loss": 0.9447, "step": 741 }, { "epoch": 0.3016873348241513, "grad_norm": 0.10098525881767273, "learning_rate": 0.00017003867290860982, "loss": 1.0812, "step": 742 }, { "epoch": 0.302093921528766, "grad_norm": 0.09913761168718338, "learning_rate": 0.0001699979645837574, "loss": 0.9696, "step": 743 }, { "epoch": 0.30250050823338076, "grad_norm": 0.10989236831665039, "learning_rate": 0.00016995725625890495, "loss": 1.1352, "step": 744 }, { "epoch": 0.3029070949379955, "grad_norm": 0.09131161123514175, "learning_rate": 0.0001699165479340525, "loss": 0.9211, "step": 745 }, { "epoch": 0.3033136816426103, "grad_norm": 0.09235724061727524, "learning_rate": 0.00016987583960920009, "loss": 0.9645, "step": 746 }, { "epoch": 0.30372026834722504, "grad_norm": 0.09928274899721146, "learning_rate": 0.00016983513128434764, "loss": 1.0359, "step": 747 }, { "epoch": 0.3041268550518398, "grad_norm": 0.094466932117939, "learning_rate": 0.00016979442295949522, "loss": 0.9406, "step": 748 }, { "epoch": 0.3045334417564546, "grad_norm": 0.0981815829873085, "learning_rate": 0.0001697537146346428, "loss": 1.0819, "step": 749 }, { "epoch": 0.3049400284610693, "grad_norm": 0.10226401686668396, "learning_rate": 0.00016971300630979036, "loss": 1.0499, "step": 750 }, { "epoch": 0.30534661516568407, "grad_norm": 0.09603258967399597, "learning_rate": 0.00016967229798493794, "loss": 1.046, "step": 751 }, { "epoch": 0.30575320187029886, "grad_norm": 0.08673054724931717, "learning_rate": 0.0001696315896600855, "loss": 0.9594, "step": 752 }, { "epoch": 0.3061597885749136, "grad_norm": 0.08766573667526245, "learning_rate": 0.00016959088133523307, "loss": 0.9368, "step": 753 }, { "epoch": 0.30656637527952835, "grad_norm": 0.09147453308105469, "learning_rate": 0.00016955017301038063, "loss": 
1.0122, "step": 754 }, { "epoch": 0.30697296198414314, "grad_norm": 0.09681879729032516, "learning_rate": 0.0001695094646855282, "loss": 1.0743, "step": 755 }, { "epoch": 0.3073795486887579, "grad_norm": 0.09195173531770706, "learning_rate": 0.00016946875636067576, "loss": 0.9974, "step": 756 }, { "epoch": 0.30778613539337263, "grad_norm": 0.08841879665851593, "learning_rate": 0.00016942804803582332, "loss": 0.9233, "step": 757 }, { "epoch": 0.30819272209798737, "grad_norm": 0.09924349188804626, "learning_rate": 0.0001693873397109709, "loss": 1.0277, "step": 758 }, { "epoch": 0.30859930880260217, "grad_norm": 0.09324993193149567, "learning_rate": 0.00016934663138611845, "loss": 0.9836, "step": 759 }, { "epoch": 0.3090058955072169, "grad_norm": 0.0990774929523468, "learning_rate": 0.00016930592306126606, "loss": 1.1064, "step": 760 }, { "epoch": 0.30941248221183165, "grad_norm": 0.09344697743654251, "learning_rate": 0.00016926521473641361, "loss": 0.9465, "step": 761 }, { "epoch": 0.30981906891644645, "grad_norm": 0.10055997967720032, "learning_rate": 0.00016922450641156117, "loss": 1.0706, "step": 762 }, { "epoch": 0.3102256556210612, "grad_norm": 0.09746789187192917, "learning_rate": 0.00016918379808670875, "loss": 1.0355, "step": 763 }, { "epoch": 0.31063224232567593, "grad_norm": 0.08599979430437088, "learning_rate": 0.0001691430897618563, "loss": 0.9445, "step": 764 }, { "epoch": 0.31103882903029073, "grad_norm": 0.09424544125795364, "learning_rate": 0.00016910238143700388, "loss": 1.0393, "step": 765 }, { "epoch": 0.3114454157349055, "grad_norm": 0.09379003942012787, "learning_rate": 0.00016906167311215144, "loss": 0.9328, "step": 766 }, { "epoch": 0.3118520024395202, "grad_norm": 0.09361857920885086, "learning_rate": 0.00016902096478729902, "loss": 1.017, "step": 767 }, { "epoch": 0.312258589144135, "grad_norm": 0.09718876332044601, "learning_rate": 0.00016898025646244657, "loss": 1.057, "step": 768 }, { "epoch": 0.31266517584874975, "grad_norm": 
0.10596045851707458, "learning_rate": 0.00016893954813759413, "loss": 1.1706, "step": 769 }, { "epoch": 0.3130717625533645, "grad_norm": 0.09151418507099152, "learning_rate": 0.0001688988398127417, "loss": 1.0758, "step": 770 }, { "epoch": 0.31347834925797924, "grad_norm": 0.11438169330358505, "learning_rate": 0.00016885813148788926, "loss": 0.9558, "step": 771 }, { "epoch": 0.31388493596259404, "grad_norm": 0.10611554235219955, "learning_rate": 0.00016881742316303687, "loss": 1.1583, "step": 772 }, { "epoch": 0.3142915226672088, "grad_norm": 0.10111712664365768, "learning_rate": 0.00016877671483818443, "loss": 1.0377, "step": 773 }, { "epoch": 0.3146981093718235, "grad_norm": 0.08957924693822861, "learning_rate": 0.00016873600651333198, "loss": 0.9819, "step": 774 }, { "epoch": 0.3151046960764383, "grad_norm": 0.09603768587112427, "learning_rate": 0.00016869529818847956, "loss": 0.9849, "step": 775 }, { "epoch": 0.31551128278105306, "grad_norm": 0.10142724215984344, "learning_rate": 0.00016865458986362712, "loss": 1.0325, "step": 776 }, { "epoch": 0.3159178694856678, "grad_norm": 0.09229385852813721, "learning_rate": 0.0001686138815387747, "loss": 1.0085, "step": 777 }, { "epoch": 0.3163244561902826, "grad_norm": 0.09404195845127106, "learning_rate": 0.00016857317321392225, "loss": 0.9239, "step": 778 }, { "epoch": 0.31673104289489734, "grad_norm": 0.0984378457069397, "learning_rate": 0.00016853246488906983, "loss": 0.9911, "step": 779 }, { "epoch": 0.3171376295995121, "grad_norm": 0.09309301525354385, "learning_rate": 0.00016849175656421739, "loss": 0.9738, "step": 780 }, { "epoch": 0.3175442163041269, "grad_norm": 0.09041745960712433, "learning_rate": 0.00016845104823936494, "loss": 0.9106, "step": 781 }, { "epoch": 0.3179508030087416, "grad_norm": 0.09435202926397324, "learning_rate": 0.00016841033991451252, "loss": 0.9261, "step": 782 }, { "epoch": 0.31835738971335636, "grad_norm": 0.10118155926465988, "learning_rate": 0.0001683696315896601, "loss": 0.9127, 
"step": 783 }, { "epoch": 0.3187639764179711, "grad_norm": 0.09075888991355896, "learning_rate": 0.00016832892326480768, "loss": 0.9201, "step": 784 }, { "epoch": 0.3191705631225859, "grad_norm": 0.11029943823814392, "learning_rate": 0.00016828821493995524, "loss": 1.1284, "step": 785 }, { "epoch": 0.31957714982720065, "grad_norm": 0.08948411792516708, "learning_rate": 0.0001682475066151028, "loss": 1.0084, "step": 786 }, { "epoch": 0.3199837365318154, "grad_norm": 0.09159238636493683, "learning_rate": 0.00016820679829025037, "loss": 0.9912, "step": 787 }, { "epoch": 0.3203903232364302, "grad_norm": 0.10452720522880554, "learning_rate": 0.00016816608996539793, "loss": 1.0907, "step": 788 }, { "epoch": 0.3207969099410449, "grad_norm": 0.10225928574800491, "learning_rate": 0.0001681253816405455, "loss": 1.06, "step": 789 }, { "epoch": 0.32120349664565967, "grad_norm": 0.09055547416210175, "learning_rate": 0.00016808467331569306, "loss": 0.9644, "step": 790 }, { "epoch": 0.32161008335027447, "grad_norm": 0.10098310559988022, "learning_rate": 0.00016804396499084064, "loss": 1.2431, "step": 791 }, { "epoch": 0.3220166700548892, "grad_norm": 0.09253955632448196, "learning_rate": 0.0001680032566659882, "loss": 0.8664, "step": 792 }, { "epoch": 0.32242325675950395, "grad_norm": 0.09952203184366226, "learning_rate": 0.00016796254834113575, "loss": 1.0275, "step": 793 }, { "epoch": 0.32282984346411875, "grad_norm": 1376.9244384765625, "learning_rate": 0.00016792184001628333, "loss": 0.971, "step": 794 }, { "epoch": 0.3232364301687335, "grad_norm": 0.0970926433801651, "learning_rate": 0.00016788113169143091, "loss": 1.0805, "step": 795 }, { "epoch": 0.32364301687334823, "grad_norm": 0.0982482060790062, "learning_rate": 0.00016784042336657847, "loss": 1.0573, "step": 796 }, { "epoch": 0.324049603577963, "grad_norm": 0.0871889740228653, "learning_rate": 0.00016779971504172605, "loss": 0.9305, "step": 797 }, { "epoch": 0.32445619028257777, "grad_norm": 0.10493195801973343, 
"learning_rate": 0.0001677590067168736, "loss": 1.052, "step": 798 }, { "epoch": 0.3248627769871925, "grad_norm": 0.10067540407180786, "learning_rate": 0.00016771829839202118, "loss": 1.0014, "step": 799 }, { "epoch": 0.32526936369180726, "grad_norm": 0.12116575986146927, "learning_rate": 0.00016767759006716874, "loss": 1.0831, "step": 800 }, { "epoch": 0.32567595039642205, "grad_norm": 0.10948335379362106, "learning_rate": 0.00016763688174231632, "loss": 0.9823, "step": 801 }, { "epoch": 0.3260825371010368, "grad_norm": 0.12705056369304657, "learning_rate": 0.00016759617341746387, "loss": 1.0244, "step": 802 }, { "epoch": 0.32648912380565154, "grad_norm": 0.10819011181592941, "learning_rate": 0.00016755546509261143, "loss": 1.057, "step": 803 }, { "epoch": 0.32689571051026634, "grad_norm": 0.0998898446559906, "learning_rate": 0.000167514756767759, "loss": 1.0062, "step": 804 }, { "epoch": 0.3273022972148811, "grad_norm": 0.10293715447187424, "learning_rate": 0.00016747404844290656, "loss": 0.9149, "step": 805 }, { "epoch": 0.3277088839194958, "grad_norm": 0.10077858716249466, "learning_rate": 0.00016743334011805417, "loss": 0.9758, "step": 806 }, { "epoch": 0.3281154706241106, "grad_norm": 0.10782469809055328, "learning_rate": 0.00016739263179320173, "loss": 1.1042, "step": 807 }, { "epoch": 0.32852205732872536, "grad_norm": 0.12658415734767914, "learning_rate": 0.00016735192346834928, "loss": 0.9899, "step": 808 }, { "epoch": 0.3289286440333401, "grad_norm": 0.10533516108989716, "learning_rate": 0.00016731121514349686, "loss": 1.0378, "step": 809 }, { "epoch": 0.32933523073795484, "grad_norm": 0.09154223650693893, "learning_rate": 0.00016727050681864441, "loss": 0.9687, "step": 810 }, { "epoch": 0.32974181744256964, "grad_norm": 0.10092673450708389, "learning_rate": 0.000167229798493792, "loss": 1.1347, "step": 811 }, { "epoch": 0.3301484041471844, "grad_norm": 0.11135310679674149, "learning_rate": 0.00016718909016893955, "loss": 1.1032, "step": 812 }, { "epoch": 
0.3305549908517991, "grad_norm": 0.11181894689798355, "learning_rate": 0.00016714838184408713, "loss": 1.0279, "step": 813 }, { "epoch": 0.3309615775564139, "grad_norm": 0.09581311047077179, "learning_rate": 0.00016710767351923469, "loss": 0.9012, "step": 814 }, { "epoch": 0.33136816426102866, "grad_norm": 0.09267252683639526, "learning_rate": 0.00016706696519438224, "loss": 0.872, "step": 815 }, { "epoch": 0.3317747509656434, "grad_norm": 0.10392332822084427, "learning_rate": 0.00016702625686952982, "loss": 1.017, "step": 816 }, { "epoch": 0.3321813376702582, "grad_norm": 0.10805673897266388, "learning_rate": 0.00016698554854467737, "loss": 1.0013, "step": 817 }, { "epoch": 0.33258792437487295, "grad_norm": 0.09724876284599304, "learning_rate": 0.00016694484021982498, "loss": 0.9708, "step": 818 }, { "epoch": 0.3329945110794877, "grad_norm": 0.09445172548294067, "learning_rate": 0.00016690413189497254, "loss": 1.0422, "step": 819 }, { "epoch": 0.3334010977841025, "grad_norm": 0.09621834754943848, "learning_rate": 0.0001668634235701201, "loss": 1.0557, "step": 820 }, { "epoch": 0.3338076844887172, "grad_norm": 0.09800583869218826, "learning_rate": 0.00016682271524526767, "loss": 0.8907, "step": 821 }, { "epoch": 0.33421427119333197, "grad_norm": 0.0968775674700737, "learning_rate": 0.00016678200692041523, "loss": 0.9531, "step": 822 }, { "epoch": 0.3346208578979467, "grad_norm": 0.10581623017787933, "learning_rate": 0.0001667412985955628, "loss": 0.9462, "step": 823 }, { "epoch": 0.3350274446025615, "grad_norm": 0.10356664657592773, "learning_rate": 0.00016670059027071036, "loss": 1.0315, "step": 824 }, { "epoch": 0.33543403130717625, "grad_norm": 0.10202962160110474, "learning_rate": 0.00016665988194585794, "loss": 1.004, "step": 825 }, { "epoch": 0.335840618011791, "grad_norm": 0.09426257014274597, "learning_rate": 0.0001666191736210055, "loss": 0.958, "step": 826 }, { "epoch": 0.3362472047164058, "grad_norm": 0.09597484767436981, "learning_rate": 
0.00016657846529615305, "loss": 0.9446, "step": 827 }, { "epoch": 0.33665379142102053, "grad_norm": 0.10052574425935745, "learning_rate": 0.00016653775697130063, "loss": 1.0467, "step": 828 }, { "epoch": 0.3370603781256353, "grad_norm": 0.09183933585882187, "learning_rate": 0.0001664970486464482, "loss": 0.9665, "step": 829 }, { "epoch": 0.33746696483025007, "grad_norm": 0.10629544407129288, "learning_rate": 0.0001664563403215958, "loss": 1.1186, "step": 830 }, { "epoch": 0.3378735515348648, "grad_norm": 0.09273683279752731, "learning_rate": 0.00016641563199674335, "loss": 1.0235, "step": 831 }, { "epoch": 0.33828013823947956, "grad_norm": 0.10386747121810913, "learning_rate": 0.0001663749236718909, "loss": 1.1214, "step": 832 }, { "epoch": 0.33868672494409435, "grad_norm": 0.0918426588177681, "learning_rate": 0.00016633421534703848, "loss": 0.9122, "step": 833 }, { "epoch": 0.3390933116487091, "grad_norm": 0.09304569661617279, "learning_rate": 0.00016629350702218604, "loss": 0.9478, "step": 834 }, { "epoch": 0.33949989835332384, "grad_norm": 0.09941792488098145, "learning_rate": 0.00016625279869733362, "loss": 0.9306, "step": 835 }, { "epoch": 0.3399064850579386, "grad_norm": 0.10528752952814102, "learning_rate": 0.00016621209037248117, "loss": 1.0313, "step": 836 }, { "epoch": 0.3403130717625534, "grad_norm": 0.09300185739994049, "learning_rate": 0.00016617138204762875, "loss": 0.9339, "step": 837 }, { "epoch": 0.3407196584671681, "grad_norm": 0.10146823525428772, "learning_rate": 0.0001661306737227763, "loss": 1.0543, "step": 838 }, { "epoch": 0.34112624517178286, "grad_norm": 0.10440155863761902, "learning_rate": 0.00016608996539792386, "loss": 1.1149, "step": 839 }, { "epoch": 0.34153283187639766, "grad_norm": 0.1079946756362915, "learning_rate": 0.00016604925707307144, "loss": 1.1279, "step": 840 }, { "epoch": 0.3419394185810124, "grad_norm": 0.08682949841022491, "learning_rate": 0.00016600854874821902, "loss": 0.878, "step": 841 }, { "epoch": 
0.34234600528562714, "grad_norm": 0.10855992883443832, "learning_rate": 0.0001659678404233666, "loss": 0.9611, "step": 842 }, { "epoch": 0.34275259199024194, "grad_norm": 0.089480921626091, "learning_rate": 0.00016592713209851416, "loss": 0.9065, "step": 843 }, { "epoch": 0.3431591786948567, "grad_norm": 0.0934784933924675, "learning_rate": 0.00016588642377366171, "loss": 1.0118, "step": 844 }, { "epoch": 0.3435657653994714, "grad_norm": 0.10359178483486176, "learning_rate": 0.0001658457154488093, "loss": 1.0117, "step": 845 }, { "epoch": 0.3439723521040862, "grad_norm": 0.09534402191638947, "learning_rate": 0.00016580500712395685, "loss": 0.9719, "step": 846 }, { "epoch": 0.34437893880870096, "grad_norm": 0.1008550152182579, "learning_rate": 0.00016576429879910443, "loss": 1.1044, "step": 847 }, { "epoch": 0.3447855255133157, "grad_norm": 0.09598672389984131, "learning_rate": 0.00016572359047425198, "loss": 0.9764, "step": 848 }, { "epoch": 0.34519211221793045, "grad_norm": 0.09562084078788757, "learning_rate": 0.00016568288214939957, "loss": 0.9964, "step": 849 }, { "epoch": 0.34559869892254524, "grad_norm": 0.0925833135843277, "learning_rate": 0.00016564217382454712, "loss": 0.9717, "step": 850 }, { "epoch": 0.34600528562716, "grad_norm": 0.09141584485769272, "learning_rate": 0.00016560146549969467, "loss": 0.8164, "step": 851 }, { "epoch": 0.34641187233177473, "grad_norm": 0.09384193271398544, "learning_rate": 0.00016556075717484228, "loss": 1.0281, "step": 852 }, { "epoch": 0.3468184590363895, "grad_norm": 0.08962181210517883, "learning_rate": 0.00016552004884998984, "loss": 0.9792, "step": 853 }, { "epoch": 0.34722504574100427, "grad_norm": 0.1070941761136055, "learning_rate": 0.00016547934052513742, "loss": 0.9392, "step": 854 }, { "epoch": 0.347631632445619, "grad_norm": 0.0990995392203331, "learning_rate": 0.00016543863220028497, "loss": 1.015, "step": 855 }, { "epoch": 0.3480382191502338, "grad_norm": 0.10476068407297134, "learning_rate": 
0.00016539792387543253, "loss": 0.974, "step": 856 }, { "epoch": 0.34844480585484855, "grad_norm": 0.09355341643095016, "learning_rate": 0.0001653572155505801, "loss": 0.9558, "step": 857 }, { "epoch": 0.3488513925594633, "grad_norm": 0.10174727439880371, "learning_rate": 0.00016531650722572766, "loss": 0.9864, "step": 858 }, { "epoch": 0.3492579792640781, "grad_norm": 0.09851422905921936, "learning_rate": 0.00016527579890087524, "loss": 0.8641, "step": 859 }, { "epoch": 0.34966456596869283, "grad_norm": 0.10708778351545334, "learning_rate": 0.0001652350905760228, "loss": 0.9883, "step": 860 }, { "epoch": 0.3500711526733076, "grad_norm": 0.0919436663389206, "learning_rate": 0.00016519438225117038, "loss": 1.0178, "step": 861 }, { "epoch": 0.3504777393779223, "grad_norm": 0.09071869403123856, "learning_rate": 0.00016515367392631793, "loss": 1.0118, "step": 862 }, { "epoch": 0.3508843260825371, "grad_norm": 0.10379180312156677, "learning_rate": 0.00016511296560146549, "loss": 1.0068, "step": 863 }, { "epoch": 0.35129091278715185, "grad_norm": 0.10693410784006119, "learning_rate": 0.0001650722572766131, "loss": 1.0659, "step": 864 }, { "epoch": 0.3516974994917666, "grad_norm": 0.08886521309614182, "learning_rate": 0.00016503154895176065, "loss": 0.9019, "step": 865 }, { "epoch": 0.3521040861963814, "grad_norm": 0.110934779047966, "learning_rate": 0.00016499084062690823, "loss": 1.0018, "step": 866 }, { "epoch": 0.35251067290099614, "grad_norm": 0.09179084002971649, "learning_rate": 0.00016495013230205578, "loss": 1.0337, "step": 867 }, { "epoch": 0.3529172596056109, "grad_norm": 0.10181482136249542, "learning_rate": 0.00016490942397720334, "loss": 1.0436, "step": 868 }, { "epoch": 0.3533238463102257, "grad_norm": 0.10369636118412018, "learning_rate": 0.00016486871565235092, "loss": 1.0509, "step": 869 }, { "epoch": 0.3537304330148404, "grad_norm": 0.09437630325555801, "learning_rate": 0.00016482800732749847, "loss": 0.9957, "step": 870 }, { "epoch": 
0.35413701971945516, "grad_norm": 0.0950985848903656, "learning_rate": 0.00016478729900264605, "loss": 1.037, "step": 871 }, { "epoch": 0.35454360642406996, "grad_norm": 0.09876928478479385, "learning_rate": 0.0001647465906777936, "loss": 0.9807, "step": 872 }, { "epoch": 0.3549501931286847, "grad_norm": 0.1047278568148613, "learning_rate": 0.0001647058823529412, "loss": 1.1218, "step": 873 }, { "epoch": 0.35535677983329944, "grad_norm": 0.10372647643089294, "learning_rate": 0.00016466517402808874, "loss": 0.9792, "step": 874 }, { "epoch": 0.3557633665379142, "grad_norm": 0.09626027941703796, "learning_rate": 0.00016462446570323632, "loss": 1.0559, "step": 875 }, { "epoch": 0.356169953242529, "grad_norm": 0.09671976417303085, "learning_rate": 0.0001645837573783839, "loss": 0.9966, "step": 876 }, { "epoch": 0.3565765399471437, "grad_norm": 0.09746148437261581, "learning_rate": 0.00016454304905353146, "loss": 1.0355, "step": 877 }, { "epoch": 0.35698312665175846, "grad_norm": 0.10173183679580688, "learning_rate": 0.00016450234072867904, "loss": 1.0395, "step": 878 }, { "epoch": 0.35738971335637326, "grad_norm": 0.09872651845216751, "learning_rate": 0.0001644616324038266, "loss": 0.9183, "step": 879 }, { "epoch": 0.357796300060988, "grad_norm": 0.08974689990282059, "learning_rate": 0.00016442092407897415, "loss": 0.9767, "step": 880 }, { "epoch": 0.35820288676560275, "grad_norm": 0.10287570208311081, "learning_rate": 0.00016438021575412173, "loss": 0.9762, "step": 881 }, { "epoch": 0.35860947347021754, "grad_norm": 0.09830573201179504, "learning_rate": 0.00016433950742926928, "loss": 0.9674, "step": 882 }, { "epoch": 0.3590160601748323, "grad_norm": 0.0909447893500328, "learning_rate": 0.00016429879910441687, "loss": 0.9603, "step": 883 }, { "epoch": 0.359422646879447, "grad_norm": 0.09882505238056183, "learning_rate": 0.00016425809077956442, "loss": 0.994, "step": 884 }, { "epoch": 0.3598292335840618, "grad_norm": 0.09665820002555847, "learning_rate": 
0.000164217382454712, "loss": 1.0242, "step": 885 }, { "epoch": 0.36023582028867657, "grad_norm": 0.09496365487575531, "learning_rate": 0.00016417667412985955, "loss": 0.9487, "step": 886 }, { "epoch": 0.3606424069932913, "grad_norm": 0.10158530622720718, "learning_rate": 0.00016413596580500714, "loss": 1.0484, "step": 887 }, { "epoch": 0.36104899369790605, "grad_norm": 0.09193973243236542, "learning_rate": 0.00016409525748015472, "loss": 0.9148, "step": 888 }, { "epoch": 0.36145558040252085, "grad_norm": 0.08965738117694855, "learning_rate": 0.00016405454915530227, "loss": 0.8086, "step": 889 }, { "epoch": 0.3618621671071356, "grad_norm": 0.09757012128829956, "learning_rate": 0.00016401384083044983, "loss": 1.0015, "step": 890 }, { "epoch": 0.36226875381175033, "grad_norm": 0.10004543513059616, "learning_rate": 0.0001639731325055974, "loss": 0.9308, "step": 891 }, { "epoch": 0.36267534051636513, "grad_norm": 0.09754510223865509, "learning_rate": 0.00016393242418074496, "loss": 1.1236, "step": 892 }, { "epoch": 0.36308192722097987, "grad_norm": 0.08879724144935608, "learning_rate": 0.00016389171585589254, "loss": 0.9708, "step": 893 }, { "epoch": 0.3634885139255946, "grad_norm": 0.09469077736139297, "learning_rate": 0.0001638510075310401, "loss": 0.9263, "step": 894 }, { "epoch": 0.3638951006302094, "grad_norm": 0.09939096122980118, "learning_rate": 0.00016381029920618768, "loss": 1.0967, "step": 895 }, { "epoch": 0.36430168733482415, "grad_norm": 0.10240636765956879, "learning_rate": 0.00016376959088133523, "loss": 0.992, "step": 896 }, { "epoch": 0.3647082740394389, "grad_norm": 0.09816282987594604, "learning_rate": 0.0001637288825564828, "loss": 1.051, "step": 897 }, { "epoch": 0.3651148607440537, "grad_norm": 0.10622686892747879, "learning_rate": 0.0001636881742316304, "loss": 1.0324, "step": 898 }, { "epoch": 0.36552144744866844, "grad_norm": 0.08964063972234726, "learning_rate": 0.00016364746590677795, "loss": 0.9818, "step": 899 }, { "epoch": 
0.3659280341532832, "grad_norm": 0.09420112520456314, "learning_rate": 0.00016360675758192553, "loss": 1.0712, "step": 900 }, { "epoch": 0.3663346208578979, "grad_norm": 0.09154132008552551, "learning_rate": 0.00016356604925707308, "loss": 0.9368, "step": 901 }, { "epoch": 0.3667412075625127, "grad_norm": 0.09309022128582001, "learning_rate": 0.00016352534093222064, "loss": 0.9532, "step": 902 }, { "epoch": 0.36714779426712746, "grad_norm": 0.09177148342132568, "learning_rate": 0.00016348463260736822, "loss": 0.9278, "step": 903 }, { "epoch": 0.3675543809717422, "grad_norm": 0.09655489027500153, "learning_rate": 0.00016344392428251577, "loss": 0.989, "step": 904 }, { "epoch": 0.367960967676357, "grad_norm": 0.09367511421442032, "learning_rate": 0.00016340321595766335, "loss": 0.9545, "step": 905 }, { "epoch": 0.36836755438097174, "grad_norm": 0.09844557195901871, "learning_rate": 0.0001633625076328109, "loss": 0.9813, "step": 906 }, { "epoch": 0.3687741410855865, "grad_norm": 0.09753169119358063, "learning_rate": 0.0001633217993079585, "loss": 0.9828, "step": 907 }, { "epoch": 0.3691807277902013, "grad_norm": 0.09012485295534134, "learning_rate": 0.00016328109098310604, "loss": 0.9234, "step": 908 }, { "epoch": 0.369587314494816, "grad_norm": 0.09102308750152588, "learning_rate": 0.0001632403826582536, "loss": 0.9435, "step": 909 }, { "epoch": 0.36999390119943076, "grad_norm": 0.10014689713716507, "learning_rate": 0.0001631996743334012, "loss": 0.9706, "step": 910 }, { "epoch": 0.37040048790404556, "grad_norm": 0.09847233444452286, "learning_rate": 0.00016315896600854876, "loss": 1.0053, "step": 911 }, { "epoch": 0.3708070746086603, "grad_norm": 0.08919807523488998, "learning_rate": 0.00016311825768369634, "loss": 0.9453, "step": 912 }, { "epoch": 0.37121366131327505, "grad_norm": 0.09738612174987793, "learning_rate": 0.0001630775493588439, "loss": 0.9532, "step": 913 }, { "epoch": 0.37162024801788984, "grad_norm": 0.09383881837129593, "learning_rate": 
0.00016303684103399145, "loss": 0.9785, "step": 914 }, { "epoch": 0.3720268347225046, "grad_norm": 0.09604702144861221, "learning_rate": 0.00016299613270913903, "loss": 0.9903, "step": 915 }, { "epoch": 0.3724334214271193, "grad_norm": 0.10132728517055511, "learning_rate": 0.00016295542438428658, "loss": 0.9108, "step": 916 }, { "epoch": 0.37284000813173407, "grad_norm": 0.09230207651853561, "learning_rate": 0.00016291471605943417, "loss": 0.9311, "step": 917 }, { "epoch": 0.37324659483634887, "grad_norm": 0.1002974808216095, "learning_rate": 0.00016287400773458172, "loss": 1.036, "step": 918 }, { "epoch": 0.3736531815409636, "grad_norm": 0.10646216571331024, "learning_rate": 0.0001628332994097293, "loss": 1.0322, "step": 919 }, { "epoch": 0.37405976824557835, "grad_norm": 0.10693056881427765, "learning_rate": 0.00016279259108487685, "loss": 0.9632, "step": 920 }, { "epoch": 0.37446635495019315, "grad_norm": 0.09401918947696686, "learning_rate": 0.00016275188276002444, "loss": 0.9099, "step": 921 }, { "epoch": 0.3748729416548079, "grad_norm": 0.09380720555782318, "learning_rate": 0.00016271117443517202, "loss": 0.9477, "step": 922 }, { "epoch": 0.37527952835942263, "grad_norm": 0.10077174752950668, "learning_rate": 0.00016267046611031957, "loss": 1.0681, "step": 923 }, { "epoch": 0.37568611506403743, "grad_norm": 0.09828921407461166, "learning_rate": 0.00016262975778546715, "loss": 1.0714, "step": 924 }, { "epoch": 0.37609270176865217, "grad_norm": 0.09168947488069534, "learning_rate": 0.0001625890494606147, "loss": 0.9704, "step": 925 }, { "epoch": 0.3764992884732669, "grad_norm": 0.11230597645044327, "learning_rate": 0.00016254834113576226, "loss": 1.035, "step": 926 }, { "epoch": 0.3769058751778817, "grad_norm": 0.09307698905467987, "learning_rate": 0.00016250763281090984, "loss": 1.0032, "step": 927 }, { "epoch": 0.37731246188249645, "grad_norm": 0.0920303463935852, "learning_rate": 0.0001624669244860574, "loss": 0.9544, "step": 928 }, { "epoch": 
0.3777190485871112, "grad_norm": 0.09571109712123871, "learning_rate": 0.00016242621616120498, "loss": 1.0121, "step": 929 }, { "epoch": 0.37812563529172594, "grad_norm": 0.10284926742315292, "learning_rate": 0.00016238550783635253, "loss": 1.0774, "step": 930 }, { "epoch": 0.37853222199634073, "grad_norm": 0.10457168519496918, "learning_rate": 0.0001623447995115001, "loss": 1.0838, "step": 931 }, { "epoch": 0.3789388087009555, "grad_norm": 0.09635209292173386, "learning_rate": 0.00016230409118664767, "loss": 1.0938, "step": 932 }, { "epoch": 0.3793453954055702, "grad_norm": 0.09526656568050385, "learning_rate": 0.00016226338286179525, "loss": 0.8854, "step": 933 }, { "epoch": 0.379751982110185, "grad_norm": 0.09416744112968445, "learning_rate": 0.00016222267453694283, "loss": 0.9763, "step": 934 }, { "epoch": 0.38015856881479976, "grad_norm": 0.10477912425994873, "learning_rate": 0.00016218196621209038, "loss": 1.0202, "step": 935 }, { "epoch": 0.3805651555194145, "grad_norm": 0.1058596596121788, "learning_rate": 0.00016214125788723796, "loss": 1.094, "step": 936 }, { "epoch": 0.3809717422240293, "grad_norm": 0.09116796404123306, "learning_rate": 0.00016210054956238552, "loss": 0.8538, "step": 937 }, { "epoch": 0.38137832892864404, "grad_norm": 0.10126717388629913, "learning_rate": 0.00016205984123753307, "loss": 1.0668, "step": 938 }, { "epoch": 0.3817849156332588, "grad_norm": 0.09571392089128494, "learning_rate": 0.00016201913291268065, "loss": 1.0369, "step": 939 }, { "epoch": 0.3821915023378736, "grad_norm": 0.0956893190741539, "learning_rate": 0.0001619784245878282, "loss": 1.0634, "step": 940 }, { "epoch": 0.3825980890424883, "grad_norm": 0.09609857201576233, "learning_rate": 0.0001619377162629758, "loss": 1.0129, "step": 941 }, { "epoch": 0.38300467574710306, "grad_norm": 0.09440251439809799, "learning_rate": 0.00016189700793812334, "loss": 1.0692, "step": 942 }, { "epoch": 0.3834112624517178, "grad_norm": 0.09696158766746521, "learning_rate": 
0.00016185629961327092, "loss": 1.0562, "step": 943 }, { "epoch": 0.3838178491563326, "grad_norm": 0.09598648548126221, "learning_rate": 0.0001618155912884185, "loss": 1.0046, "step": 944 }, { "epoch": 0.38422443586094734, "grad_norm": 0.09640836715698242, "learning_rate": 0.00016177488296356606, "loss": 0.9735, "step": 945 }, { "epoch": 0.3846310225655621, "grad_norm": 0.08648312091827393, "learning_rate": 0.00016173417463871364, "loss": 0.8721, "step": 946 }, { "epoch": 0.3850376092701769, "grad_norm": 0.09831465780735016, "learning_rate": 0.0001616934663138612, "loss": 0.9943, "step": 947 }, { "epoch": 0.3854441959747916, "grad_norm": 0.09906010329723358, "learning_rate": 0.00016165275798900878, "loss": 1.0565, "step": 948 }, { "epoch": 0.38585078267940637, "grad_norm": 0.08963965624570847, "learning_rate": 0.00016161204966415633, "loss": 0.9376, "step": 949 }, { "epoch": 0.38625736938402117, "grad_norm": 0.09176084399223328, "learning_rate": 0.00016157134133930388, "loss": 0.9542, "step": 950 }, { "epoch": 0.3866639560886359, "grad_norm": 0.09641337394714355, "learning_rate": 0.00016153063301445146, "loss": 1.0248, "step": 951 }, { "epoch": 0.38707054279325065, "grad_norm": 0.09608114510774612, "learning_rate": 0.00016148992468959902, "loss": 0.9336, "step": 952 }, { "epoch": 0.38747712949786545, "grad_norm": 0.1011141762137413, "learning_rate": 0.0001614492163647466, "loss": 0.9581, "step": 953 }, { "epoch": 0.3878837162024802, "grad_norm": 0.08915555477142334, "learning_rate": 0.00016140850803989415, "loss": 0.9766, "step": 954 }, { "epoch": 0.38829030290709493, "grad_norm": 0.09591024369001389, "learning_rate": 0.00016136779971504174, "loss": 0.9601, "step": 955 }, { "epoch": 0.3886968896117097, "grad_norm": 0.0986289530992508, "learning_rate": 0.00016132709139018932, "loss": 0.9403, "step": 956 }, { "epoch": 0.38910347631632447, "grad_norm": 0.11183958500623703, "learning_rate": 0.00016128638306533687, "loss": 1.1319, "step": 957 }, { "epoch": 
0.3895100630209392, "grad_norm": 0.09588544070720673, "learning_rate": 0.00016124567474048445, "loss": 0.949, "step": 958 }, { "epoch": 0.38991664972555395, "grad_norm": 0.09099708497524261, "learning_rate": 0.000161204966415632, "loss": 0.8462, "step": 959 }, { "epoch": 0.39032323643016875, "grad_norm": 0.08816317468881607, "learning_rate": 0.0001611642580907796, "loss": 0.9555, "step": 960 }, { "epoch": 0.3907298231347835, "grad_norm": 0.10011658817529678, "learning_rate": 0.00016112354976592714, "loss": 0.984, "step": 961 }, { "epoch": 0.39113640983939824, "grad_norm": 0.10004715621471405, "learning_rate": 0.0001610828414410747, "loss": 1.0356, "step": 962 }, { "epoch": 0.39154299654401303, "grad_norm": 0.09157074242830276, "learning_rate": 0.00016104213311622228, "loss": 1.0009, "step": 963 }, { "epoch": 0.3919495832486278, "grad_norm": 0.09388852119445801, "learning_rate": 0.00016100142479136983, "loss": 0.9339, "step": 964 }, { "epoch": 0.3923561699532425, "grad_norm": 0.08737456053495407, "learning_rate": 0.0001609607164665174, "loss": 0.9075, "step": 965 }, { "epoch": 0.3927627566578573, "grad_norm": 0.10345963388681412, "learning_rate": 0.00016092000814166497, "loss": 0.9599, "step": 966 }, { "epoch": 0.39316934336247206, "grad_norm": 0.09817633777856827, "learning_rate": 0.00016087929981681255, "loss": 0.9688, "step": 967 }, { "epoch": 0.3935759300670868, "grad_norm": 0.09691375494003296, "learning_rate": 0.00016083859149196013, "loss": 0.9936, "step": 968 }, { "epoch": 0.39398251677170154, "grad_norm": 0.09408018738031387, "learning_rate": 0.00016079788316710768, "loss": 1.0414, "step": 969 }, { "epoch": 0.39438910347631634, "grad_norm": 0.0967404916882515, "learning_rate": 0.00016075717484225526, "loss": 1.0127, "step": 970 }, { "epoch": 0.3947956901809311, "grad_norm": 14.380797386169434, "learning_rate": 0.00016071646651740282, "loss": 1.0321, "step": 971 }, { "epoch": 0.3952022768855458, "grad_norm": 0.1003538966178894, "learning_rate": 
0.0001606757581925504, "loss": 1.0326, "step": 972 }, { "epoch": 0.3956088635901606, "grad_norm": 0.11198288947343826, "learning_rate": 0.00016063504986769795, "loss": 1.0124, "step": 973 }, { "epoch": 0.39601545029477536, "grad_norm": 0.11186987906694412, "learning_rate": 0.0001605943415428455, "loss": 1.0373, "step": 974 }, { "epoch": 0.3964220369993901, "grad_norm": 0.10965568572282791, "learning_rate": 0.0001605536332179931, "loss": 1.004, "step": 975 }, { "epoch": 0.3968286237040049, "grad_norm": 0.09624014794826508, "learning_rate": 0.00016051292489314064, "loss": 0.9294, "step": 976 }, { "epoch": 0.39723521040861964, "grad_norm": 0.10577430576086044, "learning_rate": 0.00016047221656828822, "loss": 1.0446, "step": 977 }, { "epoch": 0.3976417971132344, "grad_norm": 0.10079281777143478, "learning_rate": 0.00016043150824343578, "loss": 1.0926, "step": 978 }, { "epoch": 0.3980483838178492, "grad_norm": 0.09973543137311935, "learning_rate": 0.00016039079991858336, "loss": 1.0352, "step": 979 }, { "epoch": 0.3984549705224639, "grad_norm": 0.10137680172920227, "learning_rate": 0.00016035009159373094, "loss": 0.9871, "step": 980 }, { "epoch": 0.39886155722707867, "grad_norm": 0.09879370778799057, "learning_rate": 0.0001603093832688785, "loss": 1.0077, "step": 981 }, { "epoch": 0.3992681439316934, "grad_norm": 0.09389031678438187, "learning_rate": 0.00016026867494402607, "loss": 0.8345, "step": 982 }, { "epoch": 0.3996747306363082, "grad_norm": 0.09968902170658112, "learning_rate": 0.00016022796661917363, "loss": 1.0025, "step": 983 }, { "epoch": 0.40008131734092295, "grad_norm": 0.09817297756671906, "learning_rate": 0.00016018725829432118, "loss": 1.0436, "step": 984 }, { "epoch": 0.4004879040455377, "grad_norm": 0.09468533098697662, "learning_rate": 0.00016014654996946876, "loss": 0.9757, "step": 985 }, { "epoch": 0.4008944907501525, "grad_norm": 0.10573722422122955, "learning_rate": 0.00016010584164461632, "loss": 0.9627, "step": 986 }, { "epoch": 
0.40130107745476723, "grad_norm": 0.09328682720661163, "learning_rate": 0.0001600651333197639, "loss": 0.8463, "step": 987 }, { "epoch": 0.40170766415938197, "grad_norm": 0.10987431555986404, "learning_rate": 0.00016002442499491145, "loss": 1.0123, "step": 988 }, { "epoch": 0.40211425086399677, "grad_norm": 0.09426723420619965, "learning_rate": 0.00015998371667005903, "loss": 0.8706, "step": 989 }, { "epoch": 0.4025208375686115, "grad_norm": 0.10630480945110321, "learning_rate": 0.00015994300834520662, "loss": 0.9596, "step": 990 }, { "epoch": 0.40292742427322625, "grad_norm": 0.11168541014194489, "learning_rate": 0.00015990230002035417, "loss": 1.0848, "step": 991 }, { "epoch": 0.40333401097784105, "grad_norm": 0.09651850908994675, "learning_rate": 0.00015986159169550175, "loss": 0.9965, "step": 992 }, { "epoch": 0.4037405976824558, "grad_norm": 0.10979650169610977, "learning_rate": 0.0001598208833706493, "loss": 1.0658, "step": 993 }, { "epoch": 0.40414718438707053, "grad_norm": 0.10453470051288605, "learning_rate": 0.00015978017504579689, "loss": 0.8697, "step": 994 }, { "epoch": 0.4045537710916853, "grad_norm": 0.09393549710512161, "learning_rate": 0.00015973946672094444, "loss": 0.9818, "step": 995 }, { "epoch": 0.4049603577963001, "grad_norm": 0.10618766397237778, "learning_rate": 0.000159698758396092, "loss": 1.1107, "step": 996 }, { "epoch": 0.4053669445009148, "grad_norm": 0.09401122480630875, "learning_rate": 0.00015965805007123958, "loss": 0.9484, "step": 997 }, { "epoch": 0.40577353120552956, "grad_norm": 0.10404767096042633, "learning_rate": 0.00015961734174638713, "loss": 1.0457, "step": 998 }, { "epoch": 0.40618011791014436, "grad_norm": 0.10144046694040298, "learning_rate": 0.0001595766334215347, "loss": 1.0164, "step": 999 }, { "epoch": 0.4065867046147591, "grad_norm": 69.98524475097656, "learning_rate": 0.00015953592509668227, "loss": 1.0974, "step": 1000 }, { "epoch": 0.40699329131937384, "grad_norm": 0.09672264754772186, "learning_rate": 
0.00015949521677182985, "loss": 0.9351, "step": 1001 }, { "epoch": 0.40739987802398864, "grad_norm": 0.09717651456594467, "learning_rate": 0.00015945450844697743, "loss": 0.992, "step": 1002 }, { "epoch": 0.4078064647286034, "grad_norm": 0.10012587159872055, "learning_rate": 0.00015941380012212498, "loss": 0.9564, "step": 1003 }, { "epoch": 0.4082130514332181, "grad_norm": 0.11782870441675186, "learning_rate": 0.00015937309179727256, "loss": 1.0572, "step": 1004 }, { "epoch": 0.4086196381378329, "grad_norm": 0.12483621388673782, "learning_rate": 0.00015933238347242012, "loss": 0.9195, "step": 1005 }, { "epoch": 0.40902622484244766, "grad_norm": 0.16169683635234833, "learning_rate": 0.0001592916751475677, "loss": 0.9553, "step": 1006 }, { "epoch": 0.4094328115470624, "grad_norm": 0.18174675107002258, "learning_rate": 0.00015925096682271525, "loss": 0.9884, "step": 1007 }, { "epoch": 0.40983939825167714, "grad_norm": 0.15436168015003204, "learning_rate": 0.0001592102584978628, "loss": 0.975, "step": 1008 }, { "epoch": 0.41024598495629194, "grad_norm": 0.37080836296081543, "learning_rate": 0.0001591695501730104, "loss": 0.9542, "step": 1009 }, { "epoch": 0.4106525716609067, "grad_norm": 0.10444851219654083, "learning_rate": 0.00015912884184815794, "loss": 0.8729, "step": 1010 }, { "epoch": 0.4110591583655214, "grad_norm": 0.09934143722057343, "learning_rate": 0.00015908813352330552, "loss": 1.0016, "step": 1011 }, { "epoch": 0.4114657450701362, "grad_norm": 0.10826974362134933, "learning_rate": 0.00015904742519845308, "loss": 1.0141, "step": 1012 }, { "epoch": 0.41187233177475097, "grad_norm": 0.0943305566906929, "learning_rate": 0.00015900671687360066, "loss": 0.9172, "step": 1013 }, { "epoch": 0.4122789184793657, "grad_norm": 0.0978141725063324, "learning_rate": 0.00015896600854874824, "loss": 1.0325, "step": 1014 }, { "epoch": 0.4126855051839805, "grad_norm": 0.10199011117219925, "learning_rate": 0.0001589253002238958, "loss": 1.1241, "step": 1015 }, { "epoch": 
0.41309209188859525, "grad_norm": 0.09425395727157593, "learning_rate": 0.00015888459189904337, "loss": 0.9909, "step": 1016 }, { "epoch": 0.41349867859321, "grad_norm": 0.10020224750041962, "learning_rate": 0.00015884388357419093, "loss": 1.048, "step": 1017 }, { "epoch": 0.4139052652978248, "grad_norm": 0.09428106248378754, "learning_rate": 0.0001588031752493385, "loss": 1.0091, "step": 1018 }, { "epoch": 0.41431185200243953, "grad_norm": 0.10922541469335556, "learning_rate": 0.00015876246692448606, "loss": 1.0321, "step": 1019 }, { "epoch": 0.41471843870705427, "grad_norm": 0.10005990415811539, "learning_rate": 0.00015872175859963362, "loss": 0.9331, "step": 1020 }, { "epoch": 0.415125025411669, "grad_norm": 0.09880723059177399, "learning_rate": 0.0001586810502747812, "loss": 1.0831, "step": 1021 }, { "epoch": 0.4155316121162838, "grad_norm": 0.10210402309894562, "learning_rate": 0.00015864034194992875, "loss": 0.8376, "step": 1022 }, { "epoch": 0.41593819882089855, "grad_norm": 0.10243164747953415, "learning_rate": 0.00015859963362507633, "loss": 0.974, "step": 1023 }, { "epoch": 0.4163447855255133, "grad_norm": 0.0910453349351883, "learning_rate": 0.0001585589253002239, "loss": 0.9429, "step": 1024 }, { "epoch": 0.4167513722301281, "grad_norm": 0.10028322786092758, "learning_rate": 0.00015851821697537147, "loss": 0.9692, "step": 1025 }, { "epoch": 0.41715795893474283, "grad_norm": 0.10679830610752106, "learning_rate": 0.00015847750865051905, "loss": 1.0561, "step": 1026 }, { "epoch": 0.4175645456393576, "grad_norm": 0.10921266674995422, "learning_rate": 0.0001584368003256666, "loss": 1.0046, "step": 1027 }, { "epoch": 0.4179711323439724, "grad_norm": 0.09717408567667007, "learning_rate": 0.00015839609200081419, "loss": 0.9963, "step": 1028 }, { "epoch": 0.4183777190485871, "grad_norm": 0.10907028615474701, "learning_rate": 0.00015835538367596174, "loss": 1.1112, "step": 1029 }, { "epoch": 0.41878430575320186, "grad_norm": 0.0934014692902565, "learning_rate": 
0.00015831467535110932, "loss": 0.9392, "step": 1030 }, { "epoch": 0.41919089245781666, "grad_norm": 0.10372751951217651, "learning_rate": 0.00015827396702625688, "loss": 0.9911, "step": 1031 }, { "epoch": 0.4195974791624314, "grad_norm": 0.0926424190402031, "learning_rate": 0.00015823325870140443, "loss": 0.9568, "step": 1032 }, { "epoch": 0.42000406586704614, "grad_norm": 0.09991902112960815, "learning_rate": 0.000158192550376552, "loss": 1.1551, "step": 1033 }, { "epoch": 0.4204106525716609, "grad_norm": 0.10407492518424988, "learning_rate": 0.00015815184205169956, "loss": 1.0001, "step": 1034 }, { "epoch": 0.4208172392762757, "grad_norm": 0.09984209388494492, "learning_rate": 0.00015811113372684715, "loss": 1.0661, "step": 1035 }, { "epoch": 0.4212238259808904, "grad_norm": 0.08815161138772964, "learning_rate": 0.00015807042540199473, "loss": 0.9132, "step": 1036 }, { "epoch": 0.42163041268550516, "grad_norm": 0.10167308151721954, "learning_rate": 0.00015802971707714228, "loss": 1.0113, "step": 1037 }, { "epoch": 0.42203699939011996, "grad_norm": 0.09093226492404938, "learning_rate": 0.00015798900875228986, "loss": 0.9016, "step": 1038 }, { "epoch": 0.4224435860947347, "grad_norm": 0.09932513535022736, "learning_rate": 0.00015794830042743742, "loss": 1.0756, "step": 1039 }, { "epoch": 0.42285017279934944, "grad_norm": 0.09752842038869858, "learning_rate": 0.000157907592102585, "loss": 1.0552, "step": 1040 }, { "epoch": 0.42325675950396424, "grad_norm": 0.09833484143018723, "learning_rate": 0.00015786688377773255, "loss": 1.0448, "step": 1041 }, { "epoch": 0.423663346208579, "grad_norm": 0.09440255910158157, "learning_rate": 0.00015782617545288013, "loss": 0.966, "step": 1042 }, { "epoch": 0.4240699329131937, "grad_norm": 0.09800337255001068, "learning_rate": 0.0001577854671280277, "loss": 0.9517, "step": 1043 }, { "epoch": 0.4244765196178085, "grad_norm": 0.100920170545578, "learning_rate": 0.00015774475880317524, "loss": 1.0075, "step": 1044 }, { "epoch": 
0.42488310632242327, "grad_norm": 0.10229222476482391, "learning_rate": 0.00015770405047832282, "loss": 1.0644, "step": 1045 }, { "epoch": 0.425289693027038, "grad_norm": 0.09247329086065292, "learning_rate": 0.00015766334215347038, "loss": 0.9628, "step": 1046 }, { "epoch": 0.42569627973165275, "grad_norm": 0.08849867433309555, "learning_rate": 0.00015762263382861796, "loss": 0.9044, "step": 1047 }, { "epoch": 0.42610286643626755, "grad_norm": 0.10035345703363419, "learning_rate": 0.00015758192550376554, "loss": 1.0025, "step": 1048 }, { "epoch": 0.4265094531408823, "grad_norm": 0.10530912131071091, "learning_rate": 0.0001575412171789131, "loss": 1.1156, "step": 1049 }, { "epoch": 0.42691603984549703, "grad_norm": 0.0959988534450531, "learning_rate": 0.00015750050885406067, "loss": 0.9927, "step": 1050 }, { "epoch": 0.42732262655011183, "grad_norm": 0.09642820060253143, "learning_rate": 0.00015745980052920823, "loss": 1.0081, "step": 1051 }, { "epoch": 0.42772921325472657, "grad_norm": 0.09695859253406525, "learning_rate": 0.0001574190922043558, "loss": 1.0104, "step": 1052 }, { "epoch": 0.4281357999593413, "grad_norm": 0.09271597862243652, "learning_rate": 0.00015737838387950336, "loss": 0.9655, "step": 1053 }, { "epoch": 0.4285423866639561, "grad_norm": 0.11482039839029312, "learning_rate": 0.00015733767555465094, "loss": 1.1689, "step": 1054 }, { "epoch": 0.42894897336857085, "grad_norm": 0.12072457373142242, "learning_rate": 0.0001572969672297985, "loss": 1.1573, "step": 1055 }, { "epoch": 0.4293555600731856, "grad_norm": 0.10628031194210052, "learning_rate": 0.00015725625890494605, "loss": 1.0604, "step": 1056 }, { "epoch": 0.4297621467778004, "grad_norm": 0.09997066110372543, "learning_rate": 0.00015721555058009363, "loss": 1.0791, "step": 1057 }, { "epoch": 0.43016873348241513, "grad_norm": 0.09063227474689484, "learning_rate": 0.0001571748422552412, "loss": 0.8821, "step": 1058 }, { "epoch": 0.4305753201870299, "grad_norm": 0.09447956085205078, 
"learning_rate": 0.0001571341339303888, "loss": 0.9104, "step": 1059 }, { "epoch": 0.4309819068916446, "grad_norm": 0.09488890320062637, "learning_rate": 0.00015709342560553635, "loss": 0.9476, "step": 1060 }, { "epoch": 0.4313884935962594, "grad_norm": 0.09842818230390549, "learning_rate": 0.0001570527172806839, "loss": 1.0041, "step": 1061 }, { "epoch": 0.43179508030087416, "grad_norm": 0.10026121884584427, "learning_rate": 0.00015701200895583149, "loss": 0.9704, "step": 1062 }, { "epoch": 0.4322016670054889, "grad_norm": 0.10602670162916183, "learning_rate": 0.00015697130063097904, "loss": 0.9626, "step": 1063 }, { "epoch": 0.4326082537101037, "grad_norm": 0.09817321598529816, "learning_rate": 0.00015693059230612662, "loss": 1.018, "step": 1064 }, { "epoch": 0.43301484041471844, "grad_norm": 0.10956291854381561, "learning_rate": 0.00015688988398127417, "loss": 1.0773, "step": 1065 }, { "epoch": 0.4334214271193332, "grad_norm": 0.10461815446615219, "learning_rate": 0.00015684917565642176, "loss": 1.0276, "step": 1066 }, { "epoch": 0.433828013823948, "grad_norm": 0.1066046878695488, "learning_rate": 0.0001568084673315693, "loss": 1.0104, "step": 1067 }, { "epoch": 0.4342346005285627, "grad_norm": 0.09685570001602173, "learning_rate": 0.00015676775900671686, "loss": 0.9324, "step": 1068 }, { "epoch": 0.43464118723317746, "grad_norm": 0.10849763453006744, "learning_rate": 0.00015672705068186445, "loss": 1.1898, "step": 1069 }, { "epoch": 0.43504777393779226, "grad_norm": 0.09181284159421921, "learning_rate": 0.000156686342357012, "loss": 0.9655, "step": 1070 }, { "epoch": 0.435454360642407, "grad_norm": 0.09956375509500504, "learning_rate": 0.00015664563403215958, "loss": 0.9767, "step": 1071 }, { "epoch": 0.43586094734702174, "grad_norm": 0.09587504714727402, "learning_rate": 0.00015660492570730716, "loss": 1.0046, "step": 1072 }, { "epoch": 0.4362675340516365, "grad_norm": 0.09740083664655685, "learning_rate": 0.00015656421738245472, "loss": 1.0235, "step": 1073 
}, { "epoch": 0.4366741207562513, "grad_norm": 0.1067059263586998, "learning_rate": 0.0001565235090576023, "loss": 1.0495, "step": 1074 }, { "epoch": 0.437080707460866, "grad_norm": 0.0951162800192833, "learning_rate": 0.00015648280073274985, "loss": 1.0601, "step": 1075 }, { "epoch": 0.43748729416548077, "grad_norm": 0.10814306139945984, "learning_rate": 0.00015644209240789743, "loss": 1.0642, "step": 1076 }, { "epoch": 0.43789388087009556, "grad_norm": 0.10104648023843765, "learning_rate": 0.000156401384083045, "loss": 1.0183, "step": 1077 }, { "epoch": 0.4383004675747103, "grad_norm": 0.10644647479057312, "learning_rate": 0.00015636067575819254, "loss": 0.9845, "step": 1078 }, { "epoch": 0.43870705427932505, "grad_norm": 0.10958357155323029, "learning_rate": 0.00015631996743334012, "loss": 1.0803, "step": 1079 }, { "epoch": 0.43911364098393985, "grad_norm": 0.09988164156675339, "learning_rate": 0.00015627925910848768, "loss": 0.9468, "step": 1080 }, { "epoch": 0.4395202276885546, "grad_norm": 0.09617158770561218, "learning_rate": 0.00015623855078363526, "loss": 0.9929, "step": 1081 }, { "epoch": 0.43992681439316933, "grad_norm": 0.09235814958810806, "learning_rate": 0.00015619784245878284, "loss": 0.9681, "step": 1082 }, { "epoch": 0.4403334010977841, "grad_norm": 0.0999334529042244, "learning_rate": 0.0001561571341339304, "loss": 1.0971, "step": 1083 }, { "epoch": 0.44073998780239887, "grad_norm": 0.09117653220891953, "learning_rate": 0.00015611642580907797, "loss": 0.9176, "step": 1084 }, { "epoch": 0.4411465745070136, "grad_norm": 0.11608845740556717, "learning_rate": 0.00015607571748422553, "loss": 1.016, "step": 1085 }, { "epoch": 0.44155316121162835, "grad_norm": null, "learning_rate": 0.0001560350091593731, "loss": 3.9953, "step": 1086 }, { "epoch": 0.44195974791624315, "grad_norm": 0.08910229802131653, "learning_rate": 0.00015599430083452066, "loss": 0.9387, "step": 1087 }, { "epoch": 0.4423663346208579, "grad_norm": null, "learning_rate": 
0.00015595359250966824, "loss": 0.9939, "step": 1088 }, { "epoch": 0.44277292132547263, "grad_norm": 0.13621561229228973, "learning_rate": 0.0001559128841848158, "loss": 0.9376, "step": 1089 }, { "epoch": 0.44317950803008743, "grad_norm": 0.263536661863327, "learning_rate": 0.00015587217585996335, "loss": 1.0808, "step": 1090 }, { "epoch": 0.4435860947347022, "grad_norm": 0.21123525500297546, "learning_rate": 0.00015583146753511093, "loss": 0.9142, "step": 1091 }, { "epoch": 0.4439926814393169, "grad_norm": 0.16994574666023254, "learning_rate": 0.0001557907592102585, "loss": 1.0273, "step": 1092 }, { "epoch": 0.4443992681439317, "grad_norm": 0.1400166153907776, "learning_rate": 0.00015575005088540607, "loss": 0.9135, "step": 1093 }, { "epoch": 0.44480585484854646, "grad_norm": 0.13885940611362457, "learning_rate": 0.00015570934256055365, "loss": 1.158, "step": 1094 }, { "epoch": 0.4452124415531612, "grad_norm": 0.12671105563640594, "learning_rate": 0.0001556686342357012, "loss": 0.9401, "step": 1095 }, { "epoch": 0.445619028257776, "grad_norm": 0.11388255655765533, "learning_rate": 0.00015562792591084879, "loss": 0.9454, "step": 1096 }, { "epoch": 0.44602561496239074, "grad_norm": 0.13421480357646942, "learning_rate": 0.00015558721758599634, "loss": 1.0017, "step": 1097 }, { "epoch": 0.4464322016670055, "grad_norm": 0.11914326995611191, "learning_rate": 0.00015554650926114392, "loss": 1.0312, "step": 1098 }, { "epoch": 0.4468387883716202, "grad_norm": 0.11101624369621277, "learning_rate": 0.00015550580093629147, "loss": 1.0555, "step": 1099 }, { "epoch": 0.447245375076235, "grad_norm": 0.12158175557851791, "learning_rate": 0.00015546509261143906, "loss": 1.008, "step": 1100 }, { "epoch": 0.44765196178084976, "grad_norm": 0.09680108726024628, "learning_rate": 0.0001554243842865866, "loss": 0.8603, "step": 1101 }, { "epoch": 0.4480585484854645, "grad_norm": 0.12374867498874664, "learning_rate": 0.00015538367596173416, "loss": 0.9282, "step": 1102 }, { "epoch": 
0.4484651351900793, "grad_norm": 0.12144714593887329, "learning_rate": 0.00015534296763688175, "loss": 1.1072, "step": 1103 }, { "epoch": 0.44887172189469404, "grad_norm": 0.13777373731136322, "learning_rate": 0.0001553022593120293, "loss": 1.0914, "step": 1104 }, { "epoch": 0.4492783085993088, "grad_norm": 0.14908930659294128, "learning_rate": 0.0001552615509871769, "loss": 1.0349, "step": 1105 }, { "epoch": 0.4496848953039236, "grad_norm": 0.09202148765325546, "learning_rate": 0.00015522084266232446, "loss": 1.0544, "step": 1106 }, { "epoch": 0.4500914820085383, "grad_norm": 0.14155222475528717, "learning_rate": 0.00015518013433747202, "loss": 1.145, "step": 1107 }, { "epoch": 0.45049806871315307, "grad_norm": 0.13090363144874573, "learning_rate": 0.0001551394260126196, "loss": 1.0815, "step": 1108 }, { "epoch": 0.45090465541776786, "grad_norm": 0.09763860702514648, "learning_rate": 0.00015509871768776715, "loss": 0.9798, "step": 1109 }, { "epoch": 0.4513112421223826, "grad_norm": 0.11425314843654633, "learning_rate": 0.00015505800936291473, "loss": 1.0609, "step": 1110 }, { "epoch": 0.45171782882699735, "grad_norm": 0.1132175624370575, "learning_rate": 0.00015501730103806229, "loss": 1.0784, "step": 1111 }, { "epoch": 0.4521244155316121, "grad_norm": 0.09365850687026978, "learning_rate": 0.00015497659271320987, "loss": 0.971, "step": 1112 }, { "epoch": 0.4525310022362269, "grad_norm": 0.10959959030151367, "learning_rate": 0.00015493588438835742, "loss": 1.0991, "step": 1113 }, { "epoch": 0.45293758894084163, "grad_norm": 0.1113215982913971, "learning_rate": 0.00015489517606350498, "loss": 0.9664, "step": 1114 }, { "epoch": 0.45334417564545637, "grad_norm": 0.09337687492370605, "learning_rate": 0.00015485446773865256, "loss": 0.9801, "step": 1115 }, { "epoch": 0.45375076235007117, "grad_norm": 0.09887603670358658, "learning_rate": 0.0001548137594138001, "loss": 0.9329, "step": 1116 }, { "epoch": 0.4541573490546859, "grad_norm": 0.09895873069763184, 
"learning_rate": 0.00015477305108894772, "loss": 0.9742, "step": 1117 }, { "epoch": 0.45456393575930065, "grad_norm": 0.10547256469726562, "learning_rate": 0.00015473234276409527, "loss": 1.0917, "step": 1118 }, { "epoch": 0.45497052246391545, "grad_norm": 0.10243359208106995, "learning_rate": 0.00015469163443924283, "loss": 1.153, "step": 1119 }, { "epoch": 0.4553771091685302, "grad_norm": 0.10679526627063751, "learning_rate": 0.0001546509261143904, "loss": 1.0256, "step": 1120 }, { "epoch": 0.45578369587314493, "grad_norm": 0.10002291947603226, "learning_rate": 0.00015461021778953796, "loss": 1.0984, "step": 1121 }, { "epoch": 0.45619028257775973, "grad_norm": 0.0953390821814537, "learning_rate": 0.00015456950946468554, "loss": 1.015, "step": 1122 }, { "epoch": 0.4565968692823745, "grad_norm": 0.09738897532224655, "learning_rate": 0.0001545288011398331, "loss": 1.0193, "step": 1123 }, { "epoch": 0.4570034559869892, "grad_norm": 0.09633835405111313, "learning_rate": 0.00015448809281498068, "loss": 1.0595, "step": 1124 }, { "epoch": 0.45741004269160396, "grad_norm": 0.09380267560482025, "learning_rate": 0.00015444738449012823, "loss": 1.0411, "step": 1125 }, { "epoch": 0.45781662939621875, "grad_norm": 0.09572221338748932, "learning_rate": 0.0001544066761652758, "loss": 1.0509, "step": 1126 }, { "epoch": 0.4582232161008335, "grad_norm": 0.09846567362546921, "learning_rate": 0.00015436596784042337, "loss": 1.0026, "step": 1127 }, { "epoch": 0.45862980280544824, "grad_norm": 0.10050946474075317, "learning_rate": 0.00015432525951557095, "loss": 0.9278, "step": 1128 }, { "epoch": 0.45903638951006304, "grad_norm": 0.09319213777780533, "learning_rate": 0.00015428455119071853, "loss": 1.0591, "step": 1129 }, { "epoch": 0.4594429762146778, "grad_norm": 0.10778182744979858, "learning_rate": 0.00015424384286586608, "loss": 1.1913, "step": 1130 }, { "epoch": 0.4598495629192925, "grad_norm": 0.09819093346595764, "learning_rate": 0.00015420313454101364, "loss": 1.0254, "step": 
1131 }, { "epoch": 0.4602561496239073, "grad_norm": 0.09300455451011658, "learning_rate": 0.00015416242621616122, "loss": 0.9092, "step": 1132 }, { "epoch": 0.46066273632852206, "grad_norm": 0.09690682590007782, "learning_rate": 0.00015412171789130877, "loss": 1.0309, "step": 1133 }, { "epoch": 0.4610693230331368, "grad_norm": 0.10080096125602722, "learning_rate": 0.00015408100956645636, "loss": 1.059, "step": 1134 }, { "epoch": 0.4614759097377516, "grad_norm": 0.10120131820440292, "learning_rate": 0.0001540403012416039, "loss": 1.0201, "step": 1135 }, { "epoch": 0.46188249644236634, "grad_norm": 0.09029684960842133, "learning_rate": 0.0001539995929167515, "loss": 0.9981, "step": 1136 }, { "epoch": 0.4622890831469811, "grad_norm": 0.10337984561920166, "learning_rate": 0.00015395888459189904, "loss": 1.0746, "step": 1137 }, { "epoch": 0.4626956698515958, "grad_norm": 0.10107820481061935, "learning_rate": 0.0001539181762670466, "loss": 1.0901, "step": 1138 }, { "epoch": 0.4631022565562106, "grad_norm": 0.09064685553312302, "learning_rate": 0.00015387746794219418, "loss": 0.9654, "step": 1139 }, { "epoch": 0.46350884326082537, "grad_norm": 0.08879990130662918, "learning_rate": 0.00015383675961734176, "loss": 0.9099, "step": 1140 }, { "epoch": 0.4639154299654401, "grad_norm": 0.09138944000005722, "learning_rate": 0.00015379605129248934, "loss": 1.029, "step": 1141 }, { "epoch": 0.4643220166700549, "grad_norm": 0.08852239698171616, "learning_rate": 0.0001537553429676369, "loss": 0.8866, "step": 1142 }, { "epoch": 0.46472860337466965, "grad_norm": 0.1031791940331459, "learning_rate": 0.00015371463464278445, "loss": 1.0403, "step": 1143 }, { "epoch": 0.4651351900792844, "grad_norm": 0.10525615513324738, "learning_rate": 0.00015367392631793203, "loss": 1.0979, "step": 1144 }, { "epoch": 0.4655417767838992, "grad_norm": 0.08951327204704285, "learning_rate": 0.00015363321799307959, "loss": 1.1415, "step": 1145 }, { "epoch": 0.46594836348851393, "grad_norm": 
0.08904453366994858, "learning_rate": 0.00015359250966822717, "loss": 0.9916, "step": 1146 }, { "epoch": 0.46635495019312867, "grad_norm": 0.09936080127954483, "learning_rate": 0.00015355180134337472, "loss": 0.8986, "step": 1147 }, { "epoch": 0.46676153689774347, "grad_norm": 0.09393945336341858, "learning_rate": 0.0001535110930185223, "loss": 0.9999, "step": 1148 }, { "epoch": 0.4671681236023582, "grad_norm": 0.09378618746995926, "learning_rate": 0.00015347038469366986, "loss": 1.047, "step": 1149 }, { "epoch": 0.46757471030697295, "grad_norm": 0.08764394372701645, "learning_rate": 0.0001534296763688174, "loss": 1.0553, "step": 1150 }, { "epoch": 0.4679812970115877, "grad_norm": 0.09421446919441223, "learning_rate": 0.00015338896804396502, "loss": 0.9849, "step": 1151 }, { "epoch": 0.4683878837162025, "grad_norm": 0.08507819473743439, "learning_rate": 0.00015334825971911257, "loss": 0.9776, "step": 1152 }, { "epoch": 0.46879447042081723, "grad_norm": 0.08929714560508728, "learning_rate": 0.00015330755139426015, "loss": 0.9386, "step": 1153 }, { "epoch": 0.469201057125432, "grad_norm": 0.08826079219579697, "learning_rate": 0.0001532668430694077, "loss": 0.9566, "step": 1154 }, { "epoch": 0.4696076438300468, "grad_norm": 0.09339980781078339, "learning_rate": 0.00015322613474455526, "loss": 1.0428, "step": 1155 }, { "epoch": 0.4700142305346615, "grad_norm": 0.09100881218910217, "learning_rate": 0.00015318542641970284, "loss": 0.998, "step": 1156 }, { "epoch": 0.47042081723927626, "grad_norm": 0.10815288126468658, "learning_rate": 0.0001531447180948504, "loss": 0.9677, "step": 1157 }, { "epoch": 0.47082740394389105, "grad_norm": 0.10011841356754303, "learning_rate": 0.00015310400976999798, "loss": 1.0712, "step": 1158 }, { "epoch": 0.4712339906485058, "grad_norm": 0.09442432969808578, "learning_rate": 0.00015306330144514553, "loss": 1.0916, "step": 1159 }, { "epoch": 0.47164057735312054, "grad_norm": 0.09668919444084167, "learning_rate": 0.00015302259312029311, 
"loss": 1.0755, "step": 1160 }, { "epoch": 0.47204716405773534, "grad_norm": 0.09985285252332687, "learning_rate": 0.00015298188479544067, "loss": 1.0688, "step": 1161 }, { "epoch": 0.4724537507623501, "grad_norm": 0.10555320233106613, "learning_rate": 0.00015294117647058822, "loss": 1.0152, "step": 1162 }, { "epoch": 0.4728603374669648, "grad_norm": 0.0884140282869339, "learning_rate": 0.00015290046814573583, "loss": 0.9648, "step": 1163 }, { "epoch": 0.47326692417157956, "grad_norm": 0.07746291160583496, "learning_rate": 0.00015285975982088338, "loss": 0.8335, "step": 1164 }, { "epoch": 0.47367351087619436, "grad_norm": 0.09735523909330368, "learning_rate": 0.00015281905149603094, "loss": 1.0043, "step": 1165 }, { "epoch": 0.4740800975808091, "grad_norm": 0.0871511772274971, "learning_rate": 0.00015277834317117852, "loss": 0.9071, "step": 1166 }, { "epoch": 0.47448668428542384, "grad_norm": 0.08971349149942398, "learning_rate": 0.00015273763484632607, "loss": 1.0586, "step": 1167 }, { "epoch": 0.47489327099003864, "grad_norm": 0.0872373878955841, "learning_rate": 0.00015269692652147365, "loss": 1.0302, "step": 1168 }, { "epoch": 0.4752998576946534, "grad_norm": 0.07631363719701767, "learning_rate": 0.0001526562181966212, "loss": 0.8899, "step": 1169 }, { "epoch": 0.4757064443992681, "grad_norm": 0.0988103449344635, "learning_rate": 0.0001526155098717688, "loss": 1.1254, "step": 1170 }, { "epoch": 0.4761130311038829, "grad_norm": 0.097597636282444, "learning_rate": 0.00015257480154691634, "loss": 1.1146, "step": 1171 }, { "epoch": 0.47651961780849766, "grad_norm": 0.09990191459655762, "learning_rate": 0.00015253409322206393, "loss": 1.2176, "step": 1172 }, { "epoch": 0.4769262045131124, "grad_norm": 0.09328643232584, "learning_rate": 0.00015249338489721148, "loss": 1.0341, "step": 1173 }, { "epoch": 0.4773327912177272, "grad_norm": 0.10171747207641602, "learning_rate": 0.00015245267657235906, "loss": 1.0254, "step": 1174 }, { "epoch": 0.47773937792234195, 
"grad_norm": 0.10708395391702652, "learning_rate": 0.00015241196824750664, "loss": 1.0829, "step": 1175 }, { "epoch": 0.4781459646269567, "grad_norm": 0.08677671104669571, "learning_rate": 0.0001523712599226542, "loss": 1.0284, "step": 1176 }, { "epoch": 0.4785525513315715, "grad_norm": 0.09038002789020538, "learning_rate": 0.00015233055159780175, "loss": 0.9153, "step": 1177 }, { "epoch": 0.4789591380361862, "grad_norm": 0.11192218214273453, "learning_rate": 0.00015228984327294933, "loss": 1.0457, "step": 1178 }, { "epoch": 0.47936572474080097, "grad_norm": 0.09288083016872406, "learning_rate": 0.00015224913494809689, "loss": 1.0015, "step": 1179 }, { "epoch": 0.4797723114454157, "grad_norm": 0.09631673991680145, "learning_rate": 0.00015220842662324447, "loss": 1.0815, "step": 1180 }, { "epoch": 0.4801788981500305, "grad_norm": 0.10445179790258408, "learning_rate": 0.00015216771829839202, "loss": 1.0739, "step": 1181 }, { "epoch": 0.48058548485464525, "grad_norm": 0.09268762916326523, "learning_rate": 0.0001521270099735396, "loss": 0.8934, "step": 1182 }, { "epoch": 0.48099207155926, "grad_norm": 0.08889751881361008, "learning_rate": 0.00015208630164868716, "loss": 0.9938, "step": 1183 }, { "epoch": 0.4813986582638748, "grad_norm": 45.80461883544922, "learning_rate": 0.0001520455933238347, "loss": 1.1104, "step": 1184 }, { "epoch": 0.48180524496848953, "grad_norm": 0.10641971975564957, "learning_rate": 0.0001520048849989823, "loss": 0.94, "step": 1185 }, { "epoch": 0.4822118316731043, "grad_norm": 0.1041031926870346, "learning_rate": 0.00015196417667412987, "loss": 1.0479, "step": 1186 }, { "epoch": 0.48261841837771907, "grad_norm": 0.09576927870512009, "learning_rate": 0.00015192346834927745, "loss": 1.0385, "step": 1187 }, { "epoch": 0.4830250050823338, "grad_norm": 26.211715698242188, "learning_rate": 0.000151882760024425, "loss": 0.9019, "step": 1188 }, { "epoch": 0.48343159178694856, "grad_norm": 0.10039546340703964, "learning_rate": 0.00015184205169957256, 
"loss": 0.9887, "step": 1189 }, { "epoch": 0.48383817849156335, "grad_norm": 0.14768731594085693, "learning_rate": 0.00015180134337472014, "loss": 0.9373, "step": 1190 }, { "epoch": 0.4842447651961781, "grad_norm": 0.29760250449180603, "learning_rate": 0.0001517606350498677, "loss": 0.9899, "step": 1191 }, { "epoch": 0.48465135190079284, "grad_norm": 0.29652246832847595, "learning_rate": 0.00015171992672501528, "loss": 0.961, "step": 1192 }, { "epoch": 0.4850579386054076, "grad_norm": 0.7517414689064026, "learning_rate": 0.00015167921840016283, "loss": 1.0964, "step": 1193 }, { "epoch": 0.4854645253100224, "grad_norm": 0.14506421983242035, "learning_rate": 0.0001516385100753104, "loss": 1.1155, "step": 1194 }, { "epoch": 0.4858711120146371, "grad_norm": 0.11916639655828476, "learning_rate": 0.00015159780175045797, "loss": 0.9494, "step": 1195 }, { "epoch": 0.48627769871925186, "grad_norm": 0.10341714322566986, "learning_rate": 0.00015155709342560552, "loss": 0.9381, "step": 1196 }, { "epoch": 0.48668428542386666, "grad_norm": 0.10921141505241394, "learning_rate": 0.00015151638510075313, "loss": 1.0357, "step": 1197 }, { "epoch": 0.4870908721284814, "grad_norm": 0.12874668836593628, "learning_rate": 0.00015147567677590068, "loss": 0.9918, "step": 1198 }, { "epoch": 0.48749745883309614, "grad_norm": 0.10311154276132584, "learning_rate": 0.00015143496845104827, "loss": 1.0575, "step": 1199 }, { "epoch": 0.48790404553771094, "grad_norm": 0.09126869589090347, "learning_rate": 0.00015139426012619582, "loss": 0.9125, "step": 1200 }, { "epoch": 0.4883106322423257, "grad_norm": 0.11038295179605484, "learning_rate": 0.00015135355180134337, "loss": 1.0761, "step": 1201 }, { "epoch": 0.4887172189469404, "grad_norm": 0.10550364851951599, "learning_rate": 0.00015131284347649095, "loss": 1.0513, "step": 1202 }, { "epoch": 0.4891238056515552, "grad_norm": 0.08666063100099564, "learning_rate": 0.0001512721351516385, "loss": 0.8815, "step": 1203 }, { "epoch": 0.48953039235616996, 
"grad_norm": 0.09860862046480179, "learning_rate": 0.0001512314268267861, "loss": 1.0451, "step": 1204 }, { "epoch": 0.4899369790607847, "grad_norm": 0.10188648104667664, "learning_rate": 0.00015119071850193364, "loss": 0.9911, "step": 1205 }, { "epoch": 0.49034356576539945, "grad_norm": 0.09538048505783081, "learning_rate": 0.00015115001017708122, "loss": 0.8973, "step": 1206 }, { "epoch": 0.49075015247001424, "grad_norm": 0.10558182001113892, "learning_rate": 0.00015110930185222878, "loss": 1.0272, "step": 1207 }, { "epoch": 0.491156739174629, "grad_norm": 0.10072223097085953, "learning_rate": 0.00015106859352737633, "loss": 1.0966, "step": 1208 }, { "epoch": 0.49156332587924373, "grad_norm": 0.10667192190885544, "learning_rate": 0.00015102788520252394, "loss": 1.0805, "step": 1209 }, { "epoch": 0.4919699125838585, "grad_norm": 0.10285364836454391, "learning_rate": 0.0001509871768776715, "loss": 1.0553, "step": 1210 }, { "epoch": 0.49237649928847327, "grad_norm": 0.09896936267614365, "learning_rate": 0.00015094646855281908, "loss": 1.032, "step": 1211 }, { "epoch": 0.492783085993088, "grad_norm": 0.08868112415075302, "learning_rate": 0.00015090576022796663, "loss": 1.013, "step": 1212 }, { "epoch": 0.4931896726977028, "grad_norm": 0.10103127360343933, "learning_rate": 0.00015086505190311418, "loss": 1.0589, "step": 1213 }, { "epoch": 0.49359625940231755, "grad_norm": 0.11582531780004501, "learning_rate": 0.00015082434357826177, "loss": 1.0731, "step": 1214 }, { "epoch": 0.4940028461069323, "grad_norm": 0.0953935906291008, "learning_rate": 0.00015078363525340932, "loss": 0.9751, "step": 1215 }, { "epoch": 0.4944094328115471, "grad_norm": 0.10135170817375183, "learning_rate": 0.0001507429269285569, "loss": 1.0676, "step": 1216 }, { "epoch": 0.49481601951616183, "grad_norm": 0.10529596358537674, "learning_rate": 0.00015070221860370446, "loss": 1.0274, "step": 1217 }, { "epoch": 0.4952226062207766, "grad_norm": 0.11172258853912354, "learning_rate": 
0.00015066151027885204, "loss": 1.1241, "step": 1218 }, { "epoch": 0.4956291929253913, "grad_norm": 0.10328125208616257, "learning_rate": 0.0001506208019539996, "loss": 1.1032, "step": 1219 }, { "epoch": 0.4960357796300061, "grad_norm": 0.09035445749759674, "learning_rate": 0.00015058009362914717, "loss": 1.0394, "step": 1220 }, { "epoch": 0.49644236633462085, "grad_norm": 0.0988045334815979, "learning_rate": 0.00015053938530429475, "loss": 1.0092, "step": 1221 }, { "epoch": 0.4968489530392356, "grad_norm": 0.12335261702537537, "learning_rate": 0.0001504986769794423, "loss": 1.0994, "step": 1222 }, { "epoch": 0.4972555397438504, "grad_norm": 0.09677151590585709, "learning_rate": 0.0001504579686545899, "loss": 0.9352, "step": 1223 }, { "epoch": 0.49766212644846514, "grad_norm": 0.0954160988330841, "learning_rate": 0.00015041726032973744, "loss": 1.0526, "step": 1224 }, { "epoch": 0.4980687131530799, "grad_norm": 0.09783489257097244, "learning_rate": 0.000150376552004885, "loss": 0.9689, "step": 1225 }, { "epoch": 0.4984752998576947, "grad_norm": 0.09221793711185455, "learning_rate": 0.00015033584368003258, "loss": 0.9458, "step": 1226 }, { "epoch": 0.4988818865623094, "grad_norm": 0.09968589246273041, "learning_rate": 0.00015029513535518013, "loss": 0.9938, "step": 1227 }, { "epoch": 0.49928847326692416, "grad_norm": 0.10488888621330261, "learning_rate": 0.0001502544270303277, "loss": 0.9525, "step": 1228 }, { "epoch": 0.49969505997153896, "grad_norm": 0.08479832857847214, "learning_rate": 0.00015021371870547527, "loss": 0.976, "step": 1229 }, { "epoch": 0.5001016466761536, "grad_norm": 0.0930403620004654, "learning_rate": 0.00015017301038062285, "loss": 1.0259, "step": 1230 }, { "epoch": 0.5005082333807684, "grad_norm": 0.09309448301792145, "learning_rate": 0.0001501323020557704, "loss": 0.9997, "step": 1231 }, { "epoch": 0.5009148200853832, "grad_norm": 0.09209504723548889, "learning_rate": 0.00015009159373091798, "loss": 0.9365, "step": 1232 }, { "epoch": 
0.5013214067899979, "grad_norm": 0.09045909345149994, "learning_rate": 0.00015005088540606556, "loss": 0.9572, "step": 1233 }, { "epoch": 0.5017279934946127, "grad_norm": 0.0892348513007164, "learning_rate": 0.00015001017708121312, "loss": 0.9593, "step": 1234 }, { "epoch": 0.5021345801992275, "grad_norm": 0.08853106945753098, "learning_rate": 0.0001499694687563607, "loss": 0.9518, "step": 1235 }, { "epoch": 0.5025411669038422, "grad_norm": 0.0941222533583641, "learning_rate": 0.00014992876043150825, "loss": 0.9474, "step": 1236 }, { "epoch": 0.502947753608457, "grad_norm": 0.09374161809682846, "learning_rate": 0.0001498880521066558, "loss": 1.0018, "step": 1237 }, { "epoch": 0.5033543403130718, "grad_norm": 0.08115139603614807, "learning_rate": 0.0001498473437818034, "loss": 0.8797, "step": 1238 }, { "epoch": 0.5037609270176865, "grad_norm": 0.09270316362380981, "learning_rate": 0.00014980663545695094, "loss": 1.0203, "step": 1239 }, { "epoch": 0.5041675137223013, "grad_norm": 0.08950728923082352, "learning_rate": 0.00014976592713209852, "loss": 0.9909, "step": 1240 }, { "epoch": 0.5045741004269161, "grad_norm": 0.09764236211776733, "learning_rate": 0.00014972521880724608, "loss": 0.9851, "step": 1241 }, { "epoch": 0.5049806871315308, "grad_norm": 0.09275151789188385, "learning_rate": 0.00014968451048239366, "loss": 0.9452, "step": 1242 }, { "epoch": 0.5053872738361456, "grad_norm": 0.09436964988708496, "learning_rate": 0.00014964380215754124, "loss": 1.0731, "step": 1243 }, { "epoch": 0.5057938605407604, "grad_norm": 0.09008494764566422, "learning_rate": 0.0001496030938326888, "loss": 1.0238, "step": 1244 }, { "epoch": 0.506200447245375, "grad_norm": 0.08599425107240677, "learning_rate": 0.00014956238550783638, "loss": 0.9148, "step": 1245 }, { "epoch": 0.5066070339499898, "grad_norm": 0.09270120412111282, "learning_rate": 0.00014952167718298393, "loss": 0.9348, "step": 1246 }, { "epoch": 0.5070136206546046, "grad_norm": 0.09423110634088516, "learning_rate": 
0.0001494809688581315, "loss": 1.0746, "step": 1247 }, { "epoch": 0.5074202073592193, "grad_norm": 0.08819740265607834, "learning_rate": 0.00014944026053327907, "loss": 1.0913, "step": 1248 }, { "epoch": 0.5078267940638341, "grad_norm": 0.08502914011478424, "learning_rate": 0.00014939955220842662, "loss": 1.0142, "step": 1249 }, { "epoch": 0.5082333807684488, "grad_norm": 0.09372544288635254, "learning_rate": 0.0001493588438835742, "loss": 0.9297, "step": 1250 }, { "epoch": 0.5086399674730636, "grad_norm": 0.09857220202684402, "learning_rate": 0.00014931813555872175, "loss": 1.0665, "step": 1251 }, { "epoch": 0.5090465541776784, "grad_norm": 0.09227776527404785, "learning_rate": 0.00014927742723386934, "loss": 0.9791, "step": 1252 }, { "epoch": 0.5094531408822931, "grad_norm": 0.09301433712244034, "learning_rate": 0.0001492367189090169, "loss": 0.8855, "step": 1253 }, { "epoch": 0.5098597275869079, "grad_norm": 0.09796632081270218, "learning_rate": 0.00014919601058416447, "loss": 1.0645, "step": 1254 }, { "epoch": 0.5102663142915227, "grad_norm": 0.09791705757379532, "learning_rate": 0.00014915530225931205, "loss": 0.9949, "step": 1255 }, { "epoch": 0.5106729009961374, "grad_norm": 0.09171664714813232, "learning_rate": 0.0001491145939344596, "loss": 0.8958, "step": 1256 }, { "epoch": 0.5110794877007522, "grad_norm": 0.10115580260753632, "learning_rate": 0.0001490738856096072, "loss": 1.0141, "step": 1257 }, { "epoch": 0.511486074405367, "grad_norm": 0.08854761719703674, "learning_rate": 0.00014903317728475474, "loss": 0.9733, "step": 1258 }, { "epoch": 0.5118926611099817, "grad_norm": 0.0944913849234581, "learning_rate": 0.0001489924689599023, "loss": 1.0187, "step": 1259 }, { "epoch": 0.5122992478145965, "grad_norm": 0.08820286393165588, "learning_rate": 0.00014895176063504988, "loss": 0.984, "step": 1260 }, { "epoch": 0.5127058345192113, "grad_norm": 0.0941242128610611, "learning_rate": 0.00014891105231019743, "loss": 1.0333, "step": 1261 }, { "epoch": 
0.5131124212238259, "grad_norm": 0.09355438500642776, "learning_rate": 0.000148870343985345, "loss": 1.1186, "step": 1262 }, { "epoch": 0.5135190079284407, "grad_norm": 0.09487958997488022, "learning_rate": 0.00014882963566049257, "loss": 1.1297, "step": 1263 }, { "epoch": 0.5139255946330555, "grad_norm": 0.08488618582487106, "learning_rate": 0.00014878892733564015, "loss": 0.9725, "step": 1264 }, { "epoch": 0.5143321813376702, "grad_norm": 0.09238637238740921, "learning_rate": 0.0001487482190107877, "loss": 0.9798, "step": 1265 }, { "epoch": 0.514738768042285, "grad_norm": 0.09334023296833038, "learning_rate": 0.00014870751068593528, "loss": 1.0818, "step": 1266 }, { "epoch": 0.5151453547468998, "grad_norm": 0.09130462259054184, "learning_rate": 0.00014866680236108286, "loss": 0.9885, "step": 1267 }, { "epoch": 0.5155519414515145, "grad_norm": 0.08275487273931503, "learning_rate": 0.00014862609403623042, "loss": 0.8525, "step": 1268 }, { "epoch": 0.5159585281561293, "grad_norm": 0.09485149383544922, "learning_rate": 0.000148585385711378, "loss": 1.0424, "step": 1269 }, { "epoch": 0.5163651148607441, "grad_norm": 0.08834747970104218, "learning_rate": 0.00014854467738652555, "loss": 0.9235, "step": 1270 }, { "epoch": 0.5167717015653588, "grad_norm": 0.09200993925333023, "learning_rate": 0.0001485039690616731, "loss": 1.0669, "step": 1271 }, { "epoch": 0.5171782882699736, "grad_norm": 0.08159536123275757, "learning_rate": 0.0001484632607368207, "loss": 0.9067, "step": 1272 }, { "epoch": 0.5175848749745884, "grad_norm": 0.08643992245197296, "learning_rate": 0.00014842255241196824, "loss": 0.9632, "step": 1273 }, { "epoch": 0.5179914616792031, "grad_norm": 0.09672199934720993, "learning_rate": 0.00014838184408711582, "loss": 1.0263, "step": 1274 }, { "epoch": 0.5183980483838179, "grad_norm": 0.09713756293058395, "learning_rate": 0.00014834113576226338, "loss": 0.9166, "step": 1275 }, { "epoch": 0.5188046350884326, "grad_norm": 0.08467654883861542, "learning_rate": 
0.00014830042743741096, "loss": 0.9201, "step": 1276 }, { "epoch": 0.5192112217930474, "grad_norm": 0.08024970442056656, "learning_rate": 0.0001482597191125585, "loss": 0.8556, "step": 1277 }, { "epoch": 0.5196178084976621, "grad_norm": 0.09249437600374222, "learning_rate": 0.0001482190107877061, "loss": 1.0381, "step": 1278 }, { "epoch": 0.5200243952022768, "grad_norm": 0.08076690137386322, "learning_rate": 0.00014817830246285368, "loss": 0.9216, "step": 1279 }, { "epoch": 0.5204309819068916, "grad_norm": 0.09259018301963806, "learning_rate": 0.00014813759413800123, "loss": 1.0547, "step": 1280 }, { "epoch": 0.5208375686115064, "grad_norm": 0.08734786510467529, "learning_rate": 0.0001480968858131488, "loss": 0.8811, "step": 1281 }, { "epoch": 0.5212441553161211, "grad_norm": 0.094956174492836, "learning_rate": 0.00014805617748829637, "loss": 0.9665, "step": 1282 }, { "epoch": 0.5216507420207359, "grad_norm": 0.08848060667514801, "learning_rate": 0.00014801546916344392, "loss": 0.9945, "step": 1283 }, { "epoch": 0.5220573287253507, "grad_norm": 0.0921303778886795, "learning_rate": 0.0001479747608385915, "loss": 1.0927, "step": 1284 }, { "epoch": 0.5224639154299654, "grad_norm": 0.08918172121047974, "learning_rate": 0.00014793405251373905, "loss": 0.9598, "step": 1285 }, { "epoch": 0.5228705021345802, "grad_norm": 0.10177495330572128, "learning_rate": 0.00014789334418888664, "loss": 1.1625, "step": 1286 }, { "epoch": 0.523277088839195, "grad_norm": 0.0942060649394989, "learning_rate": 0.0001478526358640342, "loss": 1.0612, "step": 1287 }, { "epoch": 0.5236836755438097, "grad_norm": 0.09780838340520859, "learning_rate": 0.00014781192753918177, "loss": 1.1024, "step": 1288 }, { "epoch": 0.5240902622484245, "grad_norm": 0.08893782645463943, "learning_rate": 0.00014777121921432935, "loss": 1.043, "step": 1289 }, { "epoch": 0.5244968489530393, "grad_norm": 0.0918479710817337, "learning_rate": 0.0001477305108894769, "loss": 0.9824, "step": 1290 }, { "epoch": 
0.524903435657654, "grad_norm": 0.09912838041782379, "learning_rate": 0.0001476898025646245, "loss": 1.0346, "step": 1291 }, { "epoch": 0.5253100223622688, "grad_norm": 0.10609038919210434, "learning_rate": 0.00014764909423977204, "loss": 1.0133, "step": 1292 }, { "epoch": 0.5257166090668836, "grad_norm": 0.09957921504974365, "learning_rate": 0.00014760838591491962, "loss": 0.9842, "step": 1293 }, { "epoch": 0.5261231957714982, "grad_norm": 0.09777513146400452, "learning_rate": 0.00014756767759006718, "loss": 1.0092, "step": 1294 }, { "epoch": 0.526529782476113, "grad_norm": 0.08816764503717422, "learning_rate": 0.00014752696926521473, "loss": 0.9064, "step": 1295 }, { "epoch": 0.5269363691807278, "grad_norm": 0.09163589775562286, "learning_rate": 0.0001474862609403623, "loss": 0.9682, "step": 1296 }, { "epoch": 0.5273429558853425, "grad_norm": 21.36524772644043, "learning_rate": 0.00014744555261550987, "loss": 1.0146, "step": 1297 }, { "epoch": 0.5277495425899573, "grad_norm": 0.09484653919935226, "learning_rate": 0.00014740484429065745, "loss": 0.979, "step": 1298 }, { "epoch": 0.5281561292945721, "grad_norm": 0.09288137406110764, "learning_rate": 0.000147364135965805, "loss": 0.9692, "step": 1299 }, { "epoch": 0.5285627159991868, "grad_norm": 0.09847582131624222, "learning_rate": 0.00014732342764095258, "loss": 1.0519, "step": 1300 }, { "epoch": 0.5289693027038016, "grad_norm": 0.09856998920440674, "learning_rate": 0.00014728271931610016, "loss": 0.9929, "step": 1301 }, { "epoch": 0.5293758894084164, "grad_norm": 0.0969497561454773, "learning_rate": 0.00014724201099124772, "loss": 0.9412, "step": 1302 }, { "epoch": 0.5297824761130311, "grad_norm": 0.09796781092882156, "learning_rate": 0.0001472013026663953, "loss": 0.9538, "step": 1303 }, { "epoch": 0.5301890628176459, "grad_norm": 0.09267283231019974, "learning_rate": 0.00014716059434154285, "loss": 0.9616, "step": 1304 }, { "epoch": 0.5305956495222606, "grad_norm": 0.10447274148464203, "learning_rate": 
0.00014711988601669043, "loss": 0.9485, "step": 1305 }, { "epoch": 0.5310022362268754, "grad_norm": 0.10163460671901703, "learning_rate": 0.000147079177691838, "loss": 0.9419, "step": 1306 }, { "epoch": 0.5314088229314902, "grad_norm": 0.09405020624399185, "learning_rate": 0.00014703846936698554, "loss": 0.9806, "step": 1307 }, { "epoch": 0.5318154096361049, "grad_norm": 0.09395210444927216, "learning_rate": 0.00014699776104213312, "loss": 1.0278, "step": 1308 }, { "epoch": 0.5322219963407196, "grad_norm": 0.09595540910959244, "learning_rate": 0.00014695705271728068, "loss": 1.0625, "step": 1309 }, { "epoch": 0.5326285830453344, "grad_norm": 0.0832480788230896, "learning_rate": 0.00014691634439242826, "loss": 0.9134, "step": 1310 }, { "epoch": 0.5330351697499491, "grad_norm": 0.10631989687681198, "learning_rate": 0.0001468756360675758, "loss": 0.9753, "step": 1311 }, { "epoch": 0.5334417564545639, "grad_norm": 0.0866394117474556, "learning_rate": 0.0001468349277427234, "loss": 0.9492, "step": 1312 }, { "epoch": 0.5338483431591787, "grad_norm": 0.10123784095048904, "learning_rate": 0.00014679421941787098, "loss": 0.9819, "step": 1313 }, { "epoch": 0.5342549298637934, "grad_norm": 0.08982353657484055, "learning_rate": 0.00014675351109301853, "loss": 0.9026, "step": 1314 }, { "epoch": 0.5346615165684082, "grad_norm": 0.08998806774616241, "learning_rate": 0.0001467128027681661, "loss": 0.9467, "step": 1315 }, { "epoch": 0.535068103273023, "grad_norm": 0.09901012480258942, "learning_rate": 0.00014667209444331366, "loss": 0.9655, "step": 1316 }, { "epoch": 0.5354746899776377, "grad_norm": 0.10991565883159637, "learning_rate": 0.00014663138611846125, "loss": 1.0236, "step": 1317 }, { "epoch": 0.5358812766822525, "grad_norm": 0.10133833438158035, "learning_rate": 0.0001465906777936088, "loss": 1.0453, "step": 1318 }, { "epoch": 0.5362878633868673, "grad_norm": 0.10197743028402328, "learning_rate": 0.00014654996946875635, "loss": 0.9008, "step": 1319 }, { "epoch": 
0.536694450091482, "grad_norm": 0.09654685854911804, "learning_rate": 0.00014650926114390394, "loss": 1.0586, "step": 1320 }, { "epoch": 0.5371010367960968, "grad_norm": 0.10006607323884964, "learning_rate": 0.0001464685528190515, "loss": 0.9627, "step": 1321 }, { "epoch": 0.5375076235007116, "grad_norm": 0.09992939233779907, "learning_rate": 0.00014642784449419907, "loss": 0.9841, "step": 1322 }, { "epoch": 0.5379142102053263, "grad_norm": 0.098929263651371, "learning_rate": 0.00014638713616934662, "loss": 0.9764, "step": 1323 }, { "epoch": 0.5383207969099411, "grad_norm": 0.09640022367238998, "learning_rate": 0.0001463464278444942, "loss": 0.9922, "step": 1324 }, { "epoch": 0.5387273836145559, "grad_norm": 0.09175208956003189, "learning_rate": 0.0001463057195196418, "loss": 1.0659, "step": 1325 }, { "epoch": 0.5391339703191705, "grad_norm": 0.09107311069965363, "learning_rate": 0.00014626501119478934, "loss": 0.9898, "step": 1326 }, { "epoch": 0.5395405570237853, "grad_norm": 0.10652513056993484, "learning_rate": 0.00014622430286993692, "loss": 1.1346, "step": 1327 }, { "epoch": 0.5399471437284001, "grad_norm": 0.09096572548151016, "learning_rate": 0.00014618359454508448, "loss": 0.9296, "step": 1328 }, { "epoch": 0.5403537304330148, "grad_norm": 0.0995742529630661, "learning_rate": 0.00014614288622023206, "loss": 1.0034, "step": 1329 }, { "epoch": 0.5407603171376296, "grad_norm": 0.08811762928962708, "learning_rate": 0.0001461021778953796, "loss": 0.9928, "step": 1330 }, { "epoch": 0.5411669038422443, "grad_norm": 0.09473133832216263, "learning_rate": 0.00014606146957052717, "loss": 1.002, "step": 1331 }, { "epoch": 0.5415734905468591, "grad_norm": 0.08898860216140747, "learning_rate": 0.00014602076124567475, "loss": 0.9358, "step": 1332 }, { "epoch": 0.5419800772514739, "grad_norm": 0.093483105301857, "learning_rate": 0.0001459800529208223, "loss": 0.9319, "step": 1333 }, { "epoch": 0.5423866639560886, "grad_norm": 0.09663254022598267, "learning_rate": 
0.00014593934459596988, "loss": 1.0041, "step": 1334 }, { "epoch": 0.5427932506607034, "grad_norm": 0.08969207853078842, "learning_rate": 0.00014589863627111746, "loss": 0.996, "step": 1335 }, { "epoch": 0.5431998373653182, "grad_norm": 0.08921096473932266, "learning_rate": 0.00014585792794626502, "loss": 0.9263, "step": 1336 }, { "epoch": 0.5436064240699329, "grad_norm": 0.08625603467226028, "learning_rate": 0.0001458172196214126, "loss": 0.9372, "step": 1337 }, { "epoch": 0.5440130107745477, "grad_norm": 0.09406933933496475, "learning_rate": 0.00014577651129656015, "loss": 1.0037, "step": 1338 }, { "epoch": 0.5444195974791625, "grad_norm": 0.08918149769306183, "learning_rate": 0.00014573580297170773, "loss": 0.9418, "step": 1339 }, { "epoch": 0.5448261841837772, "grad_norm": 0.09736087918281555, "learning_rate": 0.0001456950946468553, "loss": 0.965, "step": 1340 }, { "epoch": 0.545232770888392, "grad_norm": 0.09973054379224777, "learning_rate": 0.00014565438632200287, "loss": 0.8948, "step": 1341 }, { "epoch": 0.5456393575930067, "grad_norm": 0.08326181769371033, "learning_rate": 0.00014561367799715042, "loss": 0.9051, "step": 1342 }, { "epoch": 0.5460459442976214, "grad_norm": 0.0919221043586731, "learning_rate": 0.00014557296967229798, "loss": 0.95, "step": 1343 }, { "epoch": 0.5464525310022362, "grad_norm": 0.08741891384124756, "learning_rate": 0.00014553226134744556, "loss": 0.9682, "step": 1344 }, { "epoch": 0.546859117706851, "grad_norm": 0.09859665483236313, "learning_rate": 0.0001454915530225931, "loss": 1.0564, "step": 1345 }, { "epoch": 0.5472657044114657, "grad_norm": 0.09352114796638489, "learning_rate": 0.0001454508446977407, "loss": 0.982, "step": 1346 }, { "epoch": 0.5476722911160805, "grad_norm": 0.09592889994382858, "learning_rate": 0.00014541013637288827, "loss": 0.9874, "step": 1347 }, { "epoch": 0.5480788778206953, "grad_norm": 0.08276782929897308, "learning_rate": 0.00014536942804803583, "loss": 1.0243, "step": 1348 }, { "epoch": 
0.54848546452531, "grad_norm": 0.09625902026891708, "learning_rate": 0.0001453287197231834, "loss": 1.0977, "step": 1349 }, { "epoch": 0.5488920512299248, "grad_norm": 0.08539925515651703, "learning_rate": 0.00014528801139833096, "loss": 0.9816, "step": 1350 }, { "epoch": 0.5492986379345396, "grad_norm": 0.08654636144638062, "learning_rate": 0.00014524730307347855, "loss": 1.02, "step": 1351 }, { "epoch": 0.5497052246391543, "grad_norm": 0.09811274707317352, "learning_rate": 0.0001452065947486261, "loss": 1.1509, "step": 1352 }, { "epoch": 0.5501118113437691, "grad_norm": 0.09280407428741455, "learning_rate": 0.00014516588642377365, "loss": 1.0163, "step": 1353 }, { "epoch": 0.5505183980483839, "grad_norm": 0.08086491376161575, "learning_rate": 0.00014512517809892123, "loss": 0.853, "step": 1354 }, { "epoch": 0.5509249847529986, "grad_norm": 0.0827447846531868, "learning_rate": 0.0001450844697740688, "loss": 0.9749, "step": 1355 }, { "epoch": 0.5513315714576134, "grad_norm": 0.09065467119216919, "learning_rate": 0.00014504376144921637, "loss": 1.0186, "step": 1356 }, { "epoch": 0.551738158162228, "grad_norm": 0.08642933517694473, "learning_rate": 0.00014500305312436392, "loss": 0.9175, "step": 1357 }, { "epoch": 0.5521447448668428, "grad_norm": 0.08930498361587524, "learning_rate": 0.0001449623447995115, "loss": 1.0027, "step": 1358 }, { "epoch": 0.5525513315714576, "grad_norm": 0.09525667130947113, "learning_rate": 0.0001449216364746591, "loss": 1.1328, "step": 1359 }, { "epoch": 0.5529579182760723, "grad_norm": 0.08723597228527069, "learning_rate": 0.00014488092814980664, "loss": 0.9025, "step": 1360 }, { "epoch": 0.5533645049806871, "grad_norm": 0.08364204317331314, "learning_rate": 0.00014484021982495422, "loss": 0.939, "step": 1361 }, { "epoch": 0.5537710916853019, "grad_norm": 0.08982790261507034, "learning_rate": 0.00014479951150010178, "loss": 0.8604, "step": 1362 }, { "epoch": 0.5541776783899166, "grad_norm": 0.08386033028364182, "learning_rate": 
0.00014475880317524936, "loss": 0.957, "step": 1363 }, { "epoch": 0.5545842650945314, "grad_norm": 0.0920158326625824, "learning_rate": 0.0001447180948503969, "loss": 0.9388, "step": 1364 }, { "epoch": 0.5549908517991462, "grad_norm": 0.08764606714248657, "learning_rate": 0.00014467738652554447, "loss": 0.9721, "step": 1365 }, { "epoch": 0.5553974385037609, "grad_norm": 0.09296350926160812, "learning_rate": 0.00014463667820069205, "loss": 1.0195, "step": 1366 }, { "epoch": 0.5558040252083757, "grad_norm": 0.08107852935791016, "learning_rate": 0.0001445959698758396, "loss": 0.9001, "step": 1367 }, { "epoch": 0.5562106119129905, "grad_norm": 0.08827921748161316, "learning_rate": 0.00014455526155098718, "loss": 1.0009, "step": 1368 }, { "epoch": 0.5566171986176052, "grad_norm": 0.08549787849187851, "learning_rate": 0.00014451455322613474, "loss": 0.8675, "step": 1369 }, { "epoch": 0.55702378532222, "grad_norm": 0.10005125403404236, "learning_rate": 0.00014447384490128232, "loss": 1.1293, "step": 1370 }, { "epoch": 0.5574303720268348, "grad_norm": 0.09509359300136566, "learning_rate": 0.0001444331365764299, "loss": 1.0033, "step": 1371 }, { "epoch": 0.5578369587314495, "grad_norm": 0.09246810525655746, "learning_rate": 0.00014439242825157745, "loss": 0.9563, "step": 1372 }, { "epoch": 0.5582435454360642, "grad_norm": 0.09919826686382294, "learning_rate": 0.00014435171992672503, "loss": 1.0925, "step": 1373 }, { "epoch": 0.558650132140679, "grad_norm": 0.09652990102767944, "learning_rate": 0.0001443110116018726, "loss": 1.0716, "step": 1374 }, { "epoch": 0.5590567188452937, "grad_norm": 0.08819134533405304, "learning_rate": 0.00014427030327702017, "loss": 0.9586, "step": 1375 }, { "epoch": 0.5594633055499085, "grad_norm": 0.09266290068626404, "learning_rate": 0.00014422959495216772, "loss": 1.0399, "step": 1376 }, { "epoch": 0.5598698922545233, "grad_norm": 0.08892200142145157, "learning_rate": 0.00014418888662731528, "loss": 0.9844, "step": 1377 }, { "epoch": 
0.560276478959138, "grad_norm": 0.09452232718467712, "learning_rate": 0.00014414817830246286, "loss": 1.0875, "step": 1378 }, { "epoch": 0.5606830656637528, "grad_norm": 0.08958882093429565, "learning_rate": 0.0001441074699776104, "loss": 1.0234, "step": 1379 }, { "epoch": 0.5610896523683676, "grad_norm": 0.09218178689479828, "learning_rate": 0.000144066761652758, "loss": 1.0871, "step": 1380 }, { "epoch": 0.5614962390729823, "grad_norm": 0.08819695562124252, "learning_rate": 0.00014402605332790557, "loss": 0.9046, "step": 1381 }, { "epoch": 0.5619028257775971, "grad_norm": 0.09621118754148483, "learning_rate": 0.00014398534500305313, "loss": 0.9789, "step": 1382 }, { "epoch": 0.5623094124822118, "grad_norm": 0.08230914175510406, "learning_rate": 0.0001439446366782007, "loss": 0.817, "step": 1383 }, { "epoch": 0.5627159991868266, "grad_norm": 0.08805210143327713, "learning_rate": 0.00014390392835334826, "loss": 0.9488, "step": 1384 }, { "epoch": 0.5631225858914414, "grad_norm": 0.09026028960943222, "learning_rate": 0.00014386322002849584, "loss": 0.9837, "step": 1385 }, { "epoch": 0.5635291725960561, "grad_norm": 0.09834691882133484, "learning_rate": 0.0001438225117036434, "loss": 0.999, "step": 1386 }, { "epoch": 0.5639357593006709, "grad_norm": 0.09209754317998886, "learning_rate": 0.00014378180337879098, "loss": 0.9923, "step": 1387 }, { "epoch": 0.5643423460052857, "grad_norm": 0.08959315717220306, "learning_rate": 0.00014374109505393853, "loss": 0.9282, "step": 1388 }, { "epoch": 0.5647489327099003, "grad_norm": 0.08573776483535767, "learning_rate": 0.0001437003867290861, "loss": 0.9504, "step": 1389 }, { "epoch": 0.5651555194145151, "grad_norm": 0.08887659013271332, "learning_rate": 0.00014365967840423367, "loss": 0.9195, "step": 1390 }, { "epoch": 0.5655621061191299, "grad_norm": 0.08740208297967911, "learning_rate": 0.00014361897007938122, "loss": 0.9537, "step": 1391 }, { "epoch": 0.5659686928237446, "grad_norm": 0.08976002782583237, "learning_rate": 
0.0001435782617545288, "loss": 0.9126, "step": 1392 }, { "epoch": 0.5663752795283594, "grad_norm": 0.09727158397436142, "learning_rate": 0.00014353755342967639, "loss": 1.0088, "step": 1393 }, { "epoch": 0.5667818662329742, "grad_norm": 0.09165914356708527, "learning_rate": 0.00014349684510482394, "loss": 1.0443, "step": 1394 }, { "epoch": 0.5671884529375889, "grad_norm": 0.08791441470384598, "learning_rate": 0.00014345613677997152, "loss": 0.9708, "step": 1395 }, { "epoch": 0.5675950396422037, "grad_norm": 0.08658348023891449, "learning_rate": 0.00014341542845511908, "loss": 0.9347, "step": 1396 }, { "epoch": 0.5680016263468185, "grad_norm": 0.08867420256137848, "learning_rate": 0.00014337472013026666, "loss": 1.0331, "step": 1397 }, { "epoch": 0.5684082130514332, "grad_norm": 0.09206686913967133, "learning_rate": 0.0001433340118054142, "loss": 1.0469, "step": 1398 }, { "epoch": 0.568814799756048, "grad_norm": 0.09050408750772476, "learning_rate": 0.0001432933034805618, "loss": 0.9426, "step": 1399 }, { "epoch": 0.5692213864606628, "grad_norm": 0.08967922627925873, "learning_rate": 0.00014325259515570935, "loss": 0.9217, "step": 1400 }, { "epoch": 0.5696279731652775, "grad_norm": 0.08758019655942917, "learning_rate": 0.0001432118868308569, "loss": 0.9559, "step": 1401 }, { "epoch": 0.5700345598698923, "grad_norm": 0.09254743903875351, "learning_rate": 0.00014317117850600448, "loss": 0.9779, "step": 1402 }, { "epoch": 0.5704411465745071, "grad_norm": 0.09395452588796616, "learning_rate": 0.00014313047018115204, "loss": 1.0009, "step": 1403 }, { "epoch": 0.5708477332791217, "grad_norm": 0.09259745478630066, "learning_rate": 0.00014308976185629964, "loss": 1.0154, "step": 1404 }, { "epoch": 0.5712543199837365, "grad_norm": 0.09286468476057053, "learning_rate": 0.0001430490535314472, "loss": 1.0889, "step": 1405 }, { "epoch": 0.5716609066883513, "grad_norm": 0.08744499087333679, "learning_rate": 0.00014300834520659475, "loss": 0.9786, "step": 1406 }, { "epoch": 
0.572067493392966, "grad_norm": 0.09346942603588104, "learning_rate": 0.00014296763688174233, "loss": 0.9789, "step": 1407 }, { "epoch": 0.5724740800975808, "grad_norm": 0.09010860323905945, "learning_rate": 0.0001429269285568899, "loss": 1.018, "step": 1408 }, { "epoch": 0.5728806668021955, "grad_norm": 0.0881861224770546, "learning_rate": 0.00014288622023203747, "loss": 1.0898, "step": 1409 }, { "epoch": 0.5732872535068103, "grad_norm": 0.08293981850147247, "learning_rate": 0.00014284551190718502, "loss": 0.9129, "step": 1410 }, { "epoch": 0.5736938402114251, "grad_norm": 0.09111000597476959, "learning_rate": 0.0001428048035823326, "loss": 0.9556, "step": 1411 }, { "epoch": 0.5741004269160398, "grad_norm": 0.09435521066188812, "learning_rate": 0.00014276409525748016, "loss": 1.1178, "step": 1412 }, { "epoch": 0.5745070136206546, "grad_norm": 0.08865281194448471, "learning_rate": 0.0001427233869326277, "loss": 0.9682, "step": 1413 }, { "epoch": 0.5749136003252694, "grad_norm": 0.08608002215623856, "learning_rate": 0.0001426826786077753, "loss": 0.9144, "step": 1414 }, { "epoch": 0.5753201870298841, "grad_norm": 0.08543986827135086, "learning_rate": 0.00014264197028292285, "loss": 0.9314, "step": 1415 }, { "epoch": 0.5757267737344989, "grad_norm": 0.09068971127271652, "learning_rate": 0.00014260126195807046, "loss": 0.9835, "step": 1416 }, { "epoch": 0.5761333604391137, "grad_norm": 0.08598853647708893, "learning_rate": 0.000142560553633218, "loss": 0.9396, "step": 1417 }, { "epoch": 0.5765399471437284, "grad_norm": 0.08450654149055481, "learning_rate": 0.00014251984530836556, "loss": 0.9472, "step": 1418 }, { "epoch": 0.5769465338483432, "grad_norm": 0.09064414352178574, "learning_rate": 0.00014247913698351314, "loss": 1.0167, "step": 1419 }, { "epoch": 0.577353120552958, "grad_norm": 0.08948381245136261, "learning_rate": 0.0001424384286586607, "loss": 0.9459, "step": 1420 }, { "epoch": 0.5777597072575726, "grad_norm": 0.0811019316315651, "learning_rate": 
0.00014239772033380828, "loss": 0.8846, "step": 1421 }, { "epoch": 0.5781662939621874, "grad_norm": 0.09058842808008194, "learning_rate": 0.00014235701200895583, "loss": 0.9999, "step": 1422 }, { "epoch": 0.5785728806668022, "grad_norm": 0.09327298402786255, "learning_rate": 0.00014231630368410342, "loss": 1.0694, "step": 1423 }, { "epoch": 0.5789794673714169, "grad_norm": 0.08615417778491974, "learning_rate": 0.00014227559535925097, "loss": 0.9884, "step": 1424 }, { "epoch": 0.5793860540760317, "grad_norm": 0.09632913023233414, "learning_rate": 0.00014223488703439852, "loss": 1.0215, "step": 1425 }, { "epoch": 0.5797926407806465, "grad_norm": 0.0939357578754425, "learning_rate": 0.0001421941787095461, "loss": 1.0785, "step": 1426 }, { "epoch": 0.5801992274852612, "grad_norm": 0.08809401839971542, "learning_rate": 0.00014215347038469369, "loss": 0.9597, "step": 1427 }, { "epoch": 0.580605814189876, "grad_norm": 0.08961009234189987, "learning_rate": 0.00014211276205984127, "loss": 0.9988, "step": 1428 }, { "epoch": 0.5810124008944908, "grad_norm": 0.0883122980594635, "learning_rate": 0.00014207205373498882, "loss": 1.0566, "step": 1429 }, { "epoch": 0.5814189875991055, "grad_norm": 0.09150592237710953, "learning_rate": 0.00014203134541013637, "loss": 1.0875, "step": 1430 }, { "epoch": 0.5818255743037203, "grad_norm": 0.097344771027565, "learning_rate": 0.00014199063708528396, "loss": 1.0141, "step": 1431 }, { "epoch": 0.5822321610083351, "grad_norm": 0.09442117810249329, "learning_rate": 0.0001419499287604315, "loss": 0.9769, "step": 1432 }, { "epoch": 0.5826387477129498, "grad_norm": 0.08522289991378784, "learning_rate": 0.0001419092204355791, "loss": 0.9396, "step": 1433 }, { "epoch": 0.5830453344175646, "grad_norm": 0.0909838005900383, "learning_rate": 0.00014186851211072665, "loss": 0.9983, "step": 1434 }, { "epoch": 0.5834519211221793, "grad_norm": 0.09627141058444977, "learning_rate": 0.00014182780378587423, "loss": 1.0929, "step": 1435 }, { "epoch": 
0.583858507826794, "grad_norm": 0.08965554088354111, "learning_rate": 0.00014178709546102178, "loss": 0.9145, "step": 1436 }, { "epoch": 0.5842650945314088, "grad_norm": 0.09004207700490952, "learning_rate": 0.00014174638713616933, "loss": 0.9921, "step": 1437 }, { "epoch": 0.5846716812360235, "grad_norm": 0.09295787662267685, "learning_rate": 0.00014170567881131692, "loss": 0.9756, "step": 1438 }, { "epoch": 0.5850782679406383, "grad_norm": 0.0893683210015297, "learning_rate": 0.0001416649704864645, "loss": 0.8974, "step": 1439 }, { "epoch": 0.5854848546452531, "grad_norm": 0.08255141973495483, "learning_rate": 0.00014162426216161205, "loss": 0.9201, "step": 1440 }, { "epoch": 0.5858914413498678, "grad_norm": 0.0966111272573471, "learning_rate": 0.00014158355383675963, "loss": 1.0611, "step": 1441 }, { "epoch": 0.5862980280544826, "grad_norm": 0.09531056135892868, "learning_rate": 0.0001415428455119072, "loss": 1.0989, "step": 1442 }, { "epoch": 0.5867046147590974, "grad_norm": 0.09289577603340149, "learning_rate": 0.00014150213718705477, "loss": 1.0232, "step": 1443 }, { "epoch": 0.5871112014637121, "grad_norm": 0.10038848221302032, "learning_rate": 0.00014146142886220232, "loss": 1.013, "step": 1444 }, { "epoch": 0.5875177881683269, "grad_norm": 0.09008078277111053, "learning_rate": 0.0001414207205373499, "loss": 0.9039, "step": 1445 }, { "epoch": 0.5879243748729417, "grad_norm": 0.08941890299320221, "learning_rate": 0.00014138001221249746, "loss": 0.8866, "step": 1446 }, { "epoch": 0.5883309615775564, "grad_norm": 0.08407185226678848, "learning_rate": 0.00014133930388764504, "loss": 0.9468, "step": 1447 }, { "epoch": 0.5887375482821712, "grad_norm": 0.096216581761837, "learning_rate": 0.0001412985955627926, "loss": 1.0516, "step": 1448 }, { "epoch": 0.589144134986786, "grad_norm": 0.09403221309185028, "learning_rate": 0.00014125788723794015, "loss": 0.9771, "step": 1449 }, { "epoch": 0.5895507216914007, "grad_norm": 0.08534131199121475, "learning_rate": 
0.00014121717891308775, "loss": 0.9012, "step": 1450 }, { "epoch": 0.5899573083960155, "grad_norm": 0.09011968225240707, "learning_rate": 0.0001411764705882353, "loss": 0.9724, "step": 1451 }, { "epoch": 0.5903638951006303, "grad_norm": 0.08891688287258148, "learning_rate": 0.00014113576226338286, "loss": 0.9225, "step": 1452 }, { "epoch": 0.5907704818052449, "grad_norm": 0.08605680614709854, "learning_rate": 0.00014109505393853044, "loss": 0.9403, "step": 1453 }, { "epoch": 0.5911770685098597, "grad_norm": 0.08760562539100647, "learning_rate": 0.000141054345613678, "loss": 0.9728, "step": 1454 }, { "epoch": 0.5915836552144745, "grad_norm": 0.08932702243328094, "learning_rate": 0.00014101363728882558, "loss": 1.0377, "step": 1455 }, { "epoch": 0.5919902419190892, "grad_norm": 0.09998058527708054, "learning_rate": 0.00014097292896397313, "loss": 1.0434, "step": 1456 }, { "epoch": 0.592396828623704, "grad_norm": 0.09377194941043854, "learning_rate": 0.00014093222063912071, "loss": 0.9308, "step": 1457 }, { "epoch": 0.5928034153283188, "grad_norm": 0.08387821912765503, "learning_rate": 0.00014089151231426827, "loss": 0.8875, "step": 1458 }, { "epoch": 0.5932100020329335, "grad_norm": 0.08756202459335327, "learning_rate": 0.00014085080398941582, "loss": 1.0069, "step": 1459 }, { "epoch": 0.5936165887375483, "grad_norm": 0.08637526631355286, "learning_rate": 0.0001408100956645634, "loss": 0.9067, "step": 1460 }, { "epoch": 0.594023175442163, "grad_norm": 0.08818566054105759, "learning_rate": 0.00014076938733971096, "loss": 0.9375, "step": 1461 }, { "epoch": 0.5944297621467778, "grad_norm": 0.09050768613815308, "learning_rate": 0.00014072867901485857, "loss": 0.9742, "step": 1462 }, { "epoch": 0.5948363488513926, "grad_norm": 0.08764854818582535, "learning_rate": 0.00014068797069000612, "loss": 0.8995, "step": 1463 }, { "epoch": 0.5952429355560073, "grad_norm": 0.0841783955693245, "learning_rate": 0.00014064726236515367, "loss": 0.9179, "step": 1464 }, { "epoch": 
0.5956495222606221, "grad_norm": 0.08915995806455612, "learning_rate": 0.00014060655404030126, "loss": 0.9973, "step": 1465 }, { "epoch": 0.5960561089652369, "grad_norm": 0.08400030434131622, "learning_rate": 0.0001405658457154488, "loss": 0.9374, "step": 1466 }, { "epoch": 0.5964626956698516, "grad_norm": 0.08585075289011002, "learning_rate": 0.0001405251373905964, "loss": 0.9371, "step": 1467 }, { "epoch": 0.5968692823744663, "grad_norm": 5.189364433288574, "learning_rate": 0.00014048442906574395, "loss": 0.9443, "step": 1468 }, { "epoch": 0.5972758690790811, "grad_norm": 0.11196129769086838, "learning_rate": 0.00014044372074089153, "loss": 0.8475, "step": 1469 }, { "epoch": 0.5976824557836958, "grad_norm": 0.13671468198299408, "learning_rate": 0.00014040301241603908, "loss": 0.9082, "step": 1470 }, { "epoch": 0.5980890424883106, "grad_norm": 0.1605953872203827, "learning_rate": 0.00014036230409118663, "loss": 1.0311, "step": 1471 }, { "epoch": 0.5984956291929254, "grad_norm": 0.1232098862528801, "learning_rate": 0.00014032159576633422, "loss": 0.9131, "step": 1472 }, { "epoch": 0.5989022158975401, "grad_norm": 0.10262708365917206, "learning_rate": 0.0001402808874414818, "loss": 0.998, "step": 1473 }, { "epoch": 0.5993088026021549, "grad_norm": 0.10314701497554779, "learning_rate": 0.00014024017911662938, "loss": 0.9527, "step": 1474 }, { "epoch": 0.5997153893067697, "grad_norm": 0.10268500447273254, "learning_rate": 0.00014019947079177693, "loss": 1.0287, "step": 1475 }, { "epoch": 0.6001219760113844, "grad_norm": 0.10218296945095062, "learning_rate": 0.00014015876246692449, "loss": 1.0562, "step": 1476 }, { "epoch": 0.6005285627159992, "grad_norm": 0.10347164422273636, "learning_rate": 0.00014011805414207207, "loss": 1.0227, "step": 1477 }, { "epoch": 0.600935149420614, "grad_norm": 0.09892403334379196, "learning_rate": 0.00014007734581721962, "loss": 0.8526, "step": 1478 }, { "epoch": 0.6013417361252287, "grad_norm": 0.10327230393886566, "learning_rate": 
0.0001400366374923672, "loss": 0.9473, "step": 1479 }, { "epoch": 0.6017483228298435, "grad_norm": 0.10661543160676956, "learning_rate": 0.00013999592916751476, "loss": 1.0807, "step": 1480 }, { "epoch": 0.6021549095344583, "grad_norm": 0.10507283359766006, "learning_rate": 0.00013995522084266234, "loss": 1.0405, "step": 1481 }, { "epoch": 0.602561496239073, "grad_norm": 0.09952735900878906, "learning_rate": 0.0001399145125178099, "loss": 1.0233, "step": 1482 }, { "epoch": 0.6029680829436878, "grad_norm": 0.0861600711941719, "learning_rate": 0.00013987380419295745, "loss": 0.9264, "step": 1483 }, { "epoch": 0.6033746696483026, "grad_norm": 0.09560652077198029, "learning_rate": 0.00013983309586810503, "loss": 0.961, "step": 1484 }, { "epoch": 0.6037812563529172, "grad_norm": 0.09961631894111633, "learning_rate": 0.0001397923875432526, "loss": 1.0687, "step": 1485 }, { "epoch": 0.604187843057532, "grad_norm": 0.10031979531049728, "learning_rate": 0.0001397516792184002, "loss": 1.0088, "step": 1486 }, { "epoch": 0.6045944297621467, "grad_norm": 0.09212915599346161, "learning_rate": 0.00013971097089354774, "loss": 1.0183, "step": 1487 }, { "epoch": 0.6050010164667615, "grad_norm": 0.09258651733398438, "learning_rate": 0.0001396702625686953, "loss": 0.9473, "step": 1488 }, { "epoch": 0.6054076031713763, "grad_norm": 0.09315144270658493, "learning_rate": 0.00013962955424384288, "loss": 1.0049, "step": 1489 }, { "epoch": 0.605814189875991, "grad_norm": 0.08820061385631561, "learning_rate": 0.00013958884591899043, "loss": 0.9485, "step": 1490 }, { "epoch": 0.6062207765806058, "grad_norm": 852.4391479492188, "learning_rate": 0.00013954813759413801, "loss": 0.9986, "step": 1491 }, { "epoch": 0.6066273632852206, "grad_norm": 0.0940237045288086, "learning_rate": 0.00013950742926928557, "loss": 1.0065, "step": 1492 }, { "epoch": 0.6070339499898353, "grad_norm": 0.09184816479682922, "learning_rate": 0.00013946672094443315, "loss": 1.0186, "step": 1493 }, { "epoch": 
0.6074405366944501, "grad_norm": 0.08479593694210052, "learning_rate": 0.0001394260126195807, "loss": 0.9213, "step": 1494 }, { "epoch": 0.6078471233990649, "grad_norm": 0.10088304430246353, "learning_rate": 0.00013938530429472826, "loss": 1.1029, "step": 1495 }, { "epoch": 0.6082537101036796, "grad_norm": 0.08876685053110123, "learning_rate": 0.00013934459596987584, "loss": 0.9532, "step": 1496 }, { "epoch": 0.6086602968082944, "grad_norm": 0.10209202021360397, "learning_rate": 0.00013930388764502342, "loss": 1.0292, "step": 1497 }, { "epoch": 0.6090668835129092, "grad_norm": 0.09144751727581024, "learning_rate": 0.000139263179320171, "loss": 0.9214, "step": 1498 }, { "epoch": 0.6094734702175238, "grad_norm": 0.08805158734321594, "learning_rate": 0.00013922247099531856, "loss": 0.8983, "step": 1499 }, { "epoch": 0.6098800569221386, "grad_norm": 0.0918235033750534, "learning_rate": 0.0001391817626704661, "loss": 0.9198, "step": 1500 }, { "epoch": 0.6102866436267534, "grad_norm": 0.09051943570375443, "learning_rate": 0.0001391410543456137, "loss": 0.9885, "step": 1501 }, { "epoch": 0.6106932303313681, "grad_norm": 0.09222988784313202, "learning_rate": 0.00013910034602076124, "loss": 0.8739, "step": 1502 }, { "epoch": 0.6110998170359829, "grad_norm": 0.1040385290980339, "learning_rate": 0.00013905963769590883, "loss": 1.0559, "step": 1503 }, { "epoch": 0.6115064037405977, "grad_norm": 0.09393730759620667, "learning_rate": 0.00013901892937105638, "loss": 1.0138, "step": 1504 }, { "epoch": 0.6119129904452124, "grad_norm": 0.09828665107488632, "learning_rate": 0.00013897822104620396, "loss": 0.99, "step": 1505 }, { "epoch": 0.6123195771498272, "grad_norm": 0.08924803137779236, "learning_rate": 0.00013893751272135152, "loss": 0.9948, "step": 1506 }, { "epoch": 0.612726163854442, "grad_norm": 0.09292086958885193, "learning_rate": 0.00013889680439649907, "loss": 0.9319, "step": 1507 }, { "epoch": 0.6131327505590567, "grad_norm": 0.09370770305395126, "learning_rate": 
0.00013885609607164668, "loss": 0.9535, "step": 1508 }, { "epoch": 0.6135393372636715, "grad_norm": 0.0799320712685585, "learning_rate": 0.00013881538774679423, "loss": 0.8244, "step": 1509 }, { "epoch": 0.6139459239682863, "grad_norm": 0.0891839936375618, "learning_rate": 0.0001387746794219418, "loss": 1.0157, "step": 1510 }, { "epoch": 0.614352510672901, "grad_norm": 0.09138181805610657, "learning_rate": 0.00013873397109708937, "loss": 0.9066, "step": 1511 }, { "epoch": 0.6147590973775158, "grad_norm": 0.09552167356014252, "learning_rate": 0.00013869326277223692, "loss": 1.0479, "step": 1512 }, { "epoch": 0.6151656840821305, "grad_norm": 0.09162238240242004, "learning_rate": 0.0001386525544473845, "loss": 1.016, "step": 1513 }, { "epoch": 0.6155722707867453, "grad_norm": 0.09654813259840012, "learning_rate": 0.00013861184612253206, "loss": 0.993, "step": 1514 }, { "epoch": 0.61597885749136, "grad_norm": 0.0941232442855835, "learning_rate": 0.00013857113779767964, "loss": 1.0068, "step": 1515 }, { "epoch": 0.6163854441959747, "grad_norm": 0.0947796180844307, "learning_rate": 0.0001385304294728272, "loss": 1.0618, "step": 1516 }, { "epoch": 0.6167920309005895, "grad_norm": 0.08732841163873672, "learning_rate": 0.00013848972114797477, "loss": 0.9348, "step": 1517 }, { "epoch": 0.6171986176052043, "grad_norm": 0.09297166019678116, "learning_rate": 0.00013844901282312233, "loss": 1.0029, "step": 1518 }, { "epoch": 0.617605204309819, "grad_norm": 0.09339512884616852, "learning_rate": 0.00013840830449826988, "loss": 1.0557, "step": 1519 }, { "epoch": 0.6180117910144338, "grad_norm": 0.09277696907520294, "learning_rate": 0.0001383675961734175, "loss": 0.9322, "step": 1520 }, { "epoch": 0.6184183777190486, "grad_norm": 0.08687552809715271, "learning_rate": 0.00013832688784856504, "loss": 0.9162, "step": 1521 }, { "epoch": 0.6188249644236633, "grad_norm": 0.08844698965549469, "learning_rate": 0.00013828617952371262, "loss": 0.9958, "step": 1522 }, { "epoch": 
0.6192315511282781, "grad_norm": 0.09178265184164047, "learning_rate": 0.00013824547119886018, "loss": 0.8926, "step": 1523 }, { "epoch": 0.6196381378328929, "grad_norm": 0.09063131362199783, "learning_rate": 0.00013820476287400773, "loss": 1.0047, "step": 1524 }, { "epoch": 0.6200447245375076, "grad_norm": 0.09506388753652573, "learning_rate": 0.00013816405454915531, "loss": 1.112, "step": 1525 }, { "epoch": 0.6204513112421224, "grad_norm": 0.0870959535241127, "learning_rate": 0.00013812334622430287, "loss": 1.0074, "step": 1526 }, { "epoch": 0.6208578979467372, "grad_norm": 0.08569116145372391, "learning_rate": 0.00013808263789945045, "loss": 0.9702, "step": 1527 }, { "epoch": 0.6212644846513519, "grad_norm": 0.09870801120996475, "learning_rate": 0.000138041929574598, "loss": 1.0475, "step": 1528 }, { "epoch": 0.6216710713559667, "grad_norm": 0.09899303317070007, "learning_rate": 0.00013800122124974558, "loss": 1.0806, "step": 1529 }, { "epoch": 0.6220776580605815, "grad_norm": 0.09373268485069275, "learning_rate": 0.00013796051292489314, "loss": 1.0295, "step": 1530 }, { "epoch": 0.6224842447651961, "grad_norm": 0.09074109047651291, "learning_rate": 0.00013791980460004072, "loss": 0.9462, "step": 1531 }, { "epoch": 0.622890831469811, "grad_norm": 0.09384390711784363, "learning_rate": 0.0001378790962751883, "loss": 1.0606, "step": 1532 }, { "epoch": 0.6232974181744257, "grad_norm": 0.0943252295255661, "learning_rate": 0.00013783838795033585, "loss": 1.1258, "step": 1533 }, { "epoch": 0.6237040048790404, "grad_norm": 0.08777976781129837, "learning_rate": 0.0001377976796254834, "loss": 0.9621, "step": 1534 }, { "epoch": 0.6241105915836552, "grad_norm": 0.09006936848163605, "learning_rate": 0.000137756971300631, "loss": 0.9815, "step": 1535 }, { "epoch": 0.62451717828827, "grad_norm": 0.1147993803024292, "learning_rate": 0.00013771626297577854, "loss": 0.9917, "step": 1536 }, { "epoch": 0.6249237649928847, "grad_norm": 0.09408791363239288, "learning_rate": 
0.00013767555465092613, "loss": 1.024, "step": 1537 }, { "epoch": 0.6253303516974995, "grad_norm": 0.09530872851610184, "learning_rate": 0.00013763484632607368, "loss": 1.0339, "step": 1538 }, { "epoch": 0.6257369384021142, "grad_norm": 0.09337632358074188, "learning_rate": 0.00013759413800122126, "loss": 1.031, "step": 1539 }, { "epoch": 0.626143525106729, "grad_norm": 0.08535618335008621, "learning_rate": 0.00013755342967636881, "loss": 0.9597, "step": 1540 }, { "epoch": 0.6265501118113438, "grad_norm": 0.09226896613836288, "learning_rate": 0.0001375127213515164, "loss": 1.0017, "step": 1541 }, { "epoch": 0.6269566985159585, "grad_norm": 0.08831244707107544, "learning_rate": 0.00013747201302666395, "loss": 0.9695, "step": 1542 }, { "epoch": 0.6273632852205733, "grad_norm": 0.07351087778806686, "learning_rate": 0.00013743130470181153, "loss": 0.8212, "step": 1543 }, { "epoch": 0.6277698719251881, "grad_norm": 0.09002837538719177, "learning_rate": 0.0001373905963769591, "loss": 0.9882, "step": 1544 }, { "epoch": 0.6281764586298028, "grad_norm": 0.09743615984916687, "learning_rate": 0.00013734988805210667, "loss": 1.0246, "step": 1545 }, { "epoch": 0.6285830453344176, "grad_norm": 0.09634383767843246, "learning_rate": 0.00013730917972725422, "loss": 1.0452, "step": 1546 }, { "epoch": 0.6289896320390324, "grad_norm": 0.09213767200708389, "learning_rate": 0.0001372684714024018, "loss": 1.0618, "step": 1547 }, { "epoch": 0.629396218743647, "grad_norm": 0.08717525005340576, "learning_rate": 0.00013722776307754936, "loss": 0.9834, "step": 1548 }, { "epoch": 0.6298028054482618, "grad_norm": 0.08541104942560196, "learning_rate": 0.00013718705475269694, "loss": 0.9332, "step": 1549 }, { "epoch": 0.6302093921528766, "grad_norm": 0.09747796505689621, "learning_rate": 0.0001371463464278445, "loss": 1.0459, "step": 1550 }, { "epoch": 0.6306159788574913, "grad_norm": 0.09076548367738724, "learning_rate": 0.00013710563810299207, "loss": 0.9951, "step": 1551 }, { "epoch": 
0.6310225655621061, "grad_norm": 0.08712035417556763, "learning_rate": 0.00013706492977813963, "loss": 0.9876, "step": 1552 }, { "epoch": 0.6314291522667209, "grad_norm": 0.09062602370977402, "learning_rate": 0.00013702422145328718, "loss": 0.9246, "step": 1553 }, { "epoch": 0.6318357389713356, "grad_norm": 0.0910324677824974, "learning_rate": 0.0001369835131284348, "loss": 0.884, "step": 1554 }, { "epoch": 0.6322423256759504, "grad_norm": 0.09255006164312363, "learning_rate": 0.00013694280480358234, "loss": 1.0172, "step": 1555 }, { "epoch": 0.6326489123805652, "grad_norm": 0.0950237512588501, "learning_rate": 0.00013690209647872992, "loss": 1.0263, "step": 1556 }, { "epoch": 0.6330554990851799, "grad_norm": 0.09103222191333771, "learning_rate": 0.00013686138815387748, "loss": 1.0245, "step": 1557 }, { "epoch": 0.6334620857897947, "grad_norm": 0.09043283015489578, "learning_rate": 0.00013682067982902503, "loss": 1.0197, "step": 1558 }, { "epoch": 0.6338686724944095, "grad_norm": 0.08311565965414047, "learning_rate": 0.0001367799715041726, "loss": 0.9372, "step": 1559 }, { "epoch": 0.6342752591990242, "grad_norm": 0.09156910330057144, "learning_rate": 0.00013673926317932017, "loss": 1.0579, "step": 1560 }, { "epoch": 0.634681845903639, "grad_norm": 0.08262625336647034, "learning_rate": 0.00013669855485446775, "loss": 0.9047, "step": 1561 }, { "epoch": 0.6350884326082538, "grad_norm": 0.09856829047203064, "learning_rate": 0.0001366578465296153, "loss": 1.0933, "step": 1562 }, { "epoch": 0.6354950193128684, "grad_norm": 0.09453229606151581, "learning_rate": 0.00013661713820476288, "loss": 1.0361, "step": 1563 }, { "epoch": 0.6359016060174832, "grad_norm": 0.09291166812181473, "learning_rate": 0.00013657642987991044, "loss": 0.9099, "step": 1564 }, { "epoch": 0.636308192722098, "grad_norm": 0.09416390210390091, "learning_rate": 0.000136535721555058, "loss": 0.9919, "step": 1565 }, { "epoch": 0.6367147794267127, "grad_norm": 0.08964714407920837, "learning_rate": 
0.0001364950132302056, "loss": 1.0352, "step": 1566 }, { "epoch": 0.6371213661313275, "grad_norm": 0.1002277210354805, "learning_rate": 0.00013645430490535315, "loss": 1.0121, "step": 1567 }, { "epoch": 0.6375279528359422, "grad_norm": 0.09013176709413528, "learning_rate": 0.00013641359658050074, "loss": 0.943, "step": 1568 }, { "epoch": 0.637934539540557, "grad_norm": 0.09195754677057266, "learning_rate": 0.0001363728882556483, "loss": 1.0101, "step": 1569 }, { "epoch": 0.6383411262451718, "grad_norm": 0.09277264773845673, "learning_rate": 0.00013633217993079584, "loss": 1.0411, "step": 1570 }, { "epoch": 0.6387477129497865, "grad_norm": 0.09677015990018845, "learning_rate": 0.00013629147160594342, "loss": 1.0047, "step": 1571 }, { "epoch": 0.6391542996544013, "grad_norm": 0.09898823499679565, "learning_rate": 0.00013625076328109098, "loss": 1.0897, "step": 1572 }, { "epoch": 0.6395608863590161, "grad_norm": 0.09134434908628464, "learning_rate": 0.00013621005495623856, "loss": 1.0471, "step": 1573 }, { "epoch": 0.6399674730636308, "grad_norm": 0.09015446901321411, "learning_rate": 0.00013616934663138611, "loss": 0.9521, "step": 1574 }, { "epoch": 0.6403740597682456, "grad_norm": 0.09361066669225693, "learning_rate": 0.0001361286383065337, "loss": 1.0378, "step": 1575 }, { "epoch": 0.6407806464728604, "grad_norm": 0.10741425305604935, "learning_rate": 0.00013608792998168125, "loss": 1.0042, "step": 1576 }, { "epoch": 0.6411872331774751, "grad_norm": 0.09339326620101929, "learning_rate": 0.00013604722165682883, "loss": 0.9641, "step": 1577 }, { "epoch": 0.6415938198820899, "grad_norm": 0.09786434471607208, "learning_rate": 0.0001360065133319764, "loss": 0.9643, "step": 1578 }, { "epoch": 0.6420004065867047, "grad_norm": 0.08545216172933578, "learning_rate": 0.00013596580500712397, "loss": 0.9413, "step": 1579 }, { "epoch": 0.6424069932913193, "grad_norm": 0.09042125940322876, "learning_rate": 0.00013592509668227155, "loss": 0.9105, "step": 1580 }, { "epoch": 
0.6428135799959341, "grad_norm": 0.08778928220272064, "learning_rate": 0.0001358843883574191, "loss": 1.0262, "step": 1581 }, { "epoch": 0.6432201667005489, "grad_norm": 0.08905961364507675, "learning_rate": 0.00013584368003256666, "loss": 1.0317, "step": 1582 }, { "epoch": 0.6436267534051636, "grad_norm": 0.09242242574691772, "learning_rate": 0.00013580297170771424, "loss": 0.9415, "step": 1583 }, { "epoch": 0.6440333401097784, "grad_norm": 0.08425027132034302, "learning_rate": 0.0001357622633828618, "loss": 0.8964, "step": 1584 }, { "epoch": 0.6444399268143932, "grad_norm": 0.0858960896730423, "learning_rate": 0.00013572155505800937, "loss": 0.9441, "step": 1585 }, { "epoch": 0.6448465135190079, "grad_norm": 0.09374553710222244, "learning_rate": 0.00013568084673315693, "loss": 0.9784, "step": 1586 }, { "epoch": 0.6452531002236227, "grad_norm": 0.09684876352548599, "learning_rate": 0.0001356401384083045, "loss": 0.9867, "step": 1587 }, { "epoch": 0.6456596869282375, "grad_norm": 0.0853944793343544, "learning_rate": 0.00013559943008345206, "loss": 0.9136, "step": 1588 }, { "epoch": 0.6460662736328522, "grad_norm": 0.0905388742685318, "learning_rate": 0.00013555872175859964, "loss": 0.9335, "step": 1589 }, { "epoch": 0.646472860337467, "grad_norm": 0.08938907831907272, "learning_rate": 0.00013551801343374722, "loss": 0.9889, "step": 1590 }, { "epoch": 0.6468794470420818, "grad_norm": 0.08857300132513046, "learning_rate": 0.00013547730510889478, "loss": 0.9986, "step": 1591 }, { "epoch": 0.6472860337466965, "grad_norm": 0.09151600301265717, "learning_rate": 0.00013543659678404236, "loss": 1.0025, "step": 1592 }, { "epoch": 0.6476926204513113, "grad_norm": 0.08548744767904282, "learning_rate": 0.0001353958884591899, "loss": 1.012, "step": 1593 }, { "epoch": 0.648099207155926, "grad_norm": 0.08982311189174652, "learning_rate": 0.00013535518013433747, "loss": 0.9827, "step": 1594 }, { "epoch": 0.6485057938605407, "grad_norm": 0.09153248369693756, "learning_rate": 
0.00013531447180948505, "loss": 0.9818, "step": 1595 }, { "epoch": 0.6489123805651555, "grad_norm": 0.1022023931145668, "learning_rate": 0.0001352737634846326, "loss": 1.054, "step": 1596 }, { "epoch": 0.6493189672697702, "grad_norm": 0.09080366045236588, "learning_rate": 0.00013523305515978018, "loss": 0.9667, "step": 1597 }, { "epoch": 0.649725553974385, "grad_norm": 0.0935145765542984, "learning_rate": 0.00013519234683492774, "loss": 0.9668, "step": 1598 }, { "epoch": 0.6501321406789998, "grad_norm": 0.09892317652702332, "learning_rate": 0.00013515163851007532, "loss": 1.012, "step": 1599 }, { "epoch": 0.6505387273836145, "grad_norm": 0.09385450929403305, "learning_rate": 0.0001351109301852229, "loss": 1.1, "step": 1600 }, { "epoch": 0.6509453140882293, "grad_norm": 0.09270552545785904, "learning_rate": 0.00013507022186037045, "loss": 0.9509, "step": 1601 }, { "epoch": 0.6513519007928441, "grad_norm": 0.09725828468799591, "learning_rate": 0.00013502951353551804, "loss": 1.0435, "step": 1602 }, { "epoch": 0.6517584874974588, "grad_norm": 0.096989206969738, "learning_rate": 0.0001349888052106656, "loss": 1.0152, "step": 1603 }, { "epoch": 0.6521650742020736, "grad_norm": 0.09739220887422562, "learning_rate": 0.00013494809688581317, "loss": 0.9834, "step": 1604 }, { "epoch": 0.6525716609066884, "grad_norm": 0.07972859591245651, "learning_rate": 0.00013490738856096072, "loss": 0.8542, "step": 1605 }, { "epoch": 0.6529782476113031, "grad_norm": 0.09360089153051376, "learning_rate": 0.00013486668023610828, "loss": 1.0077, "step": 1606 }, { "epoch": 0.6533848343159179, "grad_norm": 0.08999258279800415, "learning_rate": 0.00013482597191125586, "loss": 0.8802, "step": 1607 }, { "epoch": 0.6537914210205327, "grad_norm": 0.0885370746254921, "learning_rate": 0.00013478526358640341, "loss": 0.9867, "step": 1608 }, { "epoch": 0.6541980077251474, "grad_norm": 0.0924537256360054, "learning_rate": 0.000134744555261551, "loss": 0.9653, "step": 1609 }, { "epoch": 
0.6546045944297622, "grad_norm": 0.08841130137443542, "learning_rate": 0.00013470384693669855, "loss": 0.9005, "step": 1610 }, { "epoch": 0.655011181134377, "grad_norm": 0.0968664139509201, "learning_rate": 0.00013466313861184613, "loss": 1.1191, "step": 1611 }, { "epoch": 0.6554177678389916, "grad_norm": 0.0909125879406929, "learning_rate": 0.0001346224302869937, "loss": 1.0247, "step": 1612 }, { "epoch": 0.6558243545436064, "grad_norm": 0.1032382994890213, "learning_rate": 0.00013458172196214127, "loss": 1.021, "step": 1613 }, { "epoch": 0.6562309412482212, "grad_norm": 0.08680799603462219, "learning_rate": 0.00013454101363728885, "loss": 0.9425, "step": 1614 }, { "epoch": 0.6566375279528359, "grad_norm": 0.08841447532176971, "learning_rate": 0.0001345003053124364, "loss": 0.9105, "step": 1615 }, { "epoch": 0.6570441146574507, "grad_norm": 0.09229273349046707, "learning_rate": 0.00013445959698758398, "loss": 0.9492, "step": 1616 }, { "epoch": 0.6574507013620655, "grad_norm": 0.09328685700893402, "learning_rate": 0.00013441888866273154, "loss": 1.0456, "step": 1617 }, { "epoch": 0.6578572880666802, "grad_norm": 0.08448266983032227, "learning_rate": 0.0001343781803378791, "loss": 0.9209, "step": 1618 }, { "epoch": 0.658263874771295, "grad_norm": 0.09344170242547989, "learning_rate": 0.00013433747201302667, "loss": 1.0107, "step": 1619 }, { "epoch": 0.6586704614759097, "grad_norm": 0.08675231039524078, "learning_rate": 0.00013429676368817423, "loss": 0.989, "step": 1620 }, { "epoch": 0.6590770481805245, "grad_norm": 0.09648977965116501, "learning_rate": 0.0001342560553633218, "loss": 1.079, "step": 1621 }, { "epoch": 0.6594836348851393, "grad_norm": 0.08079522848129272, "learning_rate": 0.00013421534703846936, "loss": 0.862, "step": 1622 }, { "epoch": 0.659890221589754, "grad_norm": 0.1015796810388565, "learning_rate": 0.00013417463871361694, "loss": 1.136, "step": 1623 }, { "epoch": 0.6602968082943688, "grad_norm": 0.08189254999160767, "learning_rate": 
0.00013413393038876452, "loss": 0.9161, "step": 1624 }, { "epoch": 0.6607033949989836, "grad_norm": 0.09128617495298386, "learning_rate": 0.00013409322206391208, "loss": 0.9605, "step": 1625 }, { "epoch": 0.6611099817035982, "grad_norm": 0.09256181865930557, "learning_rate": 0.00013405251373905966, "loss": 0.9844, "step": 1626 }, { "epoch": 0.661516568408213, "grad_norm": 0.092183917760849, "learning_rate": 0.0001340118054142072, "loss": 1.0694, "step": 1627 }, { "epoch": 0.6619231551128278, "grad_norm": 0.10037260502576828, "learning_rate": 0.00013397109708935477, "loss": 1.1395, "step": 1628 }, { "epoch": 0.6623297418174425, "grad_norm": 0.08758927881717682, "learning_rate": 0.00013393038876450235, "loss": 0.9494, "step": 1629 }, { "epoch": 0.6627363285220573, "grad_norm": 0.08407801389694214, "learning_rate": 0.0001338896804396499, "loss": 0.8953, "step": 1630 }, { "epoch": 0.6631429152266721, "grad_norm": 0.10363683849573135, "learning_rate": 0.00013384897211479748, "loss": 1.0613, "step": 1631 }, { "epoch": 0.6635495019312868, "grad_norm": 0.0939316600561142, "learning_rate": 0.00013380826378994504, "loss": 0.9668, "step": 1632 }, { "epoch": 0.6639560886359016, "grad_norm": 0.097317174077034, "learning_rate": 0.00013376755546509262, "loss": 1.1024, "step": 1633 }, { "epoch": 0.6643626753405164, "grad_norm": 0.10394629091024399, "learning_rate": 0.00013372684714024017, "loss": 1.0706, "step": 1634 }, { "epoch": 0.6647692620451311, "grad_norm": 0.09405668824911118, "learning_rate": 0.00013368613881538775, "loss": 1.0496, "step": 1635 }, { "epoch": 0.6651758487497459, "grad_norm": 0.08976142853498459, "learning_rate": 0.00013364543049053533, "loss": 0.9811, "step": 1636 }, { "epoch": 0.6655824354543607, "grad_norm": 0.09220533818006516, "learning_rate": 0.0001336047221656829, "loss": 1.0655, "step": 1637 }, { "epoch": 0.6659890221589754, "grad_norm": 0.09313860535621643, "learning_rate": 0.00013356401384083047, "loss": 0.9664, "step": 1638 }, { "epoch": 
0.6663956088635902, "grad_norm": 0.08653722703456879, "learning_rate": 0.00013352330551597802, "loss": 0.91, "step": 1639 }, { "epoch": 0.666802195568205, "grad_norm": 0.09094205498695374, "learning_rate": 0.00013348259719112558, "loss": 1.0011, "step": 1640 }, { "epoch": 0.6672087822728197, "grad_norm": 0.09969717264175415, "learning_rate": 0.00013344188886627316, "loss": 1.0853, "step": 1641 }, { "epoch": 0.6676153689774345, "grad_norm": 0.08996472507715225, "learning_rate": 0.0001334011805414207, "loss": 0.9693, "step": 1642 }, { "epoch": 0.6680219556820493, "grad_norm": 0.08930208534002304, "learning_rate": 0.0001333604722165683, "loss": 1.0078, "step": 1643 }, { "epoch": 0.6684285423866639, "grad_norm": 0.09799496084451675, "learning_rate": 0.00013331976389171585, "loss": 1.0764, "step": 1644 }, { "epoch": 0.6688351290912787, "grad_norm": 0.1000712588429451, "learning_rate": 0.00013327905556686343, "loss": 1.0769, "step": 1645 }, { "epoch": 0.6692417157958934, "grad_norm": 0.09583432227373123, "learning_rate": 0.000133238347242011, "loss": 1.0311, "step": 1646 }, { "epoch": 0.6696483025005082, "grad_norm": 0.10381270945072174, "learning_rate": 0.00013319763891715857, "loss": 1.0879, "step": 1647 }, { "epoch": 0.670054889205123, "grad_norm": 0.09310910850763321, "learning_rate": 0.00013315693059230615, "loss": 0.9875, "step": 1648 }, { "epoch": 0.6704614759097377, "grad_norm": 0.09691096842288971, "learning_rate": 0.0001331162222674537, "loss": 1.001, "step": 1649 }, { "epoch": 0.6708680626143525, "grad_norm": 0.08782976865768433, "learning_rate": 0.00013307551394260128, "loss": 1.0192, "step": 1650 }, { "epoch": 0.6712746493189673, "grad_norm": 0.07851552218198776, "learning_rate": 0.00013303480561774884, "loss": 0.8345, "step": 1651 }, { "epoch": 0.671681236023582, "grad_norm": 0.09602700173854828, "learning_rate": 0.0001329940972928964, "loss": 0.9761, "step": 1652 }, { "epoch": 0.6720878227281968, "grad_norm": 0.09454475343227386, "learning_rate": 
0.00013295338896804397, "loss": 0.9775, "step": 1653 }, { "epoch": 0.6724944094328116, "grad_norm": 0.09530249238014221, "learning_rate": 0.00013291268064319153, "loss": 0.9155, "step": 1654 }, { "epoch": 0.6729009961374263, "grad_norm": 0.09022442251443863, "learning_rate": 0.0001328719723183391, "loss": 0.9651, "step": 1655 }, { "epoch": 0.6733075828420411, "grad_norm": 0.09096933156251907, "learning_rate": 0.00013283126399348666, "loss": 1.003, "step": 1656 }, { "epoch": 0.6737141695466559, "grad_norm": 0.09274188429117203, "learning_rate": 0.00013279055566863424, "loss": 1.0024, "step": 1657 }, { "epoch": 0.6741207562512705, "grad_norm": 0.09318679571151733, "learning_rate": 0.00013274984734378182, "loss": 0.9613, "step": 1658 }, { "epoch": 0.6745273429558853, "grad_norm": 0.1088038757443428, "learning_rate": 0.00013270913901892938, "loss": 0.9718, "step": 1659 }, { "epoch": 0.6749339296605001, "grad_norm": 0.08833767473697662, "learning_rate": 0.00013266843069407696, "loss": 0.8893, "step": 1660 }, { "epoch": 0.6753405163651148, "grad_norm": 0.09868477284908295, "learning_rate": 0.0001326277223692245, "loss": 1.0233, "step": 1661 }, { "epoch": 0.6757471030697296, "grad_norm": 0.09289266169071198, "learning_rate": 0.0001325870140443721, "loss": 0.8636, "step": 1662 }, { "epoch": 0.6761536897743444, "grad_norm": 0.08200156688690186, "learning_rate": 0.00013254630571951965, "loss": 0.8317, "step": 1663 }, { "epoch": 0.6765602764789591, "grad_norm": 0.09031883627176285, "learning_rate": 0.0001325055973946672, "loss": 0.9759, "step": 1664 }, { "epoch": 0.6769668631835739, "grad_norm": 0.09911596029996872, "learning_rate": 0.00013246488906981478, "loss": 1.1484, "step": 1665 }, { "epoch": 0.6773734498881887, "grad_norm": 0.09470785409212112, "learning_rate": 0.00013242418074496234, "loss": 1.0261, "step": 1666 }, { "epoch": 0.6777800365928034, "grad_norm": 0.09936736524105072, "learning_rate": 0.00013238347242010992, "loss": 0.9697, "step": 1667 }, { "epoch": 
0.6781866232974182, "grad_norm": 0.08819877356290817, "learning_rate": 0.00013234276409525747, "loss": 1.0686, "step": 1668 }, { "epoch": 0.678593210002033, "grad_norm": 0.0861021876335144, "learning_rate": 0.00013230205577040505, "loss": 0.9567, "step": 1669 }, { "epoch": 0.6789997967066477, "grad_norm": 0.092157743871212, "learning_rate": 0.00013226134744555263, "loss": 1.0348, "step": 1670 }, { "epoch": 0.6794063834112625, "grad_norm": 0.08593881130218506, "learning_rate": 0.0001322206391207002, "loss": 0.9318, "step": 1671 }, { "epoch": 0.6798129701158772, "grad_norm": 0.09625545144081116, "learning_rate": 0.00013217993079584777, "loss": 0.9666, "step": 1672 }, { "epoch": 0.680219556820492, "grad_norm": 0.09877568483352661, "learning_rate": 0.00013213922247099532, "loss": 0.8862, "step": 1673 }, { "epoch": 0.6806261435251068, "grad_norm": 0.09340859204530716, "learning_rate": 0.0001320985141461429, "loss": 1.0305, "step": 1674 }, { "epoch": 0.6810327302297214, "grad_norm": 0.08883026987314224, "learning_rate": 0.00013205780582129046, "loss": 0.9499, "step": 1675 }, { "epoch": 0.6814393169343362, "grad_norm": 0.09625538438558578, "learning_rate": 0.000132017097496438, "loss": 1.0381, "step": 1676 }, { "epoch": 0.681845903638951, "grad_norm": 0.0917878970503807, "learning_rate": 0.0001319763891715856, "loss": 0.8924, "step": 1677 }, { "epoch": 0.6822524903435657, "grad_norm": 0.08996240794658661, "learning_rate": 0.00013193568084673315, "loss": 0.936, "step": 1678 }, { "epoch": 0.6826590770481805, "grad_norm": 0.09168268740177155, "learning_rate": 0.00013189497252188073, "loss": 0.9608, "step": 1679 }, { "epoch": 0.6830656637527953, "grad_norm": 0.09493600577116013, "learning_rate": 0.00013185426419702828, "loss": 1.0394, "step": 1680 }, { "epoch": 0.68347225045741, "grad_norm": 0.094533272087574, "learning_rate": 0.00013181355587217586, "loss": 0.9437, "step": 1681 }, { "epoch": 0.6838788371620248, "grad_norm": 0.09590426087379456, "learning_rate": 
0.00013177284754732345, "loss": 1.0504, "step": 1682 }, { "epoch": 0.6842854238666396, "grad_norm": 0.1008445993065834, "learning_rate": 0.000131732139222471, "loss": 0.9966, "step": 1683 }, { "epoch": 0.6846920105712543, "grad_norm": 0.09178382903337479, "learning_rate": 0.00013169143089761858, "loss": 1.0171, "step": 1684 }, { "epoch": 0.6850985972758691, "grad_norm": 0.09064016491174698, "learning_rate": 0.00013165072257276614, "loss": 1.0259, "step": 1685 }, { "epoch": 0.6855051839804839, "grad_norm": 0.09577952325344086, "learning_rate": 0.00013161001424791372, "loss": 0.9391, "step": 1686 }, { "epoch": 0.6859117706850986, "grad_norm": 0.08866085112094879, "learning_rate": 0.00013156930592306127, "loss": 1.0251, "step": 1687 }, { "epoch": 0.6863183573897134, "grad_norm": 0.09070689976215363, "learning_rate": 0.00013152859759820882, "loss": 0.9223, "step": 1688 }, { "epoch": 0.6867249440943282, "grad_norm": 0.08675026893615723, "learning_rate": 0.0001314878892733564, "loss": 0.94, "step": 1689 }, { "epoch": 0.6871315307989428, "grad_norm": 0.08852765709161758, "learning_rate": 0.00013144718094850396, "loss": 0.9587, "step": 1690 }, { "epoch": 0.6875381175035576, "grad_norm": 0.09738162159919739, "learning_rate": 0.00013140647262365154, "loss": 1.0469, "step": 1691 }, { "epoch": 0.6879447042081724, "grad_norm": 0.09765305370092392, "learning_rate": 0.00013136576429879912, "loss": 1.0384, "step": 1692 }, { "epoch": 0.6883512909127871, "grad_norm": 0.09691577404737473, "learning_rate": 0.00013132505597394668, "loss": 1.1035, "step": 1693 }, { "epoch": 0.6887578776174019, "grad_norm": 0.09987527132034302, "learning_rate": 0.00013128434764909426, "loss": 1.0447, "step": 1694 }, { "epoch": 0.6891644643220167, "grad_norm": 0.09481899440288544, "learning_rate": 0.0001312436393242418, "loss": 1.0686, "step": 1695 }, { "epoch": 0.6895710510266314, "grad_norm": 0.08769707381725311, "learning_rate": 0.0001312029309993894, "loss": 0.9485, "step": 1696 }, { "epoch": 
0.6899776377312462, "grad_norm": 0.08787425607442856, "learning_rate": 0.00013116222267453695, "loss": 0.9945, "step": 1697 }, { "epoch": 0.6903842244358609, "grad_norm": 0.09898071736097336, "learning_rate": 0.00013112151434968453, "loss": 1.0373, "step": 1698 }, { "epoch": 0.6907908111404757, "grad_norm": 0.09396618604660034, "learning_rate": 0.00013108080602483208, "loss": 1.0555, "step": 1699 }, { "epoch": 0.6911973978450905, "grad_norm": 0.09377385675907135, "learning_rate": 0.00013104009769997964, "loss": 0.9912, "step": 1700 }, { "epoch": 0.6916039845497052, "grad_norm": 0.09066810458898544, "learning_rate": 0.00013099938937512722, "loss": 1.0106, "step": 1701 }, { "epoch": 0.69201057125432, "grad_norm": 0.10170560330152512, "learning_rate": 0.00013095868105027477, "loss": 1.1167, "step": 1702 }, { "epoch": 0.6924171579589348, "grad_norm": 0.10096985846757889, "learning_rate": 0.00013091797272542235, "loss": 1.1092, "step": 1703 }, { "epoch": 0.6928237446635495, "grad_norm": 0.08942307531833649, "learning_rate": 0.00013087726440056993, "loss": 0.8489, "step": 1704 }, { "epoch": 0.6932303313681643, "grad_norm": 0.0931686982512474, "learning_rate": 0.0001308365560757175, "loss": 1.0615, "step": 1705 }, { "epoch": 0.693636918072779, "grad_norm": 0.08369520306587219, "learning_rate": 0.00013079584775086507, "loss": 0.9376, "step": 1706 }, { "epoch": 0.6940435047773937, "grad_norm": 0.09754310548305511, "learning_rate": 0.00013075513942601262, "loss": 1.076, "step": 1707 }, { "epoch": 0.6944500914820085, "grad_norm": 0.09425446391105652, "learning_rate": 0.0001307144311011602, "loss": 1.0354, "step": 1708 }, { "epoch": 0.6948566781866233, "grad_norm": 0.08762680739164352, "learning_rate": 0.00013067372277630776, "loss": 0.892, "step": 1709 }, { "epoch": 0.695263264891238, "grad_norm": 0.08966252207756042, "learning_rate": 0.00013063301445145534, "loss": 0.9067, "step": 1710 }, { "epoch": 0.6956698515958528, "grad_norm": 0.08628804236650467, "learning_rate": 
0.0001305923061266029, "loss": 0.8314, "step": 1711 }, { "epoch": 0.6960764383004676, "grad_norm": 0.0932592824101448, "learning_rate": 0.00013055159780175045, "loss": 0.9557, "step": 1712 }, { "epoch": 0.6964830250050823, "grad_norm": 0.0861787497997284, "learning_rate": 0.00013051088947689803, "loss": 1.0075, "step": 1713 }, { "epoch": 0.6968896117096971, "grad_norm": 0.08896369487047195, "learning_rate": 0.00013047018115204558, "loss": 0.9439, "step": 1714 }, { "epoch": 0.6972961984143119, "grad_norm": 0.09481415897607803, "learning_rate": 0.00013042947282719316, "loss": 1.0008, "step": 1715 }, { "epoch": 0.6977027851189266, "grad_norm": 0.09036390483379364, "learning_rate": 0.00013038876450234075, "loss": 1.0723, "step": 1716 }, { "epoch": 0.6981093718235414, "grad_norm": 0.09333796054124832, "learning_rate": 0.0001303480561774883, "loss": 0.9998, "step": 1717 }, { "epoch": 0.6985159585281562, "grad_norm": 0.09343329071998596, "learning_rate": 0.00013030734785263588, "loss": 1.069, "step": 1718 }, { "epoch": 0.6989225452327709, "grad_norm": 0.10213945806026459, "learning_rate": 0.00013026663952778343, "loss": 1.1121, "step": 1719 }, { "epoch": 0.6993291319373857, "grad_norm": 0.08944682031869888, "learning_rate": 0.00013022593120293102, "loss": 1.0139, "step": 1720 }, { "epoch": 0.6997357186420005, "grad_norm": 0.09763380140066147, "learning_rate": 0.00013018522287807857, "loss": 1.1057, "step": 1721 }, { "epoch": 0.7001423053466151, "grad_norm": 0.08643307536840439, "learning_rate": 0.00013014451455322615, "loss": 0.825, "step": 1722 }, { "epoch": 0.7005488920512299, "grad_norm": 0.0778571143746376, "learning_rate": 0.0001301038062283737, "loss": 0.8161, "step": 1723 }, { "epoch": 0.7009554787558446, "grad_norm": 0.08897890895605087, "learning_rate": 0.00013006309790352126, "loss": 0.9659, "step": 1724 }, { "epoch": 0.7013620654604594, "grad_norm": 0.08511462807655334, "learning_rate": 0.00013002238957866884, "loss": 0.8577, "step": 1725 }, { "epoch": 
0.7017686521650742, "grad_norm": 0.09079938381910324, "learning_rate": 0.0001299816812538164, "loss": 1.0091, "step": 1726 }, { "epoch": 0.7021752388696889, "grad_norm": 0.08795303851366043, "learning_rate": 0.00012994097292896398, "loss": 0.9966, "step": 1727 }, { "epoch": 0.7025818255743037, "grad_norm": 0.0925462394952774, "learning_rate": 0.00012990026460411156, "loss": 1.0207, "step": 1728 }, { "epoch": 0.7029884122789185, "grad_norm": 0.0894242599606514, "learning_rate": 0.0001298595562792591, "loss": 0.9207, "step": 1729 }, { "epoch": 0.7033949989835332, "grad_norm": 0.09216928482055664, "learning_rate": 0.0001298188479544067, "loss": 0.9725, "step": 1730 }, { "epoch": 0.703801585688148, "grad_norm": 0.09627533704042435, "learning_rate": 0.00012977813962955425, "loss": 0.998, "step": 1731 }, { "epoch": 0.7042081723927628, "grad_norm": 0.0950872004032135, "learning_rate": 0.00012973743130470183, "loss": 1.0275, "step": 1732 }, { "epoch": 0.7046147590973775, "grad_norm": 0.09819149225950241, "learning_rate": 0.00012969672297984938, "loss": 1.0179, "step": 1733 }, { "epoch": 0.7050213458019923, "grad_norm": 0.09157780557870865, "learning_rate": 0.00012965601465499694, "loss": 0.998, "step": 1734 }, { "epoch": 0.7054279325066071, "grad_norm": 0.09206783026456833, "learning_rate": 0.00012961530633014452, "loss": 0.9698, "step": 1735 }, { "epoch": 0.7058345192112218, "grad_norm": 0.08928617089986801, "learning_rate": 0.00012957459800529207, "loss": 0.9288, "step": 1736 }, { "epoch": 0.7062411059158366, "grad_norm": 0.09673994034528732, "learning_rate": 0.00012953388968043965, "loss": 1.0768, "step": 1737 }, { "epoch": 0.7066476926204514, "grad_norm": 0.09382779896259308, "learning_rate": 0.00012949318135558723, "loss": 1.0142, "step": 1738 }, { "epoch": 0.707054279325066, "grad_norm": 0.08966720104217529, "learning_rate": 0.0001294524730307348, "loss": 0.8738, "step": 1739 }, { "epoch": 0.7074608660296808, "grad_norm": 0.09402105212211609, "learning_rate": 
0.00012941176470588237, "loss": 0.9459, "step": 1740 }, { "epoch": 0.7078674527342956, "grad_norm": 0.08750198781490326, "learning_rate": 0.00012937105638102992, "loss": 0.9953, "step": 1741 }, { "epoch": 0.7082740394389103, "grad_norm": 0.09970106184482574, "learning_rate": 0.0001293303480561775, "loss": 1.0423, "step": 1742 }, { "epoch": 0.7086806261435251, "grad_norm": 0.08987673372030258, "learning_rate": 0.00012928963973132506, "loss": 0.9796, "step": 1743 }, { "epoch": 0.7090872128481399, "grad_norm": 0.09364349395036697, "learning_rate": 0.00012924893140647264, "loss": 1.0452, "step": 1744 }, { "epoch": 0.7094937995527546, "grad_norm": 0.09844768047332764, "learning_rate": 0.0001292082230816202, "loss": 1.0507, "step": 1745 }, { "epoch": 0.7099003862573694, "grad_norm": 0.08439893275499344, "learning_rate": 0.00012916751475676775, "loss": 0.9159, "step": 1746 }, { "epoch": 0.7103069729619842, "grad_norm": 0.08530126512050629, "learning_rate": 0.00012912680643191533, "loss": 0.8958, "step": 1747 }, { "epoch": 0.7107135596665989, "grad_norm": 0.09442596137523651, "learning_rate": 0.00012908609810706288, "loss": 1.0103, "step": 1748 }, { "epoch": 0.7111201463712137, "grad_norm": 0.09051500260829926, "learning_rate": 0.00012904538978221046, "loss": 0.9922, "step": 1749 }, { "epoch": 0.7115267330758284, "grad_norm": 0.09218533337116241, "learning_rate": 0.00012900468145735805, "loss": 1.0237, "step": 1750 }, { "epoch": 0.7119333197804432, "grad_norm": 0.09059412032365799, "learning_rate": 0.0001289639731325056, "loss": 0.8807, "step": 1751 }, { "epoch": 0.712339906485058, "grad_norm": 0.09302126616239548, "learning_rate": 0.00012892326480765318, "loss": 0.9996, "step": 1752 }, { "epoch": 0.7127464931896726, "grad_norm": 0.0886523425579071, "learning_rate": 0.00012888255648280073, "loss": 0.9456, "step": 1753 }, { "epoch": 0.7131530798942874, "grad_norm": 0.08531109243631363, "learning_rate": 0.00012884184815794832, "loss": 0.8851, "step": 1754 }, { "epoch": 
0.7135596665989022, "grad_norm": 0.08533506095409393, "learning_rate": 0.00012880113983309587, "loss": 1.004, "step": 1755 }, { "epoch": 0.7139662533035169, "grad_norm": 0.10868436843156815, "learning_rate": 0.00012876043150824345, "loss": 1.0434, "step": 1756 }, { "epoch": 0.7143728400081317, "grad_norm": 0.08798620849847794, "learning_rate": 0.000128719723183391, "loss": 0.944, "step": 1757 }, { "epoch": 0.7147794267127465, "grad_norm": 0.08957348763942719, "learning_rate": 0.00012867901485853856, "loss": 0.9431, "step": 1758 }, { "epoch": 0.7151860134173612, "grad_norm": 0.09171691536903381, "learning_rate": 0.00012863830653368614, "loss": 0.9877, "step": 1759 }, { "epoch": 0.715592600121976, "grad_norm": 0.10308198630809784, "learning_rate": 0.0001285975982088337, "loss": 1.0491, "step": 1760 }, { "epoch": 0.7159991868265908, "grad_norm": 0.09395022690296173, "learning_rate": 0.0001285568898839813, "loss": 0.9605, "step": 1761 }, { "epoch": 0.7164057735312055, "grad_norm": 0.09098276495933533, "learning_rate": 0.00012851618155912886, "loss": 0.9623, "step": 1762 }, { "epoch": 0.7168123602358203, "grad_norm": 0.09622596204280853, "learning_rate": 0.0001284754732342764, "loss": 0.9981, "step": 1763 }, { "epoch": 0.7172189469404351, "grad_norm": 0.09966776520013809, "learning_rate": 0.000128434764909424, "loss": 1.1082, "step": 1764 }, { "epoch": 0.7176255336450498, "grad_norm": 0.08151479065418243, "learning_rate": 0.00012839405658457155, "loss": 0.9498, "step": 1765 }, { "epoch": 0.7180321203496646, "grad_norm": 0.10801077634096146, "learning_rate": 0.00012835334825971913, "loss": 1.0845, "step": 1766 }, { "epoch": 0.7184387070542794, "grad_norm": 0.10468696802854538, "learning_rate": 0.00012831263993486668, "loss": 1.1407, "step": 1767 }, { "epoch": 0.718845293758894, "grad_norm": 0.08649425953626633, "learning_rate": 0.00012827193161001426, "loss": 1.0136, "step": 1768 }, { "epoch": 0.7192518804635089, "grad_norm": 0.0891176387667656, "learning_rate": 
0.00012823122328516182, "loss": 0.9647, "step": 1769 }, { "epoch": 0.7196584671681237, "grad_norm": 0.08572922646999359, "learning_rate": 0.00012819051496030937, "loss": 0.9131, "step": 1770 }, { "epoch": 0.7200650538727383, "grad_norm": 0.09400682896375656, "learning_rate": 0.00012814980663545695, "loss": 1.0212, "step": 1771 }, { "epoch": 0.7204716405773531, "grad_norm": 0.08426962792873383, "learning_rate": 0.0001281090983106045, "loss": 0.9203, "step": 1772 }, { "epoch": 0.7208782272819679, "grad_norm": 0.08990871161222458, "learning_rate": 0.00012806838998575211, "loss": 0.9154, "step": 1773 }, { "epoch": 0.7212848139865826, "grad_norm": 0.09853409230709076, "learning_rate": 0.00012802768166089967, "loss": 1.0219, "step": 1774 }, { "epoch": 0.7216914006911974, "grad_norm": 0.09549330174922943, "learning_rate": 0.00012798697333604722, "loss": 1.0584, "step": 1775 }, { "epoch": 0.7220979873958121, "grad_norm": 0.09176405519247055, "learning_rate": 0.0001279462650111948, "loss": 1.0623, "step": 1776 }, { "epoch": 0.7225045741004269, "grad_norm": 0.0894324779510498, "learning_rate": 0.00012790555668634236, "loss": 0.8873, "step": 1777 }, { "epoch": 0.7229111608050417, "grad_norm": 0.09495782852172852, "learning_rate": 0.00012786484836148994, "loss": 0.9914, "step": 1778 }, { "epoch": 0.7233177475096564, "grad_norm": 0.09165625274181366, "learning_rate": 0.0001278241400366375, "loss": 0.9946, "step": 1779 }, { "epoch": 0.7237243342142712, "grad_norm": 0.08971066772937775, "learning_rate": 0.00012778343171178507, "loss": 0.9684, "step": 1780 }, { "epoch": 0.724130920918886, "grad_norm": 0.09194676578044891, "learning_rate": 0.00012774272338693263, "loss": 0.9845, "step": 1781 }, { "epoch": 0.7245375076235007, "grad_norm": 0.08844684064388275, "learning_rate": 0.00012770201506208018, "loss": 1.0189, "step": 1782 }, { "epoch": 0.7249440943281155, "grad_norm": 0.09508199989795685, "learning_rate": 0.00012766130673722776, "loss": 0.9609, "step": 1783 }, { "epoch": 
0.7253506810327303, "grad_norm": 0.08686284720897675, "learning_rate": 0.00012762059841237534, "loss": 0.9364, "step": 1784 }, { "epoch": 0.725757267737345, "grad_norm": 0.08749787509441376, "learning_rate": 0.00012757989008752293, "loss": 0.9021, "step": 1785 }, { "epoch": 0.7261638544419597, "grad_norm": 0.09259208291769028, "learning_rate": 0.00012753918176267048, "loss": 1.031, "step": 1786 }, { "epoch": 0.7265704411465745, "grad_norm": 0.09524762630462646, "learning_rate": 0.00012749847343781803, "loss": 0.9958, "step": 1787 }, { "epoch": 0.7269770278511892, "grad_norm": 0.08385960757732391, "learning_rate": 0.00012745776511296562, "loss": 0.943, "step": 1788 }, { "epoch": 0.727383614555804, "grad_norm": 0.09703537821769714, "learning_rate": 0.00012741705678811317, "loss": 0.9854, "step": 1789 }, { "epoch": 0.7277902012604188, "grad_norm": 0.08761659264564514, "learning_rate": 0.00012737634846326075, "loss": 0.8797, "step": 1790 }, { "epoch": 0.7281967879650335, "grad_norm": 0.08612256497144699, "learning_rate": 0.0001273356401384083, "loss": 0.894, "step": 1791 }, { "epoch": 0.7286033746696483, "grad_norm": 0.09343304485082626, "learning_rate": 0.00012729493181355589, "loss": 0.969, "step": 1792 }, { "epoch": 0.7290099613742631, "grad_norm": 0.09733837842941284, "learning_rate": 0.00012725422348870344, "loss": 1.0479, "step": 1793 }, { "epoch": 0.7294165480788778, "grad_norm": 0.08351567387580872, "learning_rate": 0.000127213515163851, "loss": 0.9141, "step": 1794 }, { "epoch": 0.7298231347834926, "grad_norm": 0.09528695791959763, "learning_rate": 0.00012717280683899858, "loss": 1.0193, "step": 1795 }, { "epoch": 0.7302297214881074, "grad_norm": 0.0906892865896225, "learning_rate": 0.00012713209851414616, "loss": 0.9095, "step": 1796 }, { "epoch": 0.7306363081927221, "grad_norm": 119.45793151855469, "learning_rate": 0.00012709139018929374, "loss": 1.0114, "step": 1797 }, { "epoch": 0.7310428948973369, "grad_norm": 0.0933651253581047, "learning_rate": 
0.0001270506818644413, "loss": 1.0666, "step": 1798 }, { "epoch": 0.7314494816019517, "grad_norm": 0.10169385373592377, "learning_rate": 0.00012700997353958885, "loss": 0.9892, "step": 1799 }, { "epoch": 0.7318560683065664, "grad_norm": 0.0868530198931694, "learning_rate": 0.00012696926521473643, "loss": 0.9162, "step": 1800 }, { "epoch": 0.7322626550111812, "grad_norm": 0.09074793756008148, "learning_rate": 0.00012692855688988398, "loss": 0.9388, "step": 1801 }, { "epoch": 0.7326692417157958, "grad_norm": 0.10199327766895294, "learning_rate": 0.00012688784856503156, "loss": 0.9585, "step": 1802 }, { "epoch": 0.7330758284204106, "grad_norm": 0.10722784698009491, "learning_rate": 0.00012684714024017912, "loss": 1.0226, "step": 1803 }, { "epoch": 0.7334824151250254, "grad_norm": 0.10113389045000076, "learning_rate": 0.0001268064319153267, "loss": 1.0593, "step": 1804 }, { "epoch": 0.7338890018296401, "grad_norm": 0.1125817522406578, "learning_rate": 0.00012676572359047425, "loss": 0.8962, "step": 1805 }, { "epoch": 0.7342955885342549, "grad_norm": 0.10177897661924362, "learning_rate": 0.0001267250152656218, "loss": 1.0323, "step": 1806 }, { "epoch": 0.7347021752388697, "grad_norm": 0.10272479057312012, "learning_rate": 0.00012668430694076941, "loss": 0.9947, "step": 1807 }, { "epoch": 0.7351087619434844, "grad_norm": 0.11395642906427383, "learning_rate": 0.00012664359861591697, "loss": 1.0144, "step": 1808 }, { "epoch": 0.7355153486480992, "grad_norm": 0.09565427899360657, "learning_rate": 0.00012660289029106452, "loss": 1.0052, "step": 1809 }, { "epoch": 0.735921935352714, "grad_norm": 0.09244798123836517, "learning_rate": 0.0001265621819662121, "loss": 0.8411, "step": 1810 }, { "epoch": 0.7363285220573287, "grad_norm": 0.08985315263271332, "learning_rate": 0.00012652147364135966, "loss": 1.0301, "step": 1811 }, { "epoch": 0.7367351087619435, "grad_norm": 0.09606938809156418, "learning_rate": 0.00012648076531650724, "loss": 1.0053, "step": 1812 }, { "epoch": 
0.7371416954665583, "grad_norm": 0.10566183179616928, "learning_rate": 0.0001264400569916548, "loss": 0.9527, "step": 1813 }, { "epoch": 0.737548282171173, "grad_norm": 0.10999652743339539, "learning_rate": 0.00012639934866680237, "loss": 1.0756, "step": 1814 }, { "epoch": 0.7379548688757878, "grad_norm": 0.09473931044340134, "learning_rate": 0.00012635864034194993, "loss": 0.94, "step": 1815 }, { "epoch": 0.7383614555804026, "grad_norm": 0.09815262258052826, "learning_rate": 0.0001263179320170975, "loss": 1.0436, "step": 1816 }, { "epoch": 0.7387680422850172, "grad_norm": 0.08889912813901901, "learning_rate": 0.00012627722369224506, "loss": 0.9368, "step": 1817 }, { "epoch": 0.739174628989632, "grad_norm": 0.09337257593870163, "learning_rate": 0.00012623651536739262, "loss": 1.0949, "step": 1818 }, { "epoch": 0.7395812156942468, "grad_norm": 0.09112720191478729, "learning_rate": 0.00012619580704254023, "loss": 1.0239, "step": 1819 }, { "epoch": 0.7399878023988615, "grad_norm": 0.0988708958029747, "learning_rate": 0.00012615509871768778, "loss": 1.0648, "step": 1820 }, { "epoch": 0.7403943891034763, "grad_norm": 0.09849932789802551, "learning_rate": 0.00012611439039283533, "loss": 0.9867, "step": 1821 }, { "epoch": 0.7408009758080911, "grad_norm": 0.09254156798124313, "learning_rate": 0.00012607368206798291, "loss": 0.9903, "step": 1822 }, { "epoch": 0.7412075625127058, "grad_norm": 0.0954776182770729, "learning_rate": 0.00012603297374313047, "loss": 1.0081, "step": 1823 }, { "epoch": 0.7416141492173206, "grad_norm": 0.08610807359218597, "learning_rate": 0.00012599226541827805, "loss": 0.9229, "step": 1824 }, { "epoch": 0.7420207359219354, "grad_norm": 0.0977591797709465, "learning_rate": 0.0001259515570934256, "loss": 0.9076, "step": 1825 }, { "epoch": 0.7424273226265501, "grad_norm": 0.0858481377363205, "learning_rate": 0.00012591084876857319, "loss": 0.8604, "step": 1826 }, { "epoch": 0.7428339093311649, "grad_norm": 0.09642601758241653, "learning_rate": 
0.00012587014044372074, "loss": 1.0476, "step": 1827 }, { "epoch": 0.7432404960357797, "grad_norm": 0.08871784061193466, "learning_rate": 0.0001258294321188683, "loss": 0.9597, "step": 1828 }, { "epoch": 0.7436470827403944, "grad_norm": 0.10808097571134567, "learning_rate": 0.00012578872379401587, "loss": 1.1415, "step": 1829 }, { "epoch": 0.7440536694450092, "grad_norm": 0.09339917451143265, "learning_rate": 0.00012574801546916346, "loss": 0.9437, "step": 1830 }, { "epoch": 0.7444602561496239, "grad_norm": 0.08945673704147339, "learning_rate": 0.00012570730714431104, "loss": 0.9714, "step": 1831 }, { "epoch": 0.7448668428542387, "grad_norm": 0.0939527079463005, "learning_rate": 0.0001256665988194586, "loss": 0.9868, "step": 1832 }, { "epoch": 0.7452734295588535, "grad_norm": 0.09327416867017746, "learning_rate": 0.00012562589049460615, "loss": 1.0001, "step": 1833 }, { "epoch": 0.7456800162634681, "grad_norm": 0.10278622061014175, "learning_rate": 0.00012558518216975373, "loss": 1.0724, "step": 1834 }, { "epoch": 0.7460866029680829, "grad_norm": 0.09421471506357193, "learning_rate": 0.00012554447384490128, "loss": 1.0088, "step": 1835 }, { "epoch": 0.7464931896726977, "grad_norm": 0.1009073331952095, "learning_rate": 0.00012550376552004886, "loss": 1.0485, "step": 1836 }, { "epoch": 0.7468997763773124, "grad_norm": 0.09199651330709457, "learning_rate": 0.00012546305719519642, "loss": 0.9765, "step": 1837 }, { "epoch": 0.7473063630819272, "grad_norm": 0.09672168642282486, "learning_rate": 0.000125422348870344, "loss": 1.018, "step": 1838 }, { "epoch": 0.747712949786542, "grad_norm": 0.09036868065595627, "learning_rate": 0.00012538164054549155, "loss": 0.9067, "step": 1839 }, { "epoch": 0.7481195364911567, "grad_norm": 0.09706352651119232, "learning_rate": 0.0001253409322206391, "loss": 1.0439, "step": 1840 }, { "epoch": 0.7485261231957715, "grad_norm": 0.09940480440855026, "learning_rate": 0.00012530022389578669, "loss": 1.0936, "step": 1841 }, { "epoch": 
0.7489327099003863, "grad_norm": 0.09489309787750244, "learning_rate": 0.00012525951557093427, "loss": 1.0606, "step": 1842 }, { "epoch": 0.749339296605001, "grad_norm": 0.07897097617387772, "learning_rate": 0.00012521880724608185, "loss": 0.8109, "step": 1843 }, { "epoch": 0.7497458833096158, "grad_norm": 0.09423919022083282, "learning_rate": 0.0001251780989212294, "loss": 1.0703, "step": 1844 }, { "epoch": 0.7501524700142306, "grad_norm": 0.09601794928312302, "learning_rate": 0.00012513739059637696, "loss": 0.9692, "step": 1845 }, { "epoch": 0.7505590567188453, "grad_norm": 0.09051002562046051, "learning_rate": 0.00012509668227152454, "loss": 0.9727, "step": 1846 }, { "epoch": 0.7509656434234601, "grad_norm": 0.09665656834840775, "learning_rate": 0.0001250559739466721, "loss": 1.0701, "step": 1847 }, { "epoch": 0.7513722301280749, "grad_norm": 0.08956587314605713, "learning_rate": 0.00012501526562181967, "loss": 0.9863, "step": 1848 }, { "epoch": 0.7517788168326895, "grad_norm": 0.09464751929044724, "learning_rate": 0.00012497455729696723, "loss": 1.043, "step": 1849 }, { "epoch": 0.7521854035373043, "grad_norm": 0.09246315807104111, "learning_rate": 0.0001249338489721148, "loss": 1.0306, "step": 1850 }, { "epoch": 0.7525919902419191, "grad_norm": 0.0943431407213211, "learning_rate": 0.00012489314064726236, "loss": 0.9251, "step": 1851 }, { "epoch": 0.7529985769465338, "grad_norm": 0.08852697908878326, "learning_rate": 0.00012485243232240992, "loss": 0.919, "step": 1852 }, { "epoch": 0.7534051636511486, "grad_norm": 0.08856131881475449, "learning_rate": 0.00012481172399755752, "loss": 0.9874, "step": 1853 }, { "epoch": 0.7538117503557634, "grad_norm": 0.08715582638978958, "learning_rate": 0.00012477101567270508, "loss": 0.9569, "step": 1854 }, { "epoch": 0.7542183370603781, "grad_norm": 0.1005750522017479, "learning_rate": 0.00012473030734785266, "loss": 1.118, "step": 1855 }, { "epoch": 0.7546249237649929, "grad_norm": 0.0848010703921318, "learning_rate": 
0.00012468959902300021, "loss": 0.8808, "step": 1856 }, { "epoch": 0.7550315104696076, "grad_norm": 0.10509838908910751, "learning_rate": 0.00012464889069814777, "loss": 1.0019, "step": 1857 }, { "epoch": 0.7554380971742224, "grad_norm": 0.09729699045419693, "learning_rate": 0.00012460818237329535, "loss": 0.9275, "step": 1858 }, { "epoch": 0.7558446838788372, "grad_norm": 0.0901610478758812, "learning_rate": 0.0001245674740484429, "loss": 1.0285, "step": 1859 }, { "epoch": 0.7562512705834519, "grad_norm": 0.08691520988941193, "learning_rate": 0.00012452676572359048, "loss": 0.9524, "step": 1860 }, { "epoch": 0.7566578572880667, "grad_norm": 0.09559500962495804, "learning_rate": 0.00012448605739873804, "loss": 1.0781, "step": 1861 }, { "epoch": 0.7570644439926815, "grad_norm": 0.09581112861633301, "learning_rate": 0.00012444534907388562, "loss": 1.068, "step": 1862 }, { "epoch": 0.7574710306972962, "grad_norm": 0.10235914587974548, "learning_rate": 0.00012440464074903317, "loss": 1.078, "step": 1863 }, { "epoch": 0.757877617401911, "grad_norm": 0.09794023633003235, "learning_rate": 0.00012436393242418073, "loss": 1.0951, "step": 1864 }, { "epoch": 0.7582842041065257, "grad_norm": 0.08910951763391495, "learning_rate": 0.00012432322409932834, "loss": 1.002, "step": 1865 }, { "epoch": 0.7586907908111404, "grad_norm": 0.08909524232149124, "learning_rate": 0.0001242825157744759, "loss": 0.9027, "step": 1866 }, { "epoch": 0.7590973775157552, "grad_norm": 0.09639742970466614, "learning_rate": 0.00012424180744962347, "loss": 1.1356, "step": 1867 }, { "epoch": 0.75950396422037, "grad_norm": 0.08606995642185211, "learning_rate": 0.00012420109912477103, "loss": 0.8974, "step": 1868 }, { "epoch": 0.7599105509249847, "grad_norm": 0.09715355932712555, "learning_rate": 0.00012416039079991858, "loss": 1.078, "step": 1869 }, { "epoch": 0.7603171376295995, "grad_norm": 0.08933407068252563, "learning_rate": 0.00012411968247506616, "loss": 0.9177, "step": 1870 }, { "epoch": 
0.7607237243342143, "grad_norm": 0.0859113335609436, "learning_rate": 0.00012407897415021372, "loss": 0.9703, "step": 1871 }, { "epoch": 0.761130311038829, "grad_norm": 0.09086931496858597, "learning_rate": 0.0001240382658253613, "loss": 1.0298, "step": 1872 }, { "epoch": 0.7615368977434438, "grad_norm": 0.09112663567066193, "learning_rate": 0.00012399755750050885, "loss": 0.9918, "step": 1873 }, { "epoch": 0.7619434844480586, "grad_norm": 0.09044841676950455, "learning_rate": 0.00012395684917565643, "loss": 0.9469, "step": 1874 }, { "epoch": 0.7623500711526733, "grad_norm": 0.08345028758049011, "learning_rate": 0.00012391614085080399, "loss": 0.879, "step": 1875 }, { "epoch": 0.7627566578572881, "grad_norm": 0.10249708592891693, "learning_rate": 0.00012387543252595157, "loss": 1.0247, "step": 1876 }, { "epoch": 0.7631632445619029, "grad_norm": 0.0914909839630127, "learning_rate": 0.00012383472420109915, "loss": 0.9341, "step": 1877 }, { "epoch": 0.7635698312665176, "grad_norm": 0.08616846054792404, "learning_rate": 0.0001237940158762467, "loss": 0.918, "step": 1878 }, { "epoch": 0.7639764179711324, "grad_norm": 0.0853181779384613, "learning_rate": 0.00012375330755139428, "loss": 0.8903, "step": 1879 }, { "epoch": 0.7643830046757472, "grad_norm": 0.0943385511636734, "learning_rate": 0.00012371259922654184, "loss": 1.0437, "step": 1880 }, { "epoch": 0.7647895913803618, "grad_norm": 0.08487629890441895, "learning_rate": 0.0001236718909016894, "loss": 0.9655, "step": 1881 }, { "epoch": 0.7651961780849766, "grad_norm": 0.09635015577077866, "learning_rate": 0.00012363118257683697, "loss": 1.0047, "step": 1882 }, { "epoch": 0.7656027647895913, "grad_norm": 0.09787151217460632, "learning_rate": 0.00012359047425198453, "loss": 1.1058, "step": 1883 }, { "epoch": 0.7660093514942061, "grad_norm": 0.10217342525720596, "learning_rate": 0.0001235497659271321, "loss": 1.1407, "step": 1884 }, { "epoch": 0.7664159381988209, "grad_norm": 0.08770392835140228, "learning_rate": 
0.00012350905760227966, "loss": 0.8851, "step": 1885 }, { "epoch": 0.7668225249034356, "grad_norm": 0.08978156745433807, "learning_rate": 0.00012346834927742724, "loss": 1.0138, "step": 1886 }, { "epoch": 0.7672291116080504, "grad_norm": 0.09110313653945923, "learning_rate": 0.0001234276409525748, "loss": 0.8872, "step": 1887 }, { "epoch": 0.7676356983126652, "grad_norm": 0.0905870720744133, "learning_rate": 0.00012338693262772238, "loss": 0.9819, "step": 1888 }, { "epoch": 0.7680422850172799, "grad_norm": 0.09418340027332306, "learning_rate": 0.00012334622430286996, "loss": 1.0486, "step": 1889 }, { "epoch": 0.7684488717218947, "grad_norm": 0.09140585362911224, "learning_rate": 0.00012330551597801751, "loss": 0.9463, "step": 1890 }, { "epoch": 0.7688554584265095, "grad_norm": 0.08720141649246216, "learning_rate": 0.0001232648076531651, "loss": 0.9833, "step": 1891 }, { "epoch": 0.7692620451311242, "grad_norm": 0.09206419438123703, "learning_rate": 0.00012322409932831265, "loss": 0.9554, "step": 1892 }, { "epoch": 0.769668631835739, "grad_norm": 0.09324870258569717, "learning_rate": 0.0001231833910034602, "loss": 1.0703, "step": 1893 }, { "epoch": 0.7700752185403538, "grad_norm": 0.0868481770157814, "learning_rate": 0.00012314268267860778, "loss": 0.9374, "step": 1894 }, { "epoch": 0.7704818052449685, "grad_norm": 0.0907289981842041, "learning_rate": 0.00012310197435375534, "loss": 1.0148, "step": 1895 }, { "epoch": 0.7708883919495833, "grad_norm": 0.09804967790842056, "learning_rate": 0.00012306126602890292, "loss": 1.0541, "step": 1896 }, { "epoch": 0.771294978654198, "grad_norm": 0.09168083965778351, "learning_rate": 0.00012302055770405047, "loss": 0.9363, "step": 1897 }, { "epoch": 0.7717015653588127, "grad_norm": 0.09078045189380646, "learning_rate": 0.00012297984937919805, "loss": 1.0683, "step": 1898 }, { "epoch": 0.7721081520634275, "grad_norm": 0.08930620551109314, "learning_rate": 0.00012293914105434564, "loss": 0.9659, "step": 1899 }, { "epoch": 
0.7725147387680423, "grad_norm": 0.09990911930799484, "learning_rate": 0.0001228984327294932, "loss": 1.1301, "step": 1900 }, { "epoch": 0.772921325472657, "grad_norm": 0.08707278221845627, "learning_rate": 0.00012285772440464077, "loss": 0.919, "step": 1901 }, { "epoch": 0.7733279121772718, "grad_norm": 0.0829259380698204, "learning_rate": 0.00012281701607978833, "loss": 0.8806, "step": 1902 }, { "epoch": 0.7737344988818866, "grad_norm": 0.09047359228134155, "learning_rate": 0.0001227763077549359, "loss": 0.9478, "step": 1903 }, { "epoch": 0.7741410855865013, "grad_norm": 0.08373644202947617, "learning_rate": 0.00012273559943008346, "loss": 0.8043, "step": 1904 }, { "epoch": 0.7745476722911161, "grad_norm": 0.08893609046936035, "learning_rate": 0.00012269489110523101, "loss": 0.9849, "step": 1905 }, { "epoch": 0.7749542589957309, "grad_norm": 0.09218044579029083, "learning_rate": 0.0001226541827803786, "loss": 0.8856, "step": 1906 }, { "epoch": 0.7753608457003456, "grad_norm": 0.08562997728586197, "learning_rate": 0.00012261347445552615, "loss": 0.8934, "step": 1907 }, { "epoch": 0.7757674324049604, "grad_norm": 0.09083337336778641, "learning_rate": 0.00012257276613067373, "loss": 1.0118, "step": 1908 }, { "epoch": 0.7761740191095751, "grad_norm": 0.08940907567739487, "learning_rate": 0.00012253205780582129, "loss": 0.9138, "step": 1909 }, { "epoch": 0.7765806058141899, "grad_norm": 0.09383655339479446, "learning_rate": 0.00012249134948096887, "loss": 0.9727, "step": 1910 }, { "epoch": 0.7769871925188047, "grad_norm": 0.10210567712783813, "learning_rate": 0.00012245064115611645, "loss": 1.0628, "step": 1911 }, { "epoch": 0.7773937792234193, "grad_norm": 0.09698057174682617, "learning_rate": 0.000122409932831264, "loss": 1.0863, "step": 1912 }, { "epoch": 0.7778003659280341, "grad_norm": 0.0920233353972435, "learning_rate": 0.00012236922450641158, "loss": 0.9377, "step": 1913 }, { "epoch": 0.7782069526326489, "grad_norm": 0.08810736984014511, "learning_rate": 
0.00012232851618155914, "loss": 0.9152, "step": 1914 }, { "epoch": 0.7786135393372636, "grad_norm": 0.0930081456899643, "learning_rate": 0.0001222878078567067, "loss": 0.9493, "step": 1915 }, { "epoch": 0.7790201260418784, "grad_norm": 0.09303618222475052, "learning_rate": 0.00012224709953185427, "loss": 0.9638, "step": 1916 }, { "epoch": 0.7794267127464932, "grad_norm": 0.09462623298168182, "learning_rate": 0.00012220639120700183, "loss": 1.0028, "step": 1917 }, { "epoch": 0.7798332994511079, "grad_norm": 0.08258619159460068, "learning_rate": 0.0001221656828821494, "loss": 0.934, "step": 1918 }, { "epoch": 0.7802398861557227, "grad_norm": 0.0895158126950264, "learning_rate": 0.00012212497455729696, "loss": 1.0514, "step": 1919 }, { "epoch": 0.7806464728603375, "grad_norm": 0.0877576693892479, "learning_rate": 0.00012208426623244454, "loss": 0.906, "step": 1920 }, { "epoch": 0.7810530595649522, "grad_norm": 0.08271359652280807, "learning_rate": 0.0001220435579075921, "loss": 0.8446, "step": 1921 }, { "epoch": 0.781459646269567, "grad_norm": 0.09866933524608612, "learning_rate": 0.00012200284958273969, "loss": 1.0477, "step": 1922 }, { "epoch": 0.7818662329741818, "grad_norm": 0.0881706029176712, "learning_rate": 0.00012196214125788725, "loss": 0.9121, "step": 1923 }, { "epoch": 0.7822728196787965, "grad_norm": 0.08888103812932968, "learning_rate": 0.00012192143293303481, "loss": 0.9179, "step": 1924 }, { "epoch": 0.7826794063834113, "grad_norm": 0.08678455650806427, "learning_rate": 0.00012188072460818238, "loss": 0.8909, "step": 1925 }, { "epoch": 0.7830859930880261, "grad_norm": 0.08965113759040833, "learning_rate": 0.00012184001628332995, "loss": 0.9483, "step": 1926 }, { "epoch": 0.7834925797926408, "grad_norm": 0.09123353660106659, "learning_rate": 0.00012179930795847752, "loss": 0.9552, "step": 1927 }, { "epoch": 0.7838991664972556, "grad_norm": 0.08975458890199661, "learning_rate": 0.00012175859963362508, "loss": 0.9155, "step": 1928 }, { "epoch": 
0.7843057532018703, "grad_norm": 0.09666619449853897, "learning_rate": 0.00012171789130877265, "loss": 0.9306, "step": 1929 }, { "epoch": 0.784712339906485, "grad_norm": 0.10168115049600601, "learning_rate": 0.00012167718298392022, "loss": 1.0875, "step": 1930 }, { "epoch": 0.7851189266110998, "grad_norm": 0.09472563117742538, "learning_rate": 0.00012163647465906777, "loss": 1.0703, "step": 1931 }, { "epoch": 0.7855255133157146, "grad_norm": 0.09953609853982925, "learning_rate": 0.00012159576633421534, "loss": 1.0868, "step": 1932 }, { "epoch": 0.7859321000203293, "grad_norm": 0.0972660630941391, "learning_rate": 0.00012155505800936291, "loss": 0.9368, "step": 1933 }, { "epoch": 0.7863386867249441, "grad_norm": 0.08514858037233353, "learning_rate": 0.0001215143496845105, "loss": 0.9114, "step": 1934 }, { "epoch": 0.7867452734295588, "grad_norm": 0.08503813296556473, "learning_rate": 0.00012147364135965806, "loss": 0.9315, "step": 1935 }, { "epoch": 0.7871518601341736, "grad_norm": 0.09241003543138504, "learning_rate": 0.00012143293303480563, "loss": 0.9656, "step": 1936 }, { "epoch": 0.7875584468387884, "grad_norm": 0.08896864950656891, "learning_rate": 0.00012139222470995319, "loss": 1.0197, "step": 1937 }, { "epoch": 0.7879650335434031, "grad_norm": 0.0944843515753746, "learning_rate": 0.00012135151638510076, "loss": 1.0548, "step": 1938 }, { "epoch": 0.7883716202480179, "grad_norm": 0.09366561472415924, "learning_rate": 0.00012131080806024833, "loss": 1.0353, "step": 1939 }, { "epoch": 0.7887782069526327, "grad_norm": 0.094940185546875, "learning_rate": 0.0001212700997353959, "loss": 0.9467, "step": 1940 }, { "epoch": 0.7891847936572474, "grad_norm": 0.08708861470222473, "learning_rate": 0.00012122939141054346, "loss": 0.9762, "step": 1941 }, { "epoch": 0.7895913803618622, "grad_norm": 0.09528307616710663, "learning_rate": 0.00012118868308569102, "loss": 0.9989, "step": 1942 }, { "epoch": 0.789997967066477, "grad_norm": 0.09655644744634628, "learning_rate": 
0.00012114797476083858, "loss": 1.0961, "step": 1943 }, { "epoch": 0.7904045537710916, "grad_norm": 0.09209448844194412, "learning_rate": 0.00012110726643598615, "loss": 0.9271, "step": 1944 }, { "epoch": 0.7908111404757064, "grad_norm": 0.10271915793418884, "learning_rate": 0.00012106655811113375, "loss": 1.1772, "step": 1945 }, { "epoch": 0.7912177271803212, "grad_norm": 0.08724693953990936, "learning_rate": 0.00012102584978628131, "loss": 1.0258, "step": 1946 }, { "epoch": 0.7916243138849359, "grad_norm": 0.09265289455652237, "learning_rate": 0.00012098514146142887, "loss": 0.9987, "step": 1947 }, { "epoch": 0.7920309005895507, "grad_norm": 0.08839931339025497, "learning_rate": 0.00012094443313657644, "loss": 0.9955, "step": 1948 }, { "epoch": 0.7924374872941655, "grad_norm": 0.0918072909116745, "learning_rate": 0.000120903724811724, "loss": 0.966, "step": 1949 }, { "epoch": 0.7928440739987802, "grad_norm": 0.09354092925786972, "learning_rate": 0.00012086301648687157, "loss": 1.0129, "step": 1950 }, { "epoch": 0.793250660703395, "grad_norm": 0.0950416848063469, "learning_rate": 0.00012082230816201914, "loss": 1.1276, "step": 1951 }, { "epoch": 0.7936572474080098, "grad_norm": 0.08683070540428162, "learning_rate": 0.00012078159983716671, "loss": 0.8137, "step": 1952 }, { "epoch": 0.7940638341126245, "grad_norm": 0.0931641161441803, "learning_rate": 0.00012074089151231427, "loss": 1.0094, "step": 1953 }, { "epoch": 0.7944704208172393, "grad_norm": 0.09602310508489609, "learning_rate": 0.00012070018318746183, "loss": 1.0643, "step": 1954 }, { "epoch": 0.7948770075218541, "grad_norm": 0.08961457759141922, "learning_rate": 0.0001206594748626094, "loss": 0.9338, "step": 1955 }, { "epoch": 0.7952835942264688, "grad_norm": 0.09515352547168732, "learning_rate": 0.00012061876653775696, "loss": 1.0151, "step": 1956 }, { "epoch": 0.7956901809310836, "grad_norm": 0.08361717313528061, "learning_rate": 0.00012057805821290456, "loss": 0.9363, "step": 1957 }, { "epoch": 
0.7960967676356984, "grad_norm": 0.09745500981807709, "learning_rate": 0.00012053734988805213, "loss": 1.0578, "step": 1958 }, { "epoch": 0.796503354340313, "grad_norm": 0.09218847006559372, "learning_rate": 0.00012049664156319968, "loss": 0.9899, "step": 1959 }, { "epoch": 0.7969099410449278, "grad_norm": 0.08713049441576004, "learning_rate": 0.00012045593323834725, "loss": 0.9283, "step": 1960 }, { "epoch": 0.7973165277495425, "grad_norm": 0.09556014090776443, "learning_rate": 0.00012041522491349482, "loss": 1.0288, "step": 1961 }, { "epoch": 0.7977231144541573, "grad_norm": 0.09281028062105179, "learning_rate": 0.00012037451658864238, "loss": 0.9812, "step": 1962 }, { "epoch": 0.7981297011587721, "grad_norm": 0.10029471665620804, "learning_rate": 0.00012033380826378995, "loss": 1.1787, "step": 1963 }, { "epoch": 0.7985362878633868, "grad_norm": 0.08875210583209991, "learning_rate": 0.00012029309993893752, "loss": 0.9432, "step": 1964 }, { "epoch": 0.7989428745680016, "grad_norm": 0.09191716462373734, "learning_rate": 0.00012025239161408509, "loss": 1.0301, "step": 1965 }, { "epoch": 0.7993494612726164, "grad_norm": 0.09936721622943878, "learning_rate": 0.00012021168328923264, "loss": 1.0008, "step": 1966 }, { "epoch": 0.7997560479772311, "grad_norm": 0.09157298505306244, "learning_rate": 0.00012017097496438021, "loss": 1.0065, "step": 1967 }, { "epoch": 0.8001626346818459, "grad_norm": 0.0955449789762497, "learning_rate": 0.0001201302666395278, "loss": 1.1102, "step": 1968 }, { "epoch": 0.8005692213864607, "grad_norm": 0.10182837396860123, "learning_rate": 0.00012008955831467537, "loss": 1.1306, "step": 1969 }, { "epoch": 0.8009758080910754, "grad_norm": 0.09424961358308792, "learning_rate": 0.00012004884998982292, "loss": 0.953, "step": 1970 }, { "epoch": 0.8013823947956902, "grad_norm": 0.09814165532588959, "learning_rate": 0.00012000814166497049, "loss": 1.1101, "step": 1971 }, { "epoch": 0.801788981500305, "grad_norm": 0.09669219702482224, "learning_rate": 
0.00011996743334011806, "loss": 0.9598, "step": 1972 }, { "epoch": 0.8021955682049197, "grad_norm": 0.08699534088373184, "learning_rate": 0.00011992672501526563, "loss": 0.8809, "step": 1973 }, { "epoch": 0.8026021549095345, "grad_norm": 0.08246033638715744, "learning_rate": 0.0001198860166904132, "loss": 0.9572, "step": 1974 }, { "epoch": 0.8030087416141493, "grad_norm": 0.08641736209392548, "learning_rate": 0.00011984530836556076, "loss": 0.9201, "step": 1975 }, { "epoch": 0.8034153283187639, "grad_norm": 0.09478481858968735, "learning_rate": 0.00011980460004070833, "loss": 0.9875, "step": 1976 }, { "epoch": 0.8038219150233787, "grad_norm": 0.09944069385528564, "learning_rate": 0.0001197638917158559, "loss": 1.01, "step": 1977 }, { "epoch": 0.8042285017279935, "grad_norm": 0.09341408312320709, "learning_rate": 0.00011972318339100345, "loss": 0.9655, "step": 1978 }, { "epoch": 0.8046350884326082, "grad_norm": 0.09643781185150146, "learning_rate": 0.00011968247506615102, "loss": 0.9764, "step": 1979 }, { "epoch": 0.805041675137223, "grad_norm": 0.08750199526548386, "learning_rate": 0.00011964176674129861, "loss": 0.9561, "step": 1980 }, { "epoch": 0.8054482618418378, "grad_norm": 0.09079190343618393, "learning_rate": 0.00011960105841644618, "loss": 0.9068, "step": 1981 }, { "epoch": 0.8058548485464525, "grad_norm": 0.08659665286540985, "learning_rate": 0.00011956035009159374, "loss": 0.9414, "step": 1982 }, { "epoch": 0.8062614352510673, "grad_norm": 0.08911033719778061, "learning_rate": 0.0001195196417667413, "loss": 0.9566, "step": 1983 }, { "epoch": 0.8066680219556821, "grad_norm": 0.09575940668582916, "learning_rate": 0.00011947893344188887, "loss": 0.9836, "step": 1984 }, { "epoch": 0.8070746086602968, "grad_norm": 0.09464087337255478, "learning_rate": 0.00011943822511703644, "loss": 1.0854, "step": 1985 }, { "epoch": 0.8074811953649116, "grad_norm": 0.09153340756893158, "learning_rate": 0.000119397516792184, "loss": 1.0362, "step": 1986 }, { "epoch": 
0.8078877820695263, "grad_norm": 0.09390057623386383, "learning_rate": 0.00011935680846733157, "loss": 0.9829, "step": 1987 }, { "epoch": 0.8082943687741411, "grad_norm": 0.09109530597925186, "learning_rate": 0.00011931610014247914, "loss": 0.9472, "step": 1988 }, { "epoch": 0.8087009554787559, "grad_norm": 0.09163598716259003, "learning_rate": 0.0001192753918176267, "loss": 1.0141, "step": 1989 }, { "epoch": 0.8091075421833706, "grad_norm": 0.09769194573163986, "learning_rate": 0.00011923468349277426, "loss": 1.0536, "step": 1990 }, { "epoch": 0.8095141288879854, "grad_norm": 0.09787027537822723, "learning_rate": 0.00011919397516792186, "loss": 1.0384, "step": 1991 }, { "epoch": 0.8099207155926001, "grad_norm": 0.09753425419330597, "learning_rate": 0.00011915326684306943, "loss": 0.9425, "step": 1992 }, { "epoch": 0.8103273022972148, "grad_norm": 0.0975935086607933, "learning_rate": 0.000119112558518217, "loss": 0.9755, "step": 1993 }, { "epoch": 0.8107338890018296, "grad_norm": 0.09270279854536057, "learning_rate": 0.00011907185019336455, "loss": 0.9501, "step": 1994 }, { "epoch": 0.8111404757064444, "grad_norm": 0.09884528815746307, "learning_rate": 0.00011903114186851212, "loss": 0.986, "step": 1995 }, { "epoch": 0.8115470624110591, "grad_norm": 0.09327102452516556, "learning_rate": 0.00011899043354365968, "loss": 0.9258, "step": 1996 }, { "epoch": 0.8119536491156739, "grad_norm": 0.09072525054216385, "learning_rate": 0.00011894972521880725, "loss": 0.8975, "step": 1997 }, { "epoch": 0.8123602358202887, "grad_norm": 0.09030098468065262, "learning_rate": 0.00011890901689395482, "loss": 0.9163, "step": 1998 }, { "epoch": 0.8127668225249034, "grad_norm": 0.09437917172908783, "learning_rate": 0.00011886830856910239, "loss": 1.0101, "step": 1999 }, { "epoch": 0.8131734092295182, "grad_norm": 0.092490553855896, "learning_rate": 0.00011882760024424995, "loss": 0.9865, "step": 2000 }, { "epoch": 0.813579995934133, "grad_norm": 0.08658891916275024, "learning_rate": 
0.00011878689191939751, "loss": 0.878, "step": 2001 }, { "epoch": 0.8139865826387477, "grad_norm": 0.08677167445421219, "learning_rate": 0.00011874618359454508, "loss": 0.9192, "step": 2002 }, { "epoch": 0.8143931693433625, "grad_norm": 0.09392429143190384, "learning_rate": 0.00011870547526969267, "loss": 1.023, "step": 2003 }, { "epoch": 0.8147997560479773, "grad_norm": 0.09641133248806, "learning_rate": 0.00011866476694484024, "loss": 1.0088, "step": 2004 }, { "epoch": 0.815206342752592, "grad_norm": 0.0969158262014389, "learning_rate": 0.0001186240586199878, "loss": 1.0194, "step": 2005 }, { "epoch": 0.8156129294572068, "grad_norm": 0.09078145772218704, "learning_rate": 0.00011858335029513536, "loss": 0.9206, "step": 2006 }, { "epoch": 0.8160195161618216, "grad_norm": 0.0925087034702301, "learning_rate": 0.00011854264197028293, "loss": 1.0193, "step": 2007 }, { "epoch": 0.8164261028664362, "grad_norm": 0.08056949079036713, "learning_rate": 0.0001185019336454305, "loss": 0.8271, "step": 2008 }, { "epoch": 0.816832689571051, "grad_norm": 0.08815829455852509, "learning_rate": 0.00011846122532057806, "loss": 0.9061, "step": 2009 }, { "epoch": 0.8172392762756658, "grad_norm": 0.0914953425526619, "learning_rate": 0.00011842051699572563, "loss": 1.058, "step": 2010 }, { "epoch": 0.8176458629802805, "grad_norm": 0.09589337557554245, "learning_rate": 0.0001183798086708732, "loss": 1.0244, "step": 2011 }, { "epoch": 0.8180524496848953, "grad_norm": 0.09444423019886017, "learning_rate": 0.00011833910034602077, "loss": 1.007, "step": 2012 }, { "epoch": 0.81845903638951, "grad_norm": 0.0973111018538475, "learning_rate": 0.00011829839202116832, "loss": 1.1077, "step": 2013 }, { "epoch": 0.8188656230941248, "grad_norm": 0.09395329654216766, "learning_rate": 0.00011825768369631591, "loss": 0.9592, "step": 2014 }, { "epoch": 0.8192722097987396, "grad_norm": 1295.30712890625, "learning_rate": 0.00011821697537146348, "loss": 0.9503, "step": 2015 }, { "epoch": 0.8196787965033543, 
"grad_norm": 0.09247137606143951, "learning_rate": 0.00011817626704661105, "loss": 0.9733, "step": 2016 }, { "epoch": 0.8200853832079691, "grad_norm": 0.09796010702848434, "learning_rate": 0.0001181355587217586, "loss": 1.0192, "step": 2017 }, { "epoch": 0.8204919699125839, "grad_norm": 0.11081703752279282, "learning_rate": 0.00011809485039690617, "loss": 1.0732, "step": 2018 }, { "epoch": 0.8208985566171986, "grad_norm": 0.1036204919219017, "learning_rate": 0.00011805414207205374, "loss": 1.0547, "step": 2019 }, { "epoch": 0.8213051433218134, "grad_norm": 0.09200289845466614, "learning_rate": 0.0001180134337472013, "loss": 0.8751, "step": 2020 }, { "epoch": 0.8217117300264282, "grad_norm": 0.09226184338331223, "learning_rate": 0.00011797272542234887, "loss": 0.8352, "step": 2021 }, { "epoch": 0.8221183167310429, "grad_norm": 0.09109731018543243, "learning_rate": 0.00011793201709749644, "loss": 0.8986, "step": 2022 }, { "epoch": 0.8225249034356577, "grad_norm": 0.0922974944114685, "learning_rate": 0.00011789130877264401, "loss": 0.9166, "step": 2023 }, { "epoch": 0.8229314901402724, "grad_norm": 0.09215478599071503, "learning_rate": 0.00011785060044779158, "loss": 0.9764, "step": 2024 }, { "epoch": 0.8233380768448871, "grad_norm": 0.09852897375822067, "learning_rate": 0.00011780989212293913, "loss": 1.0164, "step": 2025 }, { "epoch": 0.8237446635495019, "grad_norm": 0.08374243229627609, "learning_rate": 0.00011776918379808673, "loss": 0.947, "step": 2026 }, { "epoch": 0.8241512502541167, "grad_norm": 0.09260760992765427, "learning_rate": 0.0001177284754732343, "loss": 1.0338, "step": 2027 }, { "epoch": 0.8245578369587314, "grad_norm": 0.09498609602451324, "learning_rate": 0.00011768776714838186, "loss": 1.1077, "step": 2028 }, { "epoch": 0.8249644236633462, "grad_norm": 0.0914779007434845, "learning_rate": 0.00011764705882352942, "loss": 0.9499, "step": 2029 }, { "epoch": 0.825371010367961, "grad_norm": 0.09603306651115417, "learning_rate": 0.00011760635049867698, 
"loss": 0.9652, "step": 2030 }, { "epoch": 0.8257775970725757, "grad_norm": 0.09358631074428558, "learning_rate": 0.00011756564217382455, "loss": 0.9593, "step": 2031 }, { "epoch": 0.8261841837771905, "grad_norm": 0.08833447843790054, "learning_rate": 0.00011752493384897212, "loss": 0.9582, "step": 2032 }, { "epoch": 0.8265907704818053, "grad_norm": 0.09237752854824066, "learning_rate": 0.00011748422552411969, "loss": 0.9893, "step": 2033 }, { "epoch": 0.82699735718642, "grad_norm": 0.09216301888227463, "learning_rate": 0.00011744351719926725, "loss": 1.0804, "step": 2034 }, { "epoch": 0.8274039438910348, "grad_norm": 0.09359832108020782, "learning_rate": 0.00011740280887441482, "loss": 1.0516, "step": 2035 }, { "epoch": 0.8278105305956496, "grad_norm": 0.08962893486022949, "learning_rate": 0.00011736210054956237, "loss": 0.9592, "step": 2036 }, { "epoch": 0.8282171173002643, "grad_norm": 0.08905650675296783, "learning_rate": 0.00011732139222470997, "loss": 1.0088, "step": 2037 }, { "epoch": 0.8286237040048791, "grad_norm": 0.09745819866657257, "learning_rate": 0.00011728068389985754, "loss": 1.0656, "step": 2038 }, { "epoch": 0.8290302907094937, "grad_norm": 0.09807167202234268, "learning_rate": 0.0001172399755750051, "loss": 1.0324, "step": 2039 }, { "epoch": 0.8294368774141085, "grad_norm": 0.094350166618824, "learning_rate": 0.00011719926725015267, "loss": 0.9412, "step": 2040 }, { "epoch": 0.8298434641187233, "grad_norm": 0.10582345724105835, "learning_rate": 0.00011715855892530023, "loss": 1.0602, "step": 2041 }, { "epoch": 0.830250050823338, "grad_norm": 0.0957132950425148, "learning_rate": 0.0001171178506004478, "loss": 1.0394, "step": 2042 }, { "epoch": 0.8306566375279528, "grad_norm": 0.09163911640644073, "learning_rate": 0.00011707714227559536, "loss": 0.966, "step": 2043 }, { "epoch": 0.8310632242325676, "grad_norm": 0.08863722532987595, "learning_rate": 0.00011703643395074293, "loss": 1.0096, "step": 2044 }, { "epoch": 0.8314698109371823, "grad_norm": 
0.0891941711306572, "learning_rate": 0.0001169957256258905, "loss": 0.9694, "step": 2045 }, { "epoch": 0.8318763976417971, "grad_norm": 0.097014419734478, "learning_rate": 0.00011695501730103806, "loss": 1.022, "step": 2046 }, { "epoch": 0.8322829843464119, "grad_norm": 0.09055501222610474, "learning_rate": 0.00011691430897618563, "loss": 0.9859, "step": 2047 }, { "epoch": 0.8326895710510266, "grad_norm": 0.09768117219209671, "learning_rate": 0.00011687360065133319, "loss": 1.1293, "step": 2048 }, { "epoch": 0.8330961577556414, "grad_norm": 0.09261562675237656, "learning_rate": 0.00011683289232648078, "loss": 0.9639, "step": 2049 }, { "epoch": 0.8335027444602562, "grad_norm": 0.09100788086652756, "learning_rate": 0.00011679218400162835, "loss": 0.9564, "step": 2050 }, { "epoch": 0.8339093311648709, "grad_norm": 0.08779970556497574, "learning_rate": 0.00011675147567677592, "loss": 0.9273, "step": 2051 }, { "epoch": 0.8343159178694857, "grad_norm": 0.09557755291461945, "learning_rate": 0.00011671076735192348, "loss": 1.0858, "step": 2052 }, { "epoch": 0.8347225045741005, "grad_norm": 0.09423234313726425, "learning_rate": 0.00011667005902707104, "loss": 0.9901, "step": 2053 }, { "epoch": 0.8351290912787152, "grad_norm": 0.08855794370174408, "learning_rate": 0.0001166293507022186, "loss": 0.9226, "step": 2054 }, { "epoch": 0.83553567798333, "grad_norm": 0.09653773158788681, "learning_rate": 0.00011658864237736617, "loss": 1.0381, "step": 2055 }, { "epoch": 0.8359422646879447, "grad_norm": 0.08749355375766754, "learning_rate": 0.00011654793405251374, "loss": 0.964, "step": 2056 }, { "epoch": 0.8363488513925594, "grad_norm": 0.09068714082241058, "learning_rate": 0.00011650722572766131, "loss": 0.9222, "step": 2057 }, { "epoch": 0.8367554380971742, "grad_norm": 0.09309016168117523, "learning_rate": 0.00011646651740280888, "loss": 0.9706, "step": 2058 }, { "epoch": 0.837162024801789, "grad_norm": 0.08905037492513657, "learning_rate": 0.00011642580907795644, "loss": 0.925, 
"step": 2059 }, { "epoch": 0.8375686115064037, "grad_norm": 0.09465768933296204, "learning_rate": 0.00011638510075310403, "loss": 0.9834, "step": 2060 }, { "epoch": 0.8379751982110185, "grad_norm": 0.09916462749242783, "learning_rate": 0.00011634439242825159, "loss": 1.1026, "step": 2061 }, { "epoch": 0.8383817849156333, "grad_norm": 0.10114342719316483, "learning_rate": 0.00011630368410339916, "loss": 1.1066, "step": 2062 }, { "epoch": 0.838788371620248, "grad_norm": 0.09570196270942688, "learning_rate": 0.00011626297577854673, "loss": 0.9225, "step": 2063 }, { "epoch": 0.8391949583248628, "grad_norm": 0.09646128118038177, "learning_rate": 0.0001162222674536943, "loss": 0.8873, "step": 2064 }, { "epoch": 0.8396015450294775, "grad_norm": 0.08988897502422333, "learning_rate": 0.00011618155912884185, "loss": 0.8496, "step": 2065 }, { "epoch": 0.8400081317340923, "grad_norm": 0.09540429711341858, "learning_rate": 0.00011614085080398942, "loss": 0.9864, "step": 2066 }, { "epoch": 0.8404147184387071, "grad_norm": 0.09202653169631958, "learning_rate": 0.00011610014247913699, "loss": 0.9331, "step": 2067 }, { "epoch": 0.8408213051433218, "grad_norm": 0.0908489003777504, "learning_rate": 0.00011605943415428455, "loss": 0.9212, "step": 2068 }, { "epoch": 0.8412278918479366, "grad_norm": 0.08785940706729889, "learning_rate": 0.00011601872582943212, "loss": 0.8732, "step": 2069 }, { "epoch": 0.8416344785525514, "grad_norm": 0.09724607318639755, "learning_rate": 0.00011597801750457969, "loss": 1.0057, "step": 2070 }, { "epoch": 0.842041065257166, "grad_norm": 0.09459156543016434, "learning_rate": 0.00011593730917972726, "loss": 1.0179, "step": 2071 }, { "epoch": 0.8424476519617808, "grad_norm": 0.09080464392900467, "learning_rate": 0.00011589660085487484, "loss": 0.9499, "step": 2072 }, { "epoch": 0.8428542386663956, "grad_norm": 0.0882730633020401, "learning_rate": 0.0001158558925300224, "loss": 0.9109, "step": 2073 }, { "epoch": 0.8432608253710103, "grad_norm": 
0.08633995056152344, "learning_rate": 0.00011581518420516997, "loss": 1.0101, "step": 2074 }, { "epoch": 0.8436674120756251, "grad_norm": 0.08903708308935165, "learning_rate": 0.00011577447588031754, "loss": 0.9705, "step": 2075 }, { "epoch": 0.8440739987802399, "grad_norm": 0.09651392698287964, "learning_rate": 0.0001157337675554651, "loss": 1.1204, "step": 2076 }, { "epoch": 0.8444805854848546, "grad_norm": 0.08830486238002777, "learning_rate": 0.00011569305923061266, "loss": 0.918, "step": 2077 }, { "epoch": 0.8448871721894694, "grad_norm": 0.09067387878894806, "learning_rate": 0.00011565235090576023, "loss": 0.9139, "step": 2078 }, { "epoch": 0.8452937588940842, "grad_norm": 0.08710314333438873, "learning_rate": 0.0001156116425809078, "loss": 0.8869, "step": 2079 }, { "epoch": 0.8457003455986989, "grad_norm": 0.09208957850933075, "learning_rate": 0.00011557093425605536, "loss": 0.9318, "step": 2080 }, { "epoch": 0.8461069323033137, "grad_norm": 0.09385235607624054, "learning_rate": 0.00011553022593120293, "loss": 1.0103, "step": 2081 }, { "epoch": 0.8465135190079285, "grad_norm": 0.09042852371931076, "learning_rate": 0.0001154895176063505, "loss": 0.9433, "step": 2082 }, { "epoch": 0.8469201057125432, "grad_norm": 0.09457480907440186, "learning_rate": 0.00011544880928149808, "loss": 0.9826, "step": 2083 }, { "epoch": 0.847326692417158, "grad_norm": 0.09332386404275894, "learning_rate": 0.00011540810095664565, "loss": 1.0264, "step": 2084 }, { "epoch": 0.8477332791217728, "grad_norm": 0.09104622900485992, "learning_rate": 0.00011536739263179322, "loss": 0.997, "step": 2085 }, { "epoch": 0.8481398658263875, "grad_norm": 0.08547891676425934, "learning_rate": 0.00011532668430694078, "loss": 0.9314, "step": 2086 }, { "epoch": 0.8485464525310022, "grad_norm": 0.08525467664003372, "learning_rate": 0.00011528597598208835, "loss": 0.9128, "step": 2087 }, { "epoch": 0.848953039235617, "grad_norm": 0.08956707268953323, "learning_rate": 0.0001152452676572359, "loss": 
0.9383, "step": 2088 }, { "epoch": 0.8493596259402317, "grad_norm": 0.09025274217128754, "learning_rate": 0.00011520455933238347, "loss": 0.975, "step": 2089 }, { "epoch": 0.8497662126448465, "grad_norm": 0.09149152785539627, "learning_rate": 0.00011516385100753104, "loss": 0.9633, "step": 2090 }, { "epoch": 0.8501727993494613, "grad_norm": 0.0975874587893486, "learning_rate": 0.00011512314268267861, "loss": 1.0283, "step": 2091 }, { "epoch": 0.850579386054076, "grad_norm": 0.09499591588973999, "learning_rate": 0.00011508243435782618, "loss": 1.0041, "step": 2092 }, { "epoch": 0.8509859727586908, "grad_norm": 0.09098786115646362, "learning_rate": 0.00011504172603297374, "loss": 1.0112, "step": 2093 }, { "epoch": 0.8513925594633055, "grad_norm": 0.08904889225959778, "learning_rate": 0.00011500101770812131, "loss": 0.8774, "step": 2094 }, { "epoch": 0.8517991461679203, "grad_norm": 0.08806558698415756, "learning_rate": 0.00011496030938326889, "loss": 0.8791, "step": 2095 }, { "epoch": 0.8522057328725351, "grad_norm": 0.09309332817792892, "learning_rate": 0.00011491960105841646, "loss": 1.0235, "step": 2096 }, { "epoch": 0.8526123195771498, "grad_norm": 0.0923120453953743, "learning_rate": 0.00011487889273356403, "loss": 0.9926, "step": 2097 }, { "epoch": 0.8530189062817646, "grad_norm": 0.09120898693799973, "learning_rate": 0.0001148381844087116, "loss": 1.0387, "step": 2098 }, { "epoch": 0.8534254929863794, "grad_norm": 0.09038707613945007, "learning_rate": 0.00011479747608385916, "loss": 0.9998, "step": 2099 }, { "epoch": 0.8538320796909941, "grad_norm": 0.09675489366054535, "learning_rate": 0.00011475676775900672, "loss": 0.9872, "step": 2100 }, { "epoch": 0.8542386663956089, "grad_norm": 0.09282051771879196, "learning_rate": 0.00011471605943415428, "loss": 0.9191, "step": 2101 }, { "epoch": 0.8546452531002237, "grad_norm": 0.09295305609703064, "learning_rate": 0.00011467535110930185, "loss": 0.9979, "step": 2102 }, { "epoch": 0.8550518398048383, "grad_norm": 
0.09186626225709915, "learning_rate": 0.00011463464278444942, "loss": 0.9737, "step": 2103 }, { "epoch": 0.8554584265094531, "grad_norm": 0.09875518828630447, "learning_rate": 0.00011459393445959699, "loss": 0.9602, "step": 2104 }, { "epoch": 0.8558650132140679, "grad_norm": 0.09007591754198074, "learning_rate": 0.00011455322613474456, "loss": 0.9516, "step": 2105 }, { "epoch": 0.8562715999186826, "grad_norm": 0.08967866748571396, "learning_rate": 0.00011451251780989214, "loss": 0.846, "step": 2106 }, { "epoch": 0.8566781866232974, "grad_norm": 0.08921065926551819, "learning_rate": 0.0001144718094850397, "loss": 0.912, "step": 2107 }, { "epoch": 0.8570847733279122, "grad_norm": 0.09793677181005478, "learning_rate": 0.00011443110116018727, "loss": 1.036, "step": 2108 }, { "epoch": 0.8574913600325269, "grad_norm": 0.08594641089439392, "learning_rate": 0.00011439039283533484, "loss": 0.9059, "step": 2109 }, { "epoch": 0.8578979467371417, "grad_norm": 0.09588687121868134, "learning_rate": 0.00011434968451048241, "loss": 1.0412, "step": 2110 }, { "epoch": 0.8583045334417565, "grad_norm": 0.09782074391841888, "learning_rate": 0.00011430897618562997, "loss": 0.9654, "step": 2111 }, { "epoch": 0.8587111201463712, "grad_norm": 0.0923461988568306, "learning_rate": 0.00011426826786077753, "loss": 0.8857, "step": 2112 }, { "epoch": 0.859117706850986, "grad_norm": 0.09952476620674133, "learning_rate": 0.0001142275595359251, "loss": 1.1011, "step": 2113 }, { "epoch": 0.8595242935556008, "grad_norm": 0.09214503318071365, "learning_rate": 0.00011418685121107266, "loss": 1.0602, "step": 2114 }, { "epoch": 0.8599308802602155, "grad_norm": 0.08914364874362946, "learning_rate": 0.00011414614288622023, "loss": 0.9854, "step": 2115 }, { "epoch": 0.8603374669648303, "grad_norm": 0.07836323231458664, "learning_rate": 0.0001141054345613678, "loss": 0.8843, "step": 2116 }, { "epoch": 0.8607440536694451, "grad_norm": 0.09170486778020859, "learning_rate": 0.00011406472623651537, "loss": 
1.0271, "step": 2117 }, { "epoch": 0.8611506403740598, "grad_norm": 0.1003408208489418, "learning_rate": 0.00011402401791166295, "loss": 1.0277, "step": 2118 }, { "epoch": 0.8615572270786745, "grad_norm": 0.09307452291250229, "learning_rate": 0.00011398330958681052, "loss": 0.9288, "step": 2119 }, { "epoch": 0.8619638137832892, "grad_norm": 0.0927853137254715, "learning_rate": 0.00011394260126195808, "loss": 0.9514, "step": 2120 }, { "epoch": 0.862370400487904, "grad_norm": 0.08969385176897049, "learning_rate": 0.00011390189293710565, "loss": 0.9732, "step": 2121 }, { "epoch": 0.8627769871925188, "grad_norm": 0.09284186363220215, "learning_rate": 0.00011386118461225322, "loss": 0.9383, "step": 2122 }, { "epoch": 0.8631835738971335, "grad_norm": 0.09814801067113876, "learning_rate": 0.00011382047628740077, "loss": 1.1479, "step": 2123 }, { "epoch": 0.8635901606017483, "grad_norm": 0.09331676363945007, "learning_rate": 0.00011377976796254834, "loss": 1.0516, "step": 2124 }, { "epoch": 0.8639967473063631, "grad_norm": 0.1085168719291687, "learning_rate": 0.00011373905963769591, "loss": 1.0429, "step": 2125 }, { "epoch": 0.8644033340109778, "grad_norm": 0.084463931620121, "learning_rate": 0.00011369835131284348, "loss": 0.8698, "step": 2126 }, { "epoch": 0.8648099207155926, "grad_norm": 0.08983217924833298, "learning_rate": 0.00011365764298799104, "loss": 0.9252, "step": 2127 }, { "epoch": 0.8652165074202074, "grad_norm": 0.09315849840641022, "learning_rate": 0.00011361693466313861, "loss": 0.9502, "step": 2128 }, { "epoch": 0.8656230941248221, "grad_norm": 0.08930740505456924, "learning_rate": 0.00011357622633828619, "loss": 0.9571, "step": 2129 }, { "epoch": 0.8660296808294369, "grad_norm": 0.09786850959062576, "learning_rate": 0.00011353551801343376, "loss": 0.9578, "step": 2130 }, { "epoch": 0.8664362675340517, "grad_norm": 0.09109771996736526, "learning_rate": 0.00011349480968858133, "loss": 1.0178, "step": 2131 }, { "epoch": 0.8668428542386664, "grad_norm": 
0.09116113185882568, "learning_rate": 0.0001134541013637289, "loss": 0.9807, "step": 2132 }, { "epoch": 0.8672494409432812, "grad_norm": 0.08628479391336441, "learning_rate": 0.00011341339303887646, "loss": 0.8486, "step": 2133 }, { "epoch": 0.867656027647896, "grad_norm": 0.08679687231779099, "learning_rate": 0.00011337268471402403, "loss": 0.9621, "step": 2134 }, { "epoch": 0.8680626143525106, "grad_norm": 0.08852676302194595, "learning_rate": 0.00011333197638917158, "loss": 1.0283, "step": 2135 }, { "epoch": 0.8684692010571254, "grad_norm": 0.0838993713259697, "learning_rate": 0.00011329126806431915, "loss": 0.9123, "step": 2136 }, { "epoch": 0.8688757877617402, "grad_norm": 0.09657544642686844, "learning_rate": 0.00011325055973946672, "loss": 1.0619, "step": 2137 }, { "epoch": 0.8692823744663549, "grad_norm": 0.0973362997174263, "learning_rate": 0.00011320985141461429, "loss": 1.0232, "step": 2138 }, { "epoch": 0.8696889611709697, "grad_norm": 0.09646733105182648, "learning_rate": 0.00011316914308976185, "loss": 0.972, "step": 2139 }, { "epoch": 0.8700955478755845, "grad_norm": 0.09988803416490555, "learning_rate": 0.00011312843476490942, "loss": 1.0555, "step": 2140 }, { "epoch": 0.8705021345801992, "grad_norm": 0.08326305449008942, "learning_rate": 0.000113087726440057, "loss": 0.8427, "step": 2141 }, { "epoch": 0.870908721284814, "grad_norm": 0.08908620476722717, "learning_rate": 0.00011304701811520457, "loss": 0.9304, "step": 2142 }, { "epoch": 0.8713153079894288, "grad_norm": 0.09493114799261093, "learning_rate": 0.00011300630979035214, "loss": 0.9481, "step": 2143 }, { "epoch": 0.8717218946940435, "grad_norm": 0.09405462443828583, "learning_rate": 0.0001129656014654997, "loss": 0.8995, "step": 2144 }, { "epoch": 0.8721284813986583, "grad_norm": 0.09000107645988464, "learning_rate": 0.00011292489314064727, "loss": 0.9969, "step": 2145 }, { "epoch": 0.872535068103273, "grad_norm": 0.08611016720533371, "learning_rate": 0.00011288418481579484, "loss": 0.9461, 
"step": 2146 }, { "epoch": 0.8729416548078878, "grad_norm": 0.09909865260124207, "learning_rate": 0.0001128434764909424, "loss": 1.0668, "step": 2147 }, { "epoch": 0.8733482415125026, "grad_norm": 0.09296669065952301, "learning_rate": 0.00011280276816608996, "loss": 1.0196, "step": 2148 }, { "epoch": 0.8737548282171173, "grad_norm": 0.09515411406755447, "learning_rate": 0.00011276205984123753, "loss": 0.9542, "step": 2149 }, { "epoch": 0.874161414921732, "grad_norm": 0.1016170084476471, "learning_rate": 0.0001127213515163851, "loss": 1.0545, "step": 2150 }, { "epoch": 0.8745680016263468, "grad_norm": 0.09408387541770935, "learning_rate": 0.00011268064319153267, "loss": 0.9852, "step": 2151 }, { "epoch": 0.8749745883309615, "grad_norm": 0.09204485267400742, "learning_rate": 0.00011263993486668025, "loss": 0.9893, "step": 2152 }, { "epoch": 0.8753811750355763, "grad_norm": 0.10141453891992569, "learning_rate": 0.00011259922654182782, "loss": 0.9496, "step": 2153 }, { "epoch": 0.8757877617401911, "grad_norm": 0.09088826179504395, "learning_rate": 0.00011255851821697538, "loss": 0.9073, "step": 2154 }, { "epoch": 0.8761943484448058, "grad_norm": 0.09122118353843689, "learning_rate": 0.00011251780989212295, "loss": 0.9927, "step": 2155 }, { "epoch": 0.8766009351494206, "grad_norm": 0.08325305581092834, "learning_rate": 0.00011247710156727052, "loss": 0.8863, "step": 2156 }, { "epoch": 0.8770075218540354, "grad_norm": 0.09161413460969925, "learning_rate": 0.00011243639324241809, "loss": 0.9652, "step": 2157 }, { "epoch": 0.8774141085586501, "grad_norm": 0.08764609694480896, "learning_rate": 0.00011239568491756565, "loss": 0.9851, "step": 2158 }, { "epoch": 0.8778206952632649, "grad_norm": 0.09217865765094757, "learning_rate": 0.00011235497659271321, "loss": 0.9546, "step": 2159 }, { "epoch": 0.8782272819678797, "grad_norm": 0.08746439218521118, "learning_rate": 0.00011231426826786078, "loss": 0.9115, "step": 2160 }, { "epoch": 0.8786338686724944, "grad_norm": 
0.09703024476766586, "learning_rate": 0.00011227355994300834, "loss": 1.0464, "step": 2161 }, { "epoch": 0.8790404553771092, "grad_norm": 0.08776511996984482, "learning_rate": 0.00011223285161815591, "loss": 0.9828, "step": 2162 }, { "epoch": 0.879447042081724, "grad_norm": 0.09440065920352936, "learning_rate": 0.00011219214329330348, "loss": 1.0458, "step": 2163 }, { "epoch": 0.8798536287863387, "grad_norm": 0.08808255940675735, "learning_rate": 0.00011215143496845106, "loss": 0.9835, "step": 2164 }, { "epoch": 0.8802602154909535, "grad_norm": 0.09321518242359161, "learning_rate": 0.00011211072664359863, "loss": 0.9592, "step": 2165 }, { "epoch": 0.8806668021955683, "grad_norm": 0.08485117554664612, "learning_rate": 0.0001120700183187462, "loss": 0.8574, "step": 2166 }, { "epoch": 0.8810733889001829, "grad_norm": 0.09101716428995132, "learning_rate": 0.00011202930999389376, "loss": 1.0562, "step": 2167 }, { "epoch": 0.8814799756047977, "grad_norm": 0.0868394672870636, "learning_rate": 0.00011198860166904133, "loss": 0.9244, "step": 2168 }, { "epoch": 0.8818865623094125, "grad_norm": 0.09465855360031128, "learning_rate": 0.0001119478933441889, "loss": 1.0169, "step": 2169 }, { "epoch": 0.8822931490140272, "grad_norm": 0.08937587589025497, "learning_rate": 0.00011190718501933645, "loss": 1.0126, "step": 2170 }, { "epoch": 0.882699735718642, "grad_norm": 0.09273424744606018, "learning_rate": 0.00011186647669448402, "loss": 0.9194, "step": 2171 }, { "epoch": 0.8831063224232567, "grad_norm": 0.09193231910467148, "learning_rate": 0.00011182576836963159, "loss": 1.0218, "step": 2172 }, { "epoch": 0.8835129091278715, "grad_norm": 0.09555093199014664, "learning_rate": 0.00011178506004477915, "loss": 0.9679, "step": 2173 }, { "epoch": 0.8839194958324863, "grad_norm": 0.09123765677213669, "learning_rate": 0.00011174435171992672, "loss": 0.9842, "step": 2174 }, { "epoch": 0.884326082537101, "grad_norm": 0.0927356630563736, "learning_rate": 0.0001117036433950743, "loss": 
0.9448, "step": 2175 }, { "epoch": 0.8847326692417158, "grad_norm": 0.09687252342700958, "learning_rate": 0.00011166293507022187, "loss": 1.0458, "step": 2176 }, { "epoch": 0.8851392559463306, "grad_norm": 0.10106469690799713, "learning_rate": 0.00011162222674536944, "loss": 0.9754, "step": 2177 }, { "epoch": 0.8855458426509453, "grad_norm": 0.09762795269489288, "learning_rate": 0.000111581518420517, "loss": 0.922, "step": 2178 }, { "epoch": 0.8859524293555601, "grad_norm": 0.09456496685743332, "learning_rate": 0.00011154081009566457, "loss": 0.9345, "step": 2179 }, { "epoch": 0.8863590160601749, "grad_norm": 0.09217999875545502, "learning_rate": 0.00011150010177081214, "loss": 1.0163, "step": 2180 }, { "epoch": 0.8867656027647896, "grad_norm": 0.0955888032913208, "learning_rate": 0.00011145939344595971, "loss": 0.9464, "step": 2181 }, { "epoch": 0.8871721894694043, "grad_norm": 0.09351805597543716, "learning_rate": 0.00011141868512110726, "loss": 0.9911, "step": 2182 }, { "epoch": 0.8875787761740191, "grad_norm": 0.08360351622104645, "learning_rate": 0.00011137797679625483, "loss": 0.8656, "step": 2183 }, { "epoch": 0.8879853628786338, "grad_norm": 0.09139275550842285, "learning_rate": 0.0001113372684714024, "loss": 0.9629, "step": 2184 }, { "epoch": 0.8883919495832486, "grad_norm": 0.0988682433962822, "learning_rate": 0.00011129656014654997, "loss": 0.9856, "step": 2185 }, { "epoch": 0.8887985362878634, "grad_norm": 0.098371222615242, "learning_rate": 0.00011125585182169753, "loss": 1.0566, "step": 2186 }, { "epoch": 0.8892051229924781, "grad_norm": 0.09045372158288956, "learning_rate": 0.00011121514349684511, "loss": 0.9015, "step": 2187 }, { "epoch": 0.8896117096970929, "grad_norm": 0.09395705908536911, "learning_rate": 0.00011117443517199268, "loss": 1.0059, "step": 2188 }, { "epoch": 0.8900182964017077, "grad_norm": 0.09204548597335815, "learning_rate": 0.00011113372684714025, "loss": 1.0135, "step": 2189 }, { "epoch": 0.8904248831063224, "grad_norm": 
0.08476635068655014, "learning_rate": 0.00011109301852228782, "loss": 0.9098, "step": 2190 }, { "epoch": 0.8908314698109372, "grad_norm": 0.09019143879413605, "learning_rate": 0.00011105231019743539, "loss": 0.9087, "step": 2191 }, { "epoch": 0.891238056515552, "grad_norm": 0.0935545563697815, "learning_rate": 0.00011101160187258295, "loss": 1.0204, "step": 2192 }, { "epoch": 0.8916446432201667, "grad_norm": 0.09029703587293625, "learning_rate": 0.00011097089354773052, "loss": 0.9913, "step": 2193 }, { "epoch": 0.8920512299247815, "grad_norm": 0.0886225774884224, "learning_rate": 0.00011093018522287807, "loss": 0.9958, "step": 2194 }, { "epoch": 0.8924578166293963, "grad_norm": 0.09101995080709457, "learning_rate": 0.00011088947689802564, "loss": 0.918, "step": 2195 }, { "epoch": 0.892864403334011, "grad_norm": 0.10184985399246216, "learning_rate": 0.00011084876857317321, "loss": 1.1037, "step": 2196 }, { "epoch": 0.8932709900386258, "grad_norm": 0.09409435093402863, "learning_rate": 0.00011080806024832078, "loss": 0.9088, "step": 2197 }, { "epoch": 0.8936775767432404, "grad_norm": 0.09551674872636795, "learning_rate": 0.00011076735192346836, "loss": 1.0379, "step": 2198 }, { "epoch": 0.8940841634478552, "grad_norm": 0.08619996160268784, "learning_rate": 0.00011072664359861593, "loss": 0.9068, "step": 2199 }, { "epoch": 0.89449075015247, "grad_norm": 0.09373293071985245, "learning_rate": 0.0001106859352737635, "loss": 0.9394, "step": 2200 }, { "epoch": 0.8948973368570847, "grad_norm": 0.09360924363136292, "learning_rate": 0.00011064522694891106, "loss": 0.918, "step": 2201 }, { "epoch": 0.8953039235616995, "grad_norm": 0.08794824033975601, "learning_rate": 0.00011060451862405863, "loss": 0.9127, "step": 2202 }, { "epoch": 0.8957105102663143, "grad_norm": 0.09011366963386536, "learning_rate": 0.0001105638102992062, "loss": 0.9744, "step": 2203 }, { "epoch": 0.896117096970929, "grad_norm": 0.09070491790771484, "learning_rate": 0.00011052310197435376, "loss": 0.9182, 
"step": 2204 }, { "epoch": 0.8965236836755438, "grad_norm": 0.09090661257505417, "learning_rate": 0.00011048239364950133, "loss": 0.8662, "step": 2205 }, { "epoch": 0.8969302703801586, "grad_norm": 0.1035584807395935, "learning_rate": 0.00011044168532464889, "loss": 1.0132, "step": 2206 }, { "epoch": 0.8973368570847733, "grad_norm": 0.09471878409385681, "learning_rate": 0.00011040097699979645, "loss": 0.9183, "step": 2207 }, { "epoch": 0.8977434437893881, "grad_norm": 0.08386964350938797, "learning_rate": 0.00011036026867494402, "loss": 0.8727, "step": 2208 }, { "epoch": 0.8981500304940029, "grad_norm": 0.09777465462684631, "learning_rate": 0.00011031956035009159, "loss": 1.1244, "step": 2209 }, { "epoch": 0.8985566171986176, "grad_norm": 0.0950189158320427, "learning_rate": 0.00011027885202523917, "loss": 0.9494, "step": 2210 }, { "epoch": 0.8989632039032324, "grad_norm": 0.10297118872404099, "learning_rate": 0.00011023814370038674, "loss": 1.0345, "step": 2211 }, { "epoch": 0.8993697906078472, "grad_norm": 0.10186666250228882, "learning_rate": 0.0001101974353755343, "loss": 1.0064, "step": 2212 }, { "epoch": 0.8997763773124619, "grad_norm": 0.09332112222909927, "learning_rate": 0.00011015672705068187, "loss": 0.9915, "step": 2213 }, { "epoch": 0.9001829640170766, "grad_norm": 0.09262728691101074, "learning_rate": 0.00011011601872582944, "loss": 0.9909, "step": 2214 }, { "epoch": 0.9005895507216914, "grad_norm": 0.08695352077484131, "learning_rate": 0.00011007531040097701, "loss": 0.9143, "step": 2215 }, { "epoch": 0.9009961374263061, "grad_norm": 0.09473065286874771, "learning_rate": 0.00011003460207612458, "loss": 0.9297, "step": 2216 }, { "epoch": 0.9014027241309209, "grad_norm": 0.09609273076057434, "learning_rate": 0.00010999389375127213, "loss": 0.9357, "step": 2217 }, { "epoch": 0.9018093108355357, "grad_norm": 0.09273882955312729, "learning_rate": 0.0001099531854264197, "loss": 0.9215, "step": 2218 }, { "epoch": 0.9022158975401504, "grad_norm": 
0.09666993468999863, "learning_rate": 0.00010991247710156727, "loss": 1.0015, "step": 2219 }, { "epoch": 0.9026224842447652, "grad_norm": 0.09521298855543137, "learning_rate": 0.00010987176877671483, "loss": 1.0203, "step": 2220 }, { "epoch": 0.90302907094938, "grad_norm": 0.08719142526388168, "learning_rate": 0.00010983106045186243, "loss": 0.8722, "step": 2221 }, { "epoch": 0.9034356576539947, "grad_norm": 0.09398588538169861, "learning_rate": 0.00010979035212700998, "loss": 1.0722, "step": 2222 }, { "epoch": 0.9038422443586095, "grad_norm": 0.09667246043682098, "learning_rate": 0.00010974964380215755, "loss": 1.0235, "step": 2223 }, { "epoch": 0.9042488310632242, "grad_norm": 0.08866921067237854, "learning_rate": 0.00010970893547730512, "loss": 0.9155, "step": 2224 }, { "epoch": 0.904655417767839, "grad_norm": 0.08643452823162079, "learning_rate": 0.00010966822715245268, "loss": 0.9939, "step": 2225 }, { "epoch": 0.9050620044724538, "grad_norm": 0.09741934388875961, "learning_rate": 0.00010962751882760025, "loss": 1.094, "step": 2226 }, { "epoch": 0.9054685911770685, "grad_norm": 0.09106621891260147, "learning_rate": 0.00010958681050274782, "loss": 0.9378, "step": 2227 }, { "epoch": 0.9058751778816833, "grad_norm": 0.09541244804859161, "learning_rate": 0.00010954610217789539, "loss": 1.0023, "step": 2228 }, { "epoch": 0.906281764586298, "grad_norm": 0.09381993860006332, "learning_rate": 0.00010950539385304294, "loss": 1.0045, "step": 2229 }, { "epoch": 0.9066883512909127, "grad_norm": 0.09603835642337799, "learning_rate": 0.00010946468552819051, "loss": 1.0988, "step": 2230 }, { "epoch": 0.9070949379955275, "grad_norm": 0.10151727497577667, "learning_rate": 0.00010942397720333808, "loss": 1.0537, "step": 2231 }, { "epoch": 0.9075015247001423, "grad_norm": 0.09192585945129395, "learning_rate": 0.00010938326887848564, "loss": 0.9195, "step": 2232 }, { "epoch": 0.907908111404757, "grad_norm": 0.09959591180086136, "learning_rate": 0.00010934256055363324, "loss": 
1.0567, "step": 2233 }, { "epoch": 0.9083146981093718, "grad_norm": 0.09753983467817307, "learning_rate": 0.0001093018522287808, "loss": 0.9355, "step": 2234 }, { "epoch": 0.9087212848139866, "grad_norm": 0.10025233775377274, "learning_rate": 0.00010926114390392836, "loss": 0.9571, "step": 2235 }, { "epoch": 0.9091278715186013, "grad_norm": 0.09255032986402512, "learning_rate": 0.00010922043557907593, "loss": 1.0291, "step": 2236 }, { "epoch": 0.9095344582232161, "grad_norm": 0.09453842043876648, "learning_rate": 0.0001091797272542235, "loss": 0.9489, "step": 2237 }, { "epoch": 0.9099410449278309, "grad_norm": 0.09328801184892654, "learning_rate": 0.00010913901892937106, "loss": 1.0596, "step": 2238 }, { "epoch": 0.9103476316324456, "grad_norm": 0.08745749294757843, "learning_rate": 0.00010909831060451863, "loss": 0.846, "step": 2239 }, { "epoch": 0.9107542183370604, "grad_norm": 0.09585551172494888, "learning_rate": 0.0001090576022796662, "loss": 0.8888, "step": 2240 }, { "epoch": 0.9111608050416752, "grad_norm": 0.09437873214483261, "learning_rate": 0.00010901689395481375, "loss": 1.0954, "step": 2241 }, { "epoch": 0.9115673917462899, "grad_norm": 0.09190462529659271, "learning_rate": 0.00010897618562996132, "loss": 0.9484, "step": 2242 }, { "epoch": 0.9119739784509047, "grad_norm": 0.09598547965288162, "learning_rate": 0.00010893547730510889, "loss": 0.9765, "step": 2243 }, { "epoch": 0.9123805651555195, "grad_norm": 0.08472473174333572, "learning_rate": 0.00010889476898025648, "loss": 0.914, "step": 2244 }, { "epoch": 0.9127871518601341, "grad_norm": 0.09113691002130508, "learning_rate": 0.00010885406065540404, "loss": 1.0507, "step": 2245 }, { "epoch": 0.913193738564749, "grad_norm": 0.09340670704841614, "learning_rate": 0.0001088133523305516, "loss": 0.9908, "step": 2246 }, { "epoch": 0.9136003252693637, "grad_norm": 0.09673475474119186, "learning_rate": 0.00010877264400569917, "loss": 0.966, "step": 2247 }, { "epoch": 0.9140069119739784, "grad_norm": 
0.09419335424900055, "learning_rate": 0.00010873193568084674, "loss": 0.9484, "step": 2248 }, { "epoch": 0.9144134986785932, "grad_norm": 0.09127677232027054, "learning_rate": 0.00010869122735599431, "loss": 0.9786, "step": 2249 }, { "epoch": 0.9148200853832079, "grad_norm": 0.09134241938591003, "learning_rate": 0.00010865051903114188, "loss": 0.9651, "step": 2250 }, { "epoch": 0.9152266720878227, "grad_norm": 0.08164233714342117, "learning_rate": 0.00010860981070628944, "loss": 0.8301, "step": 2251 }, { "epoch": 0.9156332587924375, "grad_norm": 0.09648903459310532, "learning_rate": 0.00010856910238143701, "loss": 0.9931, "step": 2252 }, { "epoch": 0.9160398454970522, "grad_norm": 0.09599076956510544, "learning_rate": 0.00010852839405658457, "loss": 1.1588, "step": 2253 }, { "epoch": 0.916446432201667, "grad_norm": 0.09624163806438446, "learning_rate": 0.00010848768573173213, "loss": 1.0291, "step": 2254 }, { "epoch": 0.9168530189062818, "grad_norm": 0.09379248321056366, "learning_rate": 0.0001084469774068797, "loss": 1.0189, "step": 2255 }, { "epoch": 0.9172596056108965, "grad_norm": 0.1004246398806572, "learning_rate": 0.0001084062690820273, "loss": 1.0819, "step": 2256 }, { "epoch": 0.9176661923155113, "grad_norm": 0.0896550863981247, "learning_rate": 0.00010836556075717485, "loss": 0.9514, "step": 2257 }, { "epoch": 0.9180727790201261, "grad_norm": 0.08566062897443771, "learning_rate": 0.00010832485243232242, "loss": 0.9827, "step": 2258 }, { "epoch": 0.9184793657247408, "grad_norm": 0.09392201900482178, "learning_rate": 0.00010828414410746998, "loss": 1.0118, "step": 2259 }, { "epoch": 0.9188859524293556, "grad_norm": 0.09124386310577393, "learning_rate": 0.00010824343578261755, "loss": 0.9892, "step": 2260 }, { "epoch": 0.9192925391339704, "grad_norm": 0.10101054608821869, "learning_rate": 0.00010820272745776512, "loss": 1.1112, "step": 2261 }, { "epoch": 0.919699125838585, "grad_norm": 0.0995619148015976, "learning_rate": 0.00010816201913291269, "loss": 
0.9978, "step": 2262 }, { "epoch": 0.9201057125431998, "grad_norm": 0.10450758039951324, "learning_rate": 0.00010812131080806025, "loss": 1.0496, "step": 2263 }, { "epoch": 0.9205122992478146, "grad_norm": 0.08600231260061264, "learning_rate": 0.00010808060248320781, "loss": 0.9513, "step": 2264 }, { "epoch": 0.9209188859524293, "grad_norm": 0.09189002215862274, "learning_rate": 0.00010803989415835538, "loss": 0.9342, "step": 2265 }, { "epoch": 0.9213254726570441, "grad_norm": 0.0933215469121933, "learning_rate": 0.00010799918583350294, "loss": 0.9806, "step": 2266 }, { "epoch": 0.9217320593616589, "grad_norm": 0.09535648673772812, "learning_rate": 0.00010795847750865054, "loss": 1.045, "step": 2267 }, { "epoch": 0.9221386460662736, "grad_norm": 0.09350398182868958, "learning_rate": 0.0001079177691837981, "loss": 0.948, "step": 2268 }, { "epoch": 0.9225452327708884, "grad_norm": 0.09485659748315811, "learning_rate": 0.00010787706085894566, "loss": 1.0113, "step": 2269 }, { "epoch": 0.9229518194755032, "grad_norm": 0.08902882784605026, "learning_rate": 0.00010783635253409323, "loss": 0.9287, "step": 2270 }, { "epoch": 0.9233584061801179, "grad_norm": 0.09547727555036545, "learning_rate": 0.0001077956442092408, "loss": 0.9704, "step": 2271 }, { "epoch": 0.9237649928847327, "grad_norm": 0.0938442051410675, "learning_rate": 0.00010775493588438836, "loss": 1.0824, "step": 2272 }, { "epoch": 0.9241715795893475, "grad_norm": 0.09499689936637878, "learning_rate": 0.00010771422755953593, "loss": 1.0162, "step": 2273 }, { "epoch": 0.9245781662939622, "grad_norm": 0.08982361853122711, "learning_rate": 0.0001076735192346835, "loss": 1.0051, "step": 2274 }, { "epoch": 0.924984752998577, "grad_norm": 0.08913452923297882, "learning_rate": 0.00010763281090983107, "loss": 0.9585, "step": 2275 }, { "epoch": 0.9253913397031917, "grad_norm": 0.09322965890169144, "learning_rate": 0.00010759210258497862, "loss": 0.9951, "step": 2276 }, { "epoch": 0.9257979264078064, "grad_norm": 
0.08852788060903549, "learning_rate": 0.00010755139426012619, "loss": 0.8826, "step": 2277 }, { "epoch": 0.9262045131124212, "grad_norm": 0.08934798091650009, "learning_rate": 0.00010751068593527376, "loss": 0.9592, "step": 2278 }, { "epoch": 0.9266110998170359, "grad_norm": 0.08754114806652069, "learning_rate": 0.00010746997761042135, "loss": 0.8947, "step": 2279 }, { "epoch": 0.9270176865216507, "grad_norm": 0.08998506516218185, "learning_rate": 0.00010742926928556892, "loss": 0.9905, "step": 2280 }, { "epoch": 0.9274242732262655, "grad_norm": 0.09599866718053818, "learning_rate": 0.00010738856096071647, "loss": 0.9931, "step": 2281 }, { "epoch": 0.9278308599308802, "grad_norm": 0.0930427685379982, "learning_rate": 0.00010734785263586404, "loss": 1.0059, "step": 2282 }, { "epoch": 0.928237446635495, "grad_norm": 0.0885154977440834, "learning_rate": 0.00010730714431101161, "loss": 0.9802, "step": 2283 }, { "epoch": 0.9286440333401098, "grad_norm": 0.0902063325047493, "learning_rate": 0.00010726643598615918, "loss": 0.9687, "step": 2284 }, { "epoch": 0.9290506200447245, "grad_norm": 0.08460281789302826, "learning_rate": 0.00010722572766130674, "loss": 0.8834, "step": 2285 }, { "epoch": 0.9294572067493393, "grad_norm": 0.0936511978507042, "learning_rate": 0.00010718501933645431, "loss": 1.0907, "step": 2286 }, { "epoch": 0.9298637934539541, "grad_norm": 0.09102717787027359, "learning_rate": 0.00010714431101160188, "loss": 0.9573, "step": 2287 }, { "epoch": 0.9302703801585688, "grad_norm": 0.08209431916475296, "learning_rate": 0.00010710360268674943, "loss": 0.79, "step": 2288 }, { "epoch": 0.9306769668631836, "grad_norm": 0.09181005507707596, "learning_rate": 0.000107062894361897, "loss": 1.0394, "step": 2289 }, { "epoch": 0.9310835535677984, "grad_norm": 0.09006737917661667, "learning_rate": 0.0001070221860370446, "loss": 0.976, "step": 2290 }, { "epoch": 0.9314901402724131, "grad_norm": 0.08806903660297394, "learning_rate": 0.00010698147771219216, "loss": 0.9429, 
"step": 2291 }, { "epoch": 0.9318967269770279, "grad_norm": 0.09663230180740356, "learning_rate": 0.00010694076938733973, "loss": 0.9936, "step": 2292 }, { "epoch": 0.9323033136816427, "grad_norm": 0.09236756712198257, "learning_rate": 0.00010690006106248728, "loss": 0.9775, "step": 2293 }, { "epoch": 0.9327099003862573, "grad_norm": 0.0875551626086235, "learning_rate": 0.00010685935273763485, "loss": 0.9222, "step": 2294 }, { "epoch": 0.9331164870908721, "grad_norm": 0.09144583344459534, "learning_rate": 0.00010681864441278242, "loss": 0.9166, "step": 2295 }, { "epoch": 0.9335230737954869, "grad_norm": 0.09605292975902557, "learning_rate": 0.00010677793608792999, "loss": 1.0085, "step": 2296 }, { "epoch": 0.9339296605001016, "grad_norm": 0.09013127535581589, "learning_rate": 0.00010673722776307755, "loss": 0.9473, "step": 2297 }, { "epoch": 0.9343362472047164, "grad_norm": 0.09012243151664734, "learning_rate": 0.00010669651943822512, "loss": 0.953, "step": 2298 }, { "epoch": 0.9347428339093312, "grad_norm": 0.0961398184299469, "learning_rate": 0.00010665581111337269, "loss": 1.0658, "step": 2299 }, { "epoch": 0.9351494206139459, "grad_norm": 0.09278837591409683, "learning_rate": 0.00010661510278852024, "loss": 0.9739, "step": 2300 }, { "epoch": 0.9355560073185607, "grad_norm": 0.08477824926376343, "learning_rate": 0.00010657439446366781, "loss": 0.9376, "step": 2301 }, { "epoch": 0.9359625940231754, "grad_norm": 0.08817529678344727, "learning_rate": 0.0001065336861388154, "loss": 0.9371, "step": 2302 }, { "epoch": 0.9363691807277902, "grad_norm": 0.09441924840211868, "learning_rate": 0.00010649297781396297, "loss": 0.8977, "step": 2303 }, { "epoch": 0.936775767432405, "grad_norm": 0.09430365264415741, "learning_rate": 0.00010645226948911053, "loss": 1.0525, "step": 2304 }, { "epoch": 0.9371823541370197, "grad_norm": 0.09169165045022964, "learning_rate": 0.0001064115611642581, "loss": 0.9261, "step": 2305 }, { "epoch": 0.9375889408416345, "grad_norm": 
0.09943647682666779, "learning_rate": 0.00010637085283940566, "loss": 0.9956, "step": 2306 }, { "epoch": 0.9379955275462493, "grad_norm": 0.0941019132733345, "learning_rate": 0.00010633014451455323, "loss": 1.0029, "step": 2307 }, { "epoch": 0.938402114250864, "grad_norm": 0.08687194436788559, "learning_rate": 0.0001062894361897008, "loss": 0.9077, "step": 2308 }, { "epoch": 0.9388087009554787, "grad_norm": 0.09248825162649155, "learning_rate": 0.00010624872786484837, "loss": 1.0412, "step": 2309 }, { "epoch": 0.9392152876600935, "grad_norm": 0.09985529631376266, "learning_rate": 0.00010620801953999593, "loss": 1.0573, "step": 2310 }, { "epoch": 0.9396218743647082, "grad_norm": 0.09216563403606415, "learning_rate": 0.00010616731121514349, "loss": 0.9448, "step": 2311 }, { "epoch": 0.940028461069323, "grad_norm": 0.092438243329525, "learning_rate": 0.00010612660289029106, "loss": 0.9679, "step": 2312 }, { "epoch": 0.9404350477739378, "grad_norm": 0.0857539102435112, "learning_rate": 0.00010608589456543865, "loss": 0.8766, "step": 2313 }, { "epoch": 0.9408416344785525, "grad_norm": 0.09243746846914291, "learning_rate": 0.00010604518624058622, "loss": 0.9536, "step": 2314 }, { "epoch": 0.9412482211831673, "grad_norm": 0.08617236465215683, "learning_rate": 0.00010600447791573379, "loss": 0.9518, "step": 2315 }, { "epoch": 0.9416548078877821, "grad_norm": 0.08910689502954483, "learning_rate": 0.00010596376959088134, "loss": 0.9602, "step": 2316 }, { "epoch": 0.9420613945923968, "grad_norm": 0.08643607795238495, "learning_rate": 0.00010592306126602891, "loss": 0.8827, "step": 2317 }, { "epoch": 0.9424679812970116, "grad_norm": 0.0912124440073967, "learning_rate": 0.00010588235294117647, "loss": 0.9965, "step": 2318 }, { "epoch": 0.9428745680016264, "grad_norm": 0.09088627249002457, "learning_rate": 0.00010584164461632404, "loss": 0.9025, "step": 2319 }, { "epoch": 0.9432811547062411, "grad_norm": 0.09329286962747574, "learning_rate": 0.00010580093629147161, "loss": 
0.9791, "step": 2320 }, { "epoch": 0.9436877414108559, "grad_norm": 0.10339915007352829, "learning_rate": 0.00010576022796661918, "loss": 1.0807, "step": 2321 }, { "epoch": 0.9440943281154707, "grad_norm": 0.09373354911804199, "learning_rate": 0.00010571951964176675, "loss": 0.9911, "step": 2322 }, { "epoch": 0.9445009148200854, "grad_norm": 0.10617939382791519, "learning_rate": 0.0001056788113169143, "loss": 1.0851, "step": 2323 }, { "epoch": 0.9449075015247002, "grad_norm": 0.09167637676000595, "learning_rate": 0.00010563810299206187, "loss": 0.9047, "step": 2324 }, { "epoch": 0.945314088229315, "grad_norm": 0.08472510427236557, "learning_rate": 0.00010559739466720946, "loss": 0.8727, "step": 2325 }, { "epoch": 0.9457206749339296, "grad_norm": 0.0884479507803917, "learning_rate": 0.00010555668634235703, "loss": 0.9784, "step": 2326 }, { "epoch": 0.9461272616385444, "grad_norm": 0.09533506631851196, "learning_rate": 0.0001055159780175046, "loss": 0.9641, "step": 2327 }, { "epoch": 0.9465338483431591, "grad_norm": 0.09487663954496384, "learning_rate": 0.00010547526969265215, "loss": 0.9594, "step": 2328 }, { "epoch": 0.9469404350477739, "grad_norm": 0.09608594328165054, "learning_rate": 0.00010543456136779972, "loss": 0.9552, "step": 2329 }, { "epoch": 0.9473470217523887, "grad_norm": 0.08777690678834915, "learning_rate": 0.00010539385304294729, "loss": 0.944, "step": 2330 }, { "epoch": 0.9477536084570034, "grad_norm": 0.09336721152067184, "learning_rate": 0.00010535314471809485, "loss": 0.9872, "step": 2331 }, { "epoch": 0.9481601951616182, "grad_norm": 0.0932617112994194, "learning_rate": 0.00010531243639324242, "loss": 1.0259, "step": 2332 }, { "epoch": 0.948566781866233, "grad_norm": 0.09936727583408356, "learning_rate": 0.00010527172806838999, "loss": 1.0559, "step": 2333 }, { "epoch": 0.9489733685708477, "grad_norm": 0.08607706427574158, "learning_rate": 0.00010523101974353756, "loss": 0.8735, "step": 2334 }, { "epoch": 0.9493799552754625, "grad_norm": 
0.10083240270614624, "learning_rate": 0.00010519031141868511, "loss": 1.1199, "step": 2335 }, { "epoch": 0.9497865419800773, "grad_norm": 0.09380745142698288, "learning_rate": 0.0001051496030938327, "loss": 0.9708, "step": 2336 }, { "epoch": 0.950193128684692, "grad_norm": 0.09522271901369095, "learning_rate": 0.00010510889476898027, "loss": 0.9576, "step": 2337 }, { "epoch": 0.9505997153893068, "grad_norm": 0.08754262328147888, "learning_rate": 0.00010506818644412784, "loss": 0.8834, "step": 2338 }, { "epoch": 0.9510063020939216, "grad_norm": 0.09373676776885986, "learning_rate": 0.00010502747811927541, "loss": 1.0229, "step": 2339 }, { "epoch": 0.9514128887985362, "grad_norm": 0.09756851196289062, "learning_rate": 0.00010498676979442296, "loss": 1.0262, "step": 2340 }, { "epoch": 0.951819475503151, "grad_norm": 0.09419600665569305, "learning_rate": 0.00010494606146957053, "loss": 1.0049, "step": 2341 }, { "epoch": 0.9522260622077658, "grad_norm": 0.08849748224020004, "learning_rate": 0.0001049053531447181, "loss": 1.0045, "step": 2342 }, { "epoch": 0.9526326489123805, "grad_norm": 0.09651193022727966, "learning_rate": 0.00010486464481986567, "loss": 1.0209, "step": 2343 }, { "epoch": 0.9530392356169953, "grad_norm": 0.09986065328121185, "learning_rate": 0.00010482393649501323, "loss": 1.0789, "step": 2344 }, { "epoch": 0.9534458223216101, "grad_norm": 0.0957985445857048, "learning_rate": 0.0001047832281701608, "loss": 1.106, "step": 2345 }, { "epoch": 0.9538524090262248, "grad_norm": 0.1007857397198677, "learning_rate": 0.00010474251984530837, "loss": 1.027, "step": 2346 }, { "epoch": 0.9542589957308396, "grad_norm": 0.09330718219280243, "learning_rate": 0.00010470181152045592, "loss": 1.0046, "step": 2347 }, { "epoch": 0.9546655824354544, "grad_norm": 0.09503220021724701, "learning_rate": 0.00010466110319560352, "loss": 1.0119, "step": 2348 }, { "epoch": 0.9550721691400691, "grad_norm": 0.09526234120130539, "learning_rate": 0.00010462039487075109, "loss": 
0.9898, "step": 2349 }, { "epoch": 0.9554787558446839, "grad_norm": 0.0942670926451683, "learning_rate": 0.00010457968654589865, "loss": 1.0538, "step": 2350 }, { "epoch": 0.9558853425492987, "grad_norm": 0.09694371372461319, "learning_rate": 0.00010453897822104621, "loss": 0.9101, "step": 2351 }, { "epoch": 0.9562919292539134, "grad_norm": 0.09850834310054779, "learning_rate": 0.00010449826989619377, "loss": 1.0476, "step": 2352 }, { "epoch": 0.9566985159585282, "grad_norm": 0.09078159183263779, "learning_rate": 0.00010445756157134134, "loss": 0.8798, "step": 2353 }, { "epoch": 0.957105102663143, "grad_norm": 0.09196247905492783, "learning_rate": 0.00010441685324648891, "loss": 0.9571, "step": 2354 }, { "epoch": 0.9575116893677577, "grad_norm": 0.09725657850503922, "learning_rate": 0.00010437614492163648, "loss": 1.0229, "step": 2355 }, { "epoch": 0.9579182760723725, "grad_norm": 0.09602061659097672, "learning_rate": 0.00010433543659678404, "loss": 0.9666, "step": 2356 }, { "epoch": 0.9583248627769871, "grad_norm": 0.09440819919109344, "learning_rate": 0.00010429472827193161, "loss": 1.0165, "step": 2357 }, { "epoch": 0.9587314494816019, "grad_norm": 0.09775765985250473, "learning_rate": 0.00010425401994707917, "loss": 1.0927, "step": 2358 }, { "epoch": 0.9591380361862167, "grad_norm": 0.10038933902978897, "learning_rate": 0.00010421331162222676, "loss": 1.1155, "step": 2359 }, { "epoch": 0.9595446228908314, "grad_norm": 0.09265521913766861, "learning_rate": 0.00010417260329737433, "loss": 0.9965, "step": 2360 }, { "epoch": 0.9599512095954462, "grad_norm": 0.09679180383682251, "learning_rate": 0.0001041318949725219, "loss": 0.9484, "step": 2361 }, { "epoch": 0.960357796300061, "grad_norm": 0.09756863862276077, "learning_rate": 0.00010409118664766946, "loss": 0.9929, "step": 2362 }, { "epoch": 0.9607643830046757, "grad_norm": 0.09271581470966339, "learning_rate": 0.00010405047832281702, "loss": 0.9717, "step": 2363 }, { "epoch": 0.9611709697092905, "grad_norm": 
0.08519497513771057, "learning_rate": 0.00010400976999796459, "loss": 0.9248, "step": 2364 }, { "epoch": 0.9615775564139053, "grad_norm": 0.0930318683385849, "learning_rate": 0.00010396906167311215, "loss": 0.9269, "step": 2365 }, { "epoch": 0.96198414311852, "grad_norm": 0.0876484215259552, "learning_rate": 0.00010392835334825972, "loss": 0.8956, "step": 2366 }, { "epoch": 0.9623907298231348, "grad_norm": 0.10773497074842453, "learning_rate": 0.00010388764502340729, "loss": 1.0162, "step": 2367 }, { "epoch": 0.9627973165277496, "grad_norm": 0.10369701683521271, "learning_rate": 0.00010384693669855486, "loss": 1.0242, "step": 2368 }, { "epoch": 0.9632039032323643, "grad_norm": 0.09781001508235931, "learning_rate": 0.00010380622837370242, "loss": 0.9984, "step": 2369 }, { "epoch": 0.9636104899369791, "grad_norm": 0.09027720987796783, "learning_rate": 0.00010376552004884998, "loss": 0.9459, "step": 2370 }, { "epoch": 0.9640170766415939, "grad_norm": 0.0846111848950386, "learning_rate": 0.00010372481172399757, "loss": 0.8168, "step": 2371 }, { "epoch": 0.9644236633462085, "grad_norm": 0.09253893047571182, "learning_rate": 0.00010368410339914514, "loss": 1.036, "step": 2372 }, { "epoch": 0.9648302500508233, "grad_norm": 0.09075961261987686, "learning_rate": 0.00010364339507429271, "loss": 0.9765, "step": 2373 }, { "epoch": 0.9652368367554381, "grad_norm": 0.09227050840854645, "learning_rate": 0.00010360268674944028, "loss": 0.9577, "step": 2374 }, { "epoch": 0.9656434234600528, "grad_norm": 0.09381213039159775, "learning_rate": 0.00010356197842458783, "loss": 1.041, "step": 2375 }, { "epoch": 0.9660500101646676, "grad_norm": 0.08584290742874146, "learning_rate": 0.0001035212700997354, "loss": 0.7906, "step": 2376 }, { "epoch": 0.9664565968692824, "grad_norm": 0.09522596746683121, "learning_rate": 0.00010348056177488297, "loss": 0.9739, "step": 2377 }, { "epoch": 0.9668631835738971, "grad_norm": 0.09105250984430313, "learning_rate": 0.00010343985345003053, "loss": 
0.943, "step": 2378 }, { "epoch": 0.9672697702785119, "grad_norm": 0.09327445179224014, "learning_rate": 0.0001033991451251781, "loss": 1.0486, "step": 2379 }, { "epoch": 0.9676763569831267, "grad_norm": 0.08443416655063629, "learning_rate": 0.00010335843680032567, "loss": 0.8889, "step": 2380 }, { "epoch": 0.9680829436877414, "grad_norm": 0.09366993606090546, "learning_rate": 0.00010331772847547324, "loss": 0.9585, "step": 2381 }, { "epoch": 0.9684895303923562, "grad_norm": 0.1025518849492073, "learning_rate": 0.00010327702015062082, "loss": 0.9062, "step": 2382 }, { "epoch": 0.9688961170969709, "grad_norm": 0.08948516100645065, "learning_rate": 0.00010323631182576838, "loss": 0.9477, "step": 2383 }, { "epoch": 0.9693027038015857, "grad_norm": 0.09162997454404831, "learning_rate": 0.00010319560350091595, "loss": 0.9069, "step": 2384 }, { "epoch": 0.9697092905062005, "grad_norm": 0.09584391862154007, "learning_rate": 0.00010315489517606352, "loss": 0.9816, "step": 2385 }, { "epoch": 0.9701158772108152, "grad_norm": 0.08747036010026932, "learning_rate": 0.00010311418685121109, "loss": 0.9845, "step": 2386 }, { "epoch": 0.97052246391543, "grad_norm": 0.09000515937805176, "learning_rate": 0.00010307347852635864, "loss": 0.8898, "step": 2387 }, { "epoch": 0.9709290506200448, "grad_norm": 0.0957585796713829, "learning_rate": 0.00010303277020150621, "loss": 1.0053, "step": 2388 }, { "epoch": 0.9713356373246594, "grad_norm": 0.0985213965177536, "learning_rate": 0.00010299206187665378, "loss": 1.0988, "step": 2389 }, { "epoch": 0.9717422240292742, "grad_norm": 0.09285228699445724, "learning_rate": 0.00010295135355180134, "loss": 0.957, "step": 2390 }, { "epoch": 0.972148810733889, "grad_norm": 0.08875738829374313, "learning_rate": 0.00010291064522694891, "loss": 0.9324, "step": 2391 }, { "epoch": 0.9725553974385037, "grad_norm": 0.09840039908885956, "learning_rate": 0.00010286993690209648, "loss": 0.9047, "step": 2392 }, { "epoch": 0.9729619841431185, "grad_norm": 
0.09745080024003983, "learning_rate": 0.00010282922857724405, "loss": 1.0707, "step": 2393 }, { "epoch": 0.9733685708477333, "grad_norm": 0.09076414257287979, "learning_rate": 0.00010278852025239163, "loss": 0.947, "step": 2394 }, { "epoch": 0.973775157552348, "grad_norm": 0.08922093361616135, "learning_rate": 0.0001027478119275392, "loss": 0.8983, "step": 2395 }, { "epoch": 0.9741817442569628, "grad_norm": 0.09455031156539917, "learning_rate": 0.00010270710360268676, "loss": 1.0877, "step": 2396 }, { "epoch": 0.9745883309615776, "grad_norm": 0.09286132454872131, "learning_rate": 0.00010266639527783433, "loss": 0.98, "step": 2397 }, { "epoch": 0.9749949176661923, "grad_norm": 0.10121460258960724, "learning_rate": 0.00010262568695298189, "loss": 1.0906, "step": 2398 }, { "epoch": 0.9754015043708071, "grad_norm": 0.0891910120844841, "learning_rate": 0.00010258497862812945, "loss": 0.8889, "step": 2399 }, { "epoch": 0.9758080910754219, "grad_norm": 0.0938873440027237, "learning_rate": 0.00010254427030327702, "loss": 0.8787, "step": 2400 }, { "epoch": 0.9762146777800366, "grad_norm": 0.09117105603218079, "learning_rate": 0.00010250356197842459, "loss": 0.9053, "step": 2401 }, { "epoch": 0.9766212644846514, "grad_norm": 0.09840644896030426, "learning_rate": 0.00010246285365357216, "loss": 1.0462, "step": 2402 }, { "epoch": 0.9770278511892662, "grad_norm": 0.09379451721906662, "learning_rate": 0.00010242214532871972, "loss": 0.9617, "step": 2403 }, { "epoch": 0.9774344378938808, "grad_norm": 0.09142056852579117, "learning_rate": 0.00010238143700386729, "loss": 1.0022, "step": 2404 }, { "epoch": 0.9778410245984956, "grad_norm": 0.09325367957353592, "learning_rate": 0.00010234072867901487, "loss": 0.9356, "step": 2405 }, { "epoch": 0.9782476113031104, "grad_norm": 0.09714538604021072, "learning_rate": 0.00010230002035416244, "loss": 1.0685, "step": 2406 }, { "epoch": 0.9786541980077251, "grad_norm": 0.09502388536930084, "learning_rate": 0.00010225931202931001, "loss": 
1.0158, "step": 2407 }, { "epoch": 0.9790607847123399, "grad_norm": 0.09626177698373795, "learning_rate": 0.00010221860370445758, "loss": 1.0249, "step": 2408 }, { "epoch": 0.9794673714169546, "grad_norm": 0.09790710359811783, "learning_rate": 0.00010217789537960514, "loss": 0.9974, "step": 2409 }, { "epoch": 0.9798739581215694, "grad_norm": 0.0907469391822815, "learning_rate": 0.0001021371870547527, "loss": 0.994, "step": 2410 }, { "epoch": 0.9802805448261842, "grad_norm": 0.10248905420303345, "learning_rate": 0.00010209647872990026, "loss": 1.0214, "step": 2411 }, { "epoch": 0.9806871315307989, "grad_norm": 0.09504317492246628, "learning_rate": 0.00010205577040504783, "loss": 1.0642, "step": 2412 }, { "epoch": 0.9810937182354137, "grad_norm": 0.09868543595075607, "learning_rate": 0.0001020150620801954, "loss": 1.0595, "step": 2413 }, { "epoch": 0.9815003049400285, "grad_norm": 0.08648547530174255, "learning_rate": 0.00010197435375534297, "loss": 0.9273, "step": 2414 }, { "epoch": 0.9819068916446432, "grad_norm": 0.0870203897356987, "learning_rate": 0.00010193364543049054, "loss": 0.8661, "step": 2415 }, { "epoch": 0.982313478349258, "grad_norm": 0.09689280390739441, "learning_rate": 0.0001018929371056381, "loss": 1.0179, "step": 2416 }, { "epoch": 0.9827200650538728, "grad_norm": 0.09497373551130295, "learning_rate": 0.00010185222878078568, "loss": 0.9292, "step": 2417 }, { "epoch": 0.9831266517584875, "grad_norm": 0.09194166213274002, "learning_rate": 0.00010181152045593325, "loss": 0.969, "step": 2418 }, { "epoch": 0.9835332384631023, "grad_norm": 0.08828569948673248, "learning_rate": 0.00010177081213108082, "loss": 0.8936, "step": 2419 }, { "epoch": 0.983939825167717, "grad_norm": 0.095185786485672, "learning_rate": 0.00010173010380622839, "loss": 0.9859, "step": 2420 }, { "epoch": 0.9843464118723317, "grad_norm": 0.09699594974517822, "learning_rate": 0.00010168939548137595, "loss": 1.0568, "step": 2421 }, { "epoch": 0.9847529985769465, "grad_norm": 
0.09333425760269165, "learning_rate": 0.00010164868715652351, "loss": 0.9503, "step": 2422 }, { "epoch": 0.9851595852815613, "grad_norm": 0.0883539542555809, "learning_rate": 0.00010160797883167108, "loss": 0.9711, "step": 2423 }, { "epoch": 0.985566171986176, "grad_norm": 0.09544458985328674, "learning_rate": 0.00010156727050681864, "loss": 0.8668, "step": 2424 }, { "epoch": 0.9859727586907908, "grad_norm": 0.0979728177189827, "learning_rate": 0.00010152656218196621, "loss": 1.0685, "step": 2425 }, { "epoch": 0.9863793453954056, "grad_norm": 0.08907411992549896, "learning_rate": 0.00010148585385711378, "loss": 0.8947, "step": 2426 }, { "epoch": 0.9867859321000203, "grad_norm": 0.09532100707292557, "learning_rate": 0.00010144514553226135, "loss": 1.0793, "step": 2427 }, { "epoch": 0.9871925188046351, "grad_norm": 0.0916009321808815, "learning_rate": 0.00010140443720740893, "loss": 0.9604, "step": 2428 }, { "epoch": 0.9875991055092499, "grad_norm": 0.0960593968629837, "learning_rate": 0.0001013637288825565, "loss": 1.0012, "step": 2429 }, { "epoch": 0.9880056922138646, "grad_norm": 0.0948946550488472, "learning_rate": 0.00010132302055770406, "loss": 0.9555, "step": 2430 }, { "epoch": 0.9884122789184794, "grad_norm": 0.08670156449079514, "learning_rate": 0.00010128231223285163, "loss": 0.8863, "step": 2431 }, { "epoch": 0.9888188656230942, "grad_norm": 0.0870981365442276, "learning_rate": 0.0001012416039079992, "loss": 0.949, "step": 2432 }, { "epoch": 0.9892254523277089, "grad_norm": 0.09065506607294083, "learning_rate": 0.00010120089558314677, "loss": 1.0791, "step": 2433 }, { "epoch": 0.9896320390323237, "grad_norm": 0.08753534406423569, "learning_rate": 0.00010116018725829432, "loss": 0.8656, "step": 2434 }, { "epoch": 0.9900386257369383, "grad_norm": 0.08939878642559052, "learning_rate": 0.00010111947893344189, "loss": 0.8983, "step": 2435 }, { "epoch": 0.9904452124415531, "grad_norm": 0.09110575914382935, "learning_rate": 0.00010107877060858946, "loss": 0.8971, 
"step": 2436 }, { "epoch": 0.9908517991461679, "grad_norm": 0.08614566922187805, "learning_rate": 0.00010103806228373702, "loss": 0.9746, "step": 2437 }, { "epoch": 0.9912583858507826, "grad_norm": 0.09685923904180527, "learning_rate": 0.00010099735395888459, "loss": 0.9638, "step": 2438 }, { "epoch": 0.9916649725553974, "grad_norm": 0.10014784336090088, "learning_rate": 0.00010095664563403216, "loss": 1.0335, "step": 2439 }, { "epoch": 0.9920715592600122, "grad_norm": 0.09917939454317093, "learning_rate": 0.00010091593730917974, "loss": 1.0288, "step": 2440 }, { "epoch": 0.9924781459646269, "grad_norm": 0.09158805757761002, "learning_rate": 0.00010087522898432731, "loss": 0.9372, "step": 2441 }, { "epoch": 0.9928847326692417, "grad_norm": 0.09151756763458252, "learning_rate": 0.00010083452065947488, "loss": 1.0042, "step": 2442 }, { "epoch": 0.9932913193738565, "grad_norm": 0.09201864898204803, "learning_rate": 0.00010079381233462244, "loss": 0.937, "step": 2443 }, { "epoch": 0.9936979060784712, "grad_norm": 0.10031972825527191, "learning_rate": 0.00010075310400977001, "loss": 0.989, "step": 2444 }, { "epoch": 0.994104492783086, "grad_norm": 0.09593512862920761, "learning_rate": 0.00010071239568491756, "loss": 0.9259, "step": 2445 }, { "epoch": 0.9945110794877008, "grad_norm": 0.10088519006967545, "learning_rate": 0.00010067168736006513, "loss": 1.0888, "step": 2446 }, { "epoch": 0.9949176661923155, "grad_norm": 0.09052947163581848, "learning_rate": 0.0001006309790352127, "loss": 0.9643, "step": 2447 }, { "epoch": 0.9953242528969303, "grad_norm": 0.0943833664059639, "learning_rate": 0.00010059027071036027, "loss": 1.0308, "step": 2448 }, { "epoch": 0.9957308396015451, "grad_norm": 0.0929458737373352, "learning_rate": 0.00010054956238550783, "loss": 0.8993, "step": 2449 }, { "epoch": 0.9961374263061598, "grad_norm": 0.09643827378749847, "learning_rate": 0.0001005088540606554, "loss": 0.9708, "step": 2450 }, { "epoch": 0.9965440130107746, "grad_norm": 
0.08925779908895493, "learning_rate": 0.00010046814573580298, "loss": 0.9209, "step": 2451 }, { "epoch": 0.9969505997153894, "grad_norm": 0.08630047738552094, "learning_rate": 0.00010042743741095055, "loss": 0.9324, "step": 2452 }, { "epoch": 0.997357186420004, "grad_norm": 0.10127938538789749, "learning_rate": 0.00010038672908609812, "loss": 0.9926, "step": 2453 }, { "epoch": 0.9977637731246188, "grad_norm": 0.09573110938072205, "learning_rate": 0.00010034602076124569, "loss": 0.9801, "step": 2454 }, { "epoch": 0.9981703598292336, "grad_norm": 0.0963260605931282, "learning_rate": 0.00010030531243639325, "loss": 0.98, "step": 2455 }, { "epoch": 0.9985769465338483, "grad_norm": 0.08414101600646973, "learning_rate": 0.00010026460411154082, "loss": 0.8676, "step": 2456 }, { "epoch": 0.9989835332384631, "grad_norm": 0.09320447593927383, "learning_rate": 0.00010022389578668838, "loss": 0.998, "step": 2457 }, { "epoch": 0.9993901199430779, "grad_norm": 0.09721797704696655, "learning_rate": 0.00010018318746183594, "loss": 1.0123, "step": 2458 }, { "epoch": 0.9997967066476926, "grad_norm": 0.08773447573184967, "learning_rate": 0.00010014247913698351, "loss": 0.9673, "step": 2459 }, { "epoch": 1.0, "grad_norm": 0.15718789398670197, "learning_rate": 0.00010010177081213108, "loss": 1.1286, "step": 2460 }, { "epoch": 1.0004065867046148, "grad_norm": 0.09029074758291245, "learning_rate": 0.00010006106248727865, "loss": 0.9905, "step": 2461 }, { "epoch": 1.0008131734092296, "grad_norm": 0.09984813630580902, "learning_rate": 0.00010002035416242621, "loss": 0.9981, "step": 2462 }, { "epoch": 1.0012197601138442, "grad_norm": 0.09808840602636337, "learning_rate": 9.997964583757378e-05, "loss": 1.0156, "step": 2463 }, { "epoch": 1.001626346818459, "grad_norm": 0.08917602896690369, "learning_rate": 9.993893751272135e-05, "loss": 0.944, "step": 2464 }, { "epoch": 1.0020329335230738, "grad_norm": 0.0943906158208847, "learning_rate": 9.989822918786892e-05, "loss": 0.9294, "step": 2465 }, 
{ "epoch": 1.0024395202276886, "grad_norm": 0.09091315418481827, "learning_rate": 9.98575208630165e-05, "loss": 0.9707, "step": 2466 }, { "epoch": 1.0028461069323034, "grad_norm": 0.09035106003284454, "learning_rate": 9.981681253816407e-05, "loss": 0.9562, "step": 2467 }, { "epoch": 1.0032526936369182, "grad_norm": 0.09709779173135757, "learning_rate": 9.977610421331163e-05, "loss": 0.9287, "step": 2468 }, { "epoch": 1.0036592803415327, "grad_norm": 0.09063035994768143, "learning_rate": 9.973539588845919e-05, "loss": 0.9138, "step": 2469 }, { "epoch": 1.0040658670461475, "grad_norm": 0.09490003436803818, "learning_rate": 9.969468756360676e-05, "loss": 0.9475, "step": 2470 }, { "epoch": 1.0044724537507623, "grad_norm": 0.10134010761976242, "learning_rate": 9.965397923875432e-05, "loss": 1.0092, "step": 2471 }, { "epoch": 1.0048790404553771, "grad_norm": 0.09728873521089554, "learning_rate": 9.96132709139019e-05, "loss": 0.9498, "step": 2472 }, { "epoch": 1.005285627159992, "grad_norm": 0.09160648286342621, "learning_rate": 9.957256258904947e-05, "loss": 0.8707, "step": 2473 }, { "epoch": 1.0056922138646067, "grad_norm": 0.0939764603972435, "learning_rate": 9.953185426419704e-05, "loss": 0.9619, "step": 2474 }, { "epoch": 1.0060988005692213, "grad_norm": 0.08643637597560883, "learning_rate": 9.94911459393446e-05, "loss": 0.9377, "step": 2475 }, { "epoch": 1.006505387273836, "grad_norm": 0.09141729027032852, "learning_rate": 9.945043761449216e-05, "loss": 0.8859, "step": 2476 }, { "epoch": 1.006911973978451, "grad_norm": 0.09555509686470032, "learning_rate": 9.940972928963974e-05, "loss": 0.933, "step": 2477 }, { "epoch": 1.0073185606830657, "grad_norm": 0.0935022309422493, "learning_rate": 9.936902096478731e-05, "loss": 0.9368, "step": 2478 }, { "epoch": 1.0077251473876805, "grad_norm": 0.09959034621715546, "learning_rate": 9.932831263993488e-05, "loss": 0.974, "step": 2479 }, { "epoch": 1.0081317340922953, "grad_norm": 0.09246455878019333, "learning_rate": 
9.928760431508245e-05, "loss": 0.9248, "step": 2480 }, { "epoch": 1.0085383207969099, "grad_norm": 0.10091500729322433, "learning_rate": 9.924689599023e-05, "loss": 1.122, "step": 2481 }, { "epoch": 1.0089449075015247, "grad_norm": 0.10083048790693283, "learning_rate": 9.920618766537757e-05, "loss": 1.0199, "step": 2482 }, { "epoch": 1.0093514942061395, "grad_norm": 0.09641805291175842, "learning_rate": 9.916547934052515e-05, "loss": 0.9971, "step": 2483 }, { "epoch": 1.0097580809107543, "grad_norm": 0.10362432897090912, "learning_rate": 9.912477101567272e-05, "loss": 0.9596, "step": 2484 }, { "epoch": 1.010164667615369, "grad_norm": 0.09050238877534866, "learning_rate": 9.908406269082028e-05, "loss": 0.9423, "step": 2485 }, { "epoch": 1.0105712543199838, "grad_norm": 0.10209590941667557, "learning_rate": 9.904335436596785e-05, "loss": 0.9366, "step": 2486 }, { "epoch": 1.0109778410245984, "grad_norm": 0.104631707072258, "learning_rate": 9.90026460411154e-05, "loss": 1.0476, "step": 2487 }, { "epoch": 1.0113844277292132, "grad_norm": 0.09572993963956833, "learning_rate": 9.896193771626297e-05, "loss": 1.0523, "step": 2488 }, { "epoch": 1.011791014433828, "grad_norm": 0.10640837252140045, "learning_rate": 9.892122939141055e-05, "loss": 1.1238, "step": 2489 }, { "epoch": 1.0121976011384428, "grad_norm": 0.09798834472894669, "learning_rate": 9.888052106655812e-05, "loss": 0.9597, "step": 2490 }, { "epoch": 1.0126041878430576, "grad_norm": 0.08913593739271164, "learning_rate": 9.883981274170569e-05, "loss": 0.9258, "step": 2491 }, { "epoch": 1.0130107745476722, "grad_norm": 0.09719277173280716, "learning_rate": 9.879910441685324e-05, "loss": 0.9812, "step": 2492 }, { "epoch": 1.013417361252287, "grad_norm": 0.09699688851833344, "learning_rate": 9.875839609200081e-05, "loss": 0.8946, "step": 2493 }, { "epoch": 1.0138239479569018, "grad_norm": 0.09061427414417267, "learning_rate": 9.871768776714838e-05, "loss": 0.9075, "step": 2494 }, { "epoch": 1.0142305346615166, 
"grad_norm": 0.08979996293783188, "learning_rate": 9.867697944229596e-05, "loss": 0.933, "step": 2495 }, { "epoch": 1.0146371213661314, "grad_norm": 0.09325064718723297, "learning_rate": 9.863627111744353e-05, "loss": 0.9604, "step": 2496 }, { "epoch": 1.0150437080707462, "grad_norm": 0.09821408241987228, "learning_rate": 9.85955627925911e-05, "loss": 1.0871, "step": 2497 }, { "epoch": 1.0154502947753608, "grad_norm": 0.09746625274419785, "learning_rate": 9.855485446773865e-05, "loss": 0.9304, "step": 2498 }, { "epoch": 1.0158568814799755, "grad_norm": 0.09508597105741501, "learning_rate": 9.851414614288622e-05, "loss": 0.9469, "step": 2499 }, { "epoch": 1.0162634681845903, "grad_norm": 0.10357919335365295, "learning_rate": 9.84734378180338e-05, "loss": 1.0272, "step": 2500 }, { "epoch": 1.0166700548892051, "grad_norm": 0.09326835721731186, "learning_rate": 9.843272949318137e-05, "loss": 0.8754, "step": 2501 }, { "epoch": 1.01707664159382, "grad_norm": 0.0892389789223671, "learning_rate": 9.839202116832893e-05, "loss": 0.831, "step": 2502 }, { "epoch": 1.0174832282984347, "grad_norm": 0.09790865331888199, "learning_rate": 9.83513128434765e-05, "loss": 1.0179, "step": 2503 }, { "epoch": 1.0178898150030493, "grad_norm": 0.09933339804410934, "learning_rate": 9.831060451862405e-05, "loss": 1.012, "step": 2504 }, { "epoch": 1.0182964017076641, "grad_norm": 0.09628647565841675, "learning_rate": 9.826989619377162e-05, "loss": 0.9739, "step": 2505 }, { "epoch": 1.018702988412279, "grad_norm": 0.09639148414134979, "learning_rate": 9.82291878689192e-05, "loss": 0.9626, "step": 2506 }, { "epoch": 1.0191095751168937, "grad_norm": 0.10145976394414902, "learning_rate": 9.818847954406677e-05, "loss": 0.9734, "step": 2507 }, { "epoch": 1.0195161618215085, "grad_norm": 0.09076192229986191, "learning_rate": 9.814777121921434e-05, "loss": 0.898, "step": 2508 }, { "epoch": 1.0199227485261233, "grad_norm": 0.09159097820520401, "learning_rate": 9.81070628943619e-05, "loss": 0.9218, 
"step": 2509 }, { "epoch": 1.0203293352307379, "grad_norm": 0.08706653863191605, "learning_rate": 9.806635456950946e-05, "loss": 0.8249, "step": 2510 }, { "epoch": 1.0207359219353527, "grad_norm": 0.10595209151506424, "learning_rate": 9.802564624465703e-05, "loss": 1.0499, "step": 2511 }, { "epoch": 1.0211425086399675, "grad_norm": 0.08821277320384979, "learning_rate": 9.798493791980461e-05, "loss": 0.8806, "step": 2512 }, { "epoch": 1.0215490953445823, "grad_norm": 0.09965387731790543, "learning_rate": 9.794422959495218e-05, "loss": 1.0049, "step": 2513 }, { "epoch": 1.021955682049197, "grad_norm": 0.09820786118507385, "learning_rate": 9.790352127009974e-05, "loss": 0.9716, "step": 2514 }, { "epoch": 1.0223622687538116, "grad_norm": 0.10157819837331772, "learning_rate": 9.786281294524731e-05, "loss": 0.9659, "step": 2515 }, { "epoch": 1.0227688554584264, "grad_norm": 0.09603773802518845, "learning_rate": 9.782210462039487e-05, "loss": 0.9056, "step": 2516 }, { "epoch": 1.0231754421630412, "grad_norm": 0.10223423689603806, "learning_rate": 9.778139629554243e-05, "loss": 1.0357, "step": 2517 }, { "epoch": 1.023582028867656, "grad_norm": 0.10251198709011078, "learning_rate": 9.774068797069002e-05, "loss": 1.0054, "step": 2518 }, { "epoch": 1.0239886155722708, "grad_norm": 0.11098898202180862, "learning_rate": 9.769997964583758e-05, "loss": 1.0292, "step": 2519 }, { "epoch": 1.0243952022768856, "grad_norm": 0.1087106242775917, "learning_rate": 9.765927132098515e-05, "loss": 1.0838, "step": 2520 }, { "epoch": 1.0248017889815002, "grad_norm": 0.09911047667264938, "learning_rate": 9.761856299613272e-05, "loss": 0.9538, "step": 2521 }, { "epoch": 1.025208375686115, "grad_norm": 0.10460842400789261, "learning_rate": 9.757785467128027e-05, "loss": 1.008, "step": 2522 }, { "epoch": 1.0256149623907298, "grad_norm": 0.09570446610450745, "learning_rate": 9.753714634642785e-05, "loss": 0.8979, "step": 2523 }, { "epoch": 1.0260215490953446, "grad_norm": 0.10213327407836914, 
"learning_rate": 9.749643802157542e-05, "loss": 0.9863, "step": 2524 }, { "epoch": 1.0264281357999594, "grad_norm": 0.11625881493091583, "learning_rate": 9.745572969672299e-05, "loss": 1.102, "step": 2525 }, { "epoch": 1.0268347225045742, "grad_norm": 0.09997177869081497, "learning_rate": 9.741502137187056e-05, "loss": 1.0134, "step": 2526 }, { "epoch": 1.0272413092091888, "grad_norm": 0.09349930286407471, "learning_rate": 9.737431304701812e-05, "loss": 0.9323, "step": 2527 }, { "epoch": 1.0276478959138036, "grad_norm": 0.09024021774530411, "learning_rate": 9.733360472216568e-05, "loss": 0.9381, "step": 2528 }, { "epoch": 1.0280544826184184, "grad_norm": 0.09808880090713501, "learning_rate": 9.729289639731326e-05, "loss": 0.902, "step": 2529 }, { "epoch": 1.0284610693230332, "grad_norm": 0.09804200381040573, "learning_rate": 9.725218807246083e-05, "loss": 0.9712, "step": 2530 }, { "epoch": 1.028867656027648, "grad_norm": 0.09585238248109818, "learning_rate": 9.72114797476084e-05, "loss": 0.944, "step": 2531 }, { "epoch": 1.0292742427322628, "grad_norm": 0.10107024013996124, "learning_rate": 9.717077142275596e-05, "loss": 0.9959, "step": 2532 }, { "epoch": 1.0296808294368773, "grad_norm": 0.09128806740045547, "learning_rate": 9.713006309790353e-05, "loss": 0.8755, "step": 2533 }, { "epoch": 1.0300874161414921, "grad_norm": 0.10488265007734299, "learning_rate": 9.708935477305108e-05, "loss": 1.0307, "step": 2534 }, { "epoch": 1.030494002846107, "grad_norm": 0.09195258468389511, "learning_rate": 9.704864644819867e-05, "loss": 0.8642, "step": 2535 }, { "epoch": 1.0309005895507217, "grad_norm": 0.09590499103069305, "learning_rate": 9.700793812334623e-05, "loss": 0.8461, "step": 2536 }, { "epoch": 1.0313071762553365, "grad_norm": 0.09776647388935089, "learning_rate": 9.69672297984938e-05, "loss": 0.9141, "step": 2537 }, { "epoch": 1.031713762959951, "grad_norm": 0.09859136492013931, "learning_rate": 9.692652147364137e-05, "loss": 1.0216, "step": 2538 }, { "epoch": 
1.032120349664566, "grad_norm": 0.10951580852270126, "learning_rate": 9.688581314878892e-05, "loss": 0.979, "step": 2539 }, { "epoch": 1.0325269363691807, "grad_norm": 0.09757594019174576, "learning_rate": 9.684510482393649e-05, "loss": 0.8619, "step": 2540 }, { "epoch": 1.0329335230737955, "grad_norm": 0.10344915837049484, "learning_rate": 9.680439649908407e-05, "loss": 0.9842, "step": 2541 }, { "epoch": 1.0333401097784103, "grad_norm": 0.09753288328647614, "learning_rate": 9.676368817423164e-05, "loss": 0.8892, "step": 2542 }, { "epoch": 1.033746696483025, "grad_norm": 0.10194489359855652, "learning_rate": 9.67229798493792e-05, "loss": 0.953, "step": 2543 }, { "epoch": 1.0341532831876397, "grad_norm": 0.0974886566400528, "learning_rate": 9.668227152452677e-05, "loss": 0.9984, "step": 2544 }, { "epoch": 1.0345598698922545, "grad_norm": 0.09721877425909042, "learning_rate": 9.664156319967433e-05, "loss": 0.8556, "step": 2545 }, { "epoch": 1.0349664565968693, "grad_norm": 0.1020737811923027, "learning_rate": 9.660085487482191e-05, "loss": 1.0099, "step": 2546 }, { "epoch": 1.035373043301484, "grad_norm": 0.10895517468452454, "learning_rate": 9.656014654996948e-05, "loss": 0.9703, "step": 2547 }, { "epoch": 1.0357796300060989, "grad_norm": 0.10454720258712769, "learning_rate": 9.651943822511704e-05, "loss": 1.0158, "step": 2548 }, { "epoch": 1.0361862167107136, "grad_norm": 0.09759974479675293, "learning_rate": 9.647872990026461e-05, "loss": 0.9259, "step": 2549 }, { "epoch": 1.0365928034153282, "grad_norm": 0.09353537857532501, "learning_rate": 9.643802157541218e-05, "loss": 0.8987, "step": 2550 }, { "epoch": 1.036999390119943, "grad_norm": 0.10114728659391403, "learning_rate": 9.639731325055973e-05, "loss": 0.9499, "step": 2551 }, { "epoch": 1.0374059768245578, "grad_norm": 0.09962712973356247, "learning_rate": 9.635660492570731e-05, "loss": 1.0086, "step": 2552 }, { "epoch": 1.0378125635291726, "grad_norm": 0.09930434823036194, "learning_rate": 
9.631589660085488e-05, "loss": 0.891, "step": 2553 }, { "epoch": 1.0382191502337874, "grad_norm": 0.10339832305908203, "learning_rate": 9.627518827600245e-05, "loss": 0.9528, "step": 2554 }, { "epoch": 1.0386257369384022, "grad_norm": 0.09386780112981796, "learning_rate": 9.623447995115002e-05, "loss": 0.879, "step": 2555 }, { "epoch": 1.0390323236430168, "grad_norm": 0.09669435769319534, "learning_rate": 9.619377162629759e-05, "loss": 0.9047, "step": 2556 }, { "epoch": 1.0394389103476316, "grad_norm": 0.08469796180725098, "learning_rate": 9.615306330144514e-05, "loss": 0.782, "step": 2557 }, { "epoch": 1.0398454970522464, "grad_norm": 0.09662485867738724, "learning_rate": 9.611235497659272e-05, "loss": 0.9099, "step": 2558 }, { "epoch": 1.0402520837568612, "grad_norm": 0.09601373970508575, "learning_rate": 9.607164665174029e-05, "loss": 0.9126, "step": 2559 }, { "epoch": 1.040658670461476, "grad_norm": 0.10070160031318665, "learning_rate": 9.603093832688786e-05, "loss": 0.9936, "step": 2560 }, { "epoch": 1.0410652571660908, "grad_norm": 0.09629065543413162, "learning_rate": 9.599023000203542e-05, "loss": 0.8927, "step": 2561 }, { "epoch": 1.0414718438707053, "grad_norm": 0.08817669004201889, "learning_rate": 9.594952167718299e-05, "loss": 0.854, "step": 2562 }, { "epoch": 1.0418784305753201, "grad_norm": 0.0914379209280014, "learning_rate": 9.590881335233055e-05, "loss": 0.9465, "step": 2563 }, { "epoch": 1.042285017279935, "grad_norm": 0.10295330733060837, "learning_rate": 9.586810502747813e-05, "loss": 0.9533, "step": 2564 }, { "epoch": 1.0426916039845497, "grad_norm": 0.1070484146475792, "learning_rate": 9.58273967026257e-05, "loss": 1.0482, "step": 2565 }, { "epoch": 1.0430981906891645, "grad_norm": 0.10043883323669434, "learning_rate": 9.578668837777326e-05, "loss": 1.06, "step": 2566 }, { "epoch": 1.0435047773937791, "grad_norm": 0.09721029549837112, "learning_rate": 9.574598005292083e-05, "loss": 0.9336, "step": 2567 }, { "epoch": 1.043911364098394, 
"grad_norm": 0.08390473574399948, "learning_rate": 9.57052717280684e-05, "loss": 0.7674, "step": 2568 }, { "epoch": 1.0443179508030087, "grad_norm": 0.09861475974321365, "learning_rate": 9.566456340321596e-05, "loss": 0.9026, "step": 2569 }, { "epoch": 1.0447245375076235, "grad_norm": 0.10255376249551773, "learning_rate": 9.562385507836353e-05, "loss": 0.9269, "step": 2570 }, { "epoch": 1.0451311242122383, "grad_norm": 0.09502318501472473, "learning_rate": 9.55831467535111e-05, "loss": 0.9383, "step": 2571 }, { "epoch": 1.045537710916853, "grad_norm": 0.09613403677940369, "learning_rate": 9.554243842865867e-05, "loss": 0.9479, "step": 2572 }, { "epoch": 1.0459442976214677, "grad_norm": 0.09740449488162994, "learning_rate": 9.550173010380624e-05, "loss": 0.9378, "step": 2573 }, { "epoch": 1.0463508843260825, "grad_norm": 0.09630079567432404, "learning_rate": 9.54610217789538e-05, "loss": 0.9408, "step": 2574 }, { "epoch": 1.0467574710306973, "grad_norm": 0.10916483402252197, "learning_rate": 9.542031345410137e-05, "loss": 1.0303, "step": 2575 }, { "epoch": 1.047164057735312, "grad_norm": 0.10121887922286987, "learning_rate": 9.537960512924894e-05, "loss": 0.9663, "step": 2576 }, { "epoch": 1.0475706444399269, "grad_norm": 0.10116361826658249, "learning_rate": 9.53388968043965e-05, "loss": 1.0089, "step": 2577 }, { "epoch": 1.0479772311445417, "grad_norm": 0.09431501477956772, "learning_rate": 9.529818847954407e-05, "loss": 0.9011, "step": 2578 }, { "epoch": 1.0483838178491562, "grad_norm": 0.09918123483657837, "learning_rate": 9.525748015469164e-05, "loss": 1.0262, "step": 2579 }, { "epoch": 1.048790404553771, "grad_norm": 0.0959305465221405, "learning_rate": 9.521677182983921e-05, "loss": 0.9491, "step": 2580 }, { "epoch": 1.0491969912583858, "grad_norm": 0.0992065966129303, "learning_rate": 9.517606350498678e-05, "loss": 1.0223, "step": 2581 }, { "epoch": 1.0496035779630006, "grad_norm": 0.10246460884809494, "learning_rate": 9.513535518013434e-05, "loss": 0.892, 
"step": 2582 }, { "epoch": 1.0500101646676154, "grad_norm": 0.10209383815526962, "learning_rate": 9.509464685528191e-05, "loss": 1.0251, "step": 2583 }, { "epoch": 1.0504167513722302, "grad_norm": 173.35934448242188, "learning_rate": 9.505393853042948e-05, "loss": 0.9476, "step": 2584 }, { "epoch": 1.0508233380768448, "grad_norm": 0.10001904517412186, "learning_rate": 9.501323020557705e-05, "loss": 0.9104, "step": 2585 }, { "epoch": 1.0512299247814596, "grad_norm": 0.09232128411531448, "learning_rate": 9.49725218807246e-05, "loss": 0.8637, "step": 2586 }, { "epoch": 1.0516365114860744, "grad_norm": 0.09748049825429916, "learning_rate": 9.493181355587218e-05, "loss": 0.9357, "step": 2587 }, { "epoch": 1.0520430981906892, "grad_norm": 0.11075956374406815, "learning_rate": 9.489110523101975e-05, "loss": 1.0479, "step": 2588 }, { "epoch": 1.052449684895304, "grad_norm": 0.10015081614255905, "learning_rate": 9.485039690616732e-05, "loss": 0.8373, "step": 2589 }, { "epoch": 1.0528562715999188, "grad_norm": 0.10808718949556351, "learning_rate": 9.480968858131488e-05, "loss": 0.9984, "step": 2590 }, { "epoch": 1.0532628583045334, "grad_norm": 0.09263164550065994, "learning_rate": 9.476898025646245e-05, "loss": 0.8195, "step": 2591 }, { "epoch": 1.0536694450091482, "grad_norm": 0.10392975807189941, "learning_rate": 9.472827193161002e-05, "loss": 0.9241, "step": 2592 }, { "epoch": 1.054076031713763, "grad_norm": 0.10209937393665314, "learning_rate": 9.468756360675759e-05, "loss": 0.9368, "step": 2593 }, { "epoch": 1.0544826184183778, "grad_norm": 0.0948430597782135, "learning_rate": 9.464685528190516e-05, "loss": 0.9274, "step": 2594 }, { "epoch": 1.0548892051229926, "grad_norm": 0.09769920259714127, "learning_rate": 9.460614695705272e-05, "loss": 0.9379, "step": 2595 }, { "epoch": 1.0552957918276071, "grad_norm": 0.09840547293424606, "learning_rate": 9.456543863220029e-05, "loss": 0.9622, "step": 2596 }, { "epoch": 1.055702378532222, "grad_norm": 0.10511568933725357, 
"learning_rate": 9.452473030734786e-05, "loss": 0.9294, "step": 2597 }, { "epoch": 1.0561089652368367, "grad_norm": 0.10543198883533478, "learning_rate": 9.448402198249543e-05, "loss": 0.879, "step": 2598 }, { "epoch": 1.0565155519414515, "grad_norm": 0.11027191579341888, "learning_rate": 9.4443313657643e-05, "loss": 1.1137, "step": 2599 }, { "epoch": 1.0569221386460663, "grad_norm": 0.10025797039270401, "learning_rate": 9.440260533279056e-05, "loss": 0.9876, "step": 2600 }, { "epoch": 1.0573287253506811, "grad_norm": 0.09540455043315887, "learning_rate": 9.436189700793813e-05, "loss": 0.9154, "step": 2601 }, { "epoch": 1.0577353120552957, "grad_norm": 0.1021379828453064, "learning_rate": 9.43211886830857e-05, "loss": 0.9915, "step": 2602 }, { "epoch": 1.0581418987599105, "grad_norm": 0.09948939830064774, "learning_rate": 9.428048035823326e-05, "loss": 0.9677, "step": 2603 }, { "epoch": 1.0585484854645253, "grad_norm": 0.10177826136350632, "learning_rate": 9.423977203338083e-05, "loss": 0.9533, "step": 2604 }, { "epoch": 1.05895507216914, "grad_norm": 0.10010895878076553, "learning_rate": 9.41990637085284e-05, "loss": 1.0366, "step": 2605 }, { "epoch": 1.0593616588737549, "grad_norm": 0.0993037298321724, "learning_rate": 9.415835538367597e-05, "loss": 1.0429, "step": 2606 }, { "epoch": 1.0597682455783697, "grad_norm": 0.09915399551391602, "learning_rate": 9.411764705882353e-05, "loss": 0.9091, "step": 2607 }, { "epoch": 1.0601748322829843, "grad_norm": 0.09989267587661743, "learning_rate": 9.40769387339711e-05, "loss": 1.0243, "step": 2608 }, { "epoch": 1.060581418987599, "grad_norm": 0.10886070877313614, "learning_rate": 9.403623040911867e-05, "loss": 1.0211, "step": 2609 }, { "epoch": 1.0609880056922139, "grad_norm": 0.09823065251111984, "learning_rate": 9.399552208426624e-05, "loss": 0.9097, "step": 2610 }, { "epoch": 1.0613945923968287, "grad_norm": 0.09129935503005981, "learning_rate": 9.39548137594138e-05, "loss": 0.9066, "step": 2611 }, { "epoch": 
1.0618011791014434, "grad_norm": 0.09999019652605057, "learning_rate": 9.391410543456137e-05, "loss": 0.9658, "step": 2612 }, { "epoch": 1.0622077658060582, "grad_norm": 0.1015915721654892, "learning_rate": 9.387339710970894e-05, "loss": 1.0863, "step": 2613 }, { "epoch": 1.0626143525106728, "grad_norm": 0.11688552051782608, "learning_rate": 9.383268878485651e-05, "loss": 1.0107, "step": 2614 }, { "epoch": 1.0630209392152876, "grad_norm": 0.09864789247512817, "learning_rate": 9.379198046000408e-05, "loss": 0.9256, "step": 2615 }, { "epoch": 1.0634275259199024, "grad_norm": 0.11037877202033997, "learning_rate": 9.375127213515164e-05, "loss": 1.089, "step": 2616 }, { "epoch": 1.0638341126245172, "grad_norm": 0.10612582415342331, "learning_rate": 9.371056381029921e-05, "loss": 1.0316, "step": 2617 }, { "epoch": 1.064240699329132, "grad_norm": 0.09921829402446747, "learning_rate": 9.366985548544678e-05, "loss": 0.9821, "step": 2618 }, { "epoch": 1.0646472860337468, "grad_norm": 0.0911387950181961, "learning_rate": 9.362914716059435e-05, "loss": 0.864, "step": 2619 }, { "epoch": 1.0650538727383614, "grad_norm": 0.10604958236217499, "learning_rate": 9.358843883574191e-05, "loss": 0.9687, "step": 2620 }, { "epoch": 1.0654604594429762, "grad_norm": 0.10203681141138077, "learning_rate": 9.354773051088948e-05, "loss": 1.0158, "step": 2621 }, { "epoch": 1.065867046147591, "grad_norm": 0.09713797271251678, "learning_rate": 9.350702218603705e-05, "loss": 0.9671, "step": 2622 }, { "epoch": 1.0662736328522058, "grad_norm": 0.10216394811868668, "learning_rate": 9.346631386118462e-05, "loss": 0.9156, "step": 2623 }, { "epoch": 1.0666802195568206, "grad_norm": 0.09904835373163223, "learning_rate": 9.342560553633218e-05, "loss": 0.9856, "step": 2624 }, { "epoch": 1.0670868062614352, "grad_norm": 0.09881392866373062, "learning_rate": 9.338489721147975e-05, "loss": 0.964, "step": 2625 }, { "epoch": 1.06749339296605, "grad_norm": 0.10800333321094513, "learning_rate": 
9.334418888662732e-05, "loss": 1.0773, "step": 2626 }, { "epoch": 1.0678999796706647, "grad_norm": 0.09454644471406937, "learning_rate": 9.330348056177489e-05, "loss": 0.8724, "step": 2627 }, { "epoch": 1.0683065663752795, "grad_norm": 0.10942061245441437, "learning_rate": 9.326277223692246e-05, "loss": 1.0294, "step": 2628 }, { "epoch": 1.0687131530798943, "grad_norm": 0.10133802890777588, "learning_rate": 9.322206391207002e-05, "loss": 0.9862, "step": 2629 }, { "epoch": 1.0691197397845091, "grad_norm": 0.08950886130332947, "learning_rate": 9.318135558721759e-05, "loss": 0.8339, "step": 2630 }, { "epoch": 1.0695263264891237, "grad_norm": 0.09953132271766663, "learning_rate": 9.314064726236516e-05, "loss": 0.94, "step": 2631 }, { "epoch": 1.0699329131937385, "grad_norm": 0.09424632787704468, "learning_rate": 9.309993893751273e-05, "loss": 0.8335, "step": 2632 }, { "epoch": 1.0703394998983533, "grad_norm": 0.09430480003356934, "learning_rate": 9.305923061266029e-05, "loss": 0.9151, "step": 2633 }, { "epoch": 1.070746086602968, "grad_norm": 0.10957103222608566, "learning_rate": 9.301852228780786e-05, "loss": 1.0281, "step": 2634 }, { "epoch": 1.071152673307583, "grad_norm": 0.09792932122945786, "learning_rate": 9.297781396295543e-05, "loss": 0.8805, "step": 2635 }, { "epoch": 1.0715592600121977, "grad_norm": 0.09845682233572006, "learning_rate": 9.2937105638103e-05, "loss": 0.8915, "step": 2636 }, { "epoch": 1.0719658467168123, "grad_norm": 0.10949815809726715, "learning_rate": 9.289639731325056e-05, "loss": 0.9888, "step": 2637 }, { "epoch": 1.072372433421427, "grad_norm": 0.09920839220285416, "learning_rate": 9.285568898839813e-05, "loss": 0.9253, "step": 2638 }, { "epoch": 1.0727790201260419, "grad_norm": 0.10604346543550491, "learning_rate": 9.28149806635457e-05, "loss": 1.0617, "step": 2639 }, { "epoch": 1.0731856068306567, "grad_norm": 0.09291350096464157, "learning_rate": 9.277427233869327e-05, "loss": 0.8628, "step": 2640 }, { "epoch": 1.0735921935352715, 
"grad_norm": 0.0925416573882103, "learning_rate": 9.273356401384083e-05, "loss": 0.8899, "step": 2641 }, { "epoch": 1.073998780239886, "grad_norm": 0.0955965593457222, "learning_rate": 9.26928556889884e-05, "loss": 0.9303, "step": 2642 }, { "epoch": 1.0744053669445008, "grad_norm": 0.08776776492595673, "learning_rate": 9.265214736413597e-05, "loss": 0.8305, "step": 2643 }, { "epoch": 1.0748119536491156, "grad_norm": 0.09283957630395889, "learning_rate": 9.261143903928354e-05, "loss": 0.8323, "step": 2644 }, { "epoch": 1.0752185403537304, "grad_norm": 0.1023586168885231, "learning_rate": 9.25707307144311e-05, "loss": 0.9612, "step": 2645 }, { "epoch": 1.0756251270583452, "grad_norm": 0.09603264182806015, "learning_rate": 9.253002238957867e-05, "loss": 0.8514, "step": 2646 }, { "epoch": 1.07603171376296, "grad_norm": 0.09656079113483429, "learning_rate": 9.248931406472624e-05, "loss": 0.9003, "step": 2647 }, { "epoch": 1.0764383004675748, "grad_norm": 0.10394012928009033, "learning_rate": 9.244860573987381e-05, "loss": 1.0031, "step": 2648 }, { "epoch": 1.0768448871721894, "grad_norm": 0.1057206466794014, "learning_rate": 9.240789741502138e-05, "loss": 0.9915, "step": 2649 }, { "epoch": 1.0772514738768042, "grad_norm": 0.09920359402894974, "learning_rate": 9.236718909016894e-05, "loss": 0.9242, "step": 2650 }, { "epoch": 1.077658060581419, "grad_norm": 0.09820383042097092, "learning_rate": 9.232648076531651e-05, "loss": 0.889, "step": 2651 }, { "epoch": 1.0780646472860338, "grad_norm": 0.09957915544509888, "learning_rate": 9.228577244046408e-05, "loss": 0.9418, "step": 2652 }, { "epoch": 1.0784712339906486, "grad_norm": 0.09969063103199005, "learning_rate": 9.224506411561165e-05, "loss": 0.905, "step": 2653 }, { "epoch": 1.0788778206952632, "grad_norm": 0.10219317674636841, "learning_rate": 9.220435579075921e-05, "loss": 1.0422, "step": 2654 }, { "epoch": 1.079284407399878, "grad_norm": 0.10804678499698639, "learning_rate": 9.216364746590678e-05, "loss": 0.9122, 
"step": 2655 }, { "epoch": 1.0796909941044928, "grad_norm": 0.10206186026334763, "learning_rate": 9.212293914105435e-05, "loss": 0.9674, "step": 2656 }, { "epoch": 1.0800975808091076, "grad_norm": 0.1036703959107399, "learning_rate": 9.208223081620192e-05, "loss": 0.9717, "step": 2657 }, { "epoch": 1.0805041675137224, "grad_norm": 0.09528395533561707, "learning_rate": 9.204152249134948e-05, "loss": 0.8668, "step": 2658 }, { "epoch": 1.0809107542183372, "grad_norm": 0.08862180262804031, "learning_rate": 9.200081416649705e-05, "loss": 0.8139, "step": 2659 }, { "epoch": 1.0813173409229517, "grad_norm": 0.11283780634403229, "learning_rate": 9.196010584164462e-05, "loss": 0.99, "step": 2660 }, { "epoch": 1.0817239276275665, "grad_norm": 0.10028998553752899, "learning_rate": 9.19193975167922e-05, "loss": 0.8809, "step": 2661 }, { "epoch": 1.0821305143321813, "grad_norm": 0.11139478534460068, "learning_rate": 9.187868919193975e-05, "loss": 0.9946, "step": 2662 }, { "epoch": 1.0825371010367961, "grad_norm": 0.1029946580529213, "learning_rate": 9.183798086708732e-05, "loss": 0.9297, "step": 2663 }, { "epoch": 1.082943687741411, "grad_norm": 0.10619094967842102, "learning_rate": 9.179727254223489e-05, "loss": 1.0645, "step": 2664 }, { "epoch": 1.0833502744460257, "grad_norm": 0.1395910680294037, "learning_rate": 9.175656421738246e-05, "loss": 1.0362, "step": 2665 }, { "epoch": 1.0837568611506403, "grad_norm": 0.09811388701200485, "learning_rate": 9.171585589253003e-05, "loss": 0.9939, "step": 2666 }, { "epoch": 1.084163447855255, "grad_norm": 0.1118270680308342, "learning_rate": 9.167514756767759e-05, "loss": 1.031, "step": 2667 }, { "epoch": 1.0845700345598699, "grad_norm": 0.11443159729242325, "learning_rate": 9.163443924282516e-05, "loss": 1.0236, "step": 2668 }, { "epoch": 1.0849766212644847, "grad_norm": 0.10500071197748184, "learning_rate": 9.159373091797273e-05, "loss": 0.9794, "step": 2669 }, { "epoch": 1.0853832079690995, "grad_norm": 0.10952949523925781, 
"learning_rate": 9.15530225931203e-05, "loss": 1.0597, "step": 2670 }, { "epoch": 1.085789794673714, "grad_norm": 0.10230562835931778, "learning_rate": 9.151231426826786e-05, "loss": 1.0483, "step": 2671 }, { "epoch": 1.0861963813783289, "grad_norm": 0.1016286313533783, "learning_rate": 9.147160594341543e-05, "loss": 0.9346, "step": 2672 }, { "epoch": 1.0866029680829437, "grad_norm": 0.08879520744085312, "learning_rate": 9.1430897618563e-05, "loss": 0.8451, "step": 2673 }, { "epoch": 1.0870095547875585, "grad_norm": 0.11024922877550125, "learning_rate": 9.139018929371057e-05, "loss": 1.0428, "step": 2674 }, { "epoch": 1.0874161414921732, "grad_norm": 0.08919038623571396, "learning_rate": 9.134948096885813e-05, "loss": 0.8909, "step": 2675 }, { "epoch": 1.087822728196788, "grad_norm": 0.09503115713596344, "learning_rate": 9.13087726440057e-05, "loss": 0.9496, "step": 2676 }, { "epoch": 1.0882293149014026, "grad_norm": 0.09637421369552612, "learning_rate": 9.126806431915327e-05, "loss": 0.8955, "step": 2677 }, { "epoch": 1.0886359016060174, "grad_norm": 0.10244832187891006, "learning_rate": 9.122735599430084e-05, "loss": 1.061, "step": 2678 }, { "epoch": 1.0890424883106322, "grad_norm": 0.09347443282604218, "learning_rate": 9.11866476694484e-05, "loss": 0.9011, "step": 2679 }, { "epoch": 1.089449075015247, "grad_norm": 0.09415366500616074, "learning_rate": 9.114593934459597e-05, "loss": 0.8837, "step": 2680 }, { "epoch": 1.0898556617198618, "grad_norm": 0.1009860560297966, "learning_rate": 9.110523101974354e-05, "loss": 1.0073, "step": 2681 }, { "epoch": 1.0902622484244766, "grad_norm": 0.10200529545545578, "learning_rate": 9.106452269489111e-05, "loss": 0.9762, "step": 2682 }, { "epoch": 1.0906688351290912, "grad_norm": 0.08658542484045029, "learning_rate": 9.102381437003867e-05, "loss": 0.7831, "step": 2683 }, { "epoch": 1.091075421833706, "grad_norm": 0.11266512423753738, "learning_rate": 9.098310604518626e-05, "loss": 0.9758, "step": 2684 }, { "epoch": 
1.0914820085383208, "grad_norm": 0.09850563108921051, "learning_rate": 9.094239772033381e-05, "loss": 0.9202, "step": 2685 }, { "epoch": 1.0918885952429356, "grad_norm": 0.10467302799224854, "learning_rate": 9.090168939548138e-05, "loss": 0.983, "step": 2686 }, { "epoch": 1.0922951819475504, "grad_norm": 0.10072293132543564, "learning_rate": 9.086098107062895e-05, "loss": 0.9643, "step": 2687 }, { "epoch": 1.0927017686521652, "grad_norm": 0.10193373262882233, "learning_rate": 9.082027274577651e-05, "loss": 0.9569, "step": 2688 }, { "epoch": 1.0931083553567797, "grad_norm": 0.09216511994600296, "learning_rate": 9.077956442092408e-05, "loss": 0.8781, "step": 2689 }, { "epoch": 1.0935149420613945, "grad_norm": 0.09577429294586182, "learning_rate": 9.073885609607166e-05, "loss": 0.9484, "step": 2690 }, { "epoch": 1.0939215287660093, "grad_norm": 0.08991552889347076, "learning_rate": 9.069814777121922e-05, "loss": 0.843, "step": 2691 }, { "epoch": 1.0943281154706241, "grad_norm": 0.09744630008935928, "learning_rate": 9.065743944636678e-05, "loss": 0.9266, "step": 2692 }, { "epoch": 1.094734702175239, "grad_norm": 0.11267295479774475, "learning_rate": 9.061673112151435e-05, "loss": 1.1497, "step": 2693 }, { "epoch": 1.0951412888798537, "grad_norm": 0.10447680950164795, "learning_rate": 9.057602279666192e-05, "loss": 0.9988, "step": 2694 }, { "epoch": 1.0955478755844683, "grad_norm": 0.09360986948013306, "learning_rate": 9.053531447180949e-05, "loss": 0.8463, "step": 2695 }, { "epoch": 1.095954462289083, "grad_norm": 0.10234752297401428, "learning_rate": 9.049460614695707e-05, "loss": 0.9858, "step": 2696 }, { "epoch": 1.096361048993698, "grad_norm": 0.09333793818950653, "learning_rate": 9.045389782210462e-05, "loss": 0.8818, "step": 2697 }, { "epoch": 1.0967676356983127, "grad_norm": 0.10592950135469437, "learning_rate": 9.041318949725219e-05, "loss": 1.1268, "step": 2698 }, { "epoch": 1.0971742224029275, "grad_norm": 0.10253705084323883, "learning_rate": 
9.037248117239976e-05, "loss": 1.0303, "step": 2699 }, { "epoch": 1.097580809107542, "grad_norm": 0.09744442999362946, "learning_rate": 9.033177284754732e-05, "loss": 0.9919, "step": 2700 }, { "epoch": 1.0979873958121569, "grad_norm": 0.0978417843580246, "learning_rate": 9.029106452269489e-05, "loss": 0.9778, "step": 2701 }, { "epoch": 1.0983939825167717, "grad_norm": 0.10374154895544052, "learning_rate": 9.025035619784247e-05, "loss": 0.9441, "step": 2702 }, { "epoch": 1.0988005692213865, "grad_norm": 0.10075423121452332, "learning_rate": 9.020964787299003e-05, "loss": 0.9613, "step": 2703 }, { "epoch": 1.0992071559260013, "grad_norm": 0.10084596276283264, "learning_rate": 9.01689395481376e-05, "loss": 1.0501, "step": 2704 }, { "epoch": 1.099613742630616, "grad_norm": 0.09317726641893387, "learning_rate": 9.012823122328516e-05, "loss": 0.9205, "step": 2705 }, { "epoch": 1.1000203293352306, "grad_norm": 0.10026173293590546, "learning_rate": 9.008752289843273e-05, "loss": 0.9694, "step": 2706 }, { "epoch": 1.1004269160398454, "grad_norm": 0.10271118581295013, "learning_rate": 9.004681457358031e-05, "loss": 0.966, "step": 2707 }, { "epoch": 1.1008335027444602, "grad_norm": 0.1029544472694397, "learning_rate": 9.000610624872788e-05, "loss": 1.0658, "step": 2708 }, { "epoch": 1.101240089449075, "grad_norm": 0.08744987100362778, "learning_rate": 8.996539792387543e-05, "loss": 0.7922, "step": 2709 }, { "epoch": 1.1016466761536898, "grad_norm": 0.10885384678840637, "learning_rate": 8.9924689599023e-05, "loss": 0.966, "step": 2710 }, { "epoch": 1.1020532628583046, "grad_norm": 0.09841740131378174, "learning_rate": 8.988398127417057e-05, "loss": 0.93, "step": 2711 }, { "epoch": 1.1024598495629192, "grad_norm": 0.09065406024456024, "learning_rate": 8.984327294931814e-05, "loss": 0.8524, "step": 2712 }, { "epoch": 1.102866436267534, "grad_norm": 0.1025364026427269, "learning_rate": 8.980256462446572e-05, "loss": 0.9865, "step": 2713 }, { "epoch": 1.1032730229721488, 
"grad_norm": 0.10353400558233261, "learning_rate": 8.976185629961327e-05, "loss": 0.9485, "step": 2714 }, { "epoch": 1.1036796096767636, "grad_norm": 0.10245194286108017, "learning_rate": 8.972114797476084e-05, "loss": 0.95, "step": 2715 }, { "epoch": 1.1040861963813784, "grad_norm": 0.10163327306509018, "learning_rate": 8.968043964990841e-05, "loss": 0.9418, "step": 2716 }, { "epoch": 1.1044927830859932, "grad_norm": 0.0943874716758728, "learning_rate": 8.963973132505597e-05, "loss": 0.9057, "step": 2717 }, { "epoch": 1.1048993697906078, "grad_norm": 0.10380052775144577, "learning_rate": 8.959902300020354e-05, "loss": 0.908, "step": 2718 }, { "epoch": 1.1053059564952226, "grad_norm": 0.1012316420674324, "learning_rate": 8.955831467535112e-05, "loss": 1.0247, "step": 2719 }, { "epoch": 1.1057125431998374, "grad_norm": 0.10259490460157394, "learning_rate": 8.951760635049868e-05, "loss": 1.02, "step": 2720 }, { "epoch": 1.1061191299044522, "grad_norm": 0.09200392663478851, "learning_rate": 8.947689802564625e-05, "loss": 0.8184, "step": 2721 }, { "epoch": 1.106525716609067, "grad_norm": 0.10655350238084793, "learning_rate": 8.943618970079381e-05, "loss": 1.0162, "step": 2722 }, { "epoch": 1.1069323033136818, "grad_norm": 0.09815651178359985, "learning_rate": 8.939548137594138e-05, "loss": 0.8743, "step": 2723 }, { "epoch": 1.1073388900182963, "grad_norm": 0.09680456668138504, "learning_rate": 8.935477305108895e-05, "loss": 0.8738, "step": 2724 }, { "epoch": 1.1077454767229111, "grad_norm": 0.09177197515964508, "learning_rate": 8.931406472623653e-05, "loss": 0.8669, "step": 2725 }, { "epoch": 1.108152063427526, "grad_norm": 0.10532869398593903, "learning_rate": 8.927335640138408e-05, "loss": 0.9685, "step": 2726 }, { "epoch": 1.1085586501321407, "grad_norm": 0.09062668681144714, "learning_rate": 8.923264807653165e-05, "loss": 0.815, "step": 2727 }, { "epoch": 1.1089652368367555, "grad_norm": 0.10259625315666199, "learning_rate": 8.919193975167922e-05, "loss": 1.0085, 
"step": 2728 }, { "epoch": 1.10937182354137, "grad_norm": 0.10090707242488861, "learning_rate": 8.915123142682679e-05, "loss": 0.9229, "step": 2729 }, { "epoch": 1.109778410245985, "grad_norm": 0.10648062825202942, "learning_rate": 8.911052310197435e-05, "loss": 0.9649, "step": 2730 }, { "epoch": 1.1101849969505997, "grad_norm": 0.10565739125013351, "learning_rate": 8.906981477712193e-05, "loss": 0.9967, "step": 2731 }, { "epoch": 1.1105915836552145, "grad_norm": 0.10286445170640945, "learning_rate": 8.902910645226949e-05, "loss": 0.9484, "step": 2732 }, { "epoch": 1.1109981703598293, "grad_norm": 0.09817038476467133, "learning_rate": 8.898839812741706e-05, "loss": 0.9006, "step": 2733 }, { "epoch": 1.111404757064444, "grad_norm": 0.10235543549060822, "learning_rate": 8.894768980256462e-05, "loss": 0.9197, "step": 2734 }, { "epoch": 1.1118113437690587, "grad_norm": 0.10497331619262695, "learning_rate": 8.890698147771219e-05, "loss": 0.9529, "step": 2735 }, { "epoch": 1.1122179304736735, "grad_norm": 0.0995490625500679, "learning_rate": 8.886627315285977e-05, "loss": 0.9327, "step": 2736 }, { "epoch": 1.1126245171782883, "grad_norm": 0.10197664797306061, "learning_rate": 8.882556482800734e-05, "loss": 0.967, "step": 2737 }, { "epoch": 1.113031103882903, "grad_norm": 0.09399368613958359, "learning_rate": 8.87848565031549e-05, "loss": 0.9018, "step": 2738 }, { "epoch": 1.1134376905875178, "grad_norm": 0.09783720225095749, "learning_rate": 8.874414817830246e-05, "loss": 0.9248, "step": 2739 }, { "epoch": 1.1138442772921326, "grad_norm": 0.10185014456510544, "learning_rate": 8.870343985345003e-05, "loss": 1.0671, "step": 2740 }, { "epoch": 1.1142508639967472, "grad_norm": 0.09915787726640701, "learning_rate": 8.86627315285976e-05, "loss": 0.9916, "step": 2741 }, { "epoch": 1.114657450701362, "grad_norm": 0.08849018812179565, "learning_rate": 8.862202320374518e-05, "loss": 0.8339, "step": 2742 }, { "epoch": 1.1150640374059768, "grad_norm": 0.09823833405971527, 
"learning_rate": 8.858131487889275e-05, "loss": 0.8801, "step": 2743 }, { "epoch": 1.1154706241105916, "grad_norm": 0.10026133805513382, "learning_rate": 8.85406065540403e-05, "loss": 0.9545, "step": 2744 }, { "epoch": 1.1158772108152064, "grad_norm": 0.10472730547189713, "learning_rate": 8.849989822918787e-05, "loss": 0.9644, "step": 2745 }, { "epoch": 1.116283797519821, "grad_norm": 1352.027587890625, "learning_rate": 8.845918990433544e-05, "loss": 0.9703, "step": 2746 }, { "epoch": 1.1166903842244358, "grad_norm": 0.10632047057151794, "learning_rate": 8.8418481579483e-05, "loss": 0.9849, "step": 2747 }, { "epoch": 1.1170969709290506, "grad_norm": 0.09786203503608704, "learning_rate": 8.837777325463058e-05, "loss": 0.9203, "step": 2748 }, { "epoch": 1.1175035576336654, "grad_norm": 0.09631546586751938, "learning_rate": 8.833706492977815e-05, "loss": 0.8382, "step": 2749 }, { "epoch": 1.1179101443382802, "grad_norm": 0.1131991297006607, "learning_rate": 8.82963566049257e-05, "loss": 1.1161, "step": 2750 }, { "epoch": 1.118316731042895, "grad_norm": 0.11070824414491653, "learning_rate": 8.825564828007327e-05, "loss": 1.0256, "step": 2751 }, { "epoch": 1.1187233177475098, "grad_norm": 0.09996247291564941, "learning_rate": 8.821493995522084e-05, "loss": 0.8926, "step": 2752 }, { "epoch": 1.1191299044521243, "grad_norm": 0.1090439185500145, "learning_rate": 8.817423163036841e-05, "loss": 0.9081, "step": 2753 }, { "epoch": 1.1195364911567391, "grad_norm": 0.10079578310251236, "learning_rate": 8.813352330551599e-05, "loss": 0.9501, "step": 2754 }, { "epoch": 1.119943077861354, "grad_norm": 0.10204090178012848, "learning_rate": 8.809281498066356e-05, "loss": 0.8608, "step": 2755 }, { "epoch": 1.1203496645659687, "grad_norm": 0.09947852045297623, "learning_rate": 8.805210665581111e-05, "loss": 0.969, "step": 2756 }, { "epoch": 1.1207562512705835, "grad_norm": 0.10679657757282257, "learning_rate": 8.801139833095868e-05, "loss": 0.9747, "step": 2757 }, { "epoch": 
1.1211628379751981, "grad_norm": 0.10643206536769867, "learning_rate": 8.797069000610625e-05, "loss": 0.9658, "step": 2758 }, { "epoch": 1.121569424679813, "grad_norm": 0.09698309749364853, "learning_rate": 8.792998168125383e-05, "loss": 0.8848, "step": 2759 }, { "epoch": 1.1219760113844277, "grad_norm": 0.10456421971321106, "learning_rate": 8.78892733564014e-05, "loss": 1.0093, "step": 2760 }, { "epoch": 1.1223825980890425, "grad_norm": 0.10722696781158447, "learning_rate": 8.784856503154896e-05, "loss": 0.9656, "step": 2761 }, { "epoch": 1.1227891847936573, "grad_norm": 0.09573463350534439, "learning_rate": 8.780785670669652e-05, "loss": 0.8725, "step": 2762 }, { "epoch": 1.123195771498272, "grad_norm": 0.10508301854133606, "learning_rate": 8.776714838184409e-05, "loss": 1.0974, "step": 2763 }, { "epoch": 1.1236023582028867, "grad_norm": 0.10459071397781372, "learning_rate": 8.772644005699165e-05, "loss": 0.991, "step": 2764 }, { "epoch": 1.1240089449075015, "grad_norm": 0.10841382294893265, "learning_rate": 8.768573173213923e-05, "loss": 1.0578, "step": 2765 }, { "epoch": 1.1244155316121163, "grad_norm": 0.09497111290693283, "learning_rate": 8.76450234072868e-05, "loss": 0.8538, "step": 2766 }, { "epoch": 1.124822118316731, "grad_norm": 0.10247037559747696, "learning_rate": 8.760431508243436e-05, "loss": 0.9779, "step": 2767 }, { "epoch": 1.1252287050213459, "grad_norm": 0.09652630984783173, "learning_rate": 8.756360675758192e-05, "loss": 0.8701, "step": 2768 }, { "epoch": 1.1256352917259607, "grad_norm": 0.09543488174676895, "learning_rate": 8.752289843272949e-05, "loss": 0.8204, "step": 2769 }, { "epoch": 1.1260418784305752, "grad_norm": 0.10089685767889023, "learning_rate": 8.748219010787706e-05, "loss": 0.9666, "step": 2770 }, { "epoch": 1.12644846513519, "grad_norm": 0.09978599101305008, "learning_rate": 8.744148178302464e-05, "loss": 0.8704, "step": 2771 }, { "epoch": 1.1268550518398048, "grad_norm": 0.0978054329752922, "learning_rate": 
8.740077345817221e-05, "loss": 0.9111, "step": 2772 }, { "epoch": 1.1272616385444196, "grad_norm": 0.11330624669790268, "learning_rate": 8.736006513331976e-05, "loss": 1.0663, "step": 2773 }, { "epoch": 1.1276682252490344, "grad_norm": 0.10306650400161743, "learning_rate": 8.731935680846733e-05, "loss": 0.8753, "step": 2774 }, { "epoch": 1.128074811953649, "grad_norm": 0.10659723728895187, "learning_rate": 8.72786484836149e-05, "loss": 1.0044, "step": 2775 }, { "epoch": 1.1284813986582638, "grad_norm": 0.09779758006334305, "learning_rate": 8.723794015876246e-05, "loss": 0.904, "step": 2776 }, { "epoch": 1.1288879853628786, "grad_norm": 0.11017712950706482, "learning_rate": 8.719723183391005e-05, "loss": 0.977, "step": 2777 }, { "epoch": 1.1292945720674934, "grad_norm": 0.11215135455131531, "learning_rate": 8.715652350905761e-05, "loss": 1.0298, "step": 2778 }, { "epoch": 1.1297011587721082, "grad_norm": 0.09850891679525375, "learning_rate": 8.711581518420517e-05, "loss": 0.9411, "step": 2779 }, { "epoch": 1.130107745476723, "grad_norm": 0.11157305538654327, "learning_rate": 8.707510685935274e-05, "loss": 1.0069, "step": 2780 }, { "epoch": 1.1305143321813378, "grad_norm": 0.09551572054624557, "learning_rate": 8.70343985345003e-05, "loss": 0.8586, "step": 2781 }, { "epoch": 1.1309209188859524, "grad_norm": 0.09917795658111572, "learning_rate": 8.699369020964788e-05, "loss": 0.928, "step": 2782 }, { "epoch": 1.1313275055905672, "grad_norm": 0.10252156853675842, "learning_rate": 8.695298188479545e-05, "loss": 0.9748, "step": 2783 }, { "epoch": 1.131734092295182, "grad_norm": 0.09795645624399185, "learning_rate": 8.691227355994302e-05, "loss": 0.9089, "step": 2784 }, { "epoch": 1.1321406789997968, "grad_norm": 0.1064736470580101, "learning_rate": 8.687156523509057e-05, "loss": 1.0198, "step": 2785 }, { "epoch": 1.1325472657044116, "grad_norm": 0.10220332443714142, "learning_rate": 8.683085691023814e-05, "loss": 1.0092, "step": 2786 }, { "epoch": 1.1329538524090261, 
"grad_norm": 0.10353989899158478, "learning_rate": 8.679014858538571e-05, "loss": 0.993, "step": 2787 }, { "epoch": 1.133360439113641, "grad_norm": 0.10385473817586899, "learning_rate": 8.674944026053329e-05, "loss": 0.9788, "step": 2788 }, { "epoch": 1.1337670258182557, "grad_norm": 0.10164317488670349, "learning_rate": 8.670873193568086e-05, "loss": 0.9838, "step": 2789 }, { "epoch": 1.1341736125228705, "grad_norm": 0.10048189759254456, "learning_rate": 8.666802361082843e-05, "loss": 0.9583, "step": 2790 }, { "epoch": 1.1345801992274853, "grad_norm": 0.1055910512804985, "learning_rate": 8.662731528597598e-05, "loss": 1.01, "step": 2791 }, { "epoch": 1.1349867859321001, "grad_norm": 0.10301291197538376, "learning_rate": 8.658660696112355e-05, "loss": 0.9385, "step": 2792 }, { "epoch": 1.1353933726367147, "grad_norm": 0.10312401503324509, "learning_rate": 8.654589863627111e-05, "loss": 0.9724, "step": 2793 }, { "epoch": 1.1357999593413295, "grad_norm": 0.09779727458953857, "learning_rate": 8.65051903114187e-05, "loss": 0.9319, "step": 2794 }, { "epoch": 1.1362065460459443, "grad_norm": 0.1034865453839302, "learning_rate": 8.646448198656626e-05, "loss": 1.008, "step": 2795 }, { "epoch": 1.136613132750559, "grad_norm": 0.10120035707950592, "learning_rate": 8.642377366171383e-05, "loss": 0.9098, "step": 2796 }, { "epoch": 1.1370197194551739, "grad_norm": 0.09492117911577225, "learning_rate": 8.638306533686139e-05, "loss": 0.9175, "step": 2797 }, { "epoch": 1.1374263061597887, "grad_norm": 0.10626331716775894, "learning_rate": 8.634235701200895e-05, "loss": 1.0008, "step": 2798 }, { "epoch": 1.1378328928644033, "grad_norm": 0.1041049063205719, "learning_rate": 8.630164868715652e-05, "loss": 1.0499, "step": 2799 }, { "epoch": 1.138239479569018, "grad_norm": 0.1089131087064743, "learning_rate": 8.62609403623041e-05, "loss": 1.0344, "step": 2800 }, { "epoch": 1.1386460662736329, "grad_norm": 0.10952405631542206, "learning_rate": 8.622023203745167e-05, "loss": 1.0276, 
"step": 2801 }, { "epoch": 1.1390526529782476, "grad_norm": 0.10866481065750122, "learning_rate": 8.617952371259924e-05, "loss": 1.1198, "step": 2802 }, { "epoch": 1.1394592396828624, "grad_norm": 0.09285107254981995, "learning_rate": 8.613881538774679e-05, "loss": 0.8278, "step": 2803 }, { "epoch": 1.139865826387477, "grad_norm": 0.10245712101459503, "learning_rate": 8.609810706289436e-05, "loss": 0.8754, "step": 2804 }, { "epoch": 1.1402724130920918, "grad_norm": 0.10147379338741302, "learning_rate": 8.605739873804194e-05, "loss": 0.9467, "step": 2805 }, { "epoch": 1.1406789997967066, "grad_norm": 0.10768549889326096, "learning_rate": 8.601669041318951e-05, "loss": 0.9884, "step": 2806 }, { "epoch": 1.1410855865013214, "grad_norm": 0.10503536462783813, "learning_rate": 8.597598208833708e-05, "loss": 1.07, "step": 2807 }, { "epoch": 1.1414921732059362, "grad_norm": 0.09846587479114532, "learning_rate": 8.593527376348464e-05, "loss": 0.8614, "step": 2808 }, { "epoch": 1.141898759910551, "grad_norm": 0.10765058547258377, "learning_rate": 8.58945654386322e-05, "loss": 0.8949, "step": 2809 }, { "epoch": 1.1423053466151658, "grad_norm": 0.08815496414899826, "learning_rate": 8.585385711377976e-05, "loss": 0.8567, "step": 2810 }, { "epoch": 1.1427119333197804, "grad_norm": 0.08793221414089203, "learning_rate": 8.581314878892735e-05, "loss": 0.7961, "step": 2811 }, { "epoch": 1.1431185200243952, "grad_norm": 0.10945441573858261, "learning_rate": 8.577244046407491e-05, "loss": 0.9724, "step": 2812 }, { "epoch": 1.14352510672901, "grad_norm": 0.09829845279455185, "learning_rate": 8.573173213922248e-05, "loss": 0.9575, "step": 2813 }, { "epoch": 1.1439316934336248, "grad_norm": 0.10379641503095627, "learning_rate": 8.569102381437004e-05, "loss": 0.9815, "step": 2814 }, { "epoch": 1.1443382801382396, "grad_norm": 0.10781152546405792, "learning_rate": 8.56503154895176e-05, "loss": 1.0267, "step": 2815 }, { "epoch": 1.1447448668428541, "grad_norm": 0.09144961833953857, 
"learning_rate": 8.560960716466517e-05, "loss": 0.8425, "step": 2816 }, { "epoch": 1.145151453547469, "grad_norm": 0.1059332862496376, "learning_rate": 8.556889883981275e-05, "loss": 0.964, "step": 2817 }, { "epoch": 1.1455580402520837, "grad_norm": 0.09091661870479584, "learning_rate": 8.552819051496032e-05, "loss": 0.89, "step": 2818 }, { "epoch": 1.1459646269566985, "grad_norm": 0.09638272970914841, "learning_rate": 8.548748219010789e-05, "loss": 0.8643, "step": 2819 }, { "epoch": 1.1463712136613133, "grad_norm": 0.09995229542255402, "learning_rate": 8.544677386525544e-05, "loss": 0.9955, "step": 2820 }, { "epoch": 1.146777800365928, "grad_norm": 0.10457552224397659, "learning_rate": 8.540606554040301e-05, "loss": 1.0031, "step": 2821 }, { "epoch": 1.1471843870705427, "grad_norm": 0.10142842680215836, "learning_rate": 8.536535721555058e-05, "loss": 0.9013, "step": 2822 }, { "epoch": 1.1475909737751575, "grad_norm": 0.09488385915756226, "learning_rate": 8.532464889069816e-05, "loss": 0.9549, "step": 2823 }, { "epoch": 1.1479975604797723, "grad_norm": 0.10237988084554672, "learning_rate": 8.528394056584572e-05, "loss": 0.8964, "step": 2824 }, { "epoch": 1.148404147184387, "grad_norm": 0.09889756143093109, "learning_rate": 8.524323224099329e-05, "loss": 0.8978, "step": 2825 }, { "epoch": 1.148810733889002, "grad_norm": 0.10641611367464066, "learning_rate": 8.520252391614085e-05, "loss": 0.9597, "step": 2826 }, { "epoch": 1.1492173205936167, "grad_norm": 0.09953330457210541, "learning_rate": 8.516181559128841e-05, "loss": 0.9997, "step": 2827 }, { "epoch": 1.1496239072982313, "grad_norm": 0.10381393134593964, "learning_rate": 8.5121107266436e-05, "loss": 0.9151, "step": 2828 }, { "epoch": 1.150030494002846, "grad_norm": 0.09743472933769226, "learning_rate": 8.508039894158356e-05, "loss": 0.8761, "step": 2829 }, { "epoch": 1.1504370807074609, "grad_norm": 0.10740388184785843, "learning_rate": 8.503969061673113e-05, "loss": 1.0341, "step": 2830 }, { "epoch": 
1.1508436674120757, "grad_norm": 0.10258743166923523, "learning_rate": 8.49989822918787e-05, "loss": 0.9992, "step": 2831 }, { "epoch": 1.1512502541166905, "grad_norm": 0.09499403089284897, "learning_rate": 8.495827396702625e-05, "loss": 0.9685, "step": 2832 }, { "epoch": 1.151656840821305, "grad_norm": 0.09847860038280487, "learning_rate": 8.491756564217382e-05, "loss": 0.8483, "step": 2833 }, { "epoch": 1.1520634275259198, "grad_norm": 0.09773585200309753, "learning_rate": 8.48768573173214e-05, "loss": 0.9926, "step": 2834 }, { "epoch": 1.1524700142305346, "grad_norm": 0.10191180557012558, "learning_rate": 8.483614899246897e-05, "loss": 0.9724, "step": 2835 }, { "epoch": 1.1528766009351494, "grad_norm": 0.0922137051820755, "learning_rate": 8.479544066761654e-05, "loss": 0.9134, "step": 2836 }, { "epoch": 1.1532831876397642, "grad_norm": 0.10144314914941788, "learning_rate": 8.47547323427641e-05, "loss": 0.9085, "step": 2837 }, { "epoch": 1.153689774344379, "grad_norm": 0.1033085286617279, "learning_rate": 8.471402401791166e-05, "loss": 1.0122, "step": 2838 }, { "epoch": 1.1540963610489936, "grad_norm": 0.1011093407869339, "learning_rate": 8.467331569305923e-05, "loss": 0.9573, "step": 2839 }, { "epoch": 1.1545029477536084, "grad_norm": 0.09297510981559753, "learning_rate": 8.463260736820681e-05, "loss": 0.9212, "step": 2840 }, { "epoch": 1.1549095344582232, "grad_norm": 0.09835392981767654, "learning_rate": 8.459189904335437e-05, "loss": 0.9009, "step": 2841 }, { "epoch": 1.155316121162838, "grad_norm": 0.09254229813814163, "learning_rate": 8.455119071850194e-05, "loss": 0.8968, "step": 2842 }, { "epoch": 1.1557227078674528, "grad_norm": 0.09188991039991379, "learning_rate": 8.451048239364951e-05, "loss": 0.8474, "step": 2843 }, { "epoch": 1.1561292945720676, "grad_norm": 0.0945422425866127, "learning_rate": 8.446977406879706e-05, "loss": 0.9277, "step": 2844 }, { "epoch": 1.1565358812766822, "grad_norm": 0.09804350137710571, "learning_rate": 
8.442906574394463e-05, "loss": 0.9676, "step": 2845 }, { "epoch": 1.156942467981297, "grad_norm": 0.107129767537117, "learning_rate": 8.438835741909221e-05, "loss": 1.0516, "step": 2846 }, { "epoch": 1.1573490546859118, "grad_norm": 0.08845387399196625, "learning_rate": 8.434764909423978e-05, "loss": 0.8411, "step": 2847 }, { "epoch": 1.1577556413905266, "grad_norm": 0.09986454993486404, "learning_rate": 8.430694076938735e-05, "loss": 0.9952, "step": 2848 }, { "epoch": 1.1581622280951414, "grad_norm": 0.10270238667726517, "learning_rate": 8.426623244453492e-05, "loss": 0.9298, "step": 2849 }, { "epoch": 1.158568814799756, "grad_norm": 0.10141734033823013, "learning_rate": 8.422552411968247e-05, "loss": 0.9347, "step": 2850 }, { "epoch": 1.1589754015043707, "grad_norm": 0.1073596179485321, "learning_rate": 8.418481579483005e-05, "loss": 1.0582, "step": 2851 }, { "epoch": 1.1593819882089855, "grad_norm": 0.1060674786567688, "learning_rate": 8.414410746997762e-05, "loss": 1.1045, "step": 2852 }, { "epoch": 1.1597885749136003, "grad_norm": 0.09997183829545975, "learning_rate": 8.410339914512519e-05, "loss": 1.0096, "step": 2853 }, { "epoch": 1.1601951616182151, "grad_norm": 0.10038676112890244, "learning_rate": 8.406269082027275e-05, "loss": 0.9495, "step": 2854 }, { "epoch": 1.16060174832283, "grad_norm": 0.10116416215896606, "learning_rate": 8.402198249542032e-05, "loss": 0.941, "step": 2855 }, { "epoch": 1.1610083350274447, "grad_norm": 0.10599818825721741, "learning_rate": 8.398127417056788e-05, "loss": 0.977, "step": 2856 }, { "epoch": 1.1614149217320593, "grad_norm": 0.10183148086071014, "learning_rate": 8.394056584571546e-05, "loss": 0.8848, "step": 2857 }, { "epoch": 1.161821508436674, "grad_norm": 0.10016648471355438, "learning_rate": 8.389985752086302e-05, "loss": 0.9616, "step": 2858 }, { "epoch": 1.1622280951412889, "grad_norm": 0.0992264375090599, "learning_rate": 8.385914919601059e-05, "loss": 0.9242, "step": 2859 }, { "epoch": 1.1626346818459037, 
"grad_norm": 0.09841668605804443, "learning_rate": 8.381844087115816e-05, "loss": 0.8993, "step": 2860 }, { "epoch": 1.1630412685505185, "grad_norm": 0.10682433098554611, "learning_rate": 8.377773254630571e-05, "loss": 1.0783, "step": 2861 }, { "epoch": 1.163447855255133, "grad_norm": 0.10249704122543335, "learning_rate": 8.373702422145328e-05, "loss": 0.9872, "step": 2862 }, { "epoch": 1.1638544419597479, "grad_norm": 0.10155528038740158, "learning_rate": 8.369631589660086e-05, "loss": 0.9609, "step": 2863 }, { "epoch": 1.1642610286643627, "grad_norm": 0.10584763437509537, "learning_rate": 8.365560757174843e-05, "loss": 0.9927, "step": 2864 }, { "epoch": 1.1646676153689774, "grad_norm": 0.0969410091638565, "learning_rate": 8.3614899246896e-05, "loss": 0.9124, "step": 2865 }, { "epoch": 1.1650742020735922, "grad_norm": 0.10030529648065567, "learning_rate": 8.357419092204357e-05, "loss": 0.9888, "step": 2866 }, { "epoch": 1.165480788778207, "grad_norm": 0.11542686820030212, "learning_rate": 8.353348259719112e-05, "loss": 1.0132, "step": 2867 }, { "epoch": 1.1658873754828216, "grad_norm": 0.10081325471401215, "learning_rate": 8.349277427233869e-05, "loss": 0.8826, "step": 2868 }, { "epoch": 1.1662939621874364, "grad_norm": 0.10607606172561646, "learning_rate": 8.345206594748627e-05, "loss": 0.9492, "step": 2869 }, { "epoch": 1.1667005488920512, "grad_norm": 0.10742900520563126, "learning_rate": 8.341135762263384e-05, "loss": 1.0318, "step": 2870 }, { "epoch": 1.167107135596666, "grad_norm": 0.09361705929040909, "learning_rate": 8.33706492977814e-05, "loss": 0.9225, "step": 2871 }, { "epoch": 1.1675137223012808, "grad_norm": 0.09765168279409409, "learning_rate": 8.332994097292897e-05, "loss": 0.9638, "step": 2872 }, { "epoch": 1.1679203090058956, "grad_norm": 0.09763183444738388, "learning_rate": 8.328923264807653e-05, "loss": 0.8912, "step": 2873 }, { "epoch": 1.1683268957105102, "grad_norm": 0.10219339281320572, "learning_rate": 8.32485243232241e-05, "loss": 0.9928, 
"step": 2874 }, { "epoch": 1.168733482415125, "grad_norm": 0.10122732818126678, "learning_rate": 8.320781599837167e-05, "loss": 0.9395, "step": 2875 }, { "epoch": 1.1691400691197398, "grad_norm": 0.10562714189291, "learning_rate": 8.316710767351924e-05, "loss": 1.0062, "step": 2876 }, { "epoch": 1.1695466558243546, "grad_norm": 0.1061634048819542, "learning_rate": 8.312639934866681e-05, "loss": 0.9781, "step": 2877 }, { "epoch": 1.1699532425289694, "grad_norm": 0.09807330369949341, "learning_rate": 8.308569102381438e-05, "loss": 0.9942, "step": 2878 }, { "epoch": 1.170359829233584, "grad_norm": 0.09426051378250122, "learning_rate": 8.304498269896193e-05, "loss": 0.8829, "step": 2879 }, { "epoch": 1.1707664159381987, "grad_norm": 0.10720623284578323, "learning_rate": 8.300427437410951e-05, "loss": 1.013, "step": 2880 }, { "epoch": 1.1711730026428135, "grad_norm": 0.1007690355181694, "learning_rate": 8.296356604925708e-05, "loss": 0.9141, "step": 2881 }, { "epoch": 1.1715795893474283, "grad_norm": 0.09463895857334137, "learning_rate": 8.292285772440465e-05, "loss": 0.8538, "step": 2882 }, { "epoch": 1.1719861760520431, "grad_norm": 0.10601162165403366, "learning_rate": 8.288214939955222e-05, "loss": 0.9914, "step": 2883 }, { "epoch": 1.172392762756658, "grad_norm": 0.10166117548942566, "learning_rate": 8.284144107469978e-05, "loss": 1.0497, "step": 2884 }, { "epoch": 1.1727993494612727, "grad_norm": 0.09678583592176437, "learning_rate": 8.280073274984734e-05, "loss": 0.877, "step": 2885 }, { "epoch": 1.1732059361658873, "grad_norm": 0.10086601227521896, "learning_rate": 8.276002442499492e-05, "loss": 0.9405, "step": 2886 }, { "epoch": 1.173612522870502, "grad_norm": 0.09687767922878265, "learning_rate": 8.271931610014249e-05, "loss": 0.9439, "step": 2887 }, { "epoch": 1.174019109575117, "grad_norm": 0.10354665666818619, "learning_rate": 8.267860777529005e-05, "loss": 0.9474, "step": 2888 }, { "epoch": 1.1744256962797317, "grad_norm": 0.10761476308107376, 
"learning_rate": 8.263789945043762e-05, "loss": 0.9987, "step": 2889 }, { "epoch": 1.1748322829843465, "grad_norm": 0.10557498037815094, "learning_rate": 8.259719112558519e-05, "loss": 0.9969, "step": 2890 }, { "epoch": 1.175238869688961, "grad_norm": 0.09625912457704544, "learning_rate": 8.255648280073274e-05, "loss": 0.8682, "step": 2891 }, { "epoch": 1.1756454563935759, "grad_norm": 0.10188374668359756, "learning_rate": 8.251577447588032e-05, "loss": 0.9408, "step": 2892 }, { "epoch": 1.1760520430981907, "grad_norm": 0.10539949685335159, "learning_rate": 8.247506615102789e-05, "loss": 0.9603, "step": 2893 }, { "epoch": 1.1764586298028055, "grad_norm": 0.10070807486772537, "learning_rate": 8.243435782617546e-05, "loss": 0.9721, "step": 2894 }, { "epoch": 1.1768652165074203, "grad_norm": 0.10509887337684631, "learning_rate": 8.239364950132303e-05, "loss": 1.0064, "step": 2895 }, { "epoch": 1.177271803212035, "grad_norm": 0.10489141196012497, "learning_rate": 8.23529411764706e-05, "loss": 0.9902, "step": 2896 }, { "epoch": 1.1776783899166496, "grad_norm": 0.1037009060382843, "learning_rate": 8.231223285161816e-05, "loss": 0.9923, "step": 2897 }, { "epoch": 1.1780849766212644, "grad_norm": 0.10252012312412262, "learning_rate": 8.227152452676573e-05, "loss": 0.9177, "step": 2898 }, { "epoch": 1.1784915633258792, "grad_norm": 0.10643766820430756, "learning_rate": 8.22308162019133e-05, "loss": 0.9861, "step": 2899 }, { "epoch": 1.178898150030494, "grad_norm": 0.10174702107906342, "learning_rate": 8.219010787706087e-05, "loss": 1.0039, "step": 2900 }, { "epoch": 1.1793047367351088, "grad_norm": 0.10257185995578766, "learning_rate": 8.214939955220843e-05, "loss": 1.0292, "step": 2901 }, { "epoch": 1.1797113234397236, "grad_norm": 0.09647761285305023, "learning_rate": 8.2108691227356e-05, "loss": 0.9235, "step": 2902 }, { "epoch": 1.1801179101443382, "grad_norm": 0.09710411727428436, "learning_rate": 8.206798290250357e-05, "loss": 0.8994, "step": 2903 }, { "epoch": 
1.180524496848953, "grad_norm": 0.10740290582180023, "learning_rate": 8.202727457765114e-05, "loss": 0.9419, "step": 2904 }, { "epoch": 1.1809310835535678, "grad_norm": 0.10176997631788254, "learning_rate": 8.19865662527987e-05, "loss": 0.9735, "step": 2905 }, { "epoch": 1.1813376702581826, "grad_norm": 0.11002610623836517, "learning_rate": 8.194585792794627e-05, "loss": 1.0246, "step": 2906 }, { "epoch": 1.1817442569627974, "grad_norm": 0.09396279603242874, "learning_rate": 8.190514960309384e-05, "loss": 0.9448, "step": 2907 }, { "epoch": 1.182150843667412, "grad_norm": 0.09984367340803146, "learning_rate": 8.18644412782414e-05, "loss": 0.9305, "step": 2908 }, { "epoch": 1.1825574303720268, "grad_norm": 0.10197685658931732, "learning_rate": 8.182373295338897e-05, "loss": 0.9936, "step": 2909 }, { "epoch": 1.1829640170766416, "grad_norm": 0.10787008702754974, "learning_rate": 8.178302462853654e-05, "loss": 0.9962, "step": 2910 }, { "epoch": 1.1833706037812564, "grad_norm": 0.09014932066202164, "learning_rate": 8.174231630368411e-05, "loss": 0.8241, "step": 2911 }, { "epoch": 1.1837771904858712, "grad_norm": 0.10313025861978531, "learning_rate": 8.170160797883168e-05, "loss": 0.9016, "step": 2912 }, { "epoch": 1.184183777190486, "grad_norm": 0.1036885604262352, "learning_rate": 8.166089965397924e-05, "loss": 0.9661, "step": 2913 }, { "epoch": 1.1845903638951008, "grad_norm": 0.09668964147567749, "learning_rate": 8.16201913291268e-05, "loss": 0.8826, "step": 2914 }, { "epoch": 1.1849969505997153, "grad_norm": 0.09810838848352432, "learning_rate": 8.157948300427438e-05, "loss": 0.8429, "step": 2915 }, { "epoch": 1.1854035373043301, "grad_norm": 0.10050015151500702, "learning_rate": 8.153877467942195e-05, "loss": 0.8683, "step": 2916 }, { "epoch": 1.185810124008945, "grad_norm": 0.10292979329824448, "learning_rate": 8.149806635456951e-05, "loss": 0.9993, "step": 2917 }, { "epoch": 1.1862167107135597, "grad_norm": 0.11106216162443161, "learning_rate": 
8.145735802971708e-05, "loss": 1.0484, "step": 2918 }, { "epoch": 1.1866232974181745, "grad_norm": 0.10027094185352325, "learning_rate": 8.141664970486465e-05, "loss": 0.9296, "step": 2919 }, { "epoch": 1.187029884122789, "grad_norm": 0.1021319329738617, "learning_rate": 8.137594138001222e-05, "loss": 0.9502, "step": 2920 }, { "epoch": 1.187436470827404, "grad_norm": 0.09963817149400711, "learning_rate": 8.133523305515979e-05, "loss": 0.9158, "step": 2921 }, { "epoch": 1.1878430575320187, "grad_norm": 0.10387451201677322, "learning_rate": 8.129452473030735e-05, "loss": 0.9993, "step": 2922 }, { "epoch": 1.1882496442366335, "grad_norm": 0.09406285732984543, "learning_rate": 8.125381640545492e-05, "loss": 0.7994, "step": 2923 }, { "epoch": 1.1886562309412483, "grad_norm": 0.10746529698371887, "learning_rate": 8.121310808060249e-05, "loss": 1.0508, "step": 2924 }, { "epoch": 1.1890628176458629, "grad_norm": 0.09646695107221603, "learning_rate": 8.117239975575006e-05, "loss": 0.8983, "step": 2925 }, { "epoch": 1.1894694043504777, "grad_norm": 0.10675112158060074, "learning_rate": 8.113169143089762e-05, "loss": 1.0671, "step": 2926 }, { "epoch": 1.1898759910550925, "grad_norm": 0.10293237864971161, "learning_rate": 8.109098310604519e-05, "loss": 1.0689, "step": 2927 }, { "epoch": 1.1902825777597072, "grad_norm": 0.10142801702022552, "learning_rate": 8.105027478119276e-05, "loss": 1.0164, "step": 2928 }, { "epoch": 1.190689164464322, "grad_norm": 0.10416755080223083, "learning_rate": 8.100956645634033e-05, "loss": 1.0012, "step": 2929 }, { "epoch": 1.1910957511689368, "grad_norm": 0.102670818567276, "learning_rate": 8.09688581314879e-05, "loss": 0.9931, "step": 2930 }, { "epoch": 1.1915023378735516, "grad_norm": 0.09856782853603363, "learning_rate": 8.092814980663546e-05, "loss": 0.9588, "step": 2931 }, { "epoch": 1.1919089245781662, "grad_norm": 0.09374082833528519, "learning_rate": 8.088744148178303e-05, "loss": 0.8243, "step": 2932 }, { "epoch": 1.192315511282781, 
"grad_norm": 0.10136809945106506, "learning_rate": 8.08467331569306e-05, "loss": 0.907, "step": 2933 }, { "epoch": 1.1927220979873958, "grad_norm": 0.09178245067596436, "learning_rate": 8.080602483207816e-05, "loss": 0.8386, "step": 2934 }, { "epoch": 1.1931286846920106, "grad_norm": 0.10176187753677368, "learning_rate": 8.076531650722573e-05, "loss": 0.991, "step": 2935 }, { "epoch": 1.1935352713966254, "grad_norm": 0.11457332223653793, "learning_rate": 8.07246081823733e-05, "loss": 1.0754, "step": 2936 }, { "epoch": 1.19394185810124, "grad_norm": 0.10390684008598328, "learning_rate": 8.068389985752087e-05, "loss": 0.9519, "step": 2937 }, { "epoch": 1.1943484448058548, "grad_norm": 0.09363167732954025, "learning_rate": 8.064319153266844e-05, "loss": 0.8842, "step": 2938 }, { "epoch": 1.1947550315104696, "grad_norm": 0.09722575545310974, "learning_rate": 8.0602483207816e-05, "loss": 0.9828, "step": 2939 }, { "epoch": 1.1951616182150844, "grad_norm": 1.2541481256484985, "learning_rate": 8.056177488296357e-05, "loss": 1.0317, "step": 2940 }, { "epoch": 1.1955682049196992, "grad_norm": 0.10507947951555252, "learning_rate": 8.052106655811114e-05, "loss": 0.9299, "step": 2941 }, { "epoch": 1.195974791624314, "grad_norm": 0.10633766651153564, "learning_rate": 8.04803582332587e-05, "loss": 0.9335, "step": 2942 }, { "epoch": 1.1963813783289288, "grad_norm": 0.11865809559822083, "learning_rate": 8.043964990840627e-05, "loss": 0.986, "step": 2943 }, { "epoch": 1.1967879650335433, "grad_norm": 0.11408359557390213, "learning_rate": 8.039894158355384e-05, "loss": 0.9278, "step": 2944 }, { "epoch": 1.1971945517381581, "grad_norm": 0.117740198969841, "learning_rate": 8.035823325870141e-05, "loss": 0.9724, "step": 2945 }, { "epoch": 1.197601138442773, "grad_norm": 0.12005554139614105, "learning_rate": 8.031752493384898e-05, "loss": 1.054, "step": 2946 }, { "epoch": 1.1980077251473877, "grad_norm": 0.10749775171279907, "learning_rate": 8.027681660899654e-05, "loss": 0.9919, "step": 
2947 }, { "epoch": 1.1984143118520025, "grad_norm": 0.110999695956707, "learning_rate": 8.023610828414411e-05, "loss": 0.9795, "step": 2948 }, { "epoch": 1.198820898556617, "grad_norm": 0.09761643409729004, "learning_rate": 8.019539995929168e-05, "loss": 0.9312, "step": 2949 }, { "epoch": 1.199227485261232, "grad_norm": 0.10558291524648666, "learning_rate": 8.015469163443925e-05, "loss": 0.9006, "step": 2950 }, { "epoch": 1.1996340719658467, "grad_norm": 0.10757201164960861, "learning_rate": 8.011398330958681e-05, "loss": 0.9704, "step": 2951 }, { "epoch": 1.2000406586704615, "grad_norm": 0.11688996106386185, "learning_rate": 8.007327498473438e-05, "loss": 1.0249, "step": 2952 }, { "epoch": 1.2004472453750763, "grad_norm": 0.10010217875242233, "learning_rate": 8.003256665988195e-05, "loss": 0.9378, "step": 2953 }, { "epoch": 1.2008538320796909, "grad_norm": 0.10797873884439468, "learning_rate": 7.999185833502952e-05, "loss": 1.0828, "step": 2954 }, { "epoch": 1.2012604187843057, "grad_norm": 0.09149176627397537, "learning_rate": 7.995115001017708e-05, "loss": 0.871, "step": 2955 }, { "epoch": 1.2016670054889205, "grad_norm": 0.10462988913059235, "learning_rate": 7.991044168532465e-05, "loss": 0.905, "step": 2956 }, { "epoch": 1.2020735921935353, "grad_norm": 0.10012760758399963, "learning_rate": 7.986973336047222e-05, "loss": 0.9237, "step": 2957 }, { "epoch": 1.20248017889815, "grad_norm": 0.0970139279961586, "learning_rate": 7.982902503561979e-05, "loss": 0.8619, "step": 2958 }, { "epoch": 1.2028867656027649, "grad_norm": 0.1061381995677948, "learning_rate": 7.978831671076736e-05, "loss": 0.9103, "step": 2959 }, { "epoch": 1.2032933523073797, "grad_norm": 0.09973873943090439, "learning_rate": 7.974760838591492e-05, "loss": 0.8852, "step": 2960 }, { "epoch": 1.2036999390119942, "grad_norm": 0.11318770796060562, "learning_rate": 7.970690006106249e-05, "loss": 1.1028, "step": 2961 }, { "epoch": 1.204106525716609, "grad_norm": 0.09135531634092331, "learning_rate": 
7.966619173621006e-05, "loss": 0.8432, "step": 2962 }, { "epoch": 1.2045131124212238, "grad_norm": 0.1008799597620964, "learning_rate": 7.962548341135763e-05, "loss": 0.9173, "step": 2963 }, { "epoch": 1.2049196991258386, "grad_norm": 0.09507846087217331, "learning_rate": 7.95847750865052e-05, "loss": 0.9164, "step": 2964 }, { "epoch": 1.2053262858304534, "grad_norm": 0.10645583271980286, "learning_rate": 7.954406676165276e-05, "loss": 0.968, "step": 2965 }, { "epoch": 1.205732872535068, "grad_norm": 0.09509435296058655, "learning_rate": 7.950335843680033e-05, "loss": 0.8426, "step": 2966 }, { "epoch": 1.2061394592396828, "grad_norm": 0.09644295275211334, "learning_rate": 7.94626501119479e-05, "loss": 0.9195, "step": 2967 }, { "epoch": 1.2065460459442976, "grad_norm": 0.1063341349363327, "learning_rate": 7.942194178709546e-05, "loss": 0.8805, "step": 2968 }, { "epoch": 1.2069526326489124, "grad_norm": 0.1006791740655899, "learning_rate": 7.938123346224303e-05, "loss": 0.96, "step": 2969 }, { "epoch": 1.2073592193535272, "grad_norm": 0.11306698620319366, "learning_rate": 7.93405251373906e-05, "loss": 1.0238, "step": 2970 }, { "epoch": 1.207765806058142, "grad_norm": 0.10371936857700348, "learning_rate": 7.929981681253817e-05, "loss": 0.9954, "step": 2971 }, { "epoch": 1.2081723927627566, "grad_norm": 0.11341479420661926, "learning_rate": 7.925910848768573e-05, "loss": 1.122, "step": 2972 }, { "epoch": 1.2085789794673714, "grad_norm": 0.09975296258926392, "learning_rate": 7.92184001628333e-05, "loss": 0.8842, "step": 2973 }, { "epoch": 1.2089855661719862, "grad_norm": 0.10060261934995651, "learning_rate": 7.917769183798087e-05, "loss": 0.8922, "step": 2974 }, { "epoch": 1.209392152876601, "grad_norm": 0.10362927615642548, "learning_rate": 7.913698351312844e-05, "loss": 0.9477, "step": 2975 }, { "epoch": 1.2097987395812158, "grad_norm": 0.10995787382125854, "learning_rate": 7.9096275188276e-05, "loss": 0.9736, "step": 2976 }, { "epoch": 1.2102053262858306, 
"grad_norm": 0.10389982908964157, "learning_rate": 7.905556686342357e-05, "loss": 1.0125, "step": 2977 }, { "epoch": 1.2106119129904451, "grad_norm": 0.10749273002147675, "learning_rate": 7.901485853857114e-05, "loss": 0.973, "step": 2978 }, { "epoch": 1.21101849969506, "grad_norm": 0.10813795030117035, "learning_rate": 7.897415021371871e-05, "loss": 1.0413, "step": 2979 }, { "epoch": 1.2114250863996747, "grad_norm": 0.10621776431798935, "learning_rate": 7.893344188886628e-05, "loss": 0.9746, "step": 2980 }, { "epoch": 1.2118316731042895, "grad_norm": 0.09385337680578232, "learning_rate": 7.889273356401384e-05, "loss": 0.9024, "step": 2981 }, { "epoch": 1.2122382598089043, "grad_norm": 0.10254476219415665, "learning_rate": 7.885202523916141e-05, "loss": 0.956, "step": 2982 }, { "epoch": 1.212644846513519, "grad_norm": 0.11078932881355286, "learning_rate": 7.881131691430898e-05, "loss": 1.0671, "step": 2983 }, { "epoch": 1.2130514332181337, "grad_norm": 0.10841862857341766, "learning_rate": 7.877060858945655e-05, "loss": 0.8946, "step": 2984 }, { "epoch": 1.2134580199227485, "grad_norm": 0.09590809792280197, "learning_rate": 7.872990026460411e-05, "loss": 0.8795, "step": 2985 }, { "epoch": 1.2138646066273633, "grad_norm": 0.10805724561214447, "learning_rate": 7.868919193975168e-05, "loss": 1.096, "step": 2986 }, { "epoch": 1.214271193331978, "grad_norm": 0.10256502032279968, "learning_rate": 7.864848361489925e-05, "loss": 0.9098, "step": 2987 }, { "epoch": 1.2146777800365929, "grad_norm": 0.10332726687192917, "learning_rate": 7.860777529004682e-05, "loss": 0.9683, "step": 2988 }, { "epoch": 1.2150843667412077, "grad_norm": 0.10192207992076874, "learning_rate": 7.85670669651944e-05, "loss": 0.8888, "step": 2989 }, { "epoch": 1.2154909534458223, "grad_norm": 0.10659588873386383, "learning_rate": 7.852635864034195e-05, "loss": 1.0104, "step": 2990 }, { "epoch": 1.215897540150437, "grad_norm": 0.11742359399795532, "learning_rate": 7.848565031548952e-05, "loss": 0.9874, 
"step": 2991 }, { "epoch": 1.2163041268550518, "grad_norm": 0.10197114944458008, "learning_rate": 7.844494199063709e-05, "loss": 0.9632, "step": 2992 }, { "epoch": 1.2167107135596666, "grad_norm": 0.10864005237817764, "learning_rate": 7.840423366578466e-05, "loss": 1.0323, "step": 2993 }, { "epoch": 1.2171173002642814, "grad_norm": 0.09765638411045074, "learning_rate": 7.836352534093222e-05, "loss": 0.8922, "step": 2994 }, { "epoch": 1.217523886968896, "grad_norm": 0.09913370013237, "learning_rate": 7.832281701607979e-05, "loss": 0.842, "step": 2995 }, { "epoch": 1.2179304736735108, "grad_norm": 0.10157128423452377, "learning_rate": 7.828210869122736e-05, "loss": 0.9314, "step": 2996 }, { "epoch": 1.2183370603781256, "grad_norm": 0.11263057589530945, "learning_rate": 7.824140036637493e-05, "loss": 0.9824, "step": 2997 }, { "epoch": 1.2187436470827404, "grad_norm": 0.1071547195315361, "learning_rate": 7.82006920415225e-05, "loss": 1.0332, "step": 2998 }, { "epoch": 1.2191502337873552, "grad_norm": 0.097862608730793, "learning_rate": 7.815998371667006e-05, "loss": 0.891, "step": 2999 }, { "epoch": 1.21955682049197, "grad_norm": 0.100653737783432, "learning_rate": 7.811927539181763e-05, "loss": 0.8556, "step": 3000 }, { "epoch": 1.2199634071965846, "grad_norm": 0.09933151304721832, "learning_rate": 7.80785670669652e-05, "loss": 0.9655, "step": 3001 }, { "epoch": 1.2203699939011994, "grad_norm": 0.09901740401983261, "learning_rate": 7.803785874211276e-05, "loss": 0.8788, "step": 3002 }, { "epoch": 1.2207765806058142, "grad_norm": 0.10724866390228271, "learning_rate": 7.799715041726033e-05, "loss": 1.0607, "step": 3003 }, { "epoch": 1.221183167310429, "grad_norm": 0.10218902677297592, "learning_rate": 7.79564420924079e-05, "loss": 0.8872, "step": 3004 }, { "epoch": 1.2215897540150438, "grad_norm": 0.10645647346973419, "learning_rate": 7.791573376755547e-05, "loss": 0.9713, "step": 3005 }, { "epoch": 1.2219963407196586, "grad_norm": 0.09906148910522461, "learning_rate": 
7.787502544270303e-05, "loss": 0.893, "step": 3006 }, { "epoch": 1.2224029274242731, "grad_norm": 0.10134434700012207, "learning_rate": 7.78343171178506e-05, "loss": 0.9174, "step": 3007 }, { "epoch": 1.222809514128888, "grad_norm": 0.09524626284837723, "learning_rate": 7.779360879299817e-05, "loss": 0.884, "step": 3008 }, { "epoch": 1.2232161008335027, "grad_norm": 0.1112762987613678, "learning_rate": 7.775290046814574e-05, "loss": 0.9625, "step": 3009 }, { "epoch": 1.2236226875381175, "grad_norm": 0.10021709650754929, "learning_rate": 7.77121921432933e-05, "loss": 0.9371, "step": 3010 }, { "epoch": 1.2240292742427323, "grad_norm": 0.09872548282146454, "learning_rate": 7.767148381844087e-05, "loss": 0.9013, "step": 3011 }, { "epoch": 1.224435860947347, "grad_norm": 0.10400618612766266, "learning_rate": 7.763077549358845e-05, "loss": 0.8636, "step": 3012 }, { "epoch": 1.2248424476519617, "grad_norm": 0.09894006699323654, "learning_rate": 7.759006716873601e-05, "loss": 0.8982, "step": 3013 }, { "epoch": 1.2252490343565765, "grad_norm": 0.10343599319458008, "learning_rate": 7.754935884388358e-05, "loss": 0.9316, "step": 3014 }, { "epoch": 1.2256556210611913, "grad_norm": 0.10449540615081787, "learning_rate": 7.750865051903114e-05, "loss": 0.9958, "step": 3015 }, { "epoch": 1.226062207765806, "grad_norm": 0.11554834991693497, "learning_rate": 7.746794219417871e-05, "loss": 1.0841, "step": 3016 }, { "epoch": 1.226468794470421, "grad_norm": 0.0996081531047821, "learning_rate": 7.742723386932628e-05, "loss": 0.8628, "step": 3017 }, { "epoch": 1.2268753811750357, "grad_norm": 0.10145995020866394, "learning_rate": 7.738652554447386e-05, "loss": 0.9285, "step": 3018 }, { "epoch": 1.2272819678796503, "grad_norm": 0.10826444625854492, "learning_rate": 7.734581721962141e-05, "loss": 0.9654, "step": 3019 }, { "epoch": 1.227688554584265, "grad_norm": 0.09943236410617828, "learning_rate": 7.730510889476898e-05, "loss": 0.9524, "step": 3020 }, { "epoch": 1.2280951412888799, 
"grad_norm": 0.10398366302251816, "learning_rate": 7.726440056991655e-05, "loss": 0.9339, "step": 3021 }, { "epoch": 1.2285017279934947, "grad_norm": 0.10858220607042313, "learning_rate": 7.722369224506412e-05, "loss": 1.0769, "step": 3022 }, { "epoch": 1.2289083146981095, "grad_norm": 0.10792049020528793, "learning_rate": 7.718298392021168e-05, "loss": 1.0769, "step": 3023 }, { "epoch": 1.229314901402724, "grad_norm": 0.10450518876314163, "learning_rate": 7.714227559535927e-05, "loss": 0.9088, "step": 3024 }, { "epoch": 1.2297214881073388, "grad_norm": 0.10447126626968384, "learning_rate": 7.710156727050682e-05, "loss": 0.9865, "step": 3025 }, { "epoch": 1.2301280748119536, "grad_norm": 0.1073504388332367, "learning_rate": 7.706085894565439e-05, "loss": 0.9695, "step": 3026 }, { "epoch": 1.2305346615165684, "grad_norm": 0.09741394966840744, "learning_rate": 7.702015062080195e-05, "loss": 0.9064, "step": 3027 }, { "epoch": 1.2309412482211832, "grad_norm": 0.09577346593141556, "learning_rate": 7.697944229594952e-05, "loss": 0.9028, "step": 3028 }, { "epoch": 1.2313478349257978, "grad_norm": 7.139596939086914, "learning_rate": 7.693873397109709e-05, "loss": 0.9565, "step": 3029 }, { "epoch": 1.2317544216304126, "grad_norm": 0.1124730035662651, "learning_rate": 7.689802564624467e-05, "loss": 1.0396, "step": 3030 }, { "epoch": 1.2321610083350274, "grad_norm": 0.10936611145734787, "learning_rate": 7.685731732139223e-05, "loss": 0.9758, "step": 3031 }, { "epoch": 1.2325675950396422, "grad_norm": 0.09854471683502197, "learning_rate": 7.681660899653979e-05, "loss": 0.9238, "step": 3032 }, { "epoch": 1.232974181744257, "grad_norm": 0.10498196631669998, "learning_rate": 7.677590067168736e-05, "loss": 0.9476, "step": 3033 }, { "epoch": 1.2333807684488718, "grad_norm": 0.10828989744186401, "learning_rate": 7.673519234683493e-05, "loss": 1.1091, "step": 3034 }, { "epoch": 1.2337873551534866, "grad_norm": 0.10605454444885254, "learning_rate": 7.669448402198251e-05, "loss": 
0.9726, "step": 3035 }, { "epoch": 1.2341939418581012, "grad_norm": 0.10483945906162262, "learning_rate": 7.665377569713008e-05, "loss": 0.9175, "step": 3036 }, { "epoch": 1.234600528562716, "grad_norm": 0.1095857173204422, "learning_rate": 7.661306737227763e-05, "loss": 0.9373, "step": 3037 }, { "epoch": 1.2350071152673308, "grad_norm": 0.1086532399058342, "learning_rate": 7.65723590474252e-05, "loss": 1.0688, "step": 3038 }, { "epoch": 1.2354137019719456, "grad_norm": 0.1058100163936615, "learning_rate": 7.653165072257277e-05, "loss": 0.9784, "step": 3039 }, { "epoch": 1.2358202886765604, "grad_norm": 0.10250196605920792, "learning_rate": 7.649094239772033e-05, "loss": 0.9483, "step": 3040 }, { "epoch": 1.236226875381175, "grad_norm": 0.10203064978122711, "learning_rate": 7.645023407286792e-05, "loss": 0.9149, "step": 3041 }, { "epoch": 1.2366334620857897, "grad_norm": 0.10342703759670258, "learning_rate": 7.640952574801547e-05, "loss": 1.0001, "step": 3042 }, { "epoch": 1.2370400487904045, "grad_norm": 0.10385413467884064, "learning_rate": 7.636881742316304e-05, "loss": 0.9545, "step": 3043 }, { "epoch": 1.2374466354950193, "grad_norm": 0.11165875196456909, "learning_rate": 7.63281090983106e-05, "loss": 1.0679, "step": 3044 }, { "epoch": 1.2378532221996341, "grad_norm": 0.09251503646373749, "learning_rate": 7.628740077345817e-05, "loss": 0.8941, "step": 3045 }, { "epoch": 1.238259808904249, "grad_norm": 0.1017691120505333, "learning_rate": 7.624669244860574e-05, "loss": 0.9473, "step": 3046 }, { "epoch": 1.2386663956088637, "grad_norm": 0.11081571877002716, "learning_rate": 7.620598412375332e-05, "loss": 0.9368, "step": 3047 }, { "epoch": 1.2390729823134783, "grad_norm": 0.09933064877986908, "learning_rate": 7.616527579890087e-05, "loss": 0.96, "step": 3048 }, { "epoch": 1.239479569018093, "grad_norm": 0.10422008484601974, "learning_rate": 7.612456747404844e-05, "loss": 0.9896, "step": 3049 }, { "epoch": 1.2398861557227079, "grad_norm": 0.10104691237211227, 
"learning_rate": 7.608385914919601e-05, "loss": 0.9302, "step": 3050 }, { "epoch": 1.2402927424273227, "grad_norm": 0.10157372057437897, "learning_rate": 7.604315082434358e-05, "loss": 0.9781, "step": 3051 }, { "epoch": 1.2406993291319375, "grad_norm": 0.1113799512386322, "learning_rate": 7.600244249949115e-05, "loss": 0.9526, "step": 3052 }, { "epoch": 1.241105915836552, "grad_norm": 0.09875572472810745, "learning_rate": 7.596173417463873e-05, "loss": 0.9739, "step": 3053 }, { "epoch": 1.2415125025411669, "grad_norm": 0.09874456375837326, "learning_rate": 7.592102584978628e-05, "loss": 0.8575, "step": 3054 }, { "epoch": 1.2419190892457816, "grad_norm": 0.10408841073513031, "learning_rate": 7.588031752493385e-05, "loss": 0.9692, "step": 3055 }, { "epoch": 1.2423256759503964, "grad_norm": 0.10415156930685043, "learning_rate": 7.583960920008142e-05, "loss": 0.9416, "step": 3056 }, { "epoch": 1.2427322626550112, "grad_norm": 0.08860078454017639, "learning_rate": 7.579890087522898e-05, "loss": 0.8252, "step": 3057 }, { "epoch": 1.2431388493596258, "grad_norm": 0.10806316137313843, "learning_rate": 7.575819255037656e-05, "loss": 1.0561, "step": 3058 }, { "epoch": 1.2435454360642406, "grad_norm": 0.10151507705450058, "learning_rate": 7.571748422552413e-05, "loss": 1.0323, "step": 3059 }, { "epoch": 1.2439520227688554, "grad_norm": 0.10553670674562454, "learning_rate": 7.567677590067169e-05, "loss": 0.9316, "step": 3060 }, { "epoch": 1.2443586094734702, "grad_norm": 0.10227076709270477, "learning_rate": 7.563606757581925e-05, "loss": 0.9502, "step": 3061 }, { "epoch": 1.244765196178085, "grad_norm": 0.09627656638622284, "learning_rate": 7.559535925096682e-05, "loss": 0.8739, "step": 3062 }, { "epoch": 1.2451717828826998, "grad_norm": 0.10383637249469757, "learning_rate": 7.555465092611439e-05, "loss": 0.9143, "step": 3063 }, { "epoch": 1.2455783695873146, "grad_norm": 0.09451835602521896, "learning_rate": 7.551394260126197e-05, "loss": 0.876, "step": 3064 }, { "epoch": 
1.2459849562919292, "grad_norm": 0.09634227305650711, "learning_rate": 7.547323427640954e-05, "loss": 0.9188, "step": 3065 }, { "epoch": 1.246391542996544, "grad_norm": 0.10271312296390533, "learning_rate": 7.543252595155709e-05, "loss": 0.8986, "step": 3066 }, { "epoch": 1.2467981297011588, "grad_norm": 0.10047610104084015, "learning_rate": 7.539181762670466e-05, "loss": 0.9865, "step": 3067 }, { "epoch": 1.2472047164057736, "grad_norm": 0.09453471750020981, "learning_rate": 7.535110930185223e-05, "loss": 0.8758, "step": 3068 }, { "epoch": 1.2476113031103884, "grad_norm": 0.10748513042926788, "learning_rate": 7.53104009769998e-05, "loss": 0.9987, "step": 3069 }, { "epoch": 1.248017889815003, "grad_norm": 0.11174870282411575, "learning_rate": 7.526969265214738e-05, "loss": 0.9974, "step": 3070 }, { "epoch": 1.2484244765196177, "grad_norm": 0.10034792870283127, "learning_rate": 7.522898432729494e-05, "loss": 0.8953, "step": 3071 }, { "epoch": 1.2488310632242325, "grad_norm": 0.10158214718103409, "learning_rate": 7.51882760024425e-05, "loss": 0.9504, "step": 3072 }, { "epoch": 1.2492376499288473, "grad_norm": 0.10856463760137558, "learning_rate": 7.514756767759007e-05, "loss": 0.9936, "step": 3073 }, { "epoch": 1.2496442366334621, "grad_norm": 0.09521564841270447, "learning_rate": 7.510685935273763e-05, "loss": 0.8602, "step": 3074 }, { "epoch": 1.2500508233380767, "grad_norm": 0.1103881299495697, "learning_rate": 7.50661510278852e-05, "loss": 0.949, "step": 3075 }, { "epoch": 1.2504574100426917, "grad_norm": 0.10218459367752075, "learning_rate": 7.502544270303278e-05, "loss": 0.9122, "step": 3076 }, { "epoch": 1.2508639967473063, "grad_norm": 0.1006489172577858, "learning_rate": 7.498473437818035e-05, "loss": 0.9942, "step": 3077 }, { "epoch": 1.251270583451921, "grad_norm": 0.09525283426046371, "learning_rate": 7.49440260533279e-05, "loss": 0.8852, "step": 3078 }, { "epoch": 1.251677170156536, "grad_norm": 0.10537436604499817, "learning_rate": 
7.490331772847547e-05, "loss": 1.0135, "step": 3079 }, { "epoch": 1.2520837568611507, "grad_norm": 0.10263707488775253, "learning_rate": 7.486260940362304e-05, "loss": 1.0011, "step": 3080 }, { "epoch": 1.2524903435657655, "grad_norm": 0.10623662173748016, "learning_rate": 7.482190107877062e-05, "loss": 1.0535, "step": 3081 }, { "epoch": 1.25289693027038, "grad_norm": 0.09408336877822876, "learning_rate": 7.478119275391819e-05, "loss": 0.8362, "step": 3082 }, { "epoch": 1.2533035169749949, "grad_norm": 0.0979636088013649, "learning_rate": 7.474048442906576e-05, "loss": 0.9462, "step": 3083 }, { "epoch": 1.2537101036796097, "grad_norm": 0.10067994147539139, "learning_rate": 7.469977610421331e-05, "loss": 0.9655, "step": 3084 }, { "epoch": 1.2541166903842245, "grad_norm": 0.10560835152864456, "learning_rate": 7.465906777936088e-05, "loss": 1.0768, "step": 3085 }, { "epoch": 1.2545232770888393, "grad_norm": 0.09928199648857117, "learning_rate": 7.461835945450845e-05, "loss": 0.93, "step": 3086 }, { "epoch": 1.2549298637934538, "grad_norm": 0.10560108721256256, "learning_rate": 7.457765112965603e-05, "loss": 0.9143, "step": 3087 }, { "epoch": 1.2553364504980686, "grad_norm": 0.10322803258895874, "learning_rate": 7.45369428048036e-05, "loss": 0.9063, "step": 3088 }, { "epoch": 1.2557430372026834, "grad_norm": 0.10367201268672943, "learning_rate": 7.449623447995115e-05, "loss": 0.9393, "step": 3089 }, { "epoch": 1.2561496239072982, "grad_norm": 0.0985729992389679, "learning_rate": 7.445552615509872e-05, "loss": 0.9015, "step": 3090 }, { "epoch": 1.256556210611913, "grad_norm": 0.09679027646780014, "learning_rate": 7.441481783024628e-05, "loss": 0.9666, "step": 3091 }, { "epoch": 1.2569627973165278, "grad_norm": 0.10759008675813675, "learning_rate": 7.437410950539385e-05, "loss": 1.0382, "step": 3092 }, { "epoch": 1.2573693840211426, "grad_norm": 0.10421041399240494, "learning_rate": 7.433340118054143e-05, "loss": 0.9283, "step": 3093 }, { "epoch": 1.2577759707257572, 
"grad_norm": 0.10084979981184006, "learning_rate": 7.4292692855689e-05, "loss": 0.9368, "step": 3094 }, { "epoch": 1.258182557430372, "grad_norm": 0.09285192936658859, "learning_rate": 7.425198453083655e-05, "loss": 0.8245, "step": 3095 }, { "epoch": 1.2585891441349868, "grad_norm": 0.10228876024484634, "learning_rate": 7.421127620598412e-05, "loss": 0.9542, "step": 3096 }, { "epoch": 1.2589957308396016, "grad_norm": 0.11151353269815445, "learning_rate": 7.417056788113169e-05, "loss": 1.0976, "step": 3097 }, { "epoch": 1.2594023175442164, "grad_norm": 0.10276535898447037, "learning_rate": 7.412985955627926e-05, "loss": 0.9056, "step": 3098 }, { "epoch": 1.259808904248831, "grad_norm": 0.09734170138835907, "learning_rate": 7.408915123142684e-05, "loss": 0.9009, "step": 3099 }, { "epoch": 1.2602154909534458, "grad_norm": 0.09822948276996613, "learning_rate": 7.40484429065744e-05, "loss": 1.0048, "step": 3100 }, { "epoch": 1.2606220776580606, "grad_norm": 0.10522522032260895, "learning_rate": 7.400773458172196e-05, "loss": 1.043, "step": 3101 }, { "epoch": 1.2610286643626754, "grad_norm": 0.10531225055456161, "learning_rate": 7.396702625686953e-05, "loss": 1.0161, "step": 3102 }, { "epoch": 1.2614352510672902, "grad_norm": 0.08887706696987152, "learning_rate": 7.39263179320171e-05, "loss": 0.7876, "step": 3103 }, { "epoch": 1.2618418377719047, "grad_norm": 0.10563154518604279, "learning_rate": 7.388560960716468e-05, "loss": 0.952, "step": 3104 }, { "epoch": 1.2622484244765197, "grad_norm": 0.09857525676488876, "learning_rate": 7.384490128231224e-05, "loss": 1.0119, "step": 3105 }, { "epoch": 1.2626550111811343, "grad_norm": 0.10056712478399277, "learning_rate": 7.380419295745981e-05, "loss": 0.9256, "step": 3106 }, { "epoch": 1.2630615978857491, "grad_norm": 0.11001981049776077, "learning_rate": 7.376348463260737e-05, "loss": 1.0066, "step": 3107 }, { "epoch": 1.263468184590364, "grad_norm": 0.10128811001777649, "learning_rate": 7.372277630775493e-05, "loss": 1.0057, 
"step": 3108 }, { "epoch": 1.2638747712949787, "grad_norm": 0.08895913511514664, "learning_rate": 7.36820679829025e-05, "loss": 0.8264, "step": 3109 }, { "epoch": 1.2642813579995935, "grad_norm": 0.10075806081295013, "learning_rate": 7.364135965805008e-05, "loss": 0.9723, "step": 3110 }, { "epoch": 1.264687944704208, "grad_norm": 0.1001645028591156, "learning_rate": 7.360065133319765e-05, "loss": 0.9413, "step": 3111 }, { "epoch": 1.2650945314088229, "grad_norm": 0.10511557012796402, "learning_rate": 7.355994300834522e-05, "loss": 0.9366, "step": 3112 }, { "epoch": 1.2655011181134377, "grad_norm": 0.09894498437643051, "learning_rate": 7.351923468349277e-05, "loss": 0.8659, "step": 3113 }, { "epoch": 1.2659077048180525, "grad_norm": 0.10577372461557388, "learning_rate": 7.347852635864034e-05, "loss": 0.9615, "step": 3114 }, { "epoch": 1.2663142915226673, "grad_norm": 0.10764329135417938, "learning_rate": 7.34378180337879e-05, "loss": 1.031, "step": 3115 }, { "epoch": 1.2667208782272819, "grad_norm": 0.09749144315719604, "learning_rate": 7.339710970893549e-05, "loss": 0.895, "step": 3116 }, { "epoch": 1.2671274649318967, "grad_norm": 0.09897952526807785, "learning_rate": 7.335640138408306e-05, "loss": 0.882, "step": 3117 }, { "epoch": 1.2675340516365114, "grad_norm": 0.11318530142307281, "learning_rate": 7.331569305923062e-05, "loss": 1.0453, "step": 3118 }, { "epoch": 1.2679406383411262, "grad_norm": 0.09759360551834106, "learning_rate": 7.327498473437818e-05, "loss": 0.8508, "step": 3119 }, { "epoch": 1.268347225045741, "grad_norm": 0.10094036906957626, "learning_rate": 7.323427640952574e-05, "loss": 1.0193, "step": 3120 }, { "epoch": 1.2687538117503558, "grad_norm": 0.11087031662464142, "learning_rate": 7.319356808467331e-05, "loss": 1.0469, "step": 3121 }, { "epoch": 1.2691603984549706, "grad_norm": 0.10733988881111145, "learning_rate": 7.31528597598209e-05, "loss": 0.9555, "step": 3122 }, { "epoch": 1.2695669851595852, "grad_norm": 0.10932072252035141, 
"learning_rate": 7.311215143496846e-05, "loss": 0.9682, "step": 3123 }, { "epoch": 1.2699735718642, "grad_norm": 0.09542959183454514, "learning_rate": 7.307144311011603e-05, "loss": 0.9259, "step": 3124 }, { "epoch": 1.2703801585688148, "grad_norm": 0.10774809122085571, "learning_rate": 7.303073478526358e-05, "loss": 0.9209, "step": 3125 }, { "epoch": 1.2707867452734296, "grad_norm": 0.09668859839439392, "learning_rate": 7.299002646041115e-05, "loss": 0.9183, "step": 3126 }, { "epoch": 1.2711933319780444, "grad_norm": 0.10144450515508652, "learning_rate": 7.294931813555873e-05, "loss": 0.8618, "step": 3127 }, { "epoch": 1.271599918682659, "grad_norm": 0.09714296460151672, "learning_rate": 7.29086098107063e-05, "loss": 0.9043, "step": 3128 }, { "epoch": 1.2720065053872738, "grad_norm": 0.10313305258750916, "learning_rate": 7.286790148585387e-05, "loss": 0.9729, "step": 3129 }, { "epoch": 1.2724130920918886, "grad_norm": 0.09867827594280243, "learning_rate": 7.282719316100143e-05, "loss": 0.8913, "step": 3130 }, { "epoch": 1.2728196787965034, "grad_norm": 0.09492600709199905, "learning_rate": 7.278648483614899e-05, "loss": 0.8386, "step": 3131 }, { "epoch": 1.2732262655011182, "grad_norm": 0.10441063344478607, "learning_rate": 7.274577651129656e-05, "loss": 0.946, "step": 3132 }, { "epoch": 1.2736328522057327, "grad_norm": 0.10944189876317978, "learning_rate": 7.270506818644414e-05, "loss": 0.9703, "step": 3133 }, { "epoch": 1.2740394389103478, "grad_norm": 0.09535184502601624, "learning_rate": 7.26643598615917e-05, "loss": 0.8566, "step": 3134 }, { "epoch": 1.2744460256149623, "grad_norm": 0.10626508295536041, "learning_rate": 7.262365153673927e-05, "loss": 0.9717, "step": 3135 }, { "epoch": 1.2748526123195771, "grad_norm": 0.10682171583175659, "learning_rate": 7.258294321188683e-05, "loss": 0.9931, "step": 3136 }, { "epoch": 1.275259199024192, "grad_norm": 0.10641193389892578, "learning_rate": 7.25422348870344e-05, "loss": 0.9644, "step": 3137 }, { "epoch": 
1.2756657857288067, "grad_norm": 0.10564321279525757, "learning_rate": 7.250152656218196e-05, "loss": 0.9756, "step": 3138 }, { "epoch": 1.2760723724334215, "grad_norm": 0.10609064251184464, "learning_rate": 7.246081823732954e-05, "loss": 0.9403, "step": 3139 }, { "epoch": 1.276478959138036, "grad_norm": 0.10627634078264236, "learning_rate": 7.242010991247711e-05, "loss": 1.0142, "step": 3140 }, { "epoch": 1.276885545842651, "grad_norm": 0.09416361898183823, "learning_rate": 7.237940158762468e-05, "loss": 0.8662, "step": 3141 }, { "epoch": 1.2772921325472657, "grad_norm": 0.0893898606300354, "learning_rate": 7.233869326277223e-05, "loss": 0.8637, "step": 3142 }, { "epoch": 1.2776987192518805, "grad_norm": 0.09307090193033218, "learning_rate": 7.22979849379198e-05, "loss": 0.8181, "step": 3143 }, { "epoch": 1.2781053059564953, "grad_norm": 0.10833927989006042, "learning_rate": 7.225727661306737e-05, "loss": 1.018, "step": 3144 }, { "epoch": 1.2785118926611099, "grad_norm": 0.10201290994882584, "learning_rate": 7.221656828821495e-05, "loss": 0.975, "step": 3145 }, { "epoch": 1.2789184793657247, "grad_norm": 0.1058439314365387, "learning_rate": 7.217585996336252e-05, "loss": 0.9321, "step": 3146 }, { "epoch": 1.2793250660703395, "grad_norm": 0.10530184209346771, "learning_rate": 7.213515163851008e-05, "loss": 1.0229, "step": 3147 }, { "epoch": 1.2797316527749543, "grad_norm": 0.10850049555301666, "learning_rate": 7.209444331365764e-05, "loss": 0.9531, "step": 3148 }, { "epoch": 1.280138239479569, "grad_norm": 0.09624800086021423, "learning_rate": 7.20537349888052e-05, "loss": 0.8507, "step": 3149 }, { "epoch": 1.2805448261841839, "grad_norm": 0.1027805283665657, "learning_rate": 7.201302666395279e-05, "loss": 0.8931, "step": 3150 }, { "epoch": 1.2809514128887987, "grad_norm": 0.09510186314582825, "learning_rate": 7.197231833910035e-05, "loss": 0.8505, "step": 3151 }, { "epoch": 1.2813579995934132, "grad_norm": 0.09477930516004562, "learning_rate": 
7.193161001424792e-05, "loss": 0.8898, "step": 3152 }, { "epoch": 1.281764586298028, "grad_norm": 0.10591868311166763, "learning_rate": 7.189090168939549e-05, "loss": 1.0329, "step": 3153 }, { "epoch": 1.2821711730026428, "grad_norm": 0.10289102792739868, "learning_rate": 7.185019336454304e-05, "loss": 0.9094, "step": 3154 }, { "epoch": 1.2825777597072576, "grad_norm": 0.09908158332109451, "learning_rate": 7.180948503969061e-05, "loss": 0.9129, "step": 3155 }, { "epoch": 1.2829843464118724, "grad_norm": 0.1069243773818016, "learning_rate": 7.176877671483819e-05, "loss": 1.0386, "step": 3156 }, { "epoch": 1.283390933116487, "grad_norm": 0.09986919909715652, "learning_rate": 7.172806838998576e-05, "loss": 0.9309, "step": 3157 }, { "epoch": 1.2837975198211018, "grad_norm": 0.11003892868757248, "learning_rate": 7.168736006513333e-05, "loss": 1.0736, "step": 3158 }, { "epoch": 1.2842041065257166, "grad_norm": 0.09842909872531891, "learning_rate": 7.16466517402809e-05, "loss": 0.8691, "step": 3159 }, { "epoch": 1.2846106932303314, "grad_norm": 0.10040794312953949, "learning_rate": 7.160594341542845e-05, "loss": 1.0088, "step": 3160 }, { "epoch": 1.2850172799349462, "grad_norm": 0.10622259974479675, "learning_rate": 7.156523509057602e-05, "loss": 0.9163, "step": 3161 }, { "epoch": 1.2854238666395608, "grad_norm": 0.10022327303886414, "learning_rate": 7.15245267657236e-05, "loss": 0.8016, "step": 3162 }, { "epoch": 1.2858304533441758, "grad_norm": 0.09203210473060608, "learning_rate": 7.148381844087117e-05, "loss": 0.872, "step": 3163 }, { "epoch": 1.2862370400487904, "grad_norm": 0.10571742057800293, "learning_rate": 7.144311011601873e-05, "loss": 0.9389, "step": 3164 }, { "epoch": 1.2866436267534052, "grad_norm": 0.09963870793581009, "learning_rate": 7.14024017911663e-05, "loss": 0.8696, "step": 3165 }, { "epoch": 1.28705021345802, "grad_norm": 0.11249116063117981, "learning_rate": 7.136169346631386e-05, "loss": 1.0006, "step": 3166 }, { "epoch": 1.2874568001626348, 
"grad_norm": 0.10685020685195923, "learning_rate": 7.132098514146142e-05, "loss": 0.9818, "step": 3167 }, { "epoch": 1.2878633868672495, "grad_norm": 0.10361088067293167, "learning_rate": 7.1280276816609e-05, "loss": 0.9227, "step": 3168 }, { "epoch": 1.2882699735718641, "grad_norm": 0.103251151740551, "learning_rate": 7.123956849175657e-05, "loss": 0.9239, "step": 3169 }, { "epoch": 1.288676560276479, "grad_norm": 0.10526666045188904, "learning_rate": 7.119886016690414e-05, "loss": 0.9133, "step": 3170 }, { "epoch": 1.2890831469810937, "grad_norm": 0.1041581928730011, "learning_rate": 7.115815184205171e-05, "loss": 1.0211, "step": 3171 }, { "epoch": 1.2894897336857085, "grad_norm": 0.1083633154630661, "learning_rate": 7.111744351719926e-05, "loss": 0.8242, "step": 3172 }, { "epoch": 1.2898963203903233, "grad_norm": 0.10779697448015213, "learning_rate": 7.107673519234684e-05, "loss": 0.995, "step": 3173 }, { "epoch": 1.290302907094938, "grad_norm": 0.11221274733543396, "learning_rate": 7.103602686749441e-05, "loss": 1.0821, "step": 3174 }, { "epoch": 1.2907094937995527, "grad_norm": 0.0996582880616188, "learning_rate": 7.099531854264198e-05, "loss": 0.9197, "step": 3175 }, { "epoch": 1.2911160805041675, "grad_norm": 0.10174833238124847, "learning_rate": 7.095461021778955e-05, "loss": 0.911, "step": 3176 }, { "epoch": 1.2915226672087823, "grad_norm": 0.09850712865591049, "learning_rate": 7.091390189293711e-05, "loss": 0.9195, "step": 3177 }, { "epoch": 1.291929253913397, "grad_norm": 0.1044590100646019, "learning_rate": 7.087319356808467e-05, "loss": 0.9239, "step": 3178 }, { "epoch": 1.2923358406180117, "grad_norm": 0.10324393957853317, "learning_rate": 7.083248524323225e-05, "loss": 0.9766, "step": 3179 }, { "epoch": 1.2927424273226267, "grad_norm": 0.10219740122556686, "learning_rate": 7.079177691837982e-05, "loss": 0.8427, "step": 3180 }, { "epoch": 1.2931490140272413, "grad_norm": 0.09563204646110535, "learning_rate": 7.075106859352738e-05, "loss": 0.8523, 
"step": 3181 }, { "epoch": 1.293555600731856, "grad_norm": 0.10874439030885696, "learning_rate": 7.071036026867495e-05, "loss": 1.0068, "step": 3182 }, { "epoch": 1.2939621874364708, "grad_norm": 0.10866596549749374, "learning_rate": 7.066965194382252e-05, "loss": 0.97, "step": 3183 }, { "epoch": 1.2943687741410856, "grad_norm": 0.1108192652463913, "learning_rate": 7.062894361897007e-05, "loss": 1.0094, "step": 3184 }, { "epoch": 1.2947753608457004, "grad_norm": 0.10688415169715881, "learning_rate": 7.058823529411765e-05, "loss": 0.9784, "step": 3185 }, { "epoch": 1.295181947550315, "grad_norm": 0.11468330025672913, "learning_rate": 7.054752696926522e-05, "loss": 1.0524, "step": 3186 }, { "epoch": 1.2955885342549298, "grad_norm": 0.09452933818101883, "learning_rate": 7.050681864441279e-05, "loss": 0.84, "step": 3187 }, { "epoch": 1.2959951209595446, "grad_norm": 0.10682807862758636, "learning_rate": 7.046611031956036e-05, "loss": 0.9884, "step": 3188 }, { "epoch": 1.2964017076641594, "grad_norm": 0.09095878899097443, "learning_rate": 7.042540199470791e-05, "loss": 0.8412, "step": 3189 }, { "epoch": 1.2968082943687742, "grad_norm": 0.09463642537593842, "learning_rate": 7.038469366985548e-05, "loss": 0.8523, "step": 3190 }, { "epoch": 1.2972148810733888, "grad_norm": 0.10467414557933807, "learning_rate": 7.034398534500306e-05, "loss": 0.946, "step": 3191 }, { "epoch": 1.2976214677780036, "grad_norm": 0.09827042371034622, "learning_rate": 7.030327702015063e-05, "loss": 0.8918, "step": 3192 }, { "epoch": 1.2980280544826184, "grad_norm": 0.10762523114681244, "learning_rate": 7.02625686952982e-05, "loss": 0.8899, "step": 3193 }, { "epoch": 1.2984346411872332, "grad_norm": 0.10172978043556213, "learning_rate": 7.022186037044576e-05, "loss": 1.0034, "step": 3194 }, { "epoch": 1.298841227891848, "grad_norm": 0.10815691202878952, "learning_rate": 7.018115204559332e-05, "loss": 1.067, "step": 3195 }, { "epoch": 1.2992478145964628, "grad_norm": 0.10719442367553711, 
"learning_rate": 7.01404437207409e-05, "loss": 1.1058, "step": 3196 }, { "epoch": 1.2996544013010776, "grad_norm": 0.1034155860543251, "learning_rate": 7.009973539588847e-05, "loss": 1.0444, "step": 3197 }, { "epoch": 1.3000609880056921, "grad_norm": 0.10380623489618301, "learning_rate": 7.005902707103603e-05, "loss": 0.9943, "step": 3198 }, { "epoch": 1.300467574710307, "grad_norm": 0.1016862615942955, "learning_rate": 7.00183187461836e-05, "loss": 0.8967, "step": 3199 }, { "epoch": 1.3008741614149217, "grad_norm": 0.0974292978644371, "learning_rate": 6.997761042133117e-05, "loss": 0.9727, "step": 3200 }, { "epoch": 1.3012807481195365, "grad_norm": 0.10616238415241241, "learning_rate": 6.993690209647872e-05, "loss": 0.942, "step": 3201 }, { "epoch": 1.3016873348241513, "grad_norm": 0.10380051285028458, "learning_rate": 6.98961937716263e-05, "loss": 0.919, "step": 3202 }, { "epoch": 1.302093921528766, "grad_norm": 0.10846268385648727, "learning_rate": 6.985548544677387e-05, "loss": 1.0066, "step": 3203 }, { "epoch": 1.3025005082333807, "grad_norm": 0.09922856092453003, "learning_rate": 6.981477712192144e-05, "loss": 0.9161, "step": 3204 }, { "epoch": 1.3029070949379955, "grad_norm": 0.09791149944067001, "learning_rate": 6.977406879706901e-05, "loss": 0.8916, "step": 3205 }, { "epoch": 1.3033136816426103, "grad_norm": 0.10441888123750687, "learning_rate": 6.973336047221657e-05, "loss": 0.8901, "step": 3206 }, { "epoch": 1.303720268347225, "grad_norm": 0.10184387862682343, "learning_rate": 6.969265214736413e-05, "loss": 0.9033, "step": 3207 }, { "epoch": 1.3041268550518397, "grad_norm": 0.10396290570497513, "learning_rate": 6.965194382251171e-05, "loss": 0.9896, "step": 3208 }, { "epoch": 1.3045334417564547, "grad_norm": 0.10278751701116562, "learning_rate": 6.961123549765928e-05, "loss": 0.8561, "step": 3209 }, { "epoch": 1.3049400284610693, "grad_norm": 0.1081685721874237, "learning_rate": 6.957052717280685e-05, "loss": 1.0042, "step": 3210 }, { "epoch": 
1.305346615165684, "grad_norm": 0.10850156843662262, "learning_rate": 6.952981884795441e-05, "loss": 0.9461, "step": 3211 }, { "epoch": 1.3057532018702989, "grad_norm": 0.10625606030225754, "learning_rate": 6.948911052310198e-05, "loss": 0.9943, "step": 3212 }, { "epoch": 1.3061597885749137, "grad_norm": 0.1126924604177475, "learning_rate": 6.944840219824953e-05, "loss": 1.1293, "step": 3213 }, { "epoch": 1.3065663752795285, "grad_norm": 0.09927353262901306, "learning_rate": 6.940769387339712e-05, "loss": 0.8866, "step": 3214 }, { "epoch": 1.306972961984143, "grad_norm": 0.11918359994888306, "learning_rate": 6.936698554854468e-05, "loss": 1.0915, "step": 3215 }, { "epoch": 1.3073795486887578, "grad_norm": 0.11078538745641708, "learning_rate": 6.932627722369225e-05, "loss": 1.0182, "step": 3216 }, { "epoch": 1.3077861353933726, "grad_norm": 0.10765037685632706, "learning_rate": 6.928556889883982e-05, "loss": 0.9387, "step": 3217 }, { "epoch": 1.3081927220979874, "grad_norm": 0.09321583807468414, "learning_rate": 6.924486057398739e-05, "loss": 0.8916, "step": 3218 }, { "epoch": 1.3085993088026022, "grad_norm": 0.09504148364067078, "learning_rate": 6.920415224913494e-05, "loss": 0.8669, "step": 3219 }, { "epoch": 1.3090058955072168, "grad_norm": 0.09915818274021149, "learning_rate": 6.916344392428252e-05, "loss": 0.9645, "step": 3220 }, { "epoch": 1.3094124822118316, "grad_norm": 0.10582833737134933, "learning_rate": 6.912273559943009e-05, "loss": 0.9428, "step": 3221 }, { "epoch": 1.3098190689164464, "grad_norm": 0.09936497360467911, "learning_rate": 6.908202727457766e-05, "loss": 0.9818, "step": 3222 }, { "epoch": 1.3102256556210612, "grad_norm": 0.0910920724272728, "learning_rate": 6.904131894972522e-05, "loss": 0.8306, "step": 3223 }, { "epoch": 1.310632242325676, "grad_norm": 0.10046056658029556, "learning_rate": 6.900061062487279e-05, "loss": 0.9159, "step": 3224 }, { "epoch": 1.3110388290302908, "grad_norm": 0.09471949934959412, "learning_rate": 
6.895990230002036e-05, "loss": 0.8514, "step": 3225 }, { "epoch": 1.3114454157349056, "grad_norm": 0.09952743351459503, "learning_rate": 6.891919397516793e-05, "loss": 0.8931, "step": 3226 }, { "epoch": 1.3118520024395202, "grad_norm": 0.11039602011442184, "learning_rate": 6.88784856503155e-05, "loss": 1.0626, "step": 3227 }, { "epoch": 1.312258589144135, "grad_norm": 0.09720449149608612, "learning_rate": 6.883777732546306e-05, "loss": 0.8322, "step": 3228 }, { "epoch": 1.3126651758487498, "grad_norm": 0.10088366270065308, "learning_rate": 6.879706900061063e-05, "loss": 0.9092, "step": 3229 }, { "epoch": 1.3130717625533646, "grad_norm": 0.09886251389980316, "learning_rate": 6.87563606757582e-05, "loss": 0.8671, "step": 3230 }, { "epoch": 1.3134783492579793, "grad_norm": 0.09615595638751984, "learning_rate": 6.871565235090577e-05, "loss": 0.8902, "step": 3231 }, { "epoch": 1.313884935962594, "grad_norm": 0.09880536794662476, "learning_rate": 6.867494402605333e-05, "loss": 0.986, "step": 3232 }, { "epoch": 1.3142915226672087, "grad_norm": 0.08802687376737595, "learning_rate": 6.86342357012009e-05, "loss": 0.8119, "step": 3233 }, { "epoch": 1.3146981093718235, "grad_norm": 0.10455913096666336, "learning_rate": 6.859352737634847e-05, "loss": 1.0014, "step": 3234 }, { "epoch": 1.3151046960764383, "grad_norm": 0.10536748915910721, "learning_rate": 6.855281905149604e-05, "loss": 0.9696, "step": 3235 }, { "epoch": 1.3155112827810531, "grad_norm": 0.10749450325965881, "learning_rate": 6.851211072664359e-05, "loss": 1.0153, "step": 3236 }, { "epoch": 1.3159178694856677, "grad_norm": 0.10122597217559814, "learning_rate": 6.847140240179117e-05, "loss": 0.9241, "step": 3237 }, { "epoch": 1.3163244561902827, "grad_norm": 0.09782005101442337, "learning_rate": 6.843069407693874e-05, "loss": 0.8996, "step": 3238 }, { "epoch": 1.3167310428948973, "grad_norm": 0.09286556392908096, "learning_rate": 6.83899857520863e-05, "loss": 0.8474, "step": 3239 }, { "epoch": 1.317137629599512, 
"grad_norm": 0.10210733860731125, "learning_rate": 6.834927742723387e-05, "loss": 0.9524, "step": 3240 }, { "epoch": 1.3175442163041269, "grad_norm": 0.09640049189329147, "learning_rate": 6.830856910238144e-05, "loss": 0.915, "step": 3241 }, { "epoch": 1.3179508030087417, "grad_norm": 0.11026381701231003, "learning_rate": 6.8267860777529e-05, "loss": 1.0658, "step": 3242 }, { "epoch": 1.3183573897133565, "grad_norm": 0.11060287058353424, "learning_rate": 6.822715245267658e-05, "loss": 0.9972, "step": 3243 }, { "epoch": 1.318763976417971, "grad_norm": 0.09819656610488892, "learning_rate": 6.818644412782414e-05, "loss": 0.8394, "step": 3244 }, { "epoch": 1.3191705631225858, "grad_norm": 0.09501704573631287, "learning_rate": 6.814573580297171e-05, "loss": 0.8501, "step": 3245 }, { "epoch": 1.3195771498272006, "grad_norm": 0.10703945904970169, "learning_rate": 6.810502747811928e-05, "loss": 1.0179, "step": 3246 }, { "epoch": 1.3199837365318154, "grad_norm": 0.09531719982624054, "learning_rate": 6.806431915326685e-05, "loss": 0.8521, "step": 3247 }, { "epoch": 1.3203903232364302, "grad_norm": 0.10462060570716858, "learning_rate": 6.802361082841442e-05, "loss": 0.9666, "step": 3248 }, { "epoch": 1.3207969099410448, "grad_norm": 0.11056574434041977, "learning_rate": 6.798290250356198e-05, "loss": 1.0324, "step": 3249 }, { "epoch": 1.3212034966456596, "grad_norm": 0.10290572047233582, "learning_rate": 6.794219417870955e-05, "loss": 0.9468, "step": 3250 }, { "epoch": 1.3216100833502744, "grad_norm": 0.11361809819936752, "learning_rate": 6.790148585385712e-05, "loss": 1.1379, "step": 3251 }, { "epoch": 1.3220166700548892, "grad_norm": 0.10172957926988602, "learning_rate": 6.786077752900469e-05, "loss": 0.9012, "step": 3252 }, { "epoch": 1.322423256759504, "grad_norm": 0.10785951465368271, "learning_rate": 6.782006920415225e-05, "loss": 1.0223, "step": 3253 }, { "epoch": 1.3228298434641188, "grad_norm": 0.10551370680332184, "learning_rate": 6.777936087929982e-05, "loss": 
0.9104, "step": 3254 }, { "epoch": 1.3232364301687336, "grad_norm": 0.10205043107271194, "learning_rate": 6.773865255444739e-05, "loss": 0.9634, "step": 3255 }, { "epoch": 1.3236430168733482, "grad_norm": 0.11159402132034302, "learning_rate": 6.769794422959496e-05, "loss": 0.9731, "step": 3256 }, { "epoch": 1.324049603577963, "grad_norm": 0.10272105038166046, "learning_rate": 6.765723590474252e-05, "loss": 0.8697, "step": 3257 }, { "epoch": 1.3244561902825778, "grad_norm": 0.09885246306657791, "learning_rate": 6.761652757989009e-05, "loss": 0.8714, "step": 3258 }, { "epoch": 1.3248627769871926, "grad_norm": 0.10272397845983505, "learning_rate": 6.757581925503766e-05, "loss": 0.9957, "step": 3259 }, { "epoch": 1.3252693636918074, "grad_norm": 0.10085637867450714, "learning_rate": 6.753511093018523e-05, "loss": 0.8967, "step": 3260 }, { "epoch": 1.325675950396422, "grad_norm": 0.09773235023021698, "learning_rate": 6.74944026053328e-05, "loss": 0.9523, "step": 3261 }, { "epoch": 1.3260825371010367, "grad_norm": 0.09937093406915665, "learning_rate": 6.745369428048036e-05, "loss": 0.9269, "step": 3262 }, { "epoch": 1.3264891238056515, "grad_norm": 0.1010124534368515, "learning_rate": 6.741298595562793e-05, "loss": 0.917, "step": 3263 }, { "epoch": 1.3268957105102663, "grad_norm": 0.09788401424884796, "learning_rate": 6.73722776307755e-05, "loss": 0.9131, "step": 3264 }, { "epoch": 1.3273022972148811, "grad_norm": 0.1079770028591156, "learning_rate": 6.733156930592307e-05, "loss": 0.9804, "step": 3265 }, { "epoch": 1.3277088839194957, "grad_norm": 0.10159478336572647, "learning_rate": 6.729086098107063e-05, "loss": 0.9987, "step": 3266 }, { "epoch": 1.3281154706241107, "grad_norm": 0.10081673413515091, "learning_rate": 6.72501526562182e-05, "loss": 0.9141, "step": 3267 }, { "epoch": 1.3285220573287253, "grad_norm": 0.10763905942440033, "learning_rate": 6.720944433136577e-05, "loss": 0.9234, "step": 3268 }, { "epoch": 1.32892864403334, "grad_norm": 0.10242763161659241, 
"learning_rate": 6.716873600651334e-05, "loss": 0.8658, "step": 3269 }, { "epoch": 1.329335230737955, "grad_norm": 0.09579820930957794, "learning_rate": 6.71280276816609e-05, "loss": 0.8644, "step": 3270 }, { "epoch": 1.3297418174425697, "grad_norm": 0.10901151597499847, "learning_rate": 6.708731935680847e-05, "loss": 0.9082, "step": 3271 }, { "epoch": 1.3301484041471845, "grad_norm": 0.10376943647861481, "learning_rate": 6.704661103195604e-05, "loss": 1.0499, "step": 3272 }, { "epoch": 1.330554990851799, "grad_norm": 0.09166496247053146, "learning_rate": 6.70059027071036e-05, "loss": 0.8626, "step": 3273 }, { "epoch": 1.3309615775564139, "grad_norm": 0.10346846282482147, "learning_rate": 6.696519438225117e-05, "loss": 1.0357, "step": 3274 }, { "epoch": 1.3313681642610287, "grad_norm": 0.09558013081550598, "learning_rate": 6.692448605739874e-05, "loss": 0.8998, "step": 3275 }, { "epoch": 1.3317747509656435, "grad_norm": 0.10983523726463318, "learning_rate": 6.688377773254631e-05, "loss": 1.0083, "step": 3276 }, { "epoch": 1.3321813376702583, "grad_norm": 0.1021072268486023, "learning_rate": 6.684306940769388e-05, "loss": 0.9392, "step": 3277 }, { "epoch": 1.3325879243748728, "grad_norm": 0.09880812466144562, "learning_rate": 6.680236108284144e-05, "loss": 0.9355, "step": 3278 }, { "epoch": 1.3329945110794876, "grad_norm": 0.1031312495470047, "learning_rate": 6.676165275798901e-05, "loss": 0.9932, "step": 3279 }, { "epoch": 1.3334010977841024, "grad_norm": 0.09794270247220993, "learning_rate": 6.672094443313658e-05, "loss": 0.8731, "step": 3280 }, { "epoch": 1.3338076844887172, "grad_norm": 0.11040783673524857, "learning_rate": 6.668023610828415e-05, "loss": 0.9403, "step": 3281 }, { "epoch": 1.334214271193332, "grad_norm": 0.11623408645391464, "learning_rate": 6.663952778343171e-05, "loss": 1.0711, "step": 3282 }, { "epoch": 1.3346208578979466, "grad_norm": 0.11237382143735886, "learning_rate": 6.659881945857928e-05, "loss": 0.9668, "step": 3283 }, { "epoch": 
1.3350274446025616, "grad_norm": 0.10134642571210861, "learning_rate": 6.655811113372685e-05, "loss": 0.9352, "step": 3284 }, { "epoch": 1.3354340313071762, "grad_norm": 0.09573974460363388, "learning_rate": 6.651740280887442e-05, "loss": 0.91, "step": 3285 }, { "epoch": 1.335840618011791, "grad_norm": 0.10902750492095947, "learning_rate": 6.647669448402199e-05, "loss": 1.0084, "step": 3286 }, { "epoch": 1.3362472047164058, "grad_norm": 0.10625172406435013, "learning_rate": 6.643598615916955e-05, "loss": 0.994, "step": 3287 }, { "epoch": 1.3366537914210206, "grad_norm": 0.11479779332876205, "learning_rate": 6.639527783431712e-05, "loss": 1.0517, "step": 3288 }, { "epoch": 1.3370603781256354, "grad_norm": 0.10585431009531021, "learning_rate": 6.635456950946469e-05, "loss": 1.0049, "step": 3289 }, { "epoch": 1.33746696483025, "grad_norm": 0.10156022757291794, "learning_rate": 6.631386118461226e-05, "loss": 0.8845, "step": 3290 }, { "epoch": 1.3378735515348648, "grad_norm": 0.1020706295967102, "learning_rate": 6.627315285975982e-05, "loss": 0.9834, "step": 3291 }, { "epoch": 1.3382801382394796, "grad_norm": 0.09861770272254944, "learning_rate": 6.623244453490739e-05, "loss": 0.9128, "step": 3292 }, { "epoch": 1.3386867249440944, "grad_norm": 0.0913916602730751, "learning_rate": 6.619173621005496e-05, "loss": 0.8242, "step": 3293 }, { "epoch": 1.3390933116487092, "grad_norm": 0.10706761479377747, "learning_rate": 6.615102788520253e-05, "loss": 1.0844, "step": 3294 }, { "epoch": 1.3394998983533237, "grad_norm": 0.09800904244184494, "learning_rate": 6.61103195603501e-05, "loss": 0.881, "step": 3295 }, { "epoch": 1.3399064850579385, "grad_norm": 0.09987018257379532, "learning_rate": 6.606961123549766e-05, "loss": 0.9368, "step": 3296 }, { "epoch": 1.3403130717625533, "grad_norm": 0.10405978560447693, "learning_rate": 6.602890291064523e-05, "loss": 0.9642, "step": 3297 }, { "epoch": 1.3407196584671681, "grad_norm": 0.10836942493915558, "learning_rate": 
6.59881945857928e-05, "loss": 0.9626, "step": 3298 }, { "epoch": 1.341126245171783, "grad_norm": 0.11826450377702713, "learning_rate": 6.594748626094036e-05, "loss": 0.9083, "step": 3299 }, { "epoch": 1.3415328318763977, "grad_norm": 0.09229649603366852, "learning_rate": 6.590677793608793e-05, "loss": 0.886, "step": 3300 }, { "epoch": 1.3419394185810125, "grad_norm": 0.10104688256978989, "learning_rate": 6.58660696112355e-05, "loss": 0.8792, "step": 3301 }, { "epoch": 1.342346005285627, "grad_norm": 0.10879054665565491, "learning_rate": 6.582536128638307e-05, "loss": 1.0057, "step": 3302 }, { "epoch": 1.3427525919902419, "grad_norm": 0.10086936503648758, "learning_rate": 6.578465296153064e-05, "loss": 0.9508, "step": 3303 }, { "epoch": 1.3431591786948567, "grad_norm": 0.10607967525720596, "learning_rate": 6.57439446366782e-05, "loss": 0.9406, "step": 3304 }, { "epoch": 1.3435657653994715, "grad_norm": 0.0975409746170044, "learning_rate": 6.570323631182577e-05, "loss": 0.8687, "step": 3305 }, { "epoch": 1.3439723521040863, "grad_norm": 0.09562422335147858, "learning_rate": 6.566252798697334e-05, "loss": 0.7939, "step": 3306 }, { "epoch": 1.3443789388087009, "grad_norm": 0.102279432117939, "learning_rate": 6.56218196621209e-05, "loss": 0.8287, "step": 3307 }, { "epoch": 1.3447855255133156, "grad_norm": 0.11360716074705124, "learning_rate": 6.558111133726847e-05, "loss": 0.923, "step": 3308 }, { "epoch": 1.3451921122179304, "grad_norm": 0.09787465631961823, "learning_rate": 6.554040301241604e-05, "loss": 0.8715, "step": 3309 }, { "epoch": 1.3455986989225452, "grad_norm": 0.1050533726811409, "learning_rate": 6.549969468756361e-05, "loss": 0.9855, "step": 3310 }, { "epoch": 1.34600528562716, "grad_norm": 0.09932462126016617, "learning_rate": 6.545898636271118e-05, "loss": 0.8941, "step": 3311 }, { "epoch": 1.3464118723317746, "grad_norm": 0.10330644994974136, "learning_rate": 6.541827803785874e-05, "loss": 1.0426, "step": 3312 }, { "epoch": 1.3468184590363896, 
"grad_norm": 0.10034661740064621, "learning_rate": 6.537756971300631e-05, "loss": 0.869, "step": 3313 }, { "epoch": 1.3472250457410042, "grad_norm": 0.10637210309505463, "learning_rate": 6.533686138815388e-05, "loss": 0.9451, "step": 3314 }, { "epoch": 1.347631632445619, "grad_norm": 0.1081581711769104, "learning_rate": 6.529615306330145e-05, "loss": 1.0438, "step": 3315 }, { "epoch": 1.3480382191502338, "grad_norm": 0.10719630867242813, "learning_rate": 6.525544473844901e-05, "loss": 0.903, "step": 3316 }, { "epoch": 1.3484448058548486, "grad_norm": 0.09714970737695694, "learning_rate": 6.521473641359658e-05, "loss": 0.8939, "step": 3317 }, { "epoch": 1.3488513925594634, "grad_norm": 0.10341964662075043, "learning_rate": 6.517402808874415e-05, "loss": 0.9428, "step": 3318 }, { "epoch": 1.349257979264078, "grad_norm": 0.10897208005189896, "learning_rate": 6.513331976389172e-05, "loss": 0.9621, "step": 3319 }, { "epoch": 1.3496645659686928, "grad_norm": 0.09563440829515457, "learning_rate": 6.509261143903929e-05, "loss": 0.9126, "step": 3320 }, { "epoch": 1.3500711526733076, "grad_norm": 0.10758570581674576, "learning_rate": 6.505190311418685e-05, "loss": 1.039, "step": 3321 }, { "epoch": 1.3504777393779224, "grad_norm": 0.10049328953027725, "learning_rate": 6.501119478933442e-05, "loss": 0.8708, "step": 3322 }, { "epoch": 1.3508843260825372, "grad_norm": 0.10646460205316544, "learning_rate": 6.497048646448199e-05, "loss": 1.01, "step": 3323 }, { "epoch": 1.3512909127871517, "grad_norm": 0.10543674230575562, "learning_rate": 6.492977813962956e-05, "loss": 0.9799, "step": 3324 }, { "epoch": 1.3516974994917665, "grad_norm": 0.10643398016691208, "learning_rate": 6.488906981477712e-05, "loss": 0.9281, "step": 3325 }, { "epoch": 1.3521040861963813, "grad_norm": 0.10901035368442535, "learning_rate": 6.484836148992469e-05, "loss": 1.0218, "step": 3326 }, { "epoch": 1.3525106729009961, "grad_norm": 0.12187345325946808, "learning_rate": 6.480765316507226e-05, "loss": 1.0313, 
"step": 3327 }, { "epoch": 1.352917259605611, "grad_norm": 0.10931206494569778, "learning_rate": 6.476694484021983e-05, "loss": 1.0675, "step": 3328 }, { "epoch": 1.3533238463102257, "grad_norm": 0.10836771130561829, "learning_rate": 6.47262365153674e-05, "loss": 1.0236, "step": 3329 }, { "epoch": 1.3537304330148405, "grad_norm": 0.10628294199705124, "learning_rate": 6.468552819051496e-05, "loss": 0.961, "step": 3330 }, { "epoch": 1.354137019719455, "grad_norm": 0.10702615231275558, "learning_rate": 6.464481986566253e-05, "loss": 0.9972, "step": 3331 }, { "epoch": 1.35454360642407, "grad_norm": 0.10359417647123337, "learning_rate": 6.46041115408101e-05, "loss": 0.9665, "step": 3332 }, { "epoch": 1.3549501931286847, "grad_norm": 0.10689602792263031, "learning_rate": 6.456340321595766e-05, "loss": 0.9761, "step": 3333 }, { "epoch": 1.3553567798332995, "grad_norm": 0.10960511118173599, "learning_rate": 6.452269489110523e-05, "loss": 0.9182, "step": 3334 }, { "epoch": 1.3557633665379143, "grad_norm": 0.1095903292298317, "learning_rate": 6.44819865662528e-05, "loss": 1.0502, "step": 3335 }, { "epoch": 1.3561699532425289, "grad_norm": 0.11125817894935608, "learning_rate": 6.444127824140037e-05, "loss": 0.9591, "step": 3336 }, { "epoch": 1.3565765399471437, "grad_norm": 0.09888199716806412, "learning_rate": 6.440056991654793e-05, "loss": 0.9291, "step": 3337 }, { "epoch": 1.3569831266517585, "grad_norm": 0.10406041145324707, "learning_rate": 6.43598615916955e-05, "loss": 0.8849, "step": 3338 }, { "epoch": 1.3573897133563733, "grad_norm": 0.10289091616868973, "learning_rate": 6.431915326684307e-05, "loss": 0.9114, "step": 3339 }, { "epoch": 1.357796300060988, "grad_norm": 0.10479141771793365, "learning_rate": 6.427844494199065e-05, "loss": 0.9903, "step": 3340 }, { "epoch": 1.3582028867656026, "grad_norm": 0.1062149703502655, "learning_rate": 6.42377366171382e-05, "loss": 0.9887, "step": 3341 }, { "epoch": 1.3586094734702177, "grad_norm": 0.10779149830341339, 
"learning_rate": 6.419702829228577e-05, "loss": 0.9764, "step": 3342 }, { "epoch": 1.3590160601748322, "grad_norm": 0.10290367156267166, "learning_rate": 6.415631996743334e-05, "loss": 0.9468, "step": 3343 }, { "epoch": 1.359422646879447, "grad_norm": 0.10836590826511383, "learning_rate": 6.411561164258091e-05, "loss": 0.9258, "step": 3344 }, { "epoch": 1.3598292335840618, "grad_norm": 0.10067134350538254, "learning_rate": 6.407490331772848e-05, "loss": 0.9739, "step": 3345 }, { "epoch": 1.3602358202886766, "grad_norm": 0.1004357561469078, "learning_rate": 6.403419499287606e-05, "loss": 0.8851, "step": 3346 }, { "epoch": 1.3606424069932914, "grad_norm": 0.10546833276748657, "learning_rate": 6.399348666802361e-05, "loss": 0.9733, "step": 3347 }, { "epoch": 1.361048993697906, "grad_norm": 0.10163208842277527, "learning_rate": 6.395277834317118e-05, "loss": 0.9647, "step": 3348 }, { "epoch": 1.3614555804025208, "grad_norm": 0.09670364856719971, "learning_rate": 6.391207001831875e-05, "loss": 0.812, "step": 3349 }, { "epoch": 1.3618621671071356, "grad_norm": 0.10565786063671112, "learning_rate": 6.387136169346631e-05, "loss": 0.9251, "step": 3350 }, { "epoch": 1.3622687538117504, "grad_norm": 0.10906309634447098, "learning_rate": 6.383065336861388e-05, "loss": 0.9706, "step": 3351 }, { "epoch": 1.3626753405163652, "grad_norm": 0.10507390648126602, "learning_rate": 6.378994504376146e-05, "loss": 0.9618, "step": 3352 }, { "epoch": 1.3630819272209798, "grad_norm": 0.1075945496559143, "learning_rate": 6.374923671890902e-05, "loss": 1.0055, "step": 3353 }, { "epoch": 1.3634885139255946, "grad_norm": 0.10433916747570038, "learning_rate": 6.370852839405658e-05, "loss": 1.0028, "step": 3354 }, { "epoch": 1.3638951006302094, "grad_norm": 0.09030991047620773, "learning_rate": 6.366782006920415e-05, "loss": 0.8476, "step": 3355 }, { "epoch": 1.3643016873348242, "grad_norm": 0.09600205719470978, "learning_rate": 6.362711174435172e-05, "loss": 0.8717, "step": 3356 }, { "epoch": 
1.364708274039439, "grad_norm": 0.10889092087745667, "learning_rate": 6.358640341949929e-05, "loss": 0.9637, "step": 3357 }, { "epoch": 1.3651148607440537, "grad_norm": 0.09938116371631622, "learning_rate": 6.354569509464687e-05, "loss": 0.896, "step": 3358 }, { "epoch": 1.3655214474486685, "grad_norm": 0.10416344553232193, "learning_rate": 6.350498676979442e-05, "loss": 0.9297, "step": 3359 }, { "epoch": 1.3659280341532831, "grad_norm": 0.11025689542293549, "learning_rate": 6.346427844494199e-05, "loss": 1.0943, "step": 3360 }, { "epoch": 1.366334620857898, "grad_norm": 0.10861194133758545, "learning_rate": 6.342357012008956e-05, "loss": 1.0826, "step": 3361 }, { "epoch": 1.3667412075625127, "grad_norm": 0.09897050261497498, "learning_rate": 6.338286179523713e-05, "loss": 0.8662, "step": 3362 }, { "epoch": 1.3671477942671275, "grad_norm": 0.1111622229218483, "learning_rate": 6.334215347038471e-05, "loss": 1.059, "step": 3363 }, { "epoch": 1.3675543809717423, "grad_norm": 0.10954444110393524, "learning_rate": 6.330144514553226e-05, "loss": 1.0297, "step": 3364 }, { "epoch": 1.3679609676763569, "grad_norm": 0.09817023575305939, "learning_rate": 6.326073682067983e-05, "loss": 1.0391, "step": 3365 }, { "epoch": 1.3683675543809717, "grad_norm": 0.10183369368314743, "learning_rate": 6.32200284958274e-05, "loss": 0.8658, "step": 3366 }, { "epoch": 1.3687741410855865, "grad_norm": 0.1102672666311264, "learning_rate": 6.317932017097496e-05, "loss": 1.0753, "step": 3367 }, { "epoch": 1.3691807277902013, "grad_norm": 0.10442694276571274, "learning_rate": 6.313861184612253e-05, "loss": 0.979, "step": 3368 }, { "epoch": 1.369587314494816, "grad_norm": 0.10101038962602615, "learning_rate": 6.309790352127011e-05, "loss": 0.9198, "step": 3369 }, { "epoch": 1.3699939011994307, "grad_norm": 0.10193105787038803, "learning_rate": 6.305719519641767e-05, "loss": 0.934, "step": 3370 }, { "epoch": 1.3704004879040457, "grad_norm": 0.10245472937822342, "learning_rate": 
6.301648687156523e-05, "loss": 0.9839, "step": 3371 }, { "epoch": 1.3708070746086602, "grad_norm": 58.737693786621094, "learning_rate": 6.29757785467128e-05, "loss": 0.9931, "step": 3372 }, { "epoch": 1.371213661313275, "grad_norm": 0.11053670197725296, "learning_rate": 6.293507022186037e-05, "loss": 0.9689, "step": 3373 }, { "epoch": 1.3716202480178898, "grad_norm": 0.10878605395555496, "learning_rate": 6.289436189700794e-05, "loss": 0.988, "step": 3374 }, { "epoch": 1.3720268347225046, "grad_norm": 0.11353089660406113, "learning_rate": 6.285365357215552e-05, "loss": 1.0371, "step": 3375 }, { "epoch": 1.3724334214271194, "grad_norm": 0.10434375703334808, "learning_rate": 6.281294524730307e-05, "loss": 0.9719, "step": 3376 }, { "epoch": 1.372840008131734, "grad_norm": 0.11517924815416336, "learning_rate": 6.277223692245064e-05, "loss": 0.97, "step": 3377 }, { "epoch": 1.3732465948363488, "grad_norm": 0.09761136770248413, "learning_rate": 6.273152859759821e-05, "loss": 0.9268, "step": 3378 }, { "epoch": 1.3736531815409636, "grad_norm": 0.11084869503974915, "learning_rate": 6.269082027274578e-05, "loss": 1.0954, "step": 3379 }, { "epoch": 1.3740597682455784, "grad_norm": 0.11147692054510117, "learning_rate": 6.265011194789334e-05, "loss": 0.9841, "step": 3380 }, { "epoch": 1.3744663549501932, "grad_norm": 0.09878647327423096, "learning_rate": 6.260940362304092e-05, "loss": 0.9229, "step": 3381 }, { "epoch": 1.3748729416548078, "grad_norm": 0.11191148310899734, "learning_rate": 6.256869529818848e-05, "loss": 1.0527, "step": 3382 }, { "epoch": 1.3752795283594226, "grad_norm": 0.09895115345716476, "learning_rate": 6.252798697333605e-05, "loss": 0.8546, "step": 3383 }, { "epoch": 1.3756861150640374, "grad_norm": 0.10903530567884445, "learning_rate": 6.248727864848361e-05, "loss": 0.9679, "step": 3384 }, { "epoch": 1.3760927017686522, "grad_norm": 0.1128024086356163, "learning_rate": 6.244657032363118e-05, "loss": 0.9939, "step": 3385 }, { "epoch": 1.376499288473267, 
"grad_norm": 0.10549446940422058, "learning_rate": 6.240586199877876e-05, "loss": 0.9451, "step": 3386 }, { "epoch": 1.3769058751778818, "grad_norm": 0.10267242044210434, "learning_rate": 6.236515367392633e-05, "loss": 0.9846, "step": 3387 }, { "epoch": 1.3773124618824966, "grad_norm": 0.09926862269639969, "learning_rate": 6.232444534907388e-05, "loss": 0.8743, "step": 3388 }, { "epoch": 1.3777190485871111, "grad_norm": 0.09842429310083389, "learning_rate": 6.228373702422145e-05, "loss": 0.8384, "step": 3389 }, { "epoch": 1.378125635291726, "grad_norm": 0.10774646699428558, "learning_rate": 6.224302869936902e-05, "loss": 1.0182, "step": 3390 }, { "epoch": 1.3785322219963407, "grad_norm": 0.10634428262710571, "learning_rate": 6.220232037451659e-05, "loss": 0.9824, "step": 3391 }, { "epoch": 1.3789388087009555, "grad_norm": 0.1014680489897728, "learning_rate": 6.216161204966417e-05, "loss": 0.8391, "step": 3392 }, { "epoch": 1.3793453954055703, "grad_norm": 0.1142488569021225, "learning_rate": 6.212090372481174e-05, "loss": 0.9874, "step": 3393 }, { "epoch": 1.379751982110185, "grad_norm": 0.11224708706140518, "learning_rate": 6.208019539995929e-05, "loss": 1.1148, "step": 3394 }, { "epoch": 1.3801585688147997, "grad_norm": 0.0988827645778656, "learning_rate": 6.203948707510686e-05, "loss": 0.9135, "step": 3395 }, { "epoch": 1.3805651555194145, "grad_norm": 0.10036725550889969, "learning_rate": 6.199877875025443e-05, "loss": 0.9165, "step": 3396 }, { "epoch": 1.3809717422240293, "grad_norm": 0.09397398680448532, "learning_rate": 6.195807042540199e-05, "loss": 0.8081, "step": 3397 }, { "epoch": 1.381378328928644, "grad_norm": 0.10657388716936111, "learning_rate": 6.191736210054957e-05, "loss": 0.9866, "step": 3398 }, { "epoch": 1.3817849156332587, "grad_norm": 0.11075644195079803, "learning_rate": 6.187665377569714e-05, "loss": 0.9838, "step": 3399 }, { "epoch": 1.3821915023378737, "grad_norm": 0.10680566728115082, "learning_rate": 6.18359454508447e-05, "loss": 
0.9901, "step": 3400 }, { "epoch": 1.3825980890424883, "grad_norm": 0.11256230622529984, "learning_rate": 6.179523712599226e-05, "loss": 0.954, "step": 3401 }, { "epoch": 1.383004675747103, "grad_norm": 0.10656154155731201, "learning_rate": 6.175452880113983e-05, "loss": 0.9413, "step": 3402 }, { "epoch": 1.3834112624517179, "grad_norm": 0.10036874562501907, "learning_rate": 6.17138204762874e-05, "loss": 0.8834, "step": 3403 }, { "epoch": 1.3838178491563327, "grad_norm": 0.1091538667678833, "learning_rate": 6.167311215143498e-05, "loss": 0.9906, "step": 3404 }, { "epoch": 1.3842244358609475, "grad_norm": 0.09833438694477081, "learning_rate": 6.163240382658255e-05, "loss": 0.9316, "step": 3405 }, { "epoch": 1.384631022565562, "grad_norm": 0.11654241383075714, "learning_rate": 6.15916955017301e-05, "loss": 1.0712, "step": 3406 }, { "epoch": 1.3850376092701768, "grad_norm": 0.10447299480438232, "learning_rate": 6.155098717687767e-05, "loss": 0.9511, "step": 3407 }, { "epoch": 1.3854441959747916, "grad_norm": 0.1039959117770195, "learning_rate": 6.151027885202524e-05, "loss": 0.953, "step": 3408 }, { "epoch": 1.3858507826794064, "grad_norm": 0.10340915620326996, "learning_rate": 6.146957052717282e-05, "loss": 0.9893, "step": 3409 }, { "epoch": 1.3862573693840212, "grad_norm": 0.10685911029577255, "learning_rate": 6.142886220232039e-05, "loss": 0.8551, "step": 3410 }, { "epoch": 1.3866639560886358, "grad_norm": 0.10175175964832306, "learning_rate": 6.138815387746795e-05, "loss": 0.9222, "step": 3411 }, { "epoch": 1.3870705427932506, "grad_norm": 0.10555768758058548, "learning_rate": 6.134744555261551e-05, "loss": 0.9464, "step": 3412 }, { "epoch": 1.3874771294978654, "grad_norm": 0.10031068325042725, "learning_rate": 6.130673722776308e-05, "loss": 0.9038, "step": 3413 }, { "epoch": 1.3878837162024802, "grad_norm": 0.10450392216444016, "learning_rate": 6.126602890291064e-05, "loss": 0.9718, "step": 3414 }, { "epoch": 1.388290302907095, "grad_norm": 0.1052430272102356, 
"learning_rate": 6.122532057805822e-05, "loss": 0.9401, "step": 3415 }, { "epoch": 1.3886968896117096, "grad_norm": 0.11526583880186081, "learning_rate": 6.118461225320579e-05, "loss": 1.0543, "step": 3416 }, { "epoch": 1.3891034763163246, "grad_norm": 0.10712984949350357, "learning_rate": 6.114390392835335e-05, "loss": 1.0006, "step": 3417 }, { "epoch": 1.3895100630209392, "grad_norm": 0.10705496370792389, "learning_rate": 6.110319560350091e-05, "loss": 1.0115, "step": 3418 }, { "epoch": 1.389916649725554, "grad_norm": 0.10251638293266296, "learning_rate": 6.106248727864848e-05, "loss": 0.9905, "step": 3419 }, { "epoch": 1.3903232364301688, "grad_norm": 0.10566147416830063, "learning_rate": 6.102177895379605e-05, "loss": 0.9758, "step": 3420 }, { "epoch": 1.3907298231347835, "grad_norm": 0.09883508831262589, "learning_rate": 6.098107062894362e-05, "loss": 0.8685, "step": 3421 }, { "epoch": 1.3911364098393983, "grad_norm": 0.10582920163869858, "learning_rate": 6.094036230409119e-05, "loss": 0.9492, "step": 3422 }, { "epoch": 1.391542996544013, "grad_norm": 0.0955805778503418, "learning_rate": 6.089965397923876e-05, "loss": 0.8709, "step": 3423 }, { "epoch": 1.3919495832486277, "grad_norm": 0.10453370958566666, "learning_rate": 6.0858945654386326e-05, "loss": 0.9211, "step": 3424 }, { "epoch": 1.3923561699532425, "grad_norm": 0.09814976155757904, "learning_rate": 6.081823732953389e-05, "loss": 0.872, "step": 3425 }, { "epoch": 1.3927627566578573, "grad_norm": 0.11669237911701202, "learning_rate": 6.0777529004681454e-05, "loss": 1.0086, "step": 3426 }, { "epoch": 1.3931693433624721, "grad_norm": 0.10733123868703842, "learning_rate": 6.073682067982903e-05, "loss": 0.9742, "step": 3427 }, { "epoch": 1.3935759300670867, "grad_norm": 0.10307322442531586, "learning_rate": 6.0696112354976596e-05, "loss": 0.9764, "step": 3428 }, { "epoch": 1.3939825167717015, "grad_norm": 0.09604302793741226, "learning_rate": 6.0655404030124164e-05, "loss": 0.8573, "step": 3429 }, { 
"epoch": 1.3943891034763163, "grad_norm": 0.10685808956623077, "learning_rate": 6.061469570527173e-05, "loss": 0.9497, "step": 3430 }, { "epoch": 1.394795690180931, "grad_norm": 0.10362914949655533, "learning_rate": 6.057398738041929e-05, "loss": 0.947, "step": 3431 }, { "epoch": 1.3952022768855459, "grad_norm": 0.10438563674688339, "learning_rate": 6.0533279055566874e-05, "loss": 0.9665, "step": 3432 }, { "epoch": 1.3956088635901607, "grad_norm": 0.11194411665201187, "learning_rate": 6.0492570730714435e-05, "loss": 0.9862, "step": 3433 }, { "epoch": 1.3960154502947755, "grad_norm": 0.11035612970590591, "learning_rate": 6.0451862405862e-05, "loss": 0.9924, "step": 3434 }, { "epoch": 1.39642203699939, "grad_norm": 0.10321981459856033, "learning_rate": 6.041115408100957e-05, "loss": 0.997, "step": 3435 }, { "epoch": 1.3968286237040048, "grad_norm": 0.10283782333135605, "learning_rate": 6.037044575615714e-05, "loss": 1.001, "step": 3436 }, { "epoch": 1.3972352104086196, "grad_norm": 0.10645655542612076, "learning_rate": 6.03297374313047e-05, "loss": 0.9283, "step": 3437 }, { "epoch": 1.3976417971132344, "grad_norm": 0.11539904773235321, "learning_rate": 6.028902910645228e-05, "loss": 1.0165, "step": 3438 }, { "epoch": 1.3980483838178492, "grad_norm": 0.1119072213768959, "learning_rate": 6.024832078159984e-05, "loss": 1.0619, "step": 3439 }, { "epoch": 1.3984549705224638, "grad_norm": 0.10923818498849869, "learning_rate": 6.020761245674741e-05, "loss": 1.0457, "step": 3440 }, { "epoch": 1.3988615572270786, "grad_norm": 0.09986556321382523, "learning_rate": 6.0166904131894976e-05, "loss": 0.9656, "step": 3441 }, { "epoch": 1.3992681439316934, "grad_norm": 0.10424059629440308, "learning_rate": 6.012619580704254e-05, "loss": 0.9985, "step": 3442 }, { "epoch": 1.3996747306363082, "grad_norm": 0.09739932417869568, "learning_rate": 6.0085487482190104e-05, "loss": 0.8726, "step": 3443 }, { "epoch": 1.400081317340923, "grad_norm": 0.10547023266553879, "learning_rate": 
6.0044779157337685e-05, "loss": 1.0137, "step": 3444 }, { "epoch": 1.4004879040455376, "grad_norm": 0.09859997779130936, "learning_rate": 6.0004070832485246e-05, "loss": 0.9356, "step": 3445 }, { "epoch": 1.4008944907501526, "grad_norm": 0.10799646377563477, "learning_rate": 5.9963362507632814e-05, "loss": 0.9075, "step": 3446 }, { "epoch": 1.4013010774547672, "grad_norm": 0.10137838125228882, "learning_rate": 5.992265418278038e-05, "loss": 0.9959, "step": 3447 }, { "epoch": 1.401707664159382, "grad_norm": 0.10512836277484894, "learning_rate": 5.988194585792795e-05, "loss": 0.9675, "step": 3448 }, { "epoch": 1.4021142508639968, "grad_norm": 0.1019698828458786, "learning_rate": 5.984123753307551e-05, "loss": 0.8817, "step": 3449 }, { "epoch": 1.4025208375686116, "grad_norm": 0.10622533410787582, "learning_rate": 5.980052920822309e-05, "loss": 0.9868, "step": 3450 }, { "epoch": 1.4029274242732264, "grad_norm": 0.1112954169511795, "learning_rate": 5.975982088337065e-05, "loss": 1.0354, "step": 3451 }, { "epoch": 1.403334010977841, "grad_norm": 0.09042443335056305, "learning_rate": 5.971911255851822e-05, "loss": 0.819, "step": 3452 }, { "epoch": 1.4037405976824557, "grad_norm": 0.10594133287668228, "learning_rate": 5.967840423366579e-05, "loss": 0.9371, "step": 3453 }, { "epoch": 1.4041471843870705, "grad_norm": 0.10706698149442673, "learning_rate": 5.963769590881335e-05, "loss": 1.0281, "step": 3454 }, { "epoch": 1.4045537710916853, "grad_norm": 0.09997668862342834, "learning_rate": 5.959698758396093e-05, "loss": 0.9686, "step": 3455 }, { "epoch": 1.4049603577963001, "grad_norm": 0.11369183659553528, "learning_rate": 5.95562792591085e-05, "loss": 1.0679, "step": 3456 }, { "epoch": 1.4053669445009147, "grad_norm": 0.0964435562491417, "learning_rate": 5.951557093425606e-05, "loss": 0.8741, "step": 3457 }, { "epoch": 1.4057735312055295, "grad_norm": 0.10728949308395386, "learning_rate": 5.9474862609403625e-05, "loss": 0.9704, "step": 3458 }, { "epoch": 
1.4061801179101443, "grad_norm": 0.0988539531826973, "learning_rate": 5.943415428455119e-05, "loss": 0.8462, "step": 3459 }, { "epoch": 1.406586704614759, "grad_norm": 0.09907898306846619, "learning_rate": 5.9393445959698754e-05, "loss": 0.9082, "step": 3460 }, { "epoch": 1.406993291319374, "grad_norm": 0.10469436645507812, "learning_rate": 5.9352737634846335e-05, "loss": 0.9228, "step": 3461 }, { "epoch": 1.4073998780239887, "grad_norm": 0.09769507497549057, "learning_rate": 5.93120293099939e-05, "loss": 0.8791, "step": 3462 }, { "epoch": 1.4078064647286035, "grad_norm": 0.10890262573957443, "learning_rate": 5.9271320985141464e-05, "loss": 0.984, "step": 3463 }, { "epoch": 1.408213051433218, "grad_norm": 0.10154248028993607, "learning_rate": 5.923061266028903e-05, "loss": 0.9842, "step": 3464 }, { "epoch": 1.4086196381378329, "grad_norm": 0.10687004029750824, "learning_rate": 5.91899043354366e-05, "loss": 0.9201, "step": 3465 }, { "epoch": 1.4090262248424477, "grad_norm": 0.10408183187246323, "learning_rate": 5.914919601058416e-05, "loss": 0.866, "step": 3466 }, { "epoch": 1.4094328115470625, "grad_norm": 0.11365069448947906, "learning_rate": 5.910848768573174e-05, "loss": 1.0448, "step": 3467 }, { "epoch": 1.4098393982516773, "grad_norm": 0.10718761384487152, "learning_rate": 5.90677793608793e-05, "loss": 0.9858, "step": 3468 }, { "epoch": 1.4102459849562918, "grad_norm": 0.11558745801448822, "learning_rate": 5.902707103602687e-05, "loss": 0.9746, "step": 3469 }, { "epoch": 1.4106525716609066, "grad_norm": 0.10393566638231277, "learning_rate": 5.898636271117444e-05, "loss": 0.9475, "step": 3470 }, { "epoch": 1.4110591583655214, "grad_norm": 0.10006933659315109, "learning_rate": 5.8945654386322005e-05, "loss": 0.9519, "step": 3471 }, { "epoch": 1.4114657450701362, "grad_norm": 0.10952023416757584, "learning_rate": 5.8904946061469565e-05, "loss": 0.9115, "step": 3472 }, { "epoch": 1.411872331774751, "grad_norm": 0.10266458243131638, "learning_rate": 
5.886423773661715e-05, "loss": 0.8776, "step": 3473 }, { "epoch": 1.4122789184793656, "grad_norm": 0.10357289016246796, "learning_rate": 5.882352941176471e-05, "loss": 0.8574, "step": 3474 }, { "epoch": 1.4126855051839806, "grad_norm": 0.10914462804794312, "learning_rate": 5.8782821086912275e-05, "loss": 0.9747, "step": 3475 }, { "epoch": 1.4130920918885952, "grad_norm": 0.10908231884241104, "learning_rate": 5.874211276205984e-05, "loss": 1.0257, "step": 3476 }, { "epoch": 1.41349867859321, "grad_norm": 0.1127469390630722, "learning_rate": 5.870140443720741e-05, "loss": 1.0647, "step": 3477 }, { "epoch": 1.4139052652978248, "grad_norm": 0.10754305124282837, "learning_rate": 5.8660696112354985e-05, "loss": 1.0463, "step": 3478 }, { "epoch": 1.4143118520024396, "grad_norm": 0.10822881013154984, "learning_rate": 5.861998778750255e-05, "loss": 0.9339, "step": 3479 }, { "epoch": 1.4147184387070544, "grad_norm": 0.10191161185503006, "learning_rate": 5.857927946265011e-05, "loss": 0.9637, "step": 3480 }, { "epoch": 1.415125025411669, "grad_norm": 0.10310634225606918, "learning_rate": 5.853857113779768e-05, "loss": 0.9967, "step": 3481 }, { "epoch": 1.4155316121162838, "grad_norm": 0.10512620210647583, "learning_rate": 5.849786281294525e-05, "loss": 1.0131, "step": 3482 }, { "epoch": 1.4159381988208986, "grad_norm": 0.10264958441257477, "learning_rate": 5.8457154488092816e-05, "loss": 0.9531, "step": 3483 }, { "epoch": 1.4163447855255134, "grad_norm": 0.1196940541267395, "learning_rate": 5.841644616324039e-05, "loss": 1.1297, "step": 3484 }, { "epoch": 1.4167513722301281, "grad_norm": 0.10680915415287018, "learning_rate": 5.837573783838796e-05, "loss": 0.8811, "step": 3485 }, { "epoch": 1.4171579589347427, "grad_norm": 0.10389327257871628, "learning_rate": 5.833502951353552e-05, "loss": 0.9699, "step": 3486 }, { "epoch": 1.4175645456393575, "grad_norm": 0.10058487951755524, "learning_rate": 5.829432118868309e-05, "loss": 0.8641, "step": 3487 }, { "epoch": 
1.4179711323439723, "grad_norm": 0.10796665400266647, "learning_rate": 5.8253612863830654e-05, "loss": 0.9222, "step": 3488 }, { "epoch": 1.4183777190485871, "grad_norm": 0.10643795132637024, "learning_rate": 5.821290453897822e-05, "loss": 0.9759, "step": 3489 }, { "epoch": 1.418784305753202, "grad_norm": 0.1110243946313858, "learning_rate": 5.8172196214125796e-05, "loss": 1.0283, "step": 3490 }, { "epoch": 1.4191908924578167, "grad_norm": 0.10213904827833176, "learning_rate": 5.8131487889273364e-05, "loss": 0.9287, "step": 3491 }, { "epoch": 1.4195974791624315, "grad_norm": 0.11450214684009552, "learning_rate": 5.8090779564420925e-05, "loss": 1.0305, "step": 3492 }, { "epoch": 1.420004065867046, "grad_norm": 0.0998944565653801, "learning_rate": 5.805007123956849e-05, "loss": 0.9308, "step": 3493 }, { "epoch": 1.4204106525716609, "grad_norm": 0.10851487517356873, "learning_rate": 5.800936291471606e-05, "loss": 0.9519, "step": 3494 }, { "epoch": 1.4208172392762757, "grad_norm": 0.10219226777553558, "learning_rate": 5.796865458986363e-05, "loss": 0.9442, "step": 3495 }, { "epoch": 1.4212238259808905, "grad_norm": 0.10230858623981476, "learning_rate": 5.79279462650112e-05, "loss": 0.9174, "step": 3496 }, { "epoch": 1.4216304126855053, "grad_norm": 0.10335145145654678, "learning_rate": 5.788723794015877e-05, "loss": 0.9702, "step": 3497 }, { "epoch": 1.4220369993901198, "grad_norm": 0.10137276351451874, "learning_rate": 5.784652961530633e-05, "loss": 0.9462, "step": 3498 }, { "epoch": 1.4224435860947346, "grad_norm": 0.10088439285755157, "learning_rate": 5.78058212904539e-05, "loss": 0.9083, "step": 3499 }, { "epoch": 1.4228501727993494, "grad_norm": 51.91322708129883, "learning_rate": 5.7765112965601466e-05, "loss": 1.0057, "step": 3500 }, { "epoch": 1.4232567595039642, "grad_norm": 0.11275726556777954, "learning_rate": 5.772440464074904e-05, "loss": 1.0476, "step": 3501 }, { "epoch": 1.423663346208579, "grad_norm": 0.10602027922868729, "learning_rate": 
5.768369631589661e-05, "loss": 0.9777, "step": 3502 }, { "epoch": 1.4240699329131936, "grad_norm": 0.10429500043392181, "learning_rate": 5.7642987991044176e-05, "loss": 0.9427, "step": 3503 }, { "epoch": 1.4244765196178086, "grad_norm": 0.10678494721651077, "learning_rate": 5.7602279666191737e-05, "loss": 0.961, "step": 3504 }, { "epoch": 1.4248831063224232, "grad_norm": 0.10437629371881485, "learning_rate": 5.7561571341339304e-05, "loss": 0.943, "step": 3505 }, { "epoch": 1.425289693027038, "grad_norm": 0.11122875660657883, "learning_rate": 5.752086301648687e-05, "loss": 0.9774, "step": 3506 }, { "epoch": 1.4256962797316528, "grad_norm": 0.10330124944448471, "learning_rate": 5.7480154691634446e-05, "loss": 0.9258, "step": 3507 }, { "epoch": 1.4261028664362676, "grad_norm": 0.11003297567367554, "learning_rate": 5.7439446366782014e-05, "loss": 0.9034, "step": 3508 }, { "epoch": 1.4265094531408824, "grad_norm": 0.1117345541715622, "learning_rate": 5.739873804192958e-05, "loss": 0.9539, "step": 3509 }, { "epoch": 1.426916039845497, "grad_norm": 0.10259495675563812, "learning_rate": 5.735802971707714e-05, "loss": 0.9002, "step": 3510 }, { "epoch": 1.4273226265501118, "grad_norm": 0.11275705695152283, "learning_rate": 5.731732139222471e-05, "loss": 1.1176, "step": 3511 }, { "epoch": 1.4277292132547266, "grad_norm": 0.10503105074167252, "learning_rate": 5.727661306737228e-05, "loss": 0.9467, "step": 3512 }, { "epoch": 1.4281357999593414, "grad_norm": 0.1054445430636406, "learning_rate": 5.723590474251985e-05, "loss": 1.0557, "step": 3513 }, { "epoch": 1.4285423866639562, "grad_norm": 0.1129273921251297, "learning_rate": 5.719519641766742e-05, "loss": 1.0294, "step": 3514 }, { "epoch": 1.4289489733685707, "grad_norm": 0.10943814367055893, "learning_rate": 5.715448809281499e-05, "loss": 0.9208, "step": 3515 }, { "epoch": 1.4293555600731855, "grad_norm": 0.1063983365893364, "learning_rate": 5.711377976796255e-05, "loss": 0.938, "step": 3516 }, { "epoch": 1.4297621467778003, 
"grad_norm": 0.11121434718370438, "learning_rate": 5.7073071443110116e-05, "loss": 0.9533, "step": 3517 }, { "epoch": 1.4301687334824151, "grad_norm": 0.1092074066400528, "learning_rate": 5.7032363118257683e-05, "loss": 1.0131, "step": 3518 }, { "epoch": 1.43057532018703, "grad_norm": 0.1058395728468895, "learning_rate": 5.699165479340526e-05, "loss": 0.9296, "step": 3519 }, { "epoch": 1.4309819068916445, "grad_norm": 0.10550788789987564, "learning_rate": 5.6950946468552825e-05, "loss": 1.0363, "step": 3520 }, { "epoch": 1.4313884935962595, "grad_norm": 0.09717314690351486, "learning_rate": 5.6910238143700386e-05, "loss": 0.896, "step": 3521 }, { "epoch": 1.431795080300874, "grad_norm": 0.10000022500753403, "learning_rate": 5.6869529818847954e-05, "loss": 0.9379, "step": 3522 }, { "epoch": 1.432201667005489, "grad_norm": 0.11158038675785065, "learning_rate": 5.682882149399552e-05, "loss": 1.0327, "step": 3523 }, { "epoch": 1.4326082537101037, "grad_norm": 0.11050955951213837, "learning_rate": 5.6788113169143096e-05, "loss": 0.9745, "step": 3524 }, { "epoch": 1.4330148404147185, "grad_norm": 0.10940226912498474, "learning_rate": 5.6747404844290664e-05, "loss": 1.0722, "step": 3525 }, { "epoch": 1.4334214271193333, "grad_norm": 0.09851626306772232, "learning_rate": 5.670669651943823e-05, "loss": 0.8825, "step": 3526 }, { "epoch": 1.4338280138239479, "grad_norm": 0.10658843070268631, "learning_rate": 5.666598819458579e-05, "loss": 0.9659, "step": 3527 }, { "epoch": 1.4342346005285627, "grad_norm": 0.10467347502708435, "learning_rate": 5.662527986973336e-05, "loss": 1.0181, "step": 3528 }, { "epoch": 1.4346411872331775, "grad_norm": 0.09940101206302643, "learning_rate": 5.658457154488093e-05, "loss": 0.9297, "step": 3529 }, { "epoch": 1.4350477739377923, "grad_norm": 0.10660876333713531, "learning_rate": 5.65438632200285e-05, "loss": 0.9767, "step": 3530 }, { "epoch": 1.435454360642407, "grad_norm": 0.10834870487451553, "learning_rate": 5.650315489517607e-05, "loss": 
0.9989, "step": 3531 }, { "epoch": 1.4358609473470216, "grad_norm": 0.11498194932937622, "learning_rate": 5.646244657032364e-05, "loss": 1.062, "step": 3532 }, { "epoch": 1.4362675340516364, "grad_norm": 0.10452236235141754, "learning_rate": 5.64217382454712e-05, "loss": 0.9455, "step": 3533 }, { "epoch": 1.4366741207562512, "grad_norm": 0.10281018912792206, "learning_rate": 5.6381029920618766e-05, "loss": 1.0519, "step": 3534 }, { "epoch": 1.437080707460866, "grad_norm": 0.1045062243938446, "learning_rate": 5.634032159576633e-05, "loss": 0.9285, "step": 3535 }, { "epoch": 1.4374872941654808, "grad_norm": 0.09464049339294434, "learning_rate": 5.629961327091391e-05, "loss": 0.8502, "step": 3536 }, { "epoch": 1.4378938808700956, "grad_norm": 0.11245562881231308, "learning_rate": 5.6258904946061475e-05, "loss": 1.0127, "step": 3537 }, { "epoch": 1.4383004675747104, "grad_norm": 0.10821034759283066, "learning_rate": 5.621819662120904e-05, "loss": 0.9578, "step": 3538 }, { "epoch": 1.438707054279325, "grad_norm": 0.09722383320331573, "learning_rate": 5.6177488296356604e-05, "loss": 0.9295, "step": 3539 }, { "epoch": 1.4391136409839398, "grad_norm": 0.09911882877349854, "learning_rate": 5.613677997150417e-05, "loss": 0.8467, "step": 3540 }, { "epoch": 1.4395202276885546, "grad_norm": 0.0992516502737999, "learning_rate": 5.609607164665174e-05, "loss": 0.8095, "step": 3541 }, { "epoch": 1.4399268143931694, "grad_norm": 0.10205196589231491, "learning_rate": 5.6055363321799313e-05, "loss": 0.8391, "step": 3542 }, { "epoch": 1.4403334010977842, "grad_norm": 0.10054369270801544, "learning_rate": 5.601465499694688e-05, "loss": 0.8823, "step": 3543 }, { "epoch": 1.4407399878023988, "grad_norm": 0.10473167896270752, "learning_rate": 5.597394667209445e-05, "loss": 1.0052, "step": 3544 }, { "epoch": 1.4411465745070136, "grad_norm": 0.1048002541065216, "learning_rate": 5.593323834724201e-05, "loss": 0.9106, "step": 3545 }, { "epoch": 1.4415531612116284, "grad_norm": 
0.10600331425666809, "learning_rate": 5.589253002238958e-05, "loss": 1.0016, "step": 3546 }, { "epoch": 1.4419597479162432, "grad_norm": 0.10743583738803864, "learning_rate": 5.585182169753715e-05, "loss": 1.0009, "step": 3547 }, { "epoch": 1.442366334620858, "grad_norm": 0.10742181539535522, "learning_rate": 5.581111337268472e-05, "loss": 0.861, "step": 3548 }, { "epoch": 1.4427729213254725, "grad_norm": 0.10677099227905273, "learning_rate": 5.577040504783229e-05, "loss": 1.0, "step": 3549 }, { "epoch": 1.4431795080300875, "grad_norm": 0.10537279397249222, "learning_rate": 5.5729696722979854e-05, "loss": 0.9087, "step": 3550 }, { "epoch": 1.4435860947347021, "grad_norm": 0.11347132921218872, "learning_rate": 5.5688988398127415e-05, "loss": 1.0444, "step": 3551 }, { "epoch": 1.443992681439317, "grad_norm": 0.10851515829563141, "learning_rate": 5.564828007327498e-05, "loss": 0.9637, "step": 3552 }, { "epoch": 1.4443992681439317, "grad_norm": 0.11019739508628845, "learning_rate": 5.560757174842256e-05, "loss": 1.0358, "step": 3553 }, { "epoch": 1.4448058548485465, "grad_norm": 0.10217945277690887, "learning_rate": 5.5566863423570125e-05, "loss": 0.8874, "step": 3554 }, { "epoch": 1.4452124415531613, "grad_norm": 0.099341981112957, "learning_rate": 5.552615509871769e-05, "loss": 0.8806, "step": 3555 }, { "epoch": 1.4456190282577759, "grad_norm": 0.11381018906831741, "learning_rate": 5.548544677386526e-05, "loss": 1.0111, "step": 3556 }, { "epoch": 1.4460256149623907, "grad_norm": 0.10798200964927673, "learning_rate": 5.544473844901282e-05, "loss": 1.0352, "step": 3557 }, { "epoch": 1.4464322016670055, "grad_norm": 0.10731607675552368, "learning_rate": 5.540403012416039e-05, "loss": 0.8602, "step": 3558 }, { "epoch": 1.4468387883716203, "grad_norm": 0.10364881157875061, "learning_rate": 5.536332179930796e-05, "loss": 0.9294, "step": 3559 }, { "epoch": 1.447245375076235, "grad_norm": 0.10474424064159393, "learning_rate": 5.532261347445553e-05, "loss": 0.922, "step": 
3560 }, { "epoch": 1.4476519617808496, "grad_norm": 0.0995747447013855, "learning_rate": 5.52819051496031e-05, "loss": 0.9506, "step": 3561 }, { "epoch": 1.4480585484854644, "grad_norm": 0.11012274771928787, "learning_rate": 5.5241196824750666e-05, "loss": 1.0621, "step": 3562 }, { "epoch": 1.4484651351900792, "grad_norm": 0.10382426530122757, "learning_rate": 5.520048849989823e-05, "loss": 0.8777, "step": 3563 }, { "epoch": 1.448871721894694, "grad_norm": 0.11155311018228531, "learning_rate": 5.5159780175045795e-05, "loss": 0.9709, "step": 3564 }, { "epoch": 1.4492783085993088, "grad_norm": 0.11111366748809814, "learning_rate": 5.511907185019337e-05, "loss": 0.9665, "step": 3565 }, { "epoch": 1.4496848953039236, "grad_norm": 0.09758581221103668, "learning_rate": 5.507836352534094e-05, "loss": 0.8338, "step": 3566 }, { "epoch": 1.4500914820085384, "grad_norm": 0.09697642177343369, "learning_rate": 5.5037655200488504e-05, "loss": 0.8714, "step": 3567 }, { "epoch": 1.450498068713153, "grad_norm": 0.11022147536277771, "learning_rate": 5.4996946875636065e-05, "loss": 1.0589, "step": 3568 }, { "epoch": 1.4509046554177678, "grad_norm": 0.10376861691474915, "learning_rate": 5.495623855078363e-05, "loss": 0.9349, "step": 3569 }, { "epoch": 1.4513112421223826, "grad_norm": 0.11290717124938965, "learning_rate": 5.4915530225931214e-05, "loss": 0.9621, "step": 3570 }, { "epoch": 1.4517178288269974, "grad_norm": 0.11141788959503174, "learning_rate": 5.4874821901078775e-05, "loss": 0.9851, "step": 3571 }, { "epoch": 1.4521244155316122, "grad_norm": 0.097492516040802, "learning_rate": 5.483411357622634e-05, "loss": 0.8546, "step": 3572 }, { "epoch": 1.4525310022362268, "grad_norm": 0.1105538010597229, "learning_rate": 5.479340525137391e-05, "loss": 0.9972, "step": 3573 }, { "epoch": 1.4529375889408416, "grad_norm": 0.11950520426034927, "learning_rate": 5.475269692652147e-05, "loss": 1.0069, "step": 3574 }, { "epoch": 1.4533441756454564, "grad_norm": 0.11691899597644806, 
"learning_rate": 5.471198860166904e-05, "loss": 1.1141, "step": 3575 }, { "epoch": 1.4537507623500712, "grad_norm": 0.10801886022090912, "learning_rate": 5.467128027681662e-05, "loss": 0.9687, "step": 3576 }, { "epoch": 1.454157349054686, "grad_norm": 0.10855500400066376, "learning_rate": 5.463057195196418e-05, "loss": 0.9704, "step": 3577 }, { "epoch": 1.4545639357593005, "grad_norm": 0.10491110384464264, "learning_rate": 5.458986362711175e-05, "loss": 0.8492, "step": 3578 }, { "epoch": 1.4549705224639156, "grad_norm": 0.10783471167087555, "learning_rate": 5.4549155302259316e-05, "loss": 0.955, "step": 3579 }, { "epoch": 1.4553771091685301, "grad_norm": 0.10471615940332413, "learning_rate": 5.450844697740688e-05, "loss": 0.9144, "step": 3580 }, { "epoch": 1.455783695873145, "grad_norm": 0.10220612585544586, "learning_rate": 5.4467738652554444e-05, "loss": 0.9811, "step": 3581 }, { "epoch": 1.4561902825777597, "grad_norm": 0.10163000226020813, "learning_rate": 5.442703032770202e-05, "loss": 0.9597, "step": 3582 }, { "epoch": 1.4565968692823745, "grad_norm": 0.11535109579563141, "learning_rate": 5.4386322002849586e-05, "loss": 1.0492, "step": 3583 }, { "epoch": 1.4570034559869893, "grad_norm": 0.11015263944864273, "learning_rate": 5.4345613677997154e-05, "loss": 1.0299, "step": 3584 }, { "epoch": 1.457410042691604, "grad_norm": 0.10480646044015884, "learning_rate": 5.430490535314472e-05, "loss": 1.0163, "step": 3585 }, { "epoch": 1.4578166293962187, "grad_norm": 0.09951364248991013, "learning_rate": 5.426419702829228e-05, "loss": 0.8642, "step": 3586 }, { "epoch": 1.4582232161008335, "grad_norm": 0.10405910015106201, "learning_rate": 5.422348870343985e-05, "loss": 0.9905, "step": 3587 }, { "epoch": 1.4586298028054483, "grad_norm": 0.10927630215883255, "learning_rate": 5.4182780378587425e-05, "loss": 0.9738, "step": 3588 }, { "epoch": 1.459036389510063, "grad_norm": 0.10846424102783203, "learning_rate": 5.414207205373499e-05, "loss": 0.9662, "step": 3589 }, { 
"epoch": 1.4594429762146777, "grad_norm": 0.1038946732878685, "learning_rate": 5.410136372888256e-05, "loss": 0.9247, "step": 3590 }, { "epoch": 1.4598495629192925, "grad_norm": 0.10503431409597397, "learning_rate": 5.406065540403013e-05, "loss": 0.9135, "step": 3591 }, { "epoch": 1.4602561496239073, "grad_norm": 0.11139222979545593, "learning_rate": 5.401994707917769e-05, "loss": 1.0107, "step": 3592 }, { "epoch": 1.460662736328522, "grad_norm": 0.1007532998919487, "learning_rate": 5.397923875432527e-05, "loss": 0.8869, "step": 3593 }, { "epoch": 1.4610693230331369, "grad_norm": 0.10141917318105698, "learning_rate": 5.393853042947283e-05, "loss": 0.9378, "step": 3594 }, { "epoch": 1.4614759097377517, "grad_norm": 0.10145898908376694, "learning_rate": 5.38978221046204e-05, "loss": 0.9691, "step": 3595 }, { "epoch": 1.4618824964423665, "grad_norm": 0.11464249342679977, "learning_rate": 5.3857113779767966e-05, "loss": 1.0785, "step": 3596 }, { "epoch": 1.462289083146981, "grad_norm": 0.11114252358675003, "learning_rate": 5.381640545491553e-05, "loss": 1.0755, "step": 3597 }, { "epoch": 1.4626956698515958, "grad_norm": 0.10882499814033508, "learning_rate": 5.3775697130063094e-05, "loss": 0.9431, "step": 3598 }, { "epoch": 1.4631022565562106, "grad_norm": 0.09124311059713364, "learning_rate": 5.3734988805210675e-05, "loss": 0.8333, "step": 3599 }, { "epoch": 1.4635088432608254, "grad_norm": 0.1018030196428299, "learning_rate": 5.3694280480358236e-05, "loss": 0.965, "step": 3600 }, { "epoch": 1.4639154299654402, "grad_norm": 0.10035334527492523, "learning_rate": 5.3653572155505804e-05, "loss": 0.7887, "step": 3601 }, { "epoch": 1.4643220166700548, "grad_norm": 0.1025049090385437, "learning_rate": 5.361286383065337e-05, "loss": 0.981, "step": 3602 }, { "epoch": 1.4647286033746696, "grad_norm": 0.09964943677186966, "learning_rate": 5.357215550580094e-05, "loss": 0.795, "step": 3603 }, { "epoch": 1.4651351900792844, "grad_norm": 0.10191620141267776, "learning_rate": 
5.35314471809485e-05, "loss": 0.9007, "step": 3604 }, { "epoch": 1.4655417767838992, "grad_norm": 0.10153241455554962, "learning_rate": 5.349073885609608e-05, "loss": 0.9305, "step": 3605 }, { "epoch": 1.465948363488514, "grad_norm": 0.09318286180496216, "learning_rate": 5.345003053124364e-05, "loss": 0.8347, "step": 3606 }, { "epoch": 1.4663549501931286, "grad_norm": 0.10995296388864517, "learning_rate": 5.340932220639121e-05, "loss": 0.9451, "step": 3607 }, { "epoch": 1.4667615368977436, "grad_norm": 0.10251913964748383, "learning_rate": 5.336861388153878e-05, "loss": 0.9004, "step": 3608 }, { "epoch": 1.4671681236023582, "grad_norm": 0.10498196631669998, "learning_rate": 5.3327905556686345e-05, "loss": 0.9679, "step": 3609 }, { "epoch": 1.467574710306973, "grad_norm": 0.09520924836397171, "learning_rate": 5.3287197231833906e-05, "loss": 0.8822, "step": 3610 }, { "epoch": 1.4679812970115877, "grad_norm": 0.10204574465751648, "learning_rate": 5.324648890698149e-05, "loss": 0.9217, "step": 3611 }, { "epoch": 1.4683878837162025, "grad_norm": 0.11443029344081879, "learning_rate": 5.320578058212905e-05, "loss": 1.0609, "step": 3612 }, { "epoch": 1.4687944704208173, "grad_norm": 0.10830609500408173, "learning_rate": 5.3165072257276615e-05, "loss": 1.0313, "step": 3613 }, { "epoch": 1.469201057125432, "grad_norm": 0.10037508606910706, "learning_rate": 5.312436393242418e-05, "loss": 0.9576, "step": 3614 }, { "epoch": 1.4696076438300467, "grad_norm": 0.09800178557634354, "learning_rate": 5.3083655607571744e-05, "loss": 0.8664, "step": 3615 }, { "epoch": 1.4700142305346615, "grad_norm": 0.10950475931167603, "learning_rate": 5.3042947282719325e-05, "loss": 0.9401, "step": 3616 }, { "epoch": 1.4704208172392763, "grad_norm": 0.10776437073945999, "learning_rate": 5.300223895786689e-05, "loss": 0.8952, "step": 3617 }, { "epoch": 1.470827403943891, "grad_norm": 0.10562727600336075, "learning_rate": 5.2961530633014454e-05, "loss": 0.9192, "step": 3618 }, { "epoch": 
1.4712339906485057, "grad_norm": 0.10100565105676651, "learning_rate": 5.292082230816202e-05, "loss": 0.9039, "step": 3619 }, { "epoch": 1.4716405773531205, "grad_norm": 0.10758239030838013, "learning_rate": 5.288011398330959e-05, "loss": 0.8568, "step": 3620 }, { "epoch": 1.4720471640577353, "grad_norm": 0.10886979103088379, "learning_rate": 5.283940565845715e-05, "loss": 0.9095, "step": 3621 }, { "epoch": 1.47245375076235, "grad_norm": 0.10650348663330078, "learning_rate": 5.279869733360473e-05, "loss": 0.9933, "step": 3622 }, { "epoch": 1.4728603374669649, "grad_norm": 0.1114429384469986, "learning_rate": 5.27579890087523e-05, "loss": 1.014, "step": 3623 }, { "epoch": 1.4732669241715795, "grad_norm": 0.10653100162744522, "learning_rate": 5.271728068389986e-05, "loss": 0.9635, "step": 3624 }, { "epoch": 1.4736735108761945, "grad_norm": 0.09530437737703323, "learning_rate": 5.267657235904743e-05, "loss": 0.8877, "step": 3625 }, { "epoch": 1.474080097580809, "grad_norm": 0.10414159297943115, "learning_rate": 5.2635864034194995e-05, "loss": 0.9762, "step": 3626 }, { "epoch": 1.4744866842854238, "grad_norm": 0.1114436537027359, "learning_rate": 5.2595155709342556e-05, "loss": 1.0022, "step": 3627 }, { "epoch": 1.4748932709900386, "grad_norm": 0.1078386902809143, "learning_rate": 5.255444738449014e-05, "loss": 0.9903, "step": 3628 }, { "epoch": 1.4752998576946534, "grad_norm": 0.11050703376531601, "learning_rate": 5.2513739059637704e-05, "loss": 1.0857, "step": 3629 }, { "epoch": 1.4757064443992682, "grad_norm": 0.10695330053567886, "learning_rate": 5.2473030734785265e-05, "loss": 0.9716, "step": 3630 }, { "epoch": 1.4761130311038828, "grad_norm": 0.0953662171959877, "learning_rate": 5.243232240993283e-05, "loss": 0.908, "step": 3631 }, { "epoch": 1.4765196178084976, "grad_norm": 0.10216762125492096, "learning_rate": 5.23916140850804e-05, "loss": 0.8609, "step": 3632 }, { "epoch": 1.4769262045131124, "grad_norm": 0.10386509448289871, "learning_rate": 
5.235090576022796e-05, "loss": 0.929, "step": 3633 }, { "epoch": 1.4773327912177272, "grad_norm": 0.11064155399799347, "learning_rate": 5.231019743537554e-05, "loss": 0.96, "step": 3634 }, { "epoch": 1.477739377922342, "grad_norm": 0.09775776416063309, "learning_rate": 5.2269489110523103e-05, "loss": 0.9248, "step": 3635 }, { "epoch": 1.4781459646269566, "grad_norm": 0.10205373913049698, "learning_rate": 5.222878078567067e-05, "loss": 0.9947, "step": 3636 }, { "epoch": 1.4785525513315716, "grad_norm": 0.10354917496442795, "learning_rate": 5.218807246081824e-05, "loss": 0.93, "step": 3637 }, { "epoch": 1.4789591380361862, "grad_norm": 0.11006239056587219, "learning_rate": 5.2147364135965806e-05, "loss": 0.9533, "step": 3638 }, { "epoch": 1.479365724740801, "grad_norm": 0.10362465679645538, "learning_rate": 5.210665581111338e-05, "loss": 0.8871, "step": 3639 }, { "epoch": 1.4797723114454158, "grad_norm": 0.103641577064991, "learning_rate": 5.206594748626095e-05, "loss": 0.9655, "step": 3640 }, { "epoch": 1.4801788981500306, "grad_norm": 0.10225971043109894, "learning_rate": 5.202523916140851e-05, "loss": 0.9341, "step": 3641 }, { "epoch": 1.4805854848546454, "grad_norm": 0.10774664580821991, "learning_rate": 5.198453083655608e-05, "loss": 0.8913, "step": 3642 }, { "epoch": 1.48099207155926, "grad_norm": 0.10669755935668945, "learning_rate": 5.1943822511703644e-05, "loss": 0.9488, "step": 3643 }, { "epoch": 1.4813986582638747, "grad_norm": 0.10005049407482147, "learning_rate": 5.190311418685121e-05, "loss": 0.8892, "step": 3644 }, { "epoch": 1.4818052449684895, "grad_norm": 0.09732303023338318, "learning_rate": 5.1862405861998787e-05, "loss": 0.8575, "step": 3645 }, { "epoch": 1.4822118316731043, "grad_norm": 0.10400817543268204, "learning_rate": 5.1821697537146354e-05, "loss": 0.9641, "step": 3646 }, { "epoch": 1.4826184183777191, "grad_norm": 0.09669985622167587, "learning_rate": 5.1780989212293915e-05, "loss": 0.8836, "step": 3647 }, { "epoch": 1.4830250050823337, 
"grad_norm": 0.0959673672914505, "learning_rate": 5.174028088744148e-05, "loss": 0.8678, "step": 3648 }, { "epoch": 1.4834315917869485, "grad_norm": 0.10482336580753326, "learning_rate": 5.169957256258905e-05, "loss": 0.94, "step": 3649 }, { "epoch": 1.4838381784915633, "grad_norm": 0.10262557864189148, "learning_rate": 5.165886423773662e-05, "loss": 0.9732, "step": 3650 }, { "epoch": 1.484244765196178, "grad_norm": 0.09837047010660172, "learning_rate": 5.161815591288419e-05, "loss": 0.9231, "step": 3651 }, { "epoch": 1.484651351900793, "grad_norm": 0.09526870399713516, "learning_rate": 5.157744758803176e-05, "loss": 0.8501, "step": 3652 }, { "epoch": 1.4850579386054075, "grad_norm": 0.10179829597473145, "learning_rate": 5.153673926317932e-05, "loss": 0.9574, "step": 3653 }, { "epoch": 1.4854645253100225, "grad_norm": 0.09894520044326782, "learning_rate": 5.149603093832689e-05, "loss": 0.9625, "step": 3654 }, { "epoch": 1.485871112014637, "grad_norm": 0.1013031080365181, "learning_rate": 5.1455322613474456e-05, "loss": 0.8199, "step": 3655 }, { "epoch": 1.4862776987192519, "grad_norm": 0.11384668201208115, "learning_rate": 5.1414614288622024e-05, "loss": 1.0381, "step": 3656 }, { "epoch": 1.4866842854238667, "grad_norm": 0.10072863847017288, "learning_rate": 5.13739059637696e-05, "loss": 0.9349, "step": 3657 }, { "epoch": 1.4870908721284815, "grad_norm": 0.10364335775375366, "learning_rate": 5.1333197638917166e-05, "loss": 0.8993, "step": 3658 }, { "epoch": 1.4874974588330963, "grad_norm": 0.09980635344982147, "learning_rate": 5.129248931406473e-05, "loss": 0.8615, "step": 3659 }, { "epoch": 1.4879040455377108, "grad_norm": 0.1187555268406868, "learning_rate": 5.1251780989212294e-05, "loss": 1.0562, "step": 3660 }, { "epoch": 1.4883106322423256, "grad_norm": 0.10120145976543427, "learning_rate": 5.121107266435986e-05, "loss": 0.9573, "step": 3661 }, { "epoch": 1.4887172189469404, "grad_norm": 0.10651153326034546, "learning_rate": 5.1170364339507436e-05, "loss": 
1.0343, "step": 3662 }, { "epoch": 1.4891238056515552, "grad_norm": 0.10437972843647003, "learning_rate": 5.1129656014655004e-05, "loss": 0.9506, "step": 3663 }, { "epoch": 1.48953039235617, "grad_norm": 0.12389584630727768, "learning_rate": 5.108894768980257e-05, "loss": 1.0083, "step": 3664 }, { "epoch": 1.4899369790607846, "grad_norm": 0.12323293089866638, "learning_rate": 5.104823936495013e-05, "loss": 1.0145, "step": 3665 }, { "epoch": 1.4903435657653994, "grad_norm": 0.10193384438753128, "learning_rate": 5.10075310400977e-05, "loss": 0.9636, "step": 3666 }, { "epoch": 1.4907501524700142, "grad_norm": 0.11072493344545364, "learning_rate": 5.096682271524527e-05, "loss": 0.9733, "step": 3667 }, { "epoch": 1.491156739174629, "grad_norm": 0.11082090437412262, "learning_rate": 5.092611439039284e-05, "loss": 0.9273, "step": 3668 }, { "epoch": 1.4915633258792438, "grad_norm": 0.11326603591442108, "learning_rate": 5.088540606554041e-05, "loss": 1.0009, "step": 3669 }, { "epoch": 1.4919699125838586, "grad_norm": 0.10686499625444412, "learning_rate": 5.084469774068798e-05, "loss": 0.9802, "step": 3670 }, { "epoch": 1.4923764992884734, "grad_norm": 0.10559657961130142, "learning_rate": 5.080398941583554e-05, "loss": 0.933, "step": 3671 }, { "epoch": 1.492783085993088, "grad_norm": 0.11181288212537766, "learning_rate": 5.0763281090983106e-05, "loss": 1.0625, "step": 3672 }, { "epoch": 1.4931896726977028, "grad_norm": 0.11131290346384048, "learning_rate": 5.0722572766130673e-05, "loss": 1.0343, "step": 3673 }, { "epoch": 1.4935962594023175, "grad_norm": 0.1017846018075943, "learning_rate": 5.068186444127825e-05, "loss": 0.9214, "step": 3674 }, { "epoch": 1.4940028461069323, "grad_norm": 0.10858796536922455, "learning_rate": 5.0641156116425816e-05, "loss": 0.976, "step": 3675 }, { "epoch": 1.4944094328115471, "grad_norm": 0.10481224209070206, "learning_rate": 5.060044779157338e-05, "loss": 1.0067, "step": 3676 }, { "epoch": 1.4948160195161617, "grad_norm": 
0.10739448666572571, "learning_rate": 5.0559739466720944e-05, "loss": 0.9278, "step": 3677 }, { "epoch": 1.4952226062207765, "grad_norm": 0.10282362252473831, "learning_rate": 5.051903114186851e-05, "loss": 0.8945, "step": 3678 }, { "epoch": 1.4956291929253913, "grad_norm": 0.10915033519268036, "learning_rate": 5.047832281701608e-05, "loss": 1.0137, "step": 3679 }, { "epoch": 1.4960357796300061, "grad_norm": 0.10232996195554733, "learning_rate": 5.0437614492163654e-05, "loss": 0.9135, "step": 3680 }, { "epoch": 1.496442366334621, "grad_norm": 0.10414308309555054, "learning_rate": 5.039690616731122e-05, "loss": 0.9884, "step": 3681 }, { "epoch": 1.4968489530392355, "grad_norm": 0.09622911363840103, "learning_rate": 5.035619784245878e-05, "loss": 0.9511, "step": 3682 }, { "epoch": 1.4972555397438505, "grad_norm": 0.10247783362865448, "learning_rate": 5.031548951760635e-05, "loss": 0.89, "step": 3683 }, { "epoch": 1.497662126448465, "grad_norm": 0.09925010800361633, "learning_rate": 5.027478119275392e-05, "loss": 0.9652, "step": 3684 }, { "epoch": 1.4980687131530799, "grad_norm": 0.10200038552284241, "learning_rate": 5.023407286790149e-05, "loss": 0.9097, "step": 3685 }, { "epoch": 1.4984752998576947, "grad_norm": 0.11495770514011383, "learning_rate": 5.019336454304906e-05, "loss": 1.0503, "step": 3686 }, { "epoch": 1.4988818865623095, "grad_norm": 0.10580781102180481, "learning_rate": 5.015265621819663e-05, "loss": 0.8902, "step": 3687 }, { "epoch": 1.4992884732669243, "grad_norm": 0.10454212874174118, "learning_rate": 5.011194789334419e-05, "loss": 0.8997, "step": 3688 }, { "epoch": 1.4996950599715388, "grad_norm": 0.09890579432249069, "learning_rate": 5.0071239568491756e-05, "loss": 0.9537, "step": 3689 }, { "epoch": 1.5001016466761536, "grad_norm": 0.10192213952541351, "learning_rate": 5.003053124363932e-05, "loss": 0.912, "step": 3690 }, { "epoch": 1.5005082333807684, "grad_norm": 0.10794500261545181, "learning_rate": 4.998982291878689e-05, "loss": 0.9215, 
"step": 3691 }, { "epoch": 1.5009148200853832, "grad_norm": 0.10989861935377121, "learning_rate": 4.994911459393446e-05, "loss": 0.9642, "step": 3692 }, { "epoch": 1.501321406789998, "grad_norm": 0.11163085699081421, "learning_rate": 4.990840626908203e-05, "loss": 0.9854, "step": 3693 }, { "epoch": 1.5017279934946126, "grad_norm": 0.0962003841996193, "learning_rate": 4.9867697944229594e-05, "loss": 0.843, "step": 3694 }, { "epoch": 1.5021345801992276, "grad_norm": 0.10547157377004623, "learning_rate": 4.982698961937716e-05, "loss": 0.8982, "step": 3695 }, { "epoch": 1.5025411669038422, "grad_norm": 0.11247014254331589, "learning_rate": 4.9786281294524736e-05, "loss": 1.0197, "step": 3696 }, { "epoch": 1.502947753608457, "grad_norm": 0.11089324206113815, "learning_rate": 4.97455729696723e-05, "loss": 1.0034, "step": 3697 }, { "epoch": 1.5033543403130718, "grad_norm": 0.11227573454380035, "learning_rate": 4.970486464481987e-05, "loss": 1.0355, "step": 3698 }, { "epoch": 1.5037609270176864, "grad_norm": 0.09788957238197327, "learning_rate": 4.966415631996744e-05, "loss": 0.8086, "step": 3699 }, { "epoch": 1.5041675137223014, "grad_norm": 0.106124147772789, "learning_rate": 4.9623447995115e-05, "loss": 0.9087, "step": 3700 }, { "epoch": 1.504574100426916, "grad_norm": 0.10806267708539963, "learning_rate": 4.9582739670262574e-05, "loss": 0.8783, "step": 3701 }, { "epoch": 1.5049806871315308, "grad_norm": 0.10819346457719803, "learning_rate": 4.954203134541014e-05, "loss": 0.9358, "step": 3702 }, { "epoch": 1.5053872738361456, "grad_norm": 0.10454476624727249, "learning_rate": 4.95013230205577e-05, "loss": 0.9136, "step": 3703 }, { "epoch": 1.5057938605407604, "grad_norm": 0.09978950023651123, "learning_rate": 4.946061469570528e-05, "loss": 0.9259, "step": 3704 }, { "epoch": 1.5062004472453752, "grad_norm": 0.10548686236143112, "learning_rate": 4.9419906370852845e-05, "loss": 1.0147, "step": 3705 }, { "epoch": 1.5066070339499897, "grad_norm": 0.09171932935714722, 
"learning_rate": 4.9379198046000405e-05, "loss": 0.8624, "step": 3706 }, { "epoch": 1.5070136206546048, "grad_norm": 0.10809264332056046, "learning_rate": 4.933848972114798e-05, "loss": 0.9103, "step": 3707 }, { "epoch": 1.5074202073592193, "grad_norm": 0.09878364950418472, "learning_rate": 4.929778139629555e-05, "loss": 0.8816, "step": 3708 }, { "epoch": 1.5078267940638341, "grad_norm": 0.10659473389387131, "learning_rate": 4.925707307144311e-05, "loss": 0.9861, "step": 3709 }, { "epoch": 1.508233380768449, "grad_norm": 0.11606935411691666, "learning_rate": 4.921636474659068e-05, "loss": 1.0614, "step": 3710 }, { "epoch": 1.5086399674730635, "grad_norm": 0.09988582134246826, "learning_rate": 4.917565642173825e-05, "loss": 0.856, "step": 3711 }, { "epoch": 1.5090465541776785, "grad_norm": 0.10419981181621552, "learning_rate": 4.913494809688581e-05, "loss": 0.9892, "step": 3712 }, { "epoch": 1.509453140882293, "grad_norm": 0.10400033742189407, "learning_rate": 4.9094239772033386e-05, "loss": 0.9907, "step": 3713 }, { "epoch": 1.509859727586908, "grad_norm": 0.10473748296499252, "learning_rate": 4.905353144718095e-05, "loss": 0.9011, "step": 3714 }, { "epoch": 1.5102663142915227, "grad_norm": 0.10208045691251755, "learning_rate": 4.9012823122328514e-05, "loss": 0.8276, "step": 3715 }, { "epoch": 1.5106729009961373, "grad_norm": 0.10542485117912292, "learning_rate": 4.897211479747609e-05, "loss": 1.0149, "step": 3716 }, { "epoch": 1.5110794877007523, "grad_norm": 0.10585687309503555, "learning_rate": 4.8931406472623656e-05, "loss": 0.9461, "step": 3717 }, { "epoch": 1.5114860744053669, "grad_norm": 0.10241574048995972, "learning_rate": 4.889069814777122e-05, "loss": 0.868, "step": 3718 }, { "epoch": 1.5118926611099817, "grad_norm": 0.10915213078260422, "learning_rate": 4.884998982291879e-05, "loss": 1.0097, "step": 3719 }, { "epoch": 1.5122992478145965, "grad_norm": 0.09911471605300903, "learning_rate": 4.880928149806636e-05, "loss": 0.8691, "step": 3720 }, { "epoch": 
1.5127058345192113, "grad_norm": 0.10516642034053802, "learning_rate": 4.876857317321393e-05, "loss": 0.9422, "step": 3721 }, { "epoch": 1.513112421223826, "grad_norm": 0.10460437089204788, "learning_rate": 4.8727864848361494e-05, "loss": 0.9295, "step": 3722 }, { "epoch": 1.5135190079284406, "grad_norm": 0.10897176712751389, "learning_rate": 4.868715652350906e-05, "loss": 0.9497, "step": 3723 }, { "epoch": 1.5139255946330556, "grad_norm": 0.11495667695999146, "learning_rate": 4.864644819865663e-05, "loss": 0.9758, "step": 3724 }, { "epoch": 1.5143321813376702, "grad_norm": 0.1012863963842392, "learning_rate": 4.86057398738042e-05, "loss": 0.852, "step": 3725 }, { "epoch": 1.514738768042285, "grad_norm": 0.09959638863801956, "learning_rate": 4.8565031548951765e-05, "loss": 0.8484, "step": 3726 }, { "epoch": 1.5151453547468998, "grad_norm": 0.10632819682359695, "learning_rate": 4.852432322409933e-05, "loss": 0.9661, "step": 3727 }, { "epoch": 1.5155519414515144, "grad_norm": 0.11513801664113998, "learning_rate": 4.84836148992469e-05, "loss": 1.0592, "step": 3728 }, { "epoch": 1.5159585281561294, "grad_norm": 0.10134799033403397, "learning_rate": 4.844290657439446e-05, "loss": 0.9568, "step": 3729 }, { "epoch": 1.516365114860744, "grad_norm": 0.11659684777259827, "learning_rate": 4.8402198249542035e-05, "loss": 1.0208, "step": 3730 }, { "epoch": 1.5167717015653588, "grad_norm": 0.1074221208691597, "learning_rate": 4.83614899246896e-05, "loss": 0.8979, "step": 3731 }, { "epoch": 1.5171782882699736, "grad_norm": 0.10155625641345978, "learning_rate": 4.8320781599837164e-05, "loss": 0.8938, "step": 3732 }, { "epoch": 1.5175848749745884, "grad_norm": 0.11608471721410751, "learning_rate": 4.828007327498474e-05, "loss": 1.1216, "step": 3733 }, { "epoch": 1.5179914616792032, "grad_norm": 0.09947482496500015, "learning_rate": 4.8239364950132306e-05, "loss": 0.8629, "step": 3734 }, { "epoch": 1.5183980483838178, "grad_norm": 0.10868632793426514, "learning_rate": 
4.819865662527987e-05, "loss": 0.9978, "step": 3735 }, { "epoch": 1.5188046350884326, "grad_norm": 7.418512344360352, "learning_rate": 4.815794830042744e-05, "loss": 0.953, "step": 3736 }, { "epoch": 1.5192112217930474, "grad_norm": 0.1094009056687355, "learning_rate": 4.811723997557501e-05, "loss": 0.9911, "step": 3737 }, { "epoch": 1.5196178084976621, "grad_norm": 0.10767845809459686, "learning_rate": 4.807653165072257e-05, "loss": 0.987, "step": 3738 }, { "epoch": 1.520024395202277, "grad_norm": 0.10403701663017273, "learning_rate": 4.8035823325870144e-05, "loss": 0.9136, "step": 3739 }, { "epoch": 1.5204309819068915, "grad_norm": 0.10876458138227463, "learning_rate": 4.799511500101771e-05, "loss": 0.9448, "step": 3740 }, { "epoch": 1.5208375686115065, "grad_norm": 0.09987885504961014, "learning_rate": 4.795440667616527e-05, "loss": 0.9005, "step": 3741 }, { "epoch": 1.5212441553161211, "grad_norm": 0.10714446008205414, "learning_rate": 4.791369835131285e-05, "loss": 0.9782, "step": 3742 }, { "epoch": 1.521650742020736, "grad_norm": 0.10182036459445953, "learning_rate": 4.7872990026460415e-05, "loss": 0.8578, "step": 3743 }, { "epoch": 1.5220573287253507, "grad_norm": 0.1074320524930954, "learning_rate": 4.783228170160798e-05, "loss": 0.9118, "step": 3744 }, { "epoch": 1.5224639154299653, "grad_norm": 0.11088522523641586, "learning_rate": 4.779157337675555e-05, "loss": 1.0281, "step": 3745 }, { "epoch": 1.5228705021345803, "grad_norm": 0.10585159808397293, "learning_rate": 4.775086505190312e-05, "loss": 0.9213, "step": 3746 }, { "epoch": 1.5232770888391949, "grad_norm": 0.10831379890441895, "learning_rate": 4.7710156727050685e-05, "loss": 0.9331, "step": 3747 }, { "epoch": 1.5236836755438097, "grad_norm": 0.09734898805618286, "learning_rate": 4.766944840219825e-05, "loss": 0.8925, "step": 3748 }, { "epoch": 1.5240902622484245, "grad_norm": 0.10137004405260086, "learning_rate": 4.762874007734582e-05, "loss": 0.9447, "step": 3749 }, { "epoch": 1.5244968489530393, 
"grad_norm": 0.1120719313621521, "learning_rate": 4.758803175249339e-05, "loss": 1.1214, "step": 3750 }, { "epoch": 1.524903435657654, "grad_norm": 0.11059883236885071, "learning_rate": 4.7547323427640956e-05, "loss": 0.9733, "step": 3751 }, { "epoch": 1.5253100223622686, "grad_norm": 0.10553129762411118, "learning_rate": 4.750661510278852e-05, "loss": 0.8984, "step": 3752 }, { "epoch": 1.5257166090668837, "grad_norm": 0.09985724091529846, "learning_rate": 4.746590677793609e-05, "loss": 0.8949, "step": 3753 }, { "epoch": 1.5261231957714982, "grad_norm": 0.09900239109992981, "learning_rate": 4.742519845308366e-05, "loss": 0.8907, "step": 3754 }, { "epoch": 1.526529782476113, "grad_norm": 0.10111631453037262, "learning_rate": 4.7384490128231226e-05, "loss": 0.9111, "step": 3755 }, { "epoch": 1.5269363691807278, "grad_norm": 0.10198728740215302, "learning_rate": 4.7343781803378794e-05, "loss": 0.9014, "step": 3756 }, { "epoch": 1.5273429558853424, "grad_norm": 0.10502500087022781, "learning_rate": 4.730307347852636e-05, "loss": 0.9398, "step": 3757 }, { "epoch": 1.5277495425899574, "grad_norm": 0.10820775479078293, "learning_rate": 4.726236515367393e-05, "loss": 0.9785, "step": 3758 }, { "epoch": 1.528156129294572, "grad_norm": 0.09791271388530731, "learning_rate": 4.72216568288215e-05, "loss": 0.8864, "step": 3759 }, { "epoch": 1.5285627159991868, "grad_norm": 0.10859858244657516, "learning_rate": 4.7180948503969064e-05, "loss": 0.9726, "step": 3760 }, { "epoch": 1.5289693027038016, "grad_norm": 0.11191640049219131, "learning_rate": 4.714024017911663e-05, "loss": 1.018, "step": 3761 }, { "epoch": 1.5293758894084164, "grad_norm": 0.10120069235563278, "learning_rate": 4.70995318542642e-05, "loss": 0.8942, "step": 3762 }, { "epoch": 1.5297824761130312, "grad_norm": 0.09827437251806259, "learning_rate": 4.705882352941177e-05, "loss": 0.8239, "step": 3763 }, { "epoch": 1.5301890628176458, "grad_norm": 0.11061054468154907, "learning_rate": 4.7018115204559335e-05, "loss": 
1.0035, "step": 3764 }, { "epoch": 1.5305956495222606, "grad_norm": 0.11817970126867294, "learning_rate": 4.69774068797069e-05, "loss": 1.0296, "step": 3765 }, { "epoch": 1.5310022362268754, "grad_norm": 0.10169284790754318, "learning_rate": 4.693669855485447e-05, "loss": 0.9097, "step": 3766 }, { "epoch": 1.5314088229314902, "grad_norm": 0.11417925357818604, "learning_rate": 4.689599023000204e-05, "loss": 0.9863, "step": 3767 }, { "epoch": 1.531815409636105, "grad_norm": 0.11385629326105118, "learning_rate": 4.6855281905149606e-05, "loss": 1.0187, "step": 3768 }, { "epoch": 1.5322219963407195, "grad_norm": 0.10632526874542236, "learning_rate": 4.681457358029717e-05, "loss": 1.0233, "step": 3769 }, { "epoch": 1.5326285830453346, "grad_norm": 0.1070982813835144, "learning_rate": 4.677386525544474e-05, "loss": 0.9944, "step": 3770 }, { "epoch": 1.5330351697499491, "grad_norm": 0.10576360672712326, "learning_rate": 4.673315693059231e-05, "loss": 0.8884, "step": 3771 }, { "epoch": 1.533441756454564, "grad_norm": 0.11129205673933029, "learning_rate": 4.6692448605739876e-05, "loss": 0.9304, "step": 3772 }, { "epoch": 1.5338483431591787, "grad_norm": 0.10366874188184738, "learning_rate": 4.6651740280887444e-05, "loss": 0.8997, "step": 3773 }, { "epoch": 1.5342549298637933, "grad_norm": 0.10038387775421143, "learning_rate": 4.661103195603501e-05, "loss": 0.9118, "step": 3774 }, { "epoch": 1.5346615165684083, "grad_norm": 0.1056869626045227, "learning_rate": 4.657032363118258e-05, "loss": 0.9021, "step": 3775 }, { "epoch": 1.535068103273023, "grad_norm": 0.10934474319219589, "learning_rate": 4.6529615306330147e-05, "loss": 0.9975, "step": 3776 }, { "epoch": 1.5354746899776377, "grad_norm": 0.1077047809958458, "learning_rate": 4.6488906981477714e-05, "loss": 1.0119, "step": 3777 }, { "epoch": 1.5358812766822525, "grad_norm": 0.10552367568016052, "learning_rate": 4.644819865662528e-05, "loss": 0.8493, "step": 3778 }, { "epoch": 1.5362878633868673, "grad_norm": 
0.09804526716470718, "learning_rate": 4.640749033177285e-05, "loss": 0.8906, "step": 3779 }, { "epoch": 1.536694450091482, "grad_norm": 0.10530523955821991, "learning_rate": 4.636678200692042e-05, "loss": 0.8358, "step": 3780 }, { "epoch": 1.5371010367960967, "grad_norm": 0.10684414952993393, "learning_rate": 4.6326073682067985e-05, "loss": 0.9865, "step": 3781 }, { "epoch": 1.5375076235007117, "grad_norm": 0.1129271611571312, "learning_rate": 4.628536535721555e-05, "loss": 1.0377, "step": 3782 }, { "epoch": 1.5379142102053263, "grad_norm": 0.10309012234210968, "learning_rate": 4.624465703236312e-05, "loss": 0.9689, "step": 3783 }, { "epoch": 1.538320796909941, "grad_norm": 0.10697636753320694, "learning_rate": 4.620394870751069e-05, "loss": 0.9462, "step": 3784 }, { "epoch": 1.5387273836145559, "grad_norm": 0.11021671444177628, "learning_rate": 4.6163240382658255e-05, "loss": 0.9503, "step": 3785 }, { "epoch": 1.5391339703191704, "grad_norm": 0.10349755734205246, "learning_rate": 4.612253205780582e-05, "loss": 0.9063, "step": 3786 }, { "epoch": 1.5395405570237854, "grad_norm": 0.10515953600406647, "learning_rate": 4.608182373295339e-05, "loss": 0.8909, "step": 3787 }, { "epoch": 1.5399471437284, "grad_norm": 0.10489808022975922, "learning_rate": 4.604111540810096e-05, "loss": 0.9039, "step": 3788 }, { "epoch": 1.5403537304330148, "grad_norm": 0.10147853195667267, "learning_rate": 4.6000407083248526e-05, "loss": 0.8947, "step": 3789 }, { "epoch": 1.5407603171376296, "grad_norm": 0.09851264208555222, "learning_rate": 4.59596987583961e-05, "loss": 0.8955, "step": 3790 }, { "epoch": 1.5411669038422442, "grad_norm": 0.10256364941596985, "learning_rate": 4.591899043354366e-05, "loss": 0.9723, "step": 3791 }, { "epoch": 1.5415734905468592, "grad_norm": 0.09893185645341873, "learning_rate": 4.587828210869123e-05, "loss": 0.8901, "step": 3792 }, { "epoch": 1.5419800772514738, "grad_norm": 0.1070183515548706, "learning_rate": 4.5837573783838796e-05, "loss": 0.9648, "step": 
3793 }, { "epoch": 1.5423866639560886, "grad_norm": 0.10921451449394226, "learning_rate": 4.5796865458986364e-05, "loss": 0.926, "step": 3794 }, { "epoch": 1.5427932506607034, "grad_norm": 0.10185564309358597, "learning_rate": 4.575615713413393e-05, "loss": 0.9776, "step": 3795 }, { "epoch": 1.5431998373653182, "grad_norm": 0.11901550740003586, "learning_rate": 4.57154488092815e-05, "loss": 0.9979, "step": 3796 }, { "epoch": 1.543606424069933, "grad_norm": 0.10940925031900406, "learning_rate": 4.567474048442907e-05, "loss": 0.9565, "step": 3797 }, { "epoch": 1.5440130107745476, "grad_norm": 0.10260502994060516, "learning_rate": 4.5634032159576635e-05, "loss": 0.8529, "step": 3798 }, { "epoch": 1.5444195974791626, "grad_norm": 0.10641606152057648, "learning_rate": 4.55933238347242e-05, "loss": 0.9345, "step": 3799 }, { "epoch": 1.5448261841837772, "grad_norm": 0.09509759396314621, "learning_rate": 4.555261550987177e-05, "loss": 0.8873, "step": 3800 }, { "epoch": 1.545232770888392, "grad_norm": 0.10529722273349762, "learning_rate": 4.551190718501934e-05, "loss": 0.9447, "step": 3801 }, { "epoch": 1.5456393575930067, "grad_norm": 0.113713838160038, "learning_rate": 4.5471198860166905e-05, "loss": 0.9965, "step": 3802 }, { "epoch": 1.5460459442976213, "grad_norm": 0.11015161871910095, "learning_rate": 4.543049053531447e-05, "loss": 1.0089, "step": 3803 }, { "epoch": 1.5464525310022363, "grad_norm": 0.09919530153274536, "learning_rate": 4.538978221046204e-05, "loss": 0.885, "step": 3804 }, { "epoch": 1.546859117706851, "grad_norm": 0.10103622823953629, "learning_rate": 4.534907388560961e-05, "loss": 0.8922, "step": 3805 }, { "epoch": 1.5472657044114657, "grad_norm": 0.10861583799123764, "learning_rate": 4.5308365560757176e-05, "loss": 0.8814, "step": 3806 }, { "epoch": 1.5476722911160805, "grad_norm": 0.10311048477888107, "learning_rate": 4.526765723590474e-05, "loss": 0.8925, "step": 3807 }, { "epoch": 1.5480788778206953, "grad_norm": 0.10552438348531723, 
"learning_rate": 4.522694891105231e-05, "loss": 0.9121, "step": 3808 }, { "epoch": 1.54848546452531, "grad_norm": 0.1031796857714653, "learning_rate": 4.518624058619988e-05, "loss": 0.8877, "step": 3809 }, { "epoch": 1.5488920512299247, "grad_norm": 0.11424022167921066, "learning_rate": 4.5145532261347446e-05, "loss": 1.0865, "step": 3810 }, { "epoch": 1.5492986379345397, "grad_norm": 0.10076258331537247, "learning_rate": 4.5104823936495014e-05, "loss": 0.8801, "step": 3811 }, { "epoch": 1.5497052246391543, "grad_norm": 0.10160887986421585, "learning_rate": 4.506411561164258e-05, "loss": 0.9645, "step": 3812 }, { "epoch": 1.550111811343769, "grad_norm": 0.10750345140695572, "learning_rate": 4.5023407286790156e-05, "loss": 0.9595, "step": 3813 }, { "epoch": 1.5505183980483839, "grad_norm": 0.09937632828950882, "learning_rate": 4.498269896193772e-05, "loss": 0.9412, "step": 3814 }, { "epoch": 1.5509249847529984, "grad_norm": 0.1044396162033081, "learning_rate": 4.4941990637085284e-05, "loss": 0.9602, "step": 3815 }, { "epoch": 1.5513315714576135, "grad_norm": 0.09803607314825058, "learning_rate": 4.490128231223286e-05, "loss": 0.8488, "step": 3816 }, { "epoch": 1.551738158162228, "grad_norm": 0.09826266020536423, "learning_rate": 4.486057398738042e-05, "loss": 0.9429, "step": 3817 }, { "epoch": 1.5521447448668428, "grad_norm": 0.10339567065238953, "learning_rate": 4.481986566252799e-05, "loss": 0.9106, "step": 3818 }, { "epoch": 1.5525513315714576, "grad_norm": 0.10419493913650513, "learning_rate": 4.477915733767556e-05, "loss": 0.935, "step": 3819 }, { "epoch": 1.5529579182760722, "grad_norm": 0.10235986858606339, "learning_rate": 4.473844901282312e-05, "loss": 0.9237, "step": 3820 }, { "epoch": 1.5533645049806872, "grad_norm": 0.10505925863981247, "learning_rate": 4.469774068797069e-05, "loss": 0.9719, "step": 3821 }, { "epoch": 1.5537710916853018, "grad_norm": 0.1147008091211319, "learning_rate": 4.4657032363118265e-05, "loss": 0.9764, "step": 3822 }, { "epoch": 
1.5541776783899166, "grad_norm": 0.10121449083089828, "learning_rate": 4.4616324038265825e-05, "loss": 0.8847, "step": 3823 }, { "epoch": 1.5545842650945314, "grad_norm": 0.10162410885095596, "learning_rate": 4.457561571341339e-05, "loss": 0.9766, "step": 3824 }, { "epoch": 1.5549908517991462, "grad_norm": 0.10154290497303009, "learning_rate": 4.453490738856097e-05, "loss": 0.9663, "step": 3825 }, { "epoch": 1.555397438503761, "grad_norm": 0.10372976958751678, "learning_rate": 4.449419906370853e-05, "loss": 0.9369, "step": 3826 }, { "epoch": 1.5558040252083756, "grad_norm": 0.10194465517997742, "learning_rate": 4.4453490738856096e-05, "loss": 0.9677, "step": 3827 }, { "epoch": 1.5562106119129906, "grad_norm": 0.11616487801074982, "learning_rate": 4.441278241400367e-05, "loss": 0.9678, "step": 3828 }, { "epoch": 1.5566171986176052, "grad_norm": 0.0968397706747055, "learning_rate": 4.437207408915123e-05, "loss": 0.8619, "step": 3829 }, { "epoch": 1.55702378532222, "grad_norm": 0.10480852425098419, "learning_rate": 4.43313657642988e-05, "loss": 0.9306, "step": 3830 }, { "epoch": 1.5574303720268348, "grad_norm": 0.099884994328022, "learning_rate": 4.429065743944637e-05, "loss": 0.8609, "step": 3831 }, { "epoch": 1.5578369587314493, "grad_norm": 0.10909198969602585, "learning_rate": 4.4249949114593934e-05, "loss": 0.9682, "step": 3832 }, { "epoch": 1.5582435454360644, "grad_norm": 0.10736821591854095, "learning_rate": 4.42092407897415e-05, "loss": 0.9483, "step": 3833 }, { "epoch": 1.558650132140679, "grad_norm": 0.10474716871976852, "learning_rate": 4.4168532464889076e-05, "loss": 0.9911, "step": 3834 }, { "epoch": 1.5590567188452937, "grad_norm": 0.09613660722970963, "learning_rate": 4.412782414003664e-05, "loss": 0.8299, "step": 3835 }, { "epoch": 1.5594633055499085, "grad_norm": 0.11056198924779892, "learning_rate": 4.4087115815184205e-05, "loss": 0.9418, "step": 3836 }, { "epoch": 1.5598698922545233, "grad_norm": 0.11200756579637527, "learning_rate": 
4.404640749033178e-05, "loss": 1.0011, "step": 3837 }, { "epoch": 1.5602764789591381, "grad_norm": 0.10487156361341476, "learning_rate": 4.400569916547934e-05, "loss": 0.9965, "step": 3838 }, { "epoch": 1.5606830656637527, "grad_norm": 0.12069255858659744, "learning_rate": 4.3964990840626914e-05, "loss": 1.0753, "step": 3839 }, { "epoch": 1.5610896523683677, "grad_norm": 0.09854745864868164, "learning_rate": 4.392428251577448e-05, "loss": 0.8222, "step": 3840 }, { "epoch": 1.5614962390729823, "grad_norm": 0.10288074612617493, "learning_rate": 4.388357419092204e-05, "loss": 0.9101, "step": 3841 }, { "epoch": 1.561902825777597, "grad_norm": 0.11022932827472687, "learning_rate": 4.384286586606962e-05, "loss": 0.9908, "step": 3842 }, { "epoch": 1.562309412482212, "grad_norm": 0.10680433362722397, "learning_rate": 4.380215754121718e-05, "loss": 1.0098, "step": 3843 }, { "epoch": 1.5627159991868265, "grad_norm": 0.10699717700481415, "learning_rate": 4.3761449216364746e-05, "loss": 1.0427, "step": 3844 }, { "epoch": 1.5631225858914415, "grad_norm": 0.11625601351261139, "learning_rate": 4.372074089151232e-05, "loss": 1.0313, "step": 3845 }, { "epoch": 1.563529172596056, "grad_norm": 0.1125851646065712, "learning_rate": 4.368003256665988e-05, "loss": 0.9602, "step": 3846 }, { "epoch": 1.5639357593006709, "grad_norm": 0.09537433832883835, "learning_rate": 4.363932424180745e-05, "loss": 0.8338, "step": 3847 }, { "epoch": 1.5643423460052857, "grad_norm": 0.10118943452835083, "learning_rate": 4.359861591695502e-05, "loss": 0.8858, "step": 3848 }, { "epoch": 1.5647489327099002, "grad_norm": 0.12094996869564056, "learning_rate": 4.3557907592102584e-05, "loss": 1.107, "step": 3849 }, { "epoch": 1.5651555194145153, "grad_norm": 0.11586831510066986, "learning_rate": 4.351719926725015e-05, "loss": 0.9132, "step": 3850 }, { "epoch": 1.5655621061191298, "grad_norm": 0.10483038425445557, "learning_rate": 4.3476490942397726e-05, "loss": 0.8894, "step": 3851 }, { "epoch": 
1.5659686928237446, "grad_norm": 0.10702569782733917, "learning_rate": 4.343578261754529e-05, "loss": 0.9354, "step": 3852 }, { "epoch": 1.5663752795283594, "grad_norm": 0.09847092628479004, "learning_rate": 4.3395074292692854e-05, "loss": 0.8192, "step": 3853 }, { "epoch": 1.5667818662329742, "grad_norm": 0.11142577975988388, "learning_rate": 4.335436596784043e-05, "loss": 1.0106, "step": 3854 }, { "epoch": 1.567188452937589, "grad_norm": 0.11825080960988998, "learning_rate": 4.331365764298799e-05, "loss": 1.0841, "step": 3855 }, { "epoch": 1.5675950396422036, "grad_norm": 0.10718253254890442, "learning_rate": 4.327294931813556e-05, "loss": 0.9786, "step": 3856 }, { "epoch": 1.5680016263468186, "grad_norm": 0.10958700627088547, "learning_rate": 4.323224099328313e-05, "loss": 1.0322, "step": 3857 }, { "epoch": 1.5684082130514332, "grad_norm": 0.09295843541622162, "learning_rate": 4.319153266843069e-05, "loss": 0.8317, "step": 3858 }, { "epoch": 1.568814799756048, "grad_norm": 0.11297357827425003, "learning_rate": 4.315082434357826e-05, "loss": 1.0802, "step": 3859 }, { "epoch": 1.5692213864606628, "grad_norm": 0.11033076047897339, "learning_rate": 4.3110116018725835e-05, "loss": 0.9561, "step": 3860 }, { "epoch": 1.5696279731652774, "grad_norm": 0.11169704794883728, "learning_rate": 4.3069407693873396e-05, "loss": 1.0622, "step": 3861 }, { "epoch": 1.5700345598698924, "grad_norm": 0.10404906421899796, "learning_rate": 4.302869936902097e-05, "loss": 0.9427, "step": 3862 }, { "epoch": 1.570441146574507, "grad_norm": 0.10455071181058884, "learning_rate": 4.298799104416854e-05, "loss": 0.9227, "step": 3863 }, { "epoch": 1.5708477332791217, "grad_norm": 0.10467982292175293, "learning_rate": 4.29472827193161e-05, "loss": 0.9466, "step": 3864 }, { "epoch": 1.5712543199837365, "grad_norm": 0.11249608546495438, "learning_rate": 4.290657439446367e-05, "loss": 1.0259, "step": 3865 }, { "epoch": 1.5716609066883513, "grad_norm": 0.10060025006532669, "learning_rate": 
4.286586606961124e-05, "loss": 0.866, "step": 3866 }, { "epoch": 1.5720674933929661, "grad_norm": 0.10907735675573349, "learning_rate": 4.28251577447588e-05, "loss": 0.9269, "step": 3867 }, { "epoch": 1.5724740800975807, "grad_norm": 0.10577044636011124, "learning_rate": 4.2784449419906376e-05, "loss": 0.9555, "step": 3868 }, { "epoch": 1.5728806668021955, "grad_norm": 0.09949744492769241, "learning_rate": 4.274374109505394e-05, "loss": 0.9662, "step": 3869 }, { "epoch": 1.5732872535068103, "grad_norm": 0.11094196140766144, "learning_rate": 4.2703032770201504e-05, "loss": 0.9078, "step": 3870 }, { "epoch": 1.573693840211425, "grad_norm": 0.1124429702758789, "learning_rate": 4.266232444534908e-05, "loss": 1.0132, "step": 3871 }, { "epoch": 1.57410042691604, "grad_norm": 0.10895200073719025, "learning_rate": 4.2621616120496646e-05, "loss": 0.9692, "step": 3872 }, { "epoch": 1.5745070136206545, "grad_norm": 0.10914913564920425, "learning_rate": 4.258090779564421e-05, "loss": 0.944, "step": 3873 }, { "epoch": 1.5749136003252695, "grad_norm": 0.10090178996324539, "learning_rate": 4.254019947079178e-05, "loss": 0.9228, "step": 3874 }, { "epoch": 1.575320187029884, "grad_norm": 0.10193730890750885, "learning_rate": 4.249949114593935e-05, "loss": 0.9152, "step": 3875 }, { "epoch": 1.5757267737344989, "grad_norm": 0.10624190419912338, "learning_rate": 4.245878282108691e-05, "loss": 0.9702, "step": 3876 }, { "epoch": 1.5761333604391137, "grad_norm": 0.11110203713178635, "learning_rate": 4.2418074496234484e-05, "loss": 0.9431, "step": 3877 }, { "epoch": 1.5765399471437282, "grad_norm": 0.10701338946819305, "learning_rate": 4.237736617138205e-05, "loss": 0.9673, "step": 3878 }, { "epoch": 1.5769465338483433, "grad_norm": 0.10539649426937103, "learning_rate": 4.233665784652961e-05, "loss": 0.9492, "step": 3879 }, { "epoch": 1.5773531205529578, "grad_norm": 0.10319969803094864, "learning_rate": 4.229594952167719e-05, "loss": 0.8976, "step": 3880 }, { "epoch": 1.5777597072575726, 
"grad_norm": 0.11089177429676056, "learning_rate": 4.2255241196824755e-05, "loss": 0.8858, "step": 3881 }, { "epoch": 1.5781662939621874, "grad_norm": 0.10143184661865234, "learning_rate": 4.2214532871972316e-05, "loss": 0.8669, "step": 3882 }, { "epoch": 1.5785728806668022, "grad_norm": 0.10318692028522491, "learning_rate": 4.217382454711989e-05, "loss": 0.8788, "step": 3883 }, { "epoch": 1.578979467371417, "grad_norm": 0.11176548898220062, "learning_rate": 4.213311622226746e-05, "loss": 0.9668, "step": 3884 }, { "epoch": 1.5793860540760316, "grad_norm": 0.10598577558994293, "learning_rate": 4.2092407897415026e-05, "loss": 0.9206, "step": 3885 }, { "epoch": 1.5797926407806466, "grad_norm": 0.10618636012077332, "learning_rate": 4.205169957256259e-05, "loss": 0.9956, "step": 3886 }, { "epoch": 1.5801992274852612, "grad_norm": 0.11371050029993057, "learning_rate": 4.201099124771016e-05, "loss": 1.0301, "step": 3887 }, { "epoch": 1.580605814189876, "grad_norm": 0.10739018023014069, "learning_rate": 4.197028292285773e-05, "loss": 0.9769, "step": 3888 }, { "epoch": 1.5810124008944908, "grad_norm": 0.10079289227724075, "learning_rate": 4.1929574598005296e-05, "loss": 0.9888, "step": 3889 }, { "epoch": 1.5814189875991054, "grad_norm": 0.10348872095346451, "learning_rate": 4.188886627315286e-05, "loss": 0.936, "step": 3890 }, { "epoch": 1.5818255743037204, "grad_norm": 0.10830751806497574, "learning_rate": 4.184815794830043e-05, "loss": 1.0448, "step": 3891 }, { "epoch": 1.582232161008335, "grad_norm": 0.10880016535520554, "learning_rate": 4.1807449623448e-05, "loss": 1.0058, "step": 3892 }, { "epoch": 1.5826387477129498, "grad_norm": 0.10958044230937958, "learning_rate": 4.176674129859556e-05, "loss": 0.9277, "step": 3893 }, { "epoch": 1.5830453344175646, "grad_norm": 0.09938797354698181, "learning_rate": 4.1726032973743134e-05, "loss": 0.8826, "step": 3894 }, { "epoch": 1.5834519211221791, "grad_norm": 0.0986451730132103, "learning_rate": 4.16853246488907e-05, "loss": 
0.835, "step": 3895 }, { "epoch": 1.5838585078267942, "grad_norm": 0.10798802226781845, "learning_rate": 4.164461632403826e-05, "loss": 0.9923, "step": 3896 }, { "epoch": 1.5842650945314087, "grad_norm": 0.10113084316253662, "learning_rate": 4.160390799918584e-05, "loss": 0.9178, "step": 3897 }, { "epoch": 1.5846716812360235, "grad_norm": 0.09963071346282959, "learning_rate": 4.1563199674333405e-05, "loss": 0.8942, "step": 3898 }, { "epoch": 1.5850782679406383, "grad_norm": 0.0994904637336731, "learning_rate": 4.1522491349480966e-05, "loss": 0.8754, "step": 3899 }, { "epoch": 1.5854848546452531, "grad_norm": 0.09343326836824417, "learning_rate": 4.148178302462854e-05, "loss": 0.8143, "step": 3900 }, { "epoch": 1.585891441349868, "grad_norm": 0.11337709426879883, "learning_rate": 4.144107469977611e-05, "loss": 1.0421, "step": 3901 }, { "epoch": 1.5862980280544825, "grad_norm": 0.10667706280946732, "learning_rate": 4.140036637492367e-05, "loss": 0.9213, "step": 3902 }, { "epoch": 1.5867046147590975, "grad_norm": 0.09903930872678757, "learning_rate": 4.135965805007124e-05, "loss": 0.8786, "step": 3903 }, { "epoch": 1.587111201463712, "grad_norm": 0.10696469992399216, "learning_rate": 4.131894972521881e-05, "loss": 0.9659, "step": 3904 }, { "epoch": 1.587517788168327, "grad_norm": 0.10829825699329376, "learning_rate": 4.127824140036637e-05, "loss": 0.9465, "step": 3905 }, { "epoch": 1.5879243748729417, "grad_norm": 0.09446293860673904, "learning_rate": 4.1237533075513946e-05, "loss": 0.8023, "step": 3906 }, { "epoch": 1.5883309615775563, "grad_norm": 0.11186923086643219, "learning_rate": 4.1196824750661513e-05, "loss": 1.0979, "step": 3907 }, { "epoch": 1.5887375482821713, "grad_norm": 0.1068292185664177, "learning_rate": 4.115611642580908e-05, "loss": 0.9801, "step": 3908 }, { "epoch": 1.5891441349867859, "grad_norm": 0.11344651877880096, "learning_rate": 4.111540810095665e-05, "loss": 1.0279, "step": 3909 }, { "epoch": 1.5895507216914007, "grad_norm": 
0.11451148241758347, "learning_rate": 4.1074699776104216e-05, "loss": 1.022, "step": 3910 }, { "epoch": 1.5899573083960155, "grad_norm": 0.10979126393795013, "learning_rate": 4.1033991451251784e-05, "loss": 1.003, "step": 3911 }, { "epoch": 1.5903638951006303, "grad_norm": 0.10487376898527145, "learning_rate": 4.099328312639935e-05, "loss": 0.9479, "step": 3912 }, { "epoch": 1.590770481805245, "grad_norm": 0.10622530430555344, "learning_rate": 4.095257480154692e-05, "loss": 0.9565, "step": 3913 }, { "epoch": 1.5911770685098596, "grad_norm": 0.11741635948419571, "learning_rate": 4.091186647669449e-05, "loss": 1.0789, "step": 3914 }, { "epoch": 1.5915836552144746, "grad_norm": 0.11563640832901001, "learning_rate": 4.0871158151842055e-05, "loss": 0.9594, "step": 3915 }, { "epoch": 1.5919902419190892, "grad_norm": 0.09948024898767471, "learning_rate": 4.083044982698962e-05, "loss": 0.9188, "step": 3916 }, { "epoch": 1.592396828623704, "grad_norm": 0.10058055073022842, "learning_rate": 4.078974150213719e-05, "loss": 0.88, "step": 3917 }, { "epoch": 1.5928034153283188, "grad_norm": 0.11370790004730225, "learning_rate": 4.074903317728476e-05, "loss": 1.0117, "step": 3918 }, { "epoch": 1.5932100020329334, "grad_norm": 0.1042017936706543, "learning_rate": 4.0708324852432325e-05, "loss": 0.9712, "step": 3919 }, { "epoch": 1.5936165887375484, "grad_norm": 0.10406166315078735, "learning_rate": 4.066761652757989e-05, "loss": 0.9896, "step": 3920 }, { "epoch": 1.594023175442163, "grad_norm": 0.10339275002479553, "learning_rate": 4.062690820272746e-05, "loss": 0.9653, "step": 3921 }, { "epoch": 1.5944297621467778, "grad_norm": 0.1000717282295227, "learning_rate": 4.058619987787503e-05, "loss": 0.8748, "step": 3922 }, { "epoch": 1.5948363488513926, "grad_norm": 0.1118224561214447, "learning_rate": 4.0545491553022596e-05, "loss": 0.9712, "step": 3923 }, { "epoch": 1.5952429355560072, "grad_norm": 0.11152620613574982, "learning_rate": 4.050478322817016e-05, "loss": 0.9215, "step": 
3924 }, { "epoch": 1.5956495222606222, "grad_norm": 0.11174870282411575, "learning_rate": 4.046407490331773e-05, "loss": 1.021, "step": 3925 }, { "epoch": 1.5960561089652368, "grad_norm": 0.11456409096717834, "learning_rate": 4.04233665784653e-05, "loss": 0.9776, "step": 3926 }, { "epoch": 1.5964626956698516, "grad_norm": 0.10741414874792099, "learning_rate": 4.0382658253612866e-05, "loss": 0.9605, "step": 3927 }, { "epoch": 1.5968692823744663, "grad_norm": 0.09653212875127792, "learning_rate": 4.0341949928760434e-05, "loss": 0.9439, "step": 3928 }, { "epoch": 1.5972758690790811, "grad_norm": 0.10057616978883743, "learning_rate": 4.0301241603908e-05, "loss": 0.9304, "step": 3929 }, { "epoch": 1.597682455783696, "grad_norm": 0.10348949581384659, "learning_rate": 4.026053327905557e-05, "loss": 0.9121, "step": 3930 }, { "epoch": 1.5980890424883105, "grad_norm": 0.11406022310256958, "learning_rate": 4.021982495420314e-05, "loss": 1.0656, "step": 3931 }, { "epoch": 1.5984956291929255, "grad_norm": 0.10392772406339645, "learning_rate": 4.0179116629350704e-05, "loss": 0.8859, "step": 3932 }, { "epoch": 1.5989022158975401, "grad_norm": 0.10656527429819107, "learning_rate": 4.013840830449827e-05, "loss": 0.9224, "step": 3933 }, { "epoch": 1.599308802602155, "grad_norm": 0.11588657647371292, "learning_rate": 4.009769997964584e-05, "loss": 1.0287, "step": 3934 }, { "epoch": 1.5997153893067697, "grad_norm": 0.11138034611940384, "learning_rate": 4.005699165479341e-05, "loss": 1.1117, "step": 3935 }, { "epoch": 1.6001219760113843, "grad_norm": 0.10900641232728958, "learning_rate": 4.0016283329940975e-05, "loss": 0.9072, "step": 3936 }, { "epoch": 1.6005285627159993, "grad_norm": 0.10668104141950607, "learning_rate": 3.997557500508854e-05, "loss": 0.8867, "step": 3937 }, { "epoch": 1.6009351494206139, "grad_norm": 0.09906437993049622, "learning_rate": 3.993486668023611e-05, "loss": 0.863, "step": 3938 }, { "epoch": 1.6013417361252287, "grad_norm": 0.10709775984287262, 
"learning_rate": 3.989415835538368e-05, "loss": 0.9174, "step": 3939 }, { "epoch": 1.6017483228298435, "grad_norm": 0.10237723588943481, "learning_rate": 3.9853450030531245e-05, "loss": 0.9447, "step": 3940 }, { "epoch": 1.6021549095344583, "grad_norm": 0.09341628849506378, "learning_rate": 3.981274170567881e-05, "loss": 0.8767, "step": 3941 }, { "epoch": 1.602561496239073, "grad_norm": 0.09939193725585938, "learning_rate": 3.977203338082638e-05, "loss": 0.8718, "step": 3942 }, { "epoch": 1.6029680829436876, "grad_norm": 0.11073851585388184, "learning_rate": 3.973132505597395e-05, "loss": 0.9874, "step": 3943 }, { "epoch": 1.6033746696483027, "grad_norm": 0.10323592275381088, "learning_rate": 3.9690616731121516e-05, "loss": 0.8661, "step": 3944 }, { "epoch": 1.6037812563529172, "grad_norm": 0.1047651469707489, "learning_rate": 3.9649908406269084e-05, "loss": 0.8734, "step": 3945 }, { "epoch": 1.604187843057532, "grad_norm": 0.10787742584943771, "learning_rate": 3.960920008141665e-05, "loss": 0.9911, "step": 3946 }, { "epoch": 1.6045944297621468, "grad_norm": 0.10103908181190491, "learning_rate": 3.956849175656422e-05, "loss": 0.9259, "step": 3947 }, { "epoch": 1.6050010164667614, "grad_norm": 0.10705123096704483, "learning_rate": 3.9527783431711786e-05, "loss": 0.9721, "step": 3948 }, { "epoch": 1.6054076031713764, "grad_norm": 0.11182446777820587, "learning_rate": 3.9487075106859354e-05, "loss": 1.049, "step": 3949 }, { "epoch": 1.605814189875991, "grad_norm": 0.1003599539399147, "learning_rate": 3.944636678200692e-05, "loss": 0.9564, "step": 3950 }, { "epoch": 1.6062207765806058, "grad_norm": 0.10500559955835342, "learning_rate": 3.940565845715449e-05, "loss": 0.8966, "step": 3951 }, { "epoch": 1.6066273632852206, "grad_norm": 0.09841228276491165, "learning_rate": 3.936495013230206e-05, "loss": 0.847, "step": 3952 }, { "epoch": 1.6070339499898352, "grad_norm": 0.11207973212003708, "learning_rate": 3.9324241807449625e-05, "loss": 1.0527, "step": 3953 }, { "epoch": 
1.6074405366944502, "grad_norm": 0.10874827206134796, "learning_rate": 3.92835334825972e-05, "loss": 0.9597, "step": 3954 }, { "epoch": 1.6078471233990648, "grad_norm": 0.1068238690495491, "learning_rate": 3.924282515774476e-05, "loss": 0.9619, "step": 3955 }, { "epoch": 1.6082537101036796, "grad_norm": 0.10551256686449051, "learning_rate": 3.920211683289233e-05, "loss": 0.9647, "step": 3956 }, { "epoch": 1.6086602968082944, "grad_norm": 0.1033085510134697, "learning_rate": 3.9161408508039895e-05, "loss": 0.9121, "step": 3957 }, { "epoch": 1.6090668835129092, "grad_norm": 0.11028590798377991, "learning_rate": 3.912070018318746e-05, "loss": 1.0249, "step": 3958 }, { "epoch": 1.609473470217524, "grad_norm": 0.10885387659072876, "learning_rate": 3.907999185833503e-05, "loss": 0.9739, "step": 3959 }, { "epoch": 1.6098800569221385, "grad_norm": 0.09786680340766907, "learning_rate": 3.90392835334826e-05, "loss": 0.8931, "step": 3960 }, { "epoch": 1.6102866436267536, "grad_norm": 0.09145115315914154, "learning_rate": 3.8998575208630166e-05, "loss": 0.8121, "step": 3961 }, { "epoch": 1.6106932303313681, "grad_norm": 0.09834929555654526, "learning_rate": 3.895786688377773e-05, "loss": 0.8897, "step": 3962 }, { "epoch": 1.611099817035983, "grad_norm": 0.10126276314258575, "learning_rate": 3.89171585589253e-05, "loss": 0.8728, "step": 3963 }, { "epoch": 1.6115064037405977, "grad_norm": 0.10853146016597748, "learning_rate": 3.887645023407287e-05, "loss": 0.9516, "step": 3964 }, { "epoch": 1.6119129904452123, "grad_norm": 0.10366170108318329, "learning_rate": 3.8835741909220436e-05, "loss": 0.8539, "step": 3965 }, { "epoch": 1.6123195771498273, "grad_norm": 0.1102977991104126, "learning_rate": 3.8795033584368004e-05, "loss": 1.0544, "step": 3966 }, { "epoch": 1.612726163854442, "grad_norm": 0.09886328876018524, "learning_rate": 3.875432525951557e-05, "loss": 0.8698, "step": 3967 }, { "epoch": 1.6131327505590567, "grad_norm": 0.11173603683710098, "learning_rate": 
3.871361693466314e-05, "loss": 1.0237, "step": 3968 }, { "epoch": 1.6135393372636715, "grad_norm": 0.10802386701107025, "learning_rate": 3.867290860981071e-05, "loss": 1.0073, "step": 3969 }, { "epoch": 1.6139459239682863, "grad_norm": 0.09934094548225403, "learning_rate": 3.8632200284958274e-05, "loss": 0.9459, "step": 3970 }, { "epoch": 1.614352510672901, "grad_norm": 0.09900476038455963, "learning_rate": 3.859149196010584e-05, "loss": 0.9196, "step": 3971 }, { "epoch": 1.6147590973775157, "grad_norm": 0.09684500098228455, "learning_rate": 3.855078363525341e-05, "loss": 0.8908, "step": 3972 }, { "epoch": 1.6151656840821305, "grad_norm": 0.10286570340394974, "learning_rate": 3.851007531040098e-05, "loss": 0.8566, "step": 3973 }, { "epoch": 1.6155722707867453, "grad_norm": 0.09902996569871902, "learning_rate": 3.8469366985548545e-05, "loss": 0.8783, "step": 3974 }, { "epoch": 1.61597885749136, "grad_norm": 0.11378595232963562, "learning_rate": 3.842865866069611e-05, "loss": 0.9855, "step": 3975 }, { "epoch": 1.6163854441959749, "grad_norm": 0.10718195885419846, "learning_rate": 3.838795033584368e-05, "loss": 0.9116, "step": 3976 }, { "epoch": 1.6167920309005894, "grad_norm": 0.11180385947227478, "learning_rate": 3.8347242010991255e-05, "loss": 0.9737, "step": 3977 }, { "epoch": 1.6171986176052044, "grad_norm": 0.12050411105155945, "learning_rate": 3.8306533686138816e-05, "loss": 1.1243, "step": 3978 }, { "epoch": 1.617605204309819, "grad_norm": 0.14076527953147888, "learning_rate": 3.826582536128638e-05, "loss": 0.9308, "step": 3979 }, { "epoch": 1.6180117910144338, "grad_norm": 0.10641691833734512, "learning_rate": 3.822511703643396e-05, "loss": 0.9298, "step": 3980 }, { "epoch": 1.6184183777190486, "grad_norm": 0.11289351433515549, "learning_rate": 3.818440871158152e-05, "loss": 0.9239, "step": 3981 }, { "epoch": 1.6188249644236632, "grad_norm": 0.11315469443798065, "learning_rate": 3.8143700386729086e-05, "loss": 1.1374, "step": 3982 }, { "epoch": 
1.6192315511282782, "grad_norm": 0.11176195740699768, "learning_rate": 3.810299206187666e-05, "loss": 0.9552, "step": 3983 }, { "epoch": 1.6196381378328928, "grad_norm": 0.09827902913093567, "learning_rate": 3.806228373702422e-05, "loss": 0.8914, "step": 3984 }, { "epoch": 1.6200447245375076, "grad_norm": 0.10546936839818954, "learning_rate": 3.802157541217179e-05, "loss": 0.9572, "step": 3985 }, { "epoch": 1.6204513112421224, "grad_norm": 0.10368131846189499, "learning_rate": 3.798086708731936e-05, "loss": 0.884, "step": 3986 }, { "epoch": 1.6208578979467372, "grad_norm": 0.1099054366350174, "learning_rate": 3.7940158762466924e-05, "loss": 0.9648, "step": 3987 }, { "epoch": 1.621264484651352, "grad_norm": 0.10556711256504059, "learning_rate": 3.789945043761449e-05, "loss": 1.0355, "step": 3988 }, { "epoch": 1.6216710713559666, "grad_norm": 0.10475050657987595, "learning_rate": 3.7858742112762066e-05, "loss": 0.9746, "step": 3989 }, { "epoch": 1.6220776580605816, "grad_norm": 0.10798493772745132, "learning_rate": 3.781803378790963e-05, "loss": 1.0124, "step": 3990 }, { "epoch": 1.6224842447651961, "grad_norm": 0.10683320462703705, "learning_rate": 3.7777325463057195e-05, "loss": 0.9305, "step": 3991 }, { "epoch": 1.622890831469811, "grad_norm": 0.10677673667669296, "learning_rate": 3.773661713820477e-05, "loss": 0.9599, "step": 3992 }, { "epoch": 1.6232974181744257, "grad_norm": 0.11520379036664963, "learning_rate": 3.769590881335233e-05, "loss": 1.0277, "step": 3993 }, { "epoch": 1.6237040048790403, "grad_norm": 0.10414712876081467, "learning_rate": 3.76552004884999e-05, "loss": 1.0, "step": 3994 }, { "epoch": 1.6241105915836553, "grad_norm": 0.10957575589418411, "learning_rate": 3.761449216364747e-05, "loss": 0.9932, "step": 3995 }, { "epoch": 1.62451717828827, "grad_norm": 0.104839026927948, "learning_rate": 3.757378383879503e-05, "loss": 0.9374, "step": 3996 }, { "epoch": 1.6249237649928847, "grad_norm": 0.11117900162935257, "learning_rate": 
3.75330755139426e-05, "loss": 1.0527, "step": 3997 }, { "epoch": 1.6253303516974995, "grad_norm": 0.10853046923875809, "learning_rate": 3.7492367189090175e-05, "loss": 1.0176, "step": 3998 }, { "epoch": 1.625736938402114, "grad_norm": 0.1107804998755455, "learning_rate": 3.7451658864237736e-05, "loss": 0.9765, "step": 3999 }, { "epoch": 1.626143525106729, "grad_norm": 0.10636276006698608, "learning_rate": 3.741095053938531e-05, "loss": 0.9435, "step": 4000 }, { "epoch": 1.6265501118113437, "grad_norm": 0.11162041872739792, "learning_rate": 3.737024221453288e-05, "loss": 0.9547, "step": 4001 }, { "epoch": 1.6269566985159585, "grad_norm": 0.11098874360322952, "learning_rate": 3.732953388968044e-05, "loss": 1.0169, "step": 4002 }, { "epoch": 1.6273632852205733, "grad_norm": 0.10745534300804138, "learning_rate": 3.728882556482801e-05, "loss": 1.0881, "step": 4003 }, { "epoch": 1.627769871925188, "grad_norm": 0.10475347936153412, "learning_rate": 3.7248117239975574e-05, "loss": 0.9398, "step": 4004 }, { "epoch": 1.6281764586298029, "grad_norm": 0.11412277817726135, "learning_rate": 3.720740891512314e-05, "loss": 1.0969, "step": 4005 }, { "epoch": 1.6285830453344174, "grad_norm": 0.1082950234413147, "learning_rate": 3.7166700590270716e-05, "loss": 0.8739, "step": 4006 }, { "epoch": 1.6289896320390325, "grad_norm": 0.10246314853429794, "learning_rate": 3.712599226541828e-05, "loss": 0.9542, "step": 4007 }, { "epoch": 1.629396218743647, "grad_norm": 0.10546763986349106, "learning_rate": 3.7085283940565845e-05, "loss": 0.9319, "step": 4008 }, { "epoch": 1.6298028054482618, "grad_norm": 0.10902021825313568, "learning_rate": 3.704457561571342e-05, "loss": 0.9704, "step": 4009 }, { "epoch": 1.6302093921528766, "grad_norm": 0.10512792319059372, "learning_rate": 3.700386729086098e-05, "loss": 0.8641, "step": 4010 }, { "epoch": 1.6306159788574912, "grad_norm": 0.11129160970449448, "learning_rate": 3.696315896600855e-05, "loss": 1.0655, "step": 4011 }, { "epoch": 
1.6310225655621062, "grad_norm": 0.11214631050825119, "learning_rate": 3.692245064115612e-05, "loss": 1.0528, "step": 4012 }, { "epoch": 1.6314291522667208, "grad_norm": 0.11774066835641861, "learning_rate": 3.688174231630368e-05, "loss": 1.0389, "step": 4013 }, { "epoch": 1.6318357389713356, "grad_norm": 0.10817237198352814, "learning_rate": 3.684103399145125e-05, "loss": 0.9741, "step": 4014 }, { "epoch": 1.6322423256759504, "grad_norm": 0.10697966814041138, "learning_rate": 3.6800325666598825e-05, "loss": 0.9523, "step": 4015 }, { "epoch": 1.6326489123805652, "grad_norm": 0.09764157235622406, "learning_rate": 3.6759617341746386e-05, "loss": 0.8462, "step": 4016 }, { "epoch": 1.63305549908518, "grad_norm": 0.10472942143678665, "learning_rate": 3.671890901689395e-05, "loss": 0.9925, "step": 4017 }, { "epoch": 1.6334620857897946, "grad_norm": 0.1018701046705246, "learning_rate": 3.667820069204153e-05, "loss": 0.8639, "step": 4018 }, { "epoch": 1.6338686724944096, "grad_norm": 0.09498531371355057, "learning_rate": 3.663749236718909e-05, "loss": 0.8634, "step": 4019 }, { "epoch": 1.6342752591990242, "grad_norm": 0.10013435781002045, "learning_rate": 3.6596784042336656e-05, "loss": 0.8305, "step": 4020 }, { "epoch": 1.634681845903639, "grad_norm": 0.10749876499176025, "learning_rate": 3.655607571748423e-05, "loss": 0.9569, "step": 4021 }, { "epoch": 1.6350884326082538, "grad_norm": 0.10520561784505844, "learning_rate": 3.651536739263179e-05, "loss": 0.9167, "step": 4022 }, { "epoch": 1.6354950193128683, "grad_norm": 0.11006909608840942, "learning_rate": 3.6474659067779366e-05, "loss": 0.9469, "step": 4023 }, { "epoch": 1.6359016060174834, "grad_norm": 0.10426217317581177, "learning_rate": 3.6433950742926933e-05, "loss": 0.8905, "step": 4024 }, { "epoch": 1.636308192722098, "grad_norm": 0.10819458216428757, "learning_rate": 3.6393242418074494e-05, "loss": 1.0306, "step": 4025 }, { "epoch": 1.6367147794267127, "grad_norm": 0.10247842967510223, "learning_rate": 
3.635253409322207e-05, "loss": 0.8619, "step": 4026 }, { "epoch": 1.6371213661313275, "grad_norm": 0.10943040996789932, "learning_rate": 3.6311825768369636e-05, "loss": 0.9491, "step": 4027 }, { "epoch": 1.637527952835942, "grad_norm": 0.10056941211223602, "learning_rate": 3.62711174435172e-05, "loss": 0.9287, "step": 4028 }, { "epoch": 1.6379345395405571, "grad_norm": 0.11309908330440521, "learning_rate": 3.623040911866477e-05, "loss": 1.0414, "step": 4029 }, { "epoch": 1.6383411262451717, "grad_norm": 0.10608269274234772, "learning_rate": 3.618970079381234e-05, "loss": 1.0614, "step": 4030 }, { "epoch": 1.6387477129497865, "grad_norm": 0.10660211741924286, "learning_rate": 3.61489924689599e-05, "loss": 0.9974, "step": 4031 }, { "epoch": 1.6391542996544013, "grad_norm": 0.10648634284734726, "learning_rate": 3.6108284144107475e-05, "loss": 0.9815, "step": 4032 }, { "epoch": 1.639560886359016, "grad_norm": 0.10975086688995361, "learning_rate": 3.606757581925504e-05, "loss": 0.9608, "step": 4033 }, { "epoch": 1.6399674730636309, "grad_norm": 0.09726303815841675, "learning_rate": 3.60268674944026e-05, "loss": 0.8703, "step": 4034 }, { "epoch": 1.6403740597682455, "grad_norm": null, "learning_rate": 3.598615916955018e-05, "loss": 1.128, "step": 4035 }, { "epoch": 1.6407806464728605, "grad_norm": 0.10834196954965591, "learning_rate": 3.5945450844697745e-05, "loss": 0.9676, "step": 4036 }, { "epoch": 1.641187233177475, "grad_norm": 0.10544741898775101, "learning_rate": 3.5904742519845306e-05, "loss": 0.8797, "step": 4037 }, { "epoch": 1.6415938198820899, "grad_norm": 0.11281091719865799, "learning_rate": 3.586403419499288e-05, "loss": 0.8458, "step": 4038 }, { "epoch": 1.6420004065867047, "grad_norm": 0.11323501914739609, "learning_rate": 3.582332587014045e-05, "loss": 1.054, "step": 4039 }, { "epoch": 1.6424069932913192, "grad_norm": 0.1242019459605217, "learning_rate": 3.578261754528801e-05, "loss": 0.978, "step": 4040 }, { "epoch": 1.6428135799959342, "grad_norm": 
0.11558779329061508, "learning_rate": 3.574190922043558e-05, "loss": 0.9664, "step": 4041 }, { "epoch": 1.6432201667005488, "grad_norm": 0.11010782420635223, "learning_rate": 3.570120089558315e-05, "loss": 0.9728, "step": 4042 }, { "epoch": 1.6436267534051636, "grad_norm": 0.11480515450239182, "learning_rate": 3.566049257073071e-05, "loss": 0.9653, "step": 4043 }, { "epoch": 1.6440333401097784, "grad_norm": 0.10841669887304306, "learning_rate": 3.5619784245878286e-05, "loss": 0.9257, "step": 4044 }, { "epoch": 1.6444399268143932, "grad_norm": 0.11190009117126465, "learning_rate": 3.5579075921025854e-05, "loss": 0.979, "step": 4045 }, { "epoch": 1.644846513519008, "grad_norm": 0.12733308970928192, "learning_rate": 3.553836759617342e-05, "loss": 1.1891, "step": 4046 }, { "epoch": 1.6452531002236226, "grad_norm": 0.11118471622467041, "learning_rate": 3.549765927132099e-05, "loss": 0.9259, "step": 4047 }, { "epoch": 1.6456596869282376, "grad_norm": 0.10010193288326263, "learning_rate": 3.545695094646856e-05, "loss": 0.8568, "step": 4048 }, { "epoch": 1.6460662736328522, "grad_norm": 0.10972374677658081, "learning_rate": 3.5416242621616124e-05, "loss": 1.0115, "step": 4049 }, { "epoch": 1.646472860337467, "grad_norm": 0.10799884051084518, "learning_rate": 3.537553429676369e-05, "loss": 0.962, "step": 4050 }, { "epoch": 1.6468794470420818, "grad_norm": 0.10064574331045151, "learning_rate": 3.533482597191126e-05, "loss": 0.8995, "step": 4051 }, { "epoch": 1.6472860337466964, "grad_norm": 0.1071900725364685, "learning_rate": 3.529411764705883e-05, "loss": 0.9491, "step": 4052 }, { "epoch": 1.6476926204513114, "grad_norm": 0.09920529276132584, "learning_rate": 3.5253409322206395e-05, "loss": 0.8961, "step": 4053 }, { "epoch": 1.648099207155926, "grad_norm": 0.12487678974866867, "learning_rate": 3.5212700997353956e-05, "loss": 1.0935, "step": 4054 }, { "epoch": 1.6485057938605407, "grad_norm": 0.10418154299259186, "learning_rate": 3.517199267250153e-05, "loss": 0.9397, 
"step": 4055 }, { "epoch": 1.6489123805651555, "grad_norm": 0.1157752275466919, "learning_rate": 3.51312843476491e-05, "loss": 0.98, "step": 4056 }, { "epoch": 1.6493189672697701, "grad_norm": 0.11210603266954422, "learning_rate": 3.509057602279666e-05, "loss": 0.9784, "step": 4057 }, { "epoch": 1.6497255539743851, "grad_norm": 0.10065794736146927, "learning_rate": 3.504986769794423e-05, "loss": 0.8451, "step": 4058 }, { "epoch": 1.6501321406789997, "grad_norm": 0.11964450031518936, "learning_rate": 3.50091593730918e-05, "loss": 0.9799, "step": 4059 }, { "epoch": 1.6505387273836145, "grad_norm": 0.10936059057712555, "learning_rate": 3.496845104823936e-05, "loss": 0.927, "step": 4060 }, { "epoch": 1.6509453140882293, "grad_norm": 0.1044425368309021, "learning_rate": 3.4927742723386936e-05, "loss": 0.9029, "step": 4061 }, { "epoch": 1.651351900792844, "grad_norm": 0.1131415069103241, "learning_rate": 3.4887034398534504e-05, "loss": 0.9837, "step": 4062 }, { "epoch": 1.651758487497459, "grad_norm": 0.10537821054458618, "learning_rate": 3.4846326073682064e-05, "loss": 0.9502, "step": 4063 }, { "epoch": 1.6521650742020735, "grad_norm": 0.10901257395744324, "learning_rate": 3.480561774882964e-05, "loss": 0.9424, "step": 4064 }, { "epoch": 1.6525716609066885, "grad_norm": 0.1178942546248436, "learning_rate": 3.4764909423977206e-05, "loss": 1.0408, "step": 4065 }, { "epoch": 1.652978247611303, "grad_norm": 0.10703755915164948, "learning_rate": 3.472420109912477e-05, "loss": 0.8829, "step": 4066 }, { "epoch": 1.6533848343159179, "grad_norm": 0.11088605970144272, "learning_rate": 3.468349277427234e-05, "loss": 0.9954, "step": 4067 }, { "epoch": 1.6537914210205327, "grad_norm": 0.09962453693151474, "learning_rate": 3.464278444941991e-05, "loss": 0.8575, "step": 4068 }, { "epoch": 1.6541980077251472, "grad_norm": 0.111634261906147, "learning_rate": 3.460207612456747e-05, "loss": 1.0138, "step": 4069 }, { "epoch": 1.6546045944297623, "grad_norm": 0.11267593502998352, 
"learning_rate": 3.4561367799715045e-05, "loss": 1.0642, "step": 4070 }, { "epoch": 1.6550111811343768, "grad_norm": 0.09662552177906036, "learning_rate": 3.452065947486261e-05, "loss": 0.8354, "step": 4071 }, { "epoch": 1.6554177678389916, "grad_norm": 0.10733507573604584, "learning_rate": 3.447995115001018e-05, "loss": 0.9678, "step": 4072 }, { "epoch": 1.6558243545436064, "grad_norm": 0.11181973665952682, "learning_rate": 3.443924282515775e-05, "loss": 0.9635, "step": 4073 }, { "epoch": 1.6562309412482212, "grad_norm": 0.10606959462165833, "learning_rate": 3.4398534500305315e-05, "loss": 0.9822, "step": 4074 }, { "epoch": 1.656637527952836, "grad_norm": 0.10522132366895676, "learning_rate": 3.435782617545288e-05, "loss": 0.9106, "step": 4075 }, { "epoch": 1.6570441146574506, "grad_norm": 0.10762479156255722, "learning_rate": 3.431711785060045e-05, "loss": 0.8781, "step": 4076 }, { "epoch": 1.6574507013620656, "grad_norm": 0.10766679793596268, "learning_rate": 3.427640952574802e-05, "loss": 0.902, "step": 4077 }, { "epoch": 1.6578572880666802, "grad_norm": 0.10841795802116394, "learning_rate": 3.4235701200895586e-05, "loss": 0.9509, "step": 4078 }, { "epoch": 1.658263874771295, "grad_norm": 0.10554873943328857, "learning_rate": 3.419499287604315e-05, "loss": 0.9276, "step": 4079 }, { "epoch": 1.6586704614759098, "grad_norm": 0.10341285914182663, "learning_rate": 3.415428455119072e-05, "loss": 0.8962, "step": 4080 }, { "epoch": 1.6590770481805244, "grad_norm": 0.11342310160398483, "learning_rate": 3.411357622633829e-05, "loss": 0.9239, "step": 4081 }, { "epoch": 1.6594836348851394, "grad_norm": 0.11349890381097794, "learning_rate": 3.4072867901485856e-05, "loss": 1.0019, "step": 4082 }, { "epoch": 1.659890221589754, "grad_norm": 0.11031408607959747, "learning_rate": 3.4032159576633424e-05, "loss": 0.9638, "step": 4083 }, { "epoch": 1.6602968082943688, "grad_norm": 0.10704563558101654, "learning_rate": 3.399145125178099e-05, "loss": 1.0363, "step": 4084 }, { 
"epoch": 1.6607033949989836, "grad_norm": 0.10755988955497742, "learning_rate": 3.395074292692856e-05, "loss": 0.9333, "step": 4085 }, { "epoch": 1.6611099817035981, "grad_norm": 0.11083410680294037, "learning_rate": 3.391003460207613e-05, "loss": 0.9933, "step": 4086 }, { "epoch": 1.6615165684082132, "grad_norm": 0.11164813488721848, "learning_rate": 3.3869326277223694e-05, "loss": 0.9725, "step": 4087 }, { "epoch": 1.6619231551128277, "grad_norm": 0.10156459361314774, "learning_rate": 3.382861795237126e-05, "loss": 0.9173, "step": 4088 }, { "epoch": 1.6623297418174425, "grad_norm": 0.11313030123710632, "learning_rate": 3.378790962751883e-05, "loss": 1.0201, "step": 4089 }, { "epoch": 1.6627363285220573, "grad_norm": 0.1112866923213005, "learning_rate": 3.37472013026664e-05, "loss": 0.9779, "step": 4090 }, { "epoch": 1.6631429152266721, "grad_norm": 0.11510057747364044, "learning_rate": 3.3706492977813965e-05, "loss": 1.0491, "step": 4091 }, { "epoch": 1.663549501931287, "grad_norm": 0.10526982694864273, "learning_rate": 3.366578465296153e-05, "loss": 0.9397, "step": 4092 }, { "epoch": 1.6639560886359015, "grad_norm": 0.11020489037036896, "learning_rate": 3.36250763281091e-05, "loss": 1.0285, "step": 4093 }, { "epoch": 1.6643626753405165, "grad_norm": 0.12323461472988129, "learning_rate": 3.358436800325667e-05, "loss": 0.9692, "step": 4094 }, { "epoch": 1.664769262045131, "grad_norm": 0.1031549721956253, "learning_rate": 3.3543659678404235e-05, "loss": 0.9761, "step": 4095 }, { "epoch": 1.665175848749746, "grad_norm": 0.11584251374006271, "learning_rate": 3.35029513535518e-05, "loss": 0.9312, "step": 4096 }, { "epoch": 1.6655824354543607, "grad_norm": 0.12074989080429077, "learning_rate": 3.346224302869937e-05, "loss": 1.1116, "step": 4097 }, { "epoch": 1.6659890221589753, "grad_norm": 0.10211546719074249, "learning_rate": 3.342153470384694e-05, "loss": 0.9243, "step": 4098 }, { "epoch": 1.6663956088635903, "grad_norm": 0.10890009999275208, "learning_rate": 
3.3380826378994506e-05, "loss": 0.9122, "step": 4099 }, { "epoch": 1.6668021955682049, "grad_norm": 0.10587424039840698, "learning_rate": 3.3340118054142074e-05, "loss": 0.9334, "step": 4100 }, { "epoch": 1.6672087822728197, "grad_norm": 0.11641372740268707, "learning_rate": 3.329940972928964e-05, "loss": 1.0075, "step": 4101 }, { "epoch": 1.6676153689774345, "grad_norm": 0.1025613471865654, "learning_rate": 3.325870140443721e-05, "loss": 0.8918, "step": 4102 }, { "epoch": 1.6680219556820493, "grad_norm": 0.10911375284194946, "learning_rate": 3.3217993079584777e-05, "loss": 0.9776, "step": 4103 }, { "epoch": 1.668428542386664, "grad_norm": 0.10893717408180237, "learning_rate": 3.3177284754732344e-05, "loss": 0.9123, "step": 4104 }, { "epoch": 1.6688351290912786, "grad_norm": 0.09526187181472778, "learning_rate": 3.313657642987991e-05, "loss": 0.8188, "step": 4105 }, { "epoch": 1.6692417157958934, "grad_norm": 0.12063385546207428, "learning_rate": 3.309586810502748e-05, "loss": 1.0367, "step": 4106 }, { "epoch": 1.6696483025005082, "grad_norm": 0.09977064281702042, "learning_rate": 3.305515978017505e-05, "loss": 0.8625, "step": 4107 }, { "epoch": 1.670054889205123, "grad_norm": 0.1041639968752861, "learning_rate": 3.3014451455322615e-05, "loss": 0.8441, "step": 4108 }, { "epoch": 1.6704614759097378, "grad_norm": 0.11623057723045349, "learning_rate": 3.297374313047018e-05, "loss": 0.9207, "step": 4109 }, { "epoch": 1.6708680626143524, "grad_norm": 0.1048574224114418, "learning_rate": 3.293303480561775e-05, "loss": 0.8714, "step": 4110 }, { "epoch": 1.6712746493189674, "grad_norm": 0.10759609192609787, "learning_rate": 3.289232648076532e-05, "loss": 0.9579, "step": 4111 }, { "epoch": 1.671681236023582, "grad_norm": 0.10754968971014023, "learning_rate": 3.2851618155912885e-05, "loss": 0.9247, "step": 4112 }, { "epoch": 1.6720878227281968, "grad_norm": 0.11272035539150238, "learning_rate": 3.281090983106045e-05, "loss": 0.9927, "step": 4113 }, { "epoch": 
1.6724944094328116, "grad_norm": 0.11457539349794388, "learning_rate": 3.277020150620802e-05, "loss": 1.005, "step": 4114 }, { "epoch": 1.6729009961374262, "grad_norm": 0.10070742666721344, "learning_rate": 3.272949318135559e-05, "loss": 0.8809, "step": 4115 }, { "epoch": 1.6733075828420412, "grad_norm": 0.11025416105985641, "learning_rate": 3.2688784856503156e-05, "loss": 0.9467, "step": 4116 }, { "epoch": 1.6737141695466558, "grad_norm": 0.09717261791229248, "learning_rate": 3.2648076531650723e-05, "loss": 0.846, "step": 4117 }, { "epoch": 1.6741207562512705, "grad_norm": 0.10521706938743591, "learning_rate": 3.260736820679829e-05, "loss": 0.9839, "step": 4118 }, { "epoch": 1.6745273429558853, "grad_norm": 0.10194703936576843, "learning_rate": 3.256665988194586e-05, "loss": 0.8897, "step": 4119 }, { "epoch": 1.6749339296605001, "grad_norm": 0.1110045537352562, "learning_rate": 3.2525951557093426e-05, "loss": 0.9729, "step": 4120 }, { "epoch": 1.675340516365115, "grad_norm": 0.11252006888389587, "learning_rate": 3.2485243232240994e-05, "loss": 0.9685, "step": 4121 }, { "epoch": 1.6757471030697295, "grad_norm": 0.11200141161680222, "learning_rate": 3.244453490738856e-05, "loss": 0.9663, "step": 4122 }, { "epoch": 1.6761536897743445, "grad_norm": 0.116104856133461, "learning_rate": 3.240382658253613e-05, "loss": 1.1104, "step": 4123 }, { "epoch": 1.676560276478959, "grad_norm": 0.11117435991764069, "learning_rate": 3.23631182576837e-05, "loss": 0.9643, "step": 4124 }, { "epoch": 1.676966863183574, "grad_norm": 0.10212714970111847, "learning_rate": 3.2322409932831265e-05, "loss": 0.911, "step": 4125 }, { "epoch": 1.6773734498881887, "grad_norm": 0.11014258116483688, "learning_rate": 3.228170160797883e-05, "loss": 0.9405, "step": 4126 }, { "epoch": 1.6777800365928033, "grad_norm": 0.10939601808786392, "learning_rate": 3.22409932831264e-05, "loss": 0.9238, "step": 4127 }, { "epoch": 1.6781866232974183, "grad_norm": 0.10850725322961807, "learning_rate": 
3.220028495827397e-05, "loss": 0.9634, "step": 4128 }, { "epoch": 1.6785932100020329, "grad_norm": 0.10427679866552353, "learning_rate": 3.2159576633421535e-05, "loss": 0.8854, "step": 4129 }, { "epoch": 1.6789997967066477, "grad_norm": 0.11108868569135666, "learning_rate": 3.21188683085691e-05, "loss": 1.0035, "step": 4130 }, { "epoch": 1.6794063834112625, "grad_norm": 0.09677024930715561, "learning_rate": 3.207815998371667e-05, "loss": 0.8773, "step": 4131 }, { "epoch": 1.679812970115877, "grad_norm": 0.11575711518526077, "learning_rate": 3.203745165886424e-05, "loss": 0.961, "step": 4132 }, { "epoch": 1.680219556820492, "grad_norm": 0.10279621928930283, "learning_rate": 3.1996743334011806e-05, "loss": 0.8362, "step": 4133 }, { "epoch": 1.6806261435251066, "grad_norm": 0.10634070634841919, "learning_rate": 3.195603500915937e-05, "loss": 0.9402, "step": 4134 }, { "epoch": 1.6810327302297214, "grad_norm": 0.09615077078342438, "learning_rate": 3.191532668430694e-05, "loss": 0.9251, "step": 4135 }, { "epoch": 1.6814393169343362, "grad_norm": 0.11199648678302765, "learning_rate": 3.187461835945451e-05, "loss": 1.0387, "step": 4136 }, { "epoch": 1.681845903638951, "grad_norm": 0.10691442340612411, "learning_rate": 3.1833910034602076e-05, "loss": 0.9449, "step": 4137 }, { "epoch": 1.6822524903435658, "grad_norm": 0.10192760825157166, "learning_rate": 3.1793201709749644e-05, "loss": 0.9167, "step": 4138 }, { "epoch": 1.6826590770481804, "grad_norm": 0.09949979931116104, "learning_rate": 3.175249338489721e-05, "loss": 0.897, "step": 4139 }, { "epoch": 1.6830656637527954, "grad_norm": 0.10565482079982758, "learning_rate": 3.171178506004478e-05, "loss": 0.8484, "step": 4140 }, { "epoch": 1.68347225045741, "grad_norm": 0.11706092953681946, "learning_rate": 3.1671076735192353e-05, "loss": 1.0888, "step": 4141 }, { "epoch": 1.6838788371620248, "grad_norm": 0.10487108677625656, "learning_rate": 3.1630368410339914e-05, "loss": 0.9405, "step": 4142 }, { "epoch": 
1.6842854238666396, "grad_norm": 0.10859554260969162, "learning_rate": 3.158966008548748e-05, "loss": 1.017, "step": 4143 }, { "epoch": 1.6846920105712542, "grad_norm": 0.10497396439313889, "learning_rate": 3.1548951760635056e-05, "loss": 0.8899, "step": 4144 }, { "epoch": 1.6850985972758692, "grad_norm": 0.10445064306259155, "learning_rate": 3.150824343578262e-05, "loss": 0.9581, "step": 4145 }, { "epoch": 1.6855051839804838, "grad_norm": 0.11567474156618118, "learning_rate": 3.1467535110930185e-05, "loss": 1.0488, "step": 4146 }, { "epoch": 1.6859117706850986, "grad_norm": 0.10222821682691574, "learning_rate": 3.142682678607776e-05, "loss": 0.8523, "step": 4147 }, { "epoch": 1.6863183573897134, "grad_norm": 0.10138101130723953, "learning_rate": 3.138611846122532e-05, "loss": 0.8837, "step": 4148 }, { "epoch": 1.6867249440943282, "grad_norm": 0.10402019321918488, "learning_rate": 3.134541013637289e-05, "loss": 0.8731, "step": 4149 }, { "epoch": 1.687131530798943, "grad_norm": 0.10087074339389801, "learning_rate": 3.130470181152046e-05, "loss": 0.8866, "step": 4150 }, { "epoch": 1.6875381175035575, "grad_norm": 0.10861088335514069, "learning_rate": 3.126399348666802e-05, "loss": 0.9968, "step": 4151 }, { "epoch": 1.6879447042081726, "grad_norm": 0.10692057758569717, "learning_rate": 3.122328516181559e-05, "loss": 0.9646, "step": 4152 }, { "epoch": 1.6883512909127871, "grad_norm": 0.10919506847858429, "learning_rate": 3.1182576836963165e-05, "loss": 0.9265, "step": 4153 }, { "epoch": 1.688757877617402, "grad_norm": 0.11321806907653809, "learning_rate": 3.1141868512110726e-05, "loss": 1.0094, "step": 4154 }, { "epoch": 1.6891644643220167, "grad_norm": 0.09933071583509445, "learning_rate": 3.1101160187258294e-05, "loss": 0.8347, "step": 4155 }, { "epoch": 1.6895710510266313, "grad_norm": 0.10513114184141159, "learning_rate": 3.106045186240587e-05, "loss": 0.9218, "step": 4156 }, { "epoch": 1.6899776377312463, "grad_norm": 0.10074515640735626, "learning_rate": 
3.101974353755343e-05, "loss": 0.8773, "step": 4157 }, { "epoch": 1.690384224435861, "grad_norm": 0.11252142488956451, "learning_rate": 3.0979035212700996e-05, "loss": 1.0388, "step": 4158 }, { "epoch": 1.6907908111404757, "grad_norm": 0.11315988004207611, "learning_rate": 3.093832688784857e-05, "loss": 0.9604, "step": 4159 }, { "epoch": 1.6911973978450905, "grad_norm": 0.09731707721948624, "learning_rate": 3.089761856299613e-05, "loss": 0.8388, "step": 4160 }, { "epoch": 1.691603984549705, "grad_norm": 0.117193803191185, "learning_rate": 3.08569102381437e-05, "loss": 1.0092, "step": 4161 }, { "epoch": 1.69201057125432, "grad_norm": 0.10533732920885086, "learning_rate": 3.0816201913291274e-05, "loss": 0.9124, "step": 4162 }, { "epoch": 1.6924171579589347, "grad_norm": 0.11219623684883118, "learning_rate": 3.0775493588438835e-05, "loss": 0.9709, "step": 4163 }, { "epoch": 1.6928237446635495, "grad_norm": 0.10370016098022461, "learning_rate": 3.073478526358641e-05, "loss": 1.0248, "step": 4164 }, { "epoch": 1.6932303313681643, "grad_norm": 0.10810839384794235, "learning_rate": 3.069407693873398e-05, "loss": 0.9126, "step": 4165 }, { "epoch": 1.693636918072779, "grad_norm": 0.09728986769914627, "learning_rate": 3.065336861388154e-05, "loss": 0.7728, "step": 4166 }, { "epoch": 1.6940435047773938, "grad_norm": 0.11202438920736313, "learning_rate": 3.061266028902911e-05, "loss": 0.9593, "step": 4167 }, { "epoch": 1.6944500914820084, "grad_norm": 0.9556000232696533, "learning_rate": 3.057195196417667e-05, "loss": 0.9398, "step": 4168 }, { "epoch": 1.6948566781866234, "grad_norm": 0.11005814373493195, "learning_rate": 3.053124363932424e-05, "loss": 0.9694, "step": 4169 }, { "epoch": 1.695263264891238, "grad_norm": 0.11754006147384644, "learning_rate": 3.049053531447181e-05, "loss": 1.0125, "step": 4170 }, { "epoch": 1.6956698515958528, "grad_norm": 0.1051679253578186, "learning_rate": 3.044982698961938e-05, "loss": 0.9359, "step": 4171 }, { "epoch": 1.6960764383004676, 
"grad_norm": 0.11605421453714371, "learning_rate": 3.0409118664766943e-05, "loss": 1.0338, "step": 4172 }, { "epoch": 1.6964830250050822, "grad_norm": 0.10814854502677917, "learning_rate": 3.0368410339914514e-05, "loss": 0.9421, "step": 4173 }, { "epoch": 1.6968896117096972, "grad_norm": 0.10694431513547897, "learning_rate": 3.0327702015062082e-05, "loss": 0.8746, "step": 4174 }, { "epoch": 1.6972961984143118, "grad_norm": 0.10969371348619461, "learning_rate": 3.0286993690209646e-05, "loss": 0.9407, "step": 4175 }, { "epoch": 1.6977027851189266, "grad_norm": 0.11277522146701813, "learning_rate": 3.0246285365357217e-05, "loss": 0.9694, "step": 4176 }, { "epoch": 1.6981093718235414, "grad_norm": 0.10470854490995407, "learning_rate": 3.0205577040504785e-05, "loss": 0.9102, "step": 4177 }, { "epoch": 1.6985159585281562, "grad_norm": 0.11162138730287552, "learning_rate": 3.016486871565235e-05, "loss": 0.9992, "step": 4178 }, { "epoch": 1.698922545232771, "grad_norm": 0.10514409840106964, "learning_rate": 3.012416039079992e-05, "loss": 0.9071, "step": 4179 }, { "epoch": 1.6993291319373856, "grad_norm": 0.10722570866346359, "learning_rate": 3.0083452065947488e-05, "loss": 0.9165, "step": 4180 }, { "epoch": 1.6997357186420006, "grad_norm": 0.11843981593847275, "learning_rate": 3.0042743741095052e-05, "loss": 1.0993, "step": 4181 }, { "epoch": 1.7001423053466151, "grad_norm": 0.09846517443656921, "learning_rate": 3.0002035416242623e-05, "loss": 0.7917, "step": 4182 }, { "epoch": 1.70054889205123, "grad_norm": 0.09692200273275375, "learning_rate": 2.996132709139019e-05, "loss": 0.8828, "step": 4183 }, { "epoch": 1.7009554787558447, "grad_norm": 0.11660193651914597, "learning_rate": 2.9920618766537755e-05, "loss": 1.0471, "step": 4184 }, { "epoch": 1.7013620654604593, "grad_norm": 0.10081440955400467, "learning_rate": 2.9879910441685326e-05, "loss": 0.8514, "step": 4185 }, { "epoch": 1.7017686521650743, "grad_norm": 0.10978380590677261, "learning_rate": 
2.9839202116832894e-05, "loss": 0.9892, "step": 4186 }, { "epoch": 1.702175238869689, "grad_norm": 0.1116599589586258, "learning_rate": 2.9798493791980465e-05, "loss": 0.9694, "step": 4187 }, { "epoch": 1.7025818255743037, "grad_norm": 0.11739277094602585, "learning_rate": 2.975778546712803e-05, "loss": 1.0799, "step": 4188 }, { "epoch": 1.7029884122789185, "grad_norm": 0.1063208132982254, "learning_rate": 2.9717077142275596e-05, "loss": 0.8933, "step": 4189 }, { "epoch": 1.703394998983533, "grad_norm": 0.12064635753631592, "learning_rate": 2.9676368817423168e-05, "loss": 0.9448, "step": 4190 }, { "epoch": 1.703801585688148, "grad_norm": 0.11632698774337769, "learning_rate": 2.9635660492570732e-05, "loss": 1.0139, "step": 4191 }, { "epoch": 1.7042081723927627, "grad_norm": 0.10843649506568909, "learning_rate": 2.95949521677183e-05, "loss": 0.909, "step": 4192 }, { "epoch": 1.7046147590973775, "grad_norm": 0.11414767056703568, "learning_rate": 2.955424384286587e-05, "loss": 1.0077, "step": 4193 }, { "epoch": 1.7050213458019923, "grad_norm": 0.11367535591125488, "learning_rate": 2.9513535518013435e-05, "loss": 1.01, "step": 4194 }, { "epoch": 1.705427932506607, "grad_norm": 0.11367020756006241, "learning_rate": 2.9472827193161002e-05, "loss": 1.024, "step": 4195 }, { "epoch": 1.7058345192112219, "grad_norm": 0.1111876368522644, "learning_rate": 2.9432118868308573e-05, "loss": 0.9485, "step": 4196 }, { "epoch": 1.7062411059158364, "grad_norm": 0.11318478733301163, "learning_rate": 2.9391410543456138e-05, "loss": 0.9367, "step": 4197 }, { "epoch": 1.7066476926204515, "grad_norm": 0.10784564912319183, "learning_rate": 2.9350702218603705e-05, "loss": 0.8722, "step": 4198 }, { "epoch": 1.707054279325066, "grad_norm": 0.10362992435693741, "learning_rate": 2.9309993893751276e-05, "loss": 0.9683, "step": 4199 }, { "epoch": 1.7074608660296808, "grad_norm": 0.10294868052005768, "learning_rate": 2.926928556889884e-05, "loss": 0.8937, "step": 4200 }, { "epoch": 
1.7078674527342956, "grad_norm": 0.09753353893756866, "learning_rate": 2.9228577244046408e-05, "loss": 0.8074, "step": 4201 }, { "epoch": 1.7082740394389102, "grad_norm": 0.1013207882642746, "learning_rate": 2.918786891919398e-05, "loss": 0.8929, "step": 4202 }, { "epoch": 1.7086806261435252, "grad_norm": 0.10623973608016968, "learning_rate": 2.9147160594341543e-05, "loss": 0.9603, "step": 4203 }, { "epoch": 1.7090872128481398, "grad_norm": 0.10540005564689636, "learning_rate": 2.910645226948911e-05, "loss": 0.9633, "step": 4204 }, { "epoch": 1.7094937995527546, "grad_norm": 0.10837602615356445, "learning_rate": 2.9065743944636682e-05, "loss": 0.9487, "step": 4205 }, { "epoch": 1.7099003862573694, "grad_norm": 0.11080582439899445, "learning_rate": 2.9025035619784246e-05, "loss": 0.9073, "step": 4206 }, { "epoch": 1.7103069729619842, "grad_norm": 0.09433023631572723, "learning_rate": 2.8984327294931814e-05, "loss": 0.8649, "step": 4207 }, { "epoch": 1.710713559666599, "grad_norm": 0.1045960932970047, "learning_rate": 2.8943618970079385e-05, "loss": 0.9516, "step": 4208 }, { "epoch": 1.7111201463712136, "grad_norm": 0.10738299041986465, "learning_rate": 2.890291064522695e-05, "loss": 0.971, "step": 4209 }, { "epoch": 1.7115267330758284, "grad_norm": 0.11573982238769531, "learning_rate": 2.886220232037452e-05, "loss": 1.0267, "step": 4210 }, { "epoch": 1.7119333197804432, "grad_norm": 0.0996336117386818, "learning_rate": 2.8821493995522088e-05, "loss": 0.8579, "step": 4211 }, { "epoch": 1.712339906485058, "grad_norm": 0.09528303146362305, "learning_rate": 2.8780785670669652e-05, "loss": 0.8302, "step": 4212 }, { "epoch": 1.7127464931896728, "grad_norm": 0.10783466696739197, "learning_rate": 2.8740077345817223e-05, "loss": 0.9427, "step": 4213 }, { "epoch": 1.7131530798942873, "grad_norm": 0.11174463480710983, "learning_rate": 2.869936902096479e-05, "loss": 1.046, "step": 4214 }, { "epoch": 1.7135596665989024, "grad_norm": 0.10815679281949997, "learning_rate": 
2.8658660696112355e-05, "loss": 0.9673, "step": 4215 }, { "epoch": 1.713966253303517, "grad_norm": 0.11043757200241089, "learning_rate": 2.8617952371259926e-05, "loss": 0.9417, "step": 4216 }, { "epoch": 1.7143728400081317, "grad_norm": 0.10383492708206177, "learning_rate": 2.8577244046407494e-05, "loss": 0.9286, "step": 4217 }, { "epoch": 1.7147794267127465, "grad_norm": 0.11312952637672424, "learning_rate": 2.8536535721555058e-05, "loss": 0.9456, "step": 4218 }, { "epoch": 1.715186013417361, "grad_norm": 0.10684169828891754, "learning_rate": 2.849582739670263e-05, "loss": 0.9141, "step": 4219 }, { "epoch": 1.7155926001219761, "grad_norm": 0.11260170489549637, "learning_rate": 2.8455119071850193e-05, "loss": 1.0298, "step": 4220 }, { "epoch": 1.7159991868265907, "grad_norm": 0.10850688070058823, "learning_rate": 2.841441074699776e-05, "loss": 0.9917, "step": 4221 }, { "epoch": 1.7164057735312055, "grad_norm": 0.11166807264089584, "learning_rate": 2.8373702422145332e-05, "loss": 0.9929, "step": 4222 }, { "epoch": 1.7168123602358203, "grad_norm": 0.1088751032948494, "learning_rate": 2.8332994097292896e-05, "loss": 0.9841, "step": 4223 }, { "epoch": 1.717218946940435, "grad_norm": 0.10074079036712646, "learning_rate": 2.8292285772440464e-05, "loss": 0.925, "step": 4224 }, { "epoch": 1.7176255336450499, "grad_norm": 0.11211121827363968, "learning_rate": 2.8251577447588035e-05, "loss": 0.9529, "step": 4225 }, { "epoch": 1.7180321203496645, "grad_norm": 0.12092123180627823, "learning_rate": 2.82108691227356e-05, "loss": 1.078, "step": 4226 }, { "epoch": 1.7184387070542795, "grad_norm": 0.11624202877283096, "learning_rate": 2.8170160797883167e-05, "loss": 1.0283, "step": 4227 }, { "epoch": 1.718845293758894, "grad_norm": 0.09251300990581512, "learning_rate": 2.8129452473030738e-05, "loss": 0.8062, "step": 4228 }, { "epoch": 1.7192518804635089, "grad_norm": 0.10410971194505692, "learning_rate": 2.8088744148178302e-05, "loss": 0.9062, "step": 4229 }, { "epoch": 
1.7196584671681237, "grad_norm": 0.10773943364620209, "learning_rate": 2.804803582332587e-05, "loss": 1.0352, "step": 4230 }, { "epoch": 1.7200650538727382, "grad_norm": 0.11410236358642578, "learning_rate": 2.800732749847344e-05, "loss": 1.1001, "step": 4231 }, { "epoch": 1.7204716405773532, "grad_norm": 0.10976003855466843, "learning_rate": 2.7966619173621005e-05, "loss": 0.9473, "step": 4232 }, { "epoch": 1.7208782272819678, "grad_norm": 0.10007011145353317, "learning_rate": 2.7925910848768576e-05, "loss": 0.8742, "step": 4233 }, { "epoch": 1.7212848139865826, "grad_norm": 0.10780924558639526, "learning_rate": 2.7885202523916143e-05, "loss": 1.0167, "step": 4234 }, { "epoch": 1.7216914006911974, "grad_norm": 0.10659775882959366, "learning_rate": 2.7844494199063708e-05, "loss": 0.8895, "step": 4235 }, { "epoch": 1.722097987395812, "grad_norm": 0.11290939152240753, "learning_rate": 2.780378587421128e-05, "loss": 0.9768, "step": 4236 }, { "epoch": 1.722504574100427, "grad_norm": 0.10749030113220215, "learning_rate": 2.7763077549358846e-05, "loss": 0.9507, "step": 4237 }, { "epoch": 1.7229111608050416, "grad_norm": 0.11117120832204819, "learning_rate": 2.772236922450641e-05, "loss": 1.0331, "step": 4238 }, { "epoch": 1.7233177475096564, "grad_norm": 0.10173874348402023, "learning_rate": 2.768166089965398e-05, "loss": 0.9539, "step": 4239 }, { "epoch": 1.7237243342142712, "grad_norm": 0.10627079010009766, "learning_rate": 2.764095257480155e-05, "loss": 0.9851, "step": 4240 }, { "epoch": 1.724130920918886, "grad_norm": 0.11093501001596451, "learning_rate": 2.7600244249949113e-05, "loss": 0.9549, "step": 4241 }, { "epoch": 1.7245375076235008, "grad_norm": 0.10598506033420563, "learning_rate": 2.7559535925096684e-05, "loss": 0.8464, "step": 4242 }, { "epoch": 1.7249440943281154, "grad_norm": 0.10644206404685974, "learning_rate": 2.7518827600244252e-05, "loss": 0.9621, "step": 4243 }, { "epoch": 1.7253506810327304, "grad_norm": 0.10193706303834915, "learning_rate": 
2.7478119275391816e-05, "loss": 0.9247, "step": 4244 }, { "epoch": 1.725757267737345, "grad_norm": 0.11588188260793686, "learning_rate": 2.7437410950539387e-05, "loss": 0.9922, "step": 4245 }, { "epoch": 1.7261638544419597, "grad_norm": 0.102876678109169, "learning_rate": 2.7396702625686955e-05, "loss": 0.932, "step": 4246 }, { "epoch": 1.7265704411465745, "grad_norm": 0.11138436198234558, "learning_rate": 2.735599430083452e-05, "loss": 1.0089, "step": 4247 }, { "epoch": 1.7269770278511891, "grad_norm": 0.10576023161411285, "learning_rate": 2.731528597598209e-05, "loss": 0.9773, "step": 4248 }, { "epoch": 1.7273836145558041, "grad_norm": 0.112242192029953, "learning_rate": 2.7274577651129658e-05, "loss": 0.954, "step": 4249 }, { "epoch": 1.7277902012604187, "grad_norm": 0.1062515377998352, "learning_rate": 2.7233869326277222e-05, "loss": 0.9338, "step": 4250 }, { "epoch": 1.7281967879650335, "grad_norm": 0.10844583809375763, "learning_rate": 2.7193161001424793e-05, "loss": 1.0064, "step": 4251 }, { "epoch": 1.7286033746696483, "grad_norm": 0.11245914548635483, "learning_rate": 2.715245267657236e-05, "loss": 1.0079, "step": 4252 }, { "epoch": 1.729009961374263, "grad_norm": 0.10556118190288544, "learning_rate": 2.7111744351719925e-05, "loss": 0.8712, "step": 4253 }, { "epoch": 1.729416548078878, "grad_norm": 0.0991094782948494, "learning_rate": 2.7071036026867496e-05, "loss": 0.8997, "step": 4254 }, { "epoch": 1.7298231347834925, "grad_norm": 0.10705914348363876, "learning_rate": 2.7030327702015064e-05, "loss": 1.0391, "step": 4255 }, { "epoch": 1.7302297214881075, "grad_norm": 0.09982667118310928, "learning_rate": 2.6989619377162635e-05, "loss": 0.8559, "step": 4256 }, { "epoch": 1.730636308192722, "grad_norm": 0.11347133666276932, "learning_rate": 2.69489110523102e-05, "loss": 1.0799, "step": 4257 }, { "epoch": 1.7310428948973369, "grad_norm": 0.10920675843954086, "learning_rate": 2.6908202727457767e-05, "loss": 1.0552, "step": 4258 }, { "epoch": 
1.7314494816019517, "grad_norm": 0.1066790297627449, "learning_rate": 2.6867494402605338e-05, "loss": 0.955, "step": 4259 }, { "epoch": 1.7318560683065662, "grad_norm": 0.10464853793382645, "learning_rate": 2.6826786077752902e-05, "loss": 0.9153, "step": 4260 }, { "epoch": 1.7322626550111813, "grad_norm": 0.10659107565879822, "learning_rate": 2.678607775290047e-05, "loss": 0.9485, "step": 4261 }, { "epoch": 1.7326692417157958, "grad_norm": 0.11267217993736267, "learning_rate": 2.674536942804804e-05, "loss": 1.0526, "step": 4262 }, { "epoch": 1.7330758284204106, "grad_norm": 0.11317162215709686, "learning_rate": 2.6704661103195605e-05, "loss": 0.9879, "step": 4263 }, { "epoch": 1.7334824151250254, "grad_norm": 0.10813785344362259, "learning_rate": 2.6663952778343172e-05, "loss": 0.9666, "step": 4264 }, { "epoch": 1.73388900182964, "grad_norm": 0.1164456233382225, "learning_rate": 2.6623244453490743e-05, "loss": 1.0068, "step": 4265 }, { "epoch": 1.734295588534255, "grad_norm": 0.10661870241165161, "learning_rate": 2.6582536128638308e-05, "loss": 0.91, "step": 4266 }, { "epoch": 1.7347021752388696, "grad_norm": 0.10373251140117645, "learning_rate": 2.6541827803785872e-05, "loss": 0.9769, "step": 4267 }, { "epoch": 1.7351087619434844, "grad_norm": 0.10973814874887466, "learning_rate": 2.6501119478933446e-05, "loss": 0.986, "step": 4268 }, { "epoch": 1.7355153486480992, "grad_norm": 0.11517725884914398, "learning_rate": 2.646041115408101e-05, "loss": 1.0583, "step": 4269 }, { "epoch": 1.735921935352714, "grad_norm": 0.11015557497739792, "learning_rate": 2.6419702829228575e-05, "loss": 0.8886, "step": 4270 }, { "epoch": 1.7363285220573288, "grad_norm": 0.09546652436256409, "learning_rate": 2.637899450437615e-05, "loss": 0.8361, "step": 4271 }, { "epoch": 1.7367351087619434, "grad_norm": 0.1123289093375206, "learning_rate": 2.6338286179523714e-05, "loss": 0.9679, "step": 4272 }, { "epoch": 1.7371416954665584, "grad_norm": 0.1125122606754303, "learning_rate": 
2.6297577854671278e-05, "loss": 1.012, "step": 4273 }, { "epoch": 1.737548282171173, "grad_norm": 0.09992215782403946, "learning_rate": 2.6256869529818852e-05, "loss": 0.8977, "step": 4274 }, { "epoch": 1.7379548688757878, "grad_norm": 0.10778994858264923, "learning_rate": 2.6216161204966416e-05, "loss": 0.8809, "step": 4275 }, { "epoch": 1.7383614555804026, "grad_norm": 0.10639583319425583, "learning_rate": 2.617545288011398e-05, "loss": 0.8556, "step": 4276 }, { "epoch": 1.7387680422850171, "grad_norm": 0.11742980778217316, "learning_rate": 2.6134744555261552e-05, "loss": 1.0188, "step": 4277 }, { "epoch": 1.7391746289896322, "grad_norm": 0.10542727261781693, "learning_rate": 2.609403623040912e-05, "loss": 0.9337, "step": 4278 }, { "epoch": 1.7395812156942467, "grad_norm": 0.11276593804359436, "learning_rate": 2.605332790555669e-05, "loss": 0.919, "step": 4279 }, { "epoch": 1.7399878023988615, "grad_norm": 0.11198127269744873, "learning_rate": 2.6012619580704255e-05, "loss": 0.985, "step": 4280 }, { "epoch": 1.7403943891034763, "grad_norm": 0.10808583348989487, "learning_rate": 2.5971911255851822e-05, "loss": 0.9746, "step": 4281 }, { "epoch": 1.7408009758080911, "grad_norm": 0.10986792296171188, "learning_rate": 2.5931202930999393e-05, "loss": 0.9551, "step": 4282 }, { "epoch": 1.741207562512706, "grad_norm": 0.10446632653474808, "learning_rate": 2.5890494606146958e-05, "loss": 0.9267, "step": 4283 }, { "epoch": 1.7416141492173205, "grad_norm": 0.10390999913215637, "learning_rate": 2.5849786281294525e-05, "loss": 0.9594, "step": 4284 }, { "epoch": 1.7420207359219355, "grad_norm": 0.10901391506195068, "learning_rate": 2.5809077956442096e-05, "loss": 0.9243, "step": 4285 }, { "epoch": 1.74242732262655, "grad_norm": 0.1013227254152298, "learning_rate": 2.576836963158966e-05, "loss": 0.8896, "step": 4286 }, { "epoch": 1.7428339093311649, "grad_norm": 0.1072046160697937, "learning_rate": 2.5727661306737228e-05, "loss": 0.9676, "step": 4287 }, { "epoch": 
1.7432404960357797, "grad_norm": 0.11231736093759537, "learning_rate": 2.56869529818848e-05, "loss": 1.0102, "step": 4288 }, { "epoch": 1.7436470827403943, "grad_norm": 0.10860041528940201, "learning_rate": 2.5646244657032363e-05, "loss": 0.9542, "step": 4289 }, { "epoch": 1.7440536694450093, "grad_norm": 0.10603651404380798, "learning_rate": 2.560553633217993e-05, "loss": 0.9641, "step": 4290 }, { "epoch": 1.7444602561496239, "grad_norm": 0.10350662469863892, "learning_rate": 2.5564828007327502e-05, "loss": 0.8459, "step": 4291 }, { "epoch": 1.7448668428542387, "grad_norm": 114.04313659667969, "learning_rate": 2.5524119682475066e-05, "loss": 0.9247, "step": 4292 }, { "epoch": 1.7452734295588535, "grad_norm": 0.10801190882921219, "learning_rate": 2.5483411357622634e-05, "loss": 0.8786, "step": 4293 }, { "epoch": 1.745680016263468, "grad_norm": 0.10285909473896027, "learning_rate": 2.5442703032770205e-05, "loss": 0.8542, "step": 4294 }, { "epoch": 1.746086602968083, "grad_norm": 0.10545431822538376, "learning_rate": 2.540199470791777e-05, "loss": 0.9207, "step": 4295 }, { "epoch": 1.7464931896726976, "grad_norm": 0.09978693723678589, "learning_rate": 2.5361286383065337e-05, "loss": 0.9143, "step": 4296 }, { "epoch": 1.7468997763773124, "grad_norm": 0.11697062104940414, "learning_rate": 2.5320578058212908e-05, "loss": 1.0319, "step": 4297 }, { "epoch": 1.7473063630819272, "grad_norm": 0.11616303026676178, "learning_rate": 2.5279869733360472e-05, "loss": 0.9238, "step": 4298 }, { "epoch": 1.747712949786542, "grad_norm": 0.10795867443084717, "learning_rate": 2.523916140850804e-05, "loss": 0.9084, "step": 4299 }, { "epoch": 1.7481195364911568, "grad_norm": 0.11287292838096619, "learning_rate": 2.519845308365561e-05, "loss": 1.1124, "step": 4300 }, { "epoch": 1.7485261231957714, "grad_norm": 0.09773558378219604, "learning_rate": 2.5157744758803175e-05, "loss": 0.9261, "step": 4301 }, { "epoch": 1.7489327099003864, "grad_norm": 0.10864662379026413, "learning_rate": 
2.5117036433950746e-05, "loss": 0.9403, "step": 4302 }, { "epoch": 1.749339296605001, "grad_norm": 0.10947711020708084, "learning_rate": 2.5076328109098314e-05, "loss": 1.0085, "step": 4303 }, { "epoch": 1.7497458833096158, "grad_norm": 0.10114283859729767, "learning_rate": 2.5035619784245878e-05, "loss": 0.8175, "step": 4304 }, { "epoch": 1.7501524700142306, "grad_norm": 0.12078159302473068, "learning_rate": 2.4994911459393445e-05, "loss": 1.0181, "step": 4305 }, { "epoch": 1.7505590567188452, "grad_norm": 0.11305717378854752, "learning_rate": 2.4954203134541016e-05, "loss": 1.0076, "step": 4306 }, { "epoch": 1.7509656434234602, "grad_norm": 0.10791384428739548, "learning_rate": 2.491349480968858e-05, "loss": 0.942, "step": 4307 }, { "epoch": 1.7513722301280747, "grad_norm": 0.10973495990037918, "learning_rate": 2.487278648483615e-05, "loss": 0.9548, "step": 4308 }, { "epoch": 1.7517788168326895, "grad_norm": 0.11003004014492035, "learning_rate": 2.483207815998372e-05, "loss": 0.9808, "step": 4309 }, { "epoch": 1.7521854035373043, "grad_norm": 0.10978732258081436, "learning_rate": 2.4791369835131287e-05, "loss": 0.9631, "step": 4310 }, { "epoch": 1.7525919902419191, "grad_norm": 0.1190049797296524, "learning_rate": 2.475066151027885e-05, "loss": 1.0111, "step": 4311 }, { "epoch": 1.752998576946534, "grad_norm": 0.11224553734064102, "learning_rate": 2.4709953185426422e-05, "loss": 1.0278, "step": 4312 }, { "epoch": 1.7534051636511485, "grad_norm": 0.10069207847118378, "learning_rate": 2.466924486057399e-05, "loss": 0.9471, "step": 4313 }, { "epoch": 1.7538117503557635, "grad_norm": 0.10624197125434875, "learning_rate": 2.4628536535721554e-05, "loss": 0.9147, "step": 4314 }, { "epoch": 1.754218337060378, "grad_norm": 0.11235160380601883, "learning_rate": 2.4587828210869125e-05, "loss": 0.8664, "step": 4315 }, { "epoch": 1.754624923764993, "grad_norm": 0.10248378664255142, "learning_rate": 2.4547119886016693e-05, "loss": 0.9251, "step": 4316 }, { "epoch": 
1.7550315104696077, "grad_norm": 0.10120463371276855, "learning_rate": 2.4506411561164257e-05, "loss": 0.9171, "step": 4317 }, { "epoch": 1.7554380971742223, "grad_norm": 0.1045912504196167, "learning_rate": 2.4465703236311828e-05, "loss": 0.8547, "step": 4318 }, { "epoch": 1.7558446838788373, "grad_norm": 0.10243544727563858, "learning_rate": 2.4424994911459396e-05, "loss": 0.8316, "step": 4319 }, { "epoch": 1.7562512705834519, "grad_norm": 0.1228775754570961, "learning_rate": 2.4384286586606963e-05, "loss": 1.0419, "step": 4320 }, { "epoch": 1.7566578572880667, "grad_norm": 0.10931418836116791, "learning_rate": 2.434357826175453e-05, "loss": 0.9295, "step": 4321 }, { "epoch": 1.7570644439926815, "grad_norm": 0.10567274689674377, "learning_rate": 2.43028699369021e-05, "loss": 0.8475, "step": 4322 }, { "epoch": 1.757471030697296, "grad_norm": 0.10765715688467026, "learning_rate": 2.4262161612049666e-05, "loss": 0.9455, "step": 4323 }, { "epoch": 1.757877617401911, "grad_norm": 0.1101839691400528, "learning_rate": 2.422145328719723e-05, "loss": 1.0432, "step": 4324 }, { "epoch": 1.7582842041065256, "grad_norm": 0.11012637615203857, "learning_rate": 2.41807449623448e-05, "loss": 1.0319, "step": 4325 }, { "epoch": 1.7586907908111404, "grad_norm": 0.10784610360860825, "learning_rate": 2.414003663749237e-05, "loss": 0.9805, "step": 4326 }, { "epoch": 1.7590973775157552, "grad_norm": 0.11275933682918549, "learning_rate": 2.4099328312639933e-05, "loss": 1.0016, "step": 4327 }, { "epoch": 1.75950396422037, "grad_norm": 0.12135179340839386, "learning_rate": 2.4058619987787504e-05, "loss": 1.0446, "step": 4328 }, { "epoch": 1.7599105509249848, "grad_norm": 0.11638684570789337, "learning_rate": 2.4017911662935072e-05, "loss": 0.9194, "step": 4329 }, { "epoch": 1.7603171376295994, "grad_norm": 0.10643386095762253, "learning_rate": 2.3977203338082636e-05, "loss": 0.8486, "step": 4330 }, { "epoch": 1.7607237243342144, "grad_norm": 0.11009577661752701, "learning_rate": 
2.3936495013230207e-05, "loss": 0.9088, "step": 4331 }, { "epoch": 1.761130311038829, "grad_norm": 0.10450370609760284, "learning_rate": 2.3895786688377775e-05, "loss": 0.9038, "step": 4332 }, { "epoch": 1.7615368977434438, "grad_norm": 0.10662252455949783, "learning_rate": 2.3855078363525343e-05, "loss": 0.9645, "step": 4333 }, { "epoch": 1.7619434844480586, "grad_norm": 0.11794573813676834, "learning_rate": 2.381437003867291e-05, "loss": 1.0474, "step": 4334 }, { "epoch": 1.7623500711526732, "grad_norm": 0.09998206794261932, "learning_rate": 2.3773661713820478e-05, "loss": 0.8884, "step": 4335 }, { "epoch": 1.7627566578572882, "grad_norm": 0.10653196275234222, "learning_rate": 2.3732953388968046e-05, "loss": 0.9707, "step": 4336 }, { "epoch": 1.7631632445619028, "grad_norm": 0.10006160289049149, "learning_rate": 2.3692245064115613e-05, "loss": 0.8823, "step": 4337 }, { "epoch": 1.7635698312665176, "grad_norm": 0.10839603841304779, "learning_rate": 2.365153673926318e-05, "loss": 0.8994, "step": 4338 }, { "epoch": 1.7639764179711324, "grad_norm": 0.11682058125734329, "learning_rate": 2.361082841441075e-05, "loss": 0.997, "step": 4339 }, { "epoch": 1.7643830046757472, "grad_norm": 0.1066887304186821, "learning_rate": 2.3570120089558316e-05, "loss": 0.9488, "step": 4340 }, { "epoch": 1.764789591380362, "grad_norm": 0.10981225222349167, "learning_rate": 2.3529411764705884e-05, "loss": 0.9737, "step": 4341 }, { "epoch": 1.7651961780849765, "grad_norm": 0.10463444888591766, "learning_rate": 2.348870343985345e-05, "loss": 0.9606, "step": 4342 }, { "epoch": 1.7656027647895913, "grad_norm": 0.10684582591056824, "learning_rate": 2.344799511500102e-05, "loss": 0.9596, "step": 4343 }, { "epoch": 1.7660093514942061, "grad_norm": 0.10298759490251541, "learning_rate": 2.3407286790148587e-05, "loss": 0.9313, "step": 4344 }, { "epoch": 1.766415938198821, "grad_norm": 0.10359034687280655, "learning_rate": 2.3366578465296154e-05, "loss": 0.9196, "step": 4345 }, { "epoch": 
1.7668225249034357, "grad_norm": 0.09916497021913528, "learning_rate": 2.3325870140443722e-05, "loss": 0.8912, "step": 4346 }, { "epoch": 1.7672291116080503, "grad_norm": 0.11100557446479797, "learning_rate": 2.328516181559129e-05, "loss": 0.9454, "step": 4347 }, { "epoch": 1.7676356983126653, "grad_norm": 0.10253948718309402, "learning_rate": 2.3244453490738857e-05, "loss": 0.9319, "step": 4348 }, { "epoch": 1.76804228501728, "grad_norm": 0.10206755995750427, "learning_rate": 2.3203745165886425e-05, "loss": 0.934, "step": 4349 }, { "epoch": 1.7684488717218947, "grad_norm": 0.11519122123718262, "learning_rate": 2.3163036841033992e-05, "loss": 1.0003, "step": 4350 }, { "epoch": 1.7688554584265095, "grad_norm": 0.10560178756713867, "learning_rate": 2.312232851618156e-05, "loss": 0.8264, "step": 4351 }, { "epoch": 1.769262045131124, "grad_norm": 0.10667610168457031, "learning_rate": 2.3081620191329128e-05, "loss": 0.9664, "step": 4352 }, { "epoch": 1.769668631835739, "grad_norm": 0.10857968032360077, "learning_rate": 2.3040911866476695e-05, "loss": 0.9806, "step": 4353 }, { "epoch": 1.7700752185403537, "grad_norm": 0.10619470477104187, "learning_rate": 2.3000203541624263e-05, "loss": 0.9465, "step": 4354 }, { "epoch": 1.7704818052449685, "grad_norm": 0.10277079790830612, "learning_rate": 2.295949521677183e-05, "loss": 0.9143, "step": 4355 }, { "epoch": 1.7708883919495833, "grad_norm": 0.10691865533590317, "learning_rate": 2.2918786891919398e-05, "loss": 1.0253, "step": 4356 }, { "epoch": 1.771294978654198, "grad_norm": 0.11174901574850082, "learning_rate": 2.2878078567066966e-05, "loss": 0.9733, "step": 4357 }, { "epoch": 1.7717015653588128, "grad_norm": 0.10610245913267136, "learning_rate": 2.2837370242214533e-05, "loss": 0.8913, "step": 4358 }, { "epoch": 1.7721081520634274, "grad_norm": 0.10555868595838547, "learning_rate": 2.27966619173621e-05, "loss": 0.9519, "step": 4359 }, { "epoch": 1.7725147387680424, "grad_norm": 0.10912936925888062, "learning_rate": 
2.275595359250967e-05, "loss": 0.9556, "step": 4360 }, { "epoch": 1.772921325472657, "grad_norm": 0.10051053762435913, "learning_rate": 2.2715245267657236e-05, "loss": 0.9013, "step": 4361 }, { "epoch": 1.7733279121772718, "grad_norm": 0.11846382170915604, "learning_rate": 2.2674536942804804e-05, "loss": 1.0445, "step": 4362 }, { "epoch": 1.7737344988818866, "grad_norm": 0.11906945705413818, "learning_rate": 2.263382861795237e-05, "loss": 1.034, "step": 4363 }, { "epoch": 1.7741410855865012, "grad_norm": 0.10891875624656677, "learning_rate": 2.259312029309994e-05, "loss": 0.9367, "step": 4364 }, { "epoch": 1.7745476722911162, "grad_norm": 0.10430287569761276, "learning_rate": 2.2552411968247507e-05, "loss": 0.9557, "step": 4365 }, { "epoch": 1.7749542589957308, "grad_norm": 0.0998322144150734, "learning_rate": 2.2511703643395078e-05, "loss": 0.9317, "step": 4366 }, { "epoch": 1.7753608457003456, "grad_norm": 0.11559992283582687, "learning_rate": 2.2470995318542642e-05, "loss": 1.0788, "step": 4367 }, { "epoch": 1.7757674324049604, "grad_norm": 0.09992285072803497, "learning_rate": 2.243028699369021e-05, "loss": 0.8919, "step": 4368 }, { "epoch": 1.776174019109575, "grad_norm": 0.09705512970685959, "learning_rate": 2.238957866883778e-05, "loss": 0.8681, "step": 4369 }, { "epoch": 1.77658060581419, "grad_norm": 0.10248728096485138, "learning_rate": 2.2348870343985345e-05, "loss": 0.8919, "step": 4370 }, { "epoch": 1.7769871925188045, "grad_norm": 0.10350560396909714, "learning_rate": 2.2308162019132913e-05, "loss": 0.9809, "step": 4371 }, { "epoch": 1.7773937792234193, "grad_norm": 0.1068427711725235, "learning_rate": 2.2267453694280484e-05, "loss": 0.9996, "step": 4372 }, { "epoch": 1.7778003659280341, "grad_norm": 0.10956123471260071, "learning_rate": 2.2226745369428048e-05, "loss": 1.0518, "step": 4373 }, { "epoch": 1.778206952632649, "grad_norm": 0.108606718480587, "learning_rate": 2.2186037044575616e-05, "loss": 0.9675, "step": 4374 }, { "epoch": 
1.7786135393372637, "grad_norm": 0.09994582086801529, "learning_rate": 2.2145328719723187e-05, "loss": 0.8852, "step": 4375 }, { "epoch": 1.7790201260418783, "grad_norm": 0.10806925594806671, "learning_rate": 2.210462039487075e-05, "loss": 0.8325, "step": 4376 }, { "epoch": 1.7794267127464933, "grad_norm": 0.10712796449661255, "learning_rate": 2.206391207001832e-05, "loss": 0.8711, "step": 4377 }, { "epoch": 1.779833299451108, "grad_norm": 0.0954805314540863, "learning_rate": 2.202320374516589e-05, "loss": 0.8266, "step": 4378 }, { "epoch": 1.7802398861557227, "grad_norm": 0.10875259339809418, "learning_rate": 2.1982495420313457e-05, "loss": 0.8732, "step": 4379 }, { "epoch": 1.7806464728603375, "grad_norm": 0.10435421019792557, "learning_rate": 2.194178709546102e-05, "loss": 0.9109, "step": 4380 }, { "epoch": 1.781053059564952, "grad_norm": 0.11331689357757568, "learning_rate": 2.190107877060859e-05, "loss": 1.0809, "step": 4381 }, { "epoch": 1.781459646269567, "grad_norm": 0.10554377734661102, "learning_rate": 2.186037044575616e-05, "loss": 0.9462, "step": 4382 }, { "epoch": 1.7818662329741817, "grad_norm": 0.10803189128637314, "learning_rate": 2.1819662120903724e-05, "loss": 0.8824, "step": 4383 }, { "epoch": 1.7822728196787965, "grad_norm": 0.11196300387382507, "learning_rate": 2.1778953796051292e-05, "loss": 1.0187, "step": 4384 }, { "epoch": 1.7826794063834113, "grad_norm": 0.11103217303752899, "learning_rate": 2.1738245471198863e-05, "loss": 0.8834, "step": 4385 }, { "epoch": 1.783085993088026, "grad_norm": 0.11082977801561356, "learning_rate": 2.1697537146346427e-05, "loss": 1.0117, "step": 4386 }, { "epoch": 1.7834925797926409, "grad_norm": 0.11907029151916504, "learning_rate": 2.1656828821493995e-05, "loss": 0.9475, "step": 4387 }, { "epoch": 1.7838991664972554, "grad_norm": 0.11966660618782043, "learning_rate": 2.1616120496641566e-05, "loss": 0.9932, "step": 4388 }, { "epoch": 1.7843057532018705, "grad_norm": 0.11029841005802155, "learning_rate": 
2.157541217178913e-05, "loss": 0.9592, "step": 4389 }, { "epoch": 1.784712339906485, "grad_norm": 0.11848790943622589, "learning_rate": 2.1534703846936698e-05, "loss": 1.0834, "step": 4390 }, { "epoch": 1.7851189266110998, "grad_norm": 0.10386286675930023, "learning_rate": 2.149399552208427e-05, "loss": 0.9237, "step": 4391 }, { "epoch": 1.7855255133157146, "grad_norm": 0.10579628497362137, "learning_rate": 2.1453287197231836e-05, "loss": 0.953, "step": 4392 }, { "epoch": 1.7859321000203292, "grad_norm": 0.1021723523736, "learning_rate": 2.14125788723794e-05, "loss": 0.9341, "step": 4393 }, { "epoch": 1.7863386867249442, "grad_norm": 0.10522866249084473, "learning_rate": 2.137187054752697e-05, "loss": 0.9835, "step": 4394 }, { "epoch": 1.7867452734295588, "grad_norm": 0.10765165835618973, "learning_rate": 2.133116222267454e-05, "loss": 0.9663, "step": 4395 }, { "epoch": 1.7871518601341736, "grad_norm": 0.11220693588256836, "learning_rate": 2.1290453897822104e-05, "loss": 0.9155, "step": 4396 }, { "epoch": 1.7875584468387884, "grad_norm": 0.10197433829307556, "learning_rate": 2.1249745572969675e-05, "loss": 1.0136, "step": 4397 }, { "epoch": 1.787965033543403, "grad_norm": 0.10165443271398544, "learning_rate": 2.1209037248117242e-05, "loss": 0.8976, "step": 4398 }, { "epoch": 1.788371620248018, "grad_norm": 0.09981327503919601, "learning_rate": 2.1168328923264806e-05, "loss": 0.9235, "step": 4399 }, { "epoch": 1.7887782069526326, "grad_norm": 0.11018567532300949, "learning_rate": 2.1127620598412377e-05, "loss": 1.0019, "step": 4400 }, { "epoch": 1.7891847936572474, "grad_norm": 0.1057438850402832, "learning_rate": 2.1086912273559945e-05, "loss": 0.9121, "step": 4401 }, { "epoch": 1.7895913803618622, "grad_norm": 0.10720144212245941, "learning_rate": 2.1046203948707513e-05, "loss": 0.972, "step": 4402 }, { "epoch": 1.789997967066477, "grad_norm": 0.10562118142843246, "learning_rate": 2.100549562385508e-05, "loss": 0.959, "step": 4403 }, { "epoch": 1.7904045537710918, 
"grad_norm": 0.09891306608915329, "learning_rate": 2.0964787299002648e-05, "loss": 0.902, "step": 4404 }, { "epoch": 1.7908111404757063, "grad_norm": 0.10641779005527496, "learning_rate": 2.0924078974150216e-05, "loss": 0.8979, "step": 4405 }, { "epoch": 1.7912177271803214, "grad_norm": 0.11061422526836395, "learning_rate": 2.088337064929778e-05, "loss": 1.0014, "step": 4406 }, { "epoch": 1.791624313884936, "grad_norm": 0.11537324637174606, "learning_rate": 2.084266232444535e-05, "loss": 1.0432, "step": 4407 }, { "epoch": 1.7920309005895507, "grad_norm": 0.10998007655143738, "learning_rate": 2.080195399959292e-05, "loss": 0.9634, "step": 4408 }, { "epoch": 1.7924374872941655, "grad_norm": 0.10309130698442459, "learning_rate": 2.0761245674740483e-05, "loss": 0.8738, "step": 4409 }, { "epoch": 1.79284407399878, "grad_norm": 0.11149647831916809, "learning_rate": 2.0720537349888054e-05, "loss": 1.0297, "step": 4410 }, { "epoch": 1.7932506607033951, "grad_norm": 0.09828011691570282, "learning_rate": 2.067982902503562e-05, "loss": 0.8313, "step": 4411 }, { "epoch": 1.7936572474080097, "grad_norm": 0.10721366852521896, "learning_rate": 2.0639120700183186e-05, "loss": 0.9765, "step": 4412 }, { "epoch": 1.7940638341126245, "grad_norm": 0.10838527232408524, "learning_rate": 2.0598412375330757e-05, "loss": 0.9149, "step": 4413 }, { "epoch": 1.7944704208172393, "grad_norm": 0.10356704890727997, "learning_rate": 2.0557704050478324e-05, "loss": 0.8851, "step": 4414 }, { "epoch": 1.794877007521854, "grad_norm": 0.11622883379459381, "learning_rate": 2.0516995725625892e-05, "loss": 1.0725, "step": 4415 }, { "epoch": 1.7952835942264689, "grad_norm": 0.10502596199512482, "learning_rate": 2.047628740077346e-05, "loss": 0.9569, "step": 4416 }, { "epoch": 1.7956901809310835, "grad_norm": 0.11332449316978455, "learning_rate": 2.0435579075921027e-05, "loss": 1.0243, "step": 4417 }, { "epoch": 1.7960967676356985, "grad_norm": 0.10270780324935913, "learning_rate": 2.0394870751068595e-05, 
"loss": 0.892, "step": 4418 }, { "epoch": 1.796503354340313, "grad_norm": 0.11144386976957321, "learning_rate": 2.0354162426216163e-05, "loss": 1.0023, "step": 4419 }, { "epoch": 1.7969099410449278, "grad_norm": 0.10902906954288483, "learning_rate": 2.031345410136373e-05, "loss": 0.9472, "step": 4420 }, { "epoch": 1.7973165277495426, "grad_norm": 0.10864941775798798, "learning_rate": 2.0272745776511298e-05, "loss": 0.9976, "step": 4421 }, { "epoch": 1.7977231144541572, "grad_norm": 0.10872267186641693, "learning_rate": 2.0232037451658865e-05, "loss": 0.9251, "step": 4422 }, { "epoch": 1.7981297011587722, "grad_norm": 0.10153832286596298, "learning_rate": 2.0191329126806433e-05, "loss": 0.9093, "step": 4423 }, { "epoch": 1.7985362878633868, "grad_norm": 0.10530535131692886, "learning_rate": 2.0150620801954e-05, "loss": 0.9548, "step": 4424 }, { "epoch": 1.7989428745680016, "grad_norm": 0.10547920316457748, "learning_rate": 2.010991247710157e-05, "loss": 0.9341, "step": 4425 }, { "epoch": 1.7993494612726164, "grad_norm": 0.10784605890512466, "learning_rate": 2.0069204152249136e-05, "loss": 0.9403, "step": 4426 }, { "epoch": 1.799756047977231, "grad_norm": 0.10222501307725906, "learning_rate": 2.0028495827396704e-05, "loss": 0.9046, "step": 4427 }, { "epoch": 1.800162634681846, "grad_norm": 0.11365248262882233, "learning_rate": 1.998778750254427e-05, "loss": 0.9915, "step": 4428 }, { "epoch": 1.8005692213864606, "grad_norm": 0.10497547686100006, "learning_rate": 1.994707917769184e-05, "loss": 0.8882, "step": 4429 }, { "epoch": 1.8009758080910754, "grad_norm": 0.10671515762805939, "learning_rate": 1.9906370852839407e-05, "loss": 0.9459, "step": 4430 }, { "epoch": 1.8013823947956902, "grad_norm": 0.11096837371587753, "learning_rate": 1.9865662527986974e-05, "loss": 0.9122, "step": 4431 }, { "epoch": 1.801788981500305, "grad_norm": 0.10286138951778412, "learning_rate": 1.9824954203134542e-05, "loss": 0.8998, "step": 4432 }, { "epoch": 1.8021955682049198, "grad_norm": 
0.09729085862636566, "learning_rate": 1.978424587828211e-05, "loss": 0.8153, "step": 4433 }, { "epoch": 1.8026021549095343, "grad_norm": 0.10512160509824753, "learning_rate": 1.9743537553429677e-05, "loss": 0.9588, "step": 4434 }, { "epoch": 1.8030087416141494, "grad_norm": 0.11304861307144165, "learning_rate": 1.9702829228577245e-05, "loss": 0.9857, "step": 4435 }, { "epoch": 1.803415328318764, "grad_norm": 0.11357636004686356, "learning_rate": 1.9662120903724812e-05, "loss": 1.0866, "step": 4436 }, { "epoch": 1.8038219150233787, "grad_norm": 0.10194739699363708, "learning_rate": 1.962141257887238e-05, "loss": 0.87, "step": 4437 }, { "epoch": 1.8042285017279935, "grad_norm": 0.10487458109855652, "learning_rate": 1.9580704254019948e-05, "loss": 0.9706, "step": 4438 }, { "epoch": 1.8046350884326081, "grad_norm": 0.10020367056131363, "learning_rate": 1.9539995929167515e-05, "loss": 0.9203, "step": 4439 }, { "epoch": 1.8050416751372231, "grad_norm": 0.10388026386499405, "learning_rate": 1.9499287604315083e-05, "loss": 0.8797, "step": 4440 }, { "epoch": 1.8054482618418377, "grad_norm": 0.11745689809322357, "learning_rate": 1.945857927946265e-05, "loss": 0.9467, "step": 4441 }, { "epoch": 1.8058548485464525, "grad_norm": 0.1165374368429184, "learning_rate": 1.9417870954610218e-05, "loss": 0.9579, "step": 4442 }, { "epoch": 1.8062614352510673, "grad_norm": 0.10849489271640778, "learning_rate": 1.9377162629757786e-05, "loss": 0.9887, "step": 4443 }, { "epoch": 1.806668021955682, "grad_norm": 0.10759218782186508, "learning_rate": 1.9336454304905353e-05, "loss": 0.8644, "step": 4444 }, { "epoch": 1.807074608660297, "grad_norm": 0.10003377497196198, "learning_rate": 1.929574598005292e-05, "loss": 0.8778, "step": 4445 }, { "epoch": 1.8074811953649115, "grad_norm": 0.11562558263540268, "learning_rate": 1.925503765520049e-05, "loss": 1.0265, "step": 4446 }, { "epoch": 1.8078877820695263, "grad_norm": 0.11590316146612167, "learning_rate": 1.9214329330348056e-05, "loss": 1.0377, 
"step": 4447 }, { "epoch": 1.808294368774141, "grad_norm": 0.10737669467926025, "learning_rate": 1.9173621005495627e-05, "loss": 0.9406, "step": 4448 }, { "epoch": 1.8087009554787559, "grad_norm": 0.10374566167593002, "learning_rate": 1.913291268064319e-05, "loss": 0.9487, "step": 4449 }, { "epoch": 1.8091075421833707, "grad_norm": 0.10888830572366714, "learning_rate": 1.909220435579076e-05, "loss": 0.9484, "step": 4450 }, { "epoch": 1.8095141288879852, "grad_norm": 0.09871388971805573, "learning_rate": 1.905149603093833e-05, "loss": 0.8424, "step": 4451 }, { "epoch": 1.8099207155926003, "grad_norm": 0.10062243789434433, "learning_rate": 1.9010787706085894e-05, "loss": 0.8976, "step": 4452 }, { "epoch": 1.8103273022972148, "grad_norm": 0.11193433403968811, "learning_rate": 1.8970079381233462e-05, "loss": 1.0214, "step": 4453 }, { "epoch": 1.8107338890018296, "grad_norm": 0.1083875447511673, "learning_rate": 1.8929371056381033e-05, "loss": 0.9821, "step": 4454 }, { "epoch": 1.8111404757064444, "grad_norm": 0.11033230274915695, "learning_rate": 1.8888662731528597e-05, "loss": 0.9665, "step": 4455 }, { "epoch": 1.811547062411059, "grad_norm": 0.10582058876752853, "learning_rate": 1.8847954406676165e-05, "loss": 0.8739, "step": 4456 }, { "epoch": 1.811953649115674, "grad_norm": 0.10182987153530121, "learning_rate": 1.8807246081823736e-05, "loss": 0.9246, "step": 4457 }, { "epoch": 1.8123602358202886, "grad_norm": 0.09945760667324066, "learning_rate": 1.87665377569713e-05, "loss": 0.9657, "step": 4458 }, { "epoch": 1.8127668225249034, "grad_norm": 0.11163794994354248, "learning_rate": 1.8725829432118868e-05, "loss": 1.0577, "step": 4459 }, { "epoch": 1.8131734092295182, "grad_norm": 0.11032053083181381, "learning_rate": 1.868512110726644e-05, "loss": 1.023, "step": 4460 }, { "epoch": 1.813579995934133, "grad_norm": 0.1140703558921814, "learning_rate": 1.8644412782414007e-05, "loss": 1.0206, "step": 4461 }, { "epoch": 1.8139865826387478, "grad_norm": 0.10543038696050644, 
"learning_rate": 1.860370445756157e-05, "loss": 0.891, "step": 4462 }, { "epoch": 1.8143931693433624, "grad_norm": 0.11133403331041336, "learning_rate": 1.856299613270914e-05, "loss": 0.9955, "step": 4463 }, { "epoch": 1.8147997560479774, "grad_norm": 0.10132287442684174, "learning_rate": 1.852228780785671e-05, "loss": 0.9612, "step": 4464 }, { "epoch": 1.815206342752592, "grad_norm": 0.10729202628135681, "learning_rate": 1.8481579483004274e-05, "loss": 0.9838, "step": 4465 }, { "epoch": 1.8156129294572068, "grad_norm": 0.10240829735994339, "learning_rate": 1.844087115815184e-05, "loss": 0.903, "step": 4466 }, { "epoch": 1.8160195161618216, "grad_norm": 0.09923898428678513, "learning_rate": 1.8400162833299412e-05, "loss": 0.9138, "step": 4467 }, { "epoch": 1.8164261028664361, "grad_norm": 0.11736813932657242, "learning_rate": 1.8359454508446977e-05, "loss": 1.0701, "step": 4468 }, { "epoch": 1.8168326895710512, "grad_norm": 0.10433974862098694, "learning_rate": 1.8318746183594544e-05, "loss": 0.8582, "step": 4469 }, { "epoch": 1.8172392762756657, "grad_norm": 0.10506631433963776, "learning_rate": 1.8278037858742115e-05, "loss": 1.002, "step": 4470 }, { "epoch": 1.8176458629802805, "grad_norm": 0.11175213009119034, "learning_rate": 1.8237329533889683e-05, "loss": 1.0239, "step": 4471 }, { "epoch": 1.8180524496848953, "grad_norm": 0.10588959604501724, "learning_rate": 1.8196621209037247e-05, "loss": 0.9909, "step": 4472 }, { "epoch": 1.81845903638951, "grad_norm": 0.11378835886716843, "learning_rate": 1.8155912884184818e-05, "loss": 0.9572, "step": 4473 }, { "epoch": 1.818865623094125, "grad_norm": 0.09671740978956223, "learning_rate": 1.8115204559332386e-05, "loss": 0.9486, "step": 4474 }, { "epoch": 1.8192722097987395, "grad_norm": 0.10830000042915344, "learning_rate": 1.807449623447995e-05, "loss": 0.9055, "step": 4475 }, { "epoch": 1.8196787965033543, "grad_norm": 0.1054878979921341, "learning_rate": 1.803378790962752e-05, "loss": 0.9878, "step": 4476 }, { 
"epoch": 1.820085383207969, "grad_norm": 0.11090472340583801, "learning_rate": 1.799307958477509e-05, "loss": 0.8731, "step": 4477 }, { "epoch": 1.8204919699125839, "grad_norm": 0.10842659324407578, "learning_rate": 1.7952371259922653e-05, "loss": 0.9303, "step": 4478 }, { "epoch": 1.8208985566171987, "grad_norm": 0.10983911901712418, "learning_rate": 1.7911662935070224e-05, "loss": 0.9713, "step": 4479 }, { "epoch": 1.8213051433218133, "grad_norm": 0.10775753855705261, "learning_rate": 1.787095461021779e-05, "loss": 1.008, "step": 4480 }, { "epoch": 1.8217117300264283, "grad_norm": 0.11450286209583282, "learning_rate": 1.7830246285365356e-05, "loss": 0.9387, "step": 4481 }, { "epoch": 1.8221183167310429, "grad_norm": 0.11208988726139069, "learning_rate": 1.7789537960512927e-05, "loss": 0.8588, "step": 4482 }, { "epoch": 1.8225249034356577, "grad_norm": 0.10878584533929825, "learning_rate": 1.7748829635660495e-05, "loss": 0.9913, "step": 4483 }, { "epoch": 1.8229314901402724, "grad_norm": 0.0990230068564415, "learning_rate": 1.7708121310808062e-05, "loss": 0.8097, "step": 4484 }, { "epoch": 1.823338076844887, "grad_norm": 0.10294988006353378, "learning_rate": 1.766741298595563e-05, "loss": 0.9606, "step": 4485 }, { "epoch": 1.823744663549502, "grad_norm": 0.1195545643568039, "learning_rate": 1.7626704661103197e-05, "loss": 1.1278, "step": 4486 }, { "epoch": 1.8241512502541166, "grad_norm": 0.10764475166797638, "learning_rate": 1.7585996336250765e-05, "loss": 0.86, "step": 4487 }, { "epoch": 1.8245578369587314, "grad_norm": 0.10787981748580933, "learning_rate": 1.754528801139833e-05, "loss": 0.9484, "step": 4488 }, { "epoch": 1.8249644236633462, "grad_norm": 0.11306875199079514, "learning_rate": 1.75045796865459e-05, "loss": 0.9843, "step": 4489 }, { "epoch": 1.825371010367961, "grad_norm": 0.10255679488182068, "learning_rate": 1.7463871361693468e-05, "loss": 0.9468, "step": 4490 }, { "epoch": 1.8257775970725758, "grad_norm": 0.11567334085702896, "learning_rate": 
1.7423163036841032e-05, "loss": 1.1066, "step": 4491 }, { "epoch": 1.8261841837771904, "grad_norm": 0.0961284264922142, "learning_rate": 1.7382454711988603e-05, "loss": 0.8327, "step": 4492 }, { "epoch": 1.8265907704818054, "grad_norm": 0.11782846599817276, "learning_rate": 1.734174638713617e-05, "loss": 1.0241, "step": 4493 }, { "epoch": 1.82699735718642, "grad_norm": 0.10835573822259903, "learning_rate": 1.7301038062283735e-05, "loss": 0.989, "step": 4494 }, { "epoch": 1.8274039438910348, "grad_norm": 0.1051306203007698, "learning_rate": 1.7260329737431306e-05, "loss": 0.8624, "step": 4495 }, { "epoch": 1.8278105305956496, "grad_norm": 0.11646796017885208, "learning_rate": 1.7219621412578874e-05, "loss": 0.9987, "step": 4496 }, { "epoch": 1.8282171173002641, "grad_norm": 0.11364038288593292, "learning_rate": 1.717891308772644e-05, "loss": 1.034, "step": 4497 }, { "epoch": 1.8286237040048792, "grad_norm": 0.12037666887044907, "learning_rate": 1.713820476287401e-05, "loss": 1.004, "step": 4498 }, { "epoch": 1.8290302907094937, "grad_norm": 0.11295337229967117, "learning_rate": 1.7097496438021577e-05, "loss": 0.977, "step": 4499 }, { "epoch": 1.8294368774141085, "grad_norm": 0.12203505635261536, "learning_rate": 1.7056788113169144e-05, "loss": 1.122, "step": 4500 }, { "epoch": 1.8298434641187233, "grad_norm": 0.10481808334589005, "learning_rate": 1.7016079788316712e-05, "loss": 0.9512, "step": 4501 }, { "epoch": 1.830250050823338, "grad_norm": 0.11461866647005081, "learning_rate": 1.697537146346428e-05, "loss": 1.0002, "step": 4502 }, { "epoch": 1.830656637527953, "grad_norm": 0.10656670480966568, "learning_rate": 1.6934663138611847e-05, "loss": 0.9555, "step": 4503 }, { "epoch": 1.8310632242325675, "grad_norm": 0.10964150726795197, "learning_rate": 1.6893954813759415e-05, "loss": 0.9794, "step": 4504 }, { "epoch": 1.8314698109371823, "grad_norm": 0.12029408663511276, "learning_rate": 1.6853246488906982e-05, "loss": 1.0442, "step": 4505 }, { "epoch": 
1.831876397641797, "grad_norm": 0.1036851778626442, "learning_rate": 1.681253816405455e-05, "loss": 0.8313, "step": 4506 }, { "epoch": 1.832282984346412, "grad_norm": 0.11831417679786682, "learning_rate": 1.6771829839202118e-05, "loss": 1.0545, "step": 4507 }, { "epoch": 1.8326895710510267, "grad_norm": 0.10131677240133286, "learning_rate": 1.6731121514349685e-05, "loss": 0.857, "step": 4508 }, { "epoch": 1.8330961577556413, "grad_norm": 0.10643882304430008, "learning_rate": 1.6690413189497253e-05, "loss": 0.9356, "step": 4509 }, { "epoch": 1.8335027444602563, "grad_norm": 0.10721322149038315, "learning_rate": 1.664970486464482e-05, "loss": 0.9221, "step": 4510 }, { "epoch": 1.8339093311648709, "grad_norm": 0.10363822430372238, "learning_rate": 1.6608996539792388e-05, "loss": 0.863, "step": 4511 }, { "epoch": 1.8343159178694857, "grad_norm": 0.10171811282634735, "learning_rate": 1.6568288214939956e-05, "loss": 0.86, "step": 4512 }, { "epoch": 1.8347225045741005, "grad_norm": 0.11216262727975845, "learning_rate": 1.6527579890087524e-05, "loss": 1.027, "step": 4513 }, { "epoch": 1.835129091278715, "grad_norm": 0.11542665213346481, "learning_rate": 1.648687156523509e-05, "loss": 0.9954, "step": 4514 }, { "epoch": 1.83553567798333, "grad_norm": 0.10395301133394241, "learning_rate": 1.644616324038266e-05, "loss": 0.8623, "step": 4515 }, { "epoch": 1.8359422646879446, "grad_norm": 0.0989793911576271, "learning_rate": 1.6405454915530226e-05, "loss": 0.8632, "step": 4516 }, { "epoch": 1.8363488513925594, "grad_norm": 0.1062735840678215, "learning_rate": 1.6364746590677794e-05, "loss": 1.0057, "step": 4517 }, { "epoch": 1.8367554380971742, "grad_norm": 0.11202707141637802, "learning_rate": 1.6324038265825362e-05, "loss": 0.9849, "step": 4518 }, { "epoch": 1.837162024801789, "grad_norm": 0.11205235123634338, "learning_rate": 1.628332994097293e-05, "loss": 0.9858, "step": 4519 }, { "epoch": 1.8375686115064038, "grad_norm": 0.10820986330509186, "learning_rate": 
1.6242621616120497e-05, "loss": 0.9558, "step": 4520 }, { "epoch": 1.8379751982110184, "grad_norm": 0.10929597169160843, "learning_rate": 1.6201913291268065e-05, "loss": 0.9891, "step": 4521 }, { "epoch": 1.8383817849156334, "grad_norm": 0.10770545899868011, "learning_rate": 1.6161204966415632e-05, "loss": 0.9391, "step": 4522 }, { "epoch": 1.838788371620248, "grad_norm": 0.10973810404539108, "learning_rate": 1.61204966415632e-05, "loss": 0.9154, "step": 4523 }, { "epoch": 1.8391949583248628, "grad_norm": 0.09949386119842529, "learning_rate": 1.6079788316710768e-05, "loss": 0.885, "step": 4524 }, { "epoch": 1.8396015450294776, "grad_norm": 0.11300302296876907, "learning_rate": 1.6039079991858335e-05, "loss": 0.9821, "step": 4525 }, { "epoch": 1.8400081317340922, "grad_norm": 0.10687512159347534, "learning_rate": 1.5998371667005903e-05, "loss": 1.0058, "step": 4526 }, { "epoch": 1.8404147184387072, "grad_norm": 0.10568513721227646, "learning_rate": 1.595766334215347e-05, "loss": 0.9018, "step": 4527 }, { "epoch": 1.8408213051433218, "grad_norm": 0.10843189060688019, "learning_rate": 1.5916955017301038e-05, "loss": 0.9419, "step": 4528 }, { "epoch": 1.8412278918479366, "grad_norm": 0.10421909391880035, "learning_rate": 1.5876246692448606e-05, "loss": 0.853, "step": 4529 }, { "epoch": 1.8416344785525514, "grad_norm": 0.10745032131671906, "learning_rate": 1.5835538367596177e-05, "loss": 0.88, "step": 4530 }, { "epoch": 1.842041065257166, "grad_norm": 0.10188285261392593, "learning_rate": 1.579483004274374e-05, "loss": 0.9143, "step": 4531 }, { "epoch": 1.842447651961781, "grad_norm": 0.10237755626440048, "learning_rate": 1.575412171789131e-05, "loss": 0.8407, "step": 4532 }, { "epoch": 1.8428542386663955, "grad_norm": 0.11180367320775986, "learning_rate": 1.571341339303888e-05, "loss": 0.981, "step": 4533 }, { "epoch": 1.8432608253710103, "grad_norm": 0.11422519385814667, "learning_rate": 1.5672705068186444e-05, "loss": 1.0089, "step": 4534 }, { "epoch": 
1.8436674120756251, "grad_norm": 0.10691160708665848, "learning_rate": 1.563199674333401e-05, "loss": 0.9363, "step": 4535 }, { "epoch": 1.84407399878024, "grad_norm": 0.1037185937166214, "learning_rate": 1.5591288418481583e-05, "loss": 0.8672, "step": 4536 }, { "epoch": 1.8444805854848547, "grad_norm": 0.11030171811580658, "learning_rate": 1.5550580093629147e-05, "loss": 0.975, "step": 4537 }, { "epoch": 1.8448871721894693, "grad_norm": 0.10458967834711075, "learning_rate": 1.5509871768776714e-05, "loss": 0.9169, "step": 4538 }, { "epoch": 1.8452937588940843, "grad_norm": 0.10651843994855881, "learning_rate": 1.5469163443924285e-05, "loss": 0.9499, "step": 4539 }, { "epoch": 1.8457003455986989, "grad_norm": 0.10716937482357025, "learning_rate": 1.542845511907185e-05, "loss": 1.0137, "step": 4540 }, { "epoch": 1.8461069323033137, "grad_norm": 0.09491477161645889, "learning_rate": 1.5387746794219417e-05, "loss": 0.8159, "step": 4541 }, { "epoch": 1.8465135190079285, "grad_norm": 0.11105383187532425, "learning_rate": 1.534703846936699e-05, "loss": 0.9554, "step": 4542 }, { "epoch": 1.846920105712543, "grad_norm": 0.1065712422132492, "learning_rate": 1.5306330144514556e-05, "loss": 0.896, "step": 4543 }, { "epoch": 1.847326692417158, "grad_norm": 0.11683394014835358, "learning_rate": 1.526562181966212e-05, "loss": 1.0732, "step": 4544 }, { "epoch": 1.8477332791217727, "grad_norm": 0.10906050354242325, "learning_rate": 1.522491349480969e-05, "loss": 1.0323, "step": 4545 }, { "epoch": 1.8481398658263875, "grad_norm": 0.11309327185153961, "learning_rate": 1.5184205169957257e-05, "loss": 0.9693, "step": 4546 }, { "epoch": 1.8485464525310022, "grad_norm": 0.11471321433782578, "learning_rate": 1.5143496845104823e-05, "loss": 0.9795, "step": 4547 }, { "epoch": 1.848953039235617, "grad_norm": 0.10957857966423035, "learning_rate": 1.5102788520252392e-05, "loss": 0.9773, "step": 4548 }, { "epoch": 1.8493596259402318, "grad_norm": 0.10189523547887802, "learning_rate": 
1.506208019539996e-05, "loss": 0.887, "step": 4549 }, { "epoch": 1.8497662126448464, "grad_norm": 0.10361644625663757, "learning_rate": 1.5021371870547526e-05, "loss": 0.8694, "step": 4550 }, { "epoch": 1.8501727993494614, "grad_norm": 0.10304764658212662, "learning_rate": 1.4980663545695095e-05, "loss": 0.9005, "step": 4551 }, { "epoch": 1.850579386054076, "grad_norm": 0.1083732470870018, "learning_rate": 1.4939955220842663e-05, "loss": 0.9132, "step": 4552 }, { "epoch": 1.8509859727586908, "grad_norm": 0.109224334359169, "learning_rate": 1.4899246895990232e-05, "loss": 1.03, "step": 4553 }, { "epoch": 1.8513925594633056, "grad_norm": 0.09817371517419815, "learning_rate": 1.4858538571137798e-05, "loss": 0.8626, "step": 4554 }, { "epoch": 1.8517991461679202, "grad_norm": 0.10599929839372635, "learning_rate": 1.4817830246285366e-05, "loss": 1.0029, "step": 4555 }, { "epoch": 1.8522057328725352, "grad_norm": 0.10864468663930893, "learning_rate": 1.4777121921432935e-05, "loss": 0.9572, "step": 4556 }, { "epoch": 1.8526123195771498, "grad_norm": 0.11035089194774628, "learning_rate": 1.4736413596580501e-05, "loss": 0.9821, "step": 4557 }, { "epoch": 1.8530189062817646, "grad_norm": 0.10766597837209702, "learning_rate": 1.4695705271728069e-05, "loss": 0.9368, "step": 4558 }, { "epoch": 1.8534254929863794, "grad_norm": 0.1012941375374794, "learning_rate": 1.4654996946875638e-05, "loss": 0.9643, "step": 4559 }, { "epoch": 1.853832079690994, "grad_norm": 0.10959739238023758, "learning_rate": 1.4614288622023204e-05, "loss": 0.9543, "step": 4560 }, { "epoch": 1.854238666395609, "grad_norm": 0.10180888324975967, "learning_rate": 1.4573580297170772e-05, "loss": 0.9113, "step": 4561 }, { "epoch": 1.8546452531002235, "grad_norm": 0.1098785549402237, "learning_rate": 1.4532871972318341e-05, "loss": 0.9224, "step": 4562 }, { "epoch": 1.8550518398048383, "grad_norm": 0.10429586470127106, "learning_rate": 1.4492163647465907e-05, "loss": 0.9505, "step": 4563 }, { "epoch": 
1.8554584265094531, "grad_norm": 0.1072763204574585, "learning_rate": 1.4451455322613475e-05, "loss": 0.9893, "step": 4564 }, { "epoch": 1.855865013214068, "grad_norm": 0.11279455572366714, "learning_rate": 1.4410746997761044e-05, "loss": 0.9223, "step": 4565 }, { "epoch": 1.8562715999186827, "grad_norm": 0.11097732186317444, "learning_rate": 1.4370038672908612e-05, "loss": 0.9855, "step": 4566 }, { "epoch": 1.8566781866232973, "grad_norm": 0.09808061271905899, "learning_rate": 1.4329330348056177e-05, "loss": 0.9168, "step": 4567 }, { "epoch": 1.8570847733279123, "grad_norm": 0.10069447755813599, "learning_rate": 1.4288622023203747e-05, "loss": 0.8709, "step": 4568 }, { "epoch": 1.857491360032527, "grad_norm": 0.10356453061103821, "learning_rate": 1.4247913698351314e-05, "loss": 0.876, "step": 4569 }, { "epoch": 1.8578979467371417, "grad_norm": 0.10439286381006241, "learning_rate": 1.420720537349888e-05, "loss": 1.0009, "step": 4570 }, { "epoch": 1.8583045334417565, "grad_norm": 0.1026233658194542, "learning_rate": 1.4166497048646448e-05, "loss": 0.8783, "step": 4571 }, { "epoch": 1.858711120146371, "grad_norm": 0.0925007164478302, "learning_rate": 1.4125788723794017e-05, "loss": 0.818, "step": 4572 }, { "epoch": 1.859117706850986, "grad_norm": 0.09667190164327621, "learning_rate": 1.4085080398941583e-05, "loss": 0.857, "step": 4573 }, { "epoch": 1.8595242935556007, "grad_norm": 0.10364139080047607, "learning_rate": 1.4044372074089151e-05, "loss": 0.8918, "step": 4574 }, { "epoch": 1.8599308802602155, "grad_norm": 0.10816872864961624, "learning_rate": 1.400366374923672e-05, "loss": 0.9472, "step": 4575 }, { "epoch": 1.8603374669648303, "grad_norm": 0.10876300930976868, "learning_rate": 1.3962955424384288e-05, "loss": 0.946, "step": 4576 }, { "epoch": 1.860744053669445, "grad_norm": 0.1095566526055336, "learning_rate": 1.3922247099531854e-05, "loss": 0.9764, "step": 4577 }, { "epoch": 1.8611506403740599, "grad_norm": 0.1096457913517952, "learning_rate": 
1.3881538774679423e-05, "loss": 0.9573, "step": 4578 }, { "epoch": 1.8615572270786744, "grad_norm": 0.11302363127470016, "learning_rate": 1.384083044982699e-05, "loss": 1.0051, "step": 4579 }, { "epoch": 1.8619638137832892, "grad_norm": 0.098774753510952, "learning_rate": 1.3800122124974557e-05, "loss": 0.8779, "step": 4580 }, { "epoch": 1.862370400487904, "grad_norm": 0.10221240669488907, "learning_rate": 1.3759413800122126e-05, "loss": 0.9076, "step": 4581 }, { "epoch": 1.8627769871925188, "grad_norm": 0.11016833037137985, "learning_rate": 1.3718705475269694e-05, "loss": 0.9252, "step": 4582 }, { "epoch": 1.8631835738971336, "grad_norm": 0.1114361584186554, "learning_rate": 1.367799715041726e-05, "loss": 0.9954, "step": 4583 }, { "epoch": 1.8635901606017482, "grad_norm": 0.10552554577589035, "learning_rate": 1.3637288825564829e-05, "loss": 0.8927, "step": 4584 }, { "epoch": 1.8639967473063632, "grad_norm": 0.10773453116416931, "learning_rate": 1.3596580500712397e-05, "loss": 0.9652, "step": 4585 }, { "epoch": 1.8644033340109778, "grad_norm": 0.10485775023698807, "learning_rate": 1.3555872175859963e-05, "loss": 0.9559, "step": 4586 }, { "epoch": 1.8648099207155926, "grad_norm": 0.10935719311237335, "learning_rate": 1.3515163851007532e-05, "loss": 0.9871, "step": 4587 }, { "epoch": 1.8652165074202074, "grad_norm": 0.10393113642930984, "learning_rate": 1.34744555261551e-05, "loss": 0.861, "step": 4588 }, { "epoch": 1.865623094124822, "grad_norm": 0.11607681959867477, "learning_rate": 1.3433747201302669e-05, "loss": 0.978, "step": 4589 }, { "epoch": 1.866029680829437, "grad_norm": 0.1011018306016922, "learning_rate": 1.3393038876450235e-05, "loss": 0.9045, "step": 4590 }, { "epoch": 1.8664362675340516, "grad_norm": 0.11329011619091034, "learning_rate": 1.3352330551597802e-05, "loss": 1.0339, "step": 4591 }, { "epoch": 1.8668428542386664, "grad_norm": 0.10683320462703705, "learning_rate": 1.3311622226745372e-05, "loss": 0.9406, "step": 4592 }, { "epoch": 
1.8672494409432812, "grad_norm": 0.1118168905377388, "learning_rate": 1.3270913901892936e-05, "loss": 0.9632, "step": 4593 }, { "epoch": 1.867656027647896, "grad_norm": 0.10472162812948227, "learning_rate": 1.3230205577040505e-05, "loss": 0.9109, "step": 4594 }, { "epoch": 1.8680626143525108, "grad_norm": 0.11069684475660324, "learning_rate": 1.3189497252188075e-05, "loss": 1.0543, "step": 4595 }, { "epoch": 1.8684692010571253, "grad_norm": 0.105756476521492, "learning_rate": 1.3148788927335639e-05, "loss": 0.9705, "step": 4596 }, { "epoch": 1.8688757877617403, "grad_norm": 0.1085345521569252, "learning_rate": 1.3108080602483208e-05, "loss": 0.9485, "step": 4597 }, { "epoch": 1.869282374466355, "grad_norm": 0.10738877952098846, "learning_rate": 1.3067372277630776e-05, "loss": 1.0104, "step": 4598 }, { "epoch": 1.8696889611709697, "grad_norm": 0.11370383948087692, "learning_rate": 1.3026663952778345e-05, "loss": 0.9926, "step": 4599 }, { "epoch": 1.8700955478755845, "grad_norm": 0.11408769339323044, "learning_rate": 1.2985955627925911e-05, "loss": 0.9852, "step": 4600 }, { "epoch": 1.870502134580199, "grad_norm": 0.1028301939368248, "learning_rate": 1.2945247303073479e-05, "loss": 0.9189, "step": 4601 }, { "epoch": 1.8709087212848141, "grad_norm": 0.10195198655128479, "learning_rate": 1.2904538978221048e-05, "loss": 0.907, "step": 4602 }, { "epoch": 1.8713153079894287, "grad_norm": 0.10509088635444641, "learning_rate": 1.2863830653368614e-05, "loss": 0.9704, "step": 4603 }, { "epoch": 1.8717218946940435, "grad_norm": 0.10894305258989334, "learning_rate": 1.2823122328516182e-05, "loss": 0.8871, "step": 4604 }, { "epoch": 1.8721284813986583, "grad_norm": 0.11078134924173355, "learning_rate": 1.2782414003663751e-05, "loss": 0.9472, "step": 4605 }, { "epoch": 1.8725350681032729, "grad_norm": 0.0979735478758812, "learning_rate": 1.2741705678811317e-05, "loss": 0.8495, "step": 4606 }, { "epoch": 1.8729416548078879, "grad_norm": 0.09745296090841293, "learning_rate": 
1.2700997353958885e-05, "loss": 0.8289, "step": 4607 }, { "epoch": 1.8733482415125025, "grad_norm": 0.10826481133699417, "learning_rate": 1.2660289029106454e-05, "loss": 0.9967, "step": 4608 }, { "epoch": 1.8737548282171173, "grad_norm": 0.1063094213604927, "learning_rate": 1.261958070425402e-05, "loss": 0.9209, "step": 4609 }, { "epoch": 1.874161414921732, "grad_norm": 0.11431606858968735, "learning_rate": 1.2578872379401587e-05, "loss": 1.0041, "step": 4610 }, { "epoch": 1.8745680016263468, "grad_norm": 0.10370271652936935, "learning_rate": 1.2538164054549157e-05, "loss": 0.9162, "step": 4611 }, { "epoch": 1.8749745883309616, "grad_norm": 0.10634543746709824, "learning_rate": 1.2497455729696723e-05, "loss": 0.9517, "step": 4612 }, { "epoch": 1.8753811750355762, "grad_norm": 0.11096760630607605, "learning_rate": 1.245674740484429e-05, "loss": 0.9201, "step": 4613 }, { "epoch": 1.8757877617401912, "grad_norm": 0.10658212751150131, "learning_rate": 1.241603907999186e-05, "loss": 0.8743, "step": 4614 }, { "epoch": 1.8761943484448058, "grad_norm": 0.11457500606775284, "learning_rate": 1.2375330755139426e-05, "loss": 1.0447, "step": 4615 }, { "epoch": 1.8766009351494206, "grad_norm": 0.10246486961841583, "learning_rate": 1.2334622430286995e-05, "loss": 0.8899, "step": 4616 }, { "epoch": 1.8770075218540354, "grad_norm": 0.1061936691403389, "learning_rate": 1.2293914105434563e-05, "loss": 0.8886, "step": 4617 }, { "epoch": 1.87741410855865, "grad_norm": 0.11726386845111847, "learning_rate": 1.2253205780582129e-05, "loss": 1.0317, "step": 4618 }, { "epoch": 1.877820695263265, "grad_norm": 0.11178486049175262, "learning_rate": 1.2212497455729698e-05, "loss": 1.0469, "step": 4619 }, { "epoch": 1.8782272819678796, "grad_norm": 0.10353215783834457, "learning_rate": 1.2171789130877265e-05, "loss": 0.9078, "step": 4620 }, { "epoch": 1.8786338686724944, "grad_norm": 0.10023000091314316, "learning_rate": 1.2131080806024833e-05, "loss": 0.8645, "step": 4621 }, { "epoch": 
1.8790404553771092, "grad_norm": 0.11288487911224365, "learning_rate": 1.20903724811724e-05, "loss": 1.0863, "step": 4622 }, { "epoch": 1.879447042081724, "grad_norm": 0.10953675955533981, "learning_rate": 1.2049664156319967e-05, "loss": 0.9686, "step": 4623 }, { "epoch": 1.8798536287863388, "grad_norm": 0.10264912992715836, "learning_rate": 1.2008955831467536e-05, "loss": 0.9058, "step": 4624 }, { "epoch": 1.8802602154909533, "grad_norm": 0.10558958351612091, "learning_rate": 1.1968247506615104e-05, "loss": 0.8903, "step": 4625 }, { "epoch": 1.8806668021955684, "grad_norm": 0.11385960131883621, "learning_rate": 1.1927539181762671e-05, "loss": 0.9852, "step": 4626 }, { "epoch": 1.881073388900183, "grad_norm": 0.127496600151062, "learning_rate": 1.1886830856910239e-05, "loss": 1.0174, "step": 4627 }, { "epoch": 1.8814799756047977, "grad_norm": 0.1023222878575325, "learning_rate": 1.1846122532057807e-05, "loss": 0.8181, "step": 4628 }, { "epoch": 1.8818865623094125, "grad_norm": 0.10723838210105896, "learning_rate": 1.1805414207205374e-05, "loss": 0.8972, "step": 4629 }, { "epoch": 1.8822931490140271, "grad_norm": 0.10070095211267471, "learning_rate": 1.1764705882352942e-05, "loss": 0.8543, "step": 4630 }, { "epoch": 1.8826997357186421, "grad_norm": 0.10262621194124222, "learning_rate": 1.172399755750051e-05, "loss": 0.8568, "step": 4631 }, { "epoch": 1.8831063224232567, "grad_norm": 0.1155037060379982, "learning_rate": 1.1683289232648077e-05, "loss": 0.9725, "step": 4632 }, { "epoch": 1.8835129091278715, "grad_norm": 0.1091943308711052, "learning_rate": 1.1642580907795645e-05, "loss": 0.9869, "step": 4633 }, { "epoch": 1.8839194958324863, "grad_norm": 0.10687655955553055, "learning_rate": 1.1601872582943212e-05, "loss": 0.9417, "step": 4634 }, { "epoch": 1.8843260825371009, "grad_norm": 0.11003026366233826, "learning_rate": 1.156116425809078e-05, "loss": 0.9662, "step": 4635 }, { "epoch": 1.884732669241716, "grad_norm": 0.10457431524991989, "learning_rate": 
1.1520455933238348e-05, "loss": 0.8877, "step": 4636 }, { "epoch": 1.8851392559463305, "grad_norm": 0.09627239406108856, "learning_rate": 1.1479747608385915e-05, "loss": 0.8781, "step": 4637 }, { "epoch": 1.8855458426509453, "grad_norm": 0.10630346089601517, "learning_rate": 1.1439039283533483e-05, "loss": 0.9602, "step": 4638 }, { "epoch": 1.88595242935556, "grad_norm": 0.10510063916444778, "learning_rate": 1.139833095868105e-05, "loss": 0.9213, "step": 4639 }, { "epoch": 1.8863590160601749, "grad_norm": 0.11134610325098038, "learning_rate": 1.1357622633828618e-05, "loss": 0.9601, "step": 4640 }, { "epoch": 1.8867656027647897, "grad_norm": 0.11184284090995789, "learning_rate": 1.1316914308976186e-05, "loss": 0.9936, "step": 4641 }, { "epoch": 1.8871721894694042, "grad_norm": 0.10282327234745026, "learning_rate": 1.1276205984123753e-05, "loss": 0.8698, "step": 4642 }, { "epoch": 1.8875787761740193, "grad_norm": 0.1153402030467987, "learning_rate": 1.1235497659271321e-05, "loss": 0.9564, "step": 4643 }, { "epoch": 1.8879853628786338, "grad_norm": 0.10442263633012772, "learning_rate": 1.119478933441889e-05, "loss": 0.9136, "step": 4644 }, { "epoch": 1.8883919495832486, "grad_norm": 0.09091400355100632, "learning_rate": 1.1154081009566456e-05, "loss": 0.7862, "step": 4645 }, { "epoch": 1.8887985362878634, "grad_norm": 0.11083805561065674, "learning_rate": 1.1113372684714024e-05, "loss": 0.9317, "step": 4646 }, { "epoch": 1.889205122992478, "grad_norm": 0.10703961551189423, "learning_rate": 1.1072664359861593e-05, "loss": 0.9247, "step": 4647 }, { "epoch": 1.889611709697093, "grad_norm": 0.10570546984672546, "learning_rate": 1.103195603500916e-05, "loss": 0.97, "step": 4648 }, { "epoch": 1.8900182964017076, "grad_norm": 0.10433092713356018, "learning_rate": 1.0991247710156729e-05, "loss": 0.8298, "step": 4649 }, { "epoch": 1.8904248831063224, "grad_norm": 0.10301043838262558, "learning_rate": 1.0950539385304295e-05, "loss": 0.8934, "step": 4650 }, { "epoch": 
1.8908314698109372, "grad_norm": 0.1076212227344513, "learning_rate": 1.0909831060451862e-05, "loss": 1.0001, "step": 4651 }, { "epoch": 1.891238056515552, "grad_norm": 0.11841297894716263, "learning_rate": 1.0869122735599431e-05, "loss": 0.9722, "step": 4652 }, { "epoch": 1.8916446432201668, "grad_norm": 0.10684975981712341, "learning_rate": 1.0828414410746997e-05, "loss": 0.9199, "step": 4653 }, { "epoch": 1.8920512299247814, "grad_norm": 0.11354047805070877, "learning_rate": 1.0787706085894565e-05, "loss": 0.9114, "step": 4654 }, { "epoch": 1.8924578166293964, "grad_norm": 0.11264258623123169, "learning_rate": 1.0746997761042134e-05, "loss": 1.0013, "step": 4655 }, { "epoch": 1.892864403334011, "grad_norm": 0.11007174849510193, "learning_rate": 1.07062894361897e-05, "loss": 0.8797, "step": 4656 }, { "epoch": 1.8932709900386258, "grad_norm": 0.10631585121154785, "learning_rate": 1.066558111133727e-05, "loss": 0.8326, "step": 4657 }, { "epoch": 1.8936775767432406, "grad_norm": 0.10875297337770462, "learning_rate": 1.0624872786484837e-05, "loss": 1.0055, "step": 4658 }, { "epoch": 1.8940841634478551, "grad_norm": 0.0996069461107254, "learning_rate": 1.0584164461632403e-05, "loss": 0.8888, "step": 4659 }, { "epoch": 1.8944907501524701, "grad_norm": 0.1021905168890953, "learning_rate": 1.0543456136779973e-05, "loss": 0.961, "step": 4660 }, { "epoch": 1.8948973368570847, "grad_norm": 0.1107843890786171, "learning_rate": 1.050274781192754e-05, "loss": 0.9494, "step": 4661 }, { "epoch": 1.8953039235616995, "grad_norm": 0.10896456986665726, "learning_rate": 1.0462039487075108e-05, "loss": 0.9543, "step": 4662 }, { "epoch": 1.8957105102663143, "grad_norm": 0.09973134100437164, "learning_rate": 1.0421331162222675e-05, "loss": 0.865, "step": 4663 }, { "epoch": 1.896117096970929, "grad_norm": 0.11522912234067917, "learning_rate": 1.0380622837370241e-05, "loss": 0.9884, "step": 4664 }, { "epoch": 1.896523683675544, "grad_norm": 0.11264660954475403, "learning_rate": 
1.033991451251781e-05, "loss": 0.9839, "step": 4665 }, { "epoch": 1.8969302703801585, "grad_norm": 0.10416486859321594, "learning_rate": 1.0299206187665378e-05, "loss": 0.9232, "step": 4666 }, { "epoch": 1.8973368570847733, "grad_norm": 0.10714686661958694, "learning_rate": 1.0258497862812946e-05, "loss": 0.9947, "step": 4667 }, { "epoch": 1.897743443789388, "grad_norm": 0.10765139013528824, "learning_rate": 1.0217789537960514e-05, "loss": 0.9115, "step": 4668 }, { "epoch": 1.8981500304940029, "grad_norm": 0.1037706732749939, "learning_rate": 1.0177081213108081e-05, "loss": 0.9301, "step": 4669 }, { "epoch": 1.8985566171986177, "grad_norm": 0.10889194905757904, "learning_rate": 1.0136372888255649e-05, "loss": 0.9222, "step": 4670 }, { "epoch": 1.8989632039032323, "grad_norm": 0.11055561900138855, "learning_rate": 1.0095664563403217e-05, "loss": 1.0298, "step": 4671 }, { "epoch": 1.8993697906078473, "grad_norm": 0.10710859298706055, "learning_rate": 1.0054956238550784e-05, "loss": 0.9167, "step": 4672 }, { "epoch": 1.8997763773124619, "grad_norm": 0.10510309040546417, "learning_rate": 1.0014247913698352e-05, "loss": 0.941, "step": 4673 }, { "epoch": 1.9001829640170766, "grad_norm": 0.11385677009820938, "learning_rate": 9.97353958884592e-06, "loss": 0.9312, "step": 4674 }, { "epoch": 1.9005895507216914, "grad_norm": 0.11595090478658676, "learning_rate": 9.932831263993487e-06, "loss": 1.0468, "step": 4675 }, { "epoch": 1.900996137426306, "grad_norm": 0.10831689089536667, "learning_rate": 9.892122939141055e-06, "loss": 0.9394, "step": 4676 }, { "epoch": 1.901402724130921, "grad_norm": 0.10828686505556107, "learning_rate": 9.851414614288622e-06, "loss": 0.8823, "step": 4677 }, { "epoch": 1.9018093108355356, "grad_norm": 0.11457982659339905, "learning_rate": 9.81070628943619e-06, "loss": 0.9871, "step": 4678 }, { "epoch": 1.9022158975401504, "grad_norm": 0.10354585945606232, "learning_rate": 9.769997964583758e-06, "loss": 0.9164, "step": 4679 }, { "epoch": 
1.9026224842447652, "grad_norm": 0.1089097335934639, "learning_rate": 9.729289639731325e-06, "loss": 0.8705, "step": 4680 }, { "epoch": 1.90302907094938, "grad_norm": 0.09594661742448807, "learning_rate": 9.688581314878893e-06, "loss": 0.8553, "step": 4681 }, { "epoch": 1.9034356576539948, "grad_norm": 0.0939720869064331, "learning_rate": 9.64787299002646e-06, "loss": 0.9019, "step": 4682 }, { "epoch": 1.9038422443586094, "grad_norm": 0.10637430101633072, "learning_rate": 9.607164665174028e-06, "loss": 0.9498, "step": 4683 }, { "epoch": 1.9042488310632242, "grad_norm": 0.11103527247905731, "learning_rate": 9.566456340321596e-06, "loss": 0.9628, "step": 4684 }, { "epoch": 1.904655417767839, "grad_norm": 0.1133730486035347, "learning_rate": 9.525748015469165e-06, "loss": 0.9158, "step": 4685 }, { "epoch": 1.9050620044724538, "grad_norm": 0.11607538908720016, "learning_rate": 9.485039690616731e-06, "loss": 0.9259, "step": 4686 }, { "epoch": 1.9054685911770686, "grad_norm": 0.10650195926427841, "learning_rate": 9.444331365764299e-06, "loss": 0.8266, "step": 4687 }, { "epoch": 1.9058751778816831, "grad_norm": 0.11444000154733658, "learning_rate": 9.403623040911868e-06, "loss": 0.9656, "step": 4688 }, { "epoch": 1.9062817645862982, "grad_norm": 0.11130890995264053, "learning_rate": 9.362914716059434e-06, "loss": 0.9636, "step": 4689 }, { "epoch": 1.9066883512909127, "grad_norm": 0.114923857152462, "learning_rate": 9.322206391207003e-06, "loss": 0.9988, "step": 4690 }, { "epoch": 1.9070949379955275, "grad_norm": 0.11439431458711624, "learning_rate": 9.28149806635457e-06, "loss": 1.0728, "step": 4691 }, { "epoch": 1.9075015247001423, "grad_norm": 0.10544802248477936, "learning_rate": 9.240789741502137e-06, "loss": 0.9356, "step": 4692 }, { "epoch": 1.907908111404757, "grad_norm": 0.1091354712843895, "learning_rate": 9.200081416649706e-06, "loss": 1.0261, "step": 4693 }, { "epoch": 1.908314698109372, "grad_norm": 0.11757560819387436, "learning_rate": 9.159373091797272e-06, 
"loss": 1.0694, "step": 4694 }, { "epoch": 1.9087212848139865, "grad_norm": 0.1126813217997551, "learning_rate": 9.118664766944841e-06, "loss": 0.995, "step": 4695 }, { "epoch": 1.9091278715186013, "grad_norm": 0.10527123510837555, "learning_rate": 9.077956442092409e-06, "loss": 0.8656, "step": 4696 }, { "epoch": 1.909534458223216, "grad_norm": 0.11032869666814804, "learning_rate": 9.037248117239975e-06, "loss": 1.0283, "step": 4697 }, { "epoch": 1.909941044927831, "grad_norm": 0.11626307666301727, "learning_rate": 8.996539792387544e-06, "loss": 1.0522, "step": 4698 }, { "epoch": 1.9103476316324457, "grad_norm": 0.11547650396823883, "learning_rate": 8.955831467535112e-06, "loss": 1.0599, "step": 4699 }, { "epoch": 1.9107542183370603, "grad_norm": 0.1148945763707161, "learning_rate": 8.915123142682678e-06, "loss": 1.0609, "step": 4700 }, { "epoch": 1.9111608050416753, "grad_norm": 0.11297351121902466, "learning_rate": 8.874414817830247e-06, "loss": 0.9328, "step": 4701 }, { "epoch": 1.9115673917462899, "grad_norm": 0.10873715579509735, "learning_rate": 8.833706492977815e-06, "loss": 1.0021, "step": 4702 }, { "epoch": 1.9119739784509047, "grad_norm": 0.10393388569355011, "learning_rate": 8.792998168125383e-06, "loss": 0.9525, "step": 4703 }, { "epoch": 1.9123805651555195, "grad_norm": 0.10606920719146729, "learning_rate": 8.75228984327295e-06, "loss": 0.929, "step": 4704 }, { "epoch": 1.912787151860134, "grad_norm": 0.11683373153209686, "learning_rate": 8.711581518420516e-06, "loss": 1.0064, "step": 4705 }, { "epoch": 1.913193738564749, "grad_norm": 0.09577450901269913, "learning_rate": 8.670873193568085e-06, "loss": 0.849, "step": 4706 }, { "epoch": 1.9136003252693636, "grad_norm": 0.10738305747509003, "learning_rate": 8.630164868715653e-06, "loss": 0.8143, "step": 4707 }, { "epoch": 1.9140069119739784, "grad_norm": 0.10720358788967133, "learning_rate": 8.58945654386322e-06, "loss": 0.9651, "step": 4708 }, { "epoch": 1.9144134986785932, "grad_norm": 
0.10535360872745514, "learning_rate": 8.548748219010788e-06, "loss": 0.9265, "step": 4709 }, { "epoch": 1.9148200853832078, "grad_norm": 0.0978621169924736, "learning_rate": 8.508039894158356e-06, "loss": 0.8808, "step": 4710 }, { "epoch": 1.9152266720878228, "grad_norm": 0.09929387271404266, "learning_rate": 8.467331569305924e-06, "loss": 0.9438, "step": 4711 }, { "epoch": 1.9156332587924374, "grad_norm": 0.10723693668842316, "learning_rate": 8.426623244453491e-06, "loss": 0.9293, "step": 4712 }, { "epoch": 1.9160398454970522, "grad_norm": 0.11256638914346695, "learning_rate": 8.385914919601059e-06, "loss": 0.9946, "step": 4713 }, { "epoch": 1.916446432201667, "grad_norm": 0.10032477974891663, "learning_rate": 8.345206594748627e-06, "loss": 0.9008, "step": 4714 }, { "epoch": 1.9168530189062818, "grad_norm": 0.10227346420288086, "learning_rate": 8.304498269896194e-06, "loss": 0.9495, "step": 4715 }, { "epoch": 1.9172596056108966, "grad_norm": 0.11047019064426422, "learning_rate": 8.263789945043762e-06, "loss": 1.0152, "step": 4716 }, { "epoch": 1.9176661923155112, "grad_norm": 0.10809038579463959, "learning_rate": 8.22308162019133e-06, "loss": 0.9082, "step": 4717 }, { "epoch": 1.9180727790201262, "grad_norm": 0.10758131742477417, "learning_rate": 8.182373295338897e-06, "loss": 0.9599, "step": 4718 }, { "epoch": 1.9184793657247408, "grad_norm": 0.11720570921897888, "learning_rate": 8.141664970486465e-06, "loss": 1.0244, "step": 4719 }, { "epoch": 1.9188859524293556, "grad_norm": 0.10745330154895782, "learning_rate": 8.100956645634032e-06, "loss": 1.0118, "step": 4720 }, { "epoch": 1.9192925391339704, "grad_norm": 0.10367954522371292, "learning_rate": 8.0602483207816e-06, "loss": 0.9511, "step": 4721 }, { "epoch": 1.919699125838585, "grad_norm": 0.10603120177984238, "learning_rate": 8.019539995929168e-06, "loss": 0.8881, "step": 4722 }, { "epoch": 1.9201057125432, "grad_norm": 0.11715482175350189, "learning_rate": 7.978831671076735e-06, "loss": 0.9882, "step": 4723 
}, { "epoch": 1.9205122992478145, "grad_norm": 0.10853135585784912, "learning_rate": 7.938123346224303e-06, "loss": 0.9684, "step": 4724 }, { "epoch": 1.9209188859524293, "grad_norm": 0.10487642139196396, "learning_rate": 7.89741502137187e-06, "loss": 0.9444, "step": 4725 }, { "epoch": 1.9213254726570441, "grad_norm": 0.10407551378011703, "learning_rate": 7.85670669651944e-06, "loss": 0.9244, "step": 4726 }, { "epoch": 1.921732059361659, "grad_norm": 0.11093860864639282, "learning_rate": 7.815998371667006e-06, "loss": 0.8877, "step": 4727 }, { "epoch": 1.9221386460662737, "grad_norm": 0.10927627235651016, "learning_rate": 7.775290046814573e-06, "loss": 0.9305, "step": 4728 }, { "epoch": 1.9225452327708883, "grad_norm": 0.1054539605975151, "learning_rate": 7.734581721962143e-06, "loss": 0.9661, "step": 4729 }, { "epoch": 1.9229518194755033, "grad_norm": 0.10301047563552856, "learning_rate": 7.693873397109709e-06, "loss": 0.9134, "step": 4730 }, { "epoch": 1.9233584061801179, "grad_norm": 0.1252659112215042, "learning_rate": 7.653165072257278e-06, "loss": 0.9546, "step": 4731 }, { "epoch": 1.9237649928847327, "grad_norm": 0.09721864759922028, "learning_rate": 7.612456747404845e-06, "loss": 0.9061, "step": 4732 }, { "epoch": 1.9241715795893475, "grad_norm": 0.0942339301109314, "learning_rate": 7.5717484225524116e-06, "loss": 0.8571, "step": 4733 }, { "epoch": 1.924578166293962, "grad_norm": 0.11213699728250504, "learning_rate": 7.53104009769998e-06, "loss": 1.0756, "step": 4734 }, { "epoch": 1.924984752998577, "grad_norm": 0.11195732653141022, "learning_rate": 7.490331772847548e-06, "loss": 1.0448, "step": 4735 }, { "epoch": 1.9253913397031917, "grad_norm": 0.11141734570264816, "learning_rate": 7.449623447995116e-06, "loss": 0.966, "step": 4736 }, { "epoch": 1.9257979264078064, "grad_norm": 0.11802522838115692, "learning_rate": 7.408915123142683e-06, "loss": 1.0228, "step": 4737 }, { "epoch": 1.9262045131124212, "grad_norm": 0.11186794191598892, "learning_rate": 
7.3682067982902506e-06, "loss": 0.9879, "step": 4738 }, { "epoch": 1.9266110998170358, "grad_norm": 0.10062884539365768, "learning_rate": 7.327498473437819e-06, "loss": 0.9387, "step": 4739 }, { "epoch": 1.9270176865216508, "grad_norm": 0.09903592616319656, "learning_rate": 7.286790148585386e-06, "loss": 0.8054, "step": 4740 }, { "epoch": 1.9274242732262654, "grad_norm": 0.11208473145961761, "learning_rate": 7.2460818237329535e-06, "loss": 0.9749, "step": 4741 }, { "epoch": 1.9278308599308802, "grad_norm": 0.10989855974912643, "learning_rate": 7.205373498880522e-06, "loss": 1.0421, "step": 4742 }, { "epoch": 1.928237446635495, "grad_norm": 0.10330630093812943, "learning_rate": 7.164665174028089e-06, "loss": 0.9525, "step": 4743 }, { "epoch": 1.9286440333401098, "grad_norm": 0.10825500637292862, "learning_rate": 7.123956849175657e-06, "loss": 1.0131, "step": 4744 }, { "epoch": 1.9290506200447246, "grad_norm": 0.11086854338645935, "learning_rate": 7.083248524323224e-06, "loss": 0.9638, "step": 4745 }, { "epoch": 1.9294572067493392, "grad_norm": 0.09380614757537842, "learning_rate": 7.042540199470792e-06, "loss": 0.7884, "step": 4746 }, { "epoch": 1.9298637934539542, "grad_norm": 0.11035917699337006, "learning_rate": 7.00183187461836e-06, "loss": 0.9481, "step": 4747 }, { "epoch": 1.9302703801585688, "grad_norm": 0.10530402511358261, "learning_rate": 6.961123549765927e-06, "loss": 0.9452, "step": 4748 }, { "epoch": 1.9306769668631836, "grad_norm": 0.10548150539398193, "learning_rate": 6.920415224913495e-06, "loss": 0.9303, "step": 4749 }, { "epoch": 1.9310835535677984, "grad_norm": 0.11273462325334549, "learning_rate": 6.879706900061063e-06, "loss": 0.9262, "step": 4750 }, { "epoch": 1.931490140272413, "grad_norm": 0.10211534053087234, "learning_rate": 6.83899857520863e-06, "loss": 0.8864, "step": 4751 }, { "epoch": 1.931896726977028, "grad_norm": 0.108225978910923, "learning_rate": 6.798290250356198e-06, "loss": 0.9758, "step": 4752 }, { "epoch": 1.9323033136816425, 
"grad_norm": null, "learning_rate": 6.757581925503766e-06, "loss": 0.9683, "step": 4753 }, { "epoch": 1.9327099003862573, "grad_norm": 0.10094906389713287, "learning_rate": 6.716873600651334e-06, "loss": 0.8494, "step": 4754 }, { "epoch": 1.9331164870908721, "grad_norm": 0.10754916816949844, "learning_rate": 6.676165275798901e-06, "loss": 1.0235, "step": 4755 }, { "epoch": 1.933523073795487, "grad_norm": 0.10774201899766922, "learning_rate": 6.635456950946468e-06, "loss": 0.9243, "step": 4756 }, { "epoch": 1.9339296605001017, "grad_norm": 0.10798453539609909, "learning_rate": 6.594748626094037e-06, "loss": 0.9023, "step": 4757 }, { "epoch": 1.9343362472047163, "grad_norm": 0.10341257601976395, "learning_rate": 6.554040301241604e-06, "loss": 0.9192, "step": 4758 }, { "epoch": 1.9347428339093313, "grad_norm": 0.10943766683340073, "learning_rate": 6.513331976389173e-06, "loss": 0.9635, "step": 4759 }, { "epoch": 1.935149420613946, "grad_norm": 0.10420777648687363, "learning_rate": 6.472623651536739e-06, "loss": 0.9566, "step": 4760 }, { "epoch": 1.9355560073185607, "grad_norm": 0.1093553900718689, "learning_rate": 6.431915326684307e-06, "loss": 0.9353, "step": 4761 }, { "epoch": 1.9359625940231755, "grad_norm": 0.10667470842599869, "learning_rate": 6.3912070018318755e-06, "loss": 0.9312, "step": 4762 }, { "epoch": 1.93636918072779, "grad_norm": 0.10576347261667252, "learning_rate": 6.350498676979442e-06, "loss": 0.9651, "step": 4763 }, { "epoch": 1.936775767432405, "grad_norm": 0.106935515999794, "learning_rate": 6.30979035212701e-06, "loss": 0.9552, "step": 4764 }, { "epoch": 1.9371823541370197, "grad_norm": 0.11244690418243408, "learning_rate": 6.269082027274578e-06, "loss": 0.9965, "step": 4765 }, { "epoch": 1.9375889408416345, "grad_norm": 0.10496662557125092, "learning_rate": 6.228373702422145e-06, "loss": 0.9158, "step": 4766 }, { "epoch": 1.9379955275462493, "grad_norm": 0.11604844778776169, "learning_rate": 6.187665377569713e-06, "loss": 1.0076, "step": 4767 }, 
{ "epoch": 1.9384021142508638, "grad_norm": 0.10365528613328934, "learning_rate": 6.146957052717281e-06, "loss": 0.9314, "step": 4768 }, { "epoch": 1.9388087009554789, "grad_norm": 0.1046256497502327, "learning_rate": 6.106248727864849e-06, "loss": 0.8937, "step": 4769 }, { "epoch": 1.9392152876600934, "grad_norm": 0.10845932364463806, "learning_rate": 6.0655404030124166e-06, "loss": 0.9361, "step": 4770 }, { "epoch": 1.9396218743647082, "grad_norm": 0.1074332445859909, "learning_rate": 6.024832078159983e-06, "loss": 0.8536, "step": 4771 }, { "epoch": 1.940028461069323, "grad_norm": 0.10240095853805542, "learning_rate": 5.984123753307552e-06, "loss": 0.965, "step": 4772 }, { "epoch": 1.9404350477739378, "grad_norm": 0.11463471502065659, "learning_rate": 5.9434154284551195e-06, "loss": 1.0433, "step": 4773 }, { "epoch": 1.9408416344785526, "grad_norm": 0.1043282225728035, "learning_rate": 5.902707103602687e-06, "loss": 0.9409, "step": 4774 }, { "epoch": 1.9412482211831672, "grad_norm": 0.10512422770261765, "learning_rate": 5.861998778750255e-06, "loss": 0.9548, "step": 4775 }, { "epoch": 1.9416548078877822, "grad_norm": 0.11762488633394241, "learning_rate": 5.821290453897822e-06, "loss": 1.02, "step": 4776 }, { "epoch": 1.9420613945923968, "grad_norm": 0.10974336415529251, "learning_rate": 5.78058212904539e-06, "loss": 0.9563, "step": 4777 }, { "epoch": 1.9424679812970116, "grad_norm": 0.1021999642252922, "learning_rate": 5.739873804192958e-06, "loss": 0.8839, "step": 4778 }, { "epoch": 1.9428745680016264, "grad_norm": 0.09945038706064224, "learning_rate": 5.699165479340525e-06, "loss": 0.9092, "step": 4779 }, { "epoch": 1.943281154706241, "grad_norm": 0.10554639250040054, "learning_rate": 5.658457154488093e-06, "loss": 0.9512, "step": 4780 }, { "epoch": 1.943687741410856, "grad_norm": 0.10311925411224365, "learning_rate": 5.6177488296356605e-06, "loss": 0.895, "step": 4781 }, { "epoch": 1.9440943281154706, "grad_norm": 0.1108926311135292, "learning_rate": 
5.577040504783228e-06, "loss": 1.0093, "step": 4782 }, { "epoch": 1.9445009148200854, "grad_norm": 0.12254206836223602, "learning_rate": 5.536332179930797e-06, "loss": 0.9449, "step": 4783 }, { "epoch": 1.9449075015247002, "grad_norm": 0.11230570077896118, "learning_rate": 5.495623855078364e-06, "loss": 0.9471, "step": 4784 }, { "epoch": 1.945314088229315, "grad_norm": 0.11160276085138321, "learning_rate": 5.454915530225931e-06, "loss": 0.9878, "step": 4785 }, { "epoch": 1.9457206749339298, "grad_norm": 0.11728110909461975, "learning_rate": 5.414207205373499e-06, "loss": 1.0934, "step": 4786 }, { "epoch": 1.9461272616385443, "grad_norm": 0.10678225010633469, "learning_rate": 5.373498880521067e-06, "loss": 0.9944, "step": 4787 }, { "epoch": 1.9465338483431591, "grad_norm": 0.11184896528720856, "learning_rate": 5.332790555668635e-06, "loss": 0.8622, "step": 4788 }, { "epoch": 1.946940435047774, "grad_norm": 0.1041015014052391, "learning_rate": 5.292082230816202e-06, "loss": 0.8879, "step": 4789 }, { "epoch": 1.9473470217523887, "grad_norm": 0.11516954004764557, "learning_rate": 5.25137390596377e-06, "loss": 1.0263, "step": 4790 }, { "epoch": 1.9477536084570035, "grad_norm": 0.1053466945886612, "learning_rate": 5.210665581111338e-06, "loss": 0.957, "step": 4791 }, { "epoch": 1.948160195161618, "grad_norm": 0.10122139006853104, "learning_rate": 5.169957256258905e-06, "loss": 0.9314, "step": 4792 }, { "epoch": 1.948566781866233, "grad_norm": 0.11456303298473358, "learning_rate": 5.129248931406473e-06, "loss": 0.9948, "step": 4793 }, { "epoch": 1.9489733685708477, "grad_norm": 0.11062067002058029, "learning_rate": 5.088540606554041e-06, "loss": 0.9113, "step": 4794 }, { "epoch": 1.9493799552754625, "grad_norm": 0.10972128063440323, "learning_rate": 5.047832281701608e-06, "loss": 1.0531, "step": 4795 }, { "epoch": 1.9497865419800773, "grad_norm": 0.10982213169336319, "learning_rate": 5.007123956849176e-06, "loss": 0.8759, "step": 4796 }, { "epoch": 1.9501931286846919, 
"grad_norm": 0.10222429037094116, "learning_rate": 4.9664156319967435e-06, "loss": 0.8341, "step": 4797 }, { "epoch": 1.9505997153893069, "grad_norm": 0.1121751070022583, "learning_rate": 4.925707307144311e-06, "loss": 1.0322, "step": 4798 }, { "epoch": 1.9510063020939215, "grad_norm": 0.09666823595762253, "learning_rate": 4.884998982291879e-06, "loss": 0.8805, "step": 4799 }, { "epoch": 1.9514128887985362, "grad_norm": 0.09243001788854599, "learning_rate": 4.8442906574394464e-06, "loss": 0.746, "step": 4800 }, { "epoch": 1.951819475503151, "grad_norm": 0.10632304102182388, "learning_rate": 4.803582332587014e-06, "loss": 0.9029, "step": 4801 }, { "epoch": 1.9522260622077658, "grad_norm": 0.11486592143774033, "learning_rate": 4.7628740077345826e-06, "loss": 1.0341, "step": 4802 }, { "epoch": 1.9526326489123806, "grad_norm": 0.10841212421655655, "learning_rate": 4.722165682882149e-06, "loss": 0.9374, "step": 4803 }, { "epoch": 1.9530392356169952, "grad_norm": 0.11145360767841339, "learning_rate": 4.681457358029717e-06, "loss": 0.9147, "step": 4804 }, { "epoch": 1.9534458223216102, "grad_norm": 0.11122753471136093, "learning_rate": 4.640749033177285e-06, "loss": 0.9332, "step": 4805 }, { "epoch": 1.9538524090262248, "grad_norm": 0.10207870602607727, "learning_rate": 4.600040708324853e-06, "loss": 0.937, "step": 4806 }, { "epoch": 1.9542589957308396, "grad_norm": 0.11454325169324875, "learning_rate": 4.559332383472421e-06, "loss": 1.0435, "step": 4807 }, { "epoch": 1.9546655824354544, "grad_norm": 0.10648126155138016, "learning_rate": 4.5186240586199875e-06, "loss": 0.927, "step": 4808 }, { "epoch": 1.955072169140069, "grad_norm": 0.10996894538402557, "learning_rate": 4.477915733767556e-06, "loss": 0.9693, "step": 4809 }, { "epoch": 1.955478755844684, "grad_norm": 0.10057996213436127, "learning_rate": 4.437207408915124e-06, "loss": 0.9325, "step": 4810 }, { "epoch": 1.9558853425492986, "grad_norm": 0.10628996044397354, "learning_rate": 4.396499084062691e-06, "loss": 
0.891, "step": 4811 }, { "epoch": 1.9562919292539134, "grad_norm": 0.10557537525892258, "learning_rate": 4.355790759210258e-06, "loss": 0.8736, "step": 4812 }, { "epoch": 1.9566985159585282, "grad_norm": 0.10447331517934799, "learning_rate": 4.3150824343578265e-06, "loss": 0.9717, "step": 4813 }, { "epoch": 1.957105102663143, "grad_norm": 0.10446681082248688, "learning_rate": 4.274374109505394e-06, "loss": 1.0103, "step": 4814 }, { "epoch": 1.9575116893677578, "grad_norm": 0.10121920704841614, "learning_rate": 4.233665784652962e-06, "loss": 0.875, "step": 4815 }, { "epoch": 1.9579182760723723, "grad_norm": 0.10913816094398499, "learning_rate": 4.1929574598005294e-06, "loss": 1.0491, "step": 4816 }, { "epoch": 1.9583248627769871, "grad_norm": 0.11767001450061798, "learning_rate": 4.152249134948097e-06, "loss": 1.025, "step": 4817 }, { "epoch": 1.958731449481602, "grad_norm": 0.10180991888046265, "learning_rate": 4.111540810095665e-06, "loss": 0.892, "step": 4818 }, { "epoch": 1.9591380361862167, "grad_norm": 0.11216012388467789, "learning_rate": 4.070832485243232e-06, "loss": 0.9754, "step": 4819 }, { "epoch": 1.9595446228908315, "grad_norm": 0.1098812147974968, "learning_rate": 4.0301241603908e-06, "loss": 0.9805, "step": 4820 }, { "epoch": 1.959951209595446, "grad_norm": 0.10524158924818039, "learning_rate": 3.989415835538368e-06, "loss": 0.9045, "step": 4821 }, { "epoch": 1.9603577963000611, "grad_norm": 0.09650178253650665, "learning_rate": 3.948707510685935e-06, "loss": 0.7913, "step": 4822 }, { "epoch": 1.9607643830046757, "grad_norm": 0.11418919265270233, "learning_rate": 3.907999185833503e-06, "loss": 0.9991, "step": 4823 }, { "epoch": 1.9611709697092905, "grad_norm": 0.11137097328901291, "learning_rate": 3.867290860981071e-06, "loss": 0.978, "step": 4824 }, { "epoch": 1.9615775564139053, "grad_norm": 0.1029028594493866, "learning_rate": 3.826582536128639e-06, "loss": 0.8791, "step": 4825 }, { "epoch": 1.9619841431185199, "grad_norm": 0.10152295976877213, 
"learning_rate": 3.7858742112762058e-06, "loss": 0.8855, "step": 4826 }, { "epoch": 1.962390729823135, "grad_norm": 0.11157593131065369, "learning_rate": 3.745165886423774e-06, "loss": 1.0097, "step": 4827 }, { "epoch": 1.9627973165277495, "grad_norm": 0.10975543409585953, "learning_rate": 3.7044575615713415e-06, "loss": 1.0269, "step": 4828 }, { "epoch": 1.9632039032323643, "grad_norm": 0.10318556427955627, "learning_rate": 3.6637492367189095e-06, "loss": 0.9094, "step": 4829 }, { "epoch": 1.963610489936979, "grad_norm": 0.09540821611881256, "learning_rate": 3.6230409118664767e-06, "loss": 0.7923, "step": 4830 }, { "epoch": 1.9640170766415939, "grad_norm": 0.11185004562139511, "learning_rate": 3.5823325870140444e-06, "loss": 0.9945, "step": 4831 }, { "epoch": 1.9644236633462087, "grad_norm": 0.1030164510011673, "learning_rate": 3.541624262161612e-06, "loss": 0.8952, "step": 4832 }, { "epoch": 1.9648302500508232, "grad_norm": 0.10606315732002258, "learning_rate": 3.50091593730918e-06, "loss": 0.8872, "step": 4833 }, { "epoch": 1.9652368367554383, "grad_norm": 0.10676340013742447, "learning_rate": 3.4602076124567477e-06, "loss": 0.9616, "step": 4834 }, { "epoch": 1.9656434234600528, "grad_norm": 0.11374758929014206, "learning_rate": 3.419499287604315e-06, "loss": 1.0619, "step": 4835 }, { "epoch": 1.9660500101646676, "grad_norm": 0.10142536461353302, "learning_rate": 3.378790962751883e-06, "loss": 0.8787, "step": 4836 }, { "epoch": 1.9664565968692824, "grad_norm": 0.1088085025548935, "learning_rate": 3.3380826378994506e-06, "loss": 1.0706, "step": 4837 }, { "epoch": 1.966863183573897, "grad_norm": 0.11617989093065262, "learning_rate": 3.2973743130470187e-06, "loss": 1.0758, "step": 4838 }, { "epoch": 1.967269770278512, "grad_norm": 0.10999471694231033, "learning_rate": 3.2566659881945863e-06, "loss": 0.8955, "step": 4839 }, { "epoch": 1.9676763569831266, "grad_norm": 0.10413683950901031, "learning_rate": 3.2159576633421535e-06, "loss": 0.8774, "step": 4840 }, { 
"epoch": 1.9680829436877414, "grad_norm": 0.10912149399518967, "learning_rate": 3.175249338489721e-06, "loss": 0.9151, "step": 4841 }, { "epoch": 1.9684895303923562, "grad_norm": 0.10065335780382156, "learning_rate": 3.134541013637289e-06, "loss": 0.8947, "step": 4842 }, { "epoch": 1.9688961170969708, "grad_norm": 0.10842598974704742, "learning_rate": 3.0938326887848564e-06, "loss": 0.9149, "step": 4843 }, { "epoch": 1.9693027038015858, "grad_norm": 0.09546621143817902, "learning_rate": 3.0531243639324245e-06, "loss": 0.8106, "step": 4844 }, { "epoch": 1.9697092905062004, "grad_norm": 0.10605739057064056, "learning_rate": 3.0124160390799917e-06, "loss": 0.8663, "step": 4845 }, { "epoch": 1.9701158772108152, "grad_norm": 0.11531540751457214, "learning_rate": 2.9717077142275597e-06, "loss": 0.9487, "step": 4846 }, { "epoch": 1.97052246391543, "grad_norm": 0.112498939037323, "learning_rate": 2.9309993893751274e-06, "loss": 0.9846, "step": 4847 }, { "epoch": 1.9709290506200448, "grad_norm": 0.10680878907442093, "learning_rate": 2.890291064522695e-06, "loss": 0.9092, "step": 4848 }, { "epoch": 1.9713356373246596, "grad_norm": 0.11008645594120026, "learning_rate": 2.8495827396702626e-06, "loss": 0.918, "step": 4849 }, { "epoch": 1.9717422240292741, "grad_norm": 0.1180918887257576, "learning_rate": 2.8088744148178303e-06, "loss": 1.1026, "step": 4850 }, { "epoch": 1.9721488107338891, "grad_norm": 0.10788023471832275, "learning_rate": 2.7681660899653983e-06, "loss": 0.9422, "step": 4851 }, { "epoch": 1.9725553974385037, "grad_norm": 0.11532583087682724, "learning_rate": 2.7274577651129655e-06, "loss": 0.9619, "step": 4852 }, { "epoch": 1.9729619841431185, "grad_norm": 0.1164373904466629, "learning_rate": 2.6867494402605336e-06, "loss": 1.0735, "step": 4853 }, { "epoch": 1.9733685708477333, "grad_norm": 0.10352805256843567, "learning_rate": 2.646041115408101e-06, "loss": 0.9302, "step": 4854 }, { "epoch": 1.973775157552348, "grad_norm": 0.09697481989860535, "learning_rate": 
2.605332790555669e-06, "loss": 0.8169, "step": 4855 }, { "epoch": 1.974181744256963, "grad_norm": 0.10641641169786453, "learning_rate": 2.5646244657032365e-06, "loss": 0.9379, "step": 4856 }, { "epoch": 1.9745883309615775, "grad_norm": 0.12247955799102783, "learning_rate": 2.523916140850804e-06, "loss": 1.1005, "step": 4857 }, { "epoch": 1.9749949176661923, "grad_norm": 0.11470235139131546, "learning_rate": 2.4832078159983718e-06, "loss": 1.0682, "step": 4858 }, { "epoch": 1.975401504370807, "grad_norm": 0.10415980964899063, "learning_rate": 2.4424994911459394e-06, "loss": 0.9184, "step": 4859 }, { "epoch": 1.9758080910754219, "grad_norm": 0.10580716282129288, "learning_rate": 2.401791166293507e-06, "loss": 0.9137, "step": 4860 }, { "epoch": 1.9762146777800367, "grad_norm": 0.10806702822446823, "learning_rate": 2.3610828414410747e-06, "loss": 1.0023, "step": 4861 }, { "epoch": 1.9766212644846513, "grad_norm": 0.10730385035276413, "learning_rate": 2.3203745165886423e-06, "loss": 0.9394, "step": 4862 }, { "epoch": 1.9770278511892663, "grad_norm": 0.11646751314401627, "learning_rate": 2.2796661917362104e-06, "loss": 1.0452, "step": 4863 }, { "epoch": 1.9774344378938808, "grad_norm": 0.11328614503145218, "learning_rate": 2.238957866883778e-06, "loss": 1.0363, "step": 4864 }, { "epoch": 1.9778410245984956, "grad_norm": 0.10477136820554733, "learning_rate": 2.1982495420313456e-06, "loss": 0.8967, "step": 4865 }, { "epoch": 1.9782476113031104, "grad_norm": 0.1011333018541336, "learning_rate": 2.1575412171789133e-06, "loss": 0.9051, "step": 4866 }, { "epoch": 1.978654198007725, "grad_norm": 0.10585794597864151, "learning_rate": 2.116832892326481e-06, "loss": 0.9641, "step": 4867 }, { "epoch": 1.97906078471234, "grad_norm": 0.10518283396959305, "learning_rate": 2.0761245674740485e-06, "loss": 0.9738, "step": 4868 }, { "epoch": 1.9794673714169546, "grad_norm": 0.10781599581241608, "learning_rate": 2.035416242621616e-06, "loss": 0.9535, "step": 4869 }, { "epoch": 
1.9798739581215694, "grad_norm": 0.10149887949228287, "learning_rate": 1.994707917769184e-06, "loss": 0.7832, "step": 4870 }, { "epoch": 1.9802805448261842, "grad_norm": 0.10625772923231125, "learning_rate": 1.9539995929167514e-06, "loss": 0.8969, "step": 4871 }, { "epoch": 1.9806871315307988, "grad_norm": 0.100648894906044, "learning_rate": 1.9132912680643195e-06, "loss": 0.8592, "step": 4872 }, { "epoch": 1.9810937182354138, "grad_norm": 0.10639602690935135, "learning_rate": 1.872582943211887e-06, "loss": 0.9377, "step": 4873 }, { "epoch": 1.9815003049400284, "grad_norm": 0.10608502477407455, "learning_rate": 1.8318746183594548e-06, "loss": 0.8221, "step": 4874 }, { "epoch": 1.9819068916446432, "grad_norm": 0.1076526865363121, "learning_rate": 1.7911662935070222e-06, "loss": 1.0001, "step": 4875 }, { "epoch": 1.982313478349258, "grad_norm": 0.10484609007835388, "learning_rate": 1.75045796865459e-06, "loss": 0.9281, "step": 4876 }, { "epoch": 1.9827200650538728, "grad_norm": 0.11033840477466583, "learning_rate": 1.7097496438021575e-06, "loss": 1.012, "step": 4877 }, { "epoch": 1.9831266517584876, "grad_norm": 0.10178755968809128, "learning_rate": 1.6690413189497253e-06, "loss": 0.8751, "step": 4878 }, { "epoch": 1.9835332384631021, "grad_norm": 0.09968069940805435, "learning_rate": 1.6283329940972931e-06, "loss": 0.8481, "step": 4879 }, { "epoch": 1.9839398251677172, "grad_norm": 0.11199220269918442, "learning_rate": 1.5876246692448606e-06, "loss": 1.0553, "step": 4880 }, { "epoch": 1.9843464118723317, "grad_norm": 0.10771384090185165, "learning_rate": 1.5469163443924282e-06, "loss": 0.9871, "step": 4881 }, { "epoch": 1.9847529985769465, "grad_norm": 0.1033516600728035, "learning_rate": 1.5062080195399958e-06, "loss": 0.8731, "step": 4882 }, { "epoch": 1.9851595852815613, "grad_norm": 0.10771310329437256, "learning_rate": 1.4654996946875637e-06, "loss": 1.0152, "step": 4883 }, { "epoch": 1.985566171986176, "grad_norm": 0.10385514050722122, "learning_rate": 
1.4247913698351313e-06, "loss": 0.8569, "step": 4884 }, { "epoch": 1.985972758690791, "grad_norm": 0.10435989499092102, "learning_rate": 1.3840830449826992e-06, "loss": 0.8999, "step": 4885 }, { "epoch": 1.9863793453954055, "grad_norm": 0.10604739189147949, "learning_rate": 1.3433747201302668e-06, "loss": 0.8837, "step": 4886 }, { "epoch": 1.9867859321000203, "grad_norm": 0.11071362346410751, "learning_rate": 1.3026663952778344e-06, "loss": 0.9995, "step": 4887 }, { "epoch": 1.987192518804635, "grad_norm": 0.11492349952459335, "learning_rate": 1.261958070425402e-06, "loss": 1.0693, "step": 4888 }, { "epoch": 1.98759910550925, "grad_norm": 0.11402280628681183, "learning_rate": 1.2212497455729697e-06, "loss": 1.0973, "step": 4889 }, { "epoch": 1.9880056922138647, "grad_norm": 0.10784902423620224, "learning_rate": 1.1805414207205373e-06, "loss": 0.9591, "step": 4890 }, { "epoch": 1.9884122789184793, "grad_norm": 0.10509707778692245, "learning_rate": 1.1398330958681052e-06, "loss": 0.9233, "step": 4891 }, { "epoch": 1.9888188656230943, "grad_norm": 0.10772809386253357, "learning_rate": 1.0991247710156728e-06, "loss": 0.9239, "step": 4892 }, { "epoch": 1.9892254523277089, "grad_norm": 0.10139593482017517, "learning_rate": 1.0584164461632405e-06, "loss": 0.8991, "step": 4893 }, { "epoch": 1.9896320390323237, "grad_norm": 0.11088011413812637, "learning_rate": 1.017708121310808e-06, "loss": 0.9746, "step": 4894 }, { "epoch": 1.9900386257369385, "grad_norm": 0.1069415956735611, "learning_rate": 9.769997964583757e-07, "loss": 0.9667, "step": 4895 }, { "epoch": 1.990445212441553, "grad_norm": 0.11252355575561523, "learning_rate": 9.362914716059435e-07, "loss": 0.9521, "step": 4896 }, { "epoch": 1.990851799146168, "grad_norm": 0.11555030941963196, "learning_rate": 8.955831467535111e-07, "loss": 0.9464, "step": 4897 }, { "epoch": 1.9912583858507826, "grad_norm": 0.10089296847581863, "learning_rate": 8.548748219010787e-07, "loss": 0.9118, "step": 4898 }, { "epoch": 
1.9916649725553974, "grad_norm": 0.10483364015817642, "learning_rate": 8.141664970486466e-07, "loss": 0.9561, "step": 4899 }, { "epoch": 1.9920715592600122, "grad_norm": 0.10259924083948135, "learning_rate": 7.734581721962141e-07, "loss": 0.937, "step": 4900 }, { "epoch": 1.9924781459646268, "grad_norm": 0.10515905171632767, "learning_rate": 7.327498473437818e-07, "loss": 0.9686, "step": 4901 }, { "epoch": 1.9928847326692418, "grad_norm": 0.1109880730509758, "learning_rate": 6.920415224913496e-07, "loss": 0.9375, "step": 4902 }, { "epoch": 1.9932913193738564, "grad_norm": 0.10059867799282074, "learning_rate": 6.513331976389172e-07, "loss": 0.9148, "step": 4903 }, { "epoch": 1.9936979060784712, "grad_norm": 0.1153227686882019, "learning_rate": 6.106248727864849e-07, "loss": 1.065, "step": 4904 }, { "epoch": 1.994104492783086, "grad_norm": 0.10817611962556839, "learning_rate": 5.699165479340526e-07, "loss": 0.9162, "step": 4905 }, { "epoch": 1.9945110794877008, "grad_norm": 0.09951157122850418, "learning_rate": 5.292082230816202e-07, "loss": 0.885, "step": 4906 }, { "epoch": 1.9949176661923156, "grad_norm": 0.1026596650481224, "learning_rate": 4.884998982291879e-07, "loss": 0.9054, "step": 4907 }, { "epoch": 1.9953242528969302, "grad_norm": 0.10928881913423538, "learning_rate": 4.4779157337675555e-07, "loss": 0.9206, "step": 4908 }, { "epoch": 1.9957308396015452, "grad_norm": 0.1039741113781929, "learning_rate": 4.070832485243233e-07, "loss": 0.9762, "step": 4909 }, { "epoch": 1.9961374263061598, "grad_norm": 0.10720765590667725, "learning_rate": 3.663749236718909e-07, "loss": 0.9376, "step": 4910 }, { "epoch": 1.9965440130107746, "grad_norm": 0.11087562888860703, "learning_rate": 3.256665988194586e-07, "loss": 1.0135, "step": 4911 }, { "epoch": 1.9969505997153894, "grad_norm": 0.11333035677671432, "learning_rate": 2.849582739670263e-07, "loss": 0.9378, "step": 4912 }, { "epoch": 1.997357186420004, "grad_norm": 0.10567180067300797, "learning_rate": 
2.4424994911459393e-07, "loss": 0.8727, "step": 4913 }, { "epoch": 1.997763773124619, "grad_norm": 0.09908761829137802, "learning_rate": 2.0354162426216164e-07, "loss": 0.8175, "step": 4914 }, { "epoch": 1.9981703598292335, "grad_norm": 0.1148877665400505, "learning_rate": 1.628332994097293e-07, "loss": 0.9689, "step": 4915 }, { "epoch": 1.9985769465338483, "grad_norm": 0.1073300689458847, "learning_rate": 1.2212497455729696e-07, "loss": 0.9064, "step": 4916 }, { "epoch": 1.9989835332384631, "grad_norm": 0.10753702372312546, "learning_rate": 8.141664970486465e-08, "loss": 0.9366, "step": 4917 }, { "epoch": 1.999390119943078, "grad_norm": 0.10542717576026917, "learning_rate": 4.0708324852432326e-08, "loss": 0.8963, "step": 4918 } ], "logging_steps": 1, "max_steps": 4918, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.57185946392996e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }