{
  "best_metric": 0.05946353077888489,
  "best_model_checkpoint": "miner_id_24/checkpoint-400",
  "epoch": 0.42317380352644834,
  "eval_steps": 100,
  "global_step": 462,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009159606136936112,
      "grad_norm": 5.519550323486328,
      "learning_rate": 2e-05,
      "loss": 4.8758,
      "step": 1
    },
    {
      "epoch": 0.0009159606136936112,
      "eval_loss": 0.6018432974815369,
      "eval_runtime": 431.5799,
      "eval_samples_per_second": 3.374,
      "eval_steps_per_second": 0.843,
      "step": 1
    },
    {
      "epoch": 0.0018319212273872224,
      "grad_norm": 5.216701507568359,
      "learning_rate": 4e-05,
      "loss": 5.0302,
      "step": 2
    },
    {
      "epoch": 0.0027478818410808336,
      "grad_norm": 15.041247367858887,
      "learning_rate": 6e-05,
      "loss": 5.6025,
      "step": 3
    },
    {
      "epoch": 0.0036638424547744447,
      "grad_norm": 4.073129177093506,
      "learning_rate": 8e-05,
      "loss": 4.5046,
      "step": 4
    },
    {
      "epoch": 0.004579803068468056,
      "grad_norm": 3.793393850326538,
      "learning_rate": 0.0001,
      "loss": 3.7209,
      "step": 5
    },
    {
      "epoch": 0.005495763682161667,
      "grad_norm": 3.9871528148651123,
      "learning_rate": 0.00012,
      "loss": 2.6306,
      "step": 6
    },
    {
      "epoch": 0.006411724295855278,
      "grad_norm": 1.6556649208068848,
      "learning_rate": 0.00014,
      "loss": 1.8335,
      "step": 7
    },
    {
      "epoch": 0.0073276849095488894,
      "grad_norm": 2.6266279220581055,
      "learning_rate": 0.00016,
      "loss": 1.5011,
      "step": 8
    },
    {
      "epoch": 0.0082436455232425,
      "grad_norm": 2.764836311340332,
      "learning_rate": 0.00018,
      "loss": 1.3103,
      "step": 9
    },
    {
      "epoch": 0.009159606136936112,
      "grad_norm": 3.4879281520843506,
      "learning_rate": 0.0002,
      "loss": 1.3028,
      "step": 10
    },
    {
      "epoch": 0.010075566750629723,
      "grad_norm": 2.448042869567871,
      "learning_rate": 0.00019999758458848847,
      "loss": 1.3211,
      "step": 11
    },
    {
      "epoch": 0.010991527364323335,
      "grad_norm": 1.9464507102966309,
      "learning_rate": 0.00019999033847063811,
      "loss": 1.2399,
      "step": 12
    },
    {
      "epoch": 0.011907487978016945,
      "grad_norm": 1.6776124238967896,
      "learning_rate": 0.00019997826199649605,
      "loss": 1.0416,
      "step": 13
    },
    {
      "epoch": 0.012823448591710557,
      "grad_norm": 1.573235273361206,
      "learning_rate": 0.00019996135574945544,
      "loss": 1.3556,
      "step": 14
    },
    {
      "epoch": 0.013739409205404167,
      "grad_norm": 1.5061469078063965,
      "learning_rate": 0.00019993962054622703,
      "loss": 1.2534,
      "step": 15
    },
    {
      "epoch": 0.014655369819097779,
      "grad_norm": 1.4490333795547485,
      "learning_rate": 0.00019991305743680013,
      "loss": 0.8765,
      "step": 16
    },
    {
      "epoch": 0.01557133043279139,
      "grad_norm": 1.4613972902297974,
      "learning_rate": 0.00019988166770439154,
      "loss": 1.0973,
      "step": 17
    },
    {
      "epoch": 0.016487291046485,
      "grad_norm": 1.417468547821045,
      "learning_rate": 0.0001998454528653836,
      "loss": 1.2705,
      "step": 18
    },
    {
      "epoch": 0.01740325166017861,
      "grad_norm": 1.23396897315979,
      "learning_rate": 0.00019980441466925118,
      "loss": 1.007,
      "step": 19
    },
    {
      "epoch": 0.018319212273872225,
      "grad_norm": 1.4013880491256714,
      "learning_rate": 0.00019975855509847686,
      "loss": 0.8394,
      "step": 20
    },
    {
      "epoch": 0.019235172887565835,
      "grad_norm": 1.7472220659255981,
      "learning_rate": 0.00019970787636845535,
      "loss": 1.1364,
      "step": 21
    },
    {
      "epoch": 0.020151133501259445,
      "grad_norm": 1.3513895273208618,
      "learning_rate": 0.00019965238092738643,
      "loss": 0.9134,
      "step": 22
    },
    {
      "epoch": 0.021067094114953056,
      "grad_norm": 1.5269527435302734,
      "learning_rate": 0.00019959207145615665,
      "loss": 0.8255,
      "step": 23
    },
    {
      "epoch": 0.02198305472864667,
      "grad_norm": 0.8268145322799683,
      "learning_rate": 0.00019952695086820975,
      "loss": 0.6196,
      "step": 24
    },
    {
      "epoch": 0.02289901534234028,
      "grad_norm": 1.1546183824539185,
      "learning_rate": 0.00019945702230940614,
      "loss": 0.9068,
      "step": 25
    },
    {
      "epoch": 0.02381497595603389,
      "grad_norm": 0.8031617999076843,
      "learning_rate": 0.0001993822891578708,
      "loss": 0.5674,
      "step": 26
    },
    {
      "epoch": 0.024730936569727503,
      "grad_norm": 0.937088668346405,
      "learning_rate": 0.0001993027550238299,
      "loss": 0.7044,
      "step": 27
    },
    {
      "epoch": 0.025646897183421113,
      "grad_norm": 1.08357834815979,
      "learning_rate": 0.0001992184237494368,
      "loss": 0.9674,
      "step": 28
    },
    {
      "epoch": 0.026562857797114724,
      "grad_norm": 1.0530093908309937,
      "learning_rate": 0.00019912929940858607,
      "loss": 0.7554,
      "step": 29
    },
    {
      "epoch": 0.027478818410808334,
      "grad_norm": 1.334518551826477,
      "learning_rate": 0.0001990353863067169,
      "loss": 0.746,
      "step": 30
    },
    {
      "epoch": 0.028394779024501948,
      "grad_norm": 1.6642454862594604,
      "learning_rate": 0.00019893668898060502,
      "loss": 1.189,
      "step": 31
    },
    {
      "epoch": 0.029310739638195558,
      "grad_norm": 1.232517957687378,
      "learning_rate": 0.0001988332121981436,
      "loss": 1.0104,
      "step": 32
    },
    {
      "epoch": 0.030226700251889168,
      "grad_norm": 0.9858556389808655,
      "learning_rate": 0.00019872496095811286,
      "loss": 0.6634,
      "step": 33
    },
    {
      "epoch": 0.03114266086558278,
      "grad_norm": 1.123953104019165,
      "learning_rate": 0.00019861194048993863,
      "loss": 0.8598,
      "step": 34
    },
    {
      "epoch": 0.03205862147927639,
      "grad_norm": 1.0000923871994019,
      "learning_rate": 0.0001984941562534397,
      "loss": 0.8872,
      "step": 35
    },
    {
      "epoch": 0.03297458209297,
      "grad_norm": 1.1624726057052612,
      "learning_rate": 0.0001983716139385641,
      "loss": 0.8707,
      "step": 36
    },
    {
      "epoch": 0.03389054270666361,
      "grad_norm": 1.0879074335098267,
      "learning_rate": 0.0001982443194651142,
      "loss": 0.8361,
      "step": 37
    },
    {
      "epoch": 0.03480650332035722,
      "grad_norm": 1.3374804258346558,
      "learning_rate": 0.0001981122789824607,
      "loss": 0.7765,
      "step": 38
    },
    {
      "epoch": 0.03572246393405083,
      "grad_norm": 0.8522550463676453,
      "learning_rate": 0.00019797549886924566,
      "loss": 0.8699,
      "step": 39
    },
    {
      "epoch": 0.03663842454774445,
      "grad_norm": 0.8720804452896118,
      "learning_rate": 0.00019783398573307428,
      "loss": 0.6595,
      "step": 40
    },
    {
      "epoch": 0.03755438516143806,
      "grad_norm": 1.0943487882614136,
      "learning_rate": 0.0001976877464101957,
      "loss": 0.8158,
      "step": 41
    },
    {
      "epoch": 0.03847034577513167,
      "grad_norm": 1.0142332315444946,
      "learning_rate": 0.00019753678796517282,
      "loss": 0.867,
      "step": 42
    },
    {
      "epoch": 0.03938630638882528,
      "grad_norm": 0.9329423904418945,
      "learning_rate": 0.00019738111769054093,
      "loss": 0.7283,
      "step": 43
    },
    {
      "epoch": 0.04030226700251889,
      "grad_norm": 0.7856294512748718,
      "learning_rate": 0.00019722074310645553,
      "loss": 0.6204,
      "step": 44
    },
    {
      "epoch": 0.0412182276162125,
      "grad_norm": 1.2970919609069824,
      "learning_rate": 0.00019705567196032892,
      "loss": 0.6546,
      "step": 45
    },
    {
      "epoch": 0.04213418822990611,
      "grad_norm": 1.1588696241378784,
      "learning_rate": 0.00019688591222645607,
      "loss": 0.8384,
      "step": 46
    },
    {
      "epoch": 0.04305014884359973,
      "grad_norm": 0.9822084903717041,
      "learning_rate": 0.00019671147210562927,
      "loss": 0.6732,
      "step": 47
    },
    {
      "epoch": 0.04396610945729334,
      "grad_norm": 0.8491467237472534,
      "learning_rate": 0.000196532360024742,
      "loss": 0.6257,
      "step": 48
    },
    {
      "epoch": 0.04488207007098695,
      "grad_norm": 1.0204273462295532,
      "learning_rate": 0.000196348584636382,
      "loss": 0.7814,
      "step": 49
    },
    {
      "epoch": 0.04579803068468056,
      "grad_norm": 1.1780798435211182,
      "learning_rate": 0.0001961601548184129,
      "loss": 0.6859,
      "step": 50
    },
    {
      "epoch": 0.04671399129837417,
      "grad_norm": 0.9798137545585632,
      "learning_rate": 0.00019596707967354585,
      "loss": 0.6813,
      "step": 51
    },
    {
      "epoch": 0.04762995191206778,
      "grad_norm": 1.10402512550354,
      "learning_rate": 0.00019576936852889936,
      "loss": 0.6647,
      "step": 52
    },
    {
      "epoch": 0.04854591252576139,
      "grad_norm": 1.1076951026916504,
      "learning_rate": 0.0001955670309355489,
      "loss": 0.7283,
      "step": 53
    },
    {
      "epoch": 0.049461873139455007,
      "grad_norm": 0.946972131729126,
      "learning_rate": 0.00019536007666806556,
      "loss": 0.6674,
      "step": 54
    },
    {
      "epoch": 0.05037783375314862,
      "grad_norm": 0.8705822825431824,
      "learning_rate": 0.00019514851572404368,
      "loss": 0.5044,
      "step": 55
    },
    {
      "epoch": 0.05129379436684223,
      "grad_norm": 0.8742169737815857,
      "learning_rate": 0.0001949323583236181,
      "loss": 0.7942,
      "step": 56
    },
    {
      "epoch": 0.05220975498053584,
      "grad_norm": 1.487743616104126,
      "learning_rate": 0.00019471161490897029,
      "loss": 0.5322,
      "step": 57
    },
    {
      "epoch": 0.05312571559422945,
      "grad_norm": 0.8018708825111389,
      "learning_rate": 0.0001944862961438239,
      "loss": 0.7712,
      "step": 58
    },
    {
      "epoch": 0.05404167620792306,
      "grad_norm": 0.9287647008895874,
      "learning_rate": 0.00019425641291292978,
      "loss": 0.6638,
      "step": 59
    },
    {
      "epoch": 0.05495763682161667,
      "grad_norm": 1.1208561658859253,
      "learning_rate": 0.00019402197632153992,
      "loss": 0.6888,
      "step": 60
    },
    {
      "epoch": 0.055873597435310285,
      "grad_norm": 0.8650437593460083,
      "learning_rate": 0.00019378299769487117,
      "loss": 0.602,
      "step": 61
    },
    {
      "epoch": 0.056789558049003895,
      "grad_norm": 0.9438541531562805,
      "learning_rate": 0.00019353948857755803,
      "loss": 0.8007,
      "step": 62
    },
    {
      "epoch": 0.057705518662697505,
      "grad_norm": 1.0785539150238037,
      "learning_rate": 0.00019329146073309504,
      "loss": 0.6443,
      "step": 63
    },
    {
      "epoch": 0.058621479276391115,
      "grad_norm": 0.938920259475708,
      "learning_rate": 0.00019303892614326836,
      "loss": 0.5166,
      "step": 64
    },
    {
      "epoch": 0.059537439890084726,
      "grad_norm": 0.7306029796600342,
      "learning_rate": 0.00019278189700757715,
      "loss": 0.6121,
      "step": 65
    },
    {
      "epoch": 0.060453400503778336,
      "grad_norm": 0.6238380670547485,
      "learning_rate": 0.00019252038574264405,
      "loss": 0.6597,
      "step": 66
    },
    {
      "epoch": 0.061369361117471946,
      "grad_norm": 0.9852589964866638,
      "learning_rate": 0.00019225440498161546,
      "loss": 0.7409,
      "step": 67
    },
    {
      "epoch": 0.06228532173116556,
      "grad_norm": 0.8592156171798706,
      "learning_rate": 0.00019198396757355118,
      "loss": 0.7738,
      "step": 68
    },
    {
      "epoch": 0.06320128234485917,
      "grad_norm": 1.1071726083755493,
      "learning_rate": 0.00019170908658280386,
      "loss": 0.95,
      "step": 69
    },
    {
      "epoch": 0.06411724295855278,
      "grad_norm": 0.8783465623855591,
      "learning_rate": 0.00019142977528838762,
      "loss": 0.7653,
      "step": 70
    },
    {
      "epoch": 0.06503320357224639,
      "grad_norm": 1.0121217966079712,
      "learning_rate": 0.0001911460471833368,
      "loss": 0.9466,
      "step": 71
    },
    {
      "epoch": 0.06594916418594,
      "grad_norm": 0.8773440718650818,
      "learning_rate": 0.00019085791597405404,
      "loss": 0.7238,
      "step": 72
    },
    {
      "epoch": 0.06686512479963362,
      "grad_norm": 0.7618806958198547,
      "learning_rate": 0.00019056539557964813,
      "loss": 0.4984,
      "step": 73
    },
    {
      "epoch": 0.06778108541332722,
      "grad_norm": 0.9363157153129578,
      "learning_rate": 0.00019026850013126157,
      "loss": 0.7261,
      "step": 74
    },
    {
      "epoch": 0.06869704602702084,
      "grad_norm": 1.3099620342254639,
      "learning_rate": 0.00018996724397138813,
      "loss": 0.7283,
      "step": 75
    },
    {
      "epoch": 0.06961300664071444,
      "grad_norm": 0.882304847240448,
      "learning_rate": 0.00018966164165317966,
      "loss": 0.5816,
      "step": 76
    },
    {
      "epoch": 0.07052896725440806,
      "grad_norm": 0.8315114378929138,
      "learning_rate": 0.00018935170793974335,
      "loss": 0.7533,
      "step": 77
    },
    {
      "epoch": 0.07144492786810167,
      "grad_norm": 0.8620697259902954,
      "learning_rate": 0.00018903745780342839,
      "loss": 0.6984,
      "step": 78
    },
    {
      "epoch": 0.07236088848179528,
      "grad_norm": 0.784542977809906,
      "learning_rate": 0.0001887189064251027,
      "loss": 0.5505,
      "step": 79
    },
    {
      "epoch": 0.0732768490954889,
      "grad_norm": 0.7392190098762512,
      "learning_rate": 0.0001883960691934196,
      "loss": 0.6052,
      "step": 80
    },
    {
      "epoch": 0.0741928097091825,
      "grad_norm": 0.810517430305481,
      "learning_rate": 0.00018806896170407437,
      "loss": 0.5008,
      "step": 81
    },
    {
      "epoch": 0.07510877032287612,
      "grad_norm": 0.6403000950813293,
      "learning_rate": 0.00018773759975905098,
      "loss": 0.3958,
      "step": 82
    },
    {
      "epoch": 0.07602473093656972,
      "grad_norm": 0.6503390073776245,
      "learning_rate": 0.00018740199936585853,
      "loss": 0.552,
      "step": 83
    },
    {
      "epoch": 0.07694069155026334,
      "grad_norm": 0.9502889513969421,
      "learning_rate": 0.00018706217673675811,
      "loss": 0.5942,
      "step": 84
    },
    {
      "epoch": 0.07785665216395694,
      "grad_norm": 1.326459527015686,
      "learning_rate": 0.0001867181482879795,
      "loss": 0.7382,
      "step": 85
    },
    {
      "epoch": 0.07877261277765056,
      "grad_norm": 0.7194717526435852,
      "learning_rate": 0.0001863699306389282,
      "loss": 0.584,
      "step": 86
    },
    {
      "epoch": 0.07968857339134418,
      "grad_norm": 0.8091100454330444,
      "learning_rate": 0.00018601754061138256,
      "loss": 0.5447,
      "step": 87
    },
    {
      "epoch": 0.08060453400503778,
      "grad_norm": 0.9021152853965759,
      "learning_rate": 0.00018566099522868119,
      "loss": 0.7408,
      "step": 88
    },
    {
      "epoch": 0.0815204946187314,
      "grad_norm": 1.156087040901184,
      "learning_rate": 0.00018530031171490053,
      "loss": 0.763,
      "step": 89
    },
    {
      "epoch": 0.082436455232425,
      "grad_norm": 1.2682101726531982,
      "learning_rate": 0.00018493550749402278,
      "loss": 0.7645,
      "step": 90
    },
    {
      "epoch": 0.08335241584611862,
      "grad_norm": 1.40992271900177,
      "learning_rate": 0.00018456660018909425,
      "loss": 0.9506,
      "step": 91
    },
    {
      "epoch": 0.08426837645981222,
      "grad_norm": 1.1955220699310303,
      "learning_rate": 0.00018419360762137395,
      "loss": 0.6177,
      "step": 92
    },
    {
      "epoch": 0.08518433707350584,
      "grad_norm": 1.3912885189056396,
      "learning_rate": 0.0001838165478094727,
      "loss": 0.8448,
      "step": 93
    },
    {
      "epoch": 0.08610029768719946,
      "grad_norm": 0.8000448346138,
      "learning_rate": 0.00018343543896848273,
      "loss": 0.5822,
      "step": 94
    },
    {
      "epoch": 0.08701625830089306,
      "grad_norm": 0.8547763228416443,
      "learning_rate": 0.00018305029950909768,
      "loss": 0.598,
      "step": 95
    },
    {
      "epoch": 0.08793221891458668,
      "grad_norm": 0.8347606062889099,
      "learning_rate": 0.00018266114803672318,
      "loss": 0.4001,
      "step": 96
    },
    {
      "epoch": 0.08884817952828028,
      "grad_norm": 0.5576004385948181,
      "learning_rate": 0.00018226800335057822,
      "loss": 0.3399,
      "step": 97
    },
    {
      "epoch": 0.0897641401419739,
      "grad_norm": 0.7549009323120117,
      "learning_rate": 0.00018187088444278674,
      "loss": 0.6359,
      "step": 98
    },
    {
      "epoch": 0.0906801007556675,
      "grad_norm": 0.7920951247215271,
      "learning_rate": 0.00018146981049746043,
      "loss": 0.6335,
      "step": 99
    },
    {
      "epoch": 0.09159606136936112,
      "grad_norm": 1.2786948680877686,
      "learning_rate": 0.00018106480088977172,
      "loss": 0.6916,
      "step": 100
    },
    {
      "epoch": 0.09159606136936112,
      "eval_loss": 0.0789576843380928,
      "eval_runtime": 436.0546,
      "eval_samples_per_second": 3.339,
      "eval_steps_per_second": 0.835,
      "step": 100
    },
    {
      "epoch": 0.09251202198305473,
      "grad_norm": 0.7070974111557007,
      "learning_rate": 0.00018065587518501804,
      "loss": 0.58,
      "step": 101
    },
    {
      "epoch": 0.09342798259674834,
      "grad_norm": 0.6558282971382141,
      "learning_rate": 0.00018024305313767646,
      "loss": 0.5625,
      "step": 102
    },
    {
      "epoch": 0.09434394321044196,
      "grad_norm": 0.8452802896499634,
      "learning_rate": 0.0001798263546904495,
      "loss": 0.9307,
      "step": 103
    },
    {
      "epoch": 0.09525990382413556,
      "grad_norm": 0.8087717294692993,
      "learning_rate": 0.00017940579997330165,
      "loss": 0.5895,
      "step": 104
    },
    {
      "epoch": 0.09617586443782918,
      "grad_norm": 1.0061838626861572,
      "learning_rate": 0.00017898140930248704,
      "loss": 0.5726,
      "step": 105
    },
    {
      "epoch": 0.09709182505152278,
      "grad_norm": 0.8311317563056946,
      "learning_rate": 0.00017855320317956784,
      "loss": 0.5909,
      "step": 106
    },
    {
      "epoch": 0.0980077856652164,
      "grad_norm": 0.8104123473167419,
      "learning_rate": 0.00017812120229042416,
      "loss": 0.4301,
      "step": 107
    },
    {
      "epoch": 0.09892374627891001,
      "grad_norm": 0.8177468776702881,
      "learning_rate": 0.00017768542750425426,
      "loss": 0.4303,
      "step": 108
    },
    {
      "epoch": 0.09983970689260362,
      "grad_norm": 2.4188196659088135,
      "learning_rate": 0.00017724589987256698,
      "loss": 0.5251,
      "step": 109
    },
    {
      "epoch": 0.10075566750629723,
      "grad_norm": 1.1067713499069214,
      "learning_rate": 0.0001768026406281642,
      "loss": 0.5276,
      "step": 110
    },
    {
      "epoch": 0.10167162811999084,
      "grad_norm": 0.7167340517044067,
      "learning_rate": 0.0001763556711841157,
      "loss": 0.5237,
      "step": 111
    },
    {
      "epoch": 0.10258758873368445,
      "grad_norm": 0.9675881862640381,
      "learning_rate": 0.00017590501313272415,
      "loss": 0.776,
      "step": 112
    },
    {
      "epoch": 0.10350354934737806,
      "grad_norm": 0.8013266324996948,
      "learning_rate": 0.00017545068824448255,
      "loss": 0.6585,
      "step": 113
    },
    {
      "epoch": 0.10441950996107167,
      "grad_norm": 0.9880797863006592,
      "learning_rate": 0.00017499271846702213,
      "loss": 0.7176,
      "step": 114
    },
    {
      "epoch": 0.10533547057476529,
      "grad_norm": 0.7393934726715088,
      "learning_rate": 0.00017453112592405242,
      "loss": 0.5301,
      "step": 115
    },
    {
      "epoch": 0.1062514311884589,
      "grad_norm": 0.773297905921936,
      "learning_rate": 0.00017406593291429217,
      "loss": 0.8689,
      "step": 116
    },
    {
      "epoch": 0.10716739180215251,
      "grad_norm": 0.8815104961395264,
      "learning_rate": 0.00017359716191039248,
      "loss": 0.6419,
      "step": 117
    },
    {
      "epoch": 0.10808335241584612,
      "grad_norm": 0.825872540473938,
      "learning_rate": 0.00017312483555785086,
      "loss": 0.5714,
      "step": 118
    },
    {
      "epoch": 0.10899931302953973,
      "grad_norm": 0.5747569799423218,
      "learning_rate": 0.00017264897667391754,
      "loss": 0.4421,
      "step": 119
    },
    {
      "epoch": 0.10991527364323334,
      "grad_norm": 1.2853175401687622,
      "learning_rate": 0.00017216960824649303,
      "loss": 0.6273,
      "step": 120
    },
    {
      "epoch": 0.11083123425692695,
      "grad_norm": 0.6967117190361023,
      "learning_rate": 0.00017168675343301769,
      "loss": 0.547,
      "step": 121
    },
    {
      "epoch": 0.11174719487062057,
      "grad_norm": 1.197424054145813,
      "learning_rate": 0.00017120043555935298,
      "loss": 0.7641,
      "step": 122
    },
    {
      "epoch": 0.11266315548431417,
      "grad_norm": 1.1903916597366333,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.8993,
      "step": 123
    },
    {
      "epoch": 0.11357911609800779,
      "grad_norm": 1.3580857515335083,
      "learning_rate": 0.0001702175047702382,
      "loss": 0.6937,
      "step": 124
    },
    {
      "epoch": 0.1144950767117014,
      "grad_norm": 0.8347879648208618,
      "learning_rate": 0.000169720939338435,
      "loss": 0.5253,
      "step": 125
    },
    {
      "epoch": 0.11541103732539501,
      "grad_norm": 0.8995674252510071,
      "learning_rate": 0.00016922100581144228,
      "loss": 0.8104,
      "step": 126
    },
    {
      "epoch": 0.11632699793908861,
      "grad_norm": 0.7819614410400391,
      "learning_rate": 0.00016871772834016406,
      "loss": 0.6911,
      "step": 127
    },
    {
      "epoch": 0.11724295855278223,
      "grad_norm": 0.9112820029258728,
      "learning_rate": 0.00016821113123704424,
      "loss": 0.4639,
      "step": 128
    },
    {
      "epoch": 0.11815891916647585,
      "grad_norm": 0.8054628372192383,
      "learning_rate": 0.00016770123897489228,
      "loss": 0.7766,
      "step": 129
    },
    {
      "epoch": 0.11907487978016945,
      "grad_norm": 0.7405731081962585,
      "learning_rate": 0.00016718807618570106,
      "loss": 0.5029,
      "step": 130
    },
    {
      "epoch": 0.11999084039386307,
      "grad_norm": 0.5705666542053223,
      "learning_rate": 0.00016667166765945668,
      "loss": 0.3547,
      "step": 131
    },
    {
      "epoch": 0.12090680100755667,
      "grad_norm": 0.5570306777954102,
      "learning_rate": 0.00016615203834294119,
      "loss": 0.4366,
      "step": 132
    },
    {
      "epoch": 0.12182276162125029,
      "grad_norm": 0.7209002375602722,
      "learning_rate": 0.00016562921333852714,
      "loss": 0.6965,
      "step": 133
    },
    {
      "epoch": 0.12273872223494389,
      "grad_norm": 0.8611632585525513,
      "learning_rate": 0.00016510321790296525,
      "loss": 0.7107,
      "step": 134
    },
    {
      "epoch": 0.12365468284863751,
      "grad_norm": 1.1321237087249756,
      "learning_rate": 0.0001645740774461642,
      "loss": 0.6989,
      "step": 135
    },
    {
      "epoch": 0.12457064346233113,
      "grad_norm": 0.8014971017837524,
      "learning_rate": 0.00016404181752996289,
      "loss": 0.6939,
      "step": 136
    },
    {
      "epoch": 0.12548660407602474,
      "grad_norm": 0.8466368913650513,
      "learning_rate": 0.00016350646386689593,
      "loss": 0.501,
      "step": 137
    },
    {
      "epoch": 0.12640256468971833,
      "grad_norm": 0.9968240857124329,
      "learning_rate": 0.00016296804231895142,
      "loss": 0.5624,
      "step": 138
    },
    {
      "epoch": 0.12731852530341195,
      "grad_norm": 1.1174370050430298,
      "learning_rate": 0.00016242657889632133,
      "loss": 0.8239,
      "step": 139
    },
    {
      "epoch": 0.12823448591710557,
      "grad_norm": 1.014504075050354,
      "learning_rate": 0.00016188209975614542,
      "loss": 0.7467,
      "step": 140
    },
    {
      "epoch": 0.12915044653079918,
      "grad_norm": 0.8318601846694946,
      "learning_rate": 0.00016133463120124731,
      "loss": 0.6325,
      "step": 141
    },
    {
      "epoch": 0.13006640714449277,
      "grad_norm": 0.7232715487480164,
      "learning_rate": 0.00016078419967886402,
      "loss": 0.4998,
      "step": 142
    },
    {
      "epoch": 0.1309823677581864,
      "grad_norm": 0.49094462394714355,
      "learning_rate": 0.00016023083177936823,
      "loss": 0.4028,
      "step": 143
    },
    {
      "epoch": 0.13189832837188,
      "grad_norm": 0.7213066816329956,
      "learning_rate": 0.00015967455423498387,
      "loss": 0.548,
      "step": 144
    },
    {
      "epoch": 0.13281428898557363,
      "grad_norm": 1.2420037984848022,
      "learning_rate": 0.00015911539391849462,
      "loss": 0.5315,
      "step": 145
    },
    {
      "epoch": 0.13373024959926724,
      "grad_norm": 0.7429931163787842,
      "learning_rate": 0.00015855337784194577,
      "loss": 0.5053,
      "step": 146
    },
    {
      "epoch": 0.13464621021296083,
      "grad_norm": 0.6793218851089478,
      "learning_rate": 0.00015798853315533931,
      "loss": 0.4798,
      "step": 147
    },
    {
      "epoch": 0.13556217082665445,
      "grad_norm": 0.8552589416503906,
      "learning_rate": 0.00015742088714532247,
      "loss": 0.7091,
      "step": 148
    },
    {
      "epoch": 0.13647813144034807,
      "grad_norm": 1.0349805355072021,
      "learning_rate": 0.00015685046723386937,
      "loss": 0.799,
      "step": 149
    },
    {
      "epoch": 0.13739409205404168,
      "grad_norm": 0.7529215812683105,
      "learning_rate": 0.00015627730097695638,
      "loss": 0.6193,
      "step": 150
    },
    {
      "epoch": 0.1383100526677353,
      "grad_norm": 0.8655149340629578,
      "learning_rate": 0.00015570141606323105,
      "loss": 0.5076,
      "step": 151
    },
    {
      "epoch": 0.1392260132814289,
      "grad_norm": 0.724417507648468,
      "learning_rate": 0.00015512284031267437,
      "loss": 0.5132,
      "step": 152
    },
    {
      "epoch": 0.1401419738951225,
      "grad_norm": 0.724981427192688,
      "learning_rate": 0.00015454160167525685,
      "loss": 0.5337,
      "step": 153
    },
    {
      "epoch": 0.14105793450881612,
      "grad_norm": 1.0957812070846558,
      "learning_rate": 0.00015395772822958845,
      "loss": 0.6742,
      "step": 154
    },
    {
      "epoch": 0.14197389512250974,
      "grad_norm": 0.7567093372344971,
      "learning_rate": 0.00015337124818156205,
      "loss": 0.7095,
      "step": 155
    },
    {
      "epoch": 0.14288985573620333,
      "grad_norm": 0.891369640827179,
      "learning_rate": 0.00015278218986299074,
      "loss": 0.5988,
      "step": 156
    },
    {
      "epoch": 0.14380581634989695,
      "grad_norm": 0.7748335003852844,
      "learning_rate": 0.0001521905817302395,
      "loss": 0.6206,
      "step": 157
    },
    {
      "epoch": 0.14472177696359056,
      "grad_norm": 0.7905634641647339,
      "learning_rate": 0.0001515964523628501,
      "loss": 0.5326,
      "step": 158
    },
    {
      "epoch": 0.14563773757728418,
      "grad_norm": 0.9001010060310364,
      "learning_rate": 0.0001509998304621609,
      "loss": 0.6336,
      "step": 159
    },
    {
      "epoch": 0.1465536981909778,
      "grad_norm": 0.8940883278846741,
      "learning_rate": 0.00015040074484992,
      "loss": 0.6034,
      "step": 160
    },
    {
      "epoch": 0.1474696588046714,
      "grad_norm": 1.0931828022003174,
      "learning_rate": 0.00014979922446689306,
      "loss": 0.7144,
      "step": 161
    },
    {
      "epoch": 0.148385619418365,
      "grad_norm": 0.7018395662307739,
      "learning_rate": 0.00014919529837146528,
      "loss": 0.4174,
      "step": 162
    },
    {
      "epoch": 0.14930158003205862,
      "grad_norm": 0.6879329085350037,
      "learning_rate": 0.00014858899573823753,
      "loss": 0.3973,
      "step": 163
    },
    {
      "epoch": 0.15021754064575224,
      "grad_norm": 0.7063140869140625,
      "learning_rate": 0.00014798034585661695,
      "loss": 0.5576,
      "step": 164
    },
    {
      "epoch": 0.15113350125944586,
      "grad_norm": 0.5726875066757202,
      "learning_rate": 0.00014736937812940217,
      "loss": 0.464,
      "step": 165
    },
    {
      "epoch": 0.15204946187313945,
      "grad_norm": 0.6210931539535522,
      "learning_rate": 0.0001467561220713628,
      "loss": 0.431,
      "step": 166
    },
    {
      "epoch": 0.15296542248683306,
      "grad_norm": 0.855782687664032,
      "learning_rate": 0.00014614060730781377,
      "loss": 0.5312,
      "step": 167
    },
    {
      "epoch": 0.15388138310052668,
      "grad_norm": 0.6990883350372314,
      "learning_rate": 0.0001455228635731839,
      "loss": 0.5557,
      "step": 168
    },
    {
      "epoch": 0.1547973437142203,
      "grad_norm": 0.6450179219245911,
      "learning_rate": 0.0001449029207095798,
      "loss": 0.4738,
      "step": 169
    },
    {
      "epoch": 0.1557133043279139,
      "grad_norm": 0.8470801711082458,
      "learning_rate": 0.00014428080866534396,
      "loss": 0.8791,
      "step": 170
    },
    {
      "epoch": 0.1566292649416075,
      "grad_norm": 0.8217272758483887,
      "learning_rate": 0.00014365655749360833,
      "loss": 0.7099,
      "step": 171
    },
    {
      "epoch": 0.15754522555530112,
      "grad_norm": 0.8225908875465393,
      "learning_rate": 0.00014303019735084226,
      "loss": 0.9925,
      "step": 172
    },
    {
      "epoch": 0.15846118616899474,
      "grad_norm": 0.8007174134254456,
      "learning_rate": 0.00014240175849539565,
      "loss": 0.5416,
      "step": 173
    },
    {
      "epoch": 0.15937714678268836,
      "grad_norm": 0.8009291291236877,
      "learning_rate": 0.00014177127128603745,
      "loss": 0.8397,
      "step": 174
    },
    {
      "epoch": 0.16029310739638195,
      "grad_norm": 0.5703025460243225,
      "learning_rate": 0.00014113876618048897,
      "loss": 0.5596,
      "step": 175
    },
    {
      "epoch": 0.16120906801007556,
      "grad_norm": 0.824314296245575,
      "learning_rate": 0.0001405042737339524,
      "loss": 0.5353,
      "step": 176
    },
    {
      "epoch": 0.16212502862376918,
      "grad_norm": 0.7707583904266357,
      "learning_rate": 0.000139867824597635,
      "loss": 0.5362,
      "step": 177
    },
    {
      "epoch": 0.1630409892374628,
      "grad_norm": 0.7862932682037354,
      "learning_rate": 0.0001392294495172681,
      "loss": 0.5944,
      "step": 178
    },
    {
      "epoch": 0.1639569498511564,
      "grad_norm": 0.6190929412841797,
      "learning_rate": 0.0001385891793316221,
      "loss": 0.764,
      "step": 179
    },
    {
      "epoch": 0.16487291046485,
      "grad_norm": 0.457586407661438,
      "learning_rate": 0.00013794704497101655,
      "loss": 0.4105,
      "step": 180
    },
    {
      "epoch": 0.16578887107854362,
      "grad_norm": 0.4811520278453827,
      "learning_rate": 0.00013730307745582593,
      "loss": 0.358,
      "step": 181
    },
    {
      "epoch": 0.16670483169223724,
      "grad_norm": 0.790302574634552,
      "learning_rate": 0.0001366573078949813,
      "loss": 0.5677,
      "step": 182
    },
    {
      "epoch": 0.16762079230593085,
      "grad_norm": 0.8360859155654907,
      "learning_rate": 0.0001360097674844672,
      "loss": 0.4774,
      "step": 183
    },
    {
      "epoch": 0.16853675291962444,
      "grad_norm": 0.6449018716812134,
      "learning_rate": 0.00013536048750581494,
      "loss": 0.5234,
      "step": 184
    },
    {
      "epoch": 0.16945271353331806,
      "grad_norm": 0.7947589755058289,
      "learning_rate": 0.00013470949932459117,
      "loss": 0.4427,
      "step": 185
    },
    {
      "epoch": 0.17036867414701168,
      "grad_norm": 0.5645202994346619,
      "learning_rate": 0.00013405683438888282,
      "loss": 0.4278,
      "step": 186
    },
    {
      "epoch": 0.1712846347607053,
      "grad_norm": 0.6219568848609924,
      "learning_rate": 0.00013340252422777788,
      "loss": 0.3971,
      "step": 187
    },
    {
      "epoch": 0.1722005953743989,
      "grad_norm": 0.6408222317695618,
      "learning_rate": 0.00013274660044984224,
      "loss": 0.5036,
      "step": 188
    },
    {
      "epoch": 0.1731165559880925,
      "grad_norm": 0.7281380891799927,
      "learning_rate": 0.0001320890947415928,
      "loss": 0.5588,
      "step": 189
    },
    {
      "epoch": 0.17403251660178612,
      "grad_norm": 0.5523554682731628,
      "learning_rate": 0.00013143003886596669,
      "loss": 0.4022,
      "step": 190
    },
    {
      "epoch": 0.17494847721547974,
      "grad_norm": 0.923069179058075,
      "learning_rate": 0.0001307694646607869,
      "loss": 0.5009,
      "step": 191
    },
    {
      "epoch": 0.17586443782917335,
      "grad_norm": 0.5798431038856506,
      "learning_rate": 0.0001301074040372242,
      "loss": 0.4506,
      "step": 192
    },
    {
      "epoch": 0.17678039844286697,
      "grad_norm": 0.6347712874412537,
      "learning_rate": 0.0001294438889782556,
      "loss": 0.4864,
      "step": 193
    },
    {
      "epoch": 0.17769635905656056,
      "grad_norm": 0.7514728307723999,
      "learning_rate": 0.00012877895153711935,
      "loss": 0.5996,
      "step": 194
    },
    {
      "epoch": 0.17861231967025418,
      "grad_norm": 0.9295995235443115,
      "learning_rate": 0.00012811262383576646,
      "loss": 0.6651,
      "step": 195
    },
    {
      "epoch": 0.1795282802839478,
      "grad_norm": 0.8588167428970337,
      "learning_rate": 0.0001274449380633089,
      "loss": 0.6871,
      "step": 196
    },
    {
      "epoch": 0.1804442408976414,
      "grad_norm": 0.6991099119186401,
      "learning_rate": 0.00012677592647446472,
      "loss": 0.5572,
      "step": 197
    },
    {
      "epoch": 0.181360201511335,
      "grad_norm": 0.7341340780258179,
      "learning_rate": 0.00012610562138799978,
      "loss": 0.5595,
      "step": 198
    },
    {
      "epoch": 0.18227616212502862,
      "grad_norm": 0.8957074284553528,
      "learning_rate": 0.0001254340551851665,
      "loss": 0.573,
      "step": 199
    },
    {
      "epoch": 0.18319212273872224,
      "grad_norm": 0.9093302488327026,
      "learning_rate": 0.00012476126030813963,
      "loss": 0.7076,
      "step": 200
    },
    {
      "epoch": 0.18319212273872224,
      "eval_loss": 0.06856601685285568,
      "eval_runtime": 436.2246,
      "eval_samples_per_second": 3.338,
      "eval_steps_per_second": 0.834,
      "step": 200
    },
    {
      "epoch": 0.18410808335241585,
      "grad_norm": 0.9715799689292908,
      "learning_rate": 0.000124087269258449,
      "loss": 0.4213,
      "step": 201
    },
    {
      "epoch": 0.18502404396610947,
      "grad_norm": 0.9131537079811096,
      "learning_rate": 0.0001234121145954094,
      "loss": 0.7887,
      "step": 202
    },
    {
      "epoch": 0.18594000457980306,
      "grad_norm": 0.8268325924873352,
      "learning_rate": 0.00012273582893454775,
      "loss": 0.464,
      "step": 203
    },
    {
      "epoch": 0.18685596519349668,
      "grad_norm": 0.63968825340271,
      "learning_rate": 0.0001220584449460274,
      "loss": 0.5542,
      "step": 204
    },
    {
      "epoch": 0.1877719258071903,
      "grad_norm": 0.619226336479187,
      "learning_rate": 0.0001213799953530701,
      "loss": 0.5164,
      "step": 205
    },
    {
      "epoch": 0.1886878864208839,
      "grad_norm": 0.6572562456130981,
      "learning_rate": 0.00012070051293037492,
      "loss": 0.5105,
      "step": 206
    },
    {
      "epoch": 0.1896038470345775,
      "grad_norm": 0.8703852891921997,
      "learning_rate": 0.00012002003050253522,
      "loss": 0.724,
      "step": 207
    },
    {
      "epoch": 0.19051980764827112,
      "grad_norm": 0.6222299933433533,
      "learning_rate": 0.00011933858094245281,
      "loss": 0.4516,
      "step": 208
    },
    {
      "epoch": 0.19143576826196473,
      "grad_norm": 0.9056154489517212,
      "learning_rate": 0.00011865619716974984,
      "loss": 0.5116,
      "step": 209
    },
    {
      "epoch": 0.19235172887565835,
      "grad_norm": 0.5980194807052612,
      "learning_rate": 0.00011797291214917881,
      "loss": 0.4606,
      "step": 210
    },
    {
      "epoch": 0.19326768948935197,
      "grad_norm": 0.5795995593070984,
      "learning_rate": 0.00011728875888902975,
      "loss": 0.7419,
      "step": 211
    },
    {
      "epoch": 0.19418365010304556,
      "grad_norm": 0.8155220150947571,
      "learning_rate": 0.00011660377043953588,
      "loss": 0.5837,
      "step": 212
    },
    {
      "epoch": 0.19509961071673917,
      "grad_norm": 1.20220947265625,
      "learning_rate": 0.0001159179798912769,
      "loss": 0.5612,
      "step": 213
    },
    {
      "epoch": 0.1960155713304328,
      "grad_norm": 0.7634713649749756,
      "learning_rate": 0.0001152314203735805,
      "loss": 0.6243,
      "step": 214
    },
    {
      "epoch": 0.1969315319441264,
      "grad_norm": 0.5542187094688416,
      "learning_rate": 0.000114544125052922,
      "loss": 0.5042,
      "step": 215
    },
    {
      "epoch": 0.19784749255782003,
      "grad_norm": 0.8429003953933716,
      "learning_rate": 0.0001138561271313219,
      "loss": 0.6943,
      "step": 216
    },
    {
      "epoch": 0.19876345317151362,
      "grad_norm": 0.7061911225318909,
      "learning_rate": 0.00011316745984474226,
      "loss": 0.5509,
      "step": 217
    },
    {
      "epoch": 0.19967941378520723,
      "grad_norm": 1.0747299194335938,
      "learning_rate": 0.00011247815646148087,
      "loss": 0.4586,
      "step": 218
    },
    {
      "epoch": 0.20059537439890085,
      "grad_norm": 0.5371668338775635,
      "learning_rate": 0.0001117882502805643,
      "loss": 0.5494,
      "step": 219
    },
    {
      "epoch": 0.20151133501259447,
      "grad_norm": 1.095410704612732,
      "learning_rate": 0.00011109777463013915,
      "loss": 0.7209,
      "step": 220
    },
    {
      "epoch": 0.20242729562628806,
      "grad_norm": 1.1136436462402344,
      "learning_rate": 0.00011040676286586211,
      "loss": 0.5977,
      "step": 221
    },
    {
      "epoch": 0.20334325623998167,
      "grad_norm": 1.0239177942276,
      "learning_rate": 0.0001097152483692886,
      "loss": 0.7531,
      "step": 222
    },
    {
      "epoch": 0.2042592168536753,
      "grad_norm": 0.813605010509491,
      "learning_rate": 0.0001090232645462601,
      "loss": 0.5142,
      "step": 223
    },
    {
      "epoch": 0.2051751774673689,
      "grad_norm": 0.7018038034439087,
      "learning_rate": 0.00010833084482529048,
      "loss": 0.6111,
      "step": 224
    },
    {
      "epoch": 0.20609113808106252,
      "grad_norm": 0.9322212338447571,
      "learning_rate": 0.00010763802265595102,
      "loss": 0.594,
      "step": 225
    },
    {
      "epoch": 0.20700709869475611,
      "grad_norm": 0.8066678643226624,
      "learning_rate": 0.00010694483150725458,
      "loss": 0.5689,
      "step": 226
    },
    {
      "epoch": 0.20792305930844973,
      "grad_norm": 0.6629310846328735,
      "learning_rate": 0.00010625130486603878,
      "loss": 0.62,
      "step": 227
    },
    {
      "epoch": 0.20883901992214335,
      "grad_norm": 0.6048685908317566,
      "learning_rate": 0.00010555747623534831,
      "loss": 0.5044,
      "step": 228
    },
    {
      "epoch": 0.20975498053583697,
      "grad_norm": 0.8310180306434631,
      "learning_rate": 0.00010486337913281632,
      "loss": 0.4644,
      "step": 229
    },
    {
      "epoch": 0.21067094114953058,
      "grad_norm": 0.8517028093338013,
      "learning_rate": 0.00010416904708904548,
      "loss": 0.6316,
      "step": 230
    },
    {
      "epoch": 0.21158690176322417,
      "grad_norm": 0.6673111319541931,
      "learning_rate": 0.00010347451364598804,
      "loss": 0.4012,
      "step": 231
    },
    {
      "epoch": 0.2125028623769178,
      "grad_norm": 1.011790156364441,
      "learning_rate": 0.00010277981235532541,
      "loss": 0.3571,
      "step": 232
    },
    {
      "epoch": 0.2134188229906114,
      "grad_norm": 0.632638692855835,
      "learning_rate": 0.00010208497677684754,
      "loss": 0.3398,
      "step": 233
    },
    {
      "epoch": 0.21433478360430502,
      "grad_norm": 0.6011660695075989,
      "learning_rate": 0.00010139004047683151,
      "loss": 0.3741,
      "step": 234
    },
    {
      "epoch": 0.2152507442179986,
      "grad_norm": 1.4447038173675537,
      "learning_rate": 0.00010069503702642011,
      "loss": 0.5175,
      "step": 235
    },
    {
      "epoch": 0.21616670483169223,
      "grad_norm": 1.0104140043258667,
      "learning_rate": 0.0001,
      "loss": 0.4162,
      "step": 236
    },
    {
      "epoch": 0.21708266544538585,
      "grad_norm": 0.8013536334037781,
      "learning_rate": 9.930496297357993e-05,
      "loss": 0.5423,
      "step": 237
    },
    {
      "epoch": 0.21799862605907946,
      "grad_norm": 0.8041028380393982,
      "learning_rate": 9.860995952316851e-05,
      "loss": 0.6906,
      "step": 238
    },
    {
      "epoch": 0.21891458667277308,
      "grad_norm": 0.9673544764518738,
      "learning_rate": 9.791502322315249e-05,
      "loss": 0.7141,
      "step": 239
    },
    {
      "epoch": 0.21983054728646667,
      "grad_norm": 0.8152205348014832,
      "learning_rate": 9.722018764467461e-05,
      "loss": 0.6435,
      "step": 240
    },
    {
      "epoch": 0.2207465079001603,
      "grad_norm": 0.7280937433242798,
      "learning_rate": 9.652548635401201e-05,
      "loss": 0.5696,
      "step": 241
    },
    {
      "epoch": 0.2216624685138539,
      "grad_norm": 0.7318608164787292,
      "learning_rate": 9.583095291095453e-05,
      "loss": 0.5423,
      "step": 242
    },
    {
      "epoch": 0.22257842912754752,
      "grad_norm": 0.5921342372894287,
      "learning_rate": 9.513662086718372e-05,
      "loss": 0.6546,
      "step": 243
    },
    {
      "epoch": 0.22349438974124114,
      "grad_norm": 0.6680089831352234,
      "learning_rate": 9.444252376465171e-05,
      "loss": 0.5059,
      "step": 244
    },
    {
      "epoch": 0.22441035035493473,
      "grad_norm": 1.1326377391815186,
      "learning_rate": 9.374869513396123e-05,
      "loss": 0.5881,
      "step": 245
    },
    {
      "epoch": 0.22532631096862835,
      "grad_norm": 0.6495856642723083,
      "learning_rate": 9.305516849274541e-05,
      "loss": 0.4135,
      "step": 246
    },
    {
      "epoch": 0.22624227158232196,
      "grad_norm": 0.8290500044822693,
      "learning_rate": 9.236197734404901e-05,
      "loss": 0.6022,
      "step": 247
    },
    {
      "epoch": 0.22715823219601558,
      "grad_norm": 0.7909289002418518,
      "learning_rate": 9.166915517470953e-05,
      "loss": 0.4046,
      "step": 248
    },
    {
      "epoch": 0.22807419280970917,
      "grad_norm": 0.6260089874267578,
      "learning_rate": 9.09767354537399e-05,
      "loss": 0.4616,
      "step": 249
    },
    {
      "epoch": 0.2289901534234028,
      "grad_norm": 0.9241347908973694,
      "learning_rate": 9.028475163071141e-05,
      "loss": 0.5557,
      "step": 250
    },
    {
      "epoch": 0.2299061140370964,
      "grad_norm": 0.6013506054878235,
      "learning_rate": 8.959323713413791e-05,
      "loss": 0.4998,
      "step": 251
    },
    {
      "epoch": 0.23082207465079002,
      "grad_norm": 0.5378098487854004,
      "learning_rate": 8.890222536986085e-05,
      "loss": 0.5686,
      "step": 252
    },
    {
      "epoch": 0.23173803526448364,
      "grad_norm": 0.6205247640609741,
      "learning_rate": 8.821174971943572e-05,
      "loss": 0.4678,
      "step": 253
    },
    {
      "epoch": 0.23265399587817723,
      "grad_norm": 0.5681025981903076,
      "learning_rate": 8.752184353851916e-05,
      "loss": 0.4752,
      "step": 254
    },
    {
      "epoch": 0.23356995649187084,
      "grad_norm": 0.8576029539108276,
      "learning_rate": 8.683254015525776e-05,
      "loss": 0.6218,
      "step": 255
    },
    {
      "epoch": 0.23448591710556446,
      "grad_norm": 0.6471827626228333,
      "learning_rate": 8.614387286867814e-05,
      "loss": 0.5229,
      "step": 256
    },
    {
      "epoch": 0.23540187771925808,
      "grad_norm": 0.4855286478996277,
      "learning_rate": 8.545587494707803e-05,
      "loss": 0.297,
      "step": 257
    },
    {
      "epoch": 0.2363178383329517,
      "grad_norm": 7.151229381561279,
      "learning_rate": 8.47685796264195e-05,
      "loss": 0.5928,
      "step": 258
    },
    {
      "epoch": 0.23723379894664529,
      "grad_norm": 1.3571738004684448,
      "learning_rate": 8.408202010872312e-05,
      "loss": 0.63,
      "step": 259
    },
    {
      "epoch": 0.2381497595603389,
      "grad_norm": 0.8930146098136902,
      "learning_rate": 8.339622956046417e-05,
      "loss": 0.5622,
      "step": 260
    },
    {
      "epoch": 0.23906572017403252,
      "grad_norm": 0.5761184692382812,
      "learning_rate": 8.271124111097026e-05,
      "loss": 0.4166,
      "step": 261
    },
    {
      "epoch": 0.23998168078772614,
      "grad_norm": 0.89448082447052,
      "learning_rate": 8.202708785082121e-05,
      "loss": 0.5383,
      "step": 262
    },
    {
      "epoch": 0.24089764140141973,
      "grad_norm": 0.579725444316864,
      "learning_rate": 8.134380283025014e-05,
      "loss": 0.4337,
      "step": 263
    },
    {
      "epoch": 0.24181360201511334,
      "grad_norm": 0.692746102809906,
      "learning_rate": 8.066141905754723e-05,
      "loss": 0.7086,
      "step": 264
    },
    {
      "epoch": 0.24272956262880696,
      "grad_norm": 0.6122776865959167,
      "learning_rate": 7.997996949746477e-05,
      "loss": 0.7151,
      "step": 265
    },
    {
      "epoch": 0.24364552324250058,
      "grad_norm": 0.5838832259178162,
      "learning_rate": 7.929948706962508e-05,
      "loss": 0.4687,
      "step": 266
    },
    {
      "epoch": 0.2445614838561942,
      "grad_norm": 0.7323639988899231,
      "learning_rate": 7.862000464692991e-05,
      "loss": 0.6293,
      "step": 267
    },
    {
      "epoch": 0.24547744446988778,
      "grad_norm": 0.4069867730140686,
      "learning_rate": 7.794155505397261e-05,
      "loss": 0.3459,
      "step": 268
    },
    {
      "epoch": 0.2463934050835814,
      "grad_norm": 0.6492972373962402,
      "learning_rate": 7.72641710654523e-05,
      "loss": 0.5204,
      "step": 269
    },
    {
      "epoch": 0.24730936569727502,
      "grad_norm": 0.744772732257843,
      "learning_rate": 7.658788540459062e-05,
      "loss": 0.6464,
      "step": 270
    },
    {
      "epoch": 0.24822532631096864,
      "grad_norm": 0.6655353307723999,
      "learning_rate": 7.591273074155104e-05,
      "loss": 0.4257,
      "step": 271
    },
    {
      "epoch": 0.24914128692466225,
      "grad_norm": 0.7041018009185791,
      "learning_rate": 7.523873969186039e-05,
      "loss": 0.5422,
      "step": 272
    },
    {
      "epoch": 0.25005724753835584,
      "grad_norm": 0.7334919571876526,
      "learning_rate": 7.456594481483355e-05,
      "loss": 0.4416,
      "step": 273
    },
    {
      "epoch": 0.2509732081520495,
      "grad_norm": 0.6543317437171936,
      "learning_rate": 7.389437861200024e-05,
      "loss": 0.5293,
      "step": 274
    },
    {
      "epoch": 0.2518891687657431,
      "grad_norm": 0.8528045415878296,
      "learning_rate": 7.322407352553529e-05,
      "loss": 0.7414,
      "step": 275
    },
    {
      "epoch": 0.25280512937943667,
      "grad_norm": 0.7721304297447205,
      "learning_rate": 7.25550619366911e-05,
      "loss": 0.6171,
      "step": 276
    },
    {
      "epoch": 0.2537210899931303,
      "grad_norm": 0.6353263854980469,
      "learning_rate": 7.188737616423356e-05,
      "loss": 0.6457,
      "step": 277
    },
    {
      "epoch": 0.2546370506068239,
      "grad_norm": 0.8189141750335693,
      "learning_rate": 7.122104846288064e-05,
      "loss": 0.4837,
      "step": 278
    },
    {
      "epoch": 0.25555301122051755,
      "grad_norm": 0.6262407898902893,
      "learning_rate": 7.055611102174442e-05,
      "loss": 0.3863,
      "step": 279
    },
    {
      "epoch": 0.25646897183421113,
      "grad_norm": 0.8298683762550354,
      "learning_rate": 6.989259596277582e-05,
      "loss": 0.6259,
      "step": 280
    },
    {
      "epoch": 0.2573849324479047,
      "grad_norm": 0.8603139519691467,
      "learning_rate": 6.923053533921312e-05,
      "loss": 0.7719,
      "step": 281
    },
    {
      "epoch": 0.25830089306159837,
      "grad_norm": 0.42033544182777405,
      "learning_rate": 6.85699611340333e-05,
      "loss": 0.3598,
      "step": 282
    },
    {
      "epoch": 0.25921685367529196,
      "grad_norm": 0.9463576078414917,
      "learning_rate": 6.791090525840722e-05,
      "loss": 0.5428,
      "step": 283
    },
    {
      "epoch": 0.26013281428898555,
      "grad_norm": 0.42964935302734375,
      "learning_rate": 6.725339955015777e-05,
      "loss": 0.3862,
      "step": 284
    },
    {
      "epoch": 0.2610487749026792,
      "grad_norm": 0.8793493509292603,
      "learning_rate": 6.659747577222216e-05,
      "loss": 0.6907,
      "step": 285
    },
    {
      "epoch": 0.2619647355163728,
      "grad_norm": 0.7326351404190063,
      "learning_rate": 6.594316561111724e-05,
      "loss": 0.599,
      "step": 286
    },
    {
      "epoch": 0.2628806961300664,
      "grad_norm": 0.866133451461792,
      "learning_rate": 6.529050067540887e-05,
      "loss": 0.5384,
      "step": 287
    },
    {
      "epoch": 0.26379665674376,
      "grad_norm": 0.8126096725463867,
      "learning_rate": 6.46395124941851e-05,
      "loss": 0.7318,
      "step": 288
    },
    {
      "epoch": 0.2647126173574536,
      "grad_norm": 0.8120489120483398,
      "learning_rate": 6.39902325155328e-05,
      "loss": 0.5245,
      "step": 289
    },
    {
      "epoch": 0.26562857797114725,
      "grad_norm": 0.7803285121917725,
      "learning_rate": 6.334269210501875e-05,
      "loss": 0.5096,
      "step": 290
    },
    {
      "epoch": 0.26654453858484084,
      "grad_norm": 0.8887495994567871,
      "learning_rate": 6.269692254417408e-05,
      "loss": 0.5458,
      "step": 291
    },
    {
      "epoch": 0.2674604991985345,
      "grad_norm": 0.9203519821166992,
      "learning_rate": 6.205295502898348e-05,
      "loss": 0.7658,
      "step": 292
    },
    {
      "epoch": 0.2683764598122281,
      "grad_norm": 0.818914532661438,
      "learning_rate": 6.141082066837791e-05,
      "loss": 0.7152,
      "step": 293
    },
    {
      "epoch": 0.26929242042592166,
      "grad_norm": 0.8341096639633179,
      "learning_rate": 6.0770550482731924e-05,
      "loss": 0.785,
      "step": 294
    },
    {
      "epoch": 0.2702083810396153,
      "grad_norm": 0.7843111753463745,
      "learning_rate": 6.013217540236502e-05,
      "loss": 0.5326,
      "step": 295
    },
    {
      "epoch": 0.2711243416533089,
      "grad_norm": 0.8106828927993774,
      "learning_rate": 5.9495726266047605e-05,
      "loss": 0.5984,
      "step": 296
    },
    {
      "epoch": 0.27204030226700254,
      "grad_norm": 0.6807352900505066,
      "learning_rate": 5.886123381951103e-05,
      "loss": 0.5012,
      "step": 297
    },
    {
      "epoch": 0.27295626288069613,
      "grad_norm": 0.6257815957069397,
      "learning_rate": 5.8228728713962543e-05,
      "loss": 0.4357,
      "step": 298
    },
    {
      "epoch": 0.2738722234943897,
      "grad_norm": 0.7749564051628113,
      "learning_rate": 5.759824150460435e-05,
      "loss": 0.4646,
      "step": 299
    },
    {
      "epoch": 0.27478818410808337,
      "grad_norm": 0.6776332259178162,
      "learning_rate": 5.696980264915777e-05,
      "loss": 0.5005,
      "step": 300
    },
    {
      "epoch": 0.27478818410808337,
      "eval_loss": 0.06278909742832184,
      "eval_runtime": 436.0829,
      "eval_samples_per_second": 3.339,
      "eval_steps_per_second": 0.835,
      "step": 300
    },
    {
      "epoch": 0.27570414472177696,
      "grad_norm": 0.9247587323188782,
      "learning_rate": 5.63434425063917e-05,
      "loss": 0.4515,
      "step": 301
    },
    {
      "epoch": 0.2766201053354706,
      "grad_norm": 0.5948374271392822,
      "learning_rate": 5.571919133465605e-05,
      "loss": 0.4276,
      "step": 302
    },
    {
      "epoch": 0.2775360659491642,
      "grad_norm": 0.504909873008728,
      "learning_rate": 5.50970792904203e-05,
      "loss": 0.4076,
      "step": 303
    },
    {
      "epoch": 0.2784520265628578,
      "grad_norm": 0.5952023863792419,
      "learning_rate": 5.447713642681612e-05,
      "loss": 0.3557,
      "step": 304
    },
    {
      "epoch": 0.2793679871765514,
      "grad_norm": 0.5355116724967957,
      "learning_rate": 5.385939269218625e-05,
      "loss": 0.3973,
      "step": 305
    },
    {
      "epoch": 0.280283947790245,
      "grad_norm": 0.6517271995544434,
      "learning_rate": 5.324387792863719e-05,
      "loss": 0.5735,
      "step": 306
    },
    {
      "epoch": 0.2811999084039386,
      "grad_norm": 0.5679106712341309,
      "learning_rate": 5.263062187059785e-05,
      "loss": 0.2186,
      "step": 307
    },
    {
      "epoch": 0.28211586901763225,
      "grad_norm": 0.6380028128623962,
      "learning_rate": 5.201965414338308e-05,
      "loss": 0.5342,
      "step": 308
    },
    {
      "epoch": 0.28303182963132584,
      "grad_norm": 0.5233590006828308,
      "learning_rate": 5.14110042617625e-05,
      "loss": 0.3793,
      "step": 309
    },
    {
      "epoch": 0.2839477902450195,
      "grad_norm": 0.6338326334953308,
      "learning_rate": 5.080470162853472e-05,
      "loss": 0.4085,
      "step": 310
    },
    {
      "epoch": 0.28486375085871307,
      "grad_norm": 1.0255392789840698,
      "learning_rate": 5.020077553310694e-05,
      "loss": 0.534,
      "step": 311
    },
    {
      "epoch": 0.28577971147240666,
      "grad_norm": 0.6331342458724976,
      "learning_rate": 4.959925515008002e-05,
      "loss": 0.5402,
      "step": 312
    },
    {
      "epoch": 0.2866956720861003,
      "grad_norm": 0.5389162302017212,
      "learning_rate": 4.900016953783912e-05,
      "loss": 0.4272,
      "step": 313
    },
    {
      "epoch": 0.2876116326997939,
      "grad_norm": 0.8973006010055542,
      "learning_rate": 4.840354763714991e-05,
      "loss": 0.6449,
      "step": 314
    },
    {
      "epoch": 0.28852759331348754,
      "grad_norm": 0.4471147060394287,
      "learning_rate": 4.7809418269760545e-05,
      "loss": 0.3032,
      "step": 315
    },
    {
      "epoch": 0.28944355392718113,
      "grad_norm": 0.8016964197158813,
      "learning_rate": 4.7217810137009274e-05,
      "loss": 0.449,
      "step": 316
    },
    {
      "epoch": 0.2903595145408747,
      "grad_norm": 0.6460964679718018,
      "learning_rate": 4.6628751818437985e-05,
      "loss": 0.4222,
      "step": 317
    },
    {
      "epoch": 0.29127547515456836,
      "grad_norm": 0.8219476342201233,
      "learning_rate": 4.604227177041156e-05,
      "loss": 0.4928,
      "step": 318
    },
    {
      "epoch": 0.29219143576826195,
      "grad_norm": 0.8703890442848206,
      "learning_rate": 4.545839832474318e-05,
      "loss": 0.7624,
      "step": 319
    },
    {
      "epoch": 0.2931073963819556,
      "grad_norm": 0.6923498511314392,
      "learning_rate": 4.487715968732568e-05,
      "loss": 0.6358,
      "step": 320
    },
    {
      "epoch": 0.2940233569956492,
      "grad_norm": 1.0428471565246582,
      "learning_rate": 4.4298583936768976e-05,
      "loss": 0.474,
      "step": 321
    },
    {
      "epoch": 0.2949393176093428,
      "grad_norm": 0.5889642238616943,
      "learning_rate": 4.372269902304363e-05,
      "loss": 0.5298,
      "step": 322
    },
    {
      "epoch": 0.2958552782230364,
      "grad_norm": 0.7348572611808777,
      "learning_rate": 4.314953276613066e-05,
      "loss": 0.4542,
      "step": 323
    },
    {
      "epoch": 0.29677123883673,
      "grad_norm": 0.6076330542564392,
      "learning_rate": 4.257911285467754e-05,
      "loss": 0.4647,
      "step": 324
    },
    {
      "epoch": 0.29768719945042366,
      "grad_norm": 0.6895171999931335,
      "learning_rate": 4.2011466844660655e-05,
      "loss": 0.5157,
      "step": 325
    },
    {
      "epoch": 0.29860316006411725,
      "grad_norm": 0.5947766304016113,
      "learning_rate": 4.144662215805426e-05,
      "loss": 0.4753,
      "step": 326
    },
    {
      "epoch": 0.29951912067781084,
      "grad_norm": 0.9488824009895325,
      "learning_rate": 4.0884606081505374e-05,
      "loss": 0.7115,
      "step": 327
    },
    {
      "epoch": 0.3004350812915045,
      "grad_norm": 0.6592872142791748,
      "learning_rate": 4.0325445765016145e-05,
      "loss": 0.565,
      "step": 328
    },
    {
      "epoch": 0.30135104190519807,
      "grad_norm": 0.7744607925415039,
      "learning_rate": 3.9769168220631745e-05,
      "loss": 0.5028,
      "step": 329
    },
    {
      "epoch": 0.3022670025188917,
      "grad_norm": 0.5825055837631226,
      "learning_rate": 3.921580032113602e-05,
      "loss": 0.3888,
      "step": 330
    },
    {
      "epoch": 0.3031829631325853,
      "grad_norm": 0.9765796065330505,
      "learning_rate": 3.866536879875269e-05,
      "loss": 0.82,
      "step": 331
    },
    {
      "epoch": 0.3040989237462789,
      "grad_norm": 0.6944766640663147,
      "learning_rate": 3.8117900243854595e-05,
      "loss": 0.544,
      "step": 332
    },
    {
      "epoch": 0.30501488435997254,
      "grad_norm": 0.6479721665382385,
      "learning_rate": 3.757342110367871e-05,
      "loss": 0.306,
      "step": 333
    },
    {
      "epoch": 0.3059308449736661,
      "grad_norm": 0.7690525054931641,
      "learning_rate": 3.7031957681048604e-05,
      "loss": 0.6559,
      "step": 334
    },
    {
      "epoch": 0.3068468055873597,
      "grad_norm": 0.6109075546264648,
      "learning_rate": 3.649353613310409e-05,
      "loss": 0.5924,
      "step": 335
    },
    {
      "epoch": 0.30776276620105336,
      "grad_norm": 0.9807355403900146,
      "learning_rate": 3.595818247003713e-05,
      "loss": 0.4463,
      "step": 336
    },
    {
      "epoch": 0.30867872681474695,
      "grad_norm": 0.6450587511062622,
      "learning_rate": 3.542592255383586e-05,
      "loss": 0.6291,
      "step": 337
    },
    {
      "epoch": 0.3095946874284406,
      "grad_norm": 0.5581932663917542,
      "learning_rate": 3.489678209703475e-05,
      "loss": 0.3438,
      "step": 338
    },
    {
      "epoch": 0.3105106480421342,
      "grad_norm": 0.5771118402481079,
      "learning_rate": 3.437078666147292e-05,
      "loss": 0.3232,
      "step": 339
    },
    {
      "epoch": 0.3114266086558278,
      "grad_norm": 0.5707378387451172,
      "learning_rate": 3.3847961657058845e-05,
      "loss": 0.4577,
      "step": 340
    },
    {
      "epoch": 0.3123425692695214,
      "grad_norm": 0.7203882336616516,
      "learning_rate": 3.332833234054331e-05,
      "loss": 0.6825,
      "step": 341
    },
    {
      "epoch": 0.313258529883215,
      "grad_norm": 0.7787111401557922,
      "learning_rate": 3.281192381429894e-05,
      "loss": 0.5245,
      "step": 342
    },
    {
      "epoch": 0.31417449049690865,
      "grad_norm": 0.5097988247871399,
      "learning_rate": 3.2298761025107706e-05,
      "loss": 0.4066,
      "step": 343
    },
    {
      "epoch": 0.31509045111060224,
      "grad_norm": 0.7460060119628906,
      "learning_rate": 3.178886876295578e-05,
      "loss": 0.5263,
      "step": 344
    },
    {
      "epoch": 0.31600641172429583,
      "grad_norm": 0.5440467000007629,
      "learning_rate": 3.1282271659835946e-05,
      "loss": 0.4018,
      "step": 345
    },
    {
      "epoch": 0.3169223723379895,
      "grad_norm": 0.5210183262825012,
      "learning_rate": 3.077899418855772e-05,
      "loss": 0.3788,
      "step": 346
    },
    {
      "epoch": 0.31783833295168307,
      "grad_norm": 0.7923403978347778,
      "learning_rate": 3.0279060661565028e-05,
      "loss": 0.3204,
      "step": 347
    },
    {
      "epoch": 0.3187542935653767,
      "grad_norm": 0.6634204387664795,
      "learning_rate": 2.9782495229761808e-05,
      "loss": 0.4491,
      "step": 348
    },
    {
      "epoch": 0.3196702541790703,
      "grad_norm": 0.6991851925849915,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 0.4511,
      "step": 349
    },
    {
      "epoch": 0.3205862147927639,
      "grad_norm": 0.48899880051612854,
      "learning_rate": 2.879956444064703e-05,
      "loss": 0.4142,
      "step": 350
    },
    {
      "epoch": 0.32150217540645754,
      "grad_norm": 1.1039400100708008,
      "learning_rate": 2.8313246566982345e-05,
      "loss": 0.5192,
      "step": 351
    },
    {
      "epoch": 0.3224181360201511,
      "grad_norm": 0.6065302491188049,
      "learning_rate": 2.783039175350699e-05,
      "loss": 0.4985,
      "step": 352
    },
    {
      "epoch": 0.32333409663384477,
      "grad_norm": 0.5304561257362366,
      "learning_rate": 2.735102332608247e-05,
      "loss": 0.3723,
      "step": 353
    },
    {
      "epoch": 0.32425005724753836,
      "grad_norm": 0.594549834728241,
      "learning_rate": 2.6875164442149147e-05,
      "loss": 0.4838,
      "step": 354
    },
    {
      "epoch": 0.32516601786123195,
      "grad_norm": 0.7670130729675293,
      "learning_rate": 2.640283808960754e-05,
      "loss": 0.4514,
      "step": 355
    },
    {
      "epoch": 0.3260819784749256,
      "grad_norm": 0.7829962968826294,
      "learning_rate": 2.5934067085707834e-05,
      "loss": 0.8716,
      "step": 356
    },
    {
      "epoch": 0.3269979390886192,
      "grad_norm": 0.6512992978096008,
      "learning_rate": 2.54688740759476e-05,
      "loss": 0.7148,
      "step": 357
    },
    {
      "epoch": 0.3279138997023128,
      "grad_norm": 0.5506056547164917,
      "learning_rate": 2.500728153297788e-05,
      "loss": 0.4458,
      "step": 358
    },
    {
      "epoch": 0.3288298603160064,
      "grad_norm": 0.8548818230628967,
      "learning_rate": 2.4549311755517457e-05,
      "loss": 0.4366,
      "step": 359
    },
    {
      "epoch": 0.3297458209297,
      "grad_norm": 0.5396615862846375,
      "learning_rate": 2.409498686727587e-05,
      "loss": 0.5165,
      "step": 360
    },
    {
      "epoch": 0.33066178154339365,
      "grad_norm": 0.5458703637123108,
      "learning_rate": 2.364432881588431e-05,
      "loss": 0.512,
      "step": 361
    },
    {
      "epoch": 0.33157774215708724,
      "grad_norm": 0.6842907071113586,
      "learning_rate": 2.3197359371835802e-05,
      "loss": 0.5221,
      "step": 362
    },
    {
      "epoch": 0.33249370277078083,
      "grad_norm": 0.6149701476097107,
      "learning_rate": 2.275410012743303e-05,
      "loss": 0.5035,
      "step": 363
    },
    {
      "epoch": 0.3334096633844745,
      "grad_norm": 0.46057558059692383,
      "learning_rate": 2.2314572495745746e-05,
      "loss": 0.4132,
      "step": 364
    },
    {
      "epoch": 0.33432562399816806,
      "grad_norm": 0.7470035552978516,
      "learning_rate": 2.1878797709575847e-05,
      "loss": 0.4181,
      "step": 365
    },
    {
      "epoch": 0.3352415846118617,
      "grad_norm": 0.4799354076385498,
      "learning_rate": 2.1446796820432167e-05,
      "loss": 0.3314,
      "step": 366
    },
    {
      "epoch": 0.3361575452255553,
      "grad_norm": 0.7197858691215515,
      "learning_rate": 2.101859069751301e-05,
      "loss": 0.575,
      "step": 367
    },
    {
      "epoch": 0.3370735058392489,
      "grad_norm": 0.7954948544502258,
      "learning_rate": 2.0594200026698363e-05,
      "loss": 0.4358,
      "step": 368
    },
    {
      "epoch": 0.33798946645294253,
      "grad_norm": 0.5921378135681152,
      "learning_rate": 2.0173645309550548e-05,
      "loss": 0.3168,
      "step": 369
    },
    {
      "epoch": 0.3389054270666361,
      "grad_norm": 0.7418331503868103,
      "learning_rate": 1.9756946862323535e-05,
      "loss": 0.4903,
      "step": 370
    },
    {
      "epoch": 0.33982138768032977,
      "grad_norm": 0.5789597034454346,
      "learning_rate": 1.934412481498198e-05,
      "loss": 0.4672,
      "step": 371
    },
    {
      "epoch": 0.34073734829402336,
      "grad_norm": 0.6937307119369507,
      "learning_rate": 1.8935199110228275e-05,
      "loss": 0.5296,
      "step": 372
    },
    {
      "epoch": 0.34165330890771695,
      "grad_norm": 1.0454219579696655,
      "learning_rate": 1.8530189502539607e-05,
      "loss": 0.4224,
      "step": 373
    },
    {
      "epoch": 0.3425692695214106,
      "grad_norm": 0.5890056490898132,
      "learning_rate": 1.8129115557213262e-05,
      "loss": 0.8359,
      "step": 374
    },
    {
      "epoch": 0.3434852301351042,
      "grad_norm": 0.4845948815345764,
      "learning_rate": 1.7731996649421802e-05,
      "loss": 0.3724,
      "step": 375
    },
    {
      "epoch": 0.3444011907487978,
      "grad_norm": 0.6197191476821899,
      "learning_rate": 1.7338851963276825e-05,
      "loss": 0.5535,
      "step": 376
    },
    {
      "epoch": 0.3453171513624914,
      "grad_norm": 1.0201290845870972,
      "learning_rate": 1.6949700490902344e-05,
      "loss": 0.7269,
      "step": 377
    },
    {
      "epoch": 0.346233111976185,
      "grad_norm": 0.9290168881416321,
      "learning_rate": 1.656456103151728e-05,
      "loss": 0.5618,
      "step": 378
    },
    {
      "epoch": 0.34714907258987865,
      "grad_norm": 0.7306623458862305,
      "learning_rate": 1.6183452190527316e-05,
      "loss": 0.4795,
      "step": 379
    },
    {
      "epoch": 0.34806503320357224,
      "grad_norm": 0.6636963486671448,
      "learning_rate": 1.580639237862608e-05,
      "loss": 0.3895,
      "step": 380
    },
    {
      "epoch": 0.3489809938172659,
      "grad_norm": 1.0929412841796875,
      "learning_rate": 1.543339981090578e-05,
      "loss": 0.6531,
      "step": 381
    },
    {
      "epoch": 0.3498969544309595,
      "grad_norm": 0.6249703764915466,
      "learning_rate": 1.5064492505977234e-05,
      "loss": 0.4611,
      "step": 382
    },
    {
      "epoch": 0.35081291504465306,
      "grad_norm": 0.734741747379303,
      "learning_rate": 1.4699688285099489e-05,
      "loss": 0.8428,
      "step": 383
    },
    {
      "epoch": 0.3517288756583467,
      "grad_norm": 0.5528324246406555,
      "learning_rate": 1.433900477131882e-05,
      "loss": 0.5264,
      "step": 384
    },
    {
      "epoch": 0.3526448362720403,
      "grad_norm": 0.7774396538734436,
      "learning_rate": 1.3982459388617452e-05,
      "loss": 0.4981,
      "step": 385
    },
    {
      "epoch": 0.35356079688573394,
      "grad_norm": 0.7864802479743958,
      "learning_rate": 1.363006936107183e-05,
      "loss": 0.4647,
      "step": 386
    },
    {
      "epoch": 0.35447675749942753,
      "grad_norm": 0.624590277671814,
      "learning_rate": 1.328185171202052e-05,
      "loss": 0.4544,
      "step": 387
    },
    {
      "epoch": 0.3553927181131211,
      "grad_norm": 0.7023553848266602,
      "learning_rate": 1.29378232632419e-05,
      "loss": 0.5434,
      "step": 388
    },
    {
      "epoch": 0.35630867872681476,
      "grad_norm": 0.8227251172065735,
      "learning_rate": 1.259800063414146e-05,
      "loss": 0.7397,
      "step": 389
    },
    {
      "epoch": 0.35722463934050835,
      "grad_norm": 0.7069660425186157,
      "learning_rate": 1.2262400240949023e-05,
      "loss": 0.4147,
      "step": 390
    },
    {
      "epoch": 0.35814059995420194,
      "grad_norm": 0.5528481602668762,
      "learning_rate": 1.1931038295925645e-05,
      "loss": 0.4243,
      "step": 391
    },
    {
      "epoch": 0.3590565605678956,
      "grad_norm": 0.4696814715862274,
      "learning_rate": 1.1603930806580444e-05,
      "loss": 0.4239,
      "step": 392
    },
    {
      "epoch": 0.3599725211815892,
      "grad_norm": 1.4645037651062012,
      "learning_rate": 1.1281093574897338e-05,
      "loss": 0.455,
      "step": 393
    },
    {
      "epoch": 0.3608884817952828,
      "grad_norm": 0.7214867472648621,
      "learning_rate": 1.0962542196571634e-05,
      "loss": 0.4941,
      "step": 394
    },
    {
      "epoch": 0.3618044424089764,
      "grad_norm": 0.5920029878616333,
      "learning_rate": 1.0648292060256649e-05,
      "loss": 0.4427,
      "step": 395
    },
    {
      "epoch": 0.36272040302267,
      "grad_norm": 0.7575090527534485,
      "learning_rate": 1.0338358346820353e-05,
      "loss": 0.4352,
      "step": 396
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.5316157341003418,
      "learning_rate": 1.0032756028611878e-05,
      "loss": 0.4208,
      "step": 397
    },
    {
      "epoch": 0.36455232425005724,
      "grad_norm": 0.5491222739219666,
      "learning_rate": 9.731499868738447e-06,
      "loss": 0.3124,
      "step": 398
    },
    {
      "epoch": 0.3654682848637509,
      "grad_norm": 0.7701692581176758,
      "learning_rate": 9.434604420351911e-06,
      "loss": 0.3397,
      "step": 399
    },
    {
      "epoch": 0.36638424547744447,
      "grad_norm": 0.5326722264289856,
      "learning_rate": 9.142084025945984e-06,
      "loss": 0.3623,
      "step": 400
    },
    {
      "epoch": 0.36638424547744447,
      "eval_loss": 0.05946353077888489,
      "eval_runtime": 435.6161,
      "eval_samples_per_second": 3.342,
      "eval_steps_per_second": 0.836,
      "step": 400
    },
    {
      "epoch": 0.36730020609113806,
      "grad_norm": 0.6355307698249817,
      "learning_rate": 8.853952816663213e-06,
      "loss": 0.4239,
      "step": 401
    },
    {
      "epoch": 0.3682161667048317,
      "grad_norm": 0.5561894774436951,
      "learning_rate": 8.570224711612385e-06,
      "loss": 0.4807,
      "step": 402
    },
    {
      "epoch": 0.3691321273185253,
      "grad_norm": 0.6513518691062927,
      "learning_rate": 8.290913417196177e-06,
      "loss": 0.3068,
      "step": 403
    },
    {
      "epoch": 0.37004808793221894,
      "grad_norm": 0.6410642862319946,
      "learning_rate": 8.016032426448817e-06,
      "loss": 0.476,
      "step": 404
    },
    {
      "epoch": 0.37096404854591253,
      "grad_norm": 1.7064963579177856,
      "learning_rate": 7.745595018384578e-06,
      "loss": 0.7487,
      "step": 405
    },
    {
      "epoch": 0.3718800091596061,
      "grad_norm": 0.8433213233947754,
      "learning_rate": 7.479614257355971e-06,
      "loss": 0.4346,
      "step": 406
    },
    {
      "epoch": 0.37279596977329976,
      "grad_norm": 0.736240804195404,
      "learning_rate": 7.2181029924228814e-06,
      "loss": 0.4794,
      "step": 407
    },
    {
      "epoch": 0.37371193038699335,
      "grad_norm": 0.6535281538963318,
      "learning_rate": 6.961073856731648e-06,
      "loss": 0.5845,
      "step": 408
    },
    {
      "epoch": 0.374627891000687,
      "grad_norm": 0.5158947706222534,
      "learning_rate": 6.708539266905001e-06,
      "loss": 0.4633,
      "step": 409
    },
    {
      "epoch": 0.3755438516143806,
      "grad_norm": 0.7100207805633545,
      "learning_rate": 6.460511422441984e-06,
      "loss": 0.3909,
      "step": 410
    },
    {
      "epoch": 0.3764598122280742,
      "grad_norm": 0.49073487520217896,
      "learning_rate": 6.217002305128849e-06,
      "loss": 0.347,
      "step": 411
    },
    {
      "epoch": 0.3773757728417678,
      "grad_norm": 0.8159440159797668,
      "learning_rate": 5.978023678460099e-06,
      "loss": 0.6058,
      "step": 412
    },
    {
      "epoch": 0.3782917334554614,
      "grad_norm": 0.6300156712532043,
      "learning_rate": 5.743587087070235e-06,
      "loss": 0.566,
      "step": 413
    },
    {
      "epoch": 0.379207694069155,
      "grad_norm": 0.605778694152832,
      "learning_rate": 5.5137038561761115e-06,
      "loss": 0.5922,
      "step": 414
    },
    {
      "epoch": 0.38012365468284864,
      "grad_norm": 0.5530845522880554,
      "learning_rate": 5.2883850910297235e-06,
      "loss": 0.4646,
      "step": 415
    },
    {
      "epoch": 0.38103961529654223,
      "grad_norm": 0.5650749206542969,
      "learning_rate": 5.067641676381918e-06,
      "loss": 0.34,
      "step": 416
    },
    {
      "epoch": 0.3819555759102359,
      "grad_norm": 0.7632530331611633,
      "learning_rate": 4.8514842759563306e-06,
      "loss": 0.4461,
      "step": 417
    },
    {
      "epoch": 0.38287153652392947,
      "grad_norm": 0.9296861290931702,
      "learning_rate": 4.639923331934471e-06,
      "loss": 0.7671,
      "step": 418
    },
    {
      "epoch": 0.38378749713762306,
      "grad_norm": 0.5393553376197815,
      "learning_rate": 4.432969064451109e-06,
      "loss": 0.4055,
      "step": 419
    },
    {
      "epoch": 0.3847034577513167,
      "grad_norm": 0.6695364713668823,
      "learning_rate": 4.230631471100655e-06,
      "loss": 0.5134,
      "step": 420
    },
    {
      "epoch": 0.3856194183650103,
      "grad_norm": 0.5043479800224304,
      "learning_rate": 4.032920326454159e-06,
      "loss": 0.3956,
      "step": 421
    },
    {
      "epoch": 0.38653537897870394,
      "grad_norm": 1.0091841220855713,
      "learning_rate": 3.839845181587098e-06,
      "loss": 0.3713,
      "step": 422
    },
    {
      "epoch": 0.3874513395923975,
      "grad_norm": 0.4001891314983368,
      "learning_rate": 3.6514153636180383e-06,
      "loss": 0.2791,
      "step": 423
    },
    {
      "epoch": 0.3883673002060911,
      "grad_norm": 0.626771867275238,
      "learning_rate": 3.467639975257997e-06,
      "loss": 0.3861,
      "step": 424
    },
    {
      "epoch": 0.38928326081978476,
      "grad_norm": 0.4506821930408478,
      "learning_rate": 3.288527894370752e-06,
      "loss": 0.3876,
      "step": 425
    },
    {
      "epoch": 0.39019922143347835,
      "grad_norm": 0.7395780682563782,
      "learning_rate": 3.1140877735439387e-06,
      "loss": 0.51,
      "step": 426
    },
    {
      "epoch": 0.391115182047172,
      "grad_norm": 0.6306988596916199,
      "learning_rate": 2.944328039671085e-06,
      "loss": 0.4696,
      "step": 427
    },
    {
      "epoch": 0.3920311426608656,
      "grad_norm": 0.7126988768577576,
      "learning_rate": 2.7792568935444796e-06,
      "loss": 0.4441,
      "step": 428
    },
    {
      "epoch": 0.3929471032745592,
      "grad_norm": 0.5041316151618958,
      "learning_rate": 2.618882309459081e-06,
      "loss": 0.3623,
      "step": 429
    },
    {
      "epoch": 0.3938630638882528,
      "grad_norm": 0.6836007237434387,
      "learning_rate": 2.4632120348272003e-06,
      "loss": 0.7209,
      "step": 430
    },
    {
      "epoch": 0.3947790245019464,
      "grad_norm": 0.5515868067741394,
      "learning_rate": 2.312253589804314e-06,
      "loss": 0.3443,
      "step": 431
    },
    {
      "epoch": 0.39569498511564005,
      "grad_norm": 0.5961638689041138,
      "learning_rate": 2.166014266925731e-06,
      "loss": 0.3673,
      "step": 432
    },
    {
      "epoch": 0.39661094572933364,
      "grad_norm": 0.5713266134262085,
      "learning_rate": 2.0245011307543416e-06,
      "loss": 0.3757,
      "step": 433
    },
    {
      "epoch": 0.39752690634302723,
      "grad_norm": 0.7907454967498779,
      "learning_rate": 1.88772101753929e-06,
      "loss": 0.5574,
      "step": 434
    },
    {
      "epoch": 0.3984428669567209,
      "grad_norm": 0.5408697128295898,
      "learning_rate": 1.7556805348858064e-06,
      "loss": 0.4235,
      "step": 435
    },
    {
      "epoch": 0.39935882757041447,
      "grad_norm": 0.8136631846427917,
      "learning_rate": 1.6283860614358936e-06,
      "loss": 0.5299,
      "step": 436
    },
    {
      "epoch": 0.4002747881841081,
      "grad_norm": 0.6327822208404541,
      "learning_rate": 1.5058437465602982e-06,
      "loss": 0.5456,
      "step": 437
    },
    {
      "epoch": 0.4011907487978017,
      "grad_norm": 0.5776308178901672,
      "learning_rate": 1.3880595100613792e-06,
      "loss": 0.5112,
      "step": 438
    },
    {
      "epoch": 0.4021067094114953,
      "grad_norm": 0.4584222435951233,
      "learning_rate": 1.2750390418871604e-06,
      "loss": 0.3786,
      "step": 439
    },
    {
      "epoch": 0.40302267002518893,
      "grad_norm": 0.5486764311790466,
      "learning_rate": 1.1667878018564171e-06,
      "loss": 0.488,
      "step": 440
    },
    {
      "epoch": 0.4039386306388825,
      "grad_norm": 0.510213315486908,
      "learning_rate": 1.063311019395008e-06,
      "loss": 0.535,
      "step": 441
    },
    {
      "epoch": 0.4048545912525761,
      "grad_norm": 0.5908893346786499,
      "learning_rate": 9.64613693283123e-07,
      "loss": 0.4208,
      "step": 442
    },
    {
      "epoch": 0.40577055186626976,
      "grad_norm": 0.9434095025062561,
      "learning_rate": 8.707005914139422e-07,
      "loss": 0.6034,
      "step": 443
    },
    {
      "epoch": 0.40668651247996335,
      "grad_norm": 0.42651069164276123,
      "learning_rate": 7.815762505632096e-07,
      "loss": 0.3516,
      "step": 444
    },
    {
      "epoch": 0.407602473093657,
      "grad_norm": 0.6566410064697266,
      "learning_rate": 6.972449761700861e-07,
      "loss": 0.4768,
      "step": 445
    },
    {
      "epoch": 0.4085184337073506,
      "grad_norm": 0.6780850291252136,
      "learning_rate": 6.177108421292266e-07,
      "loss": 0.4385,
      "step": 446
    },
    {
      "epoch": 0.40943439432104417,
      "grad_norm": 0.6476641297340393,
      "learning_rate": 5.429776905938489e-07,
      "loss": 0.6201,
      "step": 447
    },
    {
      "epoch": 0.4103503549347378,
      "grad_norm": 0.5498802065849304,
      "learning_rate": 4.7304913179025965e-07,
      "loss": 0.4303,
      "step": 448
    },
    {
      "epoch": 0.4112663155484314,
      "grad_norm": 0.722607433795929,
      "learning_rate": 4.0792854384338333e-07,
      "loss": 0.5461,
      "step": 449
    },
    {
      "epoch": 0.41218227616212505,
      "grad_norm": 0.7780727744102478,
      "learning_rate": 3.4761907261356976e-07,
      "loss": 0.5537,
      "step": 450
    },
    {
      "epoch": 0.41309823677581864,
      "grad_norm": 0.5408957600593567,
      "learning_rate": 2.921236315446385e-07,
      "loss": 0.4887,
      "step": 451
    },
    {
      "epoch": 0.41401419738951223,
      "grad_norm": 0.7068083882331848,
      "learning_rate": 2.414449015231357e-07,
      "loss": 0.6064,
      "step": 452
    },
    {
      "epoch": 0.4149301580032059,
      "grad_norm": 0.6516684889793396,
      "learning_rate": 1.9558533074882646e-07,
      "loss": 0.6653,
      "step": 453
    },
    {
      "epoch": 0.41584611861689946,
      "grad_norm": 0.5151285529136658,
      "learning_rate": 1.545471346164007e-07,
      "loss": 0.4024,
      "step": 454
    },
    {
      "epoch": 0.4167620792305931,
      "grad_norm": 0.6734718084335327,
      "learning_rate": 1.1833229560848092e-07,
      "loss": 0.6197,
      "step": 455
    },
    {
      "epoch": 0.4176780398442867,
      "grad_norm": 0.5829524993896484,
      "learning_rate": 8.694256319987659e-08,
      "loss": 0.5708,
      "step": 456
    },
    {
      "epoch": 0.4185940004579803,
      "grad_norm": 0.5721414685249329,
      "learning_rate": 6.037945377297405e-08,
      "loss": 0.2411,
      "step": 457
    },
    {
      "epoch": 0.41950996107167393,
      "grad_norm": 0.47081419825553894,
      "learning_rate": 3.8644250544594975e-08,
      "loss": 0.3974,
      "step": 458
    },
    {
      "epoch": 0.4204259216853675,
      "grad_norm": 0.5603060126304626,
      "learning_rate": 2.1738003503946057e-08,
      "loss": 0.5078,
      "step": 459
    },
    {
      "epoch": 0.42134188229906117,
      "grad_norm": 0.6785393953323364,
      "learning_rate": 9.661529361892907e-09,
      "loss": 0.4146,
      "step": 460
    },
    {
      "epoch": 0.42225784291275476,
      "grad_norm": 0.7074640989303589,
      "learning_rate": 2.4154115115360144e-09,
      "loss": 0.3551,
      "step": 461
    },
    {
      "epoch": 0.42317380352644834,
      "grad_norm": 0.7316946983337402,
      "learning_rate": 0.0,
      "loss": 0.5625,
      "step": 462
    }
  ],
  "logging_steps": 1,
  "max_steps": 462,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1453536664210637e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}