{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.973821989528796,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 1.821414025646637,
      "learning_rate": 1.5517241379310346e-05,
      "loss": 0.4862,
      "step": 10
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 0.3942393541183772,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.3896,
      "step": 20
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 0.8255719356243104,
      "learning_rate": 5e-05,
      "loss": 0.3473,
      "step": 30
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 0.37799831218550856,
      "learning_rate": 4.981198836496775e-05,
      "loss": 0.3207,
      "step": 40
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 0.2838496737375545,
      "learning_rate": 4.9250781329863606e-05,
      "loss": 0.3069,
      "step": 50
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 0.35016464634119265,
      "learning_rate": 4.8324819970868473e-05,
      "loss": 0.2959,
      "step": 60
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 0.265970946973345,
      "learning_rate": 4.7048031608708876e-05,
      "loss": 0.2888,
      "step": 70
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.3283794303490618,
      "learning_rate": 4.5439620328789593e-05,
      "loss": 0.2812,
      "step": 80
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 0.2933681591769702,
      "learning_rate": 4.352377813387398e-05,
      "loss": 0.2821,
      "step": 90
    },
    {
      "epoch": 1.0418848167539267,
      "grad_norm": 0.784193010247334,
      "learning_rate": 4.1329321073844415e-05,
      "loss": 0.2646,
      "step": 100
    },
    {
      "epoch": 1.1465968586387434,
      "grad_norm": 0.43774945809310173,
      "learning_rate": 3.888925582549006e-05,
      "loss": 0.2466,
      "step": 110
    },
    {
      "epoch": 1.2513089005235603,
      "grad_norm": 0.24107111209505053,
      "learning_rate": 3.624028324136517e-05,
      "loss": 0.2429,
      "step": 120
    },
    {
      "epoch": 1.356020942408377,
      "grad_norm": 0.20357460785884127,
      "learning_rate": 3.34222463348055e-05,
      "loss": 0.2408,
      "step": 130
    },
    {
      "epoch": 1.4607329842931938,
      "grad_norm": 0.22761632175708865,
      "learning_rate": 3.0477531003921745e-05,
      "loss": 0.2407,
      "step": 140
    },
    {
      "epoch": 1.5654450261780104,
      "grad_norm": 0.201935897320554,
      "learning_rate": 2.7450428508239024e-05,
      "loss": 0.237,
      "step": 150
    },
    {
      "epoch": 1.6701570680628273,
      "grad_norm": 0.17116285919322816,
      "learning_rate": 2.4386469286927196e-05,
      "loss": 0.2366,
      "step": 160
    },
    {
      "epoch": 1.7748691099476441,
      "grad_norm": 0.16436756820682696,
      "learning_rate": 2.1331738138615958e-05,
      "loss": 0.2357,
      "step": 170
    },
    {
      "epoch": 1.8795811518324608,
      "grad_norm": 0.16052539184685044,
      "learning_rate": 1.8332181063127545e-05,
      "loss": 0.2338,
      "step": 180
    },
    {
      "epoch": 1.9842931937172774,
      "grad_norm": 0.1441672107096996,
      "learning_rate": 1.5432914190872757e-05,
      "loss": 0.235,
      "step": 190
    },
    {
      "epoch": 2.0837696335078535,
      "grad_norm": 0.18363079335917978,
      "learning_rate": 1.2677545194255402e-05,
      "loss": 0.207,
      "step": 200
    },
    {
      "epoch": 2.18848167539267,
      "grad_norm": 0.15661072667443002,
      "learning_rate": 1.0107517387689166e-05,
      "loss": 0.2003,
      "step": 210
    },
    {
      "epoch": 2.2931937172774868,
      "grad_norm": 0.13738033977955236,
      "learning_rate": 7.761486381573327e-06,
      "loss": 0.2002,
      "step": 220
    },
    {
      "epoch": 2.3979057591623034,
      "grad_norm": 0.11864758554083908,
      "learning_rate": 5.674738665931575e-06,
      "loss": 0.2016,
      "step": 230
    },
    {
      "epoch": 2.5026178010471205,
      "grad_norm": 0.11174553605583903,
      "learning_rate": 3.878660868757323e-06,
      "loss": 0.2018,
      "step": 240
    },
    {
      "epoch": 2.607329842931937,
      "grad_norm": 0.10468557565480724,
      "learning_rate": 2.4002676719139166e-06,
      "loss": 0.1992,
      "step": 250
    },
    {
      "epoch": 2.712041884816754,
      "grad_norm": 0.11078267776032583,
      "learning_rate": 1.2617954851740832e-06,
      "loss": 0.1994,
      "step": 260
    },
    {
      "epoch": 2.816753926701571,
      "grad_norm": 0.09962367034420198,
      "learning_rate": 4.803679899192392e-07,
      "loss": 0.2002,
      "step": 270
    },
    {
      "epoch": 2.9214659685863875,
      "grad_norm": 0.10060873138847887,
      "learning_rate": 6.773858303274483e-08,
      "loss": 0.1993,
      "step": 280
    },
    {
      "epoch": 2.973821989528796,
      "step": 285,
      "total_flos": 2039941919932416.0,
      "train_loss": 0.25715315007326894,
      "train_runtime": 42232.1226,
      "train_samples_per_second": 3.469,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 10,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2039941919932416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}