{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.08135207142711871,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004067603571355936,
      "grad_norm": 9.81611442565918,
      "learning_rate": 2.2131887985546525e-06,
      "loss": 2.9739,
      "step": 50
    },
    {
      "epoch": 0.008135207142711872,
      "grad_norm": 9.377063751220703,
      "learning_rate": 4.471544715447155e-06,
      "loss": 3.0327,
      "step": 100
    },
    {
      "epoch": 0.012202810714067806,
      "grad_norm": 8.876896858215332,
      "learning_rate": 6.729900632339657e-06,
      "loss": 2.9709,
      "step": 150
    },
    {
      "epoch": 0.016270414285423744,
      "grad_norm": 9.680480003356934,
      "learning_rate": 8.988256549232158e-06,
      "loss": 3.0062,
      "step": 200
    },
    {
      "epoch": 0.02033801785677968,
      "grad_norm": 9.369101524353027,
      "learning_rate": 1.1246612466124661e-05,
      "loss": 2.9465,
      "step": 250
    },
    {
      "epoch": 0.024405621428135613,
      "grad_norm": 10.327061653137207,
      "learning_rate": 1.3504968383017163e-05,
      "loss": 2.9502,
      "step": 300
    },
    {
      "epoch": 0.02847322499949155,
      "grad_norm": 9.842864036560059,
      "learning_rate": 1.5763324299909665e-05,
      "loss": 2.9462,
      "step": 350
    },
    {
      "epoch": 0.03254082857084749,
      "grad_norm": 10.124307632446289,
      "learning_rate": 1.8021680216802168e-05,
      "loss": 2.9537,
      "step": 400
    },
    {
      "epoch": 0.03660843214220342,
      "grad_norm": 10.100955963134766,
      "learning_rate": 2.028003613369467e-05,
      "loss": 2.9044,
      "step": 450
    },
    {
      "epoch": 0.04067603571355936,
      "grad_norm": 9.107622146606445,
      "learning_rate": 2.253839205058717e-05,
      "loss": 2.9053,
      "step": 500
    },
    {
      "epoch": 0.04067603571355936,
      "eval_loss": 2.963116407394409,
      "eval_runtime": 642.2788,
      "eval_samples_per_second": 3.094,
      "eval_steps_per_second": 0.774,
      "step": 500
    },
    {
      "epoch": 0.044743639284915294,
      "grad_norm": 10.40243148803711,
      "learning_rate": 2.4796747967479675e-05,
      "loss": 2.9854,
      "step": 550
    },
    {
      "epoch": 0.048811242856271225,
      "grad_norm": 9.785271644592285,
      "learning_rate": 2.7055103884372178e-05,
      "loss": 2.918,
      "step": 600
    },
    {
      "epoch": 0.05287884642762716,
      "grad_norm": 9.53836727142334,
      "learning_rate": 2.931345980126468e-05,
      "loss": 2.9519,
      "step": 650
    },
    {
      "epoch": 0.0569464499989831,
      "grad_norm": 9.973458290100098,
      "learning_rate": 3.1571815718157185e-05,
      "loss": 2.9703,
      "step": 700
    },
    {
      "epoch": 0.06101405357033903,
      "grad_norm": 9.470365524291992,
      "learning_rate": 3.3830171635049685e-05,
      "loss": 2.9412,
      "step": 750
    },
    {
      "epoch": 0.06508165714169498,
      "grad_norm": 9.310904502868652,
      "learning_rate": 3.6088527551942185e-05,
      "loss": 2.9269,
      "step": 800
    },
    {
      "epoch": 0.0691492607130509,
      "grad_norm": 9.931621551513672,
      "learning_rate": 3.8346883468834685e-05,
      "loss": 2.8926,
      "step": 850
    },
    {
      "epoch": 0.07321686428440684,
      "grad_norm": 10.827827453613281,
      "learning_rate": 4.060523938572719e-05,
      "loss": 2.8518,
      "step": 900
    },
    {
      "epoch": 0.07728446785576278,
      "grad_norm": 10.306633949279785,
      "learning_rate": 4.28635953026197e-05,
      "loss": 2.8674,
      "step": 950
    },
    {
      "epoch": 0.08135207142711871,
      "grad_norm": 9.62368106842041,
      "learning_rate": 4.51219512195122e-05,
      "loss": 2.8594,
      "step": 1000
    },
    {
      "epoch": 0.08135207142711871,
      "eval_loss": 2.8507587909698486,
      "eval_runtime": 642.7633,
      "eval_samples_per_second": 3.091,
      "eval_steps_per_second": 0.773,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 36879,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.3766488498176e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}