{ "best_metric": null, "best_model_checkpoint": null, "epoch": null, "eval_steps": 500, "global_step": 310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 2.8125e-06, "loss": 0.5093, "reward": 3.7168, "step": 9 }, { "epoch": 0.61, "learning_rate": 5.9375e-06, "loss": 0.3901, "reward": 3.9072, "step": 19 }, { "epoch": 0.94, "learning_rate": 9.0625e-06, "loss": 0.3463, "reward": 3.795, "step": 29 }, { "epoch": 1.26, "learning_rate": 9.985430661522333e-06, "loss": 0.1907, "reward": 4.8141, "step": 39 }, { "epoch": 1.58, "learning_rate": 9.914274958326507e-06, "loss": 0.1381, "reward": 4.9855, "step": 49 }, { "epoch": 1.9, "learning_rate": 9.784701678661045e-06, "loss": 0.1166, "reward": 5.1631, "step": 59 }, { "epoch": 2.23, "learning_rate": 9.598251102025463e-06, "loss": 0.1019, "reward": 5.3393, "step": 69 }, { "epoch": 2.55, "learning_rate": 9.357139626751308e-06, "loss": 0.0751, "reward": 5.4161, "step": 79 }, { "epoch": 2.87, "learning_rate": 9.064233422958078e-06, "loss": 0.1013, "reward": 5.4679, "step": 89 }, { "epoch": 3.19, "learning_rate": 8.723014361461633e-06, "loss": 0.0473, "reward": 5.523, "step": 99 }, { "epoch": 3.52, "learning_rate": 8.337538623649237e-06, "loss": 0.0388, "reward": 5.5629, "step": 109 }, { "epoch": 3.84, "learning_rate": 7.912388484339012e-06, "loss": 0.0874, "reward": 5.3349, "step": 119 }, { "epoch": 4.16, "learning_rate": 7.4526178407965396e-06, "loss": 0.0367, "reward": 5.4507, "step": 129 }, { "epoch": 4.48, "learning_rate": 6.963692135422872e-06, "loss": 0.0387, "reward": 5.6017, "step": 139 }, { "epoch": 4.81, "learning_rate": 6.451423386272312e-06, "loss": 0.0302, "reward": 5.798, "step": 149 }, { "epoch": 5.13, "learning_rate": 5.921901097713317e-06, "loss": 0.0424, "reward": 5.7266, "step": 159 }, { "epoch": 5.45, "learning_rate": 5.381419872519763e-06, "loss": 0.0397, "reward": 5.762, "step": 169 }, { "epoch": 5.77, "learning_rate": 4.83640458589112e-06, "loss": 0.0327, "reward": 5.7574, "step": 179 }, { "epoch": 6.1, "learning_rate": 4.293334010882164e-06, "loss": 0.0301, "reward": 5.7441, "step": 189 }, { "epoch": 6.42, "learning_rate": 3.7586638031314182e-06, "loss": 0.0149, "reward": 5.8132, "step": 199 }, { "epoch": 6.74, "learning_rate": 3.2387497603938327e-06, "loss": 0.0367, "reward": 5.7579, "step": 209 }, { "epoch": 7.06, "learning_rate": 2.739772269116402e-06, "loss": 0.0369, "reward": 5.7852, "step": 219 }, { "epoch": 7.39, "learning_rate": 2.2676628361847834e-06, "loss": 0.038, "reward": 5.7178, "step": 229 }, { "epoch": 7.71, "learning_rate": 1.8280335791817733e-06, "loss": 0.0198, "reward": 5.8608, "step": 239 }, { "epoch": 8.03, "learning_rate": 1.4261105133297693e-06, "loss": 0.0323, "reward": 5.7764, "step": 249 }, { "epoch": 8.35, "learning_rate": 1.0666714281569152e-06, "loss": 0.0215, "reward": 5.8996, "step": 259 }, { "epoch": 8.68, "learning_rate": 7.539890923671061e-07, "loss": 0.0206, "reward": 5.8818, "step": 269 }, { "epoch": 9.0, "learning_rate": 4.917804620559202e-07, "loss": 0.0445, "reward": 5.689, "step": 279 }, { "epoch": 9.32, "learning_rate": 2.8316249605087386e-07, "loss": 0.0234, "reward": 6.027, "step": 289 }, { "epoch": 9.65, "learning_rate": 1.3061510361333186e-07, "loss": 0.0142, "reward": 5.8128, "step": 299 }, { "epoch": 9.97, "learning_rate": 3.59516649547248e-08, "loss": 0.0158, "reward": 5.8429, "step": 309 } ], "logging_steps": 500, "max_steps": 310, "num_input_tokens_seen": 0, "num_train_epochs": 10.0, "save_steps": 500, "stateful_callbacks": {}, "total_flos": 0, "train_batch_size": null, "trial_name": null, "trial_params": null }