{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14285714285714285, "grad_norm": 3.1672253608703613, "learning_rate": 1.9714285714285718e-05, "logps/anchor": 0.4348521828651428, "logps/policy": -69.89246368408203, "loss": 0.3538, "step": 1 }, { "epoch": 0.2857142857142857, "grad_norm": 12.092653274536133, "learning_rate": 1.942857142857143e-05, "logps/anchor": 0.6565061807632446, "logps/policy": -72.86190032958984, "loss": 0.4353, "step": 2 }, { "epoch": 0.42857142857142855, "grad_norm": 4.009256839752197, "learning_rate": 1.9142857142857146e-05, "logps/anchor": 0.24830026924610138, "logps/policy": -72.61256408691406, "loss": 0.3256, "step": 3 }, { "epoch": 0.5714285714285714, "grad_norm": 2.732830286026001, "learning_rate": 1.885714285714286e-05, "logps/anchor": 0.28446492552757263, "logps/policy": -60.2266731262207, "loss": 0.3314, "step": 4 }, { "epoch": 0.7142857142857143, "grad_norm": 1.894191026687622, "learning_rate": 1.8571428571428575e-05, "logps/anchor": 0.4391743540763855, "logps/policy": -66.86183166503906, "loss": 0.3081, "step": 5 }, { "epoch": 0.8571428571428571, "grad_norm": 1.734279751777649, "learning_rate": 1.8285714285714288e-05, "logps/anchor": 0.29031452536582947, "logps/policy": -59.80109786987305, "loss": 0.2218, "step": 6 }, { "epoch": 1.0, "grad_norm": 1.2599438428878784, "learning_rate": 1.8e-05, "logps/anchor": 0.062131937593221664, "logps/policy": -66.06212615966797, "loss": 0.1533, "step": 7 }, { "epoch": 1.1428571428571428, "grad_norm": 1.0272470712661743, "learning_rate": 1.7714285714285717e-05, "logps/anchor": 0.08727352321147919, "logps/policy": -31.806640625, "loss": 0.127, "step": 8 }, { "epoch": 1.2857142857142856, "grad_norm": 1.0852797031402588, "learning_rate": 1.742857142857143e-05, "logps/anchor": 0.16654472053050995, "logps/policy": -37.08126449584961, "loss": 0.1622, "step": 9 }, { "epoch": 1.4285714285714286, "grad_norm": 1.2848658561706543, "learning_rate": 1.7142857142857142e-05, "logps/anchor": 0.2580033540725708, "logps/policy": -34.01033401489258, "loss": 0.1272, "step": 10 }, { "epoch": 1.5714285714285714, "grad_norm": 1.1476317644119263, "learning_rate": 1.6857142857142858e-05, "logps/anchor": 0.1905135065317154, "logps/policy": -29.186994552612305, "loss": 0.1409, "step": 11 }, { "epoch": 1.7142857142857144, "grad_norm": 1.1124459505081177, "learning_rate": 1.6571428571428574e-05, "logps/anchor": 0.08628815412521362, "logps/policy": -32.86701202392578, "loss": 0.091, "step": 12 }, { "epoch": 1.8571428571428572, "grad_norm": 1.09062922000885, "learning_rate": 1.6285714285714287e-05, "logps/anchor": 0.06508912891149521, "logps/policy": -31.805673599243164, "loss": 0.093, "step": 13 }, { "epoch": 2.0, "grad_norm": 0.8760924935340881, "learning_rate": 1.6000000000000003e-05, "logps/anchor": 0.022830532863736153, "logps/policy": -36.078948974609375, "loss": 0.0725, "step": 14 } ], "logging_steps": 1.0, "max_steps": 70, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 10, "trial_name": null, "trial_params": null }