{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.24647887323943662, "eval_steps": 500, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.035211267605633804, "grad_norm": 4.435381889343262, "learning_rate": 8.401166180758017e-06, "loss": 0.3742, "mean_token_accuracy": 0.8945995926856994, "num_tokens": 301695.0, "step": 5 }, { "epoch": 0.07042253521126761, "grad_norm": 1.0536936521530151, "learning_rate": 1.8902623906705538e-05, "loss": 0.2437, "mean_token_accuracy": 0.9109556913375855, "num_tokens": 607230.0, "step": 10 }, { "epoch": 0.1056338028169014, "grad_norm": 0.8927873373031616, "learning_rate": 2.9404081632653062e-05, "loss": 0.201, "mean_token_accuracy": 0.9295879244804383, "num_tokens": 913896.0, "step": 15 }, { "epoch": 0.14084507042253522, "grad_norm": 0.6603397130966187, "learning_rate": 3.990553935860058e-05, "loss": 0.1947, "mean_token_accuracy": 0.9296464204788208, "num_tokens": 1234009.0, "step": 20 }, { "epoch": 0.176056338028169, "grad_norm": 0.5713601112365723, "learning_rate": 5.0406997084548104e-05, "loss": 0.1712, "mean_token_accuracy": 0.9373371362686157, "num_tokens": 1536066.0, "step": 25 }, { "epoch": 0.2112676056338028, "grad_norm": 0.6312325596809387, "learning_rate": 6.090845481049563e-05, "loss": 0.1683, "mean_token_accuracy": 0.9364290118217469, "num_tokens": 1854154.0, "step": 30 }, { "epoch": 0.24647887323943662, "grad_norm": 1.2735894918441772, "learning_rate": 7.140991253644314e-05, "loss": 0.1312, "mean_token_accuracy": 0.9548299431800842, "num_tokens": 2160408.0, "step": 35 } ], "logging_steps": 5, "max_steps": 142, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.105089563623424e+16, "train_batch_size": 24, "trial_name": null, "trial_params": null }