{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.982857142857143, "eval_steps": 500, "global_step": 215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22857142857142856, "grad_norm": 1.4335445165634155, "learning_rate": 1.8181818181818182e-05, "loss": 0.2099, "step": 10 }, { "epoch": 0.45714285714285713, "grad_norm": 1.0458570718765259, "learning_rate": 4.0909090909090915e-05, "loss": 0.0488, "step": 20 }, { "epoch": 0.6857142857142857, "grad_norm": 2.0838749408721924, "learning_rate": 4.988086142592657e-05, "loss": 0.0793, "step": 30 }, { "epoch": 0.9142857142857143, "grad_norm": 0.2897286117076874, "learning_rate": 4.9156900467474784e-05, "loss": 0.0443, "step": 40 }, { "epoch": 1.16, "grad_norm": 0.42701688408851624, "learning_rate": 4.779428322078716e-05, "loss": 0.0483, "step": 50 }, { "epoch": 1.3885714285714286, "grad_norm": 1.3219645023345947, "learning_rate": 4.582903434367222e-05, "loss": 0.0234, "step": 60 }, { "epoch": 1.617142857142857, "grad_norm": 0.7189111113548279, "learning_rate": 4.3313110773031186e-05, "loss": 0.0351, "step": 70 }, { "epoch": 1.8457142857142856, "grad_norm": 0.19501705467700958, "learning_rate": 4.031302809563292e-05, "loss": 0.0282, "step": 80 }, { "epoch": 2.0914285714285716, "grad_norm": 0.47736552357673645, "learning_rate": 3.6908102019621666e-05, "loss": 0.0241, "step": 90 }, { "epoch": 2.32, "grad_norm": 0.5043835043907166, "learning_rate": 3.3188351438453996e-05, "loss": 0.0127, "step": 100 }, { "epoch": 2.5485714285714285, "grad_norm": 0.4290105402469635, "learning_rate": 2.925211852572667e-05, "loss": 0.0146, "step": 110 }, { "epoch": 2.777142857142857, "grad_norm": 0.4446091055870056, "learning_rate": 2.5203468780449323e-05, "loss": 0.0115, "step": 120 }, { "epoch": 3.0228571428571427, "grad_norm": 0.5306875109672546, "learning_rate": 2.1149439759951277e-05, "loss": 0.0113, "step": 130 }, { "epoch": 3.2514285714285713, "grad_norm": 0.09367158263921738, "learning_rate": 1.719721123798362e-05, "loss": 0.008, "step": 140 }, { "epoch": 3.48, "grad_norm": 0.40400221943855286, "learning_rate": 1.3451271602926246e-05, "loss": 0.003, "step": 150 }, { "epoch": 3.7085714285714286, "grad_norm": 0.9837874174118042, "learning_rate": 1.0010655410412744e-05, "loss": 0.0042, "step": 160 }, { "epoch": 3.9371428571428573, "grad_norm": 1.6841741800308228, "learning_rate": 6.966325123517109e-06, "loss": 0.0098, "step": 170 }, { "epoch": 4.182857142857143, "grad_norm": 1.109780192375183, "learning_rate": 4.398766261638271e-06, "loss": 0.0042, "step": 180 }, { "epoch": 4.411428571428571, "grad_norm": 0.17733371257781982, "learning_rate": 2.375859537153302e-06, "loss": 0.0036, "step": 190 }, { "epoch": 4.64, "grad_norm": 0.039348188787698746, "learning_rate": 9.510862359517814e-07, "loss": 0.0007, "step": 200 }, { "epoch": 4.868571428571428, "grad_norm": 0.06708058714866638, "learning_rate": 1.6211428771484295e-07, "loss": 0.0005, "step": 210 }, { "epoch": 4.982857142857143, "step": 215, "total_flos": 6.16643394722857e+16, "train_loss": 0.029113658708195354, "train_runtime": 1637.6537, "train_samples_per_second": 1.069, "train_steps_per_second": 0.131 } ], "logging_steps": 10, "max_steps": 215, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.16643394722857e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }