{ "best_global_step": 4512, "best_metric": 0.6173886673369102, "best_model_checkpoint": "./results-freezed-seed-1337/checkpoint-4512", "epoch": 12.0, "eval_steps": 500, "global_step": 4512, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8514221556886228, "eval_f1": 0.5200098481984468, "eval_loss": 0.41102635860443115, "eval_precision": 0.6923180742188034, "eval_recall": 0.4806756140132551, "eval_runtime": 4.5834, "eval_samples_per_second": 582.968, "eval_steps_per_second": 9.163, "step": 376 }, { "epoch": 1.3297872340425532, "grad_norm": 0.7757077217102051, "learning_rate": 9.98e-05, "loss": 0.495, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8577844311377245, "eval_f1": 0.5730788120751112, "eval_loss": 0.39201080799102783, "eval_precision": 0.6977099441536995, "eval_recall": 0.5293901508987233, "eval_runtime": 4.604, "eval_samples_per_second": 580.37, "eval_steps_per_second": 9.123, "step": 752 }, { "epoch": 2.6595744680851063, "grad_norm": 1.0780051946640015, "learning_rate": 9.727322404371585e-05, "loss": 0.3715, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.8562874251497006, "eval_f1": 0.5806868386243386, "eval_loss": 0.39746958017349243, "eval_precision": 0.712392115222304, "eval_recall": 0.5741099719517916, "eval_runtime": 4.5817, "eval_samples_per_second": 583.191, "eval_steps_per_second": 9.167, "step": 1128 }, { "epoch": 3.9893617021276597, "grad_norm": 1.1657006740570068, "learning_rate": 9.454098360655738e-05, "loss": 0.3742, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.8581586826347305, "eval_f1": 0.5920838495005096, "eval_loss": 0.38324663043022156, "eval_precision": 0.6939596170509158, "eval_recall": 0.5721612756031641, "eval_runtime": 4.5777, "eval_samples_per_second": 583.698, "eval_steps_per_second": 9.175, "step": 1504 }, { "epoch": 5.0, "eval_accuracy": 0.8626497005988024, "eval_f1": 0.6006349511818206, "eval_loss": 0.38067346811294556, "eval_precision": 0.7274851647000302, "eval_recall": 0.5764902405636445, "eval_runtime": 4.5968, "eval_samples_per_second": 581.279, "eval_steps_per_second": 9.137, "step": 1880 }, { "epoch": 5.319148936170213, "grad_norm": 0.9436430335044861, "learning_rate": 9.180874316939892e-05, "loss": 0.3653, "step": 2000 }, { "epoch": 6.0, "eval_accuracy": 0.8611526946107785, "eval_f1": 0.5985470178350242, "eval_loss": 0.3742508888244629, "eval_precision": 0.6949008756795468, "eval_recall": 0.5653264792318812, "eval_runtime": 4.5845, "eval_samples_per_second": 582.836, "eval_steps_per_second": 9.161, "step": 2256 }, { "epoch": 6.648936170212766, "grad_norm": 0.9061560034751892, "learning_rate": 8.907650273224044e-05, "loss": 0.3549, "step": 2500 }, { "epoch": 7.0, "eval_accuracy": 0.8637724550898204, "eval_f1": 0.6031509777257861, "eval_loss": 0.37474435567855835, "eval_precision": 0.7055885161401955, "eval_recall": 0.5722105753552295, "eval_runtime": 4.5864, "eval_samples_per_second": 582.595, "eval_steps_per_second": 9.158, "step": 2632 }, { "epoch": 7.9787234042553195, "grad_norm": 1.1018545627593994, "learning_rate": 8.634426229508197e-05, "loss": 0.3629, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.8600299401197605, "eval_f1": 0.5634030324961963, "eval_loss": 0.3930947184562683, "eval_precision": 0.7388251505680451, "eval_recall": 0.5248301631518656, "eval_runtime": 4.5855, "eval_samples_per_second": 582.709, "eval_steps_per_second": 9.159, "step": 3008 }, { "epoch": 9.0, "eval_accuracy": 0.8622754491017964, "eval_f1": 0.6073500599359862, "eval_loss": 0.37540125846862793, "eval_precision": 0.7171149745776613, "eval_recall": 0.5780268292760754, "eval_runtime": 4.5802, "eval_samples_per_second": 583.383, "eval_steps_per_second": 9.17, "step": 3384 }, { "epoch": 9.308510638297872, "grad_norm": 0.7780441641807556, "learning_rate": 8.361202185792351e-05, "loss": 0.3501, "step": 3500 }, { "epoch": 10.0, "eval_accuracy": 0.8611526946107785, "eval_f1": 0.5852506203922853, "eval_loss": 0.37850335240364075, "eval_precision": 0.7259173963793549, "eval_recall": 0.5616111286611846, "eval_runtime": 4.5776, "eval_samples_per_second": 583.717, "eval_steps_per_second": 9.175, "step": 3760 }, { "epoch": 10.638297872340425, "grad_norm": 0.9797687530517578, "learning_rate": 8.087978142076503e-05, "loss": 0.3546, "step": 4000 }, { "epoch": 11.0, "eval_accuracy": 0.8619011976047904, "eval_f1": 0.5814284313893647, "eval_loss": 0.3815386891365051, "eval_precision": 0.7212152666879591, "eval_recall": 0.5368830750281599, "eval_runtime": 4.5836, "eval_samples_per_second": 582.951, "eval_steps_per_second": 9.163, "step": 4136 }, { "epoch": 11.96808510638298, "grad_norm": 0.7723910808563232, "learning_rate": 7.814754098360655e-05, "loss": 0.3452, "step": 4500 }, { "epoch": 12.0, "eval_accuracy": 0.8656437125748503, "eval_f1": 0.6173886673369102, "eval_loss": 0.36994901299476624, "eval_precision": 0.7115342814799931, "eval_recall": 0.5872418942592114, "eval_runtime": 4.5837, "eval_samples_per_second": 582.931, "eval_steps_per_second": 9.163, "step": 4512 } ], "logging_steps": 500, "max_steps": 18800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9487869843271680.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }