{ "best_global_step": 4512, "best_metric": 0.6166987217288957, "best_model_checkpoint": "./results-freezed-seed-6969/checkpoint-4512", "epoch": 12.0, "eval_steps": 500, "global_step": 4512, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8510479041916168, "eval_f1": 0.5188233379385703, "eval_loss": 0.412404328584671, "eval_precision": 0.6887281002079136, "eval_recall": 0.4794410461120206, "eval_runtime": 4.5198, "eval_samples_per_second": 591.181, "eval_steps_per_second": 9.293, "step": 376 }, { "epoch": 1.3297872340425532, "grad_norm": 0.7580151557922363, "learning_rate": 9.98e-05, "loss": 0.4997, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8562874251497006, "eval_f1": 0.5689887233924689, "eval_loss": 0.39307722449302673, "eval_precision": 0.687569748053619, "eval_recall": 0.5266166010932101, "eval_runtime": 4.5316, "eval_samples_per_second": 589.632, "eval_steps_per_second": 9.268, "step": 752 }, { "epoch": 2.6595744680851063, "grad_norm": 1.0576425790786743, "learning_rate": 9.727322404371585e-05, "loss": 0.3714, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.8559131736526946, "eval_f1": 0.5818129831345137, "eval_loss": 0.3961324989795685, "eval_precision": 0.7151325535790987, "eval_recall": 0.5721308022936351, "eval_runtime": 4.5058, "eval_samples_per_second": 593.008, "eval_steps_per_second": 9.321, "step": 1128 }, { "epoch": 3.9893617021276597, "grad_norm": 1.1340179443359375, "learning_rate": 9.454098360655738e-05, "loss": 0.3744, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.8607784431137725, "eval_f1": 0.5989395235334739, "eval_loss": 0.3834174573421478, "eval_precision": 0.7108143819134923, "eval_recall": 0.5768115664557467, "eval_runtime": 4.5202, "eval_samples_per_second": 591.121, "eval_steps_per_second": 9.292, "step": 1504 }, { "epoch": 5.0, "eval_accuracy": 0.8630239520958084, "eval_f1": 0.5987124405203501, "eval_loss": 0.38012266159057617, "eval_precision": 0.7223505874677526, "eval_recall": 0.5712306430666042, "eval_runtime": 4.5115, "eval_samples_per_second": 592.269, "eval_steps_per_second": 9.31, "step": 1880 }, { "epoch": 5.319148936170213, "grad_norm": 0.933506965637207, "learning_rate": 9.180874316939892e-05, "loss": 0.3659, "step": 2000 }, { "epoch": 6.0, "eval_accuracy": 0.8592814371257484, "eval_f1": 0.5928244790291561, "eval_loss": 0.3744014799594879, "eval_precision": 0.6865504890362129, "eval_recall": 0.5591536397257083, "eval_runtime": 4.5207, "eval_samples_per_second": 591.061, "eval_steps_per_second": 9.291, "step": 2256 }, { "epoch": 6.648936170212766, "grad_norm": 0.8730207085609436, "learning_rate": 8.907650273224044e-05, "loss": 0.3555, "step": 2500 }, { "epoch": 7.0, "eval_accuracy": 0.8626497005988024, "eval_f1": 0.6020826153439911, "eval_loss": 0.37463295459747314, "eval_precision": 0.7012962512912608, "eval_recall": 0.5681000290373944, "eval_runtime": 4.5095, "eval_samples_per_second": 592.525, "eval_steps_per_second": 9.314, "step": 2632 }, { "epoch": 7.9787234042553195, "grad_norm": 1.1565885543823242, "learning_rate": 8.634426229508197e-05, "loss": 0.3634, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.8592814371257484, "eval_f1": 0.5600785912802647, "eval_loss": 0.3952018916606903, "eval_precision": 0.7409860335210962, "eval_recall": 0.5220232682066058, "eval_runtime": 4.5084, "eval_samples_per_second": 592.671, "eval_steps_per_second": 9.316, "step": 3008 }, { "epoch": 9.0, "eval_accuracy": 0.8604041916167665, "eval_f1": 0.6007831220499781, "eval_loss": 0.375332772731781, "eval_precision": 0.7087979954051029, "eval_recall": 0.5722608323840338, "eval_runtime": 4.5089, "eval_samples_per_second": 592.605, "eval_steps_per_second": 9.315, "step": 3384 }, { "epoch": 9.308510638297872, "grad_norm": 0.7825000286102295, "learning_rate": 8.361202185792351e-05, "loss": 0.3507, "step": 3500 }, { "epoch": 10.0, "eval_accuracy": 0.8622754491017964, "eval_f1": 0.5922042178515996, "eval_loss": 0.37907975912094116, "eval_precision": 0.7326262784423546, "eval_recall": 0.567072711550182, "eval_runtime": 4.5166, "eval_samples_per_second": 591.594, "eval_steps_per_second": 9.299, "step": 3760 }, { "epoch": 10.638297872340425, "grad_norm": 0.9841243028640747, "learning_rate": 8.087978142076503e-05, "loss": 0.3557, "step": 4000 }, { "epoch": 11.0, "eval_accuracy": 0.8619011976047904, "eval_f1": 0.5805935400886648, "eval_loss": 0.38299164175987244, "eval_precision": 0.7313354075263248, "eval_recall": 0.534718353228735, "eval_runtime": 4.5139, "eval_samples_per_second": 591.949, "eval_steps_per_second": 9.305, "step": 4136 }, { "epoch": 11.96808510638298, "grad_norm": 0.8043486475944519, "learning_rate": 7.814754098360655e-05, "loss": 0.3462, "step": 4500 }, { "epoch": 12.0, "eval_accuracy": 0.8648952095808383, "eval_f1": 0.6166987217288957, "eval_loss": 0.3703169822692871, "eval_precision": 0.7199382884314391, "eval_recall": 0.5858551193564548, "eval_runtime": 4.5215, "eval_samples_per_second": 590.952, "eval_steps_per_second": 9.289, "step": 4512 } ], "logging_steps": 500, "max_steps": 18800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9487869843271680.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }