{ "best_global_step": 180, "best_metric": 0.9137479662895203, "best_model_checkpoint": "/content/drive/MyDrive/tez-bildiri/qwen2.5-3b-instruct-trl-sft-mimic-cxr/checkpoint-180", "epoch": 0.23502529786192264, "eval_steps": 10, "global_step": 180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013056960992329036, "grad_norm": 8.122729301452637, "learning_rate": 0.0002, "loss": 25.0274, "mean_token_accuracy": 0.49861262924969196, "num_tokens": 172741.0, "step": 10 }, { "epoch": 0.013056960992329036, "eval_loss": 2.7354915142059326, "eval_mean_token_accuracy": 0.5200360960499736, "eval_num_tokens": 172741.0, "eval_runtime": 583.8163, "eval_samples_per_second": 5.247, "eval_steps_per_second": 1.312, "step": 10 }, { "epoch": 0.026113921984658072, "grad_norm": 9.37191104888916, "learning_rate": 0.0002, "loss": 20.059, "mean_token_accuracy": 0.5394352838397026, "num_tokens": 345170.0, "step": 20 }, { "epoch": 0.026113921984658072, "eval_loss": 2.262531280517578, "eval_mean_token_accuracy": 0.5700084223915329, "eval_num_tokens": 345170.0, "eval_runtime": 584.3785, "eval_samples_per_second": 5.241, "eval_steps_per_second": 1.311, "step": 20 }, { "epoch": 0.039170882976987106, "grad_norm": 8.885587692260742, "learning_rate": 0.0002, "loss": 16.4452, "mean_token_accuracy": 0.6004979901015759, "num_tokens": 518282.0, "step": 30 }, { "epoch": 0.039170882976987106, "eval_loss": 1.779425859451294, "eval_mean_token_accuracy": 0.6544274031804684, "eval_num_tokens": 518282.0, "eval_runtime": 584.3483, "eval_samples_per_second": 5.242, "eval_steps_per_second": 1.311, "step": 30 }, { "epoch": 0.052227843969316144, "grad_norm": 8.050444602966309, "learning_rate": 0.0002, "loss": 12.3059, "mean_token_accuracy": 0.7168269947171211, "num_tokens": 690414.0, "step": 40 }, { "epoch": 0.052227843969316144, "eval_loss": 1.309248924255371, "eval_mean_token_accuracy": 0.7652701906684796, "eval_num_tokens": 690414.0, "eval_runtime": 583.5678, "eval_samples_per_second": 5.249, "eval_steps_per_second": 1.313, "step": 40 }, { "epoch": 0.06528480496164518, "grad_norm": 2.590376615524292, "learning_rate": 0.0002, "loss": 10.1827, "mean_token_accuracy": 0.7656703971326351, "num_tokens": 863569.0, "step": 50 }, { "epoch": 0.06528480496164518, "eval_loss": 1.1905678510665894, "eval_mean_token_accuracy": 0.7744907616946467, "eval_num_tokens": 863569.0, "eval_runtime": 583.2888, "eval_samples_per_second": 5.251, "eval_steps_per_second": 1.313, "step": 50 }, { "epoch": 0.07834176595397421, "grad_norm": 2.414226770401001, "learning_rate": 0.0002, "loss": 9.4839, "mean_token_accuracy": 0.773311423510313, "num_tokens": 1037125.0, "step": 60 }, { "epoch": 0.07834176595397421, "eval_loss": 1.119614601135254, "eval_mean_token_accuracy": 0.7835657717976184, "eval_num_tokens": 1037125.0, "eval_runtime": 584.5117, "eval_samples_per_second": 5.24, "eval_steps_per_second": 1.31, "step": 60 }, { "epoch": 0.09139872694630324, "grad_norm": 1.8030030727386475, "learning_rate": 0.0002, "loss": 8.8096, "mean_token_accuracy": 0.7858213528990745, "num_tokens": 1209583.0, "step": 70 }, { "epoch": 0.09139872694630324, "eval_loss": 1.0682815313339233, "eval_mean_token_accuracy": 0.7881310254568842, "eval_num_tokens": 1209583.0, "eval_runtime": 585.4405, "eval_samples_per_second": 5.232, "eval_steps_per_second": 1.308, "step": 70 }, { "epoch": 0.10445568793863229, "grad_norm": 2.2091376781463623, "learning_rate": 0.0002, "loss": 8.6008, "mean_token_accuracy": 0.7877494558691979, "num_tokens": 1381768.0, "step": 80 }, { "epoch": 0.10445568793863229, "eval_loss": 1.034247636795044, "eval_mean_token_accuracy": 0.7920899202867214, "eval_num_tokens": 1381768.0, "eval_runtime": 587.3448, "eval_samples_per_second": 5.215, "eval_steps_per_second": 1.304, "step": 80 }, { "epoch": 0.11751264893096132, "grad_norm": 2.903419256210327, "learning_rate": 0.0002, "loss": 8.1348, "mean_token_accuracy": 0.7928629629313946, "num_tokens": 1553850.0, "step": 90 }, { "epoch": 0.11751264893096132, "eval_loss": 1.009232759475708, "eval_mean_token_accuracy": 0.7953966308978456, "eval_num_tokens": 1553850.0, "eval_runtime": 586.354, "eval_samples_per_second": 5.224, "eval_steps_per_second": 1.306, "step": 90 }, { "epoch": 0.13056960992329036, "grad_norm": 2.6068789958953857, "learning_rate": 0.0002, "loss": 8.3369, "mean_token_accuracy": 0.790278784930706, "num_tokens": 1727356.0, "step": 100 }, { "epoch": 0.13056960992329036, "eval_loss": 0.9901596903800964, "eval_mean_token_accuracy": 0.7973068164316233, "eval_num_tokens": 1727356.0, "eval_runtime": 584.3882, "eval_samples_per_second": 5.241, "eval_steps_per_second": 1.311, "step": 100 }, { "epoch": 0.1436265709156194, "grad_norm": 2.7159364223480225, "learning_rate": 0.0002, "loss": 8.0685, "mean_token_accuracy": 0.7934540964663028, "num_tokens": 1900444.0, "step": 110 }, { "epoch": 0.1436265709156194, "eval_loss": 0.9750204086303711, "eval_mean_token_accuracy": 0.7992984780910433, "eval_num_tokens": 1900444.0, "eval_runtime": 584.7476, "eval_samples_per_second": 5.238, "eval_steps_per_second": 1.31, "step": 110 }, { "epoch": 0.15668353190794843, "grad_norm": 2.747244119644165, "learning_rate": 0.0002, "loss": 7.7226, "mean_token_accuracy": 0.8012088306248188, "num_tokens": 2071987.0, "step": 120 }, { "epoch": 0.15668353190794843, "eval_loss": 0.9617533683776855, "eval_mean_token_accuracy": 0.8009014339733372, "eval_num_tokens": 2071987.0, "eval_runtime": 584.8225, "eval_samples_per_second": 5.237, "eval_steps_per_second": 1.31, "step": 120 }, { "epoch": 0.16974049290027746, "grad_norm": 2.888685464859009, "learning_rate": 0.0002, "loss": 7.7642, "mean_token_accuracy": 0.7987834617495537, "num_tokens": 2245073.0, "step": 130 }, { "epoch": 0.16974049290027746, "eval_loss": 0.951227605342865, "eval_mean_token_accuracy": 0.8025058584175907, "eval_num_tokens": 2245073.0, "eval_runtime": 585.0529, "eval_samples_per_second": 5.235, "eval_steps_per_second": 1.309, "step": 130 }, { "epoch": 0.1827974538926065, "grad_norm": 3.1910297870635986, "learning_rate": 0.0002, "loss": 7.6321, "mean_token_accuracy": 0.8011799998581409, "num_tokens": 2417106.0, "step": 140 }, { "epoch": 0.1827974538926065, "eval_loss": 0.941814661026001, "eval_mean_token_accuracy": 0.8037396753425697, "eval_num_tokens": 2417106.0, "eval_runtime": 586.384, "eval_samples_per_second": 5.224, "eval_steps_per_second": 1.306, "step": 140 }, { "epoch": 0.19585441488493552, "grad_norm": 3.866354465484619, "learning_rate": 0.0002, "loss": 7.6023, "mean_token_accuracy": 0.8026318639516831, "num_tokens": 2589875.0, "step": 150 }, { "epoch": 0.19585441488493552, "eval_loss": 0.932219922542572, "eval_mean_token_accuracy": 0.8049114516914357, "eval_num_tokens": 2589875.0, "eval_runtime": 586.7629, "eval_samples_per_second": 5.22, "eval_steps_per_second": 1.305, "step": 150 }, { "epoch": 0.20891137587726458, "grad_norm": 3.0838913917541504, "learning_rate": 0.0002, "loss": 7.2869, "mean_token_accuracy": 0.8092540368437767, "num_tokens": 2761587.0, "step": 160 }, { "epoch": 0.20891137587726458, "eval_loss": 0.9249224066734314, "eval_mean_token_accuracy": 0.8061499763717851, "eval_num_tokens": 2761587.0, "eval_runtime": 587.3149, "eval_samples_per_second": 5.215, "eval_steps_per_second": 1.304, "step": 160 }, { "epoch": 0.2219683368695936, "grad_norm": 3.295142889022827, "learning_rate": 0.0002, "loss": 7.3085, "mean_token_accuracy": 0.8087648630142212, "num_tokens": 2933386.0, "step": 170 }, { "epoch": 0.2219683368695936, "eval_loss": 0.9189748167991638, "eval_mean_token_accuracy": 0.807062828431864, "eval_num_tokens": 2933386.0, "eval_runtime": 587.1146, "eval_samples_per_second": 5.217, "eval_steps_per_second": 1.305, "step": 170 }, { "epoch": 0.23502529786192264, "grad_norm": 3.147334337234497, "learning_rate": 0.0002, "loss": 7.5534, "mean_token_accuracy": 0.8029283680021763, "num_tokens": 3105920.0, "step": 180 }, { "epoch": 0.23502529786192264, "eval_loss": 0.9137479662895203, "eval_mean_token_accuracy": 0.8077044271457912, "eval_num_tokens": 3105920.0, "eval_runtime": 587.163, "eval_samples_per_second": 5.217, "eval_steps_per_second": 1.305, "step": 180 } ], "logging_steps": 10, "max_steps": 2298, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.973832705428685e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }