icd10-ru-subgroup-c / training_config.json
Dmitry43243242's picture
Upload training_config.json with huggingface_hub
872edb3 verified
Raw
History Blame Contribute Delete
3.58 kB
{
"batch": 8,
"direct_hit3_threshold": 0.9,
"direct_report": {
"final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert/final",
"model_name": "alexyalunin/RuBioBERT",
"run_slug": "direct_hard_training",
"selection_metric": "hit@3",
"selection_metric_value": 0.96,
"test_metrics": {
"hit@1": 0.8666666666666667,
"hit@3": 0.92,
"macro_f1": 0.5899124288410003,
"micro_f1": 0.5925925925925926,
"mrr": 0.9022657952069718,
"recall@3": 0.8813333333333333,
"subset_accuracy": 0.21333333333333335,
"weighted_f1": 0.6299727806306753
},
"train_duration_sec": 82.6,
"train_metrics": {
"epoch": 11.0,
"total_flos": 1002012076437504.0,
"train_loss": 0.6542927627721109,
"train_runtime": 82.2708,
"train_samples_per_second": 50.468,
"train_steps_per_second": 6.418
},
"val_metrics": {
"hit@1": 0.92,
"hit@3": 0.96,
"macro_f1": 0.5838706694799131,
"micro_f1": 0.6025316455696202,
"mrr": 0.9442271062271061,
"recall@3": 0.9150476190476191,
"subset_accuracy": 0.26666666666666666,
"weighted_f1": 0.6568128092988036
}
},
"dropout": 0.1,
"epochs": 12,
"generated_at_utc": "2026-05-26T06:53:06+00:00",
"group_letter": "C",
"kd_grid": [
{
"hard_loss_weight": 0.5,
"temperature": 2.0
},
{
"hard_loss_weight": 0.3,
"temperature": 2.0
},
{
"hard_loss_weight": 0.3,
"temperature": 2.5
}
],
"lr": 2e-05,
"max_len": 512,
"pos_weight_cap": 20.0,
"pos_weight_stats": {
"labels_at_cap": 65,
"labels_without_positives": 0,
"max": 20.0,
"mean": 19.734392166137695,
"min": 14.727272987365723
},
"seed": 42,
"selected_run": {
"final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert/final",
"model_name": "alexyalunin/RuBioBERT",
"run_slug": "direct_hard_training",
"selection_metric": "hit@3",
"selection_metric_value": 0.96,
"test_metrics": {
"hit@1": 0.8666666666666667,
"hit@3": 0.92,
"macro_f1": 0.5899124288410003,
"micro_f1": 0.5925925925925926,
"mrr": 0.9022657952069718,
"recall@3": 0.8813333333333333,
"subset_accuracy": 0.21333333333333335,
"weighted_f1": 0.6299727806306753
},
"train_duration_sec": 82.6,
"train_metrics": {
"epoch": 11.0,
"total_flos": 1002012076437504.0,
"train_loss": 0.6542927627721109,
"train_runtime": 82.2708,
"train_samples_per_second": 50.468,
"train_steps_per_second": 6.418
},
"val_metrics": {
"hit@1": 0.92,
"hit@3": 0.96,
"macro_f1": 0.5838706694799131,
"micro_f1": 0.6025316455696202,
"mrr": 0.9442271062271061,
"recall@3": 0.9150476190476191,
"subset_accuracy": 0.26666666666666666,
"weighted_f1": 0.6568128092988036
}
},
"source_csv": "/content/yandex_disk_cache/datasets/subgroups/group_C.csv",
"source_csv_sha256": "9526d7bd571f6aa94d0e162b727474a36dc63f71f79f0d78f400195b786bec26",
"specialist_repo_id": "Dmitry43243242/icd10-ru-subgroup-c",
"split_sizes": {
"test": 75,
"train": 346,
"val": 75
},
"student_model": "alexyalunin/RuBioBERT",
"teacher_errors": {},
"teacher_models": [],
"teacher_reports": [],
"teacher_source": null,
"threshold": 0.5,
"torch_version": "2.10.0+cu128",
"transformers_version": "5.0.0",
"warmup_ratio": 0.1,
"weight_decay": 0.01
}