{ "batch": 8, "direct_hit3_threshold": 0.9, "direct_report": { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/direct/alexyalunin-rubiobert/final", "model_name": "alexyalunin/RuBioBERT", "run_slug": "direct_hard_training", "selection_metric": "hit@3", "selection_metric_value": 0.835, "test_metrics": { "hit@1": 0.628140703517588, "hit@3": 0.8291457286432161, "macro_f1": 0.5157875868327818, "micro_f1": 0.48293963254593175, "mrr": 0.744193071244231, "recall@3": 0.8197654941373534, "subset_accuracy": 0.1708542713567839, "weighted_f1": 0.5126833520048437 }, "train_duration_sec": 100.4, "train_metrics": { "epoch": 9.0, "total_flos": 2177031722148864.0, "train_loss": 0.47315873063128927, "train_runtime": 100.0873, "train_samples_per_second": 110.184, "train_steps_per_second": 13.788 }, "val_metrics": { "hit@1": 0.645, "hit@3": 0.835, "macro_f1": 0.5239739935027187, "micro_f1": 0.48812664907651715, "mrr": 0.750362070235822, "recall@3": 0.8273333333333333, "subset_accuracy": 0.175, "weighted_f1": 0.5181260346642617 } }, "dropout": 0.1, "epochs": 12, "generated_at_utc": "2026-05-26T08:05:56+00:00", "group_letter": "E", "kd_grid": [ { "hard_loss_weight": 0.5, "temperature": 2.0 }, { "hard_loss_weight": 0.3, "temperature": 2.0 }, { "hard_loss_weight": 0.3, "temperature": 2.5 } ], "lr": 2e-05, "max_len": 512, "pos_weight_cap": 20.0, "pos_weight_stats": { "labels_at_cap": 39, "labels_without_positives": 0, "max": 20.0, "mean": 18.944591522216797, "min": 4.536144733428955 }, "seed": 42, "selected_run": { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/runs/alexyalunin-rubiobert/t2p0_hard0p5/final", "hard_loss_weight": 0.5, "run_slug": "t2p0_hard0p5", "selection_metric": "val_macro_f1", "selection_metric_value": 0.6257847034784294, "temperature": 2.0, "test_metrics": { "test_hit@1": 0.6482412060301508, "test_hit@3": 0.8241206030150754, "test_loss": 1.2573049068450928, "test_macro_f1": 0.6121827915051391, "test_micro_f1": 0.5606299212598426, "test_mrr": 0.7515661740428731, "test_recall@3": 0.8147403685092127, "test_runtime": 0.4221, "test_samples_per_second": 471.485, "test_steps_per_second": 30.801, "test_subset_accuracy": 0.2964824120603015, "test_weighted_f1": 0.5830428669342762 }, "train_duration_sec": 133.0, "train_metrics": { "epoch": 12.0, "total_flos": 2902708962865152.0, "train_loss": 1.3432866200156834, "train_runtime": 132.7733, "train_samples_per_second": 83.059, "train_steps_per_second": 10.394 }, "val_metrics": { "epoch": 12.0, "val_hit@1": 0.635, "val_hit@3": 0.8, "val_loss": 1.2582203149795532, "val_macro_f1": 0.6257847034784294, "val_micro_f1": 0.5578446909667195, "val_mrr": 0.7417874066293184, "val_recall@3": 0.7939999999999999, "val_runtime": 0.458, "val_samples_per_second": 436.701, "val_steps_per_second": 28.386, "val_subset_accuracy": 0.3, "val_weighted_f1": 0.581422089009702 } }, "source_csv": "/content/yandex_disk_cache/datasets/subgroups/group_E.csv", "source_csv_sha256": "7bd98fc0eea937b8edf1391e86ca15afd2aed5c98996951f822684805713ed0b", "specialist_repo_id": "Dmitry43243242/icd10-ru-subgroup-e", "split_sizes": { "test": 199, "train": 919, "val": 200 }, "student_model": "alexyalunin/RuBioBERT", "teacher_errors": {}, "teacher_models": [ "alexyalunin/RuBioRoBERTa", "ai-forever/ruBert-base", "DeepPavlov/rubert-base-cased" ], "teacher_reports": [ { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/alexyalunin-rubioroberta/final", "model_name": "alexyalunin/RuBioRoBERTa", "pushed_to_hub": false, "run_slug": "alexyalunin-rubioroberta", "test_metrics": { "hit@1": 0.6482412060301508, "hit@3": 0.8592964824120602, "macro_f1": 0.6865864313407604, "micro_f1": 0.6485436893203883, "mrr": 0.7635155763702912, "recall@3": 0.8532663316582914, "subset_accuracy": 0.507537688442211, "weighted_f1": 0.6514279861261107 }, "train_duration_sec": 314.8, "train_metrics": { "epoch": 12.0, "total_flos": 1.0278832248336384e+16, "train_loss": 0.31829168511473616, "train_runtime": 299.4665, "train_samples_per_second": 36.825, "train_steps_per_second": 4.608 }, "val_metrics": { "hit@1": 0.665, "hit@3": 0.83, "macro_f1": 0.6816071208348493, "micro_f1": 0.6377358490566037, "mrr": 0.7651676992824679, "recall@3": 0.8214999999999999, "subset_accuracy": 0.48, "weighted_f1": 0.654032532287305 } }, { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/ai-forever-rubert-base/final", "model_name": "ai-forever/ruBert-base", "pushed_to_hub": false, "run_slug": "ai-forever-rubert-base", "test_metrics": { "hit@1": 0.6633165829145728, "hit@3": 0.8241206030150754, "macro_f1": 0.567726321569097, "micro_f1": 0.5497630331753555, "mrr": 0.7552527410980668, "recall@3": 0.8164154103852594, "subset_accuracy": 0.3015075376884422, "weighted_f1": 0.5636139940062055 }, "train_duration_sec": 144.6, "train_metrics": { "epoch": 12.0, "total_flos": 2902708962865152.0, "train_loss": 0.4238858779271444, "train_runtime": 133.7006, "train_samples_per_second": 82.483, "train_steps_per_second": 10.322 }, "val_metrics": { "hit@1": 0.65, "hit@3": 0.82, "macro_f1": 0.5512155779574014, "micro_f1": 0.5529953917050692, "mrr": 0.7548125116338592, "recall@3": 0.814, "subset_accuracy": 0.315, "weighted_f1": 0.5726346327425935 } }, { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/deeppavlov-rubert-base-cased/final", "model_name": "DeepPavlov/rubert-base-cased", "pushed_to_hub": false, "run_slug": "deeppavlov-rubert-base-cased", "test_metrics": { "hit@1": 0.6130653266331658, "hit@3": 0.7939698492462312, "macro_f1": 0.5623096108520506, "micro_f1": 0.5097451274362819, "mrr": 0.7264908372260924, "recall@3": 0.7879396984924623, "subset_accuracy": 0.2613065326633166, "weighted_f1": 0.5247350484683884 }, "train_duration_sec": 145.8, "train_metrics": { "epoch": 12.0, "total_flos": 2902708962865152.0, "train_loss": 0.45112168028734734, "train_runtime": 136.666, "train_samples_per_second": 80.693, "train_steps_per_second": 10.098 }, "val_metrics": { "hit@1": 0.66, "hit@3": 0.81, "macro_f1": 0.5291229985764946, "micro_f1": 0.5123010130246021, "mrr": 0.755827678749699, "recall@3": 0.8014999999999999, "subset_accuracy": 0.23, "weighted_f1": 0.5291843956354477 } } ], "teacher_source": "local_bert_models", "threshold": 0.5, "torch_version": "2.10.0+cu128", "transformers_version": "5.0.0", "warmup_ratio": 0.1, "weight_decay": 0.01 }