{ "approach": "direct_hard_training_no_distillation", "batch": 8, "direct_hit3_threshold": 0.9, "direct_report": { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert/final", "model_name": "alexyalunin/RuBioBERT", "run_slug": "direct_hard_training", "selection_metric": "hit@3", "selection_metric_value": 0.96, "test_metrics": { "hit@1": 0.8666666666666667, "hit@3": 0.92, "macro_f1": 0.5899124288410003, "micro_f1": 0.5925925925925926, "mrr": 0.9022657952069718, "recall@3": 0.8813333333333333, "subset_accuracy": 0.21333333333333335, "weighted_f1": 0.6299727806306753 }, "train_duration_sec": 82.6, "train_metrics": { "epoch": 11.0, "total_flos": 1002012076437504.0, "train_loss": 0.6542927627721109, "train_runtime": 82.2708, "train_samples_per_second": 50.468, "train_steps_per_second": 6.418 }, "val_metrics": { "hit@1": 0.92, "hit@3": 0.96, "macro_f1": 0.5838706694799131, "micro_f1": 0.6025316455696202, "mrr": 0.9442271062271061, "recall@3": 0.9150476190476191, "subset_accuracy": 0.26666666666666666, "weighted_f1": 0.6568128092988036 } }, "direct_test_metrics": { "hit@1": 0.8666666666666667, "hit@3": 0.92, "macro_f1": 0.5899124288410003, "micro_f1": 0.5925925925925926, "mrr": 0.9022657952069718, "recall@3": 0.8813333333333333, "subset_accuracy": 0.21333333333333335, "weighted_f1": 0.6299727806306753 }, "direct_val_metrics": { "hit@1": 0.92, "hit@3": 0.96, "macro_f1": 0.5838706694799131, "micro_f1": 0.6025316455696202, "mrr": 0.9442271062271061, "recall@3": 0.9150476190476191, "subset_accuracy": 0.26666666666666666, "weighted_f1": 0.6568128092988036 }, "dropout": 0.1, "epochs": 12, "generated_at_utc": "2026-05-26T06:53:06+00:00", "group_letter": "C", "kd_grid": [ { "hard_loss_weight": 0.5, "temperature": 2.0 }, { "hard_loss_weight": 0.3, "temperature": 2.0 }, { "hard_loss_weight": 0.3, "temperature": 2.5 } ], "lr": 2e-05, "max_len": 512, "paths": { "direct_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert", "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/final", "group_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c", "reports_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/reports" }, "pos_weight_cap": 20.0, "pos_weight_stats": { "labels_at_cap": 65, "labels_without_positives": 0, "max": 20.0, "mean": 19.734392166137695, "min": 14.727272987365723 }, "quality_gate": { "direct_hit3_threshold": 0.9, "force_push_weak": false, "hit3_floor": 0.9, "macro_floor": 0.3, "min_macro_f1_for_push": 0.3, "passed": true, "push_allowed": true }, "run_reports": [ { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert/final", "model_name": "alexyalunin/RuBioBERT", "run_slug": "direct_hard_training", "selection_metric": "hit@3", "selection_metric_value": 0.96, "test_metrics": { "hit@1": 0.8666666666666667, "hit@3": 0.92, "macro_f1": 0.5899124288410003, "micro_f1": 0.5925925925925926, "mrr": 0.9022657952069718, "recall@3": 0.8813333333333333, "subset_accuracy": 0.21333333333333335, "weighted_f1": 0.6299727806306753 }, "train_duration_sec": 82.6, "train_metrics": { "epoch": 11.0, "total_flos": 1002012076437504.0, "train_loss": 0.6542927627721109, "train_runtime": 82.2708, "train_samples_per_second": 50.468, "train_steps_per_second": 6.418 }, "val_metrics": { "hit@1": 0.92, "hit@3": 0.96, "macro_f1": 0.5838706694799131, "micro_f1": 0.6025316455696202, "mrr": 0.9442271062271061, "recall@3": 0.9150476190476191, "subset_accuracy": 0.26666666666666666, "weighted_f1": 0.6568128092988036 } } ], "seed": 42, "selected_run": { "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_c/direct/alexyalunin-rubiobert/final", "model_name": "alexyalunin/RuBioBERT", "run_slug": "direct_hard_training", "selection_metric": "hit@3", "selection_metric_value": 0.96, "test_metrics": { "hit@1": 0.8666666666666667, "hit@3": 0.92, "macro_f1": 0.5899124288410003, "micro_f1": 0.5925925925925926, "mrr": 0.9022657952069718, "recall@3": 0.8813333333333333, "subset_accuracy": 0.21333333333333335, "weighted_f1": 0.6299727806306753 }, "train_duration_sec": 82.6, "train_metrics": { "epoch": 11.0, "total_flos": 1002012076437504.0, "train_loss": 0.6542927627721109, "train_runtime": 82.2708, "train_samples_per_second": 50.468, "train_steps_per_second": 6.418 }, "val_metrics": { "hit@1": 0.92, "hit@3": 0.96, "macro_f1": 0.5838706694799131, "micro_f1": 0.6025316455696202, "mrr": 0.9442271062271061, "recall@3": 0.9150476190476191, "subset_accuracy": 0.26666666666666666, "weighted_f1": 0.6568128092988036 } }, "source_csv": "/content/yandex_disk_cache/datasets/subgroups/group_C.csv", "source_csv_sha256": "9526d7bd571f6aa94d0e162b727474a36dc63f71f79f0d78f400195b786bec26", "specialist_repo_id": "Dmitry43243242/icd10-ru-subgroup-c", "split_sizes": { "test": 75, "train": 346, "val": 75 }, "student_model": "alexyalunin/RuBioBERT", "student_test_metrics": { "hit@1": 0.8666666666666667, "hit@3": 0.92, "macro_f1": 0.5899124288410003, "micro_f1": 0.5925925925925926, "mrr": 0.9022657952069718, "recall@3": 0.8813333333333333, "subset_accuracy": 0.21333333333333335, "weighted_f1": 0.6299727806306753 }, "student_val_metrics": { "hit@1": 0.92, "hit@3": 0.96, "macro_f1": 0.5838706694799131, "micro_f1": 0.6025316455696202, "mrr": 0.9442271062271061, "recall@3": 0.9150476190476191, "subset_accuracy": 0.26666666666666666, "weighted_f1": 0.6568128092988036 }, "teacher_errors": {}, "teacher_models": [], "teacher_reports": [], "teacher_source": null, "teacher_test_metrics": {}, "teacher_val_metrics": {}, "threshold": 0.5, "torch_version": "2.10.0+cu128", "transformers_version": "5.0.0", "warmup_ratio": 0.1, "weight_decay": 0.01 }