Dmitry43243242 commited on
Commit
af82947
·
verified ·
1 Parent(s): 7af854b

Upload distillation_summary.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. distillation_summary.json +463 -0
distillation_summary.json ADDED
@@ -0,0 +1,463 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "approach": "local_teacher_ensemble_knowledge_distillation",
3
+ "batch": 8,
4
+ "direct_hit3_threshold": 0.9,
5
+ "direct_report": {
6
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/direct/alexyalunin-rubiobert/final",
7
+ "model_name": "alexyalunin/RuBioBERT",
8
+ "run_slug": "direct_hard_training",
9
+ "selection_metric": "hit@3",
10
+ "selection_metric_value": 0.835,
11
+ "test_metrics": {
12
+ "hit@1": 0.628140703517588,
13
+ "hit@3": 0.8291457286432161,
14
+ "macro_f1": 0.5157875868327818,
15
+ "micro_f1": 0.48293963254593175,
16
+ "mrr": 0.744193071244231,
17
+ "recall@3": 0.8197654941373534,
18
+ "subset_accuracy": 0.1708542713567839,
19
+ "weighted_f1": 0.5126833520048437
20
+ },
21
+ "train_duration_sec": 100.4,
22
+ "train_metrics": {
23
+ "epoch": 9.0,
24
+ "total_flos": 2177031722148864.0,
25
+ "train_loss": 0.47315873063128927,
26
+ "train_runtime": 100.0873,
27
+ "train_samples_per_second": 110.184,
28
+ "train_steps_per_second": 13.788
29
+ },
30
+ "val_metrics": {
31
+ "hit@1": 0.645,
32
+ "hit@3": 0.835,
33
+ "macro_f1": 0.5239739935027187,
34
+ "micro_f1": 0.48812664907651715,
35
+ "mrr": 0.750362070235822,
36
+ "recall@3": 0.8273333333333333,
37
+ "subset_accuracy": 0.175,
38
+ "weighted_f1": 0.5181260346642617
39
+ }
40
+ },
41
+ "direct_test_metrics": {
42
+ "hit@1": 0.628140703517588,
43
+ "hit@3": 0.8291457286432161,
44
+ "macro_f1": 0.5157875868327818,
45
+ "micro_f1": 0.48293963254593175,
46
+ "mrr": 0.744193071244231,
47
+ "recall@3": 0.8197654941373534,
48
+ "subset_accuracy": 0.1708542713567839,
49
+ "weighted_f1": 0.5126833520048437
50
+ },
51
+ "direct_val_metrics": {
52
+ "hit@1": 0.645,
53
+ "hit@3": 0.835,
54
+ "macro_f1": 0.5239739935027187,
55
+ "micro_f1": 0.48812664907651715,
56
+ "mrr": 0.750362070235822,
57
+ "recall@3": 0.8273333333333333,
58
+ "subset_accuracy": 0.175,
59
+ "weighted_f1": 0.5181260346642617
60
+ },
61
+ "dropout": 0.1,
62
+ "epochs": 12,
63
+ "generated_at_utc": "2026-05-26T08:05:56+00:00",
64
+ "group_letter": "E",
65
+ "kd_grid": [
66
+ {
67
+ "hard_loss_weight": 0.5,
68
+ "temperature": 2.0
69
+ },
70
+ {
71
+ "hard_loss_weight": 0.3,
72
+ "temperature": 2.0
73
+ },
74
+ {
75
+ "hard_loss_weight": 0.3,
76
+ "temperature": 2.5
77
+ }
78
+ ],
79
+ "lr": 2e-05,
80
+ "max_len": 512,
81
+ "paths": {
82
+ "direct_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/direct/alexyalunin-rubiobert",
83
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/final",
84
+ "group_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e",
85
+ "reports_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/reports",
86
+ "teachers_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers"
87
+ },
88
+ "pos_weight_cap": 20.0,
89
+ "pos_weight_stats": {
90
+ "labels_at_cap": 39,
91
+ "labels_without_positives": 0,
92
+ "max": 20.0,
93
+ "mean": 18.944591522216797,
94
+ "min": 4.536144733428955
95
+ },
96
+ "quality_gate": {
97
+ "direct_hit3_threshold": 0.9,
98
+ "force_push_weak": true,
99
+ "hit3_drop_tolerance": 0.02,
100
+ "hit3_floor": 0.8242211055276382,
101
+ "macro_f1_drop_tolerance": 0.03,
102
+ "macro_floor": 0.65047534381331,
103
+ "min_macro_f1_for_push": 0.3,
104
+ "passed": false,
105
+ "push_allowed": true,
106
+ "push_policy": "always_push_when_push_to_hub_enabled"
107
+ },
108
+ "run_reports": [
109
+ {
110
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/runs/alexyalunin-rubiobert/t2p0_hard0p5/final",
111
+ "hard_loss_weight": 0.5,
112
+ "run_slug": "t2p0_hard0p5",
113
+ "selection_metric": "val_macro_f1",
114
+ "selection_metric_value": 0.6257847034784294,
115
+ "temperature": 2.0,
116
+ "test_metrics": {
117
+ "test_hit@1": 0.6482412060301508,
118
+ "test_hit@3": 0.8241206030150754,
119
+ "test_loss": 1.2573049068450928,
120
+ "test_macro_f1": 0.6121827915051391,
121
+ "test_micro_f1": 0.5606299212598426,
122
+ "test_mrr": 0.7515661740428731,
123
+ "test_recall@3": 0.8147403685092127,
124
+ "test_runtime": 0.4221,
125
+ "test_samples_per_second": 471.485,
126
+ "test_steps_per_second": 30.801,
127
+ "test_subset_accuracy": 0.2964824120603015,
128
+ "test_weighted_f1": 0.5830428669342762
129
+ },
130
+ "train_duration_sec": 133.0,
131
+ "train_metrics": {
132
+ "epoch": 12.0,
133
+ "total_flos": 2902708962865152.0,
134
+ "train_loss": 1.3432866200156834,
135
+ "train_runtime": 132.7733,
136
+ "train_samples_per_second": 83.059,
137
+ "train_steps_per_second": 10.394
138
+ },
139
+ "val_metrics": {
140
+ "epoch": 12.0,
141
+ "val_hit@1": 0.635,
142
+ "val_hit@3": 0.8,
143
+ "val_loss": 1.2582203149795532,
144
+ "val_macro_f1": 0.6257847034784294,
145
+ "val_micro_f1": 0.5578446909667195,
146
+ "val_mrr": 0.7417874066293184,
147
+ "val_recall@3": 0.7939999999999999,
148
+ "val_runtime": 0.458,
149
+ "val_samples_per_second": 436.701,
150
+ "val_steps_per_second": 28.386,
151
+ "val_subset_accuracy": 0.3,
152
+ "val_weighted_f1": 0.581422089009702
153
+ }
154
+ },
155
+ {
156
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/runs/alexyalunin-rubiobert/t2p0_hard0p3/final",
157
+ "hard_loss_weight": 0.3,
158
+ "run_slug": "t2p0_hard0p3",
159
+ "selection_metric": "val_macro_f1",
160
+ "selection_metric_value": 0.6216561527355456,
161
+ "temperature": 2.0,
162
+ "test_metrics": {
163
+ "test_hit@1": 0.6331658291457286,
164
+ "test_hit@3": 0.8492462311557789,
165
+ "test_loss": 1.6216875314712524,
166
+ "test_macro_f1": 0.6671811105707237,
167
+ "test_micro_f1": 0.5963756177924218,
168
+ "test_mrr": 0.7480870311294261,
169
+ "test_recall@3": 0.84321608040201,
170
+ "test_runtime": 0.4365,
171
+ "test_samples_per_second": 455.915,
172
+ "test_steps_per_second": 29.783,
173
+ "test_subset_accuracy": 0.37185929648241206,
174
+ "test_weighted_f1": 0.6131844757263718
175
+ },
176
+ "train_duration_sec": 124.8,
177
+ "train_metrics": {
178
+ "epoch": 11.0,
179
+ "total_flos": 2660816549293056.0,
180
+ "train_loss": 1.7215259325834131,
181
+ "train_runtime": 124.4873,
182
+ "train_samples_per_second": 88.587,
183
+ "train_steps_per_second": 11.085
184
+ },
185
+ "val_metrics": {
186
+ "epoch": 11.0,
187
+ "val_hit@1": 0.625,
188
+ "val_hit@3": 0.81,
189
+ "val_loss": 1.6304773092269897,
190
+ "val_macro_f1": 0.6216561527355456,
191
+ "val_micro_f1": 0.551948051948052,
192
+ "val_mrr": 0.7394910669621955,
193
+ "val_recall@3": 0.8039999999999999,
194
+ "val_runtime": 0.4602,
195
+ "val_samples_per_second": 434.552,
196
+ "val_steps_per_second": 28.246,
197
+ "val_subset_accuracy": 0.305,
198
+ "val_weighted_f1": 0.5740311697328536
199
+ }
200
+ },
201
+ {
202
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/runs/alexyalunin-rubiobert/t2p5_hard0p3/final",
203
+ "hard_loss_weight": 0.3,
204
+ "run_slug": "t2p5_hard0p3",
205
+ "selection_metric": "val_macro_f1",
206
+ "selection_metric_value": 0.6103253831459362,
207
+ "temperature": 2.5,
208
+ "test_metrics": {
209
+ "test_hit@1": 0.6381909547738693,
210
+ "test_hit@3": 0.8442211055276382,
211
+ "test_loss": 2.669990301132202,
212
+ "test_macro_f1": 0.6120593171802363,
213
+ "test_micro_f1": 0.5938009787928222,
214
+ "test_mrr": 0.7512173965722264,
215
+ "test_recall@3": 0.8365159128978223,
216
+ "test_runtime": 0.4397,
217
+ "test_samples_per_second": 452.559,
218
+ "test_steps_per_second": 29.564,
219
+ "test_subset_accuracy": 0.35678391959798994,
220
+ "test_weighted_f1": 0.6056969298332823
221
+ },
222
+ "train_duration_sec": 131.2,
223
+ "train_metrics": {
224
+ "epoch": 12.0,
225
+ "total_flos": 2902708962865152.0,
226
+ "train_loss": 2.7761391142140264,
227
+ "train_runtime": 130.887,
228
+ "train_samples_per_second": 84.256,
229
+ "train_steps_per_second": 10.543
230
+ },
231
+ "val_metrics": {
232
+ "epoch": 12.0,
233
+ "val_hit@1": 0.65,
234
+ "val_hit@3": 0.825,
235
+ "val_loss": 2.679783821105957,
236
+ "val_macro_f1": 0.6103253831459362,
237
+ "val_micro_f1": 0.5737976782752903,
238
+ "val_mrr": 0.7497367052881758,
239
+ "val_recall@3": 0.819,
240
+ "val_runtime": 0.4725,
241
+ "val_samples_per_second": 423.317,
242
+ "val_steps_per_second": 27.516,
243
+ "val_subset_accuracy": 0.37,
244
+ "val_weighted_f1": 0.5931617850243789
245
+ }
246
+ }
247
+ ],
248
+ "seed": 42,
249
+ "selected_run": {
250
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/runs/alexyalunin-rubiobert/t2p0_hard0p5/final",
251
+ "hard_loss_weight": 0.5,
252
+ "run_slug": "t2p0_hard0p5",
253
+ "selection_metric": "val_macro_f1",
254
+ "selection_metric_value": 0.6257847034784294,
255
+ "temperature": 2.0,
256
+ "test_metrics": {
257
+ "test_hit@1": 0.6482412060301508,
258
+ "test_hit@3": 0.8241206030150754,
259
+ "test_loss": 1.2573049068450928,
260
+ "test_macro_f1": 0.6121827915051391,
261
+ "test_micro_f1": 0.5606299212598426,
262
+ "test_mrr": 0.7515661740428731,
263
+ "test_recall@3": 0.8147403685092127,
264
+ "test_runtime": 0.4221,
265
+ "test_samples_per_second": 471.485,
266
+ "test_steps_per_second": 30.801,
267
+ "test_subset_accuracy": 0.2964824120603015,
268
+ "test_weighted_f1": 0.5830428669342762
269
+ },
270
+ "train_duration_sec": 133.0,
271
+ "train_metrics": {
272
+ "epoch": 12.0,
273
+ "total_flos": 2902708962865152.0,
274
+ "train_loss": 1.3432866200156834,
275
+ "train_runtime": 132.7733,
276
+ "train_samples_per_second": 83.059,
277
+ "train_steps_per_second": 10.394
278
+ },
279
+ "val_metrics": {
280
+ "epoch": 12.0,
281
+ "val_hit@1": 0.635,
282
+ "val_hit@3": 0.8,
283
+ "val_loss": 1.2582203149795532,
284
+ "val_macro_f1": 0.6257847034784294,
285
+ "val_micro_f1": 0.5578446909667195,
286
+ "val_mrr": 0.7417874066293184,
287
+ "val_recall@3": 0.7939999999999999,
288
+ "val_runtime": 0.458,
289
+ "val_samples_per_second": 436.701,
290
+ "val_steps_per_second": 28.386,
291
+ "val_subset_accuracy": 0.3,
292
+ "val_weighted_f1": 0.581422089009702
293
+ }
294
+ },
295
+ "source_csv": "/content/yandex_disk_cache/datasets/subgroups/group_E.csv",
296
+ "source_csv_sha256": "7bd98fc0eea937b8edf1391e86ca15afd2aed5c98996951f822684805713ed0b",
297
+ "specialist_repo_id": "Dmitry43243242/icd10-ru-subgroup-e",
298
+ "split_sizes": {
299
+ "test": 199,
300
+ "train": 919,
301
+ "val": 200
302
+ },
303
+ "student_model": "alexyalunin/RuBioBERT",
304
+ "student_test_metrics": {
305
+ "hit@1": 0.6482412060301508,
306
+ "hit@3": 0.8241206030150754,
307
+ "macro_f1": 0.6121827915051391,
308
+ "micro_f1": 0.5606299212598426,
309
+ "mrr": 0.7515661740428731,
310
+ "recall@3": 0.8147403685092127,
311
+ "subset_accuracy": 0.2964824120603015,
312
+ "weighted_f1": 0.5830428669342762
313
+ },
314
+ "student_val_metrics": {
315
+ "hit@1": 0.635,
316
+ "hit@3": 0.8,
317
+ "macro_f1": 0.6257847034784294,
318
+ "micro_f1": 0.5578446909667195,
319
+ "mrr": 0.7417874066293184,
320
+ "recall@3": 0.7939999999999999,
321
+ "subset_accuracy": 0.3,
322
+ "weighted_f1": 0.581422089009702
323
+ },
324
+ "teacher_errors": {},
325
+ "teacher_models": [
326
+ "alexyalunin/RuBioRoBERTa",
327
+ "ai-forever/ruBert-base",
328
+ "DeepPavlov/rubert-base-cased"
329
+ ],
330
+ "teacher_reports": [
331
+ {
332
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/alexyalunin-rubioroberta/final",
333
+ "model_name": "alexyalunin/RuBioRoBERTa",
334
+ "pushed_to_hub": false,
335
+ "run_slug": "alexyalunin-rubioroberta",
336
+ "test_metrics": {
337
+ "hit@1": 0.6482412060301508,
338
+ "hit@3": 0.8592964824120602,
339
+ "macro_f1": 0.6865864313407604,
340
+ "micro_f1": 0.6485436893203883,
341
+ "mrr": 0.7635155763702912,
342
+ "recall@3": 0.8532663316582914,
343
+ "subset_accuracy": 0.507537688442211,
344
+ "weighted_f1": 0.6514279861261107
345
+ },
346
+ "train_duration_sec": 314.8,
347
+ "train_metrics": {
348
+ "epoch": 12.0,
349
+ "total_flos": 1.0278832248336384e+16,
350
+ "train_loss": 0.31829168511473616,
351
+ "train_runtime": 299.4665,
352
+ "train_samples_per_second": 36.825,
353
+ "train_steps_per_second": 4.608
354
+ },
355
+ "val_metrics": {
356
+ "hit@1": 0.665,
357
+ "hit@3": 0.83,
358
+ "macro_f1": 0.6816071208348493,
359
+ "micro_f1": 0.6377358490566037,
360
+ "mrr": 0.7651676992824679,
361
+ "recall@3": 0.8214999999999999,
362
+ "subset_accuracy": 0.48,
363
+ "weighted_f1": 0.654032532287305
364
+ }
365
+ },
366
+ {
367
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/ai-forever-rubert-base/final",
368
+ "model_name": "ai-forever/ruBert-base",
369
+ "pushed_to_hub": false,
370
+ "run_slug": "ai-forever-rubert-base",
371
+ "test_metrics": {
372
+ "hit@1": 0.6633165829145728,
373
+ "hit@3": 0.8241206030150754,
374
+ "macro_f1": 0.567726321569097,
375
+ "micro_f1": 0.5497630331753555,
376
+ "mrr": 0.7552527410980668,
377
+ "recall@3": 0.8164154103852594,
378
+ "subset_accuracy": 0.3015075376884422,
379
+ "weighted_f1": 0.5636139940062055
380
+ },
381
+ "train_duration_sec": 144.6,
382
+ "train_metrics": {
383
+ "epoch": 12.0,
384
+ "total_flos": 2902708962865152.0,
385
+ "train_loss": 0.4238858779271444,
386
+ "train_runtime": 133.7006,
387
+ "train_samples_per_second": 82.483,
388
+ "train_steps_per_second": 10.322
389
+ },
390
+ "val_metrics": {
391
+ "hit@1": 0.65,
392
+ "hit@3": 0.82,
393
+ "macro_f1": 0.5512155779574014,
394
+ "micro_f1": 0.5529953917050692,
395
+ "mrr": 0.7548125116338592,
396
+ "recall@3": 0.814,
397
+ "subset_accuracy": 0.315,
398
+ "weighted_f1": 0.5726346327425935
399
+ }
400
+ },
401
+ {
402
+ "final_dir": "/content/yandex_disk_cache/ml/subgroup_distillation/group_e/teachers/deeppavlov-rubert-base-cased/final",
403
+ "model_name": "DeepPavlov/rubert-base-cased",
404
+ "pushed_to_hub": false,
405
+ "run_slug": "deeppavlov-rubert-base-cased",
406
+ "test_metrics": {
407
+ "hit@1": 0.6130653266331658,
408
+ "hit@3": 0.7939698492462312,
409
+ "macro_f1": 0.5623096108520506,
410
+ "micro_f1": 0.5097451274362819,
411
+ "mrr": 0.7264908372260924,
412
+ "recall@3": 0.7879396984924623,
413
+ "subset_accuracy": 0.2613065326633166,
414
+ "weighted_f1": 0.5247350484683884
415
+ },
416
+ "train_duration_sec": 145.8,
417
+ "train_metrics": {
418
+ "epoch": 12.0,
419
+ "total_flos": 2902708962865152.0,
420
+ "train_loss": 0.45112168028734734,
421
+ "train_runtime": 136.666,
422
+ "train_samples_per_second": 80.693,
423
+ "train_steps_per_second": 10.098
424
+ },
425
+ "val_metrics": {
426
+ "hit@1": 0.66,
427
+ "hit@3": 0.81,
428
+ "macro_f1": 0.5291229985764946,
429
+ "micro_f1": 0.5123010130246021,
430
+ "mrr": 0.755827678749699,
431
+ "recall@3": 0.8014999999999999,
432
+ "subset_accuracy": 0.23,
433
+ "weighted_f1": 0.5291843956354477
434
+ }
435
+ }
436
+ ],
437
+ "teacher_source": "local_bert_models",
438
+ "teacher_test_metrics": {
439
+ "hit@1": 0.678391959798995,
440
+ "hit@3": 0.8442211055276382,
441
+ "macro_f1": 0.68047534381331,
442
+ "micro_f1": 0.6541353383458647,
443
+ "mrr": 0.7735248354904848,
444
+ "recall@3": 0.8381909547738693,
445
+ "subset_accuracy": 0.4824120603015075,
446
+ "weighted_f1": 0.6548964986426676
447
+ },
448
+ "teacher_val_metrics": {
449
+ "hit@1": 0.675,
450
+ "hit@3": 0.84,
451
+ "macro_f1": 0.6592846608011086,
452
+ "micro_f1": 0.6483516483516484,
453
+ "mrr": 0.7742923534798536,
454
+ "recall@3": 0.834,
455
+ "subset_accuracy": 0.48,
456
+ "weighted_f1": 0.6615922586454719
457
+ },
458
+ "threshold": 0.5,
459
+ "torch_version": "2.10.0+cu128",
460
+ "transformers_version": "5.0.0",
461
+ "warmup_ratio": 0.1,
462
+ "weight_decay": 0.01
463
+ }