philipjohnbasile committed on
Commit
99d5478
·
verified ·
1 Parent(s): 9f85410

Upload design/facet_calib.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. design/facet_calib.py +7 -0
design/facet_calib.py CHANGED
@@ -13,6 +13,7 @@ import glob
13
  import json
14
  import os
15
  import random
 
16
  from collections import Counter
17
 
18
  HERE = os.path.dirname(__file__)
@@ -43,6 +44,12 @@ def main():
43
  mix.append((os.path.basename(fp).split("_")[0], open(fp, encoding="utf-8", errors="ignore").read()))
44
  for t in _sample("heal/design/seeds.jsonl", 9):
45
  mix.append(("design", t))
 
 
 
 
 
 
46
  # 2) balanced samples per capability from the heal corpora
47
  for cap, rel, k in [("design", "heal/design/train.jsonl", 40),
48
  ("math", "heal/lean/train.jsonl", 40),
 
13
  import json
14
  import os
15
  import random
16
+ import sys
17
  from collections import Counter
18
 
19
  HERE = os.path.dirname(__file__)
 
44
  mix.append((os.path.basename(fp).split("_")[0], open(fp, encoding="utf-8", errors="ignore").read()))
45
  for t in _sample("heal/design/seeds.jsonl", 9):
46
  mix.append(("design", t))
47
+ # the facet CANONS — dense facet vocabulary (OKLCH/Tufte/Saltzer/Erdős/…), a strong per-facet REAP activator
48
+ # that exercises each facet's experts so a harder prune (14/7GB) keeps them. Full balance comes from the flywheel.
49
+ sys.path.insert(0, os.path.join(ROOT, "src"))
50
+ from soul import FACETS # noqa: E402
51
+ for name, facet in FACETS.items():
52
+ mix.append((name, facet.canon))
53
  # 2) balanced samples per capability from the heal corpora
54
  for cap, rel, k in [("design", "heal/design/train.jsonl", 40),
55
  ("math", "heal/lean/train.jsonl", 40),