Food Desert committed on
Commit
ddf8c33
·
1 Parent(s): dacabb8

Remove probe sequential species experiment; keep probe pick cap

Browse files
Files changed (1) hide show
  1. psq_rag/llm/select.py +2 -52
psq_rag/llm/select.py CHANGED
@@ -1563,23 +1563,6 @@ def _split_probe_tags_by_bundle(
1563
  return out
1564
 
1565
 
1566
- def _species_cap_from_count_tags(selected_tags: Sequence[str]) -> Optional[int]:
1567
- """Derive species max-picks cap from count-like probe tags."""
1568
- s = set(selected_tags)
1569
- if "zero_pictured" in s:
1570
- return 0
1571
- if "solo" in s:
1572
- return 1
1573
- if "duo" in s:
1574
- return 2
1575
- if "trio" in s:
1576
- return 3
1577
- if "group" in s:
1578
- # Conservative finite cap for open-ended "group".
1579
- return 4
1580
- return None
1581
-
1582
-
1583
  def llm_infer_probe_tags(
1584
  query_text: str,
1585
  log=None,
@@ -1644,15 +1627,10 @@ def llm_infer_probe_tags(
1644
  if max_pick_override < 0:
1645
  max_pick_override = 0
1646
 
1647
- sequential_species = (os.environ.get("PSQ_PROBE_SEQUENTIAL_SPECIES", "0") or "0").strip().lower() in {
1648
- "1", "true", "yes", "on"
1649
- }
1650
-
1651
  def _call_chunks(
1652
  chunks: Sequence[Sequence[str]],
1653
  *,
1654
  label: str,
1655
- max_pick_cap: Optional[int] = None,
1656
  ) -> List[str]:
1657
  out_tags: List[str] = []
1658
  out_seen: Set[str] = set()
@@ -1666,8 +1644,6 @@ def llm_infer_probe_tags(
1666
  per_call_budget = len(chunk_tags)
1667
  if max_pick_override > 0:
1668
  per_call_budget = min(per_call_budget, max_pick_override)
1669
- if max_pick_cap is not None:
1670
- per_call_budget = min(per_call_budget, max(0, int(max_pick_cap)))
1671
  if per_call_budget <= 0:
1672
  if log:
1673
  log(f"Stage3p {label}: skipping call with budget=0")
@@ -1699,34 +1675,8 @@ def llm_infer_probe_tags(
1699
  out_tags.append(t)
1700
  return out_tags
1701
 
1702
- selected: List[str] = []
1703
-
1704
- if sequential_species:
1705
- bundle_by_tag = _probe_bundle_map(log=log)
1706
- species_tags = [t for t in probe_tags if bundle_by_tag.get(t, "") == "species_taxonomy"]
1707
- non_species_tags = [t for t in probe_tags if t not in set(species_tags)]
1708
- if log:
1709
- log(
1710
- "Stage3p: sequential species mode on "
1711
- f"(non_species={len(non_species_tags)}, species={len(species_tags)})"
1712
- )
1713
-
1714
- non_species_chunks = _split_probe_tags_by_bundle(non_species_tags, split_calls, log=log) if non_species_tags else []
1715
- selected_non_species = _call_chunks(non_species_chunks, label="non_species")
1716
- selected.extend(selected_non_species)
1717
-
1718
- if species_tags:
1719
- species_cap = _species_cap_from_count_tags(selected_non_species)
1720
- if log:
1721
- log(f"Stage3p: species cap from count tags = {species_cap!r}")
1722
- species_chunks = [species_tags]
1723
- selected_species = _call_chunks(species_chunks, label="species", max_pick_cap=species_cap)
1724
- for t in selected_species:
1725
- if t not in selected:
1726
- selected.append(t)
1727
- else:
1728
- probe_chunks = _split_probe_tags_by_bundle(probe_tags, split_calls, log=log)
1729
- selected = _call_chunks(probe_chunks, label="all")
1730
 
1731
  selected = _apply_species_anchor_mapping(selected, query_text=query_text, log=log)
1732
 
 
1563
  return out
1564
 
1565
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1566
  def llm_infer_probe_tags(
1567
  query_text: str,
1568
  log=None,
 
1627
  if max_pick_override < 0:
1628
  max_pick_override = 0
1629
 
 
 
 
 
1630
  def _call_chunks(
1631
  chunks: Sequence[Sequence[str]],
1632
  *,
1633
  label: str,
 
1634
  ) -> List[str]:
1635
  out_tags: List[str] = []
1636
  out_seen: Set[str] = set()
 
1644
  per_call_budget = len(chunk_tags)
1645
  if max_pick_override > 0:
1646
  per_call_budget = min(per_call_budget, max_pick_override)
 
 
1647
  if per_call_budget <= 0:
1648
  if log:
1649
  log(f"Stage3p {label}: skipping call with budget=0")
 
1675
  out_tags.append(t)
1676
  return out_tags
1677
 
1678
+ probe_chunks = _split_probe_tags_by_bundle(probe_tags, split_calls, log=log)
1679
+ selected: List[str] = _call_chunks(probe_chunks, label="all")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1680
 
1681
  selected = _apply_species_anchor_mapping(selected, query_text=query_text, log=log)
1682