Spaces:
Running
Running
Food Desert committed on
Commit ·
ddf8c33
1
Parent(s): dacabb8
Remove probe sequential species experiment; keep probe pick cap
Browse files - psq_rag/llm/select.py +2 -52
psq_rag/llm/select.py
CHANGED
|
@@ -1563,23 +1563,6 @@ def _split_probe_tags_by_bundle(
|
|
| 1563 |
return out
|
| 1564 |
|
| 1565 |
|
| 1566 |
-
def _species_cap_from_count_tags(selected_tags: Sequence[str]) -> Optional[int]:
|
| 1567 |
-
"""Derive species max-picks cap from count-like probe tags."""
|
| 1568 |
-
s = set(selected_tags)
|
| 1569 |
-
if "zero_pictured" in s:
|
| 1570 |
-
return 0
|
| 1571 |
-
if "solo" in s:
|
| 1572 |
-
return 1
|
| 1573 |
-
if "duo" in s:
|
| 1574 |
-
return 2
|
| 1575 |
-
if "trio" in s:
|
| 1576 |
-
return 3
|
| 1577 |
-
if "group" in s:
|
| 1578 |
-
# Conservative finite cap for open-ended "group".
|
| 1579 |
-
return 4
|
| 1580 |
-
return None
|
| 1581 |
-
|
| 1582 |
-
|
| 1583 |
def llm_infer_probe_tags(
|
| 1584 |
query_text: str,
|
| 1585 |
log=None,
|
|
@@ -1644,15 +1627,10 @@ def llm_infer_probe_tags(
|
|
| 1644 |
if max_pick_override < 0:
|
| 1645 |
max_pick_override = 0
|
| 1646 |
|
| 1647 |
-
sequential_species = (os.environ.get("PSQ_PROBE_SEQUENTIAL_SPECIES", "0") or "0").strip().lower() in {
|
| 1648 |
-
"1", "true", "yes", "on"
|
| 1649 |
-
}
|
| 1650 |
-
|
| 1651 |
def _call_chunks(
|
| 1652 |
chunks: Sequence[Sequence[str]],
|
| 1653 |
*,
|
| 1654 |
label: str,
|
| 1655 |
-
max_pick_cap: Optional[int] = None,
|
| 1656 |
) -> List[str]:
|
| 1657 |
out_tags: List[str] = []
|
| 1658 |
out_seen: Set[str] = set()
|
|
@@ -1666,8 +1644,6 @@ def llm_infer_probe_tags(
|
|
| 1666 |
per_call_budget = len(chunk_tags)
|
| 1667 |
if max_pick_override > 0:
|
| 1668 |
per_call_budget = min(per_call_budget, max_pick_override)
|
| 1669 |
-
if max_pick_cap is not None:
|
| 1670 |
-
per_call_budget = min(per_call_budget, max(0, int(max_pick_cap)))
|
| 1671 |
if per_call_budget <= 0:
|
| 1672 |
if log:
|
| 1673 |
log(f"Stage3p {label}: skipping call with budget=0")
|
|
@@ -1699,34 +1675,8 @@ def llm_infer_probe_tags(
|
|
| 1699 |
out_tags.append(t)
|
| 1700 |
return out_tags
|
| 1701 |
|
| 1702 |
-
|
| 1703 |
-
|
| 1704 |
-
if sequential_species:
|
| 1705 |
-
bundle_by_tag = _probe_bundle_map(log=log)
|
| 1706 |
-
species_tags = [t for t in probe_tags if bundle_by_tag.get(t, "") == "species_taxonomy"]
|
| 1707 |
-
non_species_tags = [t for t in probe_tags if t not in set(species_tags)]
|
| 1708 |
-
if log:
|
| 1709 |
-
log(
|
| 1710 |
-
"Stage3p: sequential species mode on "
|
| 1711 |
-
f"(non_species={len(non_species_tags)}, species={len(species_tags)})"
|
| 1712 |
-
)
|
| 1713 |
-
|
| 1714 |
-
non_species_chunks = _split_probe_tags_by_bundle(non_species_tags, split_calls, log=log) if non_species_tags else []
|
| 1715 |
-
selected_non_species = _call_chunks(non_species_chunks, label="non_species")
|
| 1716 |
-
selected.extend(selected_non_species)
|
| 1717 |
-
|
| 1718 |
-
if species_tags:
|
| 1719 |
-
species_cap = _species_cap_from_count_tags(selected_non_species)
|
| 1720 |
-
if log:
|
| 1721 |
-
log(f"Stage3p: species cap from count tags = {species_cap!r}")
|
| 1722 |
-
species_chunks = [species_tags]
|
| 1723 |
-
selected_species = _call_chunks(species_chunks, label="species", max_pick_cap=species_cap)
|
| 1724 |
-
for t in selected_species:
|
| 1725 |
-
if t not in selected:
|
| 1726 |
-
selected.append(t)
|
| 1727 |
-
else:
|
| 1728 |
-
probe_chunks = _split_probe_tags_by_bundle(probe_tags, split_calls, log=log)
|
| 1729 |
-
selected = _call_chunks(probe_chunks, label="all")
|
| 1730 |
|
| 1731 |
selected = _apply_species_anchor_mapping(selected, query_text=query_text, log=log)
|
| 1732 |
|
|
|
|
| 1563 |
return out
|
| 1564 |
|
| 1565 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1566 |
def llm_infer_probe_tags(
|
| 1567 |
query_text: str,
|
| 1568 |
log=None,
|
|
|
|
| 1627 |
if max_pick_override < 0:
|
| 1628 |
max_pick_override = 0
|
| 1629 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1630 |
def _call_chunks(
|
| 1631 |
chunks: Sequence[Sequence[str]],
|
| 1632 |
*,
|
| 1633 |
label: str,
|
|
|
|
| 1634 |
) -> List[str]:
|
| 1635 |
out_tags: List[str] = []
|
| 1636 |
out_seen: Set[str] = set()
|
|
|
|
| 1644 |
per_call_budget = len(chunk_tags)
|
| 1645 |
if max_pick_override > 0:
|
| 1646 |
per_call_budget = min(per_call_budget, max_pick_override)
|
|
|
|
|
|
|
| 1647 |
if per_call_budget <= 0:
|
| 1648 |
if log:
|
| 1649 |
log(f"Stage3p {label}: skipping call with budget=0")
|
|
|
|
| 1675 |
out_tags.append(t)
|
| 1676 |
return out_tags
|
| 1677 |
|
| 1678 |
+
probe_chunks = _split_probe_tags_by_bundle(probe_tags, split_calls, log=log)
|
| 1679 |
+
selected: List[str] = _call_chunks(probe_chunks, label="all")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1680 |
|
| 1681 |
selected = _apply_species_anchor_mapping(selected, query_text=query_text, log=log)
|
| 1682 |
|