legal-eye / tau_rag /fuse /borda.py
Legal-i's picture
Initial deploy: legal-eye Hebrew legal RAG (17K corpus, verbatim-from-precedent)
3be54c6 verified
Raw
History Blame Contribute Delete
1.25 kB
"""Borda Count fusion: score(d) = Σ_L (|L| − rank_L(d)), summed over the lists L that contain d, with rank_L(d) 0-based."""
from __future__ import annotations
from typing import Dict, List
from ..core.types import Retrieved
class BordaFuser:
    """Fuse several ranked result lists with a Borda count.

    Every list awards ``len(list) - position`` points (0-based position) to
    each result it contains, so earlier entries earn more points. Points for
    the same ``chunk_id`` are summed across all lists.
    """

    name = "borda"

    def fuse(
        self,
        per_retriever_results: List[List[Retrieved]],
        top_n: int = 20,
    ) -> List[Retrieved]:
        """Return at most ``top_n`` results ordered by summed Borda points.

        Args:
            per_retriever_results: One ranked list of results per retriever.
            top_n: Maximum number of fused results to return. Zero or a
                negative value yields an empty list.

        Returns:
            Newly built ``Retrieved`` objects, highest Borda total first.
            Each one carries the chunk of the highest-raw-score occurrence
            of that ``chunk_id``, ``score`` set to the Borda total,
            ``retriever`` set to ``"borda"``, a 1-based ``rank``, and a copy
            of that occurrence's ``extra`` in which ``"origin_retriever"``
            is filled in (unless already present) with its retriever name.
        """
        # A negative bound would otherwise be used as a slice end and silently
        # drop the *last* |top_n| results instead of limiting the output.
        # ``None`` is deliberately let through: the slice below treats it as
        # "no limit", which the unguarded code already did.
        if top_n is not None and top_n <= 0:
            return []

        scores: Dict[str, float] = {}
        best: Dict[str, Retrieved] = {}
        for lst in per_retriever_results:
            n = len(lst)
            for rank0, r in enumerate(lst):
                cid = r.chunk.chunk_id
                scores[cid] = scores.get(cid, 0.0) + (n - rank0)
                # Keep the occurrence with the highest raw retriever score as
                # the representative for this chunk; the first one seen wins
                # on equal scores because the comparison is strict.
                if cid not in best or r.score > best[cid].score:
                    best[cid] = r

        # sorted() is stable, so chunks with equal totals keep the order in
        # which they were first encountered.
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

        fused: List[Retrieved] = []
        for cid, s in ranked[:top_n]:
            base = best[cid]
            # Copy so the representative's own ``extra`` is never mutated.
            merged_extra = dict(base.extra or {})
            merged_extra.setdefault("origin_retriever", base.retriever)
            fused.append(Retrieved(
                chunk=base.chunk,
                score=float(s),
                retriever="borda",
                rank=len(fused) + 1,
                extra=merged_extra,
            ))
        return fused