Spaces:

FoodDesert
/

Prompt_Squirrel_RAG

Running

App Files Community

Food Desert committed on May 20

Commit

5c070ef

1 Parent(s): 3127166

Update pipeline diagram and classifier context rescoring

Browse files

Files changed (5) hide show

app.py +40 -28
docs/retrieval_contract.md +2 -2
docs/rewrite_contract.md +1 -1
docs/space_overview.md +10 -10
scripts/eval_pipeline.py +0 -1

app.py CHANGED Viewed

@@ -385,7 +385,7 @@ def _build_arch_diagram_html() -> str:
     return """
 <div class="arch-diagram-wrap">
   <h2>Architecture At A Glance</h2>
-  <svg class="arch-flow" viewBox="0 0 1200 820" role="img" aria-label="Prompt Squirrel pipeline diagram showing query reformulation, lexical matching, scene composition, tag classification, semantic retrieval, candidate ranking, and editable output">
     <defs>
       <marker id="arch-arrowhead" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto" markerUnits="strokeWidth">
         <path d="M0,0 L10,4 L0,8 Z" fill="#334155" />
@@ -436,37 +436,43 @@ def _build_arch_diagram_html() -> str:
     <path d="M600 174 C652 210, 700 224, 735 280" class="arch-soft-line" />
     <path d="M600 174 C760 202, 916 210, 995 280" class="arch-soft-line" />
-    <rect x="140" y="480" width="360" height="76" class="arch-box arch-retrieval" />
-    <text x="320" y="510" text-anchor="middle" class="arch-title">Semantic Retrieval</text>
-    <text x="320" y="533" text-anchor="middle" class="arch-small">FastText/HNSW plus context scoring</text>
-    <rect x="700" y="480" width="340" height="76" class="arch-box arch-llm" />
-    <text x="870" y="510" text-anchor="middle" class="arch-title">Candidate Ranking</text>
-    <text x="870" y="533" text-anchor="middle" class="arch-small">Mistral 24B candidate indices</text>
-    <path d="M205 352 C200 394, 240 438, 290 480" class="arch-line" />
-    <path d="M465 352 C418 402, 370 428, 340 480" class="arch-line" />
-    <path d="M500 518 L700 518" class="arch-line" />
-    <path d="M735 352 C650 398, 498 438, 395 480" class="arch-soft-line" />
-    <path d="M735 352 C774 408, 820 448, 855 480" class="arch-soft-line" />
-    <path d="M995 352 C1010 402, 965 448, 910 480" class="arch-soft-line" />
     <text x="990" y="455" text-anchor="middle" class="arch-small">top candidates</text>
-    <rect x="320" y="640" width="290" height="76" class="arch-box arch-deterministic" />
-    <text x="465" y="670" text-anchor="middle" class="arch-title">Deterministic additions</text>
-    <text x="465" y="693" text-anchor="middle" class="arch-small">scene, classifier auto, implications</text>
-    <rect x="720" y="640" width="300" height="76" class="arch-box arch-output" />
-    <text x="870" y="670" text-anchor="middle" class="arch-title">Editable output</text>
-    <text x="870" y="693" text-anchor="middle" class="arch-small">ranked rows and suggested prompt</text>
-    <path d="M870 556 C820 604, 704 620, 590 650" class="arch-line" />
-    <path d="M735 352 C660 470, 530 544, 477 640" class="arch-soft-line" />
-    <path d="M995 352 C1038 486, 726 568, 508 640" class="arch-soft-line" />
-    <path d="M610 678 L720 678" class="arch-line" />
-    <text x="736" y="392" text-anchor="middle" class="arch-small">context and auto-tags</text>
-    <text x="570" y="622" text-anchor="middle" class="arch-small">selected tags</text>
   </svg>
 </div>
 """
@@ -3122,7 +3128,13 @@ def rag_pipeline_ui(
         log("Step 2: Prompt Squirrel retrieval (hidden)")
         try:
             t0 = time.perf_counter()
-            retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
             rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
             retrieval_result = psq_candidates_from_rewrite_phrases(
                 rewrite_phrases=rewrite_phrases,

     return """
 <div class="arch-diagram-wrap">
   <h2>Architecture At A Glance</h2>
+  <svg class="arch-flow" viewBox="0 0 1200 940" role="img" aria-label="Prompt Squirrel pipeline diagram showing query reformulation, lexical matching, scene composition, tag classification, semantic retrieval, context rescoring, candidate ranking, and editable output">
     <defs>
       <marker id="arch-arrowhead" markerWidth="10" markerHeight="8" refX="9" refY="4" orient="auto" markerUnits="strokeWidth">
         <path d="M0,0 L10,4 L0,8 Z" fill="#334155" />
     <path d="M600 174 C652 210, 700 224, 735 280" class="arch-soft-line" />
     <path d="M600 174 C760 202, 916 210, 995 280" class="arch-soft-line" />
+    <rect x="80" y="480" width="270" height="76" class="arch-box arch-retrieval" />
+    <text x="215" y="510" text-anchor="middle" class="arch-title">Semantic Retrieval</text>
+    <text x="215" y="533" text-anchor="middle" class="arch-small">FastText/HNSW candidates</text>
+    <rect x="465" y="480" width="270" height="76" class="arch-box arch-retrieval" />
+    <text x="600" y="510" text-anchor="middle" class="arch-title">Context Rescoring</text>
+    <text x="600" y="533" text-anchor="middle" class="arch-small">TF-IDF/SVD context</text>
+    <rect x="850" y="480" width="270" height="76" class="arch-box arch-llm" />
+    <text x="985" y="510" text-anchor="middle" class="arch-title">Candidate Ranking</text>
+    <text x="985" y="533" text-anchor="middle" class="arch-small">Mistral 24B indices</text>
+    <path d="M205 352 C190 398, 188 438, 215 480" class="arch-line" />
+    <path d="M465 352 C390 402, 330 435, 275 480" class="arch-line" />
+    <path d="M350 518 L465 518" class="arch-line" />
+    <path d="M735 518 L850 518" class="arch-line" />
+    <path d="M735 352 C710 404, 660 440, 630 480" class="arch-soft-line" />
+    <path d="M995 352 C950 404, 760 438, 670 480" class="arch-soft-line" />
+    <path d="M735 352 C785 405, 840 440, 900 480" class="arch-soft-line" />
+    <path d="M995 352 C1010 402, 970 448, 940 480" class="arch-soft-line" />
     <text x="990" y="455" text-anchor="middle" class="arch-small">top candidates</text>
+    <rect x="455" y="650" width="290" height="76" class="arch-box arch-deterministic" />
+    <text x="600" y="680" text-anchor="middle" class="arch-title">Final Tag Merge</text>
+    <text x="600" y="703" text-anchor="middle" class="arch-small">scene, classifier auto, implications</text>
+    <rect x="450" y="800" width="300" height="76" class="arch-box arch-output" />
+    <text x="600" y="830" text-anchor="middle" class="arch-title">Editable output</text>
+    <text x="600" y="853" text-anchor="middle" class="arch-small">ranked rows and suggested prompt</text>
+    <path d="M985 556 C900 618, 760 636, 630 650" class="arch-line" />
+    <path d="M735 352 C700 490, 620 570, 600 650" class="arch-soft-line" />
+    <path d="M995 352 C1045 505, 795 610, 630 650" class="arch-soft-line" />
+    <path d="M600 726 L600 800" class="arch-line" />
+    <text x="820" y="392" text-anchor="middle" class="arch-small">context and auto-tags</text>
+    <text x="760" y="625" text-anchor="middle" class="arch-small">selected tags</text>
   </svg>
 </div>
 """
         log("Step 2: Prompt Squirrel retrieval (hidden)")
         try:
             t0 = time.perf_counter()
+            retrieval_context_tags = list(
+                dict.fromkeys(
+                    (structural_tags or [])
+                    + (probe_tags or [])
+                    + (classifier_auto_tags or [])
+                )
+            )
             rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
             retrieval_result = psq_candidates_from_rewrite_phrases(
                 rewrite_phrases=rewrite_phrases,

docs/retrieval_contract.md CHANGED Viewed

@@ -27,8 +27,8 @@ Notes:
   - `PSQ_RETRIEVAL_PER_PHRASE_K` (default 10)
   - `PSQ_RETRIEVAL_PER_PHRASE_FINAL_K` (default 1)
   - `PSQ_MIN_TAG_COUNT` (default 100 in app path)
-- Stage 2 may be called with structural tags in `context_tags` to improve context scoring.
-- In app orchestration, classifier candidate tags can be injected into the candidate pool after Stage 2 retrieval and before Stage 3 selection.
 ---

   - `PSQ_RETRIEVAL_PER_PHRASE_K` (default 10)
   - `PSQ_RETRIEVAL_PER_PHRASE_FINAL_K` (default 1)
   - `PSQ_MIN_TAG_COUNT` (default 100 in app path)
+- Stage 2 may be called with Scene Composition tags and high-confidence Tag Classifier tags in `context_tags` to improve TF-IDF/SVD context scoring.
+- In app orchestration, lower-confidence classifier candidate tags can be injected into the candidate pool after Stage 2 retrieval and before Stage 3 selection.
 ---

docs/rewrite_contract.md CHANGED Viewed

@@ -51,7 +51,7 @@ The OpenRouter rewrite helper uses one deterministic call:
 Model/auth endpoint behavior comes from `openrouter_client.py`:
 - OpenRouter endpoint: `/chat/completions`
 - API key from `OPENROUTER_API_KEY`
-- model from `OPENROUTER_MODEL` (default `meta-llama/llama-3.1-8b-instruct`).
 ---

 Model/auth endpoint behavior comes from `openrouter_client.py`:
 - OpenRouter endpoint: `/chat/completions`
 - API key from `OPENROUTER_API_KEY`
+- model from `OPENROUTER_MODEL` (default `mistralai/mistral-small-24b-instruct-2501`).
 ---

docs/space_overview.md CHANGED Viewed

@@ -24,17 +24,21 @@ Design goals:
 - `Tag Classifier`:
   Runs a local multi-label classifier over the user prompt. Tags above calibrated high-precision thresholds are added automatically, and the next highest-scoring tags are added to the reranker candidate pool.
 - `Semantic Retrieval`:
-  Uses reformulated phrases and lexical matches to pull a high-recall candidate pool from the fixed vocabulary using FastText/HNSW, then applies lightweight context scoring and deduplication before ranking.
 - `Candidate Ranking`:
   Runs an LLM call that can only choose from the retrieved candidate list, including classifier-suggested candidates. It cannot invent new tags.
-After Candidate Ranking, Prompt Squirrel applies deterministic implication expansion, merges in high-confidence Scene Composition and Tag Classifier tags, and presents editable rows plus the final suggested prompt.
 ## Design Rationale
 - Query Reformulation and Semantic Retrieval are separate so search phrase generation stays flexible while candidate generation stays deterministic.
 - Lexical Matching protects obvious prompt words that are already canonical tags or aliases, especially for rarer tags where the rewrite model may miss the term.
-- Semantic Retrieval uses fast nearest-neighbor search plus lightweight context scoring to maximize recall before Candidate Ranking sharpens the final selection.
 - Candidate Ranking is constrained to a high-quality candidate set so the ranking LLM cannot invent tags.
 - Scene Composition and Tag Classifier run in parallel with Query Reformulation so they can add context and high-confidence tags without adding much latency.
 - Users control the final prompt by toggling suggested tags on/off; the prompt text is generated from those toggle states.
@@ -86,7 +90,7 @@ This keeps the extracted wiki file immutable while allowing targeted manual fixe
 - Local `t5-small` encoder-decoder (text-to-text seq2seq) model for query reformulation.
 - Local ModernBERT multi-label classifier for calibrated high-confidence tags and extra reranker candidates.
 - OpenRouter-served instruction LLMs for structural inference and closed-set selection.
-  Default model: `meta-llama/llama-3.1-8b-instruct` (configurable).
 - Gradio for the interactive web UI (tag toggles, ranked rows, and suggested prompt text).
 - Python pipeline orchestration with CSV/JSON data sources and implication-graph expansion.
@@ -94,11 +98,7 @@ This keeps the extracted wiki file immutable while allowing targeted manual fixe
 Current evaluation style compares selected tags against ground-truth tags on caption-evident samples.
-Primary metrics:
-- Precision: `TP / (TP + FP)`
-- Recall: `TP / (TP + FN)`
-- F1: harmonic mean of precision/recall
 The evaluation focus is practical:
@@ -121,7 +121,7 @@ Summary from `data/analysis/rewrite_ablation_n30_e2e_ndcg_20260509.json`:
 ## System Snapshot
-Prompt Squirrel maps unstructured text into a closed, editable tag vocabulary under practical latency constraints. The online path runs Query Reformulation, Lexical Matching, Scene Composition, and Tag Classifier inference before Semantic Retrieval builds the candidate pool. Candidate Ranking selects from that pool, after which high-confidence Scene Composition tags, calibrated Tag Classifier tags, and implication rules are merged into the editable output. Current N=30 results show materially higher retrieval quality and end-to-end F1 after moving Query Reformulation to the local T5 model and adding Lexical Matching plus classifier candidates.
 ## Evaluation Dataset Snapshot

 - `Tag Classifier`:
   Runs a local multi-label classifier over the user prompt. Tags above calibrated high-precision thresholds are added automatically, and the next highest-scoring tags are added to the reranker candidate pool.
 - `Semantic Retrieval`:
+  Uses reformulated phrases and lexical matches to pull a high-recall candidate pool from the fixed vocabulary using FastText/HNSW.
+- `Context Rescoring`:
+  Reweights and deduplicates retrieved candidates using TF-IDF/SVD context from the query, Scene Composition tags, and high-confidence Tag Classifier tags before final ranking.
 - `Candidate Ranking`:
   Runs an LLM call that can only choose from the retrieved candidate list, including classifier-suggested candidates. It cannot invent new tags.
+- `Final Tag Merge`:
+  Merges Candidate Ranking selections with high-confidence Scene Composition tags, high-confidence Tag Classifier tags, and implication-rule additions before building the editable output.
+After Final Tag Merge, Prompt Squirrel presents editable rows plus the final suggested prompt.
 ## Design Rationale
 - Query Reformulation and Semantic Retrieval are separate so search phrase generation stays flexible while candidate generation stays deterministic.
 - Lexical Matching protects obvious prompt words that are already canonical tags or aliases, especially for rarer tags where the rewrite model may miss the term.
+- Semantic Retrieval uses fast nearest-neighbor search to maximize recall; Context Rescoring then reweights those candidates with lightweight context signals before Candidate Ranking sharpens the final selection.
 - Candidate Ranking is constrained to a high-quality candidate set so the ranking LLM cannot invent tags.
 - Scene Composition and Tag Classifier run in parallel with Query Reformulation so they can add context and high-confidence tags without adding much latency.
 - Users control the final prompt by toggling suggested tags on/off; the prompt text is generated from those toggle states.
 - Local `t5-small` encoder-decoder (text-to-text seq2seq) model for query reformulation.
 - Local ModernBERT multi-label classifier for calibrated high-confidence tags and extra reranker candidates.
 - OpenRouter-served instruction LLMs for structural inference and closed-set selection.
+  Default model: `mistralai/mistral-small-24b-instruct-2501` (configurable).
 - Gradio for the interactive web UI (tag toggles, ranked rows, and suggested prompt text).
 - Python pipeline orchestration with CSV/JSON data sources and implication-graph expansion.
 Current evaluation style compares selected tags against ground-truth tags on caption-evident samples.
+F1 is the primary end-to-end metric.
 The evaluation focus is practical:
 ## System Snapshot
+Prompt Squirrel maps unstructured text into a closed, editable tag vocabulary under practical latency constraints. The online path runs Query Reformulation, Lexical Matching, Scene Composition, and Tag Classifier inference before Semantic Retrieval builds the candidate pool and Context Rescoring reweights it. Candidate Ranking selects from that pool, after which Final Tag Merge combines high-confidence Scene Composition tags, calibrated Tag Classifier tags, and implication rules into the editable output. Current N=30 results show materially higher retrieval quality and end-to-end F1 after moving Query Reformulation to the local T5 model and adding Lexical Matching plus classifier candidates.
 ## Evaluation Dataset Snapshot

scripts/eval_pipeline.py CHANGED Viewed

@@ -408,7 +408,6 @@ def _process_one_sample(
                         result.structural_tags
                         + result.probe_tags
                         + result.classifier_auto_tags
-                        + result.classifier_candidate_tags
                     )
                 )
                 retrieval_result = psq_candidates_from_rewrite_phrases(

                         result.structural_tags
                         + result.probe_tags
                         + result.classifier_auto_tags
                     )
                 )
                 retrieval_result = psq_candidates_from_rewrite_phrases(