Spaces:
Running
Running
Food Desert committed on
Commit · 06a3c46
1 Parent(s): 30bedf0
Switch Stage3 to explicit-only no-why selection, drop bear probe, and set k=1 defaults
Browse files
- app.py +246 -28
- data/analysis/simplified_probe_tags.csv +1 -1
- data/eval_results/k1_default_recheck_seed42_n10.jsonl +11 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k1.jsonl +11 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k10.jsonl +0 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k2.jsonl +11 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k3.jsonl +11 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k4.jsonl +11 -0
- data/eval_results/k_sweep_explicit_no_why_seed42_k6.jsonl +0 -0
- data/eval_results/latency_baseline_seed42.jsonl +11 -0
- data/eval_results/latency_baseline_seed43.jsonl +11 -0
- data/eval_results/latency_chunk100_seed42.jsonl +11 -0
- data/eval_results/latency_chunk60_k6_seed42.jsonl +11 -0
- data/eval_results/latency_chunk60_k6_seed43.jsonl +11 -0
- data/eval_results/latency_k1_seed42.jsonl +11 -0
- data/eval_results/latency_k1_seed43.jsonl +11 -0
- data/eval_results/latency_k4_seed43.jsonl +11 -0
- data/eval_results/latency_single_shot_seed42.jsonl +11 -0
- data/eval_results/smoke_no_why_explicit_only_n1.jsonl +2 -0
- data/eval_results/smoke_no_why_explicit_only_n1_v2.jsonl +2 -0
- data/eval_results/why_gate_compare_explicit_n10.jsonl +11 -0
- data/eval_results/why_gate_compare_strong_implied_n10.jsonl +11 -0
- data/runtime_debug/eval_no_why_explicit_instruction_n10_20260303T005633Z.json +222 -0
- data/runtime_debug/eval_no_why_n10_20260302T210359Z.json +308 -0
- data/runtime_debug/false_positive_case_review_looking_anthro_bear_20260304.md +159 -0
- data/runtime_debug/llm_capture_20260302T162119Z/input_prompt.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162202Z/input_prompt.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162202Z/structural_request.json +13 -0
- data/runtime_debug/llm_capture_20260302T162202Z/structural_response_parsed.json +16 -0
- data/runtime_debug/llm_capture_20260302T162202Z/structural_response_raw.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162249Z/input_prompt.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162249Z/probe_request.json +14 -0
- data/runtime_debug/llm_capture_20260302T162249Z/probe_response_parsed.json +28 -0
- data/runtime_debug/llm_capture_20260302T162249Z/probe_response_raw.txt +10 -0
- data/runtime_debug/llm_capture_20260302T162249Z/selection_request.json +38 -0
- data/runtime_debug/llm_capture_20260302T162249Z/selection_response_parsed.json +3 -0
- data/runtime_debug/llm_capture_20260302T162249Z/selection_response_raw.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162249Z/structural_request.json +13 -0
- data/runtime_debug/llm_capture_20260302T162249Z/structural_response_parsed.json +19 -0
- data/runtime_debug/llm_capture_20260302T162249Z/structural_response_raw.txt +1 -0
- data/runtime_debug/llm_capture_20260302T162249Z/summary.json +51 -0
- data/runtime_debug/selection_why_vs_no_why_20260302T191813Z.json +217 -0
- data/runtime_debug/whyless_replication_seeds_42_43_20260303T060318Z.json +123 -0
- data/runtime_metrics/ui_pipeline_timings.jsonl +3 -0
- data/structural_tag_definitions.csv +8 -8
- psq_rag/llm/select.py +1243 -1291
- psq_rag/retrieval/psq_retrieval.py +1 -1
- psq_rag/ui/group_ranked_display.py +198 -0
- scripts/eval_pipeline.py +2 -2
app.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import logging
|
|
|
|
|
|
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import List
|
| 7 |
-
from concurrent.futures import ThreadPoolExecutor
|
| 8 |
|
| 9 |
from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
|
| 10 |
from psq_rag.llm.rewrite import llm_rewrite_prompt
|
| 11 |
from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
|
| 12 |
from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
|
| 13 |
from psq_rag.retrieval.state import expand_tags_via_implications
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
def _split_prompt_commas(s: str) -> List[str]:
|
|
@@ -80,6 +84,18 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "0"
|
|
| 80 |
MASCOT_DIR = Path(__file__).parent / "mascotimages"
|
| 81 |
MASCOT_FILE = MASCOT_DIR / "transparentsquirrel.png"
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
try:
|
| 84 |
from gradio_client import utils as _gc_utils
|
| 85 |
|
|
@@ -115,10 +131,39 @@ except Exception as e:
|
|
| 115 |
|
| 116 |
|
| 117 |
allow_nsfw_tags = False
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
verbose_retrieval_all = False
|
| 120 |
verbose_retrieval_limit = 20
|
| 121 |
enable_probe_tags = os.environ.get("PSQ_ENABLE_PROBE", "1").strip() not in {"0", "false", "False"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
css = """
|
| 124 |
.scrollable-content{
|
|
@@ -147,21 +192,110 @@ css = """
|
|
| 147 |
"""
|
| 148 |
|
| 149 |
|
| 150 |
-
def rag_pipeline_ui(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
logs = []
|
| 152 |
def log(s): logs.append(s)
|
| 153 |
|
| 154 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
log("Start: received prompt")
|
| 156 |
prompt_in = (user_prompt or "").strip()
|
| 157 |
if not prompt_in:
|
| 158 |
-
return "Error: empty prompt", ""
|
| 159 |
|
| 160 |
log("Input:")
|
| 161 |
log(prompt_in)
|
| 162 |
log("")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
|
|
|
| 164 |
user_tags = extract_user_provided_tags_upto_3_words(prompt_in)
|
|
|
|
|
|
|
|
|
|
| 165 |
log("Heuristically extracted user tags:")
|
| 166 |
if user_tags:
|
| 167 |
log(", ".join(user_tags))
|
|
@@ -176,9 +310,16 @@ def rag_pipeline_ui(user_prompt: str):
|
|
| 176 |
fut_struct = ex.submit(llm_infer_structural_tags, prompt_in, log=log)
|
| 177 |
fut_probe = ex.submit(llm_infer_probe_tags, prompt_in, log=log) if enable_probe_tags else None
|
| 178 |
|
| 179 |
-
rewritten =
|
| 180 |
-
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
log("Rewrite:")
|
| 184 |
log(rewritten if rewritten else "(empty)")
|
|
@@ -192,19 +333,28 @@ def rag_pipeline_ui(user_prompt: str):
|
|
| 192 |
|
| 193 |
log("Step 2: Prompt Squirrel retrieval (hidden)")
|
| 194 |
try:
|
|
|
|
| 195 |
retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
|
| 196 |
rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
|
| 197 |
retrieval_result = psq_candidates_from_rewrite_phrases(
|
| 198 |
rewrite_phrases=rewrite_phrases,
|
| 199 |
allow_nsfw_tags=allow_nsfw_tags,
|
| 200 |
context_tags=retrieval_context_tags,
|
| 201 |
-
global_k=
|
|
|
|
|
|
|
| 202 |
verbose=verbose_retrieval,
|
| 203 |
)
|
| 204 |
if isinstance(retrieval_result, tuple):
|
| 205 |
candidates, phrase_reports = retrieval_result
|
| 206 |
else:
|
| 207 |
candidates, phrase_reports = retrieval_result, []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
log(f"Retrieved {len(candidates)} candidate tags")
|
| 209 |
if verbose_retrieval:
|
| 210 |
log(f"Total unique candidates: {len(candidates)}")
|
|
@@ -255,12 +405,20 @@ def rag_pipeline_ui(user_prompt: str):
|
|
| 255 |
structural_tags=structural_tags,
|
| 256 |
probe_tags=probe_tags,
|
| 257 |
)
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
selected_tags = [candidates[i].tag for i in picked_indices] if picked_indices else []
|
| 266 |
|
|
@@ -282,8 +440,12 @@ def rag_pipeline_ui(user_prompt: str):
|
|
| 282 |
log(" No probe tags inferred")
|
| 283 |
|
| 284 |
log("Step 3c: Expand via tag implications")
|
|
|
|
| 285 |
tag_set = set(selected_tags)
|
| 286 |
expanded, implied_only = expand_tags_via_implications(tag_set)
|
|
|
|
|
|
|
|
|
|
| 287 |
if implied_only:
|
| 288 |
selected_tags.extend(sorted(implied_only))
|
| 289 |
log(f" Added {len(implied_only)} implied tags: {', '.join(sorted(implied_only))}")
|
|
@@ -291,14 +453,41 @@ def rag_pipeline_ui(user_prompt: str):
|
|
| 291 |
log(" No additional implied tags")
|
| 292 |
|
| 293 |
log("Step 4: Compose final prompt")
|
|
|
|
| 294 |
final_prompt = compose_final_prompt(rewritten, selected_tags)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
|
|
|
|
|
|
|
|
|
| 296 |
log("Done: final prompt ready")
|
| 297 |
-
return "\n".join(logs), final_prompt
|
| 298 |
|
| 299 |
except Exception as e:
|
| 300 |
log(f"Error: {type(e).__name__}: {e}")
|
| 301 |
-
return "\n".join(logs), ""
|
| 302 |
|
| 303 |
|
| 304 |
|
|
@@ -311,14 +500,17 @@ with gr.Blocks(css=css) as app:
|
|
| 311 |
lines=1
|
| 312 |
)
|
| 313 |
with gr.Column(scale=1):
|
| 314 |
-
_mascot_pil =
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
| 322 |
submit_button = gr.Button("Run", variant="primary")
|
| 323 |
|
| 324 |
gr.Markdown(
|
|
@@ -344,16 +536,42 @@ then returns a cleaned, model-friendly prompt.
|
|
| 344 |
placeholder="Your optimized prompt will appear here."
|
| 345 |
)
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
submit_button.click(
|
| 348 |
rag_pipeline_ui,
|
| 349 |
-
inputs=[image_tags],
|
| 350 |
-
outputs=[console, final_prompt]
|
| 351 |
)
|
| 352 |
|
| 353 |
image_tags.submit(
|
| 354 |
rag_pipeline_ui,
|
| 355 |
-
inputs=[image_tags],
|
| 356 |
-
outputs=[console, final_prompt]
|
| 357 |
)
|
| 358 |
|
| 359 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import logging
|
| 4 |
+
import time
|
| 5 |
+
import json
|
| 6 |
+
from datetime import datetime
|
| 7 |
from PIL import Image
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import List
|
| 10 |
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 11 |
|
| 12 |
from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
|
| 13 |
from psq_rag.llm.rewrite import llm_rewrite_prompt
|
| 14 |
from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
|
| 15 |
from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
|
| 16 |
from psq_rag.retrieval.state import expand_tags_via_implications
|
| 17 |
+
from psq_rag.ui.group_ranked_display import render_group_rankings_markdown
|
| 18 |
|
| 19 |
|
| 20 |
def _split_prompt_commas(s: str) -> List[str]:
|
|
|
|
| 84 |
MASCOT_DIR = Path(__file__).parent / "mascotimages"
|
| 85 |
MASCOT_FILE = MASCOT_DIR / "transparentsquirrel.png"
|
| 86 |
|
| 87 |
+
|
| 88 |
+
def _load_mascot_image():
|
| 89 |
+
"""Load mascot image if available; return None when missing/unreadable."""
|
| 90 |
+
if not MASCOT_FILE.exists():
|
| 91 |
+
logging.warning("Mascot image missing: %s", MASCOT_FILE)
|
| 92 |
+
return None
|
| 93 |
+
try:
|
| 94 |
+
return Image.open(MASCOT_FILE).convert("RGBA")
|
| 95 |
+
except Exception as e:
|
| 96 |
+
logging.warning("Failed to load mascot image (%s): %s", MASCOT_FILE, e)
|
| 97 |
+
return None
|
| 98 |
+
|
| 99 |
try:
|
| 100 |
from gradio_client import utils as _gc_utils
|
| 101 |
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
allow_nsfw_tags = False
|
| 134 |
+
def _is_production_runtime() -> bool:
|
| 135 |
+
"""Best-effort detection for deployed runtime (HF Spaces or explicit env)."""
|
| 136 |
+
if os.environ.get("PSQ_PRODUCTION", "").strip().lower() in {"1", "true", "yes"}:
|
| 137 |
+
return True
|
| 138 |
+
if os.environ.get("SPACE_ID"):
|
| 139 |
+
return True
|
| 140 |
+
if os.environ.get("HF_SPACE_ID"):
|
| 141 |
+
return True
|
| 142 |
+
if os.environ.get("SYSTEM") == "spaces":
|
| 143 |
+
return True
|
| 144 |
+
return False
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
verbose_retrieval_default = "0" if _is_production_runtime() else "1"
|
| 148 |
+
verbose_retrieval = os.environ.get("PSQ_VERBOSE_RETRIEVAL", verbose_retrieval_default).strip().lower() in {"1", "true", "yes"}
|
| 149 |
verbose_retrieval_all = False
|
| 150 |
verbose_retrieval_limit = 20
|
| 151 |
enable_probe_tags = os.environ.get("PSQ_ENABLE_PROBE", "1").strip() not in {"0", "false", "False"}
|
| 152 |
+
display_top_groups_default = int(os.environ.get("PSQ_DISPLAY_TOP_GROUPS", "10"))
|
| 153 |
+
display_top_tags_per_group_default = int(os.environ.get("PSQ_DISPLAY_TOP_TAGS_PER_GROUP", "5"))
|
| 154 |
+
display_rank_top_k_default = int(os.environ.get("PSQ_DISPLAY_GROUP_RANK_TOP_K", "5"))
|
| 155 |
+
retrieval_global_k = int(os.environ.get("PSQ_RETRIEVAL_GLOBAL_K", "300"))
|
| 156 |
+
retrieval_per_phrase_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_K", "10"))
|
| 157 |
+
retrieval_per_phrase_final_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_FINAL_K", "1"))
|
| 158 |
+
selection_mode = os.environ.get("PSQ_SELECTION_MODE", "chunked_map_union").strip()
|
| 159 |
+
selection_chunk_size = int(os.environ.get("PSQ_SELECTION_CHUNK_SIZE", "60"))
|
| 160 |
+
selection_per_phrase_k = int(os.environ.get("PSQ_SELECTION_PER_PHRASE_K", "2"))
|
| 161 |
+
selection_candidate_cap = int(os.environ.get("PSQ_SELECTION_CANDIDATE_CAP", "0"))
|
| 162 |
+
stage1_rewrite_timeout_s = float(os.environ.get("PSQ_TIMEOUT_REWRITE_S", "45"))
|
| 163 |
+
stage1_struct_timeout_s = float(os.environ.get("PSQ_TIMEOUT_STRUCT_S", "45"))
|
| 164 |
+
stage1_probe_timeout_s = float(os.environ.get("PSQ_TIMEOUT_PROBE_S", "45"))
|
| 165 |
+
stage3_select_timeout_s = float(os.environ.get("PSQ_TIMEOUT_SELECT_S", "45"))
|
| 166 |
+
timing_log_path = Path(os.environ.get("PSQ_TIMING_LOG_PATH", "data/runtime_metrics/ui_pipeline_timings.jsonl"))
|
| 167 |
|
| 168 |
css = """
|
| 169 |
.scrollable-content{
|
|
|
|
| 192 |
"""
|
| 193 |
|
| 194 |
|
| 195 |
+
def rag_pipeline_ui(
|
| 196 |
+
user_prompt: str,
|
| 197 |
+
display_top_groups: float,
|
| 198 |
+
display_top_tags_per_group: float,
|
| 199 |
+
display_rank_top_k: float,
|
| 200 |
+
):
|
| 201 |
logs = []
|
| 202 |
def log(s): logs.append(s)
|
| 203 |
|
| 204 |
try:
|
| 205 |
+
stage_timings = {}
|
| 206 |
+
|
| 207 |
+
def _record_timing(stage: str, dt_s: float):
|
| 208 |
+
stage_timings[stage] = float(dt_s)
|
| 209 |
+
|
| 210 |
+
def _emit_timing_summary(total_s: float):
|
| 211 |
+
summary_order = [
|
| 212 |
+
"preprocess",
|
| 213 |
+
"rewrite",
|
| 214 |
+
"structural",
|
| 215 |
+
"probe",
|
| 216 |
+
"retrieval",
|
| 217 |
+
"selection",
|
| 218 |
+
"implication_expansion",
|
| 219 |
+
"prompt_composition",
|
| 220 |
+
"group_display",
|
| 221 |
+
]
|
| 222 |
+
lines = []
|
| 223 |
+
for k in summary_order:
|
| 224 |
+
if k in stage_timings:
|
| 225 |
+
lines.append(f"{k}={stage_timings[k]:.2f}s")
|
| 226 |
+
slowest = max(stage_timings.items(), key=lambda kv: kv[1])[0] if stage_timings else "n/a"
|
| 227 |
+
log("Timing Summary: " + ", ".join(lines))
|
| 228 |
+
log(f"Timing Slowest Stage: {slowest}")
|
| 229 |
+
log(f"Timing Total: {total_s:.2f}s")
|
| 230 |
+
|
| 231 |
+
def _append_timing_jsonl(total_s: float):
|
| 232 |
+
try:
|
| 233 |
+
timing_log_path.parent.mkdir(parents=True, exist_ok=True)
|
| 234 |
+
rec = {
|
| 235 |
+
"timestamp_utc": datetime.utcnow().isoformat(timespec="seconds") + "Z",
|
| 236 |
+
"stages_s": stage_timings,
|
| 237 |
+
"total_s": float(total_s),
|
| 238 |
+
"config": {
|
| 239 |
+
"timeout_rewrite_s": stage1_rewrite_timeout_s,
|
| 240 |
+
"timeout_struct_s": stage1_struct_timeout_s,
|
| 241 |
+
"timeout_probe_s": stage1_probe_timeout_s,
|
| 242 |
+
"timeout_select_s": stage3_select_timeout_s,
|
| 243 |
+
},
|
| 244 |
+
}
|
| 245 |
+
with timing_log_path.open("a", encoding="utf-8") as f:
|
| 246 |
+
f.write(json.dumps(rec, ensure_ascii=True) + "\n")
|
| 247 |
+
log(f"Timing Log: wrote {timing_log_path}")
|
| 248 |
+
except Exception as e:
|
| 249 |
+
log(f"Timing Log: failed ({type(e).__name__}: {e})")
|
| 250 |
+
|
| 251 |
+
def _future_with_timeout(fut, timeout_s: float, stage_name: str, fallback):
|
| 252 |
+
t0 = time.perf_counter()
|
| 253 |
+
try:
|
| 254 |
+
out = fut.result(timeout=max(1.0, float(timeout_s)))
|
| 255 |
+
dt = time.perf_counter() - t0
|
| 256 |
+
log(f"{stage_name}: {dt:.2f}s")
|
| 257 |
+
stage_key = {
|
| 258 |
+
"Rewrite": "rewrite",
|
| 259 |
+
"Structural inference": "structural",
|
| 260 |
+
"Probe inference": "probe",
|
| 261 |
+
"Index selection": "selection",
|
| 262 |
+
}.get(stage_name)
|
| 263 |
+
if stage_key:
|
| 264 |
+
_record_timing(stage_key, dt)
|
| 265 |
+
return out
|
| 266 |
+
except FutureTimeoutError:
|
| 267 |
+
fut.cancel()
|
| 268 |
+
log(f"{stage_name}: timed out after {timeout_s:.0f}s; using fallback")
|
| 269 |
+
return fallback
|
| 270 |
+
except Exception as e:
|
| 271 |
+
log(f"{stage_name}: failed ({type(e).__name__}: {e}); using fallback")
|
| 272 |
+
return fallback
|
| 273 |
+
|
| 274 |
+
t_total0 = time.perf_counter()
|
| 275 |
log("Start: received prompt")
|
| 276 |
prompt_in = (user_prompt or "").strip()
|
| 277 |
if not prompt_in:
|
| 278 |
+
return "Error: empty prompt", "", ""
|
| 279 |
|
| 280 |
log("Input:")
|
| 281 |
log(prompt_in)
|
| 282 |
log("")
|
| 283 |
+
log(
|
| 284 |
+
"Runtime config: "
|
| 285 |
+
f"retrieval_global_k={retrieval_global_k} "
|
| 286 |
+
f"retrieval_per_phrase_k={retrieval_per_phrase_k} "
|
| 287 |
+
f"retrieval_per_phrase_final_k={retrieval_per_phrase_final_k} "
|
| 288 |
+
f"selection_mode={selection_mode} "
|
| 289 |
+
f"selection_chunk_size={selection_chunk_size} "
|
| 290 |
+
f"selection_per_phrase_k={selection_per_phrase_k}"
|
| 291 |
+
)
|
| 292 |
+
log("")
|
| 293 |
|
| 294 |
+
t0 = time.perf_counter()
|
| 295 |
user_tags = extract_user_provided_tags_upto_3_words(prompt_in)
|
| 296 |
+
dt = time.perf_counter()-t0
|
| 297 |
+
_record_timing("preprocess", dt)
|
| 298 |
+
log(f"Preprocess (user tag extraction): {dt:.2f}s")
|
| 299 |
log("Heuristically extracted user tags:")
|
| 300 |
if user_tags:
|
| 301 |
log(", ".join(user_tags))
|
|
|
|
| 310 |
fut_struct = ex.submit(llm_infer_structural_tags, prompt_in, log=log)
|
| 311 |
fut_probe = ex.submit(llm_infer_probe_tags, prompt_in, log=log) if enable_probe_tags else None
|
| 312 |
|
| 313 |
+
rewritten = _future_with_timeout(
|
| 314 |
+
fut_rewrite, stage1_rewrite_timeout_s, "Rewrite", prompt_in
|
| 315 |
+
)
|
| 316 |
+
structural_tags = _future_with_timeout(
|
| 317 |
+
fut_struct, stage1_struct_timeout_s, "Structural inference", []
|
| 318 |
+
)
|
| 319 |
+
probe_tags = (
|
| 320 |
+
_future_with_timeout(fut_probe, stage1_probe_timeout_s, "Probe inference", [])
|
| 321 |
+
if fut_probe else []
|
| 322 |
+
)
|
| 323 |
|
| 324 |
log("Rewrite:")
|
| 325 |
log(rewritten if rewritten else "(empty)")
|
|
|
|
| 333 |
|
| 334 |
log("Step 2: Prompt Squirrel retrieval (hidden)")
|
| 335 |
try:
|
| 336 |
+
t0 = time.perf_counter()
|
| 337 |
retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
|
| 338 |
rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
|
| 339 |
retrieval_result = psq_candidates_from_rewrite_phrases(
|
| 340 |
rewrite_phrases=rewrite_phrases,
|
| 341 |
allow_nsfw_tags=allow_nsfw_tags,
|
| 342 |
context_tags=retrieval_context_tags,
|
| 343 |
+
global_k=max(1, retrieval_global_k),
|
| 344 |
+
per_phrase_k=max(1, retrieval_per_phrase_k),
|
| 345 |
+
per_phrase_final_k=max(1, retrieval_per_phrase_final_k),
|
| 346 |
verbose=verbose_retrieval,
|
| 347 |
)
|
| 348 |
if isinstance(retrieval_result, tuple):
|
| 349 |
candidates, phrase_reports = retrieval_result
|
| 350 |
else:
|
| 351 |
candidates, phrase_reports = retrieval_result, []
|
| 352 |
+
if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
|
| 353 |
+
candidates = candidates[:selection_candidate_cap]
|
| 354 |
+
log(f"Selection candidate cap applied: {selection_candidate_cap}")
|
| 355 |
+
dt = time.perf_counter()-t0
|
| 356 |
+
_record_timing("retrieval", dt)
|
| 357 |
+
log(f"Retrieval: {dt:.2f}s")
|
| 358 |
log(f"Retrieved {len(candidates)} candidate tags")
|
| 359 |
if verbose_retrieval:
|
| 360 |
log(f"Total unique candidates: {len(candidates)}")
|
|
|
|
| 405 |
structural_tags=structural_tags,
|
| 406 |
probe_tags=probe_tags,
|
| 407 |
)
|
| 408 |
+
with ThreadPoolExecutor(max_workers=1) as ex:
|
| 409 |
+
fut_sel = ex.submit(
|
| 410 |
+
llm_select_indices,
|
| 411 |
+
query_text=selection_query,
|
| 412 |
+
candidates=candidates,
|
| 413 |
+
max_pick=0,
|
| 414 |
+
log=log,
|
| 415 |
+
mode=selection_mode,
|
| 416 |
+
chunk_size=max(1, selection_chunk_size),
|
| 417 |
+
per_phrase_k=max(1, selection_per_phrase_k),
|
| 418 |
+
)
|
| 419 |
+
picked_indices = _future_with_timeout(
|
| 420 |
+
fut_sel, stage3_select_timeout_s, "Index selection", []
|
| 421 |
+
)
|
| 422 |
|
| 423 |
selected_tags = [candidates[i].tag for i in picked_indices] if picked_indices else []
|
| 424 |
|
|
|
|
| 440 |
log(" No probe tags inferred")
|
| 441 |
|
| 442 |
log("Step 3c: Expand via tag implications")
|
| 443 |
+
t0 = time.perf_counter()
|
| 444 |
tag_set = set(selected_tags)
|
| 445 |
expanded, implied_only = expand_tags_via_implications(tag_set)
|
| 446 |
+
dt = time.perf_counter()-t0
|
| 447 |
+
_record_timing("implication_expansion", dt)
|
| 448 |
+
log(f"Implication expansion: {dt:.2f}s")
|
| 449 |
if implied_only:
|
| 450 |
selected_tags.extend(sorted(implied_only))
|
| 451 |
log(f" Added {len(implied_only)} implied tags: {', '.join(sorted(implied_only))}")
|
|
|
|
| 453 |
log(" No additional implied tags")
|
| 454 |
|
| 455 |
log("Step 4: Compose final prompt")
|
| 456 |
+
t0 = time.perf_counter()
|
| 457 |
final_prompt = compose_final_prompt(rewritten, selected_tags)
|
| 458 |
+
dt = time.perf_counter()-t0
|
| 459 |
+
_record_timing("prompt_composition", dt)
|
| 460 |
+
log(f"Prompt composition: {dt:.2f}s")
|
| 461 |
+
|
| 462 |
+
log("Step 5: Build ranked group/category display")
|
| 463 |
+
t0 = time.perf_counter()
|
| 464 |
+
seed_terms = []
|
| 465 |
+
seed_terms.extend(user_tags)
|
| 466 |
+
seed_terms.extend([p.strip() for p in (rewritten or "").split(",") if p.strip()])
|
| 467 |
+
seed_terms.extend(structural_tags or [])
|
| 468 |
+
seed_terms.extend(probe_tags or [])
|
| 469 |
+
seed_terms.extend(selected_tags)
|
| 470 |
+
seed_terms = list(dict.fromkeys(seed_terms))
|
| 471 |
+
|
| 472 |
+
groups_md = render_group_rankings_markdown(
|
| 473 |
+
seed_terms=seed_terms,
|
| 474 |
+
top_groups=max(1, int(display_top_groups)),
|
| 475 |
+
top_tags_per_group=max(1, int(display_top_tags_per_group)),
|
| 476 |
+
group_rank_top_k=max(1, int(display_rank_top_k)),
|
| 477 |
+
)
|
| 478 |
+
dt = time.perf_counter()-t0
|
| 479 |
+
_record_timing("group_display", dt)
|
| 480 |
+
log(f"Ranked group display: {dt:.2f}s")
|
| 481 |
|
| 482 |
+
total_dt = time.perf_counter()-t_total0
|
| 483 |
+
_emit_timing_summary(total_dt)
|
| 484 |
+
_append_timing_jsonl(total_dt)
|
| 485 |
log("Done: final prompt ready")
|
| 486 |
+
return "\n".join(logs), final_prompt, groups_md
|
| 487 |
|
| 488 |
except Exception as e:
|
| 489 |
log(f"Error: {type(e).__name__}: {e}")
|
| 490 |
+
return "\n".join(logs), "", ""
|
| 491 |
|
| 492 |
|
| 493 |
|
|
|
|
| 500 |
lines=1
|
| 501 |
)
|
| 502 |
with gr.Column(scale=1):
|
| 503 |
+
_mascot_pil = _load_mascot_image()
|
| 504 |
+
if _mascot_pil is not None:
|
| 505 |
+
mascot_img = gr.Image(
|
| 506 |
+
value=_mascot_pil,
|
| 507 |
+
show_label=False,
|
| 508 |
+
interactive=False,
|
| 509 |
+
height=220,
|
| 510 |
+
elem_id="mascot"
|
| 511 |
+
)
|
| 512 |
+
else:
|
| 513 |
+
mascot_img = gr.Markdown("`(mascot image unavailable)`")
|
| 514 |
submit_button = gr.Button("Run", variant="primary")
|
| 515 |
|
| 516 |
gr.Markdown(
|
|
|
|
| 536 |
placeholder="Your optimized prompt will appear here."
|
| 537 |
)
|
| 538 |
|
| 539 |
+
with gr.Accordion("Display Settings", open=False):
|
| 540 |
+
with gr.Row():
|
| 541 |
+
display_top_groups = gr.Number(
|
| 542 |
+
value=display_top_groups_default,
|
| 543 |
+
precision=0,
|
| 544 |
+
label="Rows (Top Groups/Categories)",
|
| 545 |
+
minimum=1,
|
| 546 |
+
)
|
| 547 |
+
display_top_tags_per_group = gr.Number(
|
| 548 |
+
value=display_top_tags_per_group_default,
|
| 549 |
+
precision=0,
|
| 550 |
+
label="Top Tags Shown Per Row",
|
| 551 |
+
minimum=1,
|
| 552 |
+
)
|
| 553 |
+
display_rank_top_k = gr.Number(
|
| 554 |
+
value=display_rank_top_k_default,
|
| 555 |
+
precision=0,
|
| 556 |
+
label="Top Tags Used for Row Ranking",
|
| 557 |
+
minimum=1,
|
| 558 |
+
)
|
| 559 |
+
|
| 560 |
+
group_rankings_md = gr.Markdown(
|
| 561 |
+
label="Ranked Group/Category Tag Suggestions",
|
| 562 |
+
value="",
|
| 563 |
+
)
|
| 564 |
+
|
| 565 |
submit_button.click(
|
| 566 |
rag_pipeline_ui,
|
| 567 |
+
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 568 |
+
outputs=[console, final_prompt, group_rankings_md]
|
| 569 |
)
|
| 570 |
|
| 571 |
image_tags.submit(
|
| 572 |
rag_pipeline_ui,
|
| 573 |
+
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 574 |
+
outputs=[console, final_prompt, group_rankings_md]
|
| 575 |
)
|
| 576 |
|
| 577 |
if __name__ == "__main__":
|
data/analysis/simplified_probe_tags.csv
CHANGED
|
@@ -32,5 +32,5 @@ thick_thighs,body_shape_breasts,0,0.025000,0.001367,1,1,0.500000,1.000000,0.6666
|
|
| 32 |
nude,clothing_state,0,0.057000,0.004049,1,3,0.000000,0.000000,0.000000,0.000000,0.001012,0,"support=3, f1=0.000, prec=0.000, rec=0.000"
|
| 33 |
humanoid,body_type_presence,1,0.076000,0.003484,1,6,0.000000,0.000000,0.000000,0.000000,0.000871,0,"support=6, f1=0.000, prec=0.000, rec=0.000"
|
| 34 |
bird,species_taxonomy,0,0.042000,0.001184,1,6,0.571429,0.666667,0.615385,0.615385,0.000842,1,"support=6, f1=0.615, prec=0.571, rec=0.667"
|
| 35 |
-
bear,species_taxonomy,0,0.038000,0.001141,
|
| 36 |
<3,text_symbols,1,0.050000,0.000364,1,6,1.000000,0.500000,0.666667,0.666667,0.000273,1,"support=6, f1=0.667, prec=1.000, rec=0.500"
|
|
|
|
| 32 |
nude,clothing_state,0,0.057000,0.004049,1,3,0.000000,0.000000,0.000000,0.000000,0.001012,0,"support=3, f1=0.000, prec=0.000, rec=0.000"
|
| 33 |
humanoid,body_type_presence,1,0.076000,0.003484,1,6,0.000000,0.000000,0.000000,0.000000,0.000871,0,"support=6, f1=0.000, prec=0.000, rec=0.000"
|
| 34 |
bird,species_taxonomy,0,0.042000,0.001184,1,6,0.571429,0.666667,0.615385,0.615385,0.000842,1,"support=6, f1=0.615, prec=0.571, rec=0.667"
|
| 35 |
+
bear,species_taxonomy,0,0.038000,0.001141,0,5,0.500000,0.800000,0.615385,0.615385,0.000812,0,"support=5, f1=0.615, prec=0.500, rec=0.800"
|
| 36 |
<3,text_symbols,1,0.050000,0.000364,1,6,1.000000,0.500000,0.666667,0.666667,0.000273,1,"support=6, f1=0.667, prec=1.000, rec=0.500"
|
data/eval_results/k1_default_recheck_seed42_n10.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T07:10:22.047827", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 20, "n_selected": 29, "n_implied": 17, "n_structural": 4, "n_probe": 3, "ret_R": 0.2727, "P": 0.5172, "R": 0.6818, "F1": 0.5882, "leaf_P": 0.4167, "leaf_R": 0.3846, "leaf_F1": 0.4, "n_leaf_sel": 12, "n_leaf_gt": 13, "ret_P": 0.3, "sel_given_ret": 2.5, "over_sel": 1.32, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 7, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5172, "gen_R": 0.6818, "gen_F1": 0.5882, "missed": ["bass_guitar", "canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["bottomwear", "denim", "denim_clothing", "flowing_hair", "jeans", "looking_at_viewer", "pants", "pastel_background", "playing_guitar", "playing_music", "pose", "torn_bottomwear", "torn_jeans", "torn_pants"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bottomwear", "canid", "claws", "clothed", "clothing", "denim", "denim_clothing", "flowing_hair", "guitar", "hair", "jeans", "looking_at_viewer", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "pose", "solo", "spade_tail", "string_instrument", "tail", "torn_bottomwear", 
"torn_clothing", "torn_jeans", "torn_pants"], "stage3_selected": ["claws", "flowing_hair", "pastel_background", "playing_guitar", "pose", "spade_tail", "torn_jeans"], "stage3_selected_scores": {"claws": 0.5637, "pose": 0.5717, "spade_tail": 0.6167, "playing_guitar": 0.9311, "torn_jeans": 0.481, "flowing_hair": 0.5655, "pastel_background": 0.56}, "stage3_selected_ranks": {"claws": 10, "pose": 7, "spade_tail": 3, "playing_guitar": 2, "torn_jeans": 18, "flowing_hair": 9, "pastel_background": 12}, "stage3_selected_phrase_ranks": {"claws": 1, "pose": 1, "spade_tail": 1, "playing_guitar": 1, "torn_jeans": 1, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5655}, "jeans": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.56}, "playing_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9311}, "playing_music": {"source": "implied"}, "pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5717}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "unknown", "retrieval_score": 0.481}, "torn_pants": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["solo", "canid", "anthro"], "t1": 3.4, "t2": 3.66, "t3": 1.38, "t3s": 3.59, "t3p": 5.79, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 12, "n_selected": 17, "n_implied": 3, "n_structural": 4, "n_probe": 3, "ret_R": 0.75, "P": 0.1765, "R": 0.75, "F1": 0.2857, "leaf_P": 0.2143, "leaf_R": 0.75, "leaf_F1": 0.3333, "n_leaf_sel": 14, "n_leaf_gt": 4, "ret_P": 0.25, "sel_given_ret": 1.0, "over_sel": 4.25, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 10, "attempts_by_n_local": {"15": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1765, "gen_R": 0.75, "gen_F1": 0.2857, "missed": ["smile"], "extra": ["ambiguous_gender", "anthro", "bear", "big_eyes", "cartoon_character", "clothed", "clothing", "eyes", "floating", "mammal", "nose", "pink_mouth", "spots", "topless"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "bear", "big_eyes", "cartoon_character", "clothed", "clothing", "eyes", "floating", "mammal", "nose", "pink_mouth", "red_nose", "solo", "spots", "tan_body", "topless"], "stage3_selected": ["big_eyes", "cartoon_character", "eyes", "floating", "nose", "pink_mouth", "red_nose", "spots", "tan_body", "white_background"], "stage3_selected_scores": {"white_background": 0.6199, "tan_body": 0.667, "spots": 0.6295, "big_eyes": 0.6992, "red_nose": 0.752, "floating": 0.6502, "pink_mouth": 0.639, "nose": 0.8607, "cartoon_character": 0.5052, "eyes": 0.9251}, "stage3_selected_ranks": {"white_background": 9, "tan_body": 5, "spots": 8, "big_eyes": 4, "red_nose": 3, "floating": 6, "pink_mouth": 7, "nose": 2, "cartoon_character": 13, 
"eyes": 1}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 1, "spots": 1, "big_eyes": 1, "red_nose": 1, "floating": 1, "pink_mouth": 1, "nose": 1, "cartoon_character": 1, "eyes": 1}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "structural"}, "bear": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6992}, "cartoon_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5052}, "clothed": {"source": "implied"}, "clothing": {"source": "implied"}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9251}, "floating": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6502}, "mammal": {"source": "implied"}, "nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8607}, "pink_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.639}, "spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6295}, "topless": {"source": "structural"}}, "structural": ["solo", "anthro", "ambiguous_gender", "topless"], "probe": ["solo", "simple_background", "bear"], "t1": 2.19, "t2": 1.18, "t3": 11.61, "t3s": 1.02, "t3p": 3.9, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=15 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 15, "n_selected": 15, "n_implied": 2, "n_structural": 4, "n_probe": 5, "ret_R": 0.3571, "P": 0.5333, "R": 0.5714, "F1": 0.5517, "leaf_P": 0.5455, "leaf_R": 0.6667, "leaf_F1": 0.6, "n_leaf_sel": 11, "n_leaf_gt": 9, "ret_P": 0.3333, "sel_given_ret": 1.6, "over_sel": 1.07, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 6, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5333, "gen_R": 0.5714, "gen_F1": 0.5517, "missed": ["lagomorph", "leporid", "mammal", "rabbit", "romantic", "romantic_couple"], "extra": ["<3", "coat", "holding_object", "holding_plushie", "looking_at_viewer", "relationship", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "holding_object", "holding_plushie", "looking_at_viewer", "plushie", "relationship", "teal_eyes", "topwear"], "stage3_selected": ["blue_eyes", "coat", "holding_plushie", "plushie", "relationship", "teal_eyes"], "stage3_selected_scores": {"blue_eyes": 0.6151, "coat": 0.6383, "plushie": 0.7455, "teal_eyes": 0.6283, "holding_plushie": 0.7793, "relationship": 0.6206}, "stage3_selected_ranks": {"blue_eyes": 9, "coat": 5, "plushie": 3, "teal_eyes": 6, "holding_plushie": 2, "relationship": 7}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "coat": 1, 
"plushie": 1, "teal_eyes": 1, "holding_plushie": 1, "relationship": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6383}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7793}, "looking_at_viewer": {"source": "structural"}, "relationship": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6206}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["duo", "clothing", "blush", "anthro", "<3"], "t1": 2.27, "t2": 1.48, "t3": 1.38, "t3s": 1.8, "t3p": 2.97, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 16, "n_selected": 31, "n_implied": 12, "n_structural": 4, "n_probe": 7, "ret_R": 0.44, "P": 0.6774, "R": 0.84, "F1": 0.75, "leaf_P": 0.5882, "leaf_R": 0.6667, "leaf_F1": 0.625, "n_leaf_sel": 17, "n_leaf_gt": 15, "ret_P": 0.6875, "sel_given_ret": 1.9091, "over_sel": 1.24, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6774, "gen_R": 0.84, "gen_F1": 0.75, "missed": ["canine", "fox", "looking_at_another", "standing"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blush", "felid", "looking_at_viewer", "open_mouth", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blush", "bottomwear", "canid", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "felid", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "rabbit", "shirt", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", "claws", "crossed_arms", 
"facial_markings", "fur", "grey_background", "open_mouth", "overalls", "rabbit", "shirt", "white_shirt"], "stage3_selected_scores": {"fur": 0.6548, "open_mouth": 0.6344, "claws": 0.6317, "shirt": 0.7497, "rabbit": 0.6521, "grey_background": 0.6797, "facial_markings": 0.6956, "crossed_arms": 0.7298, "white_shirt": 0.8206, "overalls": 0.8782, "black_pants": 0.8338}, "stage3_selected_ranks": {"fur": 11, "open_mouth": 14, "claws": 15, "shirt": 6, "rabbit": 12, "grey_background": 10, "facial_markings": 8, "crossed_arms": 7, "white_shirt": 4, "overalls": 2, "black_pants": 3}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "shirt": 1, "rabbit": 1, "grey_background": 1, "facial_markings": 1, "crossed_arms": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8338}, "blush": {"source": "probe"}, "felid": {"source": "probe"}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6344}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8206}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "felid", "duo", "clothing", "canid", "blush", "anthro"], "t1": 5.92, "t2": 1.55, "t3": 4.86, "t3s": 4.42, "t3p": 7.2, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 17, "n_selected": 31, "n_implied": 6, "n_structural": 4, "n_probe": 5, "ret_R": 0.2308, "P": 0.2903, "R": 0.6923, "F1": 0.4091, "leaf_P": 0.16, "leaf_R": 0.6667, "leaf_F1": 0.2581, "n_leaf_sel": 25, "n_leaf_gt": 6, "ret_P": 0.1765, "sel_given_ret": 3.0, "over_sel": 2.38, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"17": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2903, "gen_R": 0.6923, "gen_F1": 0.4091, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["<3", "anthro", "bear", "bubble", "darkness", "duo", "face_mask", "felid", "figurine", "group", "light", "lying_on_ground", "note", "pear-shaped_figure", "power_lines", "solo", "speech_bubble", "standing_over", "texting", "underground", "unknown_species", "wide_hips"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["<3", "anthro", "bear", "bovid", "bubble", "caprine", "darkness", "duo", "face_mask", "felid", "figurine", "goat", "group", "human", "light", "lizard", "lying_on_ground", "mammal", "note", "pear-shaped_figure", "power_lines", "reptile", "scalie", "solo", "speech_bubble", "standing_over", "text", "texting", "underground", "unknown_species", "wide_hips"], "stage3_selected": ["bubble", "darkness", "face_mask", "figurine", "goat", "human", "light", "lizard", "lying_on_ground", "note", "pear-shaped_figure", 
"power_lines", "speech_bubble", "standing_over", "texting", "underground", "unknown_species"], "stage3_selected_scores": {"human": 0.669, "speech_bubble": 0.7584, "lizard": 0.839, "goat": 0.7768, "light": 0.7793, "unknown_species": 0.7697, "bubble": 0.7508, "pear-shaped_figure": 0.5657, "lying_on_ground": 0.7947, "face_mask": 0.5493, "darkness": 0.8328, "texting": 0.5661, "note": 0.7398, "underground": 0.5853, "figurine": 0.7007, "standing_over": 0.7647, "power_lines": 0.5072}, "stage3_selected_ranks": {"human": 12, "speech_bubble": 8, "lizard": 1, "goat": 5, "light": 4, "unknown_species": 6, "bubble": 9, "pear-shaped_figure": 15, "lying_on_ground": 3, "face_mask": 16, "darkness": 2, "texting": 14, "note": 10, "underground": 13, "figurine": 11, "standing_over": 7, "power_lines": 17}, "stage3_selected_phrase_ranks": {"human": 1, "speech_bubble": 1, "lizard": 1, "goat": 1, "light": 1, "unknown_species": 1, "bubble": 1, "pear-shaped_figure": 1, "lying_on_ground": 1, "face_mask": 1, "darkness": 1, "texting": 1, "note": 1, "underground": 1, "figurine": 1, "standing_over": 1, "power_lines": 1}, "extra_evidence": {"<3": {"source": "probe"}, "anthro": {"source": "probe"}, "bear": {"source": "probe"}, "bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7508}, "darkness": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8328}, "duo": {"source": "structural"}, "face_mask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5493}, "felid": {"source": "probe"}, "figurine": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7007}, "group": {"source": "structural"}, "light": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7793}, "lying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7947}, "note": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7398}, "pear-shaped_figure": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5657}, "power_lines": {"source": "stage3", "why": 
"unknown", "retrieval_score": 0.5072}, "solo": {"source": "structural"}, "speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7584}, "standing_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7647}, "texting": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5661}, "underground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5853}, "unknown_species": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7697}, "wide_hips": {"source": "implied"}}, "structural": ["solo", "duo", "group", "text"], "probe": ["group", "felid", "bear", "anthro", "<3"], "t1": 3.19, "t2": 1.51, "t3": 4.0, "t3s": 0.83, "t3p": 1.94, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=17 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 24, "n_selected": 31, "n_implied": 3, "n_structural": 3, "n_probe": 6, "ret_R": 0.6, "P": 0.3226, "R": 0.6667, "F1": 0.4348, "leaf_P": 0.3077, "leaf_R": 0.6667, "leaf_F1": 0.4211, "n_leaf_sel": 26, "n_leaf_gt": 12, "ret_P": 0.375, "sel_given_ret": 1.1111, "over_sel": 2.07, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 23, "attempts_by_n_local": {"25": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3226, "gen_R": 0.6667, "gen_F1": 0.4348, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture"], "extra": ["annoyed_expression", "anthro", "atmosphere", "bedroom", "blush", "distracting_watermark", "eyes", "felid", "font", "humanoid", "mammal", "membrane_(anatomy)", "palette", "playful", "purple_membrane", "resting", "romantic", "romantic_ambiance", "stats", "walking", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "atmosphere", "bedroom", "blonde_hair", "blue_eyes", "blush", "distracting_watermark", "duo", "eyes", "felid", "font", "green_eyes", "hair", "humanoid", "lying", "makeup", "mammal", "membrane_(anatomy)", "palette", "playful", "purple_hair", "purple_membrane", "resting", "romantic", "romantic_ambiance", "sleeping", "stats", "text", "walking", "watermark"], "stage3_selected": ["annoyed_expression", "atmosphere", "bedroom", 
"blonde_hair", "blue_eyes", "distracting_watermark", "eyes", "font", "green_eyes", "hair", "lying", "makeup", "palette", "playful", "purple_hair", "purple_membrane", "resting", "romantic_ambiance", "sleeping", "stats", "text", "walking", "watermark"], "stage3_selected_scores": {"hair": 0.6041, "text": 0.6017, "blue_eyes": 0.6023, "lying": 0.4504, "green_eyes": 0.5999, "blonde_hair": 0.5995, "purple_hair": 0.5647, "makeup": 0.5972, "watermark": 0.6051, "bedroom": 0.491, "sleeping": 0.6037, "walking": 0.3595, "romantic_ambiance": 0.4811, "distracting_watermark": 0.4792, "playful": 0.4474, "resting": 0.5152, "annoyed_expression": 0.7259, "stats": 0.5067, "palette": 0.669, "purple_membrane": 0.5791, "atmosphere": 0.5048, "font": 0.5305, "eyes": 0.895}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "lying": 22, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 13, "makeup": 11, "watermark": 4, "bedroom": 19, "sleeping": 6, "walking": 25, "romantic_ambiance": 20, "distracting_watermark": 21, "playful": 23, "resting": 16, "annoyed_expression": 2, "stats": 17, "palette": 3, "purple_membrane": 12, "atmosphere": 18, "font": 15, "eyes": 1}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "lying": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "watermark": 1, "bedroom": 1, "sleeping": 1, "walking": 1, "romantic_ambiance": 1, "distracting_watermark": 1, "playful": 1, "resting": 1, "annoyed_expression": 1, "stats": 1, "palette": 1, "purple_membrane": 1, "atmosphere": 1, "font": 1, "eyes": 1}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7259}, "anthro": {"source": "probe"}, "atmosphere": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5048}, "bedroom": {"source": "stage3", "why": "unknown", "retrieval_score": 0.491}, "blush": {"source": "probe"}, "distracting_watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4792}, "eyes": 
{"source": "stage3", "why": "unknown", "retrieval_score": 0.895}, "felid": {"source": "probe"}, "font": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5305}, "humanoid": {"source": "structural"}, "mammal": {"source": "implied"}, "membrane_(anatomy)": {"source": "implied"}, "palette": {"source": "stage3", "why": "unknown", "retrieval_score": 0.669}, "playful": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4474}, "purple_membrane": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5791}, "resting": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5152}, "romantic": {"source": "implied"}, "romantic_ambiance": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4811}, "stats": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5067}, "walking": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3595}, "watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6051}}, "structural": ["duo", "humanoid", "text"], "probe": ["text", "simple_background", "felid", "duo", "blush", "anthro"], "t1": 2.71, "t2": 2.24, "t3": 7.99, "t3s": 3.84, "t3p": 5.53, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=25 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 20, "n_selected": 33, "n_implied": 9, "n_structural": 4, "n_probe": 4, "ret_R": 0.5455, "P": 0.3333, "R": 1.0, "F1": 0.5, "leaf_P": 0.3333, "leaf_R": 0.8571, "leaf_F1": 0.48, "n_leaf_sel": 18, "n_leaf_gt": 7, "ret_P": 0.3, "sel_given_ret": 1.8333, "over_sel": 3.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 21, "attempts_by_n_local": {"23": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3333, "gen_R": 1.0, "gen_F1": 0.5, "missed": [], "extra": ["action_pose", "ambiguous_gender", "animal_humanoid", "animated_png", "anthro", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "eyes", "half-length_portrait", "humanoid", "mammal_humanoid", "nose", "pink_stripes", "pink_tail", "portrait", "pose", "stripes", "tail", "topless"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["action_pose", "ambiguous_gender", "animal_humanoid", "animated_png", "anthro", "blue_eyes", "blue_nose", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "eyes", "fur", "half-length_portrait", "humanoid", "mammal", "mammal_humanoid", "nose", "open_mouth", "pink_stripes", "pink_tail", "portrait", "pose", "purple_body", "solo", "stripes", "tail", "topless", "white_body", "white_fur"], "stage3_selected": ["action_pose", "animated_png", "blue_eyes", "blue_nose", "canine_humanoid", "curved_tail", 
"eyes", "fur", "half-length_portrait", "humanoid", "invalid_background", "nose", "open_mouth", "pink_stripes", "pink_tail", "pose", "purple_body", "simple_background", "stripes", "tail", "white_fur"], "stage3_selected_scores": {"fur": 0.5679, "simple_background": 0.5795, "open_mouth": 0.5861, "tail": 0.5909, "blue_eyes": 0.5832, "white_fur": 0.5785, "humanoid": 0.6719, "stripes": 0.5793, "pose": 0.6, "purple_body": 0.5484, "canine_humanoid": 0.9129, "blue_nose": 0.5927, "half-length_portrait": 0.464, "pink_tail": 0.5172, "action_pose": 0.5954, "pink_stripes": 0.5455, "curved_tail": 0.5963, "nose": 0.7036, "invalid_background": 0.5524, "eyes": 0.7512, "animated_png": 0.4463}, "stage3_selected_ranks": {"fur": 15, "simple_background": 12, "open_mouth": 10, "tail": 9, "blue_eyes": 11, "white_fur": 14, "humanoid": 4, "stripes": 13, "pose": 5, "purple_body": 17, "canine_humanoid": 1, "blue_nose": 8, "half-length_portrait": 22, "pink_tail": 20, "action_pose": 7, "pink_stripes": 18, "curved_tail": 6, "nose": 3, "invalid_background": 16, "eyes": 2, "animated_png": 23}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "open_mouth": 1, "tail": 1, "blue_eyes": 1, "white_fur": 1, "humanoid": 1, "stripes": 1, "pose": 1, "purple_body": 1, "canine_humanoid": 1, "blue_nose": 1, "half-length_portrait": 1, "pink_tail": 1, "action_pose": 1, "pink_stripes": 1, "curved_tail": 1, "nose": 1, "invalid_background": 1, "eyes": 1, "animated_png": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5954}, "ambiguous_gender": {"source": "structural"}, "animal_humanoid": {"source": "implied"}, "animated_png": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4463}, "anthro": {"source": "structural"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9129}, "clothed": {"source": "implied"}, "clothing": {"source": "implied"}, "curved_tail": {"source": 
"stage3", "why": "unknown", "retrieval_score": 0.5963}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7512}, "half-length_portrait": {"source": "stage3", "why": "unknown", "retrieval_score": 0.464}, "humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6719}, "mammal_humanoid": {"source": "implied"}, "nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7036}, "pink_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5455}, "pink_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5172}, "portrait": {"source": "implied"}, "pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6}, "stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5793}, "tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5909}, "topless": {"source": "structural"}}, "structural": ["solo", "anthro", "ambiguous_gender", "topless"], "probe": ["solo", "simple_background", "canid", "anthro"], "t1": 2.73, "t2": 1.82, "t3": 6.54, "t3s": 1.47, "t3p": 1.91, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=23 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=6"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 19, "n_selected": 29, "n_implied": 11, "n_structural": 5, "n_probe": 5, "ret_R": 0.1818, "P": 0.4828, "R": 0.6364, "F1": 0.549, "leaf_P": 0.25, "leaf_R": 0.3333, "leaf_F1": 0.2857, "n_leaf_sel": 16, "n_leaf_gt": 12, "ret_P": 0.2105, "sel_given_ret": 3.5, "over_sel": 1.32, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 10, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4828, "gen_R": 0.6364, "gen_F1": 0.549, "missed": ["chest_tuft", "countershading", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "topless", "tuft"], "extra": ["bear", "countershade_body", "fluffy_fur", "forest", "forest_background", "gesture", "looking_at_viewer", "nature", "nature_background", "plant", "raised_hand", "striped_body", "striped_fur", "tree", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "bear", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "felid", "fluffy_fur", "forest", "forest_background", "fur", "gesture", "looking_at_viewer", "male", "mammal", "nature", "nature_background", "pantherine", "plant", "raised_hand", "shorts", "solo", "striped_body", "striped_fur", "stripes", "tiger", "tree", "white_chest"], 
"stage3_selected": ["blue_eyes", "countershade_body", "fluffy_fur", "forest_background", "gesture", "raised_hand", "shorts", "striped_fur", "tiger", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.6084, "shorts": 0.6188, "tiger": 0.6311, "gesture": 0.6237, "striped_fur": 0.6808, "raised_hand": 0.7178, "forest_background": 0.6326, "white_chest": 0.9238, "countershade_body": 0.8643, "fluffy_fur": 0.6859}, "stage3_selected_ranks": {"blue_eyes": 17, "shorts": 16, "tiger": 13, "gesture": 15, "striped_fur": 8, "raised_hand": 6, "forest_background": 12, "white_chest": 2, "countershade_body": 3, "fluffy_fur": 7}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "shorts": 1, "tiger": 1, "gesture": 1, "striped_fur": 1, "raised_hand": 1, "forest_background": 1, "white_chest": 1, "countershade_body": 1, "fluffy_fur": 1}, "extra_evidence": {"bear": {"source": "probe"}, "countershade_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8643}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6859}, "forest": {"source": "implied"}, "forest_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6326}, "gesture": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6237}, "looking_at_viewer": {"source": "structural"}, "nature": {"source": "implied"}, "nature_background": {"source": "implied"}, "plant": {"source": "implied"}, "raised_hand": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7178}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6808}, "tree": {"source": "implied"}, "white_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9238}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["solo", "felid", "clothing", "bear", "anthro"], "t1": 2.03, "t2": 1.98, "t3": 4.75, "t3s": 0.62, "t3p": 1.98, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 
generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 15, "n_selected": 25, "n_implied": 5, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.4, "R": 0.8333, "F1": 0.5405, "leaf_P": 0.4, "leaf_R": 0.6667, "leaf_F1": 0.5, "n_leaf_sel": 15, "n_leaf_gt": 9, "ret_P": 0.2, "sel_given_ret": 3.3333, "over_sel": 2.08, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4, "gen_R": 0.8333, "gen_F1": 0.5405, "missed": ["alpha_channel", "fingers"], "extra": ["black_body", "black_fur", "business_attire", "formal", "holding_mug", "holding_object", "mug", "necktie", "shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "black_body", "black_fur", "business_attire", "clothed", "clothing", "felid", "feline", "formal", "fur", "hair", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "shirt", "solo", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "business_attire", "feline", "formal", "fur", "hair", "holding_mug", "invalid_background", "mug", "necktie", "shirt", "simple_background", "teal_shirt", "vest", "white_necktie"], "stage3_selected_scores": {"hair": 0.6803, "fur": 0.7146, "simple_background": 0.6978, "feline": 0.7062, "shirt": 0.7998, "black_fur": 0.7183, 
"necktie": 0.7314, "vest": 0.8403, "mug": 0.8841, "holding_mug": 0.916, "formal": 0.5993, "business_attire": 0.5558, "teal_shirt": 0.7474, "white_necktie": 0.6418, "invalid_background": 0.6495}, "stage3_selected_ranks": {"hair": 12, "fur": 9, "simple_background": 11, "feline": 10, "shirt": 5, "black_fur": 8, "necktie": 7, "vest": 3, "mug": 2, "holding_mug": 1, "formal": 16, "business_attire": 18, "teal_shirt": 6, "white_necktie": 14, "invalid_background": 13}, "stage3_selected_phrase_ranks": {"hair": 1, "fur": 1, "simple_background": 1, "feline": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "vest": 1, "mug": 1, "holding_mug": 1, "formal": 1, "business_attire": 1, "teal_shirt": 1, "white_necktie": 1, "invalid_background": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7183}, "business_attire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5558}, "formal": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5993}, "holding_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7314}, "shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7998}, "teal_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8403}, "white_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["text", "solo", "felid", "clothing", "anthro"], "t1": 2.8, "t2": 1.62, "t3": 0.98, "t3s": 0.7, "t3p": 1.22, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 
generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 18, "n_selected": 26, "n_implied": 5, "n_structural": 7, "n_probe": 5, "ret_R": 0.5, "P": 0.4615, "R": 0.8571, "F1": 0.6, "leaf_P": 0.3333, "leaf_R": 0.6, "leaf_F1": 0.4286, "n_leaf_sel": 18, "n_leaf_gt": 10, "ret_P": 0.3889, "sel_given_ret": 1.7143, "over_sel": 1.86, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4615, "gen_R": 0.8571, "gen_F1": 0.6, "missed": ["fur", "human"], "extra": ["anthro", "bottomwear", "cheeky", "duo", "feral", "grin", "laugh", "loincloth", "raised_arm", "raised_arms", "smile", "topless", "trio", "wide_grin"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "bottomwear", "cheeky", "clothed", "clothing", "dancing", "duo", "feral", "grin", "group", "hair", "haplorhine", "laugh", "loincloth", "looking_at_viewer", "male", "mammal", "primate", "raised_arm", "raised_arms", "smile", "topless", "trio", "wide_grin"], "stage3_selected": ["ape", "bear", "cheeky", "dancing", "grin", "hair", "laugh", "loincloth", "looking_at_viewer", "male", "primate", "raised_arm", "raised_arms", "simple_background", "wide_grin"], "stage3_selected_scores": {"male": 0.5604, "hair": 0.5445, "simple_background": 0.5491, "looking_at_viewer": 0.5475, "bear": 0.5735, "grin": 0.5653, "raised_arm": 0.421, "primate": 
0.8905, "loincloth": 0.5685, "dancing": 0.5568, "laugh": 0.5259, "ape": 0.9767, "raised_arms": 0.5445, "cheeky": 0.3903, "wide_grin": 0.5267}, "stage3_selected_ranks": {"male": 6, "hair": 11, "simple_background": 8, "looking_at_viewer": 9, "bear": 3, "grin": 5, "raised_arm": 18, "primate": 2, "loincloth": 4, "dancing": 7, "laugh": 13, "ape": 1, "raised_arms": 10, "cheeky": 20, "wide_grin": 12}, "stage3_selected_phrase_ranks": {"male": 1, "hair": 1, "simple_background": 1, "looking_at_viewer": 1, "bear": 1, "grin": 1, "raised_arm": 1, "primate": 1, "loincloth": 1, "dancing": 1, "laugh": 1, "ape": 1, "raised_arms": 1, "cheeky": 1, "wide_grin": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "bottomwear": {"source": "implied"}, "cheeky": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3903}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5653}, "laugh": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5259}, "loincloth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5685}, "raised_arm": {"source": "stage3", "why": "unknown", "retrieval_score": 0.421}, "raised_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5445}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5267}}, "structural": ["trio", "anthro", "feral", "male", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "group", "duo", "bear", "anthro"], "t1": 2.25, "t2": 1.84, "t3": 4.03, "t3s": 1.91, "t3p": 2.25, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/k_sweep_explicit_no_why_seed42_k1.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T05:54:21.251706", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 20, "n_selected": 36, "n_implied": 18, "n_structural": 4, "n_probe": 5, "ret_R": 0.2727, "P": 0.4444, "R": 0.7273, "F1": 0.5517, "leaf_P": 0.3333, "leaf_R": 0.3846, "leaf_F1": 0.3571, "n_leaf_sel": 15, "n_leaf_gt": 13, "ret_P": 0.3, "sel_given_ret": 2.6667, "over_sel": 1.64, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4444, "gen_R": 0.7273, "gen_F1": 0.5517, "missed": ["bass_guitar", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["action_pose", "atmosphere", "bear", "bottomwear", "canis", "denim", "denim_clothing", "domestic_dog", "flowing_hair", "jeans", "looking_at_viewer", "pants", "pastel_background", "playing_guitar", "playing_music", "pose", "torn_bottomwear", "torn_jeans", "torn_pants", "unknown_species"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["action_pose", "anthro", "atmosphere", "bear", "bottomwear", "canid", "canine", "canis", "claws", "clothed", "clothing", "denim", "denim_clothing", "domestic_dog", "flowing_hair", "guitar", "hair", "jeans", "looking_at_viewer", "mammal", "musical_instrument", "pants", 
"pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "pose", "solo", "spade_tail", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "unknown_species"], "stage3_selected": ["action_pose", "atmosphere", "claws", "domestic_dog", "flowing_hair", "guitar", "pastel_background", "playing_guitar", "spade_tail", "torn_jeans", "unknown_species"], "stage3_selected_scores": {"claws": 0.5694, "domestic_dog": 0.5598, "unknown_species": 0.5792, "spade_tail": 0.6166, "guitar": 0.9627, "action_pose": 0.5829, "playing_guitar": 0.9312, "torn_jeans": 0.4829, "flowing_hair": 0.5661, "atmosphere": 0.5022, "pastel_background": 0.5696}, "stage3_selected_ranks": {"claws": 10, "domestic_dog": 14, "unknown_species": 6, "spade_tail": 3, "guitar": 1, "action_pose": 5, "playing_guitar": 2, "torn_jeans": 18, "flowing_hair": 12, "atmosphere": 17, "pastel_background": 9}, "stage3_selected_phrase_ranks": {"claws": 1, "domestic_dog": 1, "unknown_species": 1, "spade_tail": 1, "guitar": 1, "action_pose": 1, "playing_guitar": 1, "torn_jeans": 1, "flowing_hair": 1, "atmosphere": 1, "pastel_background": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5829}, "atmosphere": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5022}, "bear": {"source": "probe"}, "bottomwear": {"source": "implied"}, "canis": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "domestic_dog": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5598}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5661}, "jeans": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5696}, "playing_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9312}, "playing_music": {"source": 
"implied"}, "pose": {"source": "implied"}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4829}, "torn_pants": {"source": "implied"}, "unknown_species": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5792}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["solo", "clothing", "canid", "bear", "anthro"], "t1": 2.61, "t2": 3.15, "t3": 3.19, "t3s": 3.3, "t3p": 3.23, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 11, "n_selected": 12, "n_implied": 0, "n_structural": 4, "n_probe": 4, "ret_R": 0.75, "P": 0.25, "R": 0.75, "F1": 0.375, "leaf_P": 0.25, "leaf_R": 0.75, "leaf_F1": 0.375, "n_leaf_sel": 12, "n_leaf_gt": 4, "ret_P": 0.2727, "sel_given_ret": 1.0, "over_sel": 3.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 7, "attempts_by_n_local": {"14": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.25, "gen_R": 0.75, "gen_F1": 0.375, "missed": ["smile"], "extra": ["<3", "ambiguous_gender", "anthro", "big_eyes", "cartoon", "feral", "floating", "nude", "spots"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["<3", "ambiguous_gender", "anthro", "big_eyes", "cartoon", "feral", "floating", "nude", "red_nose", "solo", "spots", "tan_body"], "stage3_selected": ["big_eyes", "cartoon", "floating", "red_nose", "spots", "tan_body", "white_background"], "stage3_selected_scores": {"white_background": 0.6138, "tan_body": 0.6627, "spots": 0.6272, "big_eyes": 0.696, "red_nose": 0.7501, "floating": 0.6519, "cartoon": 0.5003}, "stage3_selected_ranks": {"white_background": 9, "tan_body": 5, "spots": 8, "big_eyes": 4, "red_nose": 3, "floating": 6, "cartoon": 13}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 1, "spots": 1, "big_eyes": 1, "red_nose": 1, "floating": 1, "cartoon": 1}, "extra_evidence": {"<3": {"source": "probe"}, "ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "big_eyes": {"source": 
"stage3", "why": "unknown", "retrieval_score": 0.696}, "cartoon": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5003}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6519}, "nude": {"source": "structural"}, "spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6272}}, "structural": ["solo", "feral", "ambiguous_gender", "nude"], "probe": ["solo", "simple_background", "anthro", "<3"], "t1": 4.56, "t2": 1.12, "t3": 2.0, "t3s": 1.48, "t3p": 2.52, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=14 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 13, "n_selected": 16, "n_implied": 5, "n_structural": 4, "n_probe": 4, "ret_R": 0.2857, "P": 0.6875, "R": 0.7857, "F1": 0.7333, "leaf_P": 0.6364, "leaf_R": 0.7778, "leaf_F1": 0.7, "n_leaf_sel": 11, "n_leaf_gt": 9, "ret_P": 0.3077, "sel_given_ret": 2.75, "over_sel": 1.14, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 5, "attempts_by_n_local": {"14": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6875, "gen_R": 0.7857, "gen_F1": 0.7333, "missed": ["blue_eyes", "romantic", "romantic_couple"], "extra": ["<3", "coat", "looking_at_viewer", "round_eyes", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blush", "clothed", "clothing", "coat", "duo", "lagomorph", "leporid", "looking_at_viewer", "mammal", "plushie", "rabbit", "round_eyes", "teal_eyes", "topwear"], "stage3_selected": ["coat", "plushie", "rabbit", "round_eyes", "teal_eyes"], "stage3_selected_scores": {"rabbit": 0.5842, "coat": 0.6315, "plushie": 0.6566, "teal_eyes": 0.6344, "round_eyes": 0.4982}, "stage3_selected_ranks": {"rabbit": 8, "coat": 5, "plushie": 3, "teal_eyes": 4, "round_eyes": 14}, "stage3_selected_phrase_ranks": {"rabbit": 1, "coat": 1, "plushie": 1, "teal_eyes": 1, "round_eyes": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "unknown", "retrieval_score": 
0.6315}, "looking_at_viewer": {"source": "structural"}, "round_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4982}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["duo", "blush", "anthro", "<3"], "t1": 3.2, "t2": 1.43, "t3": 2.44, "t3s": 1.9, "t3p": 3.27, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=14 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 17, "n_selected": 30, "n_implied": 11, "n_structural": 4, "n_probe": 5, "ret_R": 0.48, "P": 0.7, "R": 0.84, "F1": 0.7636, "leaf_P": 0.6875, "leaf_R": 0.7333, "leaf_F1": 0.7097, "n_leaf_sel": 16, "n_leaf_gt": 15, "ret_P": 0.7059, "sel_given_ret": 1.75, "over_sel": 1.2, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7, "gen_R": 0.84, "gen_F1": 0.7636, "missed": ["canid", "canine", "fox", "looking_at_another"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "looking_at_viewer", "open_mouth", "text", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "bottomwear", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "rabbit", "shirt", "standing", "text", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", "claws", "crossed_arms", "facial_markings", "fur", 
"grey_background", "open_mouth", "overalls", "pants", "rabbit", "shirt", "standing", "white_shirt"], "stage3_selected_scores": {"fur": 0.6532, "open_mouth": 0.6331, "claws": 0.6304, "standing": 0.6879, "shirt": 0.7484, "rabbit": 0.6511, "pants": 0.759, "grey_background": 0.6785, "facial_markings": 0.6946, "crossed_arms": 0.7286, "white_shirt": 0.8198, "overalls": 0.8776, "black_pants": 0.8331}, "stage3_selected_ranks": {"fur": 12, "open_mouth": 15, "claws": 16, "standing": 9, "shirt": 6, "rabbit": 13, "pants": 5, "grey_background": 11, "facial_markings": 8, "crossed_arms": 7, "white_shirt": 4, "overalls": 2, "black_pants": 3}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "standing": 1, "shirt": 1, "rabbit": 1, "pants": 1, "grey_background": 1, "facial_markings": 1, "crossed_arms": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8331}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6331}, "text": {"source": "probe"}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8198}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["text", "simple_background", "duo", "clothing", "anthro"], "t1": 1.89, "t2": 1.57, "t3": 2.15, "t3s": 1.02, "t3p": 0.8, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 22, "n_selected": 36, "n_implied": 7, "n_structural": 6, "n_probe": 5, "ret_R": 0.2308, "P": 0.25, "R": 0.6923, "F1": 0.3673, "leaf_P": 0.1429, "leaf_R": 0.6667, "leaf_F1": 0.2353, "n_leaf_sel": 28, "n_leaf_gt": 6, "ret_P": 0.1364, "sel_given_ret": 3.0, "over_sel": 2.77, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 20, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.2571, "gen_R": 0.6923, "gen_F1": 0.375, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["<3", "anthro", "bear", "bubble", "cjk_character", "clothed", "clothing", "darkness", "empty_speech_bubble", "epaulet", "felid", "flask", "group", "intersex", "light", "lying_on_ground", "model_sheet", "solo", "speech_bubble", "standing", "standing_over", "taur", "topwear", "unknown_species", "unnamed_character", "vest", "waist"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["<3", "anthro", "bear", "bovid", "bubble", "caprine", "cjk_character", "clothed", "clothing", "darkness", "empty_speech_bubble", "epaulet", "felid", "flask", "goat", "group", "human", "intersex", "light", "lizard", "lying_on_ground", "mammal", "model_sheet", "reptile", "scalie", "solo", "speech_bubble", "standing", "standing_over", "taur", "text", "topwear", "unknown_species", "unnamed_character", "vest", "waist"], "stage3_selected": ["bubble", 
"cjk_character", "darkness", "empty_speech_bubble", "epaulet", "flask", "goat", "group", "human", "light", "lizard", "lying_on_ground", "model_sheet", "speech_bubble", "standing", "standing_over", "unknown_species", "unnamed_character", "vest", "waist"], "stage3_selected_scores": {"group": 0.4649, "standing": 0.579, "human": 0.5558, "speech_bubble": 0.567, "lizard": 0.5896, "goat": 0.5748, "light": 0.5716, "model_sheet": 0.4033, "unknown_species": 0.5842, "vest": 0.39, "bubble": 0.5665, "lying_on_ground": 0.583, "darkness": 0.5867, "flask": 0.3707, "standing_over": 0.4257, "unnamed_character": 0.4123, "waist": 0.7395, "empty_speech_bubble": 0.39, "epaulet": 0.3917, "cjk_character": 0.4178}, "stage3_selected_ranks": {"group": 12, "standing": 6, "human": 11, "speech_bubble": 9, "lizard": 2, "goat": 7, "light": 8, "model_sheet": 16, "unknown_species": 4, "vest": 18, "bubble": 10, "lying_on_ground": 5, "darkness": 3, "flask": 20, "standing_over": 13, "unnamed_character": 15, "waist": 1, "empty_speech_bubble": 19, "epaulet": 17, "cjk_character": 14}, "stage3_selected_phrase_ranks": {"group": 1, "standing": 1, "human": 1, "speech_bubble": 1, "lizard": 1, "goat": 1, "light": 1, "model_sheet": 1, "unknown_species": 1, "vest": 1, "bubble": 1, "lying_on_ground": 1, "darkness": 1, "flask": 1, "standing_over": 1, "unnamed_character": 1, "waist": 1, "empty_speech_bubble": 1, "epaulet": 1, "cjk_character": 1}, "extra_evidence": {"<3": {"source": "probe"}, "anthro": {"source": "structural"}, "bear": {"source": "probe"}, "bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5665}, "cjk_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4178}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "darkness": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5867}, "empty_speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.39}, "epaulet": {"source": "stage3", "why": "unknown", "retrieval_score": 
0.3917}, "felid": {"source": "probe"}, "flask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3707}, "group": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4649}, "intersex": {"source": "structural"}, "light": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5716}, "lying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.583}, "model_sheet": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4033}, "solo": {"source": "structural"}, "speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.567}, "standing": {"source": "stage3", "why": "unknown", "retrieval_score": 0.579}, "standing_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4257}, "taur": {"source": "structural"}, "topwear": {"source": "implied"}, "unknown_species": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5842}, "unnamed_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4123}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.39}, "waist": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7395}}, "structural": ["solo", "anthro", "taur", "intersex", "clothed", "text"], "probe": ["group", "felid", "bear", "anthro", "<3"], "t1": 1.37, "t2": 2.0, "t3": 2.92, "t3s": 1.37, "t3p": 1.37, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=1"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 22, "n_selected": 27, "n_implied": 3, "n_structural": 3, "n_probe": 1, "ret_R": 0.5333, "P": 0.3333, "R": 0.6, "F1": 0.4286, "leaf_P": 0.3182, "leaf_R": 0.5833, "leaf_F1": 0.4118, "n_leaf_sel": 22, "n_leaf_gt": 12, "ret_P": 0.3636, "sel_given_ret": 1.125, "over_sel": 1.8, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 22, "attempts_by_n_local": {"23": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3333, "gen_R": 0.6, "gen_F1": 0.4286, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture", "lying"], "extra": ["bedroom", "distracting_watermark", "eyes", "felid", "font", "hand_on_own_chest", "humanoid", "mammal", "membrane_(anatomy)", "palette", "playful", "purple_membrane", "resting", "romantic", "romantic_ambiance", "sleepover", "stats", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["bedroom", "blonde_hair", "blue_eyes", "distracting_watermark", "duo", "eyes", "felid", "font", "green_eyes", "hair", "hand_on_own_chest", "humanoid", "makeup", "mammal", "membrane_(anatomy)", "palette", "playful", "purple_hair", "purple_membrane", "resting", "romantic", "romantic_ambiance", "sleeping", "sleepover", "stats", "text", "watermark"], "stage3_selected": ["bedroom", "blonde_hair", "blue_eyes", "distracting_watermark", "eyes", "font", "green_eyes", "hair", 
"hand_on_own_chest", "invalid_tag", "makeup", "palette", "playful", "purple_hair", "purple_membrane", "resting", "romantic_ambiance", "sleeping", "sleepover", "stats", "text", "watermark"], "stage3_selected_scores": {"hair": 0.6037, "text": 0.6013, "blue_eyes": 0.6019, "green_eyes": 0.5995, "blonde_hair": 0.5991, "purple_hair": 0.5644, "makeup": 0.5969, "watermark": 0.6047, "bedroom": 0.4906, "sleeping": 0.6033, "romantic_ambiance": 0.4808, "distracting_watermark": 0.4788, "playful": 0.447, "resting": 0.5149, "invalid_tag": 0.5594, "stats": 0.5066, "palette": 0.6688, "sleepover": 0.3804, "purple_membrane": 0.579, "hand_on_own_chest": 0.5253, "font": 0.5303, "eyes": 0.895}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 13, "makeup": 11, "watermark": 4, "bedroom": 19, "sleeping": 6, "romantic_ambiance": 20, "distracting_watermark": 21, "playful": 22, "resting": 17, "invalid_tag": 14, "stats": 18, "palette": 3, "sleepover": 23, "purple_membrane": 12, "hand_on_own_chest": 16, "font": 15, "eyes": 1}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "watermark": 1, "bedroom": 1, "sleeping": 1, "romantic_ambiance": 1, "distracting_watermark": 1, "playful": 1, "resting": 1, "invalid_tag": 1, "stats": 1, "palette": 1, "sleepover": 1, "purple_membrane": 1, "hand_on_own_chest": 1, "font": 1, "eyes": 1}, "extra_evidence": {"bedroom": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4906}, "distracting_watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4788}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.895}, "felid": {"source": "probe"}, "font": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5303}, "hand_on_own_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5253}, "humanoid": {"source": "structural"}, "mammal": {"source": "implied"}, 
"membrane_(anatomy)": {"source": "implied"}, "palette": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6688}, "playful": {"source": "stage3", "why": "unknown", "retrieval_score": 0.447}, "purple_membrane": {"source": "stage3", "why": "unknown", "retrieval_score": 0.579}, "resting": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5149}, "romantic": {"source": "implied"}, "romantic_ambiance": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4808}, "sleepover": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3804}, "stats": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5066}, "watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6047}}, "structural": ["duo", "humanoid", "text"], "probe": ["felid"], "t1": 1.83, "t2": 2.0, "t3": 1.38, "t3s": 3.32, "t3p": 1.84, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=23 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 21, "n_selected": 19, "n_implied": 7, "n_structural": 4, "n_probe": 5, "ret_R": 0.5455, "P": 0.3158, "R": 0.5455, "F1": 0.4, "leaf_P": 0.2, "leaf_R": 0.2857, "leaf_F1": 0.2353, "n_leaf_sel": 10, "n_leaf_gt": 7, "ret_P": 0.2857, "sel_given_ret": 1.0, "over_sel": 1.73, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 7, "attempts_by_n_local": {"24": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3158, "gen_R": 0.5455, "gen_F1": 0.4, "missed": ["blue_eyes", "blue_nose", "open_mouth", "white_body", "white_fur"], "extra": ["animal_humanoid", "anthro", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "humanoid", "male", "mammal_humanoid", "pink_stripes", "stripes", "tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anthro", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "fur", "humanoid", "male", "mammal", "mammal_humanoid", "pink_stripes", "purple_body", "solo", "stripes", "tail"], "stage3_selected": ["canine_humanoid", "curved_tail", "fur", "pink_stripes", "purple_body", "simple_background", "tail"], "stage3_selected_scores": {"fur": 0.5666, "simple_background": 0.5782, "tail": 0.5897, "purple_body": 0.5476, "canine_humanoid": 0.9128, "pink_stripes": 0.5444, "curved_tail": 0.5958}, "stage3_selected_ranks": {"fur": 16, "simple_background": 
13, "tail": 9, "purple_body": 18, "canine_humanoid": 1, "pink_stripes": 19, "curved_tail": 6}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "tail": 1, "purple_body": 1, "canine_humanoid": 1, "pink_stripes": 1, "curved_tail": 1}, "extra_evidence": {"animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9128}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "curved_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5958}, "humanoid": {"source": "implied"}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "pink_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5444}, "stripes": {"source": "implied"}, "tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5897}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["solo", "simple_background", "clothing", "canid", "anthro"], "t1": 2.01, "t2": 1.78, "t3": 1.92, "t3s": 1.67, "t3p": 2.98, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=24 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 18, "n_selected": 20, "n_implied": 4, "n_structural": 4, "n_probe": 5, "ret_R": 0.2273, "P": 0.65, "R": 0.5909, "F1": 0.619, "leaf_P": 0.2857, "leaf_R": 0.3333, "leaf_F1": 0.3077, "n_leaf_sel": 14, "n_leaf_gt": 12, "ret_P": 0.2778, "sel_given_ret": 2.6, "over_sel": 0.91, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 9, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.65, "gen_R": 0.5909, "gen_F1": 0.619, "missed": ["chest_tuft", "countershading", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["bear", "countershade_body", "fluffy_fur", "pose", "striped_body", "striped_fur", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "bear", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "felid", "fluffy_fur", "fur", "hand_on_head", "male", "mammal", "pose", "shorts", "solo", "striped_body", "striped_fur", "stripes", "white_chest"], "stage3_selected": ["blue_eyes", "countershade_body", "fluffy_fur", "fur", "hand_on_head", "pose", "shorts", "striped_fur", "white_chest"], "stage3_selected_scores": {"fur": 0.5941, "blue_eyes": 0.5774, "pose": 0.6303, "shorts": 0.5899, "striped_fur": 
0.6464, "hand_on_head": 0.6005, "white_chest": 0.9168, "countershade_body": 0.872, "fluffy_fur": 0.6674}, "stage3_selected_ranks": {"fur": 12, "blue_eyes": 14, "pose": 9, "shorts": 13, "striped_fur": 8, "hand_on_head": 11, "white_chest": 2, "countershade_body": 3, "fluffy_fur": 6}, "stage3_selected_phrase_ranks": {"fur": 1, "blue_eyes": 1, "pose": 1, "shorts": 1, "striped_fur": 1, "hand_on_head": 1, "white_chest": 1, "countershade_body": 1, "fluffy_fur": 1}, "extra_evidence": {"bear": {"source": "probe"}, "countershade_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.872}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6674}, "pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6303}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6464}, "white_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9168}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["solo", "felid", "clothing", "bear", "anthro"], "t1": 1.95, "t2": 1.54, "t3": 0.82, "t3s": 1.63, "t3p": 1.99, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 16, "n_selected": 29, "n_implied": 8, "n_structural": 4, "n_probe": 6, "ret_R": 0.1667, "P": 0.3448, "R": 0.8333, "F1": 0.4878, "leaf_P": 0.3333, "leaf_R": 0.5556, "leaf_F1": 0.4167, "n_leaf_sel": 15, "n_leaf_gt": 9, "ret_P": 0.125, "sel_given_ret": 5.0, "over_sel": 2.42, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3448, "gen_R": 0.8333, "gen_F1": 0.4878, "missed": ["alpha_channel", "fingers"], "extra": ["black_body", "black_fur", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "formal", "hair_bun", "holding_mug", "holding_object", "mug", "necktie", "shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "black_body", "black_fur", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "clothed", "clothing", "felid", "feline", "formal", "fur", "hair", "hair_bun", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "shirt", "solo", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "brown_vest", "business_attire", "feline", "formal", "fur", "hair_bun", "holding_mug", "mug", "necktie", "shirt", "simple_background", "teal_shirt", "vest", "white_necktie"], 
"stage3_selected_scores": {"fur": 0.7147, "simple_background": 0.6978, "feline": 0.7062, "shirt": 0.7998, "black_fur": 0.7183, "necktie": 0.7314, "vest": 0.8404, "hair_bun": 0.6926, "mug": 0.8841, "holding_mug": 0.916, "formal": 0.5993, "business_attire": 0.5558, "brown_vest": 0.8153, "teal_shirt": 0.7475, "white_necktie": 0.6418}, "stage3_selected_ranks": {"fur": 9, "simple_background": 11, "feline": 10, "shirt": 5, "black_fur": 8, "necktie": 7, "vest": 3, "hair_bun": 12, "mug": 2, "holding_mug": 1, "formal": 16, "business_attire": 18, "brown_vest": 4, "teal_shirt": 6, "white_necktie": 14}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "feline": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "vest": 1, "hair_bun": 1, "mug": 1, "holding_mug": 1, "formal": 1, "business_attire": 1, "brown_vest": 1, "teal_shirt": 1, "white_necktie": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7183}, "brown_clothing": {"source": "implied"}, "brown_topwear": {"source": "implied"}, "brown_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8153}, "business_attire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5558}, "formal": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5993}, "hair_bun": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6926}, "holding_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7314}, "shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7998}, "teal_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7475}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.8404}, "white_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["text", "solo", "simple_background", "felid", "clothing", "anthro"], "t1": 1.96, "t2": 1.49, "t3": 2.52, "t3s": 1.91, "t3p": 1.51, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 21, "n_selected": 21, "n_implied": 5, "n_structural": 6, "n_probe": 5, "ret_R": 0.5, "P": 0.5238, "R": 0.7857, "F1": 0.6286, "leaf_P": 0.3571, "leaf_R": 0.5, "leaf_F1": 0.4167, "n_leaf_sel": 14, "n_leaf_gt": 10, "ret_P": 0.3333, "sel_given_ret": 1.5714, "over_sel": 1.5, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 9, "attempts_by_n_local": {"24": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5238, "gen_R": 0.7857, "gen_F1": 0.6286, "missed": ["fur", "human", "male"], "extra": ["anthro", "duo", "grin", "humanoid", "mischievous", "raised_arms", "smile", "topless", "trio", "wide_grin"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "clothed", "clothing", "dancing", "duo", "grin", "group", "hair", "haplorhine", "humanoid", "looking_at_viewer", "mammal", "mischievous", "primate", "raised_arms", "smile", "topless", "trio", "wide_grin"], "stage3_selected": ["ape", "bear", "dancing", "grin", "hair", "mischievous", "raised_arms", "simple_background", "wide_grin"], "stage3_selected_scores": {"hair": 0.5495, "simple_background": 0.5541, "bear": 0.5758, "grin": 0.5711, "dancing": 0.5627, "ape": 0.9769, "raised_arms": 0.5526, "mischievous": 0.5449, "wide_grin": 0.5315}, "stage3_selected_ranks": {"hair": 11, "simple_background": 8, "bear": 3, "grin": 5, "dancing": 7, "ape": 1, "raised_arms": 10, 
"mischievous": 12, "wide_grin": 14}, "stage3_selected_phrase_ranks": {"hair": 1, "simple_background": 1, "bear": 1, "grin": 1, "dancing": 1, "ape": 1, "raised_arms": 1, "mischievous": 1, "wide_grin": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "duo": {"source": "probe"}, "grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5711}, "humanoid": {"source": "structural"}, "mischievous": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5449}, "raised_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5526}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5315}}, "structural": ["trio", "anthro", "humanoid", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "group", "duo", "bear", "anthro"], "t1": 2.29, "t2": 1.97, "t3": 2.21, "t3s": 1.21, "t3p": 0.61, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=24 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/k_sweep_explicit_no_why_seed42_k10.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/eval_results/k_sweep_explicit_no_why_seed42_k2.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T05:55:46.995089", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 2, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 31, "n_selected": 31, "n_implied": 11, "n_structural": 3, "n_probe": 3, "ret_R": 0.1818, "P": 0.3871, "R": 0.5455, "F1": 0.4528, "leaf_P": 0.2222, "leaf_R": 0.3077, "leaf_F1": 0.2581, "n_leaf_sel": 18, "n_leaf_gt": 13, "ret_P": 0.129, "sel_given_ret": 3.0, "over_sel": 1.41, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"34": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3871, "gen_R": 0.5455, "gen_F1": 0.4528, "missed": ["bass_guitar", "fingers", "fur", "guitar", "holding_musical_instrument", "holding_object", "music", "musical_instrument", "plucked_string_instrument", "string_instrument"], "extra": ["5_claws", "atmosphere", "bass_(disambiguation)", "bonfire", "canis", "clawed_fingers", "flowing_hair", "leggings", "legwear", "long_hair", "pastel_background", "playing", "playing_bass", "stockings", "string", "torn_leggings", "torn_legwear", "torn_stockings", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["5_claws", "anthro", "atmosphere", "bass_(disambiguation)", "bonfire", "canid", "canine", "canis", "clawed_fingers", "claws", "clothed", "clothing", "flowing_hair", "hair", "leggings", "legwear", "long_hair", "mammal", 
"pastel_background", "playing", "playing_bass", "solo", "spade_tail", "stockings", "string", "tail", "torn_clothing", "torn_leggings", "torn_legwear", "torn_stockings", "wolf"], "stage3_selected": ["5_claws", "atmosphere", "bass_(disambiguation)", "bonfire", "clawed_fingers", "claws", "flowing_hair", "invalid_background", "long_hair", "pastel_background", "playing", "playing_bass", "spade_tail", "string", "torn_leggings", "torn_stockings", "wolf"], "stage3_selected_scores": {"claws": 0.6305, "wolf": 0.5983, "long_hair": 0.5166, "spade_tail": 0.872, "clawed_fingers": 0.5176, "playing": 0.4743, "string": 0.6132, "torn_stockings": 0.4607, "flowing_hair": 0.7019, "torn_leggings": 0.4903, "bonfire": 0.4621, "5_claws": 0.5907, "atmosphere": 0.503, "bass_(disambiguation)": 0.5206, "playing_bass": 0.5052, "pastel_background": 0.6263, "invalid_background": 0.6032}, "stage3_selected_ranks": {"claws": 5, "wolf": 10, "long_hair": 20, "spade_tail": 1, "clawed_fingers": 19, "playing": 27, "string": 8, "torn_stockings": 30, "flowing_hair": 2, "torn_leggings": 24, "bonfire": 29, "5_claws": 11, "atmosphere": 23, "bass_(disambiguation)": 18, "playing_bass": 21, "pastel_background": 6, "invalid_background": 9}, "stage3_selected_phrase_ranks": {"claws": 1, "wolf": 1, "long_hair": 2, "spade_tail": 1, "clawed_fingers": 2, "playing": 2, "string": 1, "torn_stockings": 2, "flowing_hair": 1, "torn_leggings": 1, "bonfire": 1, "5_claws": 2, "atmosphere": 1, "bass_(disambiguation)": 1, "playing_bass": 1, "pastel_background": 1, "invalid_background": 1}, "extra_evidence": {"5_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5907}, "atmosphere": {"source": "stage3", "why": "unknown", "retrieval_score": 0.503}, "bass_(disambiguation)": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5206}, "bonfire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4621}, "canis": {"source": "implied"}, "clawed_fingers": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.5176}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7019}, "leggings": {"source": "implied"}, "legwear": {"source": "implied"}, "long_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5166}, "pastel_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6263}, "playing": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4743}, "playing_bass": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5052}, "stockings": {"source": "implied"}, "string": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6132}, "torn_leggings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4903}, "torn_legwear": {"source": "implied"}, "torn_stockings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4607}, "wolf": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5983}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "canid", "anthro"], "t1": 2.58, "t2": 2.58, "t3": 5.43, "t3s": 2.84, "t3p": 3.0, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=34 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 23, "n_selected": 12, "n_implied": 1, "n_structural": 5, "n_probe": 3, "ret_R": 0.75, "P": 0.25, "R": 0.75, "F1": 0.375, "leaf_P": 0.2727, "leaf_R": 0.75, "leaf_F1": 0.4, "n_leaf_sel": 11, "n_leaf_gt": 4, "ret_P": 0.1304, "sel_given_ret": 1.0, "over_sel": 3.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 6, "attempts_by_n_local": {"26": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.25, "gen_R": 0.75, "gen_F1": 0.375, "missed": ["smile"], "extra": ["ambiguous_gender", "bear", "big_eyes", "feral", "looking_at_viewer", "mammal", "nude", "spots", "toony"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "bear", "big_eyes", "feral", "looking_at_viewer", "mammal", "nude", "red_nose", "solo", "spots", "tan_body", "toony"], "stage3_selected": ["big_eyes", "red_nose", "spots", "tan_body", "toony", "white_background"], "stage3_selected_scores": {"white_background": 0.6243, "tan_body": 0.6695, "spots": 0.6322, "toony": 0.6076, "big_eyes": 0.7003, "red_nose": 0.7533}, "stage3_selected_ranks": {"white_background": 13, "tan_body": 7, "spots": 12, "toony": 16, "big_eyes": 4, "red_nose": 3}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 2, "spots": 2, "toony": 1, "big_eyes": 1, "red_nose": 1}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "bear": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7003}, "feral": {"source": 
"structural"}, "looking_at_viewer": {"source": "structural"}, "mammal": {"source": "implied"}, "nude": {"source": "structural"}, "spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6322}, "toony": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6076}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["solo", "simple_background", "bear"], "t1": 3.09, "t2": 1.05, "t3": 1.61, "t3s": 1.49, "t3p": 1.35, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=26 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 31, "n_selected": 28, "n_implied": 6, "n_structural": 4, "n_probe": 5, "ret_R": 0.5, "P": 0.5, "R": 1.0, "F1": 0.6667, "leaf_P": 0.4211, "leaf_R": 0.8889, "leaf_F1": 0.5714, "n_leaf_sel": 19, "n_leaf_gt": 9, "ret_P": 0.2258, "sel_given_ret": 2.0, "over_sel": 2.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"32": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 1.0, "gen_F1": 0.6667, "missed": [], "extra": ["<3", "coat", "expressions", "eyes", "group", "holding_object", "holding_plushie", "looking_at_viewer", "raincoat", "relationship", "rosy_cheeks", "setting", "surprised_look", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "expressions", "eyes", "group", "holding_object", "holding_plushie", "lagomorph", "leporid", "looking_at_viewer", "mammal", "plushie", "rabbit", "raincoat", "relationship", "romantic", "romantic_couple", "rosy_cheeks", "setting", "surprised_look", "teal_eyes", "topwear"], "stage3_selected": ["blue_eyes", "coat", "expressions", "eyes", "group", "holding_plushie", "plushie", "rabbit", "raincoat", "relationship", "romantic_couple", "rosy_cheeks", "setting", "surprised_look", "teal_eyes"], "stage3_selected_scores": {"blue_eyes": 0.6151, "group": 
0.3374, "rabbit": 0.5939, "romantic_couple": 0.5621, "coat": 0.6383, "plushie": 0.7455, "teal_eyes": 0.6283, "rosy_cheeks": 0.472, "expressions": 0.5454, "holding_plushie": 0.7793, "raincoat": 0.5262, "surprised_look": 0.6399, "relationship": 0.6206, "setting": 0.5567, "eyes": 0.8767}, "stage3_selected_ranks": {"blue_eyes": 10, "group": 32, "rabbit": 11, "romantic_couple": 13, "coat": 6, "plushie": 3, "teal_eyes": 7, "rosy_cheeks": 28, "expressions": 17, "holding_plushie": 2, "raincoat": 20, "surprised_look": 4, "relationship": 8, "setting": 15, "eyes": 1}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "group": 2, "rabbit": 1, "romantic_couple": 1, "coat": 1, "plushie": 1, "teal_eyes": 1, "rosy_cheeks": 2, "expressions": 2, "holding_plushie": 1, "raincoat": 2, "surprised_look": 1, "relationship": 1, "setting": 1, "eyes": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6383}, "expressions": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5454}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8767}, "group": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3374}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7793}, "looking_at_viewer": {"source": "structural"}, "raincoat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5262}, "relationship": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6206}, "rosy_cheeks": {"source": "stage3", "why": "unknown", "retrieval_score": 0.472}, "setting": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5567}, "surprised_look": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6399}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["duo", "clothing", "blush", "anthro", "<3"], "t1": 1.79, "t2": 1.4, "t3": 2.82, "t3s": 0.98, "t3p": 1.24, "err": 
null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=32 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 29, "n_selected": 43, "n_implied": 17, "n_structural": 4, "n_probe": 5, "ret_R": 0.48, "P": 0.4651, "R": 0.8, "F1": 0.5882, "leaf_P": 0.4286, "leaf_R": 0.6, "leaf_F1": 0.5, "n_leaf_sel": 21, "n_leaf_gt": 15, "ret_P": 0.4138, "sel_given_ret": 1.6667, "over_sel": 1.72, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 21, "attempts_by_n_local": {"30": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4651, "gen_R": 0.8, "gen_F1": 0.5882, "missed": ["lagomorph", "leporid", "looking_at_another", "rabbit", "standing"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "brown_clothing", "brown_shirt", "brown_topwear", "dyed_fur", "eye_markings", "grey_bottomwear", "grey_clothing", "grey_pants", "looking_at_viewer", "marble_fox", "open_mouth", "open_smile", "red_fox", "smile", "t-shirt", "text", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "bottomwear", "brown_clothing", "brown_shirt", "brown_topwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "dyed_fur", "eye_markings", 
"facial_markings", "fox", "fur", "grey_background", "grey_bottomwear", "grey_clothing", "grey_pants", "head_markings", "looking_at_viewer", "mammal", "marble_fox", "markings", "open_mouth", "open_smile", "overalls", "pants", "red_fox", "shirt", "smile", "t-shirt", "text", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", "blue_overalls", "brown_shirt", "claws", "crossed_arms", "dyed_fur", "eye_markings", "facial_markings", "fox", "fur", "grey_background", "grey_pants", "head_markings", "invalid_tag", "marble_fox", "open_mouth", "open_smile", "overalls", "shirt", "t-shirt", "white_shirt"], "stage3_selected_scores": {"fur": 0.6531, "open_mouth": 0.633, "claws": 0.6304, "fox": 0.638, "shirt": 0.7483, "open_smile": 0.5273, "grey_background": 0.6784, "head_markings": 0.6327, "facial_markings": 0.6945, "t-shirt": 0.724, "crossed_arms": 0.7285, "white_shirt": 0.8197, "overalls": 0.8776, "black_pants": 0.833, "eye_markings": 0.6361, "grey_pants": 0.7571, "invalid_tag": 0.5412, "brown_shirt": 0.7774, "marble_fox": 0.5572, "dyed_fur": 0.5284, "blue_overalls": 0.9203}, "stage3_selected_ranks": {"fur": 14, "open_mouth": 18, "claws": 20, "fox": 16, "shirt": 8, "open_smile": 31, "grey_background": 13, "head_markings": 19, "facial_markings": 11, "t-shirt": 10, "crossed_arms": 9, "white_shirt": 4, "overalls": 2, "black_pants": 3, "eye_markings": 17, "grey_pants": 7, "invalid_tag": 28, "brown_shirt": 5, "marble_fox": 27, "dyed_fur": 30, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "fox": 1, "shirt": 1, "open_smile": 2, "grey_background": 1, "head_markings": 2, "facial_markings": 1, "t-shirt": 2, "crossed_arms": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1, "eye_markings": 2, "grey_pants": 2, "invalid_tag": 2, "brown_shirt": 2, "marble_fox": 2, "dyed_fur": 2, "blue_overalls": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, 
"black_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.833}, "blue_overalls": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9203}, "brown_clothing": {"source": "implied"}, "brown_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7774}, "brown_topwear": {"source": "implied"}, "dyed_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5284}, "eye_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6361}, "grey_bottomwear": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7571}, "looking_at_viewer": {"source": "structural"}, "marble_fox": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5572}, "open_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.633}, "open_smile": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5273}, "red_fox": {"source": "implied"}, "smile": {"source": "implied"}, "t-shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.724}, "text": {"source": "probe"}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8197}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["text", "simple_background", "duo", "clothing", "anthro"], "t1": 2.0, "t2": 1.4, "t3": 3.28, "t3s": 0.96, "t3p": 1.52, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=30 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 34, "n_selected": 31, "n_implied": 5, "n_structural": 5, "n_probe": 8, "ret_R": 0.2308, "P": 0.1935, "R": 0.4615, "F1": 0.2727, "leaf_P": 0.1154, "leaf_R": 0.5, "leaf_F1": 0.1875, "n_leaf_sel": 26, "n_leaf_gt": 6, "ret_P": 0.0882, "sel_given_ret": 2.0, "over_sel": 2.38, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 16, "attempts_by_n_local": {"34": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1935, "gen_R": 0.4615, "gen_F1": 0.2727, "missed": ["dialogue", "fur", "lizard", "reptile", "scalie", "white_body", "white_fur"], "extra": ["air_bubble", "anthro", "bear", "bubble", "canid", "clothed", "clothing", "cracked_ground", "darkner", "darkness", "duo", "felid", "group", "intersex", "laying_on_ground", "light", "lying_on_ground", "note_pad", "speech_bubble", "standing", "standing_over", "taur", "topwear", "vest", "waist"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["air_bubble", "anthro", "bear", "bovid", "bubble", "canid", "caprine", "clothed", "clothing", "cracked_ground", "darkner", "darkness", "duo", "felid", "goat", "group", "human", "intersex", "laying_on_ground", "light", "lying_on_ground", "mammal", "note_pad", "speech_bubble", "standing", "standing_over", "taur", "text", "topwear", "vest", "waist"], "stage3_selected": ["air_bubble", "bubble", "cracked_ground", "darkner", "darkness", "goat", "human", 
"laying_on_ground", "light", "lying_on_ground", "note_pad", "speech_bubble", "standing", "standing_over", "vest", "waist"], "stage3_selected_scores": {"standing": 0.476, "human": 0.5621, "speech_bubble": 0.5831, "goat": 0.5841, "light": 0.5879, "vest": 0.3206, "bubble": 0.5745, "darkner": 0.4159, "lying_on_ground": 0.5998, "darkness": 0.6022, "air_bubble": 0.4381, "laying_on_ground": 0.5611, "standing_over": 0.5881, "waist": 0.7518, "cracked_ground": 0.3404, "note_pad": 0.4198}, "stage3_selected_ranks": {"standing": 19, "human": 12, "speech_bubble": 9, "goat": 8, "light": 7, "vest": 34, "bubble": 10, "darkner": 26, "lying_on_ground": 4, "darkness": 3, "air_bubble": 21, "laying_on_ground": 13, "standing_over": 6, "waist": 1, "cracked_ground": 32, "note_pad": 25}, "stage3_selected_phrase_ranks": {"standing": 2, "human": 1, "speech_bubble": 1, "goat": 1, "light": 1, "vest": 2, "bubble": 1, "darkner": 2, "lying_on_ground": 1, "darkness": 1, "air_bubble": 2, "laying_on_ground": 2, "standing_over": 1, "waist": 1, "cracked_ground": 2, "note_pad": 2}, "extra_evidence": {"air_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4381}, "anthro": {"source": "probe"}, "bear": {"source": "probe"}, "bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5745}, "canid": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "cracked_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3404}, "darkner": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4159}, "darkness": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6022}, "duo": {"source": "probe"}, "felid": {"source": "probe"}, "group": {"source": "structural"}, "intersex": {"source": "structural"}, "laying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5611}, "light": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5879}, "lying_on_ground": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.5998}, "note_pad": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4198}, "speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5831}, "standing": {"source": "stage3", "why": "unknown", "retrieval_score": 0.476}, "standing_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5881}, "taur": {"source": "structural"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3206}, "waist": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7518}}, "structural": ["group", "taur", "intersex", "clothed", "text"], "probe": ["text", "simple_background", "group", "felid", "duo", "canid", "bear", "anthro"], "t1": 2.98, "t2": 1.52, "t3": 3.72, "t3s": 1.33, "t3p": 2.16, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=34 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=1"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 40, "n_selected": 23, "n_implied": 2, "n_structural": 3, "n_probe": 6, "ret_R": 0.5333, "P": 0.3478, "R": 0.5333, "F1": 0.4211, "leaf_P": 0.3, "leaf_R": 0.5, "leaf_F1": 0.375, "n_leaf_sel": 20, "n_leaf_gt": 12, "ret_P": 0.2, "sel_given_ret": 1.0, "over_sel": 1.53, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"41": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3478, "gen_R": 0.5333, "gen_F1": 0.4211, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture", "lying", "sleeping"], "extra": ["<3", "annoyed", "annoyed_expression", "anthro", "bedroom", "blush", "contest", "curtains_open", "dialogue", "felid", "humanoid", "mammal", "membrane_(anatomy)", "purple_membrane", "sleepover"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["<3", "annoyed", "annoyed_expression", "anthro", "bedroom", "blonde_hair", "blue_eyes", "blush", "contest", "curtains_open", "dialogue", "duo", "felid", "green_eyes", "hair", "humanoid", "makeup", "mammal", "membrane_(anatomy)", "purple_hair", "purple_membrane", "sleepover", "text"], "stage3_selected": ["annoyed", "annoyed_expression", "bedroom", "blonde_hair", "blue_eyes", "contest", "curtains_open", "dialogue", "green_eyes", "hair", "makeup", "purple_hair", "purple_membrane", "sleepover", "text"], "stage3_selected_scores": 
{"hair": 0.6041, "text": 0.6017, "blue_eyes": 0.6023, "dialogue": 0.4457, "green_eyes": 0.5999, "blonde_hair": 0.5995, "purple_hair": 0.5647, "makeup": 0.5972, "bedroom": 0.491, "annoyed": 0.5736, "annoyed_expression": 0.7259, "curtains_open": 0.4199, "contest": 0.3499, "sleepover": 0.3806, "purple_membrane": 0.5791}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "dialogue": 33, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 14, "makeup": 11, "bedroom": 27, "annoyed": 13, "annoyed_expression": 2, "curtains_open": 36, "contest": 41, "sleepover": 40, "purple_membrane": 12}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "dialogue": 2, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "bedroom": 1, "annoyed": 2, "annoyed_expression": 1, "curtains_open": 2, "contest": 2, "sleepover": 1, "purple_membrane": 1}, "extra_evidence": {"<3": {"source": "probe"}, "annoyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5736}, "annoyed_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7259}, "anthro": {"source": "probe"}, "bedroom": {"source": "stage3", "why": "unknown", "retrieval_score": 0.491}, "blush": {"source": "probe"}, "contest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3499}, "curtains_open": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4199}, "dialogue": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4457}, "felid": {"source": "probe"}, "humanoid": {"source": "structural"}, "mammal": {"source": "implied"}, "membrane_(anatomy)": {"source": "implied"}, "purple_membrane": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5791}, "sleepover": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3806}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "felid", "duo", "blush", "anthro", "<3"], "t1": 2.2, "t2": 1.79, "t3": 2.9, "t3s": 2.2, "t3p": 2.2, "err": null, "issues": ["Stage3 split: 
general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=41 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 44, "n_selected": 39, "n_implied": 12, "n_structural": 4, "n_probe": 5, "ret_R": 0.5455, "P": 0.2308, "R": 0.8182, "F1": 0.36, "leaf_P": 0.15, "leaf_R": 0.4286, "leaf_F1": 0.2222, "n_leaf_sel": 20, "n_leaf_gt": 7, "ret_P": 0.1364, "sel_given_ret": 1.5, "over_sel": 3.55, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 22, "attempts_by_n_local": {"47": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2308, "gen_R": 0.8182, "gen_F1": 0.36, "missed": ["blue_eyes", "blue_nose"], "extra": ["animal_humanoid", "anime_eyes", "anthro", "blue_eyebrows", "blue_stripes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "eyebrows", "fluffy_fur", "glistening", "glistening_tail", "humanoid", "inner_ear_fluff", "jumper", "male", "mammal_humanoid", "membrane_(anatomy)", "open_smile", "purple_membrane", "skimpy", "small_mouth", "smile", "strider-orion", "striped_back", "stripes", "tuft", "white_inner_ear_fluff"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anime_eyes", "anthro", "blue_eyebrows", "blue_stripes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "eyebrows", "fluffy_fur", "fur", "glistening", "glistening_tail", "humanoid", "inner_ear_fluff", "jumper", "male", "mammal", "mammal_humanoid", "membrane_(anatomy)", "open_mouth", "open_smile", 
"purple_body", "purple_membrane", "skimpy", "small_mouth", "smile", "solo", "strider-orion", "striped_back", "stripes", "tuft", "white_body", "white_fur", "white_inner_ear_fluff"], "stage3_selected": ["anime_eyes", "blue_eyebrows", "blue_stripes", "canid_humanoid", "canine_humanoid", "curved_tail", "fluffy_fur", "fur", "glistening_tail", "humanoid", "jumper", "open_smile", "purple_body", "purple_membrane", "simple_background", "skimpy", "small_mouth", "strider-orion", "striped_back", "stripes", "white_fur", "white_inner_ear_fluff"], "stage3_selected_scores": {"fur": 0.5666, "simple_background": 0.5782, "white_fur": 0.5773, "humanoid": 0.6714, "stripes": 0.578, "purple_body": 0.5476, "open_smile": 0.4623, "skimpy": 0.361, "canid_humanoid": 0.8744, "canine_humanoid": 0.9128, "white_inner_ear_fluff": 0.5661, "blue_stripes": 0.5367, "blue_eyebrows": 0.4546, "glistening_tail": 0.5615, "fluffy_fur": 0.5081, "curved_tail": 0.5958, "striped_back": 0.5609, "strider-orion": 0.3692, "anime_eyes": 0.4791, "small_mouth": 0.5007, "purple_membrane": 0.5453, "jumper": 0.4005}, "stage3_selected_ranks": {"fur": 18, "simple_background": 15, "white_fur": 17, "humanoid": 5, "stripes": 16, "purple_body": 24, "open_smile": 39, "skimpy": 46, "canid_humanoid": 2, "canine_humanoid": 1, "white_inner_ear_fluff": 19, "blue_stripes": 28, "blue_eyebrows": 40, "glistening_tail": 20, "fluffy_fur": 32, "curved_tail": 8, "striped_back": 21, "strider-orion": 45, "anime_eyes": 36, "small_mouth": 34, "purple_membrane": 26, "jumper": 44}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "white_fur": 1, "humanoid": 1, "stripes": 1, "purple_body": 1, "open_smile": 2, "skimpy": 2, "canid_humanoid": 2, "canine_humanoid": 1, "white_inner_ear_fluff": 2, "blue_stripes": 2, "blue_eyebrows": 2, "glistening_tail": 2, "fluffy_fur": 2, "curved_tail": 1, "striped_back": 2, "strider-orion": 1, "anime_eyes": 2, "small_mouth": 2, "purple_membrane": 2, "jumper": 2}, "extra_evidence": 
{"animal_humanoid": {"source": "implied"}, "anime_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4791}, "anthro": {"source": "structural"}, "blue_eyebrows": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4546}, "blue_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5367}, "canid_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8744}, "canine_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9128}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "curved_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5958}, "eyebrows": {"source": "implied"}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5081}, "glistening": {"source": "implied"}, "glistening_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5615}, "humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6714}, "inner_ear_fluff": {"source": "implied"}, "jumper": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4005}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "membrane_(anatomy)": {"source": "implied"}, "open_smile": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4623}, "purple_membrane": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5453}, "skimpy": {"source": "stage3", "why": "unknown", "retrieval_score": 0.361}, "small_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5007}, "smile": {"source": "implied"}, "strider-orion": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3692}, "striped_back": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5609}, "stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.578}, "tuft": {"source": "implied"}, "white_inner_ear_fluff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5661}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": 
["solo", "simple_background", "clothing", "canid", "anthro"], "t1": 2.34, "t2": 1.82, "t3": 7.62, "t3s": 1.54, "t3p": 3.03, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=47 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 37, "n_selected": 44, "n_implied": 11, "n_structural": 5, "n_probe": 5, "ret_R": 0.2273, "P": 0.3636, "R": 0.7273, "F1": 0.4848, "leaf_P": 0.1034, "leaf_R": 0.25, "leaf_F1": 0.1463, "n_leaf_sel": 29, "n_leaf_gt": 12, "ret_P": 0.1351, "sel_given_ret": 3.2, "over_sel": 2.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 26, "attempts_by_n_local": {"38": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3636, "gen_R": 0.7273, "gen_F1": 0.4848, "missed": ["chest_tuft", "muscular", "muscular_anthro", "muscular_male", "topless", "tuft"], "extra": ["avian", "belly", "bird", "confident", "countershade_belly", "countershade_body", "eyes", "fluffy_fur", "gesture", "looking_at_viewer", "muscular_arms", "muscular_legs", "no_irises", "pattern_background", "pattern_kerchief", "poof_effect", "pose", "round_head", "siberian_tiger", "striped_body", "striped_ears", "striped_fur", "suggestive_pose", "tan_bottomwear", "tan_chest", "tan_clothing", "tan_shorts", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "avian", "belly", "bird", "blue_eyes", "bottomwear", "clothed", "clothing", "confident", "countershade_belly", "countershade_body", "countershading", "eyes", "felid", "fluffy_fur", "fur", 
"gesture", "hand_on_head", "looking_at_viewer", "male", "mammal", "muscular_arms", "muscular_legs", "no_irises", "pantherine", "pattern_background", "pattern_kerchief", "poof_effect", "pose", "round_head", "shorts", "siberian_tiger", "solo", "striped_body", "striped_ears", "striped_fur", "stripes", "suggestive_pose", "tan_bottomwear", "tan_chest", "tan_clothing", "tan_shorts", "tiger", "white_chest"], "stage3_selected": ["blue_eyes", "confident", "countershade_belly", "countershade_body", "eyes", "fluffy_fur", "gesture", "hand_on_head", "muscular_arms", "muscular_legs", "no_irises", "pattern_background", "pattern_kerchief", "poof_effect", "pose", "round_head", "shorts", "siberian_tiger", "striped_body", "striped_ears", "striped_fur", "suggestive_pose", "tan_chest", "tan_shorts", "tiger", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5752, "pose": 0.6281, "shorts": 0.5828, "tiger": 0.5995, "gesture": 0.5905, "striped_body": 0.4104, "striped_fur": 0.6411, "hand_on_head": 0.5966, "pattern_background": 0.5244, "muscular_arms": 0.7958, "muscular_legs": 0.7903, "confident": 0.492, "white_chest": 0.9205, "suggestive_pose": 0.6275, "tan_chest": 0.8501, "no_irises": 0.488, "striped_ears": 0.4517, "countershade_body": 0.8753, "round_head": 0.4815, "fluffy_fur": 0.6703, "countershade_belly": 0.8309, "siberian_tiger": 0.4862, "tan_shorts": 0.5507, "pattern_kerchief": 0.5157, "poof_effect": 0.4448, "eyes": 0.9805}, "stage3_selected_ranks": {"blue_eyes": 25, "pose": 14, "shorts": 24, "tiger": 18, "gesture": 22, "striped_body": 38, "striped_fur": 12, "hand_on_head": 19, "pattern_background": 28, "muscular_arms": 6, "muscular_legs": 7, "confident": 31, "white_chest": 2, "suggestive_pose": 15, "tan_chest": 4, "no_irises": 32, "striped_ears": 35, "countershade_body": 3, "round_head": 34, "fluffy_fur": 10, "countershade_belly": 5, "siberian_tiger": 33, "tan_shorts": 26, "pattern_kerchief": 30, "poof_effect": 36, "eyes": 1}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, 
"pose": 1, "shorts": 1, "tiger": 1, "gesture": 1, "striped_body": 2, "striped_fur": 2, "hand_on_head": 2, "pattern_background": 1, "muscular_arms": 1, "muscular_legs": 2, "confident": 1, "white_chest": 1, "suggestive_pose": 1, "tan_chest": 1, "no_irises": 2, "striped_ears": 1, "countershade_body": 1, "round_head": 2, "fluffy_fur": 1, "countershade_belly": 2, "siberian_tiger": 2, "tan_shorts": 2, "pattern_kerchief": 2, "poof_effect": 1, "eyes": 1}, "extra_evidence": {"avian": {"source": "implied"}, "belly": {"source": "implied"}, "bird": {"source": "probe"}, "confident": {"source": "stage3", "why": "unknown", "retrieval_score": 0.492}, "countershade_belly": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8309}, "countershade_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8753}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9805}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6703}, "gesture": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5905}, "looking_at_viewer": {"source": "structural"}, "muscular_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7958}, "muscular_legs": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7903}, "no_irises": {"source": "stage3", "why": "unknown", "retrieval_score": 0.488}, "pattern_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5244}, "pattern_kerchief": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5157}, "poof_effect": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4448}, "pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6281}, "round_head": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4815}, "siberian_tiger": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4862}, "striped_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4104}, "striped_ears": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.4517}, "striped_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6411}, "suggestive_pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6275}, "tan_bottomwear": {"source": "implied"}, "tan_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8501}, "tan_clothing": {"source": "implied"}, "tan_shorts": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5507}, "white_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9205}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["solo", "felid", "clothing", "bird", "anthro"], "t1": 2.2, "t2": 1.72, "t3": 3.56, "t3s": 0.69, "t3p": 1.87, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=38 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 34, "n_selected": 58, "n_implied": 18, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.1724, "R": 0.8333, "F1": 0.2857, "leaf_P": 0.125, "leaf_R": 0.4444, "leaf_F1": 0.1951, "n_leaf_sel": 32, "n_leaf_gt": 9, "ret_P": 0.0882, "sel_given_ret": 3.3333, "over_sel": 4.83, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 37, "attempts_by_n_local": {"37": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1724, "gen_R": 0.8333, "gen_F1": 0.2857, "missed": ["alpha_channel", "fingers"], "extra": ["bag", "black_body", "black_fur", "black_nose", "blowup_background", "breasts", "brown_clothing", "brown_jacket", "brown_topwear", "brown_vest", "business_attire", "can", "clasped_hands", "cleavage", "cleavage_overflow", "container", "dress_shirt", "dyed_fur", "fist", "formal", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "hands_together", "holding_can", "holding_container", "holding_mug", "holding_object", "humor", "jacket", "jacket_vest", "mug", "necktie", "pun", "serious", "shirt", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_necktie", "white_topwear", "wide_hips"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "bag", "black_body", "black_fur", "black_nose", "blowup_background", "breasts", "brown_clothing", "brown_jacket", "brown_topwear", "brown_vest", 
"business_attire", "can", "clasped_hands", "cleavage", "cleavage_overflow", "clothed", "clothing", "container", "dress_shirt", "dyed_fur", "felid", "feline", "fist", "formal", "fur", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "hands_together", "holding_can", "holding_container", "holding_mug", "holding_object", "humor", "jacket", "jacket_vest", "male", "mammal", "mug", "necktie", "pun", "serious", "shirt", "solo", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_necktie", "white_topwear", "wide_hips"], "stage3_selected": ["bag", "black_fur", "black_nose", "blowup_background", "brown_jacket", "brown_vest", "business_attire", "clasped_hands", "cleavage_overflow", "dress_shirt", "dyed_fur", "felid", "feline", "fist", "formal", "fur", "green_background", "grey_shirt", "hair_bun", "hands_together", "holding_can", "holding_mug", "invalid_background", "invalid_tag", "jacket_vest", "mug", "necktie", "pun", "serious", "shirt", "simple_background", "t-shirt", "teal_shirt", "vest", "white_necktie", "white_topwear", "wide_hips"], "stage3_selected_scores": {"fur": 0.7146, "simple_background": 0.6978, "felid": 0.6418, "feline": 0.7062, "shirt": 0.7998, "wide_hips": 0.4732, "black_nose": 0.6261, "black_fur": 0.7183, "necktie": 0.7314, "t-shirt": 0.7846, "white_topwear": 0.7154, "vest": 0.8403, "green_background": 0.6069, "fist": 0.5544, "bag": 0.5527, "dress_shirt": 0.6132, "hair_bun": 0.6926, "pun": 0.5182, "cleavage_overflow": 0.4789, "mug": 0.8841, "hands_together": 0.5547, "grey_shirt": 0.7582, "serious": 0.5823, "invalid_tag": 0.5751, "holding_mug": 0.916, "clasped_hands": 0.6268, "brown_jacket": 0.7523, "blowup_background": 0.6356, "holding_can": 0.7864, "formal": 0.5993, "business_attire": 0.5558, "dyed_fur": 0.6226, "jacket_vest": 0.772, "brown_vest": 0.8153, "teal_shirt": 0.7474, "white_necktie": 0.6418, "invalid_background": 0.6495}, "stage3_selected_ranks": {"fur": 15, 
"simple_background": 17, "felid": 20, "feline": 16, "shirt": 5, "wide_hips": 37, "black_nose": 24, "black_fur": 13, "necktie": 12, "t-shirt": 7, "white_topwear": 14, "vest": 3, "green_background": 27, "fist": 33, "bag": 34, "dress_shirt": 26, "hair_bun": 18, "pun": 35, "cleavage_overflow": 36, "mug": 2, "hands_together": 32, "grey_shirt": 9, "serious": 29, "invalid_tag": 30, "holding_mug": 1, "clasped_hands": 23, "brown_jacket": 10, "blowup_background": 22, "holding_can": 6, "formal": 28, "business_attire": 31, "dyed_fur": 25, "jacket_vest": 8, "brown_vest": 4, "teal_shirt": 11, "white_necktie": 21, "invalid_background": 19}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "felid": 2, "feline": 1, "shirt": 1, "wide_hips": 2, "black_nose": 2, "black_fur": 1, "necktie": 1, "t-shirt": 2, "white_topwear": 1, "vest": 1, "green_background": 2, "fist": 2, "bag": 2, "dress_shirt": 2, "hair_bun": 1, "pun": 2, "cleavage_overflow": 1, "mug": 1, "hands_together": 2, "grey_shirt": 1, "serious": 2, "invalid_tag": 1, "holding_mug": 1, "clasped_hands": 1, "brown_jacket": 2, "blowup_background": 2, "holding_can": 2, "formal": 1, "business_attire": 1, "dyed_fur": 2, "jacket_vest": 2, "brown_vest": 1, "teal_shirt": 2, "white_necktie": 2, "invalid_background": 1}, "extra_evidence": {"bag": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5527}, "black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7183}, "black_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6261}, "blowup_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6356}, "breasts": {"source": "implied"}, "brown_clothing": {"source": "implied"}, "brown_jacket": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7523}, "brown_topwear": {"source": "implied"}, "brown_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8153}, "business_attire": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.5558}, "can": {"source": "implied"}, "clasped_hands": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6268}, "cleavage": {"source": "implied"}, "cleavage_overflow": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4789}, "container": {"source": "implied"}, "dress_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6132}, "dyed_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6226}, "fist": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5544}, "formal": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5993}, "green_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6069}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6926}, "hands_together": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5547}, "holding_can": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7864}, "holding_container": {"source": "implied"}, "holding_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "humor": {"source": "implied"}, "jacket": {"source": "implied"}, "jacket_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.772}, "mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7314}, "pun": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5182}, "serious": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5823}, "shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7998}, "t-shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7846}, "teal_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, 
"text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8403}, "white_clothing": {"source": "implied"}, "white_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6418}, "white_topwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7154}, "wide_hips": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4732}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["text", "solo", "felid", "clothing", "anthro"], "t1": 1.77, "t2": 1.45, "t3": 5.25, "t3s": 4.25, "t3p": 3.94, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=37 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 35, "n_selected": 26, "n_implied": 4, "n_structural": 6, "n_probe": 6, "ret_R": 0.5714, "P": 0.4231, "R": 0.7857, "F1": 0.55, "leaf_P": 0.2353, "leaf_R": 0.4, "leaf_F1": 0.2963, "n_leaf_sel": 17, "n_leaf_gt": 10, "ret_P": 0.2286, "sel_given_ret": 1.375, "over_sel": 1.86, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"36": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4231, "gen_R": 0.7857, "gen_F1": 0.55, "missed": ["fur", "human", "male"], "extra": ["anthro", "arm_hair", "body_hair", "cheeky", "duo", "flash", "front_view", "gorilla", "grin", "humanoid", "loincloth_only", "raised_arms", "smile", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "arm_hair", "bear", "body_hair", "cheeky", "clothed", "clothing", "dancing", "duo", "flash", "front_view", "gorilla", "grin", "group", "hair", "haplorhine", "humanoid", "loincloth_only", "looking_at_viewer", "mammal", "primate", "raised_arms", "smile", "topless", "trio"], "stage3_selected": ["ape", "arm_hair", "bear", "cheeky", "dancing", "flash", "front_view", "gorilla", "grin", "hair", "haplorhine", "loincloth_only", "looking_at_viewer", "raised_arms", "simple_background"], "stage3_selected_scores": {"hair": 0.5455, "simple_background": 0.5491, "looking_at_viewer": 0.5483, "bear": 0.5736, "front_view": 
0.4614, "grin": 0.5653, "haplorhine": 0.8324, "dancing": 0.5576, "ape": 0.9767, "raised_arms": 0.5461, "gorilla": 0.8299, "arm_hair": 0.3661, "flash": 0.3198, "loincloth_only": 0.4961, "cheeky": 0.3905}, "stage3_selected_ranks": {"hair": 14, "simple_background": 11, "looking_at_viewer": 12, "bear": 6, "front_view": 24, "grin": 8, "haplorhine": 3, "dancing": 10, "ape": 1, "raised_arms": 13, "gorilla": 4, "arm_hair": 36, "flash": 38, "loincloth_only": 21, "cheeky": 33}, "stage3_selected_phrase_ranks": {"hair": 1, "simple_background": 1, "looking_at_viewer": 1, "bear": 1, "front_view": 2, "grin": 1, "haplorhine": 2, "dancing": 1, "ape": 1, "raised_arms": 1, "gorilla": 1, "arm_hair": 2, "flash": 2, "loincloth_only": 2, "cheeky": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "arm_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3661}, "body_hair": {"source": "implied"}, "cheeky": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3905}, "duo": {"source": "probe"}, "flash": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3198}, "front_view": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4614}, "gorilla": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8299}, "grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5653}, "humanoid": {"source": "structural"}, "loincloth_only": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4961}, "raised_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5461}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "humanoid", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "group", "duo", "clothing", "bear", "anthro"], "t1": 1.79, "t2": 1.83, "t3": 2.31, "t3s": 1.03, "t3p": 1.86, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: 
general=36 entity=1 copyright_filtered=1 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/k_sweep_explicit_no_why_seed42_k3.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T05:57:22.405923", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 3, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 41, "n_selected": 47, "n_implied": 20, "n_structural": 3, "n_probe": 4, "ret_R": 0.2727, "P": 0.3617, "R": 0.7727, "F1": 0.4928, "leaf_P": 0.25, "leaf_R": 0.3846, "leaf_F1": 0.303, "n_leaf_sel": 20, "n_leaf_gt": 13, "ret_P": 0.1463, "sel_given_ret": 2.8333, "over_sel": 2.14, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 24, "attempts_by_n_local": {"44": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3617, "gen_R": 0.7727, "gen_F1": 0.4928, "missed": ["fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["5_claws", "arctic_wolf", "blank_expression", "blowup_background", "bottomwear", "canis", "colorful", "colorful_background", "denim", "denim_clothing", "flowing_hair", "glowing", "glowing_hair", "jeans", "long_hair", "maned_wolf", "membrane_(anatomy)", "membranous_wings", "pants", "playing_guitar", "playing_music", "red_hair", "t-pose", "tail_tuft", "torn_bottomwear", "torn_jeans", "torn_pants", "tuft", "wings", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["5_claws", "anthro", "arctic_wolf", "bass_guitar", "blank_expression", "blowup_background", "bottomwear", "canid", "canine", "canis", "claws", "clothed", "clothing", 
"colorful", "colorful_background", "denim", "denim_clothing", "flowing_hair", "glowing", "glowing_hair", "guitar", "hair", "jeans", "long_hair", "mammal", "maned_wolf", "membrane_(anatomy)", "membranous_wings", "musical_instrument", "pants", "playing_guitar", "playing_music", "plucked_string_instrument", "red_hair", "solo", "spade_tail", "string_instrument", "t-pose", "tail", "tail_tuft", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "tuft", "wings", "wolf"], "stage3_selected": ["5_claws", "abstract_background", "arctic_wolf", "bass_guitar", "blank_expression", "blowup_background", "claws", "colorful_background", "flowing_hair", "glowing_hair", "hair", "long_hair", "maned_wolf", "membranous_wings", "playing_guitar", "red_hair", "spade_tail", "t-pose", "tail", "tail_tuft", "torn_bottomwear", "torn_jeans", "torn_pants", "wolf"], "stage3_selected_scores": {"hair": 0.573, "tail": 0.5659, "claws": 0.5684, "wolf": 0.5782, "long_hair": 0.4286, "red_hair": 0.4258, "membranous_wings": 0.4106, "abstract_background": 0.4924, "tail_tuft": 0.4302, "spade_tail": 0.618, "torn_bottomwear": 0.4362, "torn_pants": 0.4639, "maned_wolf": 0.4599, "arctic_wolf": 0.4908, "playing_guitar": 0.9317, "torn_jeans": 0.4824, "glowing_hair": 0.4302, "bass_guitar": 0.9118, "flowing_hair": 0.5669, "blowup_background": 0.5038, "t-pose": 0.5519, "colorful_background": 0.5132, "5_claws": 0.4601, "blank_expression": 0.4242}, "stage3_selected_ranks": {"hair": 9, "tail": 12, "claws": 10, "wolf": 7, "long_hair": 39, "red_hair": 40, "membranous_wings": 42, "abstract_background": 24, "tail_tuft": 38, "spade_tail": 5, "torn_bottomwear": 34, "torn_pants": 28, "maned_wolf": 31, "arctic_wolf": 26, "playing_guitar": 2, "torn_jeans": 27, "glowing_hair": 36, "bass_guitar": 3, "flowing_hair": 11, "blowup_background": 22, "t-pose": 15, "colorful_background": 21, "5_claws": 30, "blank_expression": 41}, "stage3_selected_phrase_ranks": {"hair": 1, "tail": 1, "claws": 1, "wolf": 1, "long_hair": 2, 
"red_hair": 3, "membranous_wings": 2, "abstract_background": 3, "tail_tuft": 3, "spade_tail": 1, "torn_bottomwear": 3, "torn_pants": 2, "maned_wolf": 3, "arctic_wolf": 2, "playing_guitar": 1, "torn_jeans": 1, "glowing_hair": 2, "bass_guitar": 2, "flowing_hair": 1, "blowup_background": 2, "t-pose": 2, "colorful_background": 2, "5_claws": 3, "blank_expression": 3}, "extra_evidence": {"5_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4601}, "arctic_wolf": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4908}, "blank_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4242}, "blowup_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5038}, "bottomwear": {"source": "implied"}, "canis": {"source": "implied"}, "colorful": {"source": "implied"}, "colorful_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5132}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5669}, "glowing": {"source": "implied"}, "glowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4302}, "jeans": {"source": "implied"}, "long_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4286}, "maned_wolf": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4599}, "membrane_(anatomy)": {"source": "implied"}, "membranous_wings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4106}, "pants": {"source": "implied"}, "playing_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9317}, "playing_music": {"source": "implied"}, "red_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4258}, "t-pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5519}, "tail_tuft": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4302}, "torn_bottomwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4362}, 
"torn_jeans": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4824}, "torn_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4639}, "tuft": {"source": "implied"}, "wings": {"source": "implied"}, "wolf": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5782}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "clothing", "canid", "anthro"], "t1": 1.83, "t2": 2.54, "t3": 3.41, "t3s": 3.01, "t3p": 4.22, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=44 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 38, "n_selected": 24, "n_implied": 2, "n_structural": 3, "n_probe": 4, "ret_R": 0.75, "P": 0.1667, "R": 1.0, "F1": 0.2857, "leaf_P": 0.1429, "leaf_R": 0.75, "leaf_F1": 0.24, "n_leaf_sel": 21, "n_leaf_gt": 4, "ret_P": 0.0789, "sel_given_ret": 1.3333, "over_sel": 6.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 19, "attempts_by_n_local": {"40": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1667, "gen_R": 1.0, "gen_F1": 0.2857, "missed": [], "extra": ["<3", "anthro", "big_eyes", "big_iris", "clothed", "clothing", "floating", "glistening", "glistening_eyes", "light_nose", "no_irises", "nose", "pale_body", "pink_mouth", "smiling_at_viewer", "spots", "spotted_face", "tan_chest", "toony", "unknown_species"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["<3", "anthro", "big_eyes", "big_iris", "clothed", "clothing", "floating", "glistening", "glistening_eyes", "light_nose", "no_irises", "nose", "pale_body", "pink_mouth", "red_nose", "smile", "smiling_at_viewer", "solo", "spots", "spotted_face", "tan_body", "tan_chest", "toony", "unknown_species"], "stage3_selected": ["big_eyes", "big_iris", "floating", "glistening_eyes", "light_nose", "no_irises", "nose", "pale_body", "pink_mouth", "red_nose", "smile", "smiling_at_viewer", "spots", "spotted_face", "tan_body", "tan_chest", "toony", "unknown_species", "white_background"], "stage3_selected_scores": {"smile": 0.5956, "white_background": 0.6072, "tan_body": 0.6582, 
"spots": 0.6224, "toony": 0.5172, "glistening_eyes": 0.494, "unknown_species": 0.5802, "smiling_at_viewer": 0.5323, "big_eyes": 0.6934, "red_nose": 0.7475, "floating": 0.6454, "tan_chest": 0.6867, "spotted_face": 0.6973, "no_irises": 0.4925, "pink_mouth": 0.6468, "light_nose": 0.6631, "big_iris": 0.566, "pale_body": 0.4677, "nose": 0.8611}, "stage3_selected_ranks": {"smile": 19, "white_background": 18, "tan_body": 10, "spots": 16, "toony": 31, "glistening_eyes": 32, "unknown_species": 20, "smiling_at_viewer": 28, "big_eyes": 7, "red_nose": 3, "floating": 12, "tan_chest": 8, "spotted_face": 5, "no_irises": 33, "pink_mouth": 11, "light_nose": 9, "big_iris": 23, "pale_body": 38, "nose": 2}, "stage3_selected_phrase_ranks": {"smile": 2, "white_background": 1, "tan_body": 3, "spots": 3, "toony": 1, "glistening_eyes": 2, "unknown_species": 1, "smiling_at_viewer": 3, "big_eyes": 1, "red_nose": 1, "floating": 1, "tan_chest": 2, "spotted_face": 2, "no_irises": 3, "pink_mouth": 1, "light_nose": 2, "big_iris": 3, "pale_body": 3, "nose": 1}, "extra_evidence": {"<3": {"source": "probe"}, "anthro": {"source": "structural"}, "big_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6934}, "big_iris": {"source": "stage3", "why": "unknown", "retrieval_score": 0.566}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "floating": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6454}, "glistening": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.494}, "light_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6631}, "no_irises": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4925}, "nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8611}, "pale_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4677}, "pink_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6468}, "smiling_at_viewer": {"source": "stage3", 
"why": "unknown", "retrieval_score": 0.5323}, "spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6224}, "spotted_face": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6973}, "tan_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6867}, "toony": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5172}, "unknown_species": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5802}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "simple_background", "anthro", "<3"], "t1": 1.26, "t2": 1.13, "t3": 1.33, "t3s": 1.5, "t3p": 1.23, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=40 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=4"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 46, "n_selected": 38, "n_implied": 8, "n_structural": 4, "n_probe": 5, "ret_R": 0.6429, "P": 0.3421, "R": 0.9286, "F1": 0.5, "leaf_P": 0.25, "leaf_R": 0.6667, "leaf_F1": 0.3636, "n_leaf_sel": 24, "n_leaf_gt": 9, "ret_P": 0.1957, "sel_given_ret": 1.4444, "over_sel": 2.71, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 24, "attempts_by_n_local": {"47": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3421, "gen_R": 0.9286, "gen_F1": 0.5, "missed": ["romantic_couple"], "extra": ["<3", "blush_lines", "cheek_tuft", "close-up", "coat", "diaper", "eyes_closed", "facial_tuft", "glistening", "glistening_eyes", "gradient_eyes", "half-closed_eyes", "holding_object", "holding_plushie", "looking_at_viewer", "narrowed_eyes", "pull-ups_(diaper)", "raincoat", "rosy_cheeks", "round_eyes", "small_eyes", "surprised_expression", "surprised_look", "topwear", "tuft"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "blush_lines", "cheek_tuft", "close-up", "clothed", "clothing", "coat", "diaper", "duo", "eyes_closed", "facial_tuft", "glistening", "glistening_eyes", "gradient_eyes", "half-closed_eyes", "holding_object", "holding_plushie", "lagomorph", "leporid", "looking_at_viewer", "mammal", "narrowed_eyes", "plushie", "pull-ups_(diaper)", "rabbit", "raincoat", "romantic", 
"rosy_cheeks", "round_eyes", "small_eyes", "surprised_expression", "surprised_look", "teal_eyes", "topwear", "tuft"], "stage3_selected": ["blue_eyes", "blush_lines", "cheek_tuft", "close-up", "coat", "duo", "eyes_closed", "glistening_eyes", "gradient_eyes", "half-closed_eyes", "holding_plushie", "lagomorph", "leporid", "plushie", "pull-ups_(diaper)", "rabbit", "raincoat", "romantic", "rosy_cheeks", "round_eyes", "small_eyes", "surprised_expression", "surprised_look", "teal_eyes"], "stage3_selected_scores": {"duo": 0.3257, "blue_eyes": 0.6151, "eyes_closed": 0.4028, "lagomorph": 0.5325, "leporid": 0.5311, "rabbit": 0.5939, "half-closed_eyes": 0.5138, "cheek_tuft": 0.4678, "romantic": 0.5603, "close-up": 0.3803, "blush_lines": 0.4756, "glistening_eyes": 0.4543, "coat": 0.6383, "plushie": 0.7455, "teal_eyes": 0.6283, "surprised_expression": 0.639, "rosy_cheeks": 0.472, "holding_plushie": 0.7793, "raincoat": 0.5262, "small_eyes": 0.6187, "surprised_look": 0.6399, "round_eyes": 0.4887, "pull-ups_(diaper)": 0.5206, "gradient_eyes": 0.4784}, "stage3_selected_ranks": {"duo": 47, "blue_eyes": 12, "eyes_closed": 44, "lagomorph": 23, "leporid": 24, "rabbit": 13, "half-closed_eyes": 30, "cheek_tuft": 39, "romantic": 16, "close-up": 45, "blush_lines": 36, "glistening_eyes": 42, "coat": 7, "plushie": 3, "teal_eyes": 8, "surprised_expression": 6, "rosy_cheeks": 37, "holding_plushie": 2, "raincoat": 26, "small_eyes": 11, "surprised_look": 5, "round_eyes": 32, "pull-ups_(diaper)": 29, "gradient_eyes": 35}, "stage3_selected_phrase_ranks": {"duo": 3, "blue_eyes": 1, "eyes_closed": 2, "lagomorph": 2, "leporid": 3, "rabbit": 1, "half-closed_eyes": 2, "cheek_tuft": 3, "romantic": 2, "close-up": 3, "blush_lines": 3, "glistening_eyes": 3, "coat": 1, "plushie": 1, "teal_eyes": 1, "surprised_expression": 2, "rosy_cheeks": 2, "holding_plushie": 1, "raincoat": 2, "small_eyes": 1, "surprised_look": 1, "round_eyes": 1, "pull-ups_(diaper)": 2, "gradient_eyes": 2}, "extra_evidence": {"<3": 
{"source": "probe"}, "blush_lines": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4756}, "cheek_tuft": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4678}, "close-up": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3803}, "coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6383}, "diaper": {"source": "implied"}, "eyes_closed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4028}, "facial_tuft": {"source": "implied"}, "glistening": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4543}, "gradient_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4784}, "half-closed_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5138}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7793}, "looking_at_viewer": {"source": "structural"}, "narrowed_eyes": {"source": "implied"}, "pull-ups_(diaper)": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5206}, "raincoat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5262}, "rosy_cheeks": {"source": "stage3", "why": "unknown", "retrieval_score": 0.472}, "round_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4887}, "small_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6187}, "surprised_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.639}, "surprised_look": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6399}, "topwear": {"source": "implied"}, "tuft": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["duo", "clothing", "blush", "anthro", "<3"], "t1": 1.85, "t2": 1.4, "t3": 4.65, "t3s": 1.07, "t3p": 1.53, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=47 entity=0 
copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 51, "n_selected": 65, "n_implied": 20, "n_structural": 4, "n_probe": 6, "ret_R": 0.56, "P": 0.3692, "R": 0.96, "F1": 0.5333, "leaf_P": 0.2353, "leaf_R": 0.5333, "leaf_F1": 0.3265, "n_leaf_sel": 34, "n_leaf_gt": 15, "ret_P": 0.2745, "sel_given_ret": 1.7143, "over_sel": 2.6, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 39, "attempts_by_n_local": {"52": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3692, "gen_R": 0.96, "gen_F1": 0.5333, "missed": ["looking_at_another"], "extra": ["4_claws", "5_claws", "black_bottomwear", "black_clothing", "black_pants", "blowup_background", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "blush", "buckteeth", "eye_markings", "fennec_fox", "front_view", "geometric_background", "gloves_(marking)", "grid_background", "long_arms", "looking_at_viewer", "on_one_leg", "open_mouth", "open_smile", "pattern_background", "red_mouth", "smile", "snout", "snout_markings", "t-shirt", "tan_body", "tan_bottomwear", "tan_clothing", "tan_fur", "tan_pants", "teeth", "undershirt", "white_clothing", "white_shirt", "white_topwear", "wide_eyed", "yellow_background"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["4_claws", "5_claws", 
"anthro", "black_bottomwear", "black_clothing", "black_pants", "blowup_background", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "blush", "bottomwear", "buckteeth", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "eye_markings", "facial_markings", "fennec_fox", "fox", "front_view", "fur", "geometric_background", "gloves_(marking)", "grey_background", "grid_background", "head_markings", "lagomorph", "leporid", "long_arms", "looking_at_viewer", "mammal", "markings", "on_one_leg", "open_mouth", "open_smile", "overalls", "pants", "pattern_background", "rabbit", "red_mouth", "shirt", "smile", "snout", "snout_markings", "standing", "t-shirt", "tan_body", "tan_bottomwear", "tan_clothing", "tan_fur", "tan_pants", "teeth", "topwear", "undershirt", "white_clothing", "white_shirt", "white_topwear", "wide_eyed", "yellow_background"], "stage3_selected": ["4_claws", "5_claws", "black_pants", "blowup_background", "blue_overalls", "blue_shirt", "buckteeth", "claws", "crossed_arms", "eye_markings", "facial_markings", "fennec_fox", "fox", "front_view", "fur", "gloves_(marking)", "grey_background", "grid_background", "head_markings", "invalid_tag", "lagomorph", "long_arms", "on_one_leg", "open_mouth", "open_smile", "overalls", "rabbit", "red_mouth", "shirt", "snout_markings", "standing", "t-shirt", "tan_fur", "tan_pants", "undershirt", "white_shirt", "white_topwear", "wide_eyed", "yellow_background"], "stage3_selected_scores": {"fur": 0.654, "open_mouth": 0.6338, "claws": 0.6311, "standing": 0.6886, "fox": 0.6387, "shirt": 0.7491, "lagomorph": 0.5942, "rabbit": 0.6517, "front_view": 0.5154, "tan_fur": 0.52, "open_smile": 0.528, "grey_background": 0.6792, "gloves_(marking)": 0.6271, "head_markings": 0.6334, "buckteeth": 0.532, "facial_markings": 0.6951, "t-shirt": 0.7246, "crossed_arms": 0.7292, "wide_eyed": 0.4677, "white_topwear": 0.7676, "white_shirt": 0.8202, "on_one_leg": 0.5769, "yellow_background": 0.5951, "overalls": 0.878, 
"black_pants": 0.8334, "blue_shirt": 0.6699, "eye_markings": 0.6366, "snout_markings": 0.6219, "invalid_tag": 0.5414, "undershirt": 0.7069, "4_claws": 0.5923, "blowup_background": 0.5948, "long_arms": 0.586, "tan_pants": 0.7502, "grid_background": 0.6147, "blue_overalls": 0.9204, "5_claws": 0.6023, "red_mouth": 0.545, "fennec_fox": 0.5037}, "stage3_selected_ranks": {"fur": 21, "open_mouth": 25, "claws": 27, "standing": 16, "fox": 23, "shirt": 10, "lagomorph": 36, "rabbit": 22, "front_view": 51, "tan_fur": 50, "open_smile": 49, "grey_background": 18, "gloves_(marking)": 28, "head_markings": 26, "buckteeth": 47, "facial_markings": 15, "t-shirt": 12, "crossed_arms": 11, "wide_eyed": 53, "white_topwear": 6, "white_shirt": 4, "on_one_leg": 39, "yellow_background": 34, "overalls": 2, "black_pants": 3, "blue_shirt": 20, "eye_markings": 24, "snout_markings": 29, "invalid_tag": 45, "undershirt": 13, "4_claws": 37, "blowup_background": 35, "long_arms": 38, "tan_pants": 9, "grid_background": 31, "blue_overalls": 1, "5_claws": 32, "red_mouth": 44, "fennec_fox": 52}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "standing": 1, "fox": 1, "shirt": 1, "lagomorph": 3, "rabbit": 1, "front_view": 3, "tan_fur": 3, "open_smile": 2, "grey_background": 1, "gloves_(marking)": 3, "head_markings": 2, "buckteeth": 3, "facial_markings": 1, "t-shirt": 2, "crossed_arms": 1, "wide_eyed": 3, "white_topwear": 3, "white_shirt": 1, "on_one_leg": 2, "yellow_background": 3, "overalls": 1, "black_pants": 1, "blue_shirt": 3, "eye_markings": 2, "snout_markings": 3, "invalid_tag": 3, "undershirt": 3, "4_claws": 3, "blowup_background": 3, "long_arms": 2, "tan_pants": 3, "grid_background": 2, "blue_overalls": 1, "5_claws": 2, "red_mouth": 2, "fennec_fox": 3}, "extra_evidence": {"4_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5923}, "5_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6023}, "black_bottomwear": {"source": "implied"}, 
"black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8334}, "blowup_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5948}, "blue_clothing": {"source": "implied"}, "blue_overalls": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9204}, "blue_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6699}, "blue_topwear": {"source": "implied"}, "blush": {"source": "probe"}, "buckteeth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.532}, "eye_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6366}, "fennec_fox": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5037}, "front_view": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5154}, "geometric_background": {"source": "implied"}, "gloves_(marking)": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6271}, "grid_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6147}, "long_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.586}, "looking_at_viewer": {"source": "structural"}, "on_one_leg": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5769}, "open_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6338}, "open_smile": {"source": "stage3", "why": "unknown", "retrieval_score": 0.528}, "pattern_background": {"source": "implied"}, "red_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.545}, "smile": {"source": "implied"}, "snout": {"source": "implied"}, "snout_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6219}, "t-shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7246}, "tan_body": {"source": "implied"}, "tan_bottomwear": {"source": "implied"}, "tan_clothing": {"source": "implied"}, "tan_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.52}, "tan_pants": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.7502}, "teeth": {"source": "implied"}, "undershirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7069}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8202}, "white_topwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7676}, "wide_eyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4677}, "yellow_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5951}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "duo", "clothing", "canid", "blush", "anthro"], "t1": 1.91, "t2": 1.47, "t3": 7.15, "t3s": 1.9, "t3p": 2.93, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=52 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 67, "n_selected": 64, "n_implied": 10, "n_structural": 5, "n_probe": 5, "ret_R": 0.3077, "P": 0.1406, "R": 0.6923, "F1": 0.2338, "leaf_P": 0.0435, "leaf_R": 0.3333, "leaf_F1": 0.0769, "n_leaf_sel": 46, "n_leaf_gt": 6, "ret_P": 0.0597, "sel_given_ret": 2.25, "over_sel": 4.92, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 47, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "5": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.1429, "gen_R": 0.6923, "gen_F1": 0.2368, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["2_panel_comic", "3_panel_comic", "4_panel_comic", "<3", "agamid", "air_bubble", "anthro", "bear", "capricorn", "carrying_over_shoulder", "cjk_character", "clothed", "clothing", "dark", "darkner", "darkness", "duo", "duo_focus", "empty_speech_bubble", "fan_character", "felid", "figurine", "frilled_lizard", "group", "human_only", "laying_on_ground", "light", "lying_on_ground", "male_human", "mask", "medical_instrument", "monitor_lizard", "not_furry", "note", "note_pad", "notebook", "on_ground", "oxygen_mask", "pear-shaped_figure", "question_mark", "sad", "scientific_instrument", "soap_bubbles", "speech_bubble", "standing", "standing_over", "stick_figure", "striped_body", "stripes", "sunlight", "taur", "thought_bubble", "trio", "waist", "wide_hips"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", 
"white_body", "white_fur"], "selected_tags": ["2_panel_comic", "3_panel_comic", "4_panel_comic", "<3", "agamid", "air_bubble", "anthro", "bear", "bovid", "capricorn", "caprine", "carrying_over_shoulder", "cjk_character", "clothed", "clothing", "dark", "darkner", "darkness", "duo", "duo_focus", "empty_speech_bubble", "fan_character", "felid", "figurine", "frilled_lizard", "goat", "group", "human", "human_only", "laying_on_ground", "light", "lizard", "lying_on_ground", "male_human", "mammal", "mask", "medical_instrument", "monitor_lizard", "not_furry", "note", "note_pad", "notebook", "on_ground", "oxygen_mask", "pear-shaped_figure", "question_mark", "reptile", "sad", "scalie", "scientific_instrument", "soap_bubbles", "speech_bubble", "standing", "standing_over", "stick_figure", "striped_body", "stripes", "sunlight", "taur", "text", "thought_bubble", "trio", "waist", "wide_hips"], "stage3_selected": ["2_panel_comic", "3_panel_comic", "4_panel_comic", "air_bubble", "capricorn", "caprine", "carrying_over_shoulder", "cjk_character", "dark", "darkner", "darkness", "duo", "duo_focus", "empty_speech_bubble", "fan_character", "figurine", "frilled_lizard", "goat", "group", "human", "human_only", "laying_on_ground", "light", "lizard", "lying_on_ground", "male_human", "mask", "monitor_lizard", "note", "note_pad", "notebook", "on_ground", "oxygen_mask", "pear-shaped_figure", "question_mark", "sad", "soap_bubbles", "speech_bubble", "standing", "standing_over", "stick_figure", "striped_body", "stripes", "sunlight", "thought_bubble", "trio", "waist"], "stage3_selected_scores": {"duo": 0.379, "group": 0.4732, "standing": 0.4714, "human": 0.5598, "speech_bubble": 0.5792, "stripes": 0.4622, "fan_character": 0.4163, "caprine": 0.47, "trio": 0.3761, "lizard": 0.5978, "striped_body": 0.3966, "goat": 0.5805, "mask": 0.3754, "light": 0.5849, "question_mark": 0.3121, "duo_focus": 0.3571, "on_ground": 0.4857, "thought_bubble": 0.475, "sad": 0.4012, "sunlight": 0.4787, "human_only": 0.4179, 
"dark": 0.4135, "darkner": 0.4149, "monitor_lizard": 0.4607, "pear-shaped_figure": 0.4006, "lying_on_ground": 0.5972, "notebook": 0.4057, "darkness": 0.6, "air_bubble": 0.4378, "note": 0.5684, "figurine": 0.5577, "laying_on_ground": 0.5581, "stick_figure": 0.4166, "soap_bubbles": 0.4325, "frilled_lizard": 0.4601, "standing_over": 0.5829, "oxygen_mask": 0.3762, "3_panel_comic": 0.4314, "carrying_over_shoulder": 0.409, "4_panel_comic": 0.4507, "waist": 0.7512, "2_panel_comic": 0.4346, "empty_speech_bubble": 0.5477, "capricorn": 0.5212, "note_pad": 0.4185, "male_human": 0.4242, "cjk_character": 0.4245}, "stage3_selected_ranks": {"duo": 53, "group": 21, "standing": 23, "human": 12, "speech_bubble": 9, "stripes": 25, "fan_character": 41, "caprine": 24, "trio": 56, "lizard": 3, "striped_body": 51, "goat": 8, "mask": 57, "light": 6, "question_mark": 67, "duo_focus": 62, "on_ground": 18, "thought_bubble": 20, "sad": 49, "sunlight": 19, "human_only": 39, "dark": 44, "darkner": 42, "monitor_lizard": 26, "pear-shaped_figure": 50, "lying_on_ground": 4, "notebook": 48, "darkness": 2, "air_bubble": 30, "note": 11, "figurine": 14, "laying_on_ground": 13, "stick_figure": 40, "soap_bubbles": 32, "frilled_lizard": 27, "standing_over": 7, "oxygen_mask": 55, "3_panel_comic": 33, "carrying_over_shoulder": 45, "4_panel_comic": 29, "waist": 1, "2_panel_comic": 31, "empty_speech_bubble": 15, "capricorn": 16, "note_pad": 38, "male_human": 36, "cjk_character": 34}, "stage3_selected_phrase_ranks": {"duo": 3, "group": 1, "standing": 2, "human": 1, "speech_bubble": 1, "stripes": 1, "fan_character": 2, "caprine": 3, "trio": 2, "lizard": 1, "striped_body": 3, "goat": 1, "mask": 3, "light": 1, "question_mark": 3, "duo_focus": 3, "on_ground": 3, "thought_bubble": 3, "sad": 3, "sunlight": 3, "human_only": 3, "dark": 3, "darkner": 2, "monitor_lizard": 2, "pear-shaped_figure": 3, "lying_on_ground": 1, "notebook": 3, "darkness": 1, "air_bubble": 2, "note": 1, "figurine": 2, "laying_on_ground": 2, 
"stick_figure": 3, "soap_bubbles": 3, "frilled_lizard": 3, "standing_over": 1, "oxygen_mask": 2, "3_panel_comic": 3, "carrying_over_shoulder": 3, "4_panel_comic": 1, "waist": 1, "2_panel_comic": 2, "empty_speech_bubble": 1, "capricorn": 2, "note_pad": 2, "male_human": 2, "cjk_character": 1}, "extra_evidence": {"2_panel_comic": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4346}, "3_panel_comic": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4314}, "4_panel_comic": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4507}, "<3": {"source": "probe"}, "agamid": {"source": "implied"}, "air_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4378}, "anthro": {"source": "structural"}, "bear": {"source": "probe"}, "capricorn": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5212}, "carrying_over_shoulder": {"source": "stage3", "why": "unknown", "retrieval_score": 0.409}, "cjk_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4245}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "dark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4135}, "darkner": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4149}, "darkness": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6}, "duo": {"source": "stage3", "why": "unknown", "retrieval_score": 0.379}, "duo_focus": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3571}, "empty_speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5477}, "fan_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4163}, "felid": {"source": "probe"}, "figurine": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5577}, "frilled_lizard": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4601}, "group": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4732}, "human_only": {"source": "stage3", "why": "unknown", "retrieval_score": 
0.4179}, "laying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5581}, "light": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5849}, "lying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5972}, "male_human": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4242}, "mask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3754}, "medical_instrument": {"source": "implied"}, "monitor_lizard": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4607}, "not_furry": {"source": "implied"}, "note": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5684}, "note_pad": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4185}, "notebook": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4057}, "on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4857}, "oxygen_mask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3762}, "pear-shaped_figure": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4006}, "question_mark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3121}, "sad": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4012}, "scientific_instrument": {"source": "implied"}, "soap_bubbles": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4325}, "speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5792}, "standing": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4714}, "standing_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5829}, "stick_figure": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4166}, "striped_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3966}, "stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4622}, "sunlight": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4787}, "taur": {"source": "structural"}, "thought_bubble": {"source": "stage3", 
"why": "unknown", "retrieval_score": 0.475}, "trio": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3761}, "waist": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7512}, "wide_hips": {"source": "implied"}}, "structural": ["group", "anthro", "taur", "clothed", "text"], "probe": ["group", "felid", "bear", "anthro", "<3"], "t1": 2.44, "t2": 1.95, "t3": 4.96, "t3s": 1.94, "t3p": 2.26, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=65 entity=0 copyright_filtered=2 generic_char_to_general=2 unknown_type=2"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 67, "n_selected": 43, "n_implied": 6, "n_structural": 3, "n_probe": 6, "ret_R": 0.7333, "P": 0.2326, "R": 0.6667, "F1": 0.3448, "leaf_P": 0.2222, "leaf_R": 0.6667, "leaf_F1": 0.3333, "n_leaf_sel": 36, "n_leaf_gt": 12, "ret_P": 0.1642, "sel_given_ret": 0.9091, "over_sel": 2.87, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 31, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "8": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2326, "gen_R": 0.6667, "gen_F1": 0.3448, "missed": ["angry", "bed", "eyes_closed", "furniture", "sleeping"], "extra": ["3rd_party_watermark", "<3", "annoyed", "anthro", "bed_covers", "bedding", "bedroom", "big_eyes", "blush", "clothing", "color_swatch", "contest", "curtains_open", "dialogue", "distracting_watermark", "expressions", "felid", "green_ears", "highlights_(coloring)", "humanoid", "long_hair", "mammal", "pajamas", "portuguese_text", "purple_background", "purple_eyes", "purple_hands", "purple_highlights", "restricted_palette", "sleeping_together", "sleepover", "tired", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["3rd_party_watermark", "<3", "annoyed", "anthro", "bed_covers", "bedding", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "blush", "clothing", "color_swatch", "contest", "curtains_open", 
"dialogue", "distracting_watermark", "duo", "expressions", "eyeshadow", "felid", "green_ears", "green_eyes", "hair", "highlights_(coloring)", "humanoid", "long_hair", "lying", "makeup", "mammal", "pajamas", "portuguese_text", "purple_background", "purple_eyes", "purple_hair", "purple_hands", "purple_highlights", "restricted_palette", "sleeping_together", "sleepover", "text", "tired", "watermark"], "stage3_selected": ["3rd_party_watermark", "annoyed", "bed_covers", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "color_swatch", "contest", "curtains_open", "dialogue", "distracting_watermark", "expressions", "eyeshadow", "green_ears", "green_eyes", "long_hair", "lying", "makeup", "pajamas", "portuguese_text", "purple_background", "purple_eyes", "purple_hair", "purple_hands", "purple_highlights", "restricted_palette", "sleeping_together", "sleepover", "text", "tired"], "stage3_selected_scores": {"text": 0.6017, "blue_eyes": 0.6023, "dialogue": 0.4457, "lying": 0.4504, "green_eyes": 0.5999, "long_hair": 0.4595, "blonde_hair": 0.5995, "purple_eyes": 0.434, "purple_hair": 0.5647, "makeup": 0.5972, "eyeshadow": 0.4769, "bedroom": 0.491, "purple_background": 0.4971, "big_eyes": 0.4297, "annoyed": 0.5736, "restricted_palette": 0.4777, "tired": 0.5551, "color_swatch": 0.4623, "distracting_watermark": 0.5007, "pajamas": 0.3762, "green_ears": 0.4402, "purple_highlights": 0.4307, "bed_covers": 0.4156, "curtains_open": 0.4199, "expressions": 0.5449, "sleeping_together": 0.5093, "3rd_party_watermark": 0.3981, "contest": 0.3499, "sleepover": 0.5276, "purple_hands": 0.5398, "portuguese_text": 0.4433}, "stage3_selected_ranks": {"text": 8, "blue_eyes": 7, "dialogue": 44, "lying": 41, "green_eyes": 9, "long_hair": 38, "blonde_hair": 10, "purple_eyes": 49, "purple_hair": 14, "makeup": 11, "eyeshadow": 35, "bedroom": 31, "purple_background": 29, "big_eyes": 54, "annoyed": 13, "restricted_palette": 34, "tired": 16, "color_swatch": 37, "distracting_watermark": 28, "pajamas": 65, 
"green_ears": 47, "purple_highlights": 51, "bed_covers": 57, "curtains_open": 55, "expressions": 18, "sleeping_together": 24, "3rd_party_watermark": 59, "contest": 67, "sleepover": 23, "purple_hands": 19, "portuguese_text": 45}, "stage3_selected_phrase_ranks": {"text": 1, "blue_eyes": 1, "dialogue": 2, "lying": 1, "green_eyes": 1, "long_hair": 3, "blonde_hair": 1, "purple_eyes": 2, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "bedroom": 1, "purple_background": 3, "big_eyes": 3, "annoyed": 2, "restricted_palette": 2, "tired": 2, "color_swatch": 3, "distracting_watermark": 1, "pajamas": 3, "green_ears": 3, "purple_highlights": 3, "bed_covers": 3, "curtains_open": 2, "expressions": 3, "sleeping_together": 2, "3rd_party_watermark": 3, "contest": 2, "sleepover": 1, "purple_hands": 2, "portuguese_text": 3}, "extra_evidence": {"3rd_party_watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3981}, "<3": {"source": "probe"}, "annoyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5736}, "anthro": {"source": "probe"}, "bed_covers": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4156}, "bedding": {"source": "implied"}, "bedroom": {"source": "stage3", "why": "unknown", "retrieval_score": 0.491}, "big_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4297}, "blush": {"source": "probe"}, "clothing": {"source": "implied"}, "color_swatch": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4623}, "contest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3499}, "curtains_open": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4199}, "dialogue": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4457}, "distracting_watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5007}, "expressions": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5449}, "felid": {"source": "probe"}, "green_ears": {"source": "stage3", "why": "unknown", "retrieval_score": 
0.4402}, "highlights_(coloring)": {"source": "implied"}, "humanoid": {"source": "structural"}, "long_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4595}, "mammal": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3762}, "portuguese_text": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4433}, "purple_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4971}, "purple_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.434}, "purple_hands": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5398}, "purple_highlights": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4307}, "restricted_palette": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4777}, "sleeping_together": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5093}, "sleepover": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5276}, "tired": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5551}, "watermark": {"source": "implied"}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "felid", "duo", "blush", "anthro", "<3"], "t1": 2.63, "t2": 1.98, "t3": 7.61, "t3s": 0.68, "t3p": 2.34, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=68 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 68, "n_selected": 35, "n_implied": 6, "n_structural": 4, "n_probe": 5, "ret_R": 0.5455, "P": 0.2857, "R": 0.9091, "F1": 0.4348, "leaf_P": 0.2381, "leaf_R": 0.7143, "leaf_F1": 0.3571, "n_leaf_sel": 21, "n_leaf_gt": 7, "ret_P": 0.0882, "sel_given_ret": 1.6667, "over_sel": 3.18, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 26, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "13": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2857, "gen_R": 0.9091, "gen_F1": 0.4348, "missed": ["open_mouth"], "extra": ["animal_humanoid", "anthro", "blue_stripes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "fluffy_fur", "fox_humanoid", "gradient_tail", "humanoid", "long_tail", "male", "mammal_humanoid", "midair", "pale_body", "pink_stripes", "riding", "skimpy", "slim_humanoid", "stripes", "tail", "tan_stripes", "white_nose"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anthro", "blue_eyes", "blue_nose", "blue_stripes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "fluffy_fur", "fox_humanoid", "fur", "gradient_tail", "humanoid", "long_tail", "male", "mammal", "mammal_humanoid", "midair", "pale_body", "pink_stripes", "purple_body", "riding", "skimpy", "slim_humanoid", "solo", "stripes", "tail", 
"tan_stripes", "white_body", "white_fur", "white_nose"], "stage3_selected": ["blue_eyes", "blue_nose", "blue_stripes", "blurred_background", "canid_humanoid", "canine_humanoid", "curved_tail", "fluffy_fur", "fox_humanoid", "fur", "gradient_background", "gradient_tail", "long_tail", "midair", "pale_body", "pink_stripes", "purple_body", "riding", "simple_background", "skimpy", "slim_humanoid", "stripes", "tail", "tan_stripes", "white_fur", "white_nose"], "stage3_selected_scores": {"fur": 0.5962, "simple_background": 0.604, "tail": 0.6262, "blue_eyes": 0.6113, "white_fur": 0.6152, "stripes": 0.6216, "purple_body": 0.5754, "skimpy": 0.3825, "long_tail": 0.6362, "gradient_background": 0.5021, "canid_humanoid": 0.8514, "canine_humanoid": 0.8898, "blue_nose": 0.6093, "fox_humanoid": 0.81, "blurred_background": 0.5177, "riding": 0.3675, "blue_stripes": 0.6999, "midair": 0.4366, "white_nose": 0.5565, "pink_stripes": 0.7069, "fluffy_fur": 0.5831, "curved_tail": 0.7269, "gradient_tail": 0.5945, "pale_body": 0.4458, "slim_humanoid": 0.592, "tan_stripes": 0.6293}, "stage3_selected_ranks": {"fur": 29, "simple_background": 27, "tail": 16, "blue_eyes": 24, "white_fur": 21, "stripes": 18, "purple_body": 35, "skimpy": 69, "long_tail": 11, "gradient_background": 56, "canid_humanoid": 2, "canine_humanoid": 1, "blue_nose": 26, "fox_humanoid": 3, "blurred_background": 50, "riding": 72, "blue_stripes": 8, "midair": 67, "white_nose": 40, "pink_stripes": 7, "fluffy_fur": 34, "curved_tail": 6, "gradient_tail": 30, "pale_body": 66, "slim_humanoid": 31, "tan_stripes": 14}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "tail": 1, "blue_eyes": 1, "white_fur": 2, "stripes": 2, "purple_body": 3, "skimpy": 2, "long_tail": 1, "gradient_background": 3, "canid_humanoid": 2, "canine_humanoid": 1, "blue_nose": 1, "fox_humanoid": 3, "blurred_background": 3, "riding": 2, "blue_stripes": 1, "midair": 2, "white_nose": 2, "pink_stripes": 1, "fluffy_fur": 2, "curved_tail": 1, 
"gradient_tail": 3, "pale_body": 3, "slim_humanoid": 3, "tan_stripes": 3}, "extra_evidence": {"animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "blue_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6999}, "canid_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8514}, "canine_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8898}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "curved_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7269}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5831}, "fox_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.81}, "gradient_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5945}, "humanoid": {"source": "implied"}, "long_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6362}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "midair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4366}, "pale_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4458}, "pink_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7069}, "riding": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3675}, "skimpy": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3825}, "slim_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.592}, "stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6216}, "tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6262}, "tan_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6293}, "white_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5565}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["solo", "simple_background", "clothing", "canid", "anthro"], "t1": 1.11, "t2": 2.57, "t3": 4.82, "t3s": 1.11, "t3p": 2.24, 
"err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=73 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 56, "n_selected": 36, "n_implied": 13, "n_structural": 5, "n_probe": 5, "ret_R": 0.2727, "P": 0.3889, "R": 0.6364, "F1": 0.4828, "leaf_P": 0.1429, "leaf_R": 0.25, "leaf_F1": 0.1818, "n_leaf_sel": 21, "n_leaf_gt": 12, "ret_P": 0.1071, "sel_given_ret": 2.3333, "over_sel": 1.64, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 16, "attempts_by_n_local": {"57": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3889, "gen_R": 0.6364, "gen_F1": 0.4828, "missed": ["chest_tuft", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["bear", "belly", "countershade_belly", "countershade_body", "cross-hatching", "glistening", "glistening_body", "glistening_eyes", "glistening_fur", "hatching_(art)", "light_chest", "looking_at_viewer", "muscular_legs", "pattern_background", "shaded", "striped_back", "striped_body", "striped_fur", "tan_bottomwear", "tan_clothing", "tan_shorts", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "bear", "belly", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_belly", "countershade_body", "countershading", "cross-hatching", "felid", "fur", "glistening", "glistening_body", "glistening_eyes", 
"glistening_fur", "hand_on_head", "hatching_(art)", "light_chest", "looking_at_viewer", "male", "mammal", "muscular_legs", "pattern_background", "shaded", "shorts", "solo", "striped_back", "striped_body", "striped_fur", "stripes", "tan_bottomwear", "tan_clothing", "tan_shorts", "white_chest"], "stage3_selected": ["blue_eyes", "countershade_belly", "countershade_body", "cross-hatching", "glistening_eyes", "glistening_fur", "hand_on_head", "light_chest", "muscular_legs", "pattern_background", "shorts", "striped_back", "striped_body", "striped_fur", "tan_shorts", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5785, "shorts": 0.5914, "striped_body": 0.4159, "striped_fur": 0.6475, "hand_on_head": 0.6014, "glistening_eyes": 0.4769, "pattern_background": 0.5269, "glistening_fur": 0.501, "muscular_legs": 0.791, "white_chest": 0.917, "countershade_body": 0.8721, "striped_back": 0.7029, "countershade_belly": 0.828, "cross-hatching": 0.4762, "light_chest": 0.7491, "tan_shorts": 0.5498}, "stage3_selected_ranks": {"blue_eyes": 30, "shorts": 28, "striped_body": 55, "striped_fur": 16, "hand_on_head": 24, "glistening_eyes": 48, "pattern_background": 36, "glistening_fur": 41, "muscular_legs": 8, "white_chest": 2, "countershade_body": 3, "striped_back": 13, "countershade_belly": 6, "cross-hatching": 49, "light_chest": 11, "tan_shorts": 32}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "shorts": 1, "striped_body": 2, "striped_fur": 2, "hand_on_head": 2, "glistening_eyes": 3, "pattern_background": 1, "glistening_fur": 3, "muscular_legs": 2, "white_chest": 1, "countershade_body": 1, "striped_back": 2, "countershade_belly": 2, "cross-hatching": 3, "light_chest": 2, "tan_shorts": 2}, "extra_evidence": {"bear": {"source": "probe"}, "belly": {"source": "implied"}, "countershade_belly": {"source": "stage3", "why": "unknown", "retrieval_score": 0.828}, "countershade_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8721}, "cross-hatching": {"source": "stage3", 
"why": "unknown", "retrieval_score": 0.4762}, "glistening": {"source": "implied"}, "glistening_body": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4769}, "glistening_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.501}, "hatching_(art)": {"source": "implied"}, "light_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7491}, "looking_at_viewer": {"source": "structural"}, "muscular_legs": {"source": "stage3", "why": "unknown", "retrieval_score": 0.791}, "pattern_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5269}, "shaded": {"source": "implied"}, "striped_back": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7029}, "striped_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4159}, "striped_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6475}, "tan_bottomwear": {"source": "implied"}, "tan_clothing": {"source": "implied"}, "tan_shorts": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5498}, "white_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.917}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["solo", "felid", "clothing", "bear", "anthro"], "t1": 1.9, "t2": 1.78, "t3": 2.98, "t3s": 1.08, "t3p": 1.38, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=57 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 50, "n_selected": 47, "n_implied": 16, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.2128, "R": 0.8333, "F1": 0.339, "leaf_P": 0.2083, "leaf_R": 0.5556, "leaf_F1": 0.303, "n_leaf_sel": 24, "n_leaf_gt": 9, "ret_P": 0.06, "sel_given_ret": 3.3333, "over_sel": 3.92, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 27, "attempts_by_n_local": {"53": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2128, "gen_R": 0.8333, "gen_F1": 0.339, "missed": ["alpha_channel", "fingers"], "extra": ["beverage", "black_body", "black_fur", "black_necktie", "bottomwear", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "coffee_mug", "dress_shirt", "formal", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "holding_beverage", "holding_mug", "holding_object", "mug", "necktie", "pants", "pockets", "serious", "shirt", "tan_bottomwear", "tan_clothing", "tan_pants", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_body", "white_fur", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "beverage", "black_body", "black_fur", "black_necktie", "bottomwear", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "clothed", "clothing", "coffee_mug", "dress_shirt", "felid", "feline", "formal", "fur", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", 
"hair", "hair_bun", "holding_beverage", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "pants", "pockets", "serious", "shirt", "solo", "tan_bottomwear", "tan_clothing", "tan_pants", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_body", "white_fur", "white_necktie"], "stage3_selected": ["black_fur", "black_necktie", "brown_vest", "business_attire", "coffee_mug", "dress_shirt", "felid", "feline", "formal", "fur", "green_background", "grey_shirt", "hair_bun", "holding_beverage", "holding_mug", "invalid_background", "mug", "necktie", "pockets", "serious", "shirt", "simple_background", "tan_pants", "teal_shirt", "vest", "white_fur", "white_necktie"], "stage3_selected_scores": {"fur": 0.7146, "simple_background": 0.6978, "felid": 0.6418, "white_fur": 0.5834, "feline": 0.7062, "shirt": 0.7998, "black_fur": 0.7183, "necktie": 0.7314, "vest": 0.8403, "green_background": 0.6069, "dress_shirt": 0.6132, "pockets": 0.6095, "hair_bun": 0.6926, "holding_beverage": 0.7721, "coffee_mug": 0.7055, "mug": 0.8841, "grey_shirt": 0.7582, "serious": 0.5823, "holding_mug": 0.916, "black_necktie": 0.7132, "tan_pants": 0.7373, "formal": 0.5993, "business_attire": 0.5657, "brown_vest": 0.8153, "teal_shirt": 0.7474, "white_necktie": 0.6418, "invalid_background": 0.6495}, "stage3_selected_ranks": {"fur": 20, "simple_background": 24, "felid": 27, "white_fur": 40, "feline": 22, "shirt": 5, "black_fur": 18, "necktie": 17, "vest": 3, "green_background": 37, "dress_shirt": 35, "pockets": 36, "hair_bun": 25, "holding_beverage": 8, "coffee_mug": 23, "mug": 2, "grey_shirt": 11, "serious": 41, "holding_mug": 1, "black_necktie": 21, "tan_pants": 16, "formal": 38, "business_attire": 43, "brown_vest": 4, "teal_shirt": 15, "white_necktie": 28, "invalid_background": 26}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "felid": 2, "white_fur": 3, "feline": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "vest": 1, "green_background": 2, "dress_shirt": 2, 
"pockets": 3, "hair_bun": 1, "holding_beverage": 3, "coffee_mug": 3, "mug": 1, "grey_shirt": 1, "serious": 2, "holding_mug": 1, "black_necktie": 2, "tan_pants": 3, "formal": 1, "business_attire": 1, "brown_vest": 1, "teal_shirt": 3, "white_necktie": 3, "invalid_background": 1}, "extra_evidence": {"beverage": {"source": "implied"}, "black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7183}, "black_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7132}, "bottomwear": {"source": "implied"}, "brown_clothing": {"source": "implied"}, "brown_topwear": {"source": "implied"}, "brown_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8153}, "business_attire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5657}, "coffee_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7055}, "dress_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6132}, "formal": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5993}, "green_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6069}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6926}, "holding_beverage": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7721}, "holding_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7314}, "pants": {"source": "implied"}, "pockets": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6095}, "serious": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5823}, "shirt": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.7998}, "tan_bottomwear": {"source": "implied"}, "tan_clothing": {"source": "implied"}, "tan_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7373}, "teal_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8403}, "white_body": {"source": "implied"}, "white_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5834}, "white_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["text", "solo", "felid", "clothing", "anthro"], "t1": 2.15, "t2": 1.43, "t3": 4.3, "t3s": 0.88, "t3p": 1.95, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=53 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 57, "n_selected": 29, "n_implied": 5, "n_structural": 6, "n_probe": 5, "ret_R": 0.5714, "P": 0.3793, "R": 0.7857, "F1": 0.5116, "leaf_P": 0.2, "leaf_R": 0.4, "leaf_F1": 0.2667, "n_leaf_sel": 20, "n_leaf_gt": 10, "ret_P": 0.1404, "sel_given_ret": 1.375, "over_sel": 2.07, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3793, "gen_R": 0.7857, "gen_F1": 0.5116, "missed": ["fur", "human", "male"], "extra": ["anthro", "body_hair", "bored_expression", "dancer_outfit", "duo", "feral", "flash", "grin", "leg_hair", "mischievous", "raised_arms", "red_hair", "smile", "smirk", "toony_expression", "topless", "trio", "wide_grin"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "body_hair", "bored_expression", "clothed", "clothing", "dancer_outfit", "dancing", "duo", "feral", "flash", "grin", "group", "hair", "haplorhine", "leg_hair", "looking_at_viewer", "mammal", "mischievous", "primate", "raised_arms", "red_hair", "smile", "smirk", "toony_expression", "topless", "trio", "wide_grin"], "stage3_selected": ["ape", "bear", "bored_expression", "dancer_outfit", "dancing", "flash", "grin", "hair", "leg_hair", "mischievous", "primate", "raised_arms", "red_hair", "simple_background", "smirk", "toony_expression", "wide_grin"], 
"stage3_selected_scores": {"hair": 0.5485, "simple_background": 0.5541, "red_hair": 0.3689, "bear": 0.5757, "grin": 0.5711, "smirk": 0.3664, "primate": 0.8911, "dancing": 0.562, "ape": 0.9769, "raised_arms": 0.551, "leg_hair": 0.3824, "flash": 0.3227, "mischievous": 0.545, "bored_expression": 0.4389, "dancer_outfit": 0.4203, "toony_expression": 0.4737, "wide_grin": 0.5312}, "stage3_selected_ranks": {"hair": 14, "simple_background": 11, "red_hair": 52, "bear": 6, "grin": 8, "smirk": 53, "primate": 2, "dancing": 10, "ape": 1, "raised_arms": 13, "leg_hair": 48, "flash": 58, "mischievous": 15, "bored_expression": 34, "dancer_outfit": 38, "toony_expression": 26, "wide_grin": 17}, "stage3_selected_phrase_ranks": {"hair": 1, "simple_background": 1, "red_hair": 3, "bear": 1, "grin": 1, "smirk": 3, "primate": 1, "dancing": 1, "ape": 1, "raised_arms": 1, "leg_hair": 1, "flash": 2, "mischievous": 1, "bored_expression": 3, "dancer_outfit": 3, "toony_expression": 1, "wide_grin": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "body_hair": {"source": "implied"}, "bored_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4389}, "dancer_outfit": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4203}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "flash": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3227}, "grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5711}, "leg_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3824}, "mischievous": {"source": "stage3", "why": "unknown", "retrieval_score": 0.545}, "raised_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.551}, "red_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3689}, "smile": {"source": "implied"}, "smirk": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3664}, "toony_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4737}, "topless": {"source": 
"structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5312}}, "structural": ["trio", "anthro", "feral", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "group", "duo", "bear", "anthro"], "t1": 3.55, "t2": 2.0, "t3": 6.31, "t3s": 1.45, "t3p": 1.45, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=60 entity=0 copyright_filtered=1 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/k_sweep_explicit_no_why_seed42_k4.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T05:59:49.506942", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 4, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 52, "n_selected": 47, "n_implied": 20, "n_structural": 3, "n_probe": 4, "ret_R": 0.2727, "P": 0.3617, "R": 0.7727, "F1": 0.4928, "leaf_P": 0.2174, "leaf_R": 0.3846, "leaf_F1": 0.2778, "n_leaf_sel": 23, "n_leaf_gt": 13, "ret_P": 0.1154, "sel_given_ret": 2.8333, "over_sel": 2.14, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 22, "attempts_by_n_local": {"55": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3617, "gen_R": 0.7727, "gen_F1": 0.4928, "missed": ["fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["4_claws", "5_claws", "<3", "arctic_wolf", "black_hair", "bottomwear", "campfire", "canis", "demon", "denim", "denim_clothing", "determined", "electric_guitar", "flowing_hair", "jeans", "membrane_(anatomy)", "membranous_wings", "notched_ear", "pants", "pastel_background", "playing_guitar", "playing_music", "succubus", "t-pose", "tire", "torn_bottomwear", "torn_jeans", "torn_pants", "wings", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["4_claws", "5_claws", "<3", "anthro", "arctic_wolf", "bass_guitar", "black_hair", "bottomwear", "campfire", "canid", "canine", "canis", "claws", "clothed", "clothing", "demon", "denim", 
"denim_clothing", "determined", "electric_guitar", "flowing_hair", "guitar", "hair", "jeans", "mammal", "membrane_(anatomy)", "membranous_wings", "musical_instrument", "notched_ear", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "succubus", "t-pose", "tail", "tire", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "wings", "wolf"], "stage3_selected": ["4_claws", "5_claws", "arctic_wolf", "bass_guitar", "black_hair", "campfire", "claws", "demon", "determined", "electric_guitar", "flowing_hair", "guitar", "membranous_wings", "notched_ear", "pastel_background", "playing_guitar", "spade_tail", "succubus", "t-pose", "tire", "torn_bottomwear", "torn_jeans"], "stage3_selected_scores": {"claws": 0.5684, "black_hair": 0.3899, "membranous_wings": 0.4106, "demon": 0.4008, "spade_tail": 0.618, "notched_ear": 0.4315, "torn_bottomwear": 0.4362, "guitar": 0.9623, "succubus": 0.3867, "campfire": 0.4496, "arctic_wolf": 0.4908, "playing_guitar": 0.9317, "torn_jeans": 0.4824, "tire": 0.4151, "electric_guitar": 0.8664, "bass_guitar": 0.9118, "flowing_hair": 0.5669, "4_claws": 0.4516, "determined": 0.4471, "t-pose": 0.5519, "5_claws": 0.4601, "pastel_background": 0.5632}, "stage3_selected_ranks": {"claws": 11, "black_hair": 54, "membranous_wings": 49, "demon": 51, "spade_tail": 6, "notched_ear": 41, "torn_bottomwear": 39, "guitar": 1, "succubus": 55, "campfire": 36, "arctic_wolf": 29, "playing_guitar": 2, "torn_jeans": 31, "tire": 48, "electric_guitar": 5, "bass_guitar": 3, "flowing_hair": 12, "4_claws": 35, "determined": 38, "t-pose": 16, "5_claws": 33, "pastel_background": 14}, "stage3_selected_phrase_ranks": {"claws": 1, "black_hair": 4, "membranous_wings": 2, "demon": 3, "spade_tail": 1, "notched_ear": 4, "torn_bottomwear": 3, "guitar": 1, "succubus": 4, "campfire": 2, "arctic_wolf": 2, "playing_guitar": 1, "torn_jeans": 1, "tire": 4, "electric_guitar": 4, "bass_guitar": 2, 
"flowing_hair": 1, "4_claws": 4, "determined": 3, "t-pose": 2, "5_claws": 3, "pastel_background": 1}, "extra_evidence": {"4_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4516}, "5_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4601}, "<3": {"source": "probe"}, "arctic_wolf": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4908}, "black_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3899}, "bottomwear": {"source": "implied"}, "campfire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4496}, "canis": {"source": "implied"}, "demon": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4008}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "determined": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4471}, "electric_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8664}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5669}, "jeans": {"source": "implied"}, "membrane_(anatomy)": {"source": "implied"}, "membranous_wings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4106}, "notched_ear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4315}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5632}, "playing_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9317}, "playing_music": {"source": "implied"}, "succubus": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3867}, "t-pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5519}, "tire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4151}, "torn_bottomwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4362}, "torn_jeans": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4824}, "torn_pants": {"source": "implied"}, "wings": {"source": "implied"}, "wolf": {"source": 
"implied"}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "canid", "anthro", "<3"], "t1": 1.9, "t2": 2.52, "t3": 5.63, "t3s": 3.71, "t3p": 3.22, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=55 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 53, "n_selected": 31, "n_implied": 4, "n_structural": 4, "n_probe": 3, "ret_R": 0.75, "P": 0.0968, "R": 0.75, "F1": 0.1714, "leaf_P": 0.1111, "leaf_R": 0.75, "leaf_F1": 0.1935, "n_leaf_sel": 27, "n_leaf_gt": 4, "ret_P": 0.0566, "sel_given_ret": 1.0, "over_sel": 7.75, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 23, "attempts_by_n_local": {"54": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.0968, "gen_R": 0.75, "gen_F1": 0.1714, "missed": ["smile"], "extra": ["ambiguous_gender", "bear", "black_eyelids", "black_inner_ear", "covering", "covering_crotch", "covering_face", "eye_spots", "feral", "floating_hands", "floating_head", "full-length_portrait", "glistening", "glistening_eyes", "jagged_mouth", "light_nose", "looking_away", "mammal", "mouth_closed", "no_irises", "nude", "portrait", "round_eyes", "round_nose", "spots", "spotted_back", "toony", "yellow_background"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "bear", "black_eyelids", "black_inner_ear", "covering", "covering_crotch", "covering_face", "eye_spots", "feral", "floating_hands", "floating_head", "full-length_portrait", "glistening", "glistening_eyes", "jagged_mouth", "light_nose", "looking_away", "mammal", "mouth_closed", "no_irises", "nude", "portrait", "red_nose", "round_eyes", "round_nose", "solo", "spots", "spotted_back", "tan_body", "toony", "yellow_background"], "stage3_selected": ["black_eyelids", "black_inner_ear", 
"covering_crotch", "covering_face", "eye_spots", "floating_hands", "floating_head", "full-length_portrait", "glistening_eyes", "jagged_mouth", "light_nose", "looking_away", "mouth_closed", "no_irises", "red_nose", "round_eyes", "round_nose", "spots", "spotted_back", "tan_body", "toony", "white_background", "yellow_background"], "stage3_selected_scores": {"white_background": 0.6356, "tan_body": 0.6834, "spots": 0.6374, "full-length_portrait": 0.4759, "toony": 0.6426, "looking_away": 0.5307, "glistening_eyes": 0.5244, "mouth_closed": 0.662, "red_nose": 0.7489, "yellow_background": 0.5688, "covering_crotch": 0.4463, "black_inner_ear": 0.6379, "covering_face": 0.4644, "floating_hands": 0.4635, "no_irises": 0.5909, "floating_head": 0.5049, "jagged_mouth": 0.5874, "light_nose": 0.6896, "round_eyes": 0.8869, "black_eyelids": 0.6551, "eye_spots": 0.7021, "spotted_back": 0.7237, "round_nose": 0.5839}, "stage3_selected_ranks": {"white_background": 27, "tan_body": 13, "spots": 26, "full-length_portrait": 51, "toony": 24, "looking_away": 42, "glistening_eyes": 43, "mouth_closed": 19, "red_nose": 4, "yellow_background": 37, "covering_crotch": 56, "black_inner_ear": 25, "covering_face": 54, "floating_hands": 55, "no_irises": 33, "floating_head": 47, "jagged_mouth": 34, "light_nose": 12, "round_eyes": 2, "black_eyelids": 21, "eye_spots": 11, "spotted_back": 7, "round_nose": 35}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 4, "spots": 4, "full-length_portrait": 4, "toony": 1, "looking_away": 4, "glistening_eyes": 4, "mouth_closed": 2, "red_nose": 1, "yellow_background": 2, "covering_crotch": 4, "black_inner_ear": 4, "covering_face": 3, "floating_hands": 4, "no_irises": 3, "floating_head": 2, "jagged_mouth": 4, "light_nose": 2, "round_eyes": 1, "black_eyelids": 4, "eye_spots": 3, "spotted_back": 2, "round_nose": 4}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "bear": {"source": "probe"}, "black_eyelids": {"source": "stage3", "why": 
"unknown", "retrieval_score": 0.6551}, "black_inner_ear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6379}, "covering": {"source": "implied"}, "covering_crotch": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4463}, "covering_face": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4644}, "eye_spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7021}, "feral": {"source": "structural"}, "floating_hands": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4635}, "floating_head": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5049}, "full-length_portrait": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4759}, "glistening": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5244}, "jagged_mouth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5874}, "light_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6896}, "looking_away": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5307}, "mammal": {"source": "implied"}, "mouth_closed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.662}, "no_irises": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5909}, "nude": {"source": "structural"}, "portrait": {"source": "implied"}, "round_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8869}, "round_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5839}, "spots": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6374}, "spotted_back": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7237}, "toony": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6426}, "yellow_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5688}}, "structural": ["solo", "feral", "ambiguous_gender", "nude"], "probe": ["solo", "simple_background", "bear"], "t1": 2.73, "t2": 1.16, "t3": 1.28, "t3s": 0.66, "t3p": 1.43, 
"err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=54 entity=0 copyright_filtered=2 generic_char_to_general=0 unknown_type=5"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 55, "n_selected": 28, "n_implied": 11, "n_structural": 4, "n_probe": 4, "ret_R": 0.7143, "P": 0.4286, "R": 0.8571, "F1": 0.5714, "leaf_P": 0.4667, "leaf_R": 0.7778, "leaf_F1": 0.5833, "n_leaf_sel": 15, "n_leaf_gt": 9, "ret_P": 0.1818, "sel_given_ret": 1.2, "over_sel": 2.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 12, "attempts_by_n_local": {"55": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4286, "gen_R": 0.8571, "gen_F1": 0.5714, "missed": ["romantic", "romantic_couple"], "extra": ["<3", "coat", "diaper", "heterochromia", "holding_object", "holding_plushie", "looking_at_viewer", "pull-ups_(diaper)", "red_clothing", "red_coat", "red_topwear", "topwear", "white_clothing", "white_coat", "white_topwear", "wide_eyed"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "diaper", "duo", "heterochromia", "holding_object", "holding_plushie", "lagomorph", "leporid", "looking_at_viewer", "mammal", "plushie", "pull-ups_(diaper)", "rabbit", "red_clothing", "red_coat", "red_topwear", "teal_eyes", "topwear", "white_clothing", "white_coat", "white_topwear", "wide_eyed"], "stage3_selected": ["blue_eyes", "blush", "coat", "heterochromia", "holding_plushie", "leporid", "pull-ups_(diaper)", "rabbit", "red_coat", "teal_eyes", "white_coat", 
"wide_eyed"], "stage3_selected_scores": {"blush": 0.6084, "blue_eyes": 0.6154, "leporid": 0.5313, "rabbit": 0.5941, "heterochromia": 0.4304, "coat": 0.6386, "wide_eyed": 0.4619, "teal_eyes": 0.6285, "holding_plushie": 0.7794, "white_coat": 0.5255, "pull-ups_(diaper)": 0.5206, "red_coat": 0.5209}, "stage3_selected_ranks": {"blush": 13, "blue_eyes": 12, "leporid": 26, "rabbit": 14, "heterochromia": 51, "coat": 7, "wide_eyed": 41, "teal_eyes": 8, "holding_plushie": 2, "white_coat": 29, "pull-ups_(diaper)": 31, "red_coat": 30}, "stage3_selected_phrase_ranks": {"blush": 1, "blue_eyes": 1, "leporid": 3, "rabbit": 1, "heterochromia": 4, "coat": 1, "wide_eyed": 4, "teal_eyes": 1, "holding_plushie": 1, "white_coat": 3, "pull-ups_(diaper)": 2, "red_coat": 4}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6386}, "diaper": {"source": "implied"}, "heterochromia": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4304}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7794}, "looking_at_viewer": {"source": "structural"}, "pull-ups_(diaper)": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5206}, "red_clothing": {"source": "implied"}, "red_coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5209}, "red_topwear": {"source": "implied"}, "topwear": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_coat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5255}, "white_topwear": {"source": "implied"}, "wide_eyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4619}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["duo", "blush", "anthro", "<3"], "t1": 1.65, "t2": 1.19, "t3": 4.13, "t3s": 1.49, "t3p": 1.11, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 
split: general=55 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 62, "n_selected": 72, "n_implied": 25, "n_structural": 4, "n_probe": 7, "ret_R": 0.56, "P": 0.3194, "R": 0.92, "F1": 0.4742, "leaf_P": 0.1944, "leaf_R": 0.4667, "leaf_F1": 0.2745, "n_leaf_sel": 36, "n_leaf_gt": 15, "ret_P": 0.2258, "sel_given_ret": 1.6429, "over_sel": 2.88, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 3, "dupe_indices_total": 0, "kept_total": 39, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "3": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3194, "gen_R": 0.92, "gen_F1": 0.4742, "missed": ["looking_at_another", "standing"], "extra": ["4_claws", "arms_out", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "blush", "brown_clothing", "brown_shirt", "brown_topwear", "buckteeth", "cheek_markings", "cross_fox", "eye_markings", "felid", "gloves_(marking)", "grey_bottomwear", "grey_clothing", "grey_pants", "grey_shirt", "grey_topwear", "hand_in_pocket", "long_arms", "looking_at_viewer", "marble_fox", "one_eye_half-closed", "open_mouth", "open_smile", "pockets", "rabbit_ears", "red_fox", "smile", "snout", "snout_markings", "tail", "tail_markings", "tan_body", "tan_fur", "teeth", "undershirt", "white_body", "white_clothing", "white_fur", "white_shirt", "white_topwear", "wide_eyed"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", 
"head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["4_claws", "anthro", "arms_out", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "blush", "bottomwear", "brown_clothing", "brown_shirt", "brown_topwear", "buckteeth", "canid", "canine", "cheek_markings", "claws", "clothed", "clothing", "cross_fox", "crossed_arms", "duo", "eye_markings", "facial_markings", "felid", "fox", "fur", "gloves_(marking)", "grey_background", "grey_bottomwear", "grey_clothing", "grey_pants", "grey_shirt", "grey_topwear", "hand_in_pocket", "head_markings", "lagomorph", "leporid", "long_arms", "looking_at_viewer", "mammal", "marble_fox", "markings", "one_eye_half-closed", "open_mouth", "open_smile", "overalls", "pants", "pockets", "rabbit", "rabbit_ears", "red_fox", "shirt", "smile", "snout", "snout_markings", "tail", "tail_markings", "tan_body", "tan_fur", "teeth", "topwear", "undershirt", "white_body", "white_clothing", "white_fur", "white_shirt", "white_topwear", "wide_eyed"], "stage3_selected": ["4_claws", "arms_out", "black_pants", "blue_overalls", "blue_shirt", "brown_shirt", "buckteeth", "cheek_markings", "claws", "cross_fox", "crossed_arms", "eye_markings", "facial_markings", "fox", "fur", "gloves_(marking)", "grey_background", "grey_pants", "grey_shirt", "hand_in_pocket", "lagomorph", "leporid", "long_arms", "marble_fox", "one_eye_half-closed", "open_mouth", "open_smile", "overalls", "rabbit", "rabbit_ears", "shirt", "snout_markings", "tail_markings", "tan_fur", "undershirt", "white_fur", "white_shirt", "white_topwear", "wide_eyed"], "stage3_selected_scores": {"fur": 0.6548, "open_mouth": 0.6344, "claws": 0.6317, "white_fur": 0.5166, "fox": 0.6393, "shirt": 0.7497, "lagomorph": 0.5947, "leporid": 0.5837, "rabbit": 0.6521, "tan_fur": 0.5207, "tail_markings": 0.6221, "open_smile": 0.5285, 
"grey_background": 0.6797, "gloves_(marking)": 0.6278, "buckteeth": 0.5324, "facial_markings": 0.6956, "crossed_arms": 0.7298, "wide_eyed": 0.4682, "white_topwear": 0.768, "white_shirt": 0.8206, "overalls": 0.8782, "black_pants": 0.8338, "blue_shirt": 0.7663, "hand_in_pocket": 0.5675, "eye_markings": 0.637, "snout_markings": 0.6224, "grey_shirt": 0.693, "grey_pants": 0.7578, "undershirt": 0.7074, "rabbit_ears": 0.6003, "cross_fox": 0.4701, "one_eye_half-closed": 0.4534, "brown_shirt": 0.7778, "cheek_markings": 0.6222, "4_claws": 0.5925, "long_arms": 0.5862, "marble_fox": 0.5584, "arms_out": 0.5673, "blue_overalls": 0.9205}, "stage3_selected_ranks": {"fur": 22, "open_mouth": 26, "claws": 28, "white_fur": 60, "fox": 24, "shirt": 11, "lagomorph": 39, "leporid": 43, "rabbit": 23, "tan_fur": 59, "tail_markings": 32, "open_smile": 58, "grey_background": 19, "gloves_(marking)": 29, "buckteeth": 56, "facial_markings": 16, "crossed_arms": 13, "wide_eyed": 63, "white_topwear": 6, "white_shirt": 4, "overalls": 2, "black_pants": 3, "blue_shirt": 7, "hand_in_pocket": 47, "eye_markings": 25, "snout_markings": 30, "grey_shirt": 17, "grey_pants": 9, "undershirt": 15, "rabbit_ears": 36, "cross_fox": 62, "one_eye_half-closed": 64, "brown_shirt": 5, "cheek_markings": 31, "4_claws": 40, "long_arms": 42, "marble_fox": 50, "arms_out": 48, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "white_fur": 4, "fox": 1, "shirt": 1, "lagomorph": 3, "leporid": 4, "rabbit": 1, "tan_fur": 3, "tail_markings": 4, "open_smile": 2, "grey_background": 1, "gloves_(marking)": 3, "buckteeth": 3, "facial_markings": 1, "crossed_arms": 1, "wide_eyed": 3, "white_topwear": 3, "white_shirt": 1, "overalls": 1, "black_pants": 1, "blue_shirt": 3, "hand_in_pocket": 3, "eye_markings": 2, "snout_markings": 3, "grey_shirt": 4, "grey_pants": 2, "undershirt": 3, "rabbit_ears": 1, "cross_fox": 4, "one_eye_half-closed": 4, "brown_shirt": 2, "cheek_markings": 4, "4_claws": 3, 
"long_arms": 2, "marble_fox": 2, "arms_out": 3, "blue_overalls": 1}, "extra_evidence": {"4_claws": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5925}, "arms_out": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5673}, "black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8338}, "blue_clothing": {"source": "implied"}, "blue_overalls": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9205}, "blue_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7663}, "blue_topwear": {"source": "implied"}, "blush": {"source": "probe"}, "brown_clothing": {"source": "implied"}, "brown_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7778}, "brown_topwear": {"source": "implied"}, "buckteeth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5324}, "cheek_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6222}, "cross_fox": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4701}, "eye_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.637}, "felid": {"source": "probe"}, "gloves_(marking)": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6278}, "grey_bottomwear": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7578}, "grey_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.693}, "grey_topwear": {"source": "implied"}, "hand_in_pocket": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5675}, "long_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5862}, "looking_at_viewer": {"source": "structural"}, "marble_fox": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5584}, "one_eye_half-closed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4534}, "open_mouth": {"source": "stage3", "why": 
"unknown", "retrieval_score": 0.6344}, "open_smile": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5285}, "pockets": {"source": "implied"}, "rabbit_ears": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6003}, "red_fox": {"source": "implied"}, "smile": {"source": "implied"}, "snout": {"source": "implied"}, "snout_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6224}, "tail": {"source": "implied"}, "tail_markings": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6221}, "tan_body": {"source": "implied"}, "tan_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5207}, "teeth": {"source": "implied"}, "undershirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7074}, "white_body": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5166}, "white_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8206}, "white_topwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.768}, "wide_eyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4682}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "felid", "duo", "clothing", "canid", "blush", "anthro"], "t1": 1.85, "t2": 1.4, "t3": 11.42, "t3s": 1.32, "t3p": 2.01, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=63 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 77, "n_selected": 65, "n_implied": 10, "n_structural": 5, "n_probe": 5, "ret_R": 0.4615, "P": 0.1538, "R": 0.7692, "F1": 0.2564, "leaf_P": 0.0408, "leaf_R": 0.3333, "leaf_F1": 0.0727, "n_leaf_sel": 49, "n_leaf_gt": 6, "ret_P": 0.0779, "sel_given_ret": 1.6667, "over_sel": 5.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 46, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "14": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1538, "gen_R": 0.7692, "gen_F1": 0.2564, "missed": ["fur", "white_body", "white_fur"], "extra": ["2_panel_comic", "3_panel_comic", "<3", "agamid", "anthro", "bear", "black_speech_bubble", "bodily_fluids", "border", "bubble", "clothed", "clothing", "comic_panel", "dark_theme", "darkness", "defeated", "domestic_goat", "duo", "evil_look", "face_mask", "felid", "flask", "frilled_lizard", "gecko", "goo_creature", "group", "human_only", "hunched_over", "iguanid", "light", "lying_on_ground", "male_human", "medieval", "medieval_fantasy", "noseless", "not_furry", "note", "note_pad", "on_ground", "patchwork_creature", "rubble", "snot", "snot_bubble", "solo", "speech_bubble", "standing_over", "striped_body", "stripes", "text_message", "thought_bubble", "threatening", "topwear", "torch", "unknown_species", "vest"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": 
["2_panel_comic", "3_panel_comic", "<3", "agamid", "anthro", "bear", "black_speech_bubble", "bodily_fluids", "border", "bovid", "bubble", "caprine", "clothed", "clothing", "comic_panel", "dark_theme", "darkness", "defeated", "dialogue", "domestic_goat", "duo", "evil_look", "face_mask", "felid", "flask", "frilled_lizard", "gecko", "goat", "goo_creature", "group", "human", "human_only", "hunched_over", "iguanid", "light", "lizard", "lying_on_ground", "male_human", "mammal", "medieval", "medieval_fantasy", "noseless", "not_furry", "note", "note_pad", "on_ground", "patchwork_creature", "reptile", "rubble", "scalie", "snot", "snot_bubble", "solo", "speech_bubble", "standing_over", "striped_body", "stripes", "text", "text_message", "thought_bubble", "threatening", "topwear", "torch", "unknown_species", "vest"], "stage3_selected": ["2_panel_comic", "3_panel_comic", "black_speech_bubble", "border", "bovid", "bubble", "caprine", "comic_panel", "dark_theme", "darkness", "defeated", "dialogue", "domestic_goat", "evil_look", "face_mask", "flask", "frilled_lizard", "gecko", "goat", "goo_creature", "human", "human_only", "hunched_over", "iguanid", "light", "lizard", "lying_on_ground", "male_human", "medieval", "medieval_fantasy", "noseless", "note", "note_pad", "on_ground", "patchwork_creature", "rubble", "snot_bubble", "speech_bubble", "standing_over", "striped_body", "text_message", "thought_bubble", "threatening", "torch", "unknown_species", "vest"], "stage3_selected_scores": {"dialogue": 0.6426, "human": 0.669, "speech_bubble": 0.7584, "bovid": 0.6057, "caprine": 0.638, "border": 0.5087, "lizard": 0.839, "striped_body": 0.5492, "goat": 0.7768, "light": 0.7793, "unknown_species": 0.7697, "on_ground": 0.674, "goo_creature": 0.5154, "vest": 0.502, "thought_bubble": 0.6581, "bubble": 0.7508, "human_only": 0.5271, "noseless": 0.4577, "gecko": 0.6408, "torch": 0.5677, "defeated": 0.6174, "lying_on_ground": 0.7947, "face_mask": 0.5493, "iguanid": 0.6016, "medieval": 0.5307, 
"comic_panel": 0.6176, "threatening": 0.5625, "darkness": 0.8329, "dark_theme": 0.5945, "note": 0.7399, "domestic_goat": 0.604, "text_message": 0.5644, "flask": 0.5338, "rubble": 0.6096, "black_speech_bubble": 0.6325, "patchwork_creature": 0.6123, "hunched_over": 0.5729, "frilled_lizard": 0.675, "standing_over": 0.7647, "3_panel_comic": 0.6265, "snot_bubble": 0.612, "evil_look": 0.5665, "2_panel_comic": 0.6184, "note_pad": 0.558, "medieval_fantasy": 0.5207, "male_human": 0.5565}, "stage3_selected_ranks": {"dialogue": 22, "human": 18, "speech_bubble": 8, "bovid": 36, "caprine": 24, "border": 71, "lizard": 1, "striped_body": 60, "goat": 5, "light": 4, "unknown_species": 6, "on_ground": 16, "goo_creature": 69, "vest": 74, "thought_bubble": 19, "bubble": 9, "human_only": 65, "noseless": 76, "gecko": 23, "torch": 48, "defeated": 30, "lying_on_ground": 3, "face_mask": 59, "iguanid": 38, "medieval": 64, "comic_panel": 29, "threatening": 53, "darkness": 2, "dark_theme": 41, "note": 10, "domestic_goat": 37, "text_message": 52, "flask": 62, "rubble": 35, "black_speech_bubble": 25, "patchwork_creature": 32, "hunched_over": 46, "frilled_lizard": 15, "standing_over": 7, "3_panel_comic": 27, "snot_bubble": 33, "evil_look": 49, "2_panel_comic": 28, "note_pad": 56, "medieval_fantasy": 67, "male_human": 57}, "stage3_selected_phrase_ranks": {"dialogue": 3, "human": 1, "speech_bubble": 1, "bovid": 3, "caprine": 2, "border": 3, "lizard": 1, "striped_body": 4, "goat": 1, "light": 1, "unknown_species": 1, "on_ground": 3, "goo_creature": 3, "vest": 3, "thought_bubble": 2, "bubble": 1, "human_only": 3, "noseless": 4, "gecko": 3, "torch": 3, "defeated": 2, "lying_on_ground": 1, "face_mask": 1, "iguanid": 4, "medieval": 4, "comic_panel": 4, "threatening": 4, "darkness": 1, "dark_theme": 3, "note": 1, "domestic_goat": 4, "text_message": 3, "flask": 2, "rubble": 4, "black_speech_bubble": 4, "patchwork_creature": 2, "hunched_over": 4, "frilled_lizard": 2, "standing_over": 1, "3_panel_comic": 
2, "snot_bubble": 2, "evil_look": 4, "2_panel_comic": 3, "note_pad": 4, "medieval_fantasy": 4, "male_human": 2}, "extra_evidence": {"2_panel_comic": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6184}, "3_panel_comic": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6265}, "<3": {"source": "probe"}, "agamid": {"source": "implied"}, "anthro": {"source": "probe"}, "bear": {"source": "probe"}, "black_speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6325}, "bodily_fluids": {"source": "implied"}, "border": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5087}, "bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7508}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "comic_panel": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6176}, "dark_theme": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5945}, "darkness": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8329}, "defeated": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6174}, "domestic_goat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.604}, "duo": {"source": "structural"}, "evil_look": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5665}, "face_mask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5493}, "felid": {"source": "probe"}, "flask": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5338}, "frilled_lizard": {"source": "stage3", "why": "unknown", "retrieval_score": 0.675}, "gecko": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6408}, "goo_creature": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5154}, "group": {"source": "structural"}, "human_only": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5271}, "hunched_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5729}, "iguanid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6016}, 
"light": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7793}, "lying_on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7947}, "male_human": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5565}, "medieval": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5307}, "medieval_fantasy": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5207}, "noseless": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4577}, "not_furry": {"source": "implied"}, "note": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7399}, "note_pad": {"source": "stage3", "why": "unknown", "retrieval_score": 0.558}, "on_ground": {"source": "stage3", "why": "unknown", "retrieval_score": 0.674}, "patchwork_creature": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6123}, "rubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6096}, "snot": {"source": "implied"}, "snot_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.612}, "solo": {"source": "structural"}, "speech_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7584}, "standing_over": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7647}, "striped_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5492}, "stripes": {"source": "implied"}, "text_message": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5644}, "thought_bubble": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6581}, "threatening": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5625}, "topwear": {"source": "implied"}, "torch": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5677}, "unknown_species": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7697}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.502}}, "structural": ["solo", "duo", "group", "clothed", "text"], "probe": ["group", "felid", "bear", "anthro", "<3"], "t1": 3.39, 
"t2": 1.61, "t3": 11.09, "t3s": 1.45, "t3p": 2.12, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=74 entity=1 copyright_filtered=2 generic_char_to_general=0 unknown_type=1"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 94, "n_selected": 67, "n_implied": 11, "n_structural": 3, "n_probe": 6, "ret_R": 0.7333, "P": 0.194, "R": 0.8667, "F1": 0.3171, "leaf_P": 0.1509, "leaf_R": 0.6667, "leaf_F1": 0.2462, "n_leaf_sel": 53, "n_leaf_gt": 12, "ret_P": 0.117, "sel_given_ret": 1.1818, "over_sel": 4.47, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 53, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "37": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 1, "char_F1": 0.0, "gen_P": 0.197, "gen_R": 0.8667, "gen_F1": 0.321, "missed": ["angry", "eyes_closed"], "extra": ["2_frame_animation", "<3", "accessory", "animated", "animated_png", "anime_eyes", "annoyed", "annoyed_expression", "anthro", "applying_makeup", "background_character", "bedroom", "big_eyes", "blush", "character_request", "clothing", "color_swatch", "contest", "curtains_open", "distracting_watermark", "english_text", "eyes", "font", "hair_accessory", "hair_sticks", "highlights_(coloring)", "humanoid", "lipstick", "long_hair", "looking_away", "lying_on_bed", "membrane_(anatomy)", "model_sheet", "name_tag", "on_bed", "pajamas", "palette", "path_lines", "personal_grooming", "playful", "purple_eyes", "purple_hands", "purple_highlights", "purple_membrane", "resting", "restricted_palette", "romantic", "romantic_ambiance", "scene_kid", "scenery", "scenery_porn", "sleeping_together", "stated_age", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", 
"furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["2_frame_animation", "<3", "accessory", "animated", "animated_png", "anime_eyes", "annoyed", "annoyed_expression", "anthro", "applying_makeup", "background_character", "bed", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "blush", "character_request", "clothing", "color_swatch", "contest", "curtains_open", "distracting_watermark", "duo", "english_text", "eyes", "eyeshadow", "font", "furniture", "green_eyes", "hair", "hair_accessory", "hair_sticks", "highlights_(coloring)", "humanoid", "lipstick", "long_hair", "looking_away", "lying", "lying_on_bed", "makeup", "membrane_(anatomy)", "model_sheet", "name_tag", "on_bed", "pajamas", "palette", "path_lines", "personal_grooming", "playful", "purple_eyes", "purple_hair", "purple_hands", "purple_highlights", "purple_membrane", "resting", "restricted_palette", "romantic", "romantic_ambiance", "scene_kid", "scenery", "scenery_porn", "sleeping", "sleeping_together", "stated_age", "text", "watermark"], "stage3_selected": ["2_frame_animation", "animated", "animated_png", "anime_eyes", "annoyed", "annoyed_expression", "applying_makeup", "background_character", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "blurred_background", "character_request", "color_swatch", "contest", "curtains_open", "distracting_watermark", "duo", "english_text", "eyes", "eyeshadow", "font", "green_eyes", "hair", "hair_sticks", "lipstick", "long_hair", "looking_away", "lying", "lying_on_bed", "makeup", "model_sheet", "name_tag", "pajamas", "palette", "path_lines", "playful", "purple_eyes", "purple_hair", "purple_hands", "purple_highlights", "purple_membrane", "resting", "restricted_palette", "romantic_ambiance", "scene_kid", "scenery", "scenery_porn", "sleeping", "sleeping_together", "stated_age", "text"], "stage3_selected_scores": {"hair": 0.6035, "duo": 0.4376, "text": 0.6011, "blue_eyes": 0.6018, "lying": 0.4498, "green_eyes": 0.5992, 
"long_hair": 0.459, "blonde_hair": 0.599, "purple_eyes": 0.4336, "purple_hair": 0.5644, "makeup": 0.5968, "eyeshadow": 0.4766, "lipstick": 0.4877, "bedroom": 0.4904, "sleeping": 0.6031, "model_sheet": 0.4234, "looking_away": 0.4294, "big_eyes": 0.4292, "annoyed": 0.5731, "blurred_background": 0.4119, "romantic_ambiance": 0.4804, "restricted_palette": 0.4771, "lying_on_bed": 0.4097, "color_swatch": 0.4617, "distracting_watermark": 0.5001, "pajamas": 0.3756, "playful": 0.4466, "scenery": 0.4938, "purple_highlights": 0.4302, "name_tag": 0.3238, "character_request": 0.3755, "path_lines": 0.4129, "resting": 0.5146, "background_character": 0.3893, "annoyed_expression": 0.7254, "curtains_open": 0.4194, "sleeping_together": 0.5087, "scene_kid": 0.4097, "anime_eyes": 0.4409, "hair_sticks": 0.5301, "stated_age": 0.4306, "palette": 0.6685, "contest": 0.3494, "purple_hands": 0.5399, "scenery_porn": 0.4297, "purple_membrane": 0.5791, "applying_makeup": 0.4732, "font": 0.5303, "eyes": 0.8951, "english_text": 0.4193, "animated_png": 0.4721, "animated": 0.3974, "2_frame_animation": 0.4462}, "stage3_selected_ranks": {"hair": 5, "duo": 56, "text": 8, "blue_eyes": 7, "lying": 47, "green_eyes": 9, "long_hair": 43, "blonde_hair": 10, "purple_eyes": 57, "purple_hair": 14, "makeup": 11, "eyeshadow": 37, "lipstick": 34, "bedroom": 33, "sleeping": 6, "model_sheet": 67, "looking_away": 62, "big_eyes": 63, "annoyed": 13, "blurred_background": 73, "romantic_ambiance": 35, "restricted_palette": 36, "lying_on_bed": 74, "color_swatch": 42, "distracting_watermark": 29, "pajamas": 88, "playful": 49, "scenery": 31, "purple_highlights": 60, "name_tag": 96, "character_request": 89, "path_lines": 72, "resting": 24, "background_character": 83, "annoyed_expression": 2, "curtains_open": 69, "sleeping_together": 25, "scene_kid": 75, "anime_eyes": 53, "hair_sticks": 22, "stated_age": 58, "palette": 3, "contest": 93, "purple_hands": 19, "scenery_porn": 61, "purple_membrane": 12, "applying_makeup": 39, 
"font": 21, "eyes": 1, "english_text": 70, "animated_png": 40, "animated": 79, "2_frame_animation": 50}, "stage3_selected_phrase_ranks": {"hair": 1, "duo": 2, "text": 1, "blue_eyes": 1, "lying": 1, "green_eyes": 1, "long_hair": 3, "blonde_hair": 1, "purple_eyes": 2, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "lipstick": 2, "bedroom": 1, "sleeping": 1, "model_sheet": 1, "looking_away": 3, "big_eyes": 3, "annoyed": 2, "blurred_background": 4, "romantic_ambiance": 1, "restricted_palette": 2, "lying_on_bed": 4, "color_swatch": 3, "distracting_watermark": 1, "pajamas": 3, "playful": 1, "scenery": 2, "purple_highlights": 3, "name_tag": 3, "character_request": 3, "path_lines": 2, "resting": 1, "background_character": 2, "annoyed_expression": 1, "curtains_open": 2, "sleeping_together": 2, "scene_kid": 4, "anime_eyes": 2, "hair_sticks": 2, "stated_age": 3, "palette": 1, "contest": 2, "purple_hands": 2, "scenery_porn": 3, "purple_membrane": 1, "applying_makeup": 4, "font": 1, "eyes": 1, "english_text": 4, "animated_png": 1, "animated": 4, "2_frame_animation": 3}, "extra_evidence": {"2_frame_animation": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4462}, "<3": {"source": "probe"}, "accessory": {"source": "implied"}, "animated": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3974}, "animated_png": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4721}, "anime_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4409}, "annoyed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5731}, "annoyed_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7254}, "anthro": {"source": "probe"}, "applying_makeup": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4732}, "background_character": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3893}, "bedroom": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4904}, "big_eyes": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.4292}, "blush": {"source": "probe"}, "character_request": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3755}, "clothing": {"source": "implied"}, "color_swatch": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4617}, "contest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3494}, "curtains_open": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4194}, "distracting_watermark": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5001}, "english_text": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4193}, "eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8951}, "font": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5303}, "hair_accessory": {"source": "implied"}, "hair_sticks": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5301}, "highlights_(coloring)": {"source": "implied"}, "humanoid": {"source": "structural"}, "lipstick": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4877}, "long_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.459}, "looking_away": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4294}, "lying_on_bed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4097}, "membrane_(anatomy)": {"source": "implied"}, "model_sheet": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4234}, "name_tag": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3238}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3756}, "palette": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6685}, "path_lines": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4129}, "personal_grooming": {"source": "implied"}, "playful": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4466}, "purple_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4336}, "purple_hands": {"source": 
"stage3", "why": "unknown", "retrieval_score": 0.5399}, "purple_highlights": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4302}, "purple_membrane": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5791}, "resting": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5146}, "restricted_palette": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4771}, "romantic": {"source": "implied"}, "romantic_ambiance": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4804}, "scene_kid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4097}, "scenery": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4938}, "scenery_porn": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4297}, "sleeping_together": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5087}, "stated_age": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4306}, "watermark": {"source": "implied"}}, "structural": ["duo", "humanoid", "text"], "probe": ["text", "simple_background", "duo", "blush", "anthro", "<3"], "t1": 2.32, "t2": 1.99, "t3": 12.04, "t3s": 2.47, "t3p": 2.14, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=97 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=8"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 87, "n_selected": 55, "n_implied": 14, "n_structural": 4, "n_probe": 5, "ret_R": 0.5455, "P": 0.1818, "R": 0.9091, "F1": 0.303, "leaf_P": 0.1389, "leaf_R": 0.7143, "leaf_F1": 0.2326, "n_leaf_sel": 36, "n_leaf_gt": 7, "ret_P": 0.069, "sel_given_ret": 1.6667, "over_sel": 5.0, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 37, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "32": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1818, "gen_R": 0.9091, "gen_F1": 0.303, "missed": ["open_mouth"], "extra": ["actual_fur", "animal_humanoid", "anthro", "belly", "big_nose", "blue_inner_ear_fluff", "blue_stripes", "blue_tail", "bored_expression", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "expressions", "facial_stripes", "fluffy_fur", "fox_humanoid", "glistening", "glistening_body", "glistening_fur", "glistening_tail", "gradient_tail", "half_body", "humanoid", "inner_ear_fluff", "jumper", "jumping", "light_tail", "lotus_pose", "male", "mammal_humanoid", "midriff", "pink_ears", "pink_stripes", "pink_tongue", "purple_belly", "slim_humanoid", "stripes", "tail", "tail_tuft", "tongue", "tuft", "white_inner_ear_fluff", "white_nose", "wolf_humanoid"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["actual_fur", "animal_humanoid", "anthro", "belly", "big_nose", "blue_eyes", 
"blue_inner_ear_fluff", "blue_nose", "blue_stripes", "blue_tail", "bored_expression", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "expressions", "facial_stripes", "fluffy_fur", "fox_humanoid", "fur", "glistening", "glistening_body", "glistening_fur", "glistening_tail", "gradient_tail", "half_body", "humanoid", "inner_ear_fluff", "jumper", "jumping", "light_tail", "lotus_pose", "male", "mammal", "mammal_humanoid", "midriff", "pink_ears", "pink_stripes", "pink_tongue", "purple_belly", "purple_body", "slim_humanoid", "solo", "stripes", "tail", "tail_tuft", "tongue", "tuft", "white_body", "white_fur", "white_inner_ear_fluff", "white_nose", "wolf_humanoid"], "stage3_selected": ["actual_fur", "animal_humanoid", "big_nose", "blue_eyes", "blue_inner_ear_fluff", "blue_nose", "blue_stripes", "blue_tail", "blurred_background", "bored_expression", "canine_humanoid", "expressions", "facial_stripes", "fluffy_fur", "fox_humanoid", "glistening_fur", "glistening_tail", "gradient_tail", "half_body", "jumper", "jumping", "light_tail", "lotus_pose", "midriff", "pink_ears", "pink_stripes", "pink_tongue", "purple_belly", "purple_body", "simple_background", "slim_humanoid", "stripes", "tail_tuft", "white_fur", "white_inner_ear_fluff", "white_nose", "wolf_humanoid"], "stage3_selected_scores": {"simple_background": 0.5948, "blue_eyes": 0.5995, "white_fur": 0.5995, "animal_humanoid": 0.6159, "stripes": 0.6068, "purple_body": 0.564, "midriff": 0.3707, "tail_tuft": 0.4994, "pink_tongue": 0.4215, "canine_humanoid": 0.9003, "blue_nose": 0.6032, "fox_humanoid": 0.8204, "blue_tail": 0.5411, "blurred_background": 0.4989, "white_inner_ear_fluff": 0.597, "wolf_humanoid": 0.819, "jumping": 0.6014, "pink_ears": 0.5255, "glistening_fur": 0.5349, "blue_stripes": 0.6748, "big_nose": 0.476, "white_nose": 0.5269, "glistening_tail": 0.5986, "expressions": 0.4957, "light_tail": 0.5671, "pink_stripes": 0.682, "blue_inner_ear_fluff": 0.4727, "fluffy_fur": 0.5593, 
"purple_belly": 0.5454, "gradient_tail": 0.5876, "bored_expression": 0.4512, "slim_humanoid": 0.588, "facial_stripes": 0.5968, "half_body": 0.4115, "lotus_pose": 0.4767, "jumper": 0.4077, "actual_fur": 0.4563}, "stage3_selected_ranks": {"simple_background": 28, "blue_eyes": 23, "white_fur": 24, "animal_humanoid": 14, "stripes": 18, "purple_body": 37, "midriff": 89, "tail_tuft": 59, "pink_tongue": 81, "canine_humanoid": 1, "blue_nose": 20, "fox_humanoid": 3, "blue_tail": 44, "blurred_background": 60, "white_inner_ear_fluff": 26, "wolf_humanoid": 4, "jumping": 21, "pink_ears": 51, "glistening_fur": 46, "blue_stripes": 8, "big_nose": 72, "white_nose": 50, "glistening_tail": 25, "expressions": 61, "light_tail": 36, "pink_stripes": 7, "blue_inner_ear_fluff": 74, "fluffy_fur": 38, "purple_belly": 42, "gradient_tail": 31, "bored_expression": 78, "slim_humanoid": 30, "facial_stripes": 27, "half_body": 84, "lotus_pose": 71, "jumper": 85, "actual_fur": 76}, "stage3_selected_phrase_ranks": {"simple_background": 1, "blue_eyes": 1, "white_fur": 1, "animal_humanoid": 2, "stripes": 1, "purple_body": 2, "midriff": 3, "tail_tuft": 4, "pink_tongue": 3, "canine_humanoid": 1, "blue_nose": 1, "fox_humanoid": 3, "blue_tail": 2, "blurred_background": 4, "white_inner_ear_fluff": 2, "wolf_humanoid": 4, "jumping": 1, "pink_ears": 4, "glistening_fur": 4, "blue_stripes": 1, "big_nose": 3, "white_nose": 2, "glistening_tail": 2, "expressions": 3, "light_tail": 4, "pink_stripes": 1, "blue_inner_ear_fluff": 4, "fluffy_fur": 2, "purple_belly": 4, "gradient_tail": 3, "bored_expression": 4, "slim_humanoid": 4, "facial_stripes": 3, "half_body": 4, "lotus_pose": 3, "jumper": 3, "actual_fur": 3}, "extra_evidence": {"actual_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4563}, "animal_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6159}, "anthro": {"source": "structural"}, "belly": {"source": "implied"}, "big_nose": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.476}, "blue_inner_ear_fluff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4727}, "blue_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6748}, "blue_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5411}, "bored_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4512}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9003}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "expressions": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4957}, "facial_stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5968}, "fluffy_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5593}, "fox_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8204}, "glistening": {"source": "implied"}, "glistening_body": {"source": "implied"}, "glistening_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5349}, "glistening_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5986}, "gradient_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5876}, "half_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4115}, "humanoid": {"source": "implied"}, "inner_ear_fluff": {"source": "implied"}, "jumper": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4077}, "jumping": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6014}, "light_tail": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5671}, "lotus_pose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4767}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "midriff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3707}, "pink_ears": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5255}, "pink_stripes": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.682}, "pink_tongue": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4215}, "purple_belly": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5454}, "slim_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.588}, "stripes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6068}, "tail": {"source": "implied"}, "tail_tuft": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4994}, "tongue": {"source": "implied"}, "tuft": {"source": "implied"}, "white_inner_ear_fluff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.597}, "white_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5269}, "wolf_humanoid": {"source": "stage3", "why": "unknown", "retrieval_score": 0.819}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["solo", "simple_background", "clothing", "canid", "anthro"], "t1": 2.03, "t2": 1.93, "t3": 30.68, "t3s": 1.63, "t3p": 2.59, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=92 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 74, "n_selected": 46, "n_implied": 18, "n_structural": 5, "n_probe": 5, "ret_R": 0.3636, "P": 0.3696, "R": 0.7727, "F1": 0.5, "leaf_P": 0.125, "leaf_R": 0.25, "leaf_F1": 0.1667, "n_leaf_sel": 24, "n_leaf_gt": 12, "ret_P": 0.1081, "sel_given_ret": 2.125, "over_sel": 2.09, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 21, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "14": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3696, "gen_R": 0.7727, "gen_F1": 0.5, "missed": ["chest_tuft", "muscular", "muscular_anthro", "muscular_male", "topless"], "extra": ["bear", "belly", "blue_inner_ear_fluff", "countershade_belly", "countershade_body", "cross-hatching", "glistening", "glistening_body", "glistening_eyes", "glistening_fur", "hand_on_own_head", "hatching_(art)", "inner_ear_fluff", "looking_at_viewer", "muscular_legs", "pattern_background", "pinup", "pose", "quads", "shaded", "siberian_tiger", "striped_body", "striped_fur", "tan_body", "tan_bottomwear", "tan_clothing", "tan_countershading", "tan_shorts", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "bear", "belly", "blue_eyes", "blue_inner_ear_fluff", "bottomwear", "clothed", 
"clothing", "countershade_belly", "countershade_body", "countershading", "cross-hatching", "felid", "fur", "glistening", "glistening_body", "glistening_eyes", "glistening_fur", "hand_on_head", "hand_on_own_head", "hatching_(art)", "inner_ear_fluff", "looking_at_viewer", "male", "mammal", "muscular_legs", "pantherine", "pattern_background", "pinup", "pose", "quads", "shaded", "shorts", "siberian_tiger", "solo", "striped_body", "striped_fur", "stripes", "tan_body", "tan_bottomwear", "tan_clothing", "tan_countershading", "tan_shorts", "tiger", "tuft", "white_chest"], "stage3_selected": ["blue_eyes", "blue_inner_ear_fluff", "countershade_belly", "countershade_body", "cross-hatching", "glistening_eyes", "glistening_fur", "hand_on_head", "hand_on_own_head", "muscular_legs", "pattern_background", "pinup", "quads", "shorts", "siberian_tiger", "striped_body", "striped_fur", "tan_countershading", "tan_shorts", "tuft", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5785, "tuft": 0.497, "shorts": 0.5914, "striped_body": 0.4439, "pinup": 0.5187, "striped_fur": 0.6475, "hand_on_head": 0.6014, "glistening_eyes": 0.4769, "pattern_background": 0.5269, "quads": 0.6744, "tan_countershading": 0.7245, "glistening_fur": 0.501, "muscular_legs": 0.791, "white_chest": 0.917, "countershade_body": 0.8721, "blue_inner_ear_fluff": 0.428, "countershade_belly": 0.828, "cross-hatching": 0.4762, "siberian_tiger": 0.4939, "tan_shorts": 0.5498, "hand_on_own_head": 0.529}, "stage3_selected_ranks": {"blue_eyes": 36, "tuft": 53, "shorts": 34, "striped_body": 70, "pinup": 46, "striped_fur": 21, "hand_on_head": 29, "glistening_eyes": 61, "pattern_background": 44, "quads": 18, "tan_countershading": 13, "glistening_fur": 51, "muscular_legs": 9, "white_chest": 2, "countershade_body": 3, "blue_inner_ear_fluff": 72, "countershade_belly": 7, "cross-hatching": 62, "siberian_tiger": 56, "tan_shorts": 38, "hand_on_own_head": 43}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "tuft": 4, "shorts": 1, 
"striped_body": 2, "pinup": 3, "striped_fur": 2, "hand_on_head": 2, "glistening_eyes": 3, "pattern_background": 1, "quads": 4, "tan_countershading": 4, "glistening_fur": 3, "muscular_legs": 2, "white_chest": 1, "countershade_body": 1, "blue_inner_ear_fluff": 4, "countershade_belly": 2, "cross-hatching": 3, "siberian_tiger": 2, "tan_shorts": 2, "hand_on_own_head": 4}, "extra_evidence": {"bear": {"source": "probe"}, "belly": {"source": "implied"}, "blue_inner_ear_fluff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.428}, "countershade_belly": {"source": "stage3", "why": "unknown", "retrieval_score": 0.828}, "countershade_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8721}, "cross-hatching": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4762}, "glistening": {"source": "implied"}, "glistening_body": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4769}, "glistening_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.501}, "hand_on_own_head": {"source": "stage3", "why": "unknown", "retrieval_score": 0.529}, "hatching_(art)": {"source": "implied"}, "inner_ear_fluff": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}, "muscular_legs": {"source": "stage3", "why": "unknown", "retrieval_score": 0.791}, "pattern_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5269}, "pinup": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5187}, "pose": {"source": "implied"}, "quads": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6744}, "shaded": {"source": "implied"}, "siberian_tiger": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4939}, "striped_body": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4439}, "striped_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6475}, "tan_body": {"source": "implied"}, "tan_bottomwear": {"source": "implied"}, "tan_clothing": 
{"source": "implied"}, "tan_countershading": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7245}, "tan_shorts": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5498}, "white_chest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.917}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["solo", "felid", "clothing", "bear", "anthro"], "t1": 1.84, "t2": 1.74, "t3": 4.2, "t3s": 1.32, "t3p": 1.64, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=74 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 66, "n_selected": 78, "n_implied": 27, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.1282, "R": 0.8333, "F1": 0.2222, "leaf_P": 0.093, "leaf_R": 0.4444, "leaf_F1": 0.1538, "n_leaf_sel": 43, "n_leaf_gt": 9, "ret_P": 0.0455, "sel_given_ret": 3.3333, "over_sel": 6.5, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 46, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "10": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1282, "gen_R": 0.8333, "gen_F1": 0.2222, "missed": ["alpha_channel", "fingers"], "extra": ["beverage", "big_hands", "black_necktie", "black_nose", "blue_clothing", "blue_shirt", "blue_topwear", "bottom_heavy", "bottomwear", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "clasped_hands", "coffee_cup", "coffee_mug", "container", "cup", "domestic_cat", "dress_shirt", "felis", "formal", "frown", "gesture", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "hands_together", "handshake", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "jacket", "jacket_vest", "left-handed", "mug", "necktie", "pants", "running", "scowl", "shirt", "sketch", "sketch_background", "suit_jacket", "sweater", "sweater_vest", "t-shirt", "tan_body", "tan_bottomwear", "tan_clothing", "tan_fur", "tan_pants", "teal_shirt", "teal_topwear", "text", "thick_thighs", "topwear", "undershirt", "vest", "white_clothing", "white_necktie", 
"white_topwear", "wide_hips", "yellow_background"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "beverage", "big_hands", "black_necktie", "black_nose", "blue_clothing", "blue_shirt", "blue_topwear", "bottom_heavy", "bottomwear", "brown_clothing", "brown_topwear", "brown_vest", "business_attire", "clasped_hands", "clothed", "clothing", "coffee_cup", "coffee_mug", "container", "cup", "domestic_cat", "dress_shirt", "felid", "feline", "felis", "formal", "frown", "fur", "gesture", "green_background", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "hands_together", "handshake", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "jacket", "jacket_vest", "left-handed", "male", "mammal", "mug", "necktie", "pants", "running", "scowl", "shirt", "sketch", "sketch_background", "solo", "suit_jacket", "sweater", "sweater_vest", "t-shirt", "tan_body", "tan_bottomwear", "tan_clothing", "tan_fur", "tan_pants", "teal_shirt", "teal_topwear", "text", "thick_thighs", "topwear", "undershirt", "vest", "white_clothing", "white_necktie", "white_topwear", "wide_hips", "yellow_background"], "stage3_selected": ["big_hands", "black_necktie", "black_nose", "blue_shirt", "bottom_heavy", "brown_vest", "business_attire", "clasped_hands", "coffee_cup", "coffee_mug", "domestic_cat", "dress_shirt", "feline", "formal", "fur", "green_background", "grey_shirt", "hair_bun", "hands_together", "handshake", "holding_beverage", "holding_cup", "holding_mug", "invalid_background", "jacket_vest", "left-handed", "mug", "necktie", "running", "scowl", "shirt", "simple_background", "sketch_background", "suit_jacket", "sweater_vest", "t-shirt", "tan_fur", "tan_pants", "teal_shirt", "thick_thighs", "undershirt", "vest", "white_necktie", "white_topwear", "wide_hips", "yellow_background"], "stage3_selected_scores": {"fur": 0.7146, 
"simple_background": 0.6978, "feline": 0.7062, "thick_thighs": 0.4711, "shirt": 0.7998, "domestic_cat": 0.6329, "wide_hips": 0.4732, "black_nose": 0.6261, "tan_fur": 0.5779, "necktie": 0.7314, "t-shirt": 0.7846, "white_topwear": 0.7154, "vest": 0.8403, "green_background": 0.6069, "running": 0.5147, "yellow_background": 0.6334, "dress_shirt": 0.6132, "blue_shirt": 0.751, "holding_cup": 0.7667, "hair_bun": 0.6926, "big_hands": 0.4968, "holding_beverage": 0.7721, "bottom_heavy": 0.4663, "coffee_mug": 0.7055, "mug": 0.8841, "hands_together": 0.5547, "grey_shirt": 0.7582, "coffee_cup": 0.6863, "undershirt": 0.7599, "scowl": 0.5567, "sweater_vest": 0.7532, "holding_mug": 0.916, "clasped_hands": 0.6268, "suit_jacket": 0.5924, "black_necktie": 0.7132, "tan_pants": 0.7373, "handshake": 0.5511, "formal": 0.5993, "business_attire": 0.5657, "left-handed": 0.5479, "sketch_background": 0.5928, "jacket_vest": 0.772, "brown_vest": 0.8153, "teal_shirt": 0.7474, "white_necktie": 0.6418, "invalid_background": 0.6495}, "stage3_selected_ranks": {"fur": 23, "simple_background": 28, "feline": 25, "thick_thighs": 68, "shirt": 5, "domestic_cat": 37, "wide_hips": 67, "black_nose": 40, "tan_fur": 52, "necktie": 19, "t-shirt": 7, "white_topwear": 22, "vest": 3, "green_background": 45, "running": 64, "yellow_background": 36, "dress_shirt": 42, "blue_shirt": 16, "holding_cup": 10, "hair_bun": 29, "big_hands": 65, "holding_beverage": 8, "bottom_heavy": 69, "coffee_mug": 26, "mug": 2, "hands_together": 56, "grey_shirt": 12, "coffee_cup": 30, "undershirt": 11, "scowl": 55, "sweater_vest": 14, "holding_mug": 1, "clasped_hands": 39, "suit_jacket": 48, "black_necktie": 24, "tan_pants": 18, "handshake": 59, "formal": 46, "business_attire": 54, "left-handed": 60, "sketch_background": 47, "jacket_vest": 9, "brown_vest": 4, "teal_shirt": 17, "white_necktie": 33, "invalid_background": 31}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "feline": 1, "thick_thighs": 3, "shirt": 1, 
"domestic_cat": 4, "wide_hips": 2, "black_nose": 2, "tan_fur": 4, "necktie": 1, "t-shirt": 2, "white_topwear": 1, "vest": 1, "green_background": 2, "running": 3, "yellow_background": 3, "dress_shirt": 2, "blue_shirt": 3, "holding_cup": 4, "hair_bun": 1, "big_hands": 4, "holding_beverage": 3, "bottom_heavy": 4, "coffee_mug": 3, "mug": 1, "hands_together": 2, "grey_shirt": 1, "coffee_cup": 4, "undershirt": 3, "scowl": 4, "sweater_vest": 3, "holding_mug": 1, "clasped_hands": 1, "suit_jacket": 4, "black_necktie": 2, "tan_pants": 3, "handshake": 3, "formal": 1, "business_attire": 1, "left-handed": 4, "sketch_background": 3, "jacket_vest": 2, "brown_vest": 1, "teal_shirt": 4, "white_necktie": 4, "invalid_background": 1}, "extra_evidence": {"beverage": {"source": "implied"}, "big_hands": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4968}, "black_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7132}, "black_nose": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6261}, "blue_clothing": {"source": "implied"}, "blue_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.751}, "blue_topwear": {"source": "implied"}, "bottom_heavy": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4663}, "bottomwear": {"source": "implied"}, "brown_clothing": {"source": "implied"}, "brown_topwear": {"source": "implied"}, "brown_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8153}, "business_attire": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5657}, "clasped_hands": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6268}, "coffee_cup": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6863}, "coffee_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7055}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "domestic_cat": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6329}, "dress_shirt": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.6132}, "felis": {"source": "implied"}, "formal": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5993}, "frown": {"source": "implied"}, "gesture": {"source": "implied"}, "green_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6069}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6926}, "hands_together": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5547}, "handshake": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5511}, "holding_beverage": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7721}, "holding_container": {"source": "implied"}, "holding_cup": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7667}, "holding_mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "jacket": {"source": "implied"}, "jacket_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.772}, "left-handed": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5479}, "mug": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7314}, "pants": {"source": "implied"}, "running": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5147}, "scowl": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5567}, "shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7998}, "sketch": {"source": "implied"}, "sketch_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5928}, "suit_jacket": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5924}, "sweater": {"source": "implied"}, "sweater_vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7532}, "t-shirt": {"source": "stage3", "why": 
"unknown", "retrieval_score": 0.7846}, "tan_body": {"source": "implied"}, "tan_bottomwear": {"source": "implied"}, "tan_clothing": {"source": "implied"}, "tan_fur": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5779}, "tan_pants": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7373}, "teal_shirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "thick_thighs": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4711}, "topwear": {"source": "implied"}, "undershirt": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7599}, "vest": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8403}, "white_clothing": {"source": "implied"}, "white_necktie": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6418}, "white_topwear": {"source": "stage3", "why": "unknown", "retrieval_score": 0.7154}, "wide_hips": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4732}, "yellow_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.6334}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["text", "solo", "felid", "clothing", "anthro"], "t1": 2.99, "t2": 1.43, "t3": 6.03, "t3s": 0.98, "t3p": 0.75, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=70 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 66, "n_selected": 49, "n_implied": 9, "n_structural": 6, "n_probe": 5, "ret_R": 0.5714, "P": 0.2245, "R": 0.7857, "F1": 0.3492, "leaf_P": 0.0625, "leaf_R": 0.2, "leaf_F1": 0.0952, "n_leaf_sel": 32, "n_leaf_gt": 10, "ret_P": 0.1212, "sel_given_ret": 1.375, "over_sel": 3.5, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 34, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "9": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2245, "gen_R": 0.7857, "gen_F1": 0.3492, "missed": ["fur", "human", "male"], "extra": ["anthro", "balancing", "blonde_hair", "bottomwear", "cheek_to_cheek", "chimpanzee", "crossed_arms", "dancer_outfit", "duo", "feral", "flash", "front_view", "gorilla", "grin", "grinning_at_viewer", "interactive", "laugh", "loincloth", "no_sound", "one_eye_closed", "pan_(genus)", "raised_arms", "raised_leg", "red_hair", "shaggy_hair", "smile", "smiling_at_viewer", "smirk", "smirking_at_viewer", "smug_grin", "staff", "toony_expression", "topless", "trio", "wide_grin", "wink", "winking_at_viewer", "yelling"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "balancing", "bear", "blonde_hair", "bottomwear", "cheek_to_cheek", "chimpanzee", "clothed", "clothing", "crossed_arms", "dancer_outfit", "dancing", "duo", "feral", "flash", "front_view", "gorilla", 
"grin", "grinning_at_viewer", "group", "hair", "haplorhine", "interactive", "laugh", "loincloth", "looking_at_viewer", "mammal", "no_sound", "one_eye_closed", "pan_(genus)", "primate", "raised_arms", "raised_leg", "red_hair", "shaggy_hair", "smile", "smiling_at_viewer", "smirk", "smirking_at_viewer", "smug_grin", "staff", "toony_expression", "topless", "trio", "wide_grin", "wink", "winking_at_viewer", "yelling"], "stage3_selected": ["ape", "balancing", "bear", "blonde_hair", "cheek_to_cheek", "chimpanzee", "crossed_arms", "dancer_outfit", "dancing", "flash", "front_view", "gorilla", "grin", "grinning_at_viewer", "hair", "interactive", "laugh", "loincloth", "looking_at_viewer", "no_sound", "primate", "raised_arms", "raised_leg", "red_hair", "shaggy_hair", "simple_background", "smirk", "smirking_at_viewer", "smug_grin", "staff", "toony_expression", "wide_grin", "winking_at_viewer", "yelling"], "stage3_selected_scores": {"hair": 0.5445, "simple_background": 0.5491, "looking_at_viewer": 0.5475, "blonde_hair": 0.3637, "red_hair": 0.3652, "bear": 0.5735, "front_view": 0.4609, "grin": 0.5653, "raised_leg": 0.4324, "smirk": 0.3593, "primate": 0.8905, "loincloth": 0.5685, "crossed_arms": 0.421, "dancing": 0.5568, "laugh": 0.5259, "staff": 0.3682, "ape": 0.9767, "raised_arms": 0.5445, "yelling": 0.3709, "gorilla": 0.8299, "winking_at_viewer": 0.404, "smug_grin": 0.3703, "flash": 0.3205, "chimpanzee": 0.8275, "smirking_at_viewer": 0.4352, "no_sound": 0.2973, "balancing": 0.4094, "interactive": 0.4085, "grinning_at_viewer": 0.442, "shaggy_hair": 0.3489, "dancer_outfit": 0.4163, "cheek_to_cheek": 0.3714, "toony_expression": 0.4685, "wide_grin": 0.5267}, "stage3_selected_ranks": {"hair": 14, "simple_background": 11, "looking_at_viewer": 12, "blonde_hair": 63, "red_hair": 61, "bear": 6, "front_view": 27, "grin": 8, "raised_leg": 36, "smirk": 64, "primate": 2, "loincloth": 7, "crossed_arms": 41, "dancing": 10, "laugh": 16, "staff": 60, "ape": 1, "raised_arms": 13, "yelling": 57, 
"gorilla": 4, "winking_at_viewer": 47, "smug_grin": 58, "flash": 69, "chimpanzee": 5, "smirking_at_viewer": 34, "no_sound": 71, "balancing": 45, "interactive": 46, "grinning_at_viewer": 32, "shaggy_hair": 66, "dancer_outfit": 43, "cheek_to_cheek": 56, "toony_expression": 25, "wide_grin": 15}, "stage3_selected_phrase_ranks": {"hair": 1, "simple_background": 1, "looking_at_viewer": 1, "blonde_hair": 4, "red_hair": 3, "bear": 1, "front_view": 2, "grin": 1, "raised_leg": 2, "smirk": 3, "primate": 1, "loincloth": 1, "crossed_arms": 4, "dancing": 1, "laugh": 1, "staff": 4, "ape": 1, "raised_arms": 1, "yelling": 3, "gorilla": 2, "winking_at_viewer": 4, "smug_grin": 4, "flash": 2, "chimpanzee": 3, "smirking_at_viewer": 4, "no_sound": 4, "balancing": 4, "interactive": 1, "grinning_at_viewer": 3, "shaggy_hair": 3, "dancer_outfit": 3, "cheek_to_cheek": 3, "toony_expression": 1, "wide_grin": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "balancing": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4094}, "blonde_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3637}, "bottomwear": {"source": "implied"}, "cheek_to_cheek": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3714}, "chimpanzee": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8275}, "crossed_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.421}, "dancer_outfit": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4163}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "flash": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3205}, "front_view": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4609}, "gorilla": {"source": "stage3", "why": "unknown", "retrieval_score": 0.8299}, "grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5653}, "grinning_at_viewer": {"source": "stage3", "why": "unknown", "retrieval_score": 0.442}, "interactive": {"source": "stage3", "why": "unknown", 
"retrieval_score": 0.4085}, "laugh": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5259}, "loincloth": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5685}, "no_sound": {"source": "stage3", "why": "unknown", "retrieval_score": 0.2973}, "one_eye_closed": {"source": "implied"}, "pan_(genus)": {"source": "implied"}, "raised_arms": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5445}, "raised_leg": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4324}, "red_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3652}, "shaggy_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3489}, "smile": {"source": "implied"}, "smiling_at_viewer": {"source": "implied"}, "smirk": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3593}, "smirking_at_viewer": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4352}, "smug_grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3703}, "staff": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3682}, "toony_expression": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4685}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5267}, "wink": {"source": "implied"}, "winking_at_viewer": {"source": "stage3", "why": "unknown", "retrieval_score": 0.404}, "yelling": {"source": "stage3", "why": "unknown", "retrieval_score": 0.3709}}, "structural": ["trio", "anthro", "feral", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "group", "duo", "bear", "anthro"], "t1": 3.18, "t2": 1.78, "t3": 4.3, "t3s": 1.95, "t3p": 1.6, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=69 entity=1 copyright_filtered=1 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/k_sweep_explicit_no_why_seed42_k6.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/eval_results/latency_baseline_seed42.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T05:51:59.829566", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 10, "temperature": 0.0, "shuffle": false, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 27}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 150, "n_selected": 59, "n_implied": 26, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.1525, "R": 0.75, "F1": 0.2535, "leaf_P": 0.0741, "leaf_R": 0.2222, "leaf_F1": 0.1111, "n_leaf_sel": 27, "n_leaf_gt": 9, "ret_P": 0.02, "sel_given_ret": 3.0, "over_sel": 4.92, "why": {"explicit": 24, "strong_implied": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 86, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "33": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1525, "gen_R": 0.75, "gen_F1": 0.2535, "missed": ["alpha_channel", "fingers", "male"], "extra": ["beverage", "black_body", "black_clothing", "black_fur", "black_shirt", "black_topwear", "black_vest", "business_attire", "business_suit", "businesswear", "coffee", "container", "cup", "domestic_cat", "felis", "formal", "green_clothing", "green_shirt", "green_t-shirt", "green_topwear", "green_vest", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "holding_container", "holding_cup", "holding_mug", "holding_object", "jacket", "mug", "necktie", "polo_shirt", "red_clothing", "red_topwear", "red_vest", "shirt", "sleeveless_shirt", "suit", "suit_jacket", "t-shirt", "text", "topless", "topwear", "vest", "waiter", "white_clothing", "white_necktie", "white_shirt", "white_topwear"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", 
"beverage", "black_body", "black_clothing", "black_fur", "black_shirt", "black_topwear", "black_vest", "business_attire", "business_suit", "businesswear", "clothed", "clothing", "coffee", "container", "cup", "domestic_cat", "felid", "feline", "felis", "formal", "fur", "green_clothing", "green_shirt", "green_t-shirt", "green_topwear", "green_vest", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "holding_container", "holding_cup", "holding_mug", "holding_object", "jacket", "mammal", "mug", "necktie", "polo_shirt", "red_clothing", "red_topwear", "red_vest", "shirt", "sleeveless_shirt", "solo", "suit", "suit_jacket", "t-shirt", "text", "topless", "topwear", "vest", "waiter", "white_clothing", "white_necktie", "white_shirt", "white_topwear"], "stage3_selected": ["black_fur", "black_shirt", "black_vest", "business_attire", "business_suit", "businesswear", "coffee", "domestic_cat", "formal", "green_t-shirt", "green_vest", "grey_shirt", "hair_bun", "holding_cup", "holding_mug", "jacket", "polo_shirt", "red_vest", "shirt", "simple_background", "sleeveless_shirt", "suit_jacket", "topwear", "vest", "waiter", "white_necktie", "white_shirt"], "stage3_selected_scores": {"simple_background": 0.7012, "topwear": 0.7053, "shirt": 0.8041, "domestic_cat": 0.6355, "black_fur": 0.722, "jacket": 0.6702, "vest": 0.8437, "white_shirt": 0.7408, "black_shirt": 0.7383, "coffee": 0.6427, "holding_cup": 0.7694, "hair_bun": 0.6946, "waiter": 0.5913, "sleeveless_shirt": 0.7091, "grey_shirt": 0.7606, "business_suit": 0.5775, "polo_shirt": 0.7142, "holding_mug": 0.9184, "black_vest": 0.7142, "businesswear": 0.5741, "green_t-shirt": 0.7407, "green_vest": 0.7238, "red_vest": 0.6652, "white_necktie": 0.644, "suit_jacket": 0.6893, "formal": 0.601, "business_attire": 0.5683}, "stage3_selected_ranks": {"simple_background": 49, "topwear": 47, "shirt": 5, "domestic_cat": 72, "black_fur": 35, "jacket": 58, "vest": 3, "white_shirt": 22, "black_shirt": 25, "coffee": 67, "holding_cup": 10, 
"hair_bun": 52, "waiter": 89, "sleeveless_shirt": 45, "grey_shirt": 14, "business_suit": 96, "polo_shirt": 41, "holding_mug": 1, "black_vest": 42, "businesswear": 100, "green_t-shirt": 23, "green_vest": 34, "red_vest": 59, "white_necktie": 66, "suit_jacket": 54, "formal": 86, "business_attire": 102}, "stage3_selected_phrase_ranks": {"simple_background": 1, "topwear": 9, "shirt": 1, "domestic_cat": 4, "black_fur": 1, "jacket": 8, "vest": 1, "white_shirt": 4, "black_shirt": 8, "coffee": 8, "holding_cup": 4, "hair_bun": 1, "waiter": 5, "sleeveless_shirt": 7, "grey_shirt": 1, "business_suit": 8, "polo_shirt": 6, "holding_mug": 1, "black_vest": 9, "businesswear": 9, "green_t-shirt": 7, "green_vest": 6, "red_vest": 9, "white_necktie": 10, "suit_jacket": 4, "formal": 1, "business_attire": 1}, "extra_evidence": {"beverage": {"source": "implied"}, "black_body": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.722}, "black_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7383}, "black_topwear": {"source": "implied"}, "black_vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7142}, "business_attire": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5683}, "business_suit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5775}, "businesswear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5741}, "coffee": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6427}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "domestic_cat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6355}, "felis": {"source": "implied"}, "formal": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.601}, "green_clothing": {"source": "implied"}, "green_shirt": {"source": "implied"}, "green_t-shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7407}, "green_topwear": 
{"source": "implied"}, "green_vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7238}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7606}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6946}, "holding_container": {"source": "implied"}, "holding_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7694}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9184}, "holding_object": {"source": "implied"}, "jacket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6702}, "mug": {"source": "implied"}, "necktie": {"source": "implied"}, "polo_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7142}, "red_clothing": {"source": "implied"}, "red_topwear": {"source": "implied"}, "red_vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6652}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8041}, "sleeveless_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7091}, "suit": {"source": "implied"}, "suit_jacket": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6893}, "t-shirt": {"source": "implied"}, "text": {"source": "probe"}, "topless": {"source": "structural"}, "topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7053}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8437}, "waiter": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5913}, "white_clothing": {"source": "implied"}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.644}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7408}, "white_topwear": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed", "topless"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 2.31, "t2": 1.8, "t3": 47.39, 
"t3s": 6.34, "t3p": 8.32, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=153 entity=1 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 160, "n_selected": 42, "n_implied": 14, "n_structural": 6, "n_probe": 6, "ret_R": 0.5714, "P": 0.2619, "R": 0.7857, "F1": 0.3929, "leaf_P": 0.0435, "leaf_R": 0.1, "leaf_F1": 0.0606, "n_leaf_sel": 23, "n_leaf_gt": 10, "ret_P": 0.05, "sel_given_ret": 1.375, "over_sel": 3.0, "why": {"explicit": 15, "strong_implied": 5}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 82, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "41": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2619, "gen_R": 0.7857, "gen_F1": 0.3929, "missed": ["fur", "human", "male"], "extra": ["<3", "american_black_bear", "anthro", "arms_above_head", "auburn_hair", "black_bear", "bottomwear", "chimpanzee", "duo", "feral", "gesture", "gorilla", "grinning_at_viewer", "kermode_bear", "loincloth", "monkey", "one_eye_closed", "pan_(genus)", "pointing", "pointing_at_viewer", "raised_arms", "sloth_bear", "smug_eyes", "spread_arms", "sun_bear", "tap_dancing", "topless", "trio", "ursine", "wink", "winking_at_viewer"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["<3", "american_black_bear", "anthro", "ape", "arms_above_head", "auburn_hair", "bear", "black_bear", "bottomwear", "chimpanzee", "clothed", "clothing", "dancing", "duo", "feral", "gesture", "gorilla", "grinning_at_viewer", "group", "hair", "haplorhine", "kermode_bear", 
"loincloth", "looking_at_viewer", "mammal", "monkey", "one_eye_closed", "pan_(genus)", "pointing", "pointing_at_viewer", "primate", "raised_arms", "sloth_bear", "smug_eyes", "spread_arms", "sun_bear", "tap_dancing", "topless", "trio", "ursine", "wink", "winking_at_viewer"], "stage3_selected": ["arms_above_head", "auburn_hair", "bear", "chimpanzee", "dancing", "gorilla", "grinning_at_viewer", "kermode_bear", "loincloth", "looking_at_viewer", "monkey", "pointing_at_viewer", "primate", "raised_arms", "sloth_bear", "smug_eyes", "spread_arms", "sun_bear", "tap_dancing", "winking_at_viewer"], "stage3_selected_scores": {"bear": 0.5735, "primate": 0.8905, "loincloth": 0.5685, "monkey": 0.7558, "arms_above_head": 0.3975, "gorilla": 0.8299, "spread_arms": 0.4027, "winking_at_viewer": 0.4324, "pointing_at_viewer": 0.4203, "sloth_bear": 0.4453, "chimpanzee": 0.8275, "auburn_hair": 0.346, "sun_bear": 0.4335, "kermode_bear": 0.4397, "smug_eyes": 0.3465, "looking_at_viewer": 0.5475, "dancing": 0.5568, "raised_arms": 0.5445, "grinning_at_viewer": 0.442, "tap_dancing": 0.4339}, "stage3_selected_ranks": {"bear": 12, "primate": 2, "loincloth": 13, "monkey": 6, "arms_above_head": 81, "gorilla": 4, "spread_arms": 76, "winking_at_viewer": 53, "pointing_at_viewer": 63, "sloth_bear": 41, "chimpanzee": 5, "auburn_hair": 130, "sun_bear": 50, "kermode_bear": 44, "smug_eyes": 128, "looking_at_viewer": 18, "dancing": 16, "raised_arms": 19, "grinning_at_viewer": 42, "tap_dancing": 49}, "stage3_selected_phrase_ranks": {"bear": 1, "primate": 1, "loincloth": 1, "monkey": 6, "arms_above_head": 5, "gorilla": 2, "spread_arms": 5, "winking_at_viewer": 4, "pointing_at_viewer": 8, "sloth_bear": 6, "chimpanzee": 3, "auburn_hair": 7, "sun_bear": 10, "kermode_bear": 7, "smug_eyes": 5, "looking_at_viewer": 1, "dancing": 1, "raised_arms": 1, "grinning_at_viewer": 3, "tap_dancing": 2}, "extra_evidence": {"<3": {"source": "probe"}, "american_black_bear": {"source": "implied"}, "anthro": {"source": 
"structural"}, "arms_above_head": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3975}, "auburn_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.346}, "black_bear": {"source": "implied"}, "bottomwear": {"source": "implied"}, "chimpanzee": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8275}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "gesture": {"source": "implied"}, "gorilla": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8299}, "grinning_at_viewer": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.442}, "kermode_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4397}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5685}, "monkey": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7558}, "one_eye_closed": {"source": "implied"}, "pan_(genus)": {"source": "implied"}, "pointing": {"source": "implied"}, "pointing_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4203}, "raised_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5445}, "sloth_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4453}, "smug_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3465}, "spread_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4027}, "sun_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4335}, "tap_dancing": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4339}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "ursine": {"source": "implied"}, "wink": {"source": "implied"}, "winking_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4324}}, "structural": ["trio", "anthro", "feral", "clothed", "topless", "looking_at_viewer"], "probe": ["anthro", "duo", "group", "bear", "simple_background", "<3"], "t1": 3.37, "t2": 4.07, "t3": 31.86, "t3s": 5.95, 
"t3p": 4.45, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=161 entity=5 copyright_filtered=2 generic_char_to_general=1 unknown_type=3"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 133, "n_selected": 37, "n_implied": 12, "n_structural": 4, "n_probe": 5, "ret_R": 0.7143, "P": 0.3243, "R": 0.8571, "F1": 0.4706, "leaf_P": 0.2609, "leaf_R": 0.6667, "leaf_F1": 0.375, "n_leaf_sel": 23, "n_leaf_gt": 9, "ret_P": 0.0752, "sel_given_ret": 1.2, "over_sel": 2.64, "why": {"explicit": 19}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 62, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "11": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3243, "gen_R": 0.8571, "gen_F1": 0.4706, "missed": ["romantic", "romantic_couple"], "extra": ["<3", "blue_hair", "coat", "confident", "diaper", "domestic_rabbit", "dutch_rabbit", "expressions", "hair", "hand_holding", "holding_object", "holding_plushie", "holding_toy", "lab_coat", "looking_at_viewer", "oryctolagus", "relationship", "round_ears", "setting", "teddy_bear", "topwear", "touching_diaper", "toy", "vest", "winter_coat"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blue_hair", "blush", "clothed", "clothing", "coat", "confident", "diaper", "domestic_rabbit", "duo", "dutch_rabbit", "expressions", "hair", "hand_holding", "holding_object", "holding_plushie", "holding_toy", "lab_coat", "lagomorph", "leporid", "looking_at_viewer", "mammal", "oryctolagus", "plushie", "rabbit", 
"relationship", "round_ears", "setting", "teal_eyes", "teddy_bear", "topwear", "touching_diaper", "toy", "vest", "winter_coat"], "stage3_selected": ["blue_eyes", "blue_hair", "coat", "confident", "duo", "dutch_rabbit", "expressions", "hand_holding", "holding_plushie", "holding_toy", "lab_coat", "relationship", "round_ears", "setting", "teal_eyes", "teddy_bear", "touching_diaper", "vest", "winter_coat"], "stage3_selected_scores": {"duo": 0.3628, "blue_eyes": 0.6151, "blue_hair": 0.4202, "hand_holding": 0.4283, "coat": 0.6383, "vest": 0.5028, "teal_eyes": 0.6283, "lab_coat": 0.516, "round_ears": 0.4343, "teddy_bear": 0.5459, "confident": 0.5161, "expressions": 0.5454, "touching_diaper": 0.4638, "holding_plushie": 0.7793, "winter_coat": 0.4759, "dutch_rabbit": 0.4583, "holding_toy": 0.5855, "relationship": 0.6206, "setting": 0.5567}, "stage3_selected_ranks": {"duo": 131, "blue_eyes": 12, "blue_hair": 118, "hand_holding": 112, "coat": 7, "vest": 47, "teal_eyes": 8, "lab_coat": 40, "round_ears": 102, "teddy_bear": 22, "confident": 39, "expressions": 23, "touching_diaper": 75, "holding_plushie": 2, "winter_coat": 66, "dutch_rabbit": 80, "holding_toy": 14, "relationship": 9, "setting": 20}, "stage3_selected_phrase_ranks": {"duo": 3, "blue_eyes": 1, "blue_hair": 8, "hand_holding": 9, "coat": 1, "vest": 6, "teal_eyes": 1, "lab_coat": 5, "round_ears": 10, "teddy_bear": 5, "confident": 7, "expressions": 2, "touching_diaper": 6, "holding_plushie": 1, "winter_coat": 10, "dutch_rabbit": 4, "holding_toy": 4, "relationship": 1, "setting": 1}, "extra_evidence": {"<3": {"source": "probe"}, "blue_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4202}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "confident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5161}, "diaper": {"source": "implied"}, "domestic_rabbit": {"source": "implied"}, "dutch_rabbit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4583}, 
"expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5454}, "hair": {"source": "implied"}, "hand_holding": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4283}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "holding_toy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5855}, "lab_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.516}, "looking_at_viewer": {"source": "structural"}, "oryctolagus": {"source": "implied"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "round_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4343}, "setting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5567}, "teddy_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5459}, "topwear": {"source": "implied"}, "touching_diaper": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4638}, "toy": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5028}, "winter_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4759}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 2.39, "t2": 3.53, "t3": 32.53, "t3s": 5.33, "t3p": 6.83, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=131 entity=2 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 124, "n_selected": 30, "n_implied": 5, "n_structural": 5, "n_probe": 3, "ret_R": 0.75, "P": 0.1333, "R": 1.0, "F1": 0.2353, "leaf_P": 0.125, "leaf_R": 0.75, "leaf_F1": 0.2143, "n_leaf_sel": 24, "n_leaf_gt": 4, "ret_P": 0.0242, "sel_given_ret": 1.3333, "over_sel": 7.5, "why": {"explicit": 20}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 55, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "5": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1333, "gen_R": 1.0, "gen_F1": 0.2353, "missed": [], "extra": ["ambiguous_gender", "anthro", "belly", "covering", "covering_mouth", "feral", "floating", "grinning_at_viewer", "looking_at_viewer", "nose", "nude", "red_spots", "round_eyes", "smiling_at_viewer", "smirk", "smirking_at_viewer", "spots", "spotted_legs", "tan_belly", "tan_chest", "tan_face", "tan_head", "tan_stripes", "toony", "toony_eyes", "wide_eyed"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "belly", "covering", "covering_mouth", "feral", "floating", "grinning_at_viewer", "looking_at_viewer", "nose", "nude", "red_nose", "red_spots", "round_eyes", "smile", "smiling_at_viewer", "smirk", "smirking_at_viewer", "solo", "spots", "spotted_legs", "tan_belly", "tan_body", "tan_chest", "tan_face", "tan_head", "tan_stripes", "toony", "toony_eyes", "wide_eyed"], "stage3_selected": ["covering_mouth", "floating", "grinning_at_viewer", "nose", "red_nose", 
"red_spots", "round_eyes", "smile", "smirking_at_viewer", "spotted_legs", "tan_belly", "tan_body", "tan_chest", "tan_face", "tan_head", "tan_stripes", "toony", "toony_eyes", "white_background", "wide_eyed"], "stage3_selected_scores": {"smile": 0.6098, "white_background": 0.6267, "tan_body": 0.6777, "toony": 0.638, "wide_eyed": 0.4762, "red_nose": 0.7461, "floating": 0.6778, "covering_mouth": 0.3954, "tan_belly": 0.6631, "tan_face": 0.7162, "spotted_legs": 0.6719, "tan_chest": 0.7032, "smirking_at_viewer": 0.4548, "red_spots": 0.6307, "round_eyes": 0.8856, "grinning_at_viewer": 0.4958, "tan_head": 0.6682, "tan_stripes": 0.6821, "toony_eyes": 0.3921, "nose": 0.8851}, "stage3_selected_ranks": {"smile": 45, "white_background": 40, "tan_body": 19, "toony": 35, "wide_eyed": 89, "red_nose": 4, "floating": 18, "covering_mouth": 120, "tan_belly": 27, "tan_face": 8, "spotted_legs": 21, "tan_chest": 11, "smirking_at_viewer": 99, "red_spots": 39, "round_eyes": 2, "grinning_at_viewer": 83, "tan_head": 23, "tan_stripes": 15, "toony_eyes": 121, "nose": 3}, "stage3_selected_phrase_ranks": {"smile": 2, "white_background": 1, "tan_body": 6, "toony": 1, "wide_eyed": 8, "red_nose": 1, "floating": 1, "covering_mouth": 10, "tan_belly": 10, "tan_face": 1, "spotted_legs": 5, "tan_chest": 4, "smirking_at_viewer": 8, "red_spots": 10, "round_eyes": 1, "grinning_at_viewer": 6, "tan_head": 8, "tan_stripes": 5, "toony_eyes": 7, "nose": 1}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "belly": {"source": "implied"}, "covering": {"source": "implied"}, "covering_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3954}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6778}, "grinning_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4958}, "looking_at_viewer": {"source": "structural"}, "nose": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.8851}, "nude": {"source": "structural"}, "red_spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6307}, "round_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8856}, "smiling_at_viewer": {"source": "implied"}, "smirk": {"source": "implied"}, "smirking_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4548}, "spots": {"source": "implied"}, "spotted_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6719}, "tan_belly": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6631}, "tan_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7032}, "tan_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7162}, "tan_head": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6682}, "tan_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6821}, "toony": {"source": "stage3", "why": "explicit", "retrieval_score": 0.638}, "toony_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3921}, "wide_eyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4762}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["simple_background", "anthro", "solo"], "t1": 4.89, "t2": 1.55, "t3": 21.08, "t3s": 3.65, "t3p": 8.37, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=125 entity=0 copyright_filtered=4 generic_char_to_general=0 unknown_type=5"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 10, "n_selected": 11, "n_implied": 3, "n_structural": 5, "n_probe": 3, "ret_R": 0.0, "P": 0.6364, "R": 0.3182, "F1": 0.4242, "leaf_P": 0.125, "leaf_R": 0.0833, "leaf_F1": 0.1, "n_leaf_sel": 8, "n_leaf_gt": 12, "ret_P": 0.0, "sel_given_ret": 0.0, "over_sel": 0.5, "why": {"explicit": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 2, "attempts_by_n_local": {"7": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6364, "gen_R": 0.3182, "gen_F1": 0.4242, "missed": ["blue_eyes", "bottomwear", "chest_tuft", "countershading", "fur", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "stripes", "tiger", "topless", "tuft"], "extra": ["comic", "doujinshi", "humor", "looking_at_viewer"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "clothed", "clothing", "comic", "doujinshi", "felid", "humor", "looking_at_viewer", "male", "mammal", "solo"], "stage3_selected": ["doujinshi", "humor"], "stage3_selected_scores": {"humor": 0.442, "doujinshi": 0.3981}, "stage3_selected_ranks": {"humor": 3, "doujinshi": 5}, "stage3_selected_phrase_ranks": {"humor": 3, "doujinshi": 5}, "extra_evidence": {"comic": {"source": "implied"}, "doujinshi": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.3981}, "humor": {"source": "stage3", "why": "explicit", "retrieval_score": 0.442}, "looking_at_viewer": {"source": "structural"}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "felid", "solo"], "t1": 7.53, "t2": 0.08, "t3": 1.75, "t3s": 0.99, "t3p": 1.53, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=7 entity=0 copyright_filtered=3 generic_char_to_general=0 unknown_type=0"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 174, "n_selected": 8, "n_implied": 0, "n_structural": 3, "n_probe": 4, "ret_R": 0.6923, "P": 0.125, "R": 0.0769, "F1": 0.0952, "leaf_P": 0.125, "leaf_R": 0.1667, "leaf_F1": 0.1429, "n_leaf_sel": 8, "n_leaf_gt": 6, "ret_P": 0.0517, "sel_given_ret": 0.1111, "over_sel": 0.62, "why": {"explicit": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 2, "calls_exhausted_retries": 1, "attempts_total": 6, "attempt_errors": 4, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 53, "attempts_by_n_local": {"60": {"attempts": 4, "parse_ok": 1, "parse_fail": 0, "errors": 3}, "48": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}}, "attempt_failure_rate": 0.6666666666666666, "call_exhaustion_rate": 0.3333333333333333}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.125, "gen_R": 0.0769, "gen_F1": 0.0952, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothing", "darkness", "group", "light", "solo", "threatening"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothing", "darkness", "group", "light", "solo", "text", "threatening"], "stage3_selected": ["darkness", "light", "threatening"], "stage3_selected_scores": {"light": 0.7785, "threatening": 0.5582, "darkness": 0.8348}, "stage3_selected_ranks": {"light": 4, "threatening": 72, "darkness": 2}, "stage3_selected_phrase_ranks": {"light": 1, "threatening": 4, "darkness": 1}, "extra_evidence": {"anthro": {"source": "probe"}, "clothing": {"source": "probe"}, "darkness": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.8348}, "group": {"source": "structural"}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7785}, "solo": {"source": "structural"}, "threatening": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5582}}, "structural": ["solo", "group", "text"], "probe": ["clothing", "anthro", "text", "group"], "t1": 4.86, "t2": 1.62, "t3": 41.08, "t3s": 1.97, "t3p": 2.29, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=168 entity=4 copyright_filtered=2 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"other\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"strong_implied\"}, {\"i\": 6, \"why\": \"style_or_meta\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"style_or_meta\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"style_or_meta\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"strong_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, 
{\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"style_or_meta\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.34.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.34.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"other\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"strong_implied\"}, {\"i\": 6, \"why\": \"style_or_meta\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"style_or_meta\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"style_or_meta\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"strong_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": 
\"style_or_meta\"}, {\"i\": 35}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"strong_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"style_or_meta\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"style_or_meta\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"strong_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"style_or_meta\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"other\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"other\"}, {\"i\": 49}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 49}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: gave up after 3 attempts", "Stage3 general_chunk_2: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.33.why\n Field required [type=missing, input_value={'i': 34}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 208, "n_selected": 49, "n_implied": 8, "n_structural": 3, "n_probe": 3, "ret_R": 0.6667, "P": 0.2245, "R": 0.7333, "F1": 0.3438, "leaf_P": 0.1667, "leaf_R": 0.5, "leaf_F1": 0.25, "n_leaf_sel": 36, "n_leaf_gt": 12, "ret_P": 0.0481, "sel_given_ret": 1.1, "over_sel": 3.27, "why": {"explicit": 38}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 4, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 91, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}, "31": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2245, "gen_R": 0.7333, "gen_F1": 0.3438, "missed": ["angry", "eyes_closed", "eyeshadow", "sleeping"], "extra": ["3rd_party_watermark", "annoyed_expression", "anthro", "applying_makeup", "artist_logo", "bed_sheet", "bedding", "bedroom", "blanket", "clothing", "comic", "english_text", "expressions", "eyebrows", "eyeliner", "eyes", "green_eyebrows", "half-closed_eyes", "head_on_pillow", "humanoid", "letters", "logo", "looking_down_at_another", "lying_on_bed", "mascara", "narrowed_eyes", "on_bed", "pajamas", "personal_grooming", "purple_theme", "relaxed_expression", "resting", "romantic", "sleepover", "text_box", "under_blanket", "vase", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["3rd_party_watermark", "annoyed_expression", "anthro", "applying_makeup", "artist_logo", "bed", "bed_sheet", "bedding", 
"bedroom", "blanket", "blonde_hair", "blue_eyes", "clothing", "comic", "duo", "english_text", "expressions", "eyebrows", "eyeliner", "eyes", "furniture", "green_eyebrows", "green_eyes", "hair", "half-closed_eyes", "head_on_pillow", "humanoid", "letters", "logo", "looking_down_at_another", "lying", "lying_on_bed", "makeup", "mascara", "narrowed_eyes", "on_bed", "pajamas", "personal_grooming", "purple_hair", "purple_theme", "relaxed_expression", "resting", "romantic", "sleepover", "text", "text_box", "under_blanket", "vase", "watermark"], "stage3_selected": ["3rd_party_watermark", "annoyed_expression", "applying_makeup", "artist_logo", "bed_sheet", "bedding", "bedroom", "blanket", "blonde_hair", "blue_eyes", "comic", "english_text", "expressions", "eyeliner", "eyes", "green_eyebrows", "green_eyes", "hair", "half-closed_eyes", "head_on_pillow", "letters", "looking_down_at_another", "lying", "lying_on_bed", "makeup", "mascara", "pajamas", "purple_hair", "purple_theme", "relaxed_expression", "resting", "romantic", "sleepover", "text", "text_box", "under_blanket", "vase", "watermark"], "stage3_selected_scores": {"hair": 0.6031, "text": 0.6007, "blue_eyes": 0.6014, "lying": 0.4409, "green_eyes": 0.5989, "comic": 0.3867, "blonde_hair": 0.5986, "half-closed_eyes": 0.3951, "purple_hair": 0.5642, "makeup": 0.5965, "watermark": 0.6042, "bedroom": 0.4901, "romantic": 0.3813, "bedding": 0.3909, "bed_sheet": 0.3993, "blanket": 0.4205, "mascara": 0.4462, "eyeliner": 0.4454, "lying_on_bed": 0.4093, "artist_logo": 0.3933, "text_box": 0.3916, "pajamas": 0.4086, "purple_theme": 0.4555, "vase": 0.3521, "resting": 0.5034, "annoyed_expression": 0.7251, "expressions": 0.5439, "head_on_pillow": 0.3887, "green_eyebrows": 0.5014, "looking_down_at_another": 0.4491, "3rd_party_watermark": 0.398, "sleepover": 0.5269, "under_blanket": 0.4281, "letters": 0.3656, "applying_makeup": 0.473, "relaxed_expression": 0.5056, "eyes": 0.8951, "english_text": 0.4189}, "stage3_selected_ranks": {"hair": 5, 
"text": 8, "blue_eyes": 7, "lying": 79, "green_eyes": 9, "comic": 152, "blonde_hair": 10, "half-closed_eyes": 140, "purple_hair": 14, "makeup": 11, "watermark": 4, "bedroom": 43, "romantic": 157, "bedding": 149, "bed_sheet": 134, "blanket": 98, "mascara": 74, "eyeliner": 76, "lying_on_bed": 116, "artist_logo": 143, "text_box": 146, "pajamas": 120, "purple_theme": 63, "vase": 187, "resting": 33, "annoyed_expression": 2, "expressions": 18, "head_on_pillow": 151, "green_eyebrows": 34, "looking_down_at_another": 69, "3rd_party_watermark": 135, "sleepover": 26, "under_blanket": 91, "letters": 176, "applying_makeup": 55, "relaxed_expression": 32, "eyes": 1, "english_text": 103}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "lying": 7, "green_eyes": 1, "comic": 9, "blonde_hair": 1, "half-closed_eyes": 10, "purple_hair": 1, "makeup": 1, "watermark": 1, "bedroom": 1, "romantic": 2, "bedding": 7, "bed_sheet": 5, "blanket": 7, "mascara": 9, "eyeliner": 10, "lying_on_bed": 4, "artist_logo": 7, "text_box": 8, "pajamas": 3, "purple_theme": 10, "vase": 8, "resting": 1, "annoyed_expression": 1, "expressions": 3, "head_on_pillow": 8, "green_eyebrows": 2, "looking_down_at_another": 5, "3rd_party_watermark": 3, "sleepover": 1, "under_blanket": 8, "letters": 9, "applying_makeup": 4, "relaxed_expression": 6, "eyes": 1, "english_text": 4}, "extra_evidence": {"3rd_party_watermark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.398}, "annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "applying_makeup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.473}, "artist_logo": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3933}, "bed_sheet": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3993}, "bedding": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3909}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 
0.4901}, "blanket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4205}, "clothing": {"source": "implied"}, "comic": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3867}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4189}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5439}, "eyebrows": {"source": "implied"}, "eyeliner": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4454}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8951}, "green_eyebrows": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5014}, "half-closed_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3951}, "head_on_pillow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3887}, "humanoid": {"source": "structural"}, "letters": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3656}, "logo": {"source": "implied"}, "looking_down_at_another": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4491}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4093}, "mascara": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4462}, "narrowed_eyes": {"source": "implied"}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4086}, "personal_grooming": {"source": "implied"}, "purple_theme": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4555}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5056}, "resting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5034}, "romantic": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3813}, "sleepover": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5269}, "text_box": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3916}, "under_blanket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4281}, 
"vase": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3521}, "watermark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6042}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 3.8, "t2": 1.97, "t3": 26.17, "t3s": 0.65, "t3p": 5.66, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=211 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 185, "n_selected": 45, "n_implied": 17, "n_structural": 3, "n_probe": 3, "ret_R": 0.5, "P": 0.3556, "R": 0.7273, "F1": 0.4776, "leaf_P": 0.1818, "leaf_R": 0.3077, "leaf_F1": 0.2286, "n_leaf_sel": 22, "n_leaf_gt": 13, "ret_P": 0.0595, "sel_given_ret": 1.4545, "over_sel": 2.05, "why": {"explicit": 24}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 4, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 1, "dupe_indices_total": 0, "kept_total": 70, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}, "5": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3556, "gen_R": 0.7273, "gen_F1": 0.4776, "missed": ["canine", "fur", "holding_musical_instrument", "holding_object", "music", "spade_tail"], "extra": ["3_claws", "3_fingers", "4_claws", "acoustic_guitar", "blonde_hair", "bottomwear", "curled_hair", "denim", "denim_clothing", "flowing_hair", "hand_gesture", "jeans", "long_tail", "pants", "pastel_background", "percussion_instrument", "playing_guitar", "playing_music", "poster_(object)", "shirt", "shorts", "toe_claws", "topwear", "torn_bottomwear", "torn_jeans", "torn_pants", "torn_shirt", "torn_shorts", "torn_topwear"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["3_claws", "3_fingers", "4_claws", "acoustic_guitar", "anthro", 
"bass_guitar", "blonde_hair", "bottomwear", "canid", "claws", "clothed", "clothing", "curled_hair", "denim", "denim_clothing", "fingers", "flowing_hair", "guitar", "hair", "hand_gesture", "jeans", "long_tail", "mammal", "musical_instrument", "pants", "pastel_background", "percussion_instrument", "playing_guitar", "playing_music", "plucked_string_instrument", "poster_(object)", "shirt", "shorts", "solo", "string_instrument", "tail", "toe_claws", "topwear", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "torn_shirt", "torn_shorts", "torn_topwear"], "stage3_selected": ["3_claws", "3_fingers", "4_claws", "acoustic_guitar", "bass_guitar", "blonde_hair", "curled_hair", "flowing_hair", "hand_gesture", "long_tail", "pastel_background", "percussion_instrument", "playing_guitar", "plucked_string_instrument", "poster_(object)", "string_instrument", "tail", "toe_claws", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_shirt", "torn_shorts", "torn_topwear"], "stage3_selected_scores": {"tail": 0.5659, "blonde_hair": 0.382, "toe_claws": 0.4913, "torn_clothing": 0.4132, "long_tail": 0.4222, "3_fingers": 0.4085, "string_instrument": 0.8617, "torn_bottomwear": 0.4362, "plucked_string_instrument": 0.8658, "curled_hair": 0.3875, "torn_topwear": 0.3945, "torn_shirt": 0.4049, "playing_guitar": 0.9317, "torn_jeans": 0.4824, "percussion_instrument": 0.8503, "3_claws": 0.4377, "hand_gesture": 0.4013, "torn_shorts": 0.3996, "bass_guitar": 0.9118, "flowing_hair": 0.5669, "4_claws": 0.4516, "poster_(object)": 0.4455, "acoustic_guitar": 0.8654, "pastel_background": 0.5632}, "stage3_selected_ranks": {"tail": 21, "blonde_hair": 147, "toe_claws": 38, "torn_clothing": 112, "long_tail": 104, "3_fingers": 117, "string_instrument": 8, "torn_bottomwear": 85, "plucked_string_instrument": 6, "curled_hair": 142, "torn_topwear": 131, "torn_shirt": 119, "playing_guitar": 2, "torn_jeans": 44, "percussion_instrument": 9, "3_claws": 83, "hand_gesture": 121, "torn_shorts": 126, 
"bass_guitar": 3, "flowing_hair": 20, "4_claws": 70, "poster_(object)": 76, "acoustic_guitar": 7, "pastel_background": 22}, "stage3_selected_phrase_ranks": {"tail": 1, "blonde_hair": 6, "toe_claws": 2, "torn_clothing": 6, "long_tail": 5, "3_fingers": 8, "string_instrument": 7, "torn_bottomwear": 3, "plucked_string_instrument": 5, "curled_hair": 5, "torn_topwear": 10, "torn_shirt": 7, "playing_guitar": 1, "torn_jeans": 1, "percussion_instrument": 7, "3_claws": 7, "hand_gesture": 9, "torn_shorts": 8, "bass_guitar": 2, "flowing_hair": 1, "4_claws": 4, "poster_(object)": 1, "acoustic_guitar": 5, "pastel_background": 1}, "extra_evidence": {"3_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4377}, "3_fingers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4085}, "4_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4516}, "acoustic_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8654}, "blonde_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.382}, "bottomwear": {"source": "implied"}, "curled_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3875}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5669}, "hand_gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4013}, "jeans": {"source": "implied"}, "long_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4222}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5632}, "percussion_instrument": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8503}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9317}, "playing_music": {"source": "implied"}, "poster_(object)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4455}, "shirt": {"source": "implied"}, "shorts": 
{"source": "implied"}, "toe_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4913}, "topwear": {"source": "implied"}, "torn_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4362}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4824}, "torn_pants": {"source": "implied"}, "torn_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4049}, "torn_shorts": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3996}, "torn_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3945}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.75, "t2": 1.86, "t3": 26.63, "t3s": 0.95, "t3p": 1.28, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=185 entity=2 copyright_filtered=4 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_2: candidates (local indices):\n1. auburn hair\n2. pointing\n3. red sclera\n4. studded bracelet\n5. tail ring\n6. torn clothing\n7. ethiopian wolf\n8. inspired by formal art\n9. fire\n10. star-shaped background\n11. 3 claws\n12. wanted poster\n13. bulldog\n14. crosslegged pose\n15. angry expression\n16. 3 fingers\n17. big hair\n18. string instrument\n19. hair tie\n20. head horn\n21. ring\n22. holding tail\n23. torn shirt\n24. coywolf\n25. attack\n26. pun\n27. starry background\n28. long claws\n29. lined paper\n30. rottweiler\n31. lotus pose\n32. annoyed expression\n33. fingerpads\n34. hairclip\n35. percussion instrument\n36. hair dye\n37. demon humanoid\n38. drawstring\n39. big tail\n40. torn shorts\n41. coyote\n42. escape\n43. business attire\n44. sky background\n45. big claws\n46. warning sign\n47. fighting pose\n48. hand on chin\n49. wavy hair\n50. musical instrument\n51. blue hair\n52. short horn\n53. ear piercing\n54. scaly tail\n55. torn body\n56. abyssal wolf\n57. burning\n58. 
transparent background\n59. digitigrade\n60. contrapposto"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 151, "n_selected": 28, "n_implied": 14, "n_structural": 4, "n_probe": 3, "ret_R": 0.6, "P": 0.5714, "R": 0.64, "F1": 0.6038, "leaf_P": 0.3846, "leaf_R": 0.3333, "leaf_F1": 0.3571, "n_leaf_sel": 13, "n_leaf_gt": 15, "ret_P": 0.0993, "sel_given_ret": 1.0667, "over_sel": 1.12, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 89, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 2, "parse_fail": 0, "errors": 1}, "31": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.25, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5714, "gen_R": 0.64, "gen_F1": 0.6038, "missed": ["claws", "crossed_arms", "facial_markings", "fur", "head_markings", "looking_at_another", "markings", "overalls", "standing"], "extra": ["actual_fur", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "corsac_fox", "looking_at_viewer", "t-shirt", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["actual_fur", "anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "bottomwear", "canid", "canine", "clothed", "clothing", "corsac_fox", "duo", "fox", "grey_background", "lagomorph", "leporid", "looking_at_viewer", 
"mammal", "pants", "rabbit", "shirt", "t-shirt", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["actual_fur", "black_pants", "blue_clothing", "blue_overalls", "corsac_fox", "fox", "grey_background", "rabbit", "simple_background", "t-shirt", "white_shirt"], "stage3_selected_scores": {"simple_background": 0.416, "fox": 0.638, "rabbit": 0.6511, "grey_background": 0.6784, "blue_clothing": 0.6538, "t-shirt": 0.724, "white_shirt": 0.8197, "black_pants": 0.833, "corsac_fox": 0.4193, "blue_overalls": 0.9203, "actual_fur": 0.4837}, "stage3_selected_ranks": {"simple_background": 153, "fox": 46, "rabbit": 41, "grey_background": 30, "blue_clothing": 39, "t-shirt": 22, "white_shirt": 4, "black_pants": 3, "corsac_fox": 152, "blue_overalls": 1, "actual_fur": 137}, "stage3_selected_phrase_ranks": {"simple_background": 8, "fox": 1, "rabbit": 1, "grey_background": 1, "blue_clothing": 8, "t-shirt": 2, "white_shirt": 1, "black_pants": 1, "corsac_fox": 9, "blue_overalls": 1, "actual_fur": 10}, "extra_evidence": {"actual_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4837}, "black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.833}, "blue_clothing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6538}, "blue_overalls": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9203}, "corsac_fox": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4193}, "looking_at_viewer": {"source": "structural"}, "t-shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.724}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8197}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 1.59, "t2": 1.41, "t3": 44.32, "t3s": 1.83, 
"t3p": 6.61, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=151 entity=5 copyright_filtered=0 generic_char_to_general=0 unknown_type=3", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"style_or_meta\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"style_or_meta\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"style_or_meta\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"style_or_meta\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 43, \"why\": \"style_or_meta\"}, {\"i\": 44, \"why\": \"weak_implied\"}, {\"i\": 45}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.33.why\n Field required [type=missing, input_value={'i': 45}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 204, "n_selected": 86, "n_implied": 26, "n_structural": 4, "n_probe": 3, "ret_R": 0.6364, "P": 0.1163, "R": 0.9091, "F1": 0.2062, "leaf_P": 0.08, "leaf_R": 0.5714, "leaf_F1": 0.1404, "n_leaf_sel": 50, "n_leaf_gt": 7, "ret_P": 0.0343, "sel_given_ret": 1.4286, "over_sel": 7.82, "why": {"explicit": 52, "strong_implied": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 4, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 108, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}, "25": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1163, "gen_R": 0.9091, "gen_F1": 0.2062, "missed": ["purple_body"], "extra": ["abs", "action_pose", "animal_humanoid", "anthro", "belly", "big_eyes", "blue_fingers", "blue_pawpads", "blue_paws", "blue_stripes", "blue_toes", "blue_tuft", "bovid", "bovid_humanoid", "canid_humanoid", "canine_humanoid", "caprine", "caprine_humanoid", "clothed", "clothing", "curved_tail", "expression_sheet", "facial_markings", "facial_stripes", "fennec_humanoid", "fluffy_fur", "fox_humanoid", "goat_humanoid", "head_markings", "heterochromia", "humanoid", "jumper", "jumping", "light_tail", "male", "male_humanoid", "mammal_humanoid", "markings", "mouth_closed", "multicolored_body", "multicolored_fur", "multicolored_tail", "muscular", "muscular_male", "open_smile", "pawpads", "pig_humanoid", "pink_ears", "pink_stripes", "pink_tail", "pink_tongue", "pose", "posed", "purple_belly", "smile", "striped_face", "striped_neck", "stripes", "suid", "suid_humanoid", "suina", 
"suina_humanoid", "tail", "tailed_humanoid", "tanuki_humanoid", "thin_tail", "tongue", "tongue_out", "two_tone_body", "two_tone_fur", "two_tone_tail", "walking", "wavy_tail", "white_tail", "wolf_humanoid", "x_eyes"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["abs", "action_pose", "animal_humanoid", "anthro", "belly", "big_eyes", "blue_eyes", "blue_fingers", "blue_nose", "blue_pawpads", "blue_paws", "blue_stripes", "blue_toes", "blue_tuft", "bovid", "bovid_humanoid", "canid", "canid_humanoid", "canine", "canine_humanoid", "caprine", "caprine_humanoid", "clothed", "clothing", "curved_tail", "expression_sheet", "facial_markings", "facial_stripes", "fennec_humanoid", "fluffy_fur", "fox_humanoid", "fur", "goat_humanoid", "head_markings", "heterochromia", "humanoid", "jumper", "jumping", "light_tail", "male", "male_humanoid", "mammal", "mammal_humanoid", "markings", "mouth_closed", "multicolored_body", "multicolored_fur", "multicolored_tail", "muscular", "muscular_male", "open_mouth", "open_smile", "pawpads", "pig_humanoid", "pink_ears", "pink_stripes", "pink_tail", "pink_tongue", "pose", "posed", "purple_belly", "smile", "solo", "striped_face", "striped_neck", "stripes", "suid", "suid_humanoid", "suina", "suina_humanoid", "tail", "tailed_humanoid", "tanuki_humanoid", "thin_tail", "tongue", "tongue_out", "two_tone_body", "two_tone_fur", "two_tone_tail", "walking", "wavy_tail", "white_body", "white_fur", "white_tail", "wolf_humanoid", "x_eyes"], "stage3_selected": ["abs", "action_pose", "big_eyes", "blue_eyes", "blue_fingers", "blue_nose", "blue_pawpads", "blue_paws", "blue_stripes", "blue_toes", "blue_tuft", "canine_humanoid", "curved_tail", "expression_sheet", "facial_stripes", "fennec_humanoid", "fluffy_fur", "goat_humanoid", "heterochromia", "jumper", "jumping", "light_tail", "male_humanoid", "mouth_closed", "multicolored_fur", 
"multicolored_tail", "muscular", "muscular_male", "open_mouth", "open_smile", "pig_humanoid", "pink_ears", "pink_stripes", "pink_tail", "pink_tongue", "pose", "posed", "purple_belly", "striped_face", "striped_neck", "suina_humanoid", "tail", "tailed_humanoid", "tanuki_humanoid", "thin_tail", "tongue_out", "two_tone_fur", "two_tone_tail", "walking", "wavy_tail", "white_body", "white_fur", "white_tail", "wolf_humanoid", "x_eyes"], "stage3_selected_scores": {"open_mouth": 0.6008, "tail": 0.6107, "white_body": 0.4875, "tongue_out": 0.3536, "blue_eyes": 0.5995, "white_fur": 0.5995, "muscular": 0.3548, "muscular_male": 0.3102, "multicolored_fur": 0.4995, "abs": 0.3223, "two_tone_fur": 0.4901, "open_smile": 0.4868, "multicolored_tail": 0.4718, "pink_tongue": 0.4215, "canine_humanoid": 0.9003, "white_tail": 0.5202, "heterochromia": 0.4423, "two_tone_tail": 0.5197, "blue_nose": 0.6032, "big_eyes": 0.4207, "mouth_closed": 0.5218, "walking": 0.3534, "pink_tail": 0.5444, "blue_pawpads": 0.4891, "wolf_humanoid": 0.819, "pink_ears": 0.5255, "blue_stripes": 0.6748, "thin_tail": 0.5604, "x_eyes": 0.3999, "goat_humanoid": 0.5534, "blue_paws": 0.4986, "light_tail": 0.5671, "striped_face": 0.5807, "tanuki_humanoid": 0.7574, "expression_sheet": 0.4555, "pink_stripes": 0.682, "tailed_humanoid": 0.5525, "wavy_tail": 0.5224, "fluffy_fur": 0.5593, "curved_tail": 0.637, "male_humanoid": 0.5627, "purple_belly": 0.5454, "striped_neck": 0.5948, "blue_fingers": 0.5077, "blue_toes": 0.5148, "suina_humanoid": 0.563, "blue_tuft": 0.5037, "facial_stripes": 0.5968, "pig_humanoid": 0.5894, "fennec_humanoid": 0.7741, "posed": 0.4484, "jumper": 0.4077, "pose": 0.6199, "action_pose": 0.617, "jumping": 0.6014}, "stage3_selected_ranks": {"open_mouth": 28, "tail": 23, "white_body": 114, "tongue_out": 194, "blue_eyes": 29, "white_fur": 30, "muscular": 193, "muscular_male": 202, "multicolored_fur": 100, "abs": 199, "two_tone_fur": 108, "open_smile": 116, "multicolored_tail": 131, "pink_tongue": 166, 
"canine_humanoid": 1, "white_tail": 82, "heterochromia": 158, "two_tone_tail": 83, "blue_nose": 26, "big_eyes": 167, "mouth_closed": 81, "walking": 195, "pink_tail": 62, "blue_pawpads": 109, "wolf_humanoid": 4, "pink_ears": 75, "blue_stripes": 14, "thin_tail": 53, "x_eyes": 177, "goat_humanoid": 58, "blue_paws": 102, "light_tail": 48, "striped_face": 43, "tanuki_humanoid": 8, "expression_sheet": 147, "pink_stripes": 13, "tailed_humanoid": 59, "wavy_tail": 79, "fluffy_fur": 54, "curved_tail": 16, "male_humanoid": 52, "purple_belly": 61, "striped_neck": 35, "blue_fingers": 93, "blue_toes": 87, "suina_humanoid": 51, "blue_tuft": 96, "facial_stripes": 33, "pig_humanoid": 37, "fennec_humanoid": 6, "posed": 154, "jumper": 173, "pose": 18, "action_pose": 19, "jumping": 27}, "stage3_selected_phrase_ranks": {"open_mouth": 1, "tail": 1, "white_body": 8, "tongue_out": 10, "blue_eyes": 1, "white_fur": 1, "muscular": 6, "muscular_male": 8, "multicolored_fur": 6, "abs": 7, "two_tone_fur": 7, "open_smile": 2, "multicolored_tail": 9, "pink_tongue": 3, "canine_humanoid": 1, "white_tail": 7, "heterochromia": 6, "two_tone_tail": 8, "blue_nose": 1, "big_eyes": 6, "mouth_closed": 3, "walking": 10, "pink_tail": 1, "blue_pawpads": 10, "wolf_humanoid": 4, "pink_ears": 4, "blue_stripes": 1, "thin_tail": 5, "x_eyes": 9, "goat_humanoid": 8, "blue_paws": 9, "light_tail": 4, "striped_face": 5, "tanuki_humanoid": 7, "expression_sheet": 8, "pink_stripes": 1, "tailed_humanoid": 9, "wavy_tail": 7, "fluffy_fur": 2, "curved_tail": 1, "male_humanoid": 7, "purple_belly": 4, "striped_neck": 4, "blue_fingers": 5, "blue_toes": 4, "suina_humanoid": 6, "blue_tuft": 7, "facial_stripes": 3, "pig_humanoid": 3, "fennec_humanoid": 6, "posed": 9, "jumper": 3, "pose": 1, "action_pose": 1, "jumping": 1}, "extra_evidence": {"abs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3223}, "action_pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.617}, "animal_humanoid": {"source": 
"implied"}, "anthro": {"source": "structural"}, "belly": {"source": "implied"}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4207}, "blue_fingers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5077}, "blue_pawpads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4891}, "blue_paws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4986}, "blue_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6748}, "blue_toes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5148}, "blue_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5037}, "bovid": {"source": "implied"}, "bovid_humanoid": {"source": "implied"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9003}, "caprine": {"source": "implied"}, "caprine_humanoid": {"source": "implied"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.637}, "expression_sheet": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4555}, "facial_markings": {"source": "implied"}, "facial_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5968}, "fennec_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7741}, "fluffy_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5593}, "fox_humanoid": {"source": "implied"}, "goat_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5534}, "head_markings": {"source": "implied"}, "heterochromia": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4423}, "humanoid": {"source": "implied"}, "jumper": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4077}, "jumping": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6014}, "light_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 
0.5671}, "male": {"source": "structural"}, "male_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5627}, "mammal_humanoid": {"source": "implied"}, "markings": {"source": "implied"}, "mouth_closed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5218}, "multicolored_body": {"source": "implied"}, "multicolored_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4995}, "multicolored_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4718}, "muscular": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3548}, "muscular_male": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3102}, "open_smile": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4868}, "pawpads": {"source": "implied"}, "pig_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5894}, "pink_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5255}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.682}, "pink_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5444}, "pink_tongue": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4215}, "pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6199}, "posed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4484}, "purple_belly": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5454}, "smile": {"source": "implied"}, "striped_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5807}, "striped_neck": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5948}, "stripes": {"source": "implied"}, "suid": {"source": "implied"}, "suid_humanoid": {"source": "implied"}, "suina": {"source": "implied"}, "suina_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.563}, "tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6107}, "tailed_humanoid": {"source": "stage3", "why": 
"explicit", "retrieval_score": 0.5525}, "tanuki_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7574}, "thin_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5604}, "tongue": {"source": "implied"}, "tongue_out": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3536}, "two_tone_body": {"source": "implied"}, "two_tone_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "two_tone_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5197}, "walking": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3534}, "wavy_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5224}, "white_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5202}, "wolf_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.819}, "x_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3999}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 1.31, "t2": 2.03, "t3": 85.25, "t3s": 1.97, "t3p": 0.96, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=205 entity=4 copyright_filtered=2 generic_char_to_general=0 unknown_type=5"]}
|
data/eval_results/latency_baseline_seed43.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T06:02:06.240469", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 10, "temperature": 0.0, "shuffle": false, "seed": 43, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 27}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 142, "n_selected": 55, "n_implied": 22, "n_structural": 0, "n_probe": 5, "ret_R": 0.3333, "P": 0.1455, "R": 0.6667, "F1": 0.2388, "leaf_P": 0.08, "leaf_R": 0.2222, "leaf_F1": 0.1176, "n_leaf_sel": 25, "n_leaf_gt": 9, "ret_P": 0.0282, "sel_given_ret": 2.0, "over_sel": 4.58, "why": {"explicit": 29}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 53, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1455, "gen_R": 0.6667, "gen_F1": 0.2388, "missed": ["alpha_channel", "clothed", "fingers", "male"], "extra": ["beer_mug", "beverage", "black_body", "black_clothing", "black_fur", "black_shirt", "black_topwear", "bobcat", "business_attire", "business_suit", "businesswear", "coffee_cup", "coffee_mug", "container", "cup", "domestic_cat", "dress_shirt", "felis", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "lynx", "mug", "pockets", "shirt", "siamese", "suit", "suit_jacket", "sweater", "sweater_vest", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_dress_shirt", "white_shirt", "white_t-shirt", "white_topwear"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "beer_mug", "beverage", "black_body", 
"black_clothing", "black_fur", "black_shirt", "black_topwear", "bobcat", "business_attire", "business_suit", "businesswear", "clothing", "coffee_cup", "coffee_mug", "container", "cup", "domestic_cat", "dress_shirt", "felid", "feline", "felis", "fur", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "lynx", "mammal", "mug", "pockets", "shirt", "siamese", "solo", "suit", "suit_jacket", "sweater", "sweater_vest", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_dress_shirt", "white_shirt", "white_t-shirt", "white_topwear"], "stage3_selected": ["anthro", "beer_mug", "black_fur", "black_shirt", "bobcat", "business_attire", "business_suit", "businesswear", "coffee_cup", "coffee_mug", "dress_shirt", "feline", "grey_shirt", "hair_bun", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "lynx", "pockets", "shirt", "siamese", "suit_jacket", "sweater_vest", "teal_shirt", "vest", "white_dress_shirt", "white_shirt", "white_t-shirt"], "stage3_selected_scores": {"anthro": 0.4929, "feline": 0.7061, "shirt": 0.7997, "black_fur": 0.7182, "lynx": 0.4992, "vest": 0.8403, "white_shirt": 0.738, "holding_container": 0.7598, "black_shirt": 0.7352, "dress_shirt": 0.7241, "pockets": 0.6589, "holding_cup": 0.7667, "hair_bun": 0.6926, "siamese": 0.6226, "holding_beverage": 0.772, "coffee_mug": 0.7055, "grey_shirt": 0.7582, "business_suit": 0.5745, "coffee_cup": 0.6863, "bobcat": 0.5768, "sweater_vest": 0.7532, "holding_mug": 0.9159, "beer_mug": 0.6598, "white_t-shirt": 0.7329, "suit_jacket": 0.6863, "businesswear": 0.5715, "white_dress_shirt": 0.6881, "business_attire": 0.5657, "teal_shirt": 0.7474}, "stage3_selected_ranks": {"anthro": 120, "feline": 44, "shirt": 5, "black_fur": 35, "lynx": 117, "vest": 3, "white_shirt": 22, "holding_container": 11, "black_shirt": 25, "dress_shirt": 33, "pockets": 60, "holding_cup": 10, 
"hair_bun": 51, "siamese": 71, "holding_beverage": 8, "coffee_mug": 45, "grey_shirt": 13, "business_suit": 85, "coffee_cup": 54, "bobcat": 82, "sweater_vest": 16, "holding_mug": 1, "beer_mug": 59, "white_t-shirt": 26, "suit_jacket": 53, "businesswear": 87, "white_dress_shirt": 52, "business_attire": 89, "teal_shirt": 19}, "stage3_selected_phrase_ranks": {"anthro": 9, "feline": 1, "shirt": 1, "black_fur": 1, "lynx": 6, "vest": 1, "white_shirt": 4, "holding_container": 5, "black_shirt": 8, "dress_shirt": 2, "pockets": 3, "holding_cup": 4, "hair_bun": 1, "siamese": 5, "holding_beverage": 3, "coffee_mug": 3, "grey_shirt": 1, "business_suit": 8, "coffee_cup": 4, "bobcat": 6, "sweater_vest": 3, "holding_mug": 1, "beer_mug": 5, "white_t-shirt": 3, "suit_jacket": 4, "businesswear": 9, "white_dress_shirt": 5, "business_attire": 1, "teal_shirt": 4}, "extra_evidence": {"beer_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6598}, "beverage": {"source": "implied"}, "black_body": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7182}, "black_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7352}, "black_topwear": {"source": "implied"}, "bobcat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5768}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5657}, "business_suit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5745}, "businesswear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5715}, "coffee_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6863}, "coffee_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7055}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "domestic_cat": {"source": "implied"}, "dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7241}, "felis": {"source": "implied"}, 
"grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6926}, "holding_beverage": {"source": "stage3", "why": "explicit", "retrieval_score": 0.772}, "holding_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7598}, "holding_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7667}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9159}, "holding_object": {"source": "implied"}, "lynx": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4992}, "mug": {"source": "implied"}, "pockets": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6589}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7997}, "siamese": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6226}, "suit": {"source": "implied"}, "suit_jacket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6863}, "sweater": {"source": "implied"}, "sweater_vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7532}, "t-shirt": {"source": "implied"}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8403}, "white_clothing": {"source": "implied"}, "white_dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6881}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.738}, "white_t-shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7329}, "white_topwear": {"source": "implied"}}, "structural": [], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 3.66, "t2": 1.76, "t3": 34.39, "t3s": 5.18, "t3p": 12.81, "err": null, "issues": 
["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=141 entity=1 copyright_filtered=1 generic_char_to_general=0 unknown_type=1"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 160, "n_selected": 26, "n_implied": 4, "n_structural": 6, "n_probe": 6, "ret_R": 0.5714, "P": 0.3846, "R": 0.7143, "F1": 0.5, "leaf_P": 0.125, "leaf_R": 0.2, "leaf_F1": 0.1538, "n_leaf_sel": 16, "n_leaf_gt": 10, "ret_P": 0.05, "sel_given_ret": 1.25, "over_sel": 1.86, "why": {"explicit": 10, "strong_implied": 4}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 69, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "41": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3846, "gen_R": 0.7143, "gen_F1": 0.5, "missed": ["fur", "hair", "human", "male"], "extra": ["anthro", "belly_dancer_outfit", "bottomwear", "dancewear", "duo", "gorilla", "grinning_at_viewer", "humanoid", "loincloth", "monkey", "raised_arms", "spread_arms", "sun_bear", "topless", "trio", "ursine"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "belly_dancer_outfit", "bottomwear", "clothed", "clothing", "dancewear", "dancing", "duo", "gorilla", "grinning_at_viewer", "group", "haplorhine", "humanoid", "loincloth", "looking_at_viewer", "mammal", "monkey", "primate", "raised_arms", "spread_arms", "sun_bear", "topless", "trio", "ursine"], "stage3_selected": ["ape", "bear", "belly_dancer_outfit", "dancewear", "dancing", "gorilla", "grinning_at_viewer", "loincloth", "looking_at_viewer", "monkey", 
"primate", "raised_arms", "spread_arms", "sun_bear"], "stage3_selected_scores": {"looking_at_viewer": 0.5483, "bear": 0.5736, "primate": 0.8904, "loincloth": 0.5697, "dancing": 0.5576, "ape": 0.9767, "raised_arms": 0.5461, "dancewear": 0.3475, "sun_bear": 0.4334, "belly_dancer_outfit": 0.3547, "monkey": 0.7558, "gorilla": 0.8299, "spread_arms": 0.403, "grinning_at_viewer": 0.4425}, "stage3_selected_ranks": {"looking_at_viewer": 18, "bear": 12, "primate": 2, "loincloth": 13, "dancing": 16, "ape": 1, "raised_arms": 19, "dancewear": 127, "sun_bear": 51, "belly_dancer_outfit": 119, "monkey": 6, "gorilla": 4, "spread_arms": 77, "grinning_at_viewer": 42}, "stage3_selected_phrase_ranks": {"looking_at_viewer": 1, "bear": 1, "primate": 1, "loincloth": 1, "dancing": 1, "ape": 1, "raised_arms": 1, "dancewear": 8, "sun_bear": 10, "belly_dancer_outfit": 7, "monkey": 6, "gorilla": 2, "spread_arms": 5, "grinning_at_viewer": 3}, "extra_evidence": {"anthro": {"source": "structural"}, "belly_dancer_outfit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3547}, "bottomwear": {"source": "implied"}, "dancewear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3475}, "duo": {"source": "probe"}, "gorilla": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8299}, "grinning_at_viewer": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4425}, "humanoid": {"source": "structural"}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5697}, "monkey": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7558}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5461}, "spread_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.403}, "sun_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4334}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "ursine": {"source": "implied"}}, "structural": ["trio", "anthro", 
"humanoid", "clothed", "topless", "looking_at_viewer"], "probe": ["clothing", "simple_background", "anthro", "duo", "group", "bear"], "t1": 2.77, "t2": 3.88, "t3": 30.11, "t3s": 5.47, "t3p": 6.35, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=161 entity=5 copyright_filtered=2 generic_char_to_general=1 unknown_type=3"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 133, "n_selected": 34, "n_implied": 11, "n_structural": 3, "n_probe": 5, "ret_R": 0.7143, "P": 0.3529, "R": 0.8571, "F1": 0.5, "leaf_P": 0.2857, "leaf_R": 0.6667, "leaf_F1": 0.4, "n_leaf_sel": 21, "n_leaf_gt": 9, "ret_P": 0.0752, "sel_given_ret": 1.2, "over_sel": 2.43, "why": {"explicit": 16, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 67, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "11": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3529, "gen_R": 0.8571, "gen_F1": 0.5, "missed": ["romantic", "romantic_couple"], "extra": ["<3", "blue_hair", "coat", "confident", "domestic_rabbit", "dutch_rabbit", "expressions", "fur_coat", "hair", "hand_holding", "holding_object", "holding_plushie", "holding_toy", "lab_coat", "oryctolagus", "relationship", "round_ears", "setting", "teddy_bear", "topwear", "toy", "winter_coat"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blue_hair", "blush", "clothed", "clothing", "coat", "confident", "domestic_rabbit", "duo", "dutch_rabbit", "expressions", "fur_coat", "hair", "hand_holding", "holding_object", "holding_plushie", "holding_toy", "lab_coat", "lagomorph", "leporid", "mammal", "oryctolagus", "plushie", "rabbit", "relationship", "round_ears", "setting", "teal_eyes", 
"teddy_bear", "topwear", "toy", "winter_coat"], "stage3_selected": ["blue_eyes", "blue_hair", "coat", "confident", "duo", "dutch_rabbit", "expressions", "fur_coat", "hand_holding", "holding_plushie", "holding_toy", "lab_coat", "relationship", "round_ears", "setting", "teal_eyes", "teddy_bear", "winter_coat"], "stage3_selected_scores": {"duo": 0.3628, "blue_eyes": 0.615, "blue_hair": 0.4201, "coat": 0.6383, "teal_eyes": 0.6283, "lab_coat": 0.516, "teddy_bear": 0.5459, "confident": 0.5161, "expressions": 0.5454, "holding_plushie": 0.7793, "fur_coat": 0.4906, "winter_coat": 0.4759, "dutch_rabbit": 0.4583, "holding_toy": 0.5855, "relationship": 0.6206, "setting": 0.5567, "hand_holding": 0.4283, "round_ears": 0.4342}, "stage3_selected_ranks": {"duo": 131, "blue_eyes": 12, "blue_hair": 118, "coat": 7, "teal_eyes": 8, "lab_coat": 40, "teddy_bear": 22, "confident": 39, "expressions": 23, "holding_plushie": 2, "fur_coat": 53, "winter_coat": 66, "dutch_rabbit": 80, "holding_toy": 14, "relationship": 9, "setting": 20, "hand_holding": 112, "round_ears": 102}, "stage3_selected_phrase_ranks": {"duo": 3, "blue_eyes": 1, "blue_hair": 8, "coat": 1, "teal_eyes": 1, "lab_coat": 5, "teddy_bear": 5, "confident": 7, "expressions": 2, "holding_plushie": 1, "fur_coat": 9, "winter_coat": 10, "dutch_rabbit": 4, "holding_toy": 4, "relationship": 1, "setting": 1, "hand_holding": 9, "round_ears": 10}, "extra_evidence": {"<3": {"source": "probe"}, "blue_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4201}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "confident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5161}, "domestic_rabbit": {"source": "implied"}, "dutch_rabbit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4583}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5454}, "fur_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4906}, "hair": {"source": "implied"}, 
"hand_holding": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4283}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "holding_toy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5855}, "lab_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.516}, "oryctolagus": {"source": "implied"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "round_ears": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4342}, "setting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5567}, "teddy_bear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5459}, "topwear": {"source": "implied"}, "toy": {"source": "implied"}, "winter_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4759}}, "structural": ["duo", "anthro", "clothed"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 3.58, "t2": 1.8, "t3": 18.71, "t3s": 7.52, "t3p": 13.4, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=131 entity=2 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 117, "n_selected": 21, "n_implied": 4, "n_structural": 3, "n_probe": 4, "ret_R": 0.75, "P": 0.1905, "R": 1.0, "F1": 0.32, "leaf_P": 0.25, "leaf_R": 1.0, "leaf_F1": 0.4, "n_leaf_sel": 16, "n_leaf_gt": 4, "ret_P": 0.0256, "sel_given_ret": 1.3333, "over_sel": 5.25, "why": {"explicit": 14}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 29, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1905, "gen_R": 1.0, "gen_F1": 0.32, "missed": [], "extra": ["anthro", "big_eyes", "clothed", "clothing", "elemental_creature", "floating", "gem", "gem_creature", "glistening", "glistening_nose", "mineral_fauna", "nose", "sparkling_background", "spots", "spotted_legs", "toothy_smile", "wide_eyed"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["anthro", "big_eyes", "clothed", "clothing", "elemental_creature", "floating", "gem", "gem_creature", "glistening", "glistening_nose", "mineral_fauna", "nose", "red_nose", "smile", "solo", "sparkling_background", "spots", "spotted_legs", "tan_body", "toothy_smile", "wide_eyed"], "stage3_selected": ["big_eyes", "floating", "gem_creature", "glistening_nose", "nose", "red_nose", "smile", "sparkling_background", "spots", "spotted_legs", "tan_body", "toothy_smile", "white_background", "wide_eyed"], "stage3_selected_scores": {"smile": 0.5956, "white_background": 0.6072, "tan_body": 0.6582, "spots": 0.6224, "wide_eyed": 0.4482, "big_eyes": 0.6934, "red_nose": 0.7475, "floating": 
0.6454, "glistening_nose": 0.5913, "spotted_legs": 0.6492, "gem_creature": 0.4594, "sparkling_background": 0.4258, "toothy_smile": 0.4302, "nose": 0.8611}, "stage3_selected_ranks": {"smile": 36, "white_background": 32, "tan_body": 15, "spots": 27, "wide_eyed": 91, "big_eyes": 7, "red_nose": 3, "floating": 20, "glistening_nose": 38, "spotted_legs": 16, "gem_creature": 85, "sparkling_background": 106, "toothy_smile": 101, "nose": 2}, "stage3_selected_phrase_ranks": {"smile": 2, "white_background": 1, "tan_body": 6, "spots": 7, "wide_eyed": 8, "big_eyes": 1, "red_nose": 1, "floating": 1, "glistening_nose": 4, "spotted_legs": 5, "gem_creature": 5, "sparkling_background": 7, "toothy_smile": 10, "nose": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6934}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "elemental_creature": {"source": "implied"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6454}, "gem": {"source": "implied"}, "gem_creature": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4594}, "glistening": {"source": "implied"}, "glistening_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5913}, "mineral_fauna": {"source": "implied"}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8611}, "sparkling_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4258}, "spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6224}, "spotted_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6492}, "toothy_smile": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4302}, "wide_eyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4482}}, "structural": ["solo", "anthro", "clothed"], "probe": ["clothing", "simple_background", "anthro", "solo"], "t1": 3.57, "t2": 1.49, "t3": 8.55, "t3s": 5.0, "t3p": 13.39, "err": null, 
"issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=120 entity=0 copyright_filtered=2 generic_char_to_general=0 unknown_type=4"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 184, "n_selected": 40, "n_implied": 13, "n_structural": 5, "n_probe": 3, "ret_R": 0.4091, "P": 0.375, "R": 0.6818, "F1": 0.4839, "leaf_P": 0.1923, "leaf_R": 0.4167, "leaf_F1": 0.2632, "n_leaf_sel": 26, "n_leaf_gt": 12, "ret_P": 0.0489, "sel_given_ret": 1.6667, "over_sel": 1.82, "why": {"explicit": 18, "strong_implied": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 3, "calls_exhausted_retries": 1, "attempts_total": 6, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 1, "dupe_indices_total": 0, "kept_total": 57, "attempts_by_n_local": {"60": {"attempts": 5, "parse_ok": 2, "parse_fail": 0, "errors": 3}, "1": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.5, "call_exhaustion_rate": 0.25}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.375, "gen_R": 0.6818, "gen_F1": 0.4839, "missed": ["chest_tuft", "countershading", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger"], "extra": ["action_pose", "back_muscles", "big_biceps", "cheek_tuft", "countershade_body", "eyes", "facial_tuft", "flexing", "flexing_both_biceps", "full-length_portrait", "gesture", "heterochromia", "light_hands", "pattern_clothing", "pattern_topwear", "portrait", "pose", "smile", "smiling_at_viewer", "striped_body", "striped_ears", "striped_fur", "suggestive_pose", "topwear", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["action_pose", "anthro", "back_muscles", "big_biceps", "blue_eyes", "bottomwear", "cheek_tuft", "clothed", 
"clothing", "countershade_body", "eyes", "facial_tuft", "felid", "flexing", "flexing_both_biceps", "full-length_portrait", "fur", "gesture", "hand_on_head", "heterochromia", "light_hands", "male", "mammal", "pattern_clothing", "pattern_topwear", "portrait", "pose", "shorts", "smile", "smiling_at_viewer", "solo", "striped_body", "striped_ears", "striped_fur", "stripes", "suggestive_pose", "topless", "topwear", "tuft", "white_chest"], "stage3_selected": ["action_pose", "back_muscles", "big_biceps", "blue_eyes", "cheek_tuft", "countershade_body", "eyes", "flexing", "flexing_both_biceps", "full-length_portrait", "gesture", "hand_on_head", "heterochromia", "light_hands", "pattern_topwear", "shorts", "smiling_at_viewer", "striped_ears", "striped_fur", "suggestive_pose", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5739, "shorts": 0.5818, "cheek_tuft": 0.472, "gesture": 0.5883, "full-length_portrait": 0.4425, "striped_fur": 0.6411, "heterochromia": 0.4, "hand_on_head": 0.5952, "flexing": 0.5536, "back_muscles": 0.5889, "action_pose": 0.4747, "big_biceps": 0.6943, "white_chest": 0.9204, "striped_ears": 0.4514, "countershade_body": 0.8758, "light_hands": 0.8131, "flexing_both_biceps": 0.5644, "eyes": 0.9807, "smiling_at_viewer": 0.4503, "pattern_topwear": 0.4408, "suggestive_pose": 0.6259}, "stage3_selected_ranks": {"blue_eyes": 63, "shorts": 62, "cheek_tuft": 116, "gesture": 59, "full-length_portrait": 140, "striped_fur": 36, "heterochromia": 162, "hand_on_head": 53, "flexing": 68, "back_muscles": 58, "action_pose": 113, "big_biceps": 24, "white_chest": 2, "striped_ears": 133, "countershade_body": 3, "light_hands": 13, "flexing_both_biceps": 65, "eyes": 1, "smiling_at_viewer": 134, "pattern_topwear": 142, "suggestive_pose": 43}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "shorts": 1, "cheek_tuft": 8, "gesture": 1, "full-length_portrait": 9, "striped_fur": 2, "heterochromia": 8, "hand_on_head": 2, "flexing": 10, "back_muscles": 7, "action_pose": 7, 
"big_biceps": 7, "white_chest": 1, "striped_ears": 1, "countershade_body": 1, "light_hands": 10, "flexing_both_biceps": 8, "eyes": 1, "smiling_at_viewer": 8, "pattern_topwear": 10, "suggestive_pose": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4747}, "back_muscles": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5889}, "big_biceps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6943}, "cheek_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.472}, "countershade_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8758}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9807}, "facial_tuft": {"source": "implied"}, "flexing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5536}, "flexing_both_biceps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5644}, "full-length_portrait": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4425}, "gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5883}, "heterochromia": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4}, "light_hands": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8131}, "pattern_clothing": {"source": "implied"}, "pattern_topwear": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4408}, "portrait": {"source": "implied"}, "pose": {"source": "implied"}, "smile": {"source": "implied"}, "smiling_at_viewer": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4503}, "striped_body": {"source": "implied"}, "striped_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4514}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6411}, "suggestive_pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6259}, "topwear": {"source": "implied"}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 
0.9204}}, "structural": ["solo", "anthro", "male", "clothed", "topless"], "probe": ["anthro", "felid", "solo"], "t1": 2.06, "t2": 1.77, "t3": 63.87, "t3s": 1.99, "t3p": 3.52, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=181 entity=2 copyright_filtered=2 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: gave up after 3 attempts"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 190, "n_selected": 23, "n_implied": 6, "n_structural": 5, "n_probe": 4, "ret_R": 0.6154, "P": 0.3913, "R": 0.6923, "F1": 0.5, "leaf_P": 0.2, "leaf_R": 0.5, "leaf_F1": 0.2857, "n_leaf_sel": 15, "n_leaf_gt": 6, "ret_P": 0.0421, "sel_given_ret": 1.125, "over_sel": 1.77, "why": {"explicit": 7, "strong_implied": 5}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 5, "attempt_errors": 2, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 98, "attempts_by_n_local": {"60": {"attempts": 4, "parse_ok": 2, "parse_fail": 0, "errors": 2}, "58": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.4, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3913, "gen_R": 0.6923, "gen_F1": 0.5, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["agamid", "anthro", "clothed", "clothing", "dark_theme", "darkness", "frilled_lizard", "group", "guardian", "light", "male_human", "mask", "note", "taur"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["agamid", "anthro", "bovid", "caprine", "clothed", "clothing", "dark_theme", "darkness", "frilled_lizard", "goat", "group", "guardian", "human", "light", "lizard", "male_human", "mammal", "mask", "note", "reptile", "scalie", "taur", "text"], "stage3_selected": ["dark_theme", "darkness", "frilled_lizard", "goat", "guardian", "human", "light", "lizard", "male_human", "mask", "note", "text"], "stage3_selected_scores": {"text": 0.3659, "mask": 0.3726, "light": 0.5823, "darkness": 0.5976, "dark_theme": 0.3993, "note": 0.5657, "guardian": 
0.3706, "human": 0.5571, "lizard": 0.5942, "goat": 0.5776, "frilled_lizard": 0.458, "male_human": 0.4223}, "stage3_selected_ranks": {"text": 127, "mask": 114, "light": 6, "darkness": 2, "dark_theme": 88, "note": 11, "guardian": 118, "human": 12, "lizard": 3, "goat": 8, "frilled_lizard": 29, "male_human": 65}, "stage3_selected_phrase_ranks": {"text": 8, "mask": 3, "light": 1, "darkness": 1, "dark_theme": 4, "note": 1, "guardian": 3, "human": 1, "lizard": 1, "goat": 1, "frilled_lizard": 2, "male_human": 2}, "extra_evidence": {"agamid": {"source": "implied"}, "anthro": {"source": "structural"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "dark_theme": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3993}, "darkness": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5976}, "frilled_lizard": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.458}, "group": {"source": "structural"}, "guardian": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3706}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5823}, "male_human": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4223}, "mask": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3726}, "note": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5657}, "taur": {"source": "structural"}}, "structural": ["group", "anthro", "taur", "clothed", "text"], "probe": ["clothing", "simple_background", "anthro", "text"], "t1": 2.21, "t2": 1.75, "t3": 62.45, "t3s": 1.02, "t3p": 4.36, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=178 entity=4 copyright_filtered=8 generic_char_to_general=0 unknown_type=1", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"other\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, 
\"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"other\"}, {\"i\": 9, \"why\": \"other\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"other\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"other\"}, {\"i\": 40, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 45, \"why\": \"other\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 51, \"why\": \"explicit\"}, {\"i\": 52, \"why\": \"other\"}, {\"i\": 55, \"why\": \"explicit\"}, {\"i\": 56}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.38.why\n Field required [type=missing, input_value={'i': 56}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"other\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"other\"}, {\"i\": 9, \"why\": \"other\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"other\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"other\"}, {\"i\": 40, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 45, \"why\": \"other\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 48, \"why\": \"explicit\"}, {\"i\": 49, \"why\": \"other\"}, {\"i\": 50}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.38.why\n Field required [type=missing, input_value={'i': 50}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 208, "n_selected": 46, "n_implied": 9, "n_structural": 3, "n_probe": 4, "ret_R": 0.6667, "P": 0.2826, "R": 0.8667, "F1": 0.4262, "leaf_P": 0.2059, "leaf_R": 0.5833, "leaf_F1": 0.3043, "n_leaf_sel": 34, "n_leaf_gt": 12, "ret_P": 0.0481, "sel_given_ret": 1.3, "over_sel": 3.07, "why": {"explicit": 32, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 4, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 93, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}, "31": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2826, "gen_R": 0.8667, "gen_F1": 0.4262, "missed": ["angry", "eyes_closed"], "extra": ["annoyed_expression", "anthro", "applying_makeup", "bed_sheet", "bedding", "bedroom", "blanket", "clothing", "english_text", "expressions", "eyes", "half-closed_eyes", "half-length_portrait", "head_on_pillow", "humanoid", "letters", "looking_down_at_another", "lying_on_bed", "mascara", "narrowed_eyes", "on_bed", "pajamas", "personal_grooming", "portrait", "purple_theme", "relaxed_expression", "resting", "sleeping_together", "sleepover", "text_box", "under_blanket", "vase", "yellow_eyeshadow"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "applying_makeup", "bed", "bed_sheet", "bedding", "bedroom", "blanket", "blonde_hair", "blue_eyes", "clothing", "duo", "english_text", 
"expressions", "eyes", "eyeshadow", "furniture", "green_eyes", "hair", "half-closed_eyes", "half-length_portrait", "head_on_pillow", "humanoid", "letters", "looking_down_at_another", "lying", "lying_on_bed", "makeup", "mascara", "narrowed_eyes", "on_bed", "pajamas", "personal_grooming", "portrait", "purple_hair", "purple_theme", "relaxed_expression", "resting", "sleeping", "sleeping_together", "sleepover", "text", "text_box", "under_blanket", "vase", "yellow_eyeshadow"], "stage3_selected": ["annoyed_expression", "applying_makeup", "bed_sheet", "bedding", "bedroom", "blanket", "blonde_hair", "blue_eyes", "english_text", "expressions", "eyes", "green_eyes", "hair", "half-closed_eyes", "half-length_portrait", "head_on_pillow", "letters", "looking_down_at_another", "lying_on_bed", "makeup", "mascara", "pajamas", "purple_hair", "purple_theme", "relaxed_expression", "resting", "sleeping", "sleeping_together", "sleepover", "text", "text_box", "under_blanket", "vase", "yellow_eyeshadow"], "stage3_selected_scores": {"hair": 0.6031, "text": 0.6007, "blue_eyes": 0.6014, "green_eyes": 0.5989, "blonde_hair": 0.5986, "half-closed_eyes": 0.3951, "purple_hair": 0.5642, "makeup": 0.5965, "bedroom": 0.4901, "sleeping": 0.6027, "bedding": 0.3909, "half-length_portrait": 0.4197, "bed_sheet": 0.3993, "blanket": 0.4205, "mascara": 0.4462, "lying_on_bed": 0.4093, "text_box": 0.3916, "purple_theme": 0.4555, "vase": 0.3521, "resting": 0.5034, "annoyed_expression": 0.7251, "expressions": 0.5439, "head_on_pillow": 0.3887, "sleeping_together": 0.5084, "sleepover": 0.5269, "under_blanket": 0.4281, "yellow_eyeshadow": 0.4551, "letters": 0.3656, "applying_makeup": 0.473, "relaxed_expression": 0.5056, "eyes": 0.8951, "english_text": 0.4189, "pajamas": 0.4086, "looking_down_at_another": 0.4491}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "green_eyes": 9, "blonde_hair": 10, "half-closed_eyes": 140, "purple_hair": 14, "makeup": 11, "bedroom": 43, "sleeping": 6, "bedding": 149, 
"half-length_portrait": 100, "bed_sheet": 134, "blanket": 98, "mascara": 74, "lying_on_bed": 116, "text_box": 146, "purple_theme": 63, "vase": 187, "resting": 33, "annoyed_expression": 2, "expressions": 18, "head_on_pillow": 151, "sleeping_together": 31, "sleepover": 26, "under_blanket": 91, "yellow_eyeshadow": 64, "letters": 176, "applying_makeup": 55, "relaxed_expression": 32, "eyes": 1, "english_text": 103, "pajamas": 120, "looking_down_at_another": 69}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "green_eyes": 1, "blonde_hair": 1, "half-closed_eyes": 10, "purple_hair": 1, "makeup": 1, "bedroom": 1, "sleeping": 1, "bedding": 7, "half-length_portrait": 7, "bed_sheet": 5, "blanket": 7, "mascara": 9, "lying_on_bed": 4, "text_box": 8, "purple_theme": 10, "vase": 8, "resting": 1, "annoyed_expression": 1, "expressions": 3, "head_on_pillow": 8, "sleeping_together": 2, "sleepover": 1, "under_blanket": 8, "yellow_eyeshadow": 6, "letters": 9, "applying_makeup": 4, "relaxed_expression": 6, "eyes": 1, "english_text": 4, "pajamas": 3, "looking_down_at_another": 5}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "applying_makeup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.473}, "bed_sheet": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3993}, "bedding": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3909}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "blanket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4205}, "clothing": {"source": "implied"}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4189}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5439}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8951}, "half-closed_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 
0.3951}, "half-length_portrait": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4197}, "head_on_pillow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3887}, "humanoid": {"source": "structural"}, "letters": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3656}, "looking_down_at_another": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4491}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4093}, "mascara": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4462}, "narrowed_eyes": {"source": "implied"}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4086}, "personal_grooming": {"source": "implied"}, "portrait": {"source": "implied"}, "purple_theme": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4555}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5056}, "resting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5034}, "sleeping_together": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5084}, "sleepover": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5269}, "text_box": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3916}, "under_blanket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4281}, "vase": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3521}, "yellow_eyeshadow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4551}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo", "text"], "t1": 1.35, "t2": 1.92, "t3": 31.07, "t3s": 0.86, "t3p": 2.75, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=211 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 125, "n_selected": 58, "n_implied": 20, "n_structural": 4, "n_probe": 3, "ret_R": 0.4545, "P": 0.2931, "R": 0.7727, "F1": 0.425, "leaf_P": 0.1613, "leaf_R": 0.3846, "leaf_F1": 0.2273, "n_leaf_sel": 31, "n_leaf_gt": 13, "ret_P": 0.08, "sel_given_ret": 1.7, "over_sel": 2.64, "why": {"explicit": 34, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 61, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "11": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2931, "gen_R": 0.7727, "gen_F1": 0.425, "missed": ["fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["abyssal_wolf", "acoustic_guitar", "alpha_channel", "annoyed_expression", "auburn_hair", "big_claws", "big_tail", "blonde_hair", "bottomwear", "canis", "claws_out", "curled_hair", "denim", "denim_clothing", "digitigrade", "electric_guitar", "finger_claws", "flowing_hair", "holding_guitar", "jeans", "leggings", "legwear", "long_claws", "long_tail", "looking_at_viewer", "pants", "playing_guitar", "playing_music", "pointed_tail", "shirt", "shorts", "topwear", "torn_bottomwear", "torn_jeans", "torn_leggings", "torn_legwear", "torn_pants", "torn_shirt", "torn_shorts", "torn_topwear", "wavy_hair"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", 
"plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["abyssal_wolf", "acoustic_guitar", "alpha_channel", "annoyed_expression", "anthro", "auburn_hair", "bass_guitar", "big_claws", "big_tail", "blonde_hair", "bottomwear", "canid", "canine", "canis", "claws", "claws_out", "clothed", "clothing", "curled_hair", "denim", "denim_clothing", "digitigrade", "electric_guitar", "finger_claws", "flowing_hair", "guitar", "hair", "holding_guitar", "jeans", "leggings", "legwear", "long_claws", "long_tail", "looking_at_viewer", "mammal", "musical_instrument", "pants", "playing_guitar", "playing_music", "plucked_string_instrument", "pointed_tail", "shirt", "shorts", "solo", "spade_tail", "string_instrument", "tail", "topwear", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_leggings", "torn_legwear", "torn_pants", "torn_shirt", "torn_shorts", "torn_topwear", "wavy_hair"], "stage3_selected": ["abyssal_wolf", "acoustic_guitar", "annoyed_expression", "auburn_hair", "bass_guitar", "big_claws", "big_tail", "blonde_hair", "canis", "claws", "claws_out", "curled_hair", "digitigrade", "electric_guitar", "finger_claws", "flowing_hair", "hair", "holding_guitar", "long_claws", "long_tail", "playing_guitar", "plucked_string_instrument", "pointed_tail", "spade_tail", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_leggings", "torn_shirt", "torn_shorts", "torn_topwear", "transparent_background", "wavy_hair"], "stage3_selected_scores": {"hair": 0.5731, "tail": 0.5659, "canis": 0.4098, "claws": 0.5684, "blonde_hair": 0.382, "digitigrade": 0.4195, "torn_clothing": 0.4133, "long_tail": 0.4222, "finger_claws": 0.4395, "big_tail": 0.3841, "spade_tail": 0.618, "string_instrument": 0.8617, "torn_bottomwear": 0.4362, "plucked_string_instrument": 0.8658, "curled_hair": 0.3875, "torn_topwear": 0.3945, "wavy_hair": 0.3492, "torn_shirt": 0.4049, "long_claws": 0.4365, "playing_guitar": 0.9317, 
"torn_jeans": 0.4824, "claws_out": 0.438, "big_claws": 0.4299, "annoyed_expression": 0.4693, "electric_guitar": 0.8664, "pointed_tail": 0.3768, "torn_shorts": 0.3996, "auburn_hair": 0.3767, "bass_guitar": 0.9118, "flowing_hair": 0.5669, "abyssal_wolf": 0.4098, "holding_guitar": 0.8442, "torn_leggings": 0.4244, "acoustic_guitar": 0.8654, "transparent_background": 0.4526}, "stage3_selected_ranks": {"hair": 16, "tail": 19, "canis": 90, "claws": 17, "blonde_hair": 112, "digitigrade": 84, "torn_clothing": 87, "long_tail": 83, "finger_claws": 66, "big_tail": 110, "spade_tail": 12, "string_instrument": 8, "torn_bottomwear": 71, "plucked_string_instrument": 6, "curled_hair": 107, "torn_topwear": 99, "wavy_hair": 130, "torn_shirt": 91, "long_claws": 70, "playing_guitar": 2, "torn_jeans": 39, "claws_out": 68, "big_claws": 79, "annoyed_expression": 45, "electric_guitar": 5, "pointed_tail": 116, "torn_shorts": 95, "auburn_hair": 117, "bass_guitar": 3, "flowing_hair": 18, "abyssal_wolf": 89, "holding_guitar": 11, "torn_leggings": 82, "acoustic_guitar": 7, "transparent_background": 59}, "stage3_selected_phrase_ranks": {"hair": 1, "tail": 1, "canis": 10, "claws": 1, "blonde_hair": 6, "digitigrade": 10, "torn_clothing": 6, "long_tail": 5, "finger_claws": 5, "big_tail": 9, "spade_tail": 1, "string_instrument": 7, "torn_bottomwear": 3, "plucked_string_instrument": 5, "curled_hair": 5, "torn_topwear": 10, "wavy_hair": 9, "torn_shirt": 7, "long_claws": 8, "playing_guitar": 1, "torn_jeans": 1, "claws_out": 6, "big_claws": 9, "annoyed_expression": 10, "electric_guitar": 4, "pointed_tail": 5, "torn_shorts": 8, "auburn_hair": 7, "bass_guitar": 2, "flowing_hair": 1, "abyssal_wolf": 9, "holding_guitar": 10, "torn_leggings": 5, "acoustic_guitar": 5, "transparent_background": 9}, "extra_evidence": {"abyssal_wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4098}, "acoustic_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8654}, "alpha_channel": 
{"source": "implied"}, "annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4693}, "auburn_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3767}, "big_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4299}, "big_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3841}, "blonde_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.382}, "bottomwear": {"source": "implied"}, "canis": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4098}, "claws_out": {"source": "stage3", "why": "explicit", "retrieval_score": 0.438}, "curled_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3875}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "digitigrade": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4195}, "electric_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8664}, "finger_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4395}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5669}, "holding_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8442}, "jeans": {"source": "implied"}, "leggings": {"source": "implied"}, "legwear": {"source": "implied"}, "long_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4365}, "long_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4222}, "looking_at_viewer": {"source": "structural"}, "pants": {"source": "implied"}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9317}, "playing_music": {"source": "implied"}, "pointed_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3768}, "shirt": {"source": "implied"}, "shorts": {"source": "implied"}, "topwear": {"source": "implied"}, "torn_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4362}, "torn_jeans": {"source": "stage3", "why": 
"explicit", "retrieval_score": 0.4824}, "torn_leggings": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4244}, "torn_legwear": {"source": "implied"}, "torn_pants": {"source": "implied"}, "torn_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4049}, "torn_shorts": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3996}, "torn_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3945}, "wavy_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3492}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 1.43, "t2": 1.23, "t3": 40.57, "t3s": 1.09, "t3p": 3.22, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=131 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 169, "n_selected": 42, "n_implied": 16, "n_structural": 4, "n_probe": 4, "ret_R": 0.64, "P": 0.5, "R": 0.84, "F1": 0.6269, "leaf_P": 0.3684, "leaf_R": 0.4667, "leaf_F1": 0.4118, "n_leaf_sel": 19, "n_leaf_gt": 15, "ret_P": 0.0947, "sel_given_ret": 1.3125, "over_sel": 1.68, "why": {"explicit": 22}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 92, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 2, "parse_fail": 0, "errors": 1}, "49": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.25, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.84, "gen_F1": 0.6269, "missed": ["crossed_arms", "looking_at_another", "overalls", "standing"], "extra": ["3_claws", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "dress_shirt", "fluffy_fur", "grey_clothing", "grey_shirt", "grey_topwear", "long_ears", "looking_at_viewer", "open_mouth", "t-shirt", "toe_claws", "white_body", "white_clothing", "white_fur", "white_shirt", "white_t-shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["3_claws", "anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "dress_shirt", "duo", "facial_markings", 
"fluffy_fur", "fox", "fur", "grey_background", "grey_clothing", "grey_shirt", "grey_topwear", "head_markings", "lagomorph", "leporid", "long_ears", "looking_at_viewer", "mammal", "markings", "open_mouth", "pants", "rabbit", "shirt", "t-shirt", "toe_claws", "topwear", "white_body", "white_clothing", "white_fur", "white_shirt", "white_t-shirt", "white_topwear"], "stage3_selected": ["3_claws", "black_bottomwear", "black_pants", "blue_clothing", "claws", "dress_shirt", "facial_markings", "fluffy_fur", "fox", "fur", "grey_background", "grey_shirt", "long_ears", "open_mouth", "rabbit", "simple_background", "toe_claws", "topwear", "white_fur", "white_shirt", "white_t-shirt", "white_topwear"], "stage3_selected_scores": {"fur": 0.6531, "simple_background": 0.416, "open_mouth": 0.633, "claws": 0.6303, "white_fur": 0.5149, "topwear": 0.6439, "fox": 0.638, "rabbit": 0.6511, "toe_claws": 0.5549, "grey_background": 0.6784, "long_ears": 0.4628, "facial_markings": 0.6945, "blue_clothing": 0.6538, "white_topwear": 0.7671, "black_bottomwear": 0.7384, "white_shirt": 0.8197, "dress_shirt": 0.6688, "black_pants": 0.833, "grey_shirt": 0.6923, "3_claws": 0.5531, "white_t-shirt": 0.7504, "fluffy_fur": 0.4964}, "stage3_selected_ranks": {"fur": 40, "simple_background": 171, "open_mouth": 48, "claws": 50, "white_fur": 132, "topwear": 43, "fox": 46, "rabbit": 41, "toe_claws": 95, "grey_background": 30, "long_ears": 155, "facial_markings": 27, "blue_clothing": 39, "white_topwear": 6, "black_bottomwear": 19, "white_shirt": 4, "dress_shirt": 32, "black_pants": 3, "grey_shirt": 28, "3_claws": 96, "white_t-shirt": 15, "fluffy_fur": 139}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 8, "open_mouth": 1, "claws": 1, "white_fur": 4, "topwear": 7, "fox": 1, "rabbit": 1, "toe_claws": 4, "grey_background": 1, "long_ears": 10, "facial_markings": 1, "blue_clothing": 8, "white_topwear": 3, "black_bottomwear": 5, "white_shirt": 1, "dress_shirt": 5, "black_pants": 1, "grey_shirt": 4, 
"3_claws": 5, "white_t-shirt": 10, "fluffy_fur": 5}, "extra_evidence": {"3_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5531}, "black_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7384}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.833}, "blue_clothing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6538}, "dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6688}, "fluffy_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4964}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6923}, "grey_topwear": {"source": "implied"}, "long_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4628}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.633}, "t-shirt": {"source": "implied"}, "toe_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5549}, "white_body": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5149}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8197}, "white_t-shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7504}, "white_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7671}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "clothing", "duo"], "t1": 1.6, "t2": 1.58, "t3": 53.13, "t3s": 2.35, "t3p": 1.07, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=169 entity=5 copyright_filtered=0 generic_char_to_general=0 unknown_type=3", "Stage3 general_chunk_1: attempt 1 error: Failed to parse 
Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"style_or_meta\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"style_or_meta\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"explicit\"}, {\"i\": 29, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 35, \"why\": \"style_or_meta\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 38, \"why\": \"other\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 42, \"why\": \"style_or_meta\"}, {\"i\": 43, \"why\": \"style_or_meta\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"explicit\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"explicit\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 51}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 51}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 202, "n_selected": 60, "n_implied": 18, "n_structural": 5, "n_probe": 3, "ret_R": 0.6364, "P": 0.1833, "R": 1.0, "F1": 0.3099, "leaf_P": 0.1538, "leaf_R": 0.8571, "leaf_F1": 0.2609, "n_leaf_sel": 39, "n_leaf_gt": 7, "ret_P": 0.0347, "sel_given_ret": 1.5714, "over_sel": 5.45, "why": {"explicit": 36, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 4, "calls_with_selection": 4, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 105, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}, "26": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1897, "gen_R": 1.0, "gen_F1": 0.3188, "missed": [], "extra": ["abs", "animal_humanoid", "animated", "animated_png", "anthro", "blue_inner_ear_fluff", "blue_pawpads", "blue_paws", "blue_tail", "blue_tuft", "blush", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "dog_humanoid", "generation_2_pokemon", "glistening", "glistening_nose", "grey_nose", "heterochromia", "humanoid", "inner_ear_fluff", "intersex", "jumping", "jumpluff", "looking_at_viewer", "mammal_humanoid", "muscular", "nintendo", "pawpads", "pink_mouth", "pink_stripes", "pokemon", "pokemon_(species)", "posed", "sparkling_character", "stripes", "swinging", "tail", "teeth", "thong_straps", "tuft", "two_tone_tail", "white_inner_ear_fluff", "white_nose", "white_stripes", "white_tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["abs", 
"animal_humanoid", "animated", "animated_png", "anthro", "blue_eyes", "blue_inner_ear_fluff", "blue_nose", "blue_pawpads", "blue_paws", "blue_tail", "blue_tuft", "blush", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "dog_humanoid", "fur", "generation_2_pokemon", "glistening", "glistening_nose", "grey_nose", "heterochromia", "humanoid", "inner_ear_fluff", "intersex", "jumping", "jumpluff", "looking_at_viewer", "mammal", "mammal_humanoid", "muscular", "nintendo", "open_mouth", "pawpads", "pink_mouth", "pink_stripes", "pokemon", "pokemon_(species)", "posed", "purple_body", "solo", "sparkling_character", "stripes", "swinging", "tail", "teeth", "thong_straps", "tuft", "two_tone_tail", "white_body", "white_fur", "white_inner_ear_fluff", "white_nose", "white_stripes", "white_tail"], "stage3_selected": ["abs", "animated", "animated_png", "blue_eyes", "blue_inner_ear_fluff", "blue_nose", "blue_pawpads", "blue_paws", "blue_tail", "blue_tuft", "blush", "canine_humanoid", "curved_tail", "dog_humanoid", "glistening_nose", "grey_nose", "heterochromia", "jumping", "jumpluff", "muscular", "open_mouth", "pink_mouth", "pink_stripes", "posed", "purple_body", "simple_background", "sparkling_character", "stripes", "swinging", "teeth", "thong_straps", "two_tone_tail", "white_fur", "white_inner_ear_fluff", "white_nose", "white_stripes", "white_tail"], "stage3_selected_scores": {"simple_background": 0.5831, "blush": 0.3631, "open_mouth": 0.5904, "teeth": 0.3451, "blue_eyes": 0.5873, "white_fur": 0.5819, "muscular": 0.3492, "abs": 0.3151, "stripes": 0.583, "purple_body": 0.5531, "canine_humanoid": 0.9138, "white_tail": 0.4849, "heterochromia": 0.428, "two_tone_tail": 0.4834, "blue_nose": 0.5939, "blue_tail": 0.5092, "blue_pawpads": 0.4725, "white_inner_ear_fluff": 0.5683, "dog_humanoid": 0.8087, "grey_nose": 0.4311, "glistening_nose": 0.4258, "white_stripes": 0.534, "white_nose": 0.4882, "thong_straps": 0.3103, "blue_paws": 0.4669, 
"pink_stripes": 0.5472, "blue_inner_ear_fluff": 0.4647, "curved_tail": 0.5999, "pink_mouth": 0.4798, "swinging": 0.338, "sparkling_character": 0.3409, "blue_tuft": 0.4615, "jumpluff": 0.3484, "posed": 0.4332, "animated_png": 0.4499, "animated": 0.3743, "jumping": 0.5806}, "stage3_selected_ranks": {"simple_background": 23, "blush": 175, "open_mouth": 21, "teeth": 186, "blue_eyes": 22, "white_fur": 25, "muscular": 181, "abs": 201, "stripes": 24, "purple_body": 38, "canine_humanoid": 1, "white_tail": 83, "heterochromia": 143, "two_tone_tail": 89, "blue_nose": 19, "blue_tail": 67, "blue_pawpads": 100, "white_inner_ear_fluff": 29, "dog_humanoid": 5, "grey_nose": 139, "glistening_nose": 144, "white_stripes": 52, "white_nose": 80, "thong_straps": 203, "blue_paws": 104, "pink_stripes": 42, "blue_inner_ear_fluff": 108, "curved_tail": 16, "pink_mouth": 93, "swinging": 192, "sparkling_character": 191, "blue_tuft": 112, "jumpluff": 182, "posed": 137, "animated_png": 123, "animated": 170, "jumping": 26}, "stage3_selected_phrase_ranks": {"simple_background": 1, "blush": 4, "open_mouth": 1, "teeth": 9, "blue_eyes": 1, "white_fur": 1, "muscular": 6, "abs": 7, "stripes": 1, "purple_body": 1, "canine_humanoid": 1, "white_tail": 7, "heterochromia": 6, "two_tone_tail": 9, "blue_nose": 1, "blue_tail": 2, "blue_pawpads": 6, "white_inner_ear_fluff": 2, "dog_humanoid": 5, "grey_nose": 7, "glistening_nose": 9, "white_stripes": 4, "white_nose": 3, "thong_straps": 7, "blue_paws": 8, "pink_stripes": 1, "blue_inner_ear_fluff": 5, "curved_tail": 1, "pink_mouth": 6, "swinging": 9, "sparkling_character": 8, "blue_tuft": 10, "jumpluff": 7, "posed": 8, "animated_png": 1, "animated": 4, "jumping": 1}, "extra_evidence": {"abs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3151}, "animal_humanoid": {"source": "implied"}, "animated": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3743}, "animated_png": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4499}, 
"anthro": {"source": "structural"}, "blue_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4647}, "blue_pawpads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4725}, "blue_paws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4669}, "blue_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5092}, "blue_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4615}, "blush": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3631}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9138}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5999}, "dog_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8087}, "generation_2_pokemon": {"source": "implied"}, "glistening": {"source": "implied"}, "glistening_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4258}, "grey_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4311}, "heterochromia": {"source": "stage3", "why": "explicit", "retrieval_score": 0.428}, "humanoid": {"source": "implied"}, "inner_ear_fluff": {"source": "implied"}, "intersex": {"source": "structural"}, "jumping": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5806}, "jumpluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3484}, "looking_at_viewer": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "muscular": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3492}, "nintendo": {"source": "implied"}, "pawpads": {"source": "implied"}, "pink_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4798}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5472}, "pokemon": {"source": "implied"}, "pokemon_(species)": {"source": "implied"}, 
"posed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4332}, "sparkling_character": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3409}, "stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.583}, "swinging": {"source": "stage3", "why": "explicit", "retrieval_score": 0.338}, "tail": {"source": "implied"}, "teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3451}, "thong_straps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3103}, "tuft": {"source": "implied"}, "two_tone_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4834}, "white_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5683}, "white_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4882}, "white_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.534}, "white_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4849}}, "structural": ["solo", "anthro", "intersex", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 2.77, "t2": 1.81, "t3": 47.96, "t3s": 1.23, "t3p": 6.5, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=206 entity=2 copyright_filtered=1 generic_char_to_general=4 unknown_type=12"]}
|
data/eval_results/latency_chunk100_seed42.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T05:55:26.334510", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 100, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 10, "temperature": 0.0, "shuffle": false, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 45}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 153, "n_selected": 38, "n_implied": 20, "n_structural": 4, "n_probe": 5, "ret_R": 0.3333, "P": 0.2105, "R": 0.6667, "F1": 0.32, "leaf_P": 0.2353, "leaf_R": 0.4444, "leaf_F1": 0.3077, "n_leaf_sel": 17, "n_leaf_gt": 9, "ret_P": 0.0261, "sel_given_ret": 2.0, "over_sel": 3.17, "why": {"explicit": 13}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 54, "attempts_by_n_local": {"100": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "56": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2105, "gen_R": 0.6667, "gen_F1": 0.32, "missed": ["alpha_channel", "feline", "fingers", "hair"], "extra": ["beverage", "black_body", "black_fur", "business_suit", "container", "cup", "formal", "grey_clothing", "grey_shirt", "grey_topwear", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "mug", "necktie", "shirt", "suit", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_necktie", "white_shirt", "white_t-shirt", "white_topwear"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "beverage", "black_body", "black_fur", "business_suit", "clothed", "clothing", "container", "cup", "felid", "formal", "fur", "grey_clothing", "grey_shirt", "grey_topwear", "holding_beverage", "holding_container", "holding_cup", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", 
"shirt", "solo", "suit", "t-shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_clothing", "white_necktie", "white_shirt", "white_t-shirt", "white_topwear"], "stage3_selected": ["anthro", "black_fur", "business_suit", "formal", "grey_shirt", "holding_beverage", "holding_cup", "holding_mug", "simple_background", "teal_shirt", "vest", "white_necktie", "white_t-shirt"], "stage3_selected_scores": {"anthro": 0.4929, "simple_background": 0.6978, "black_fur": 0.7183, "vest": 0.8403, "holding_cup": 0.7667, "holding_beverage": 0.7721, "grey_shirt": 0.7582, "business_suit": 0.5746, "holding_mug": 0.916, "white_t-shirt": 0.7329, "formal": 0.5993, "teal_shirt": 0.7474, "white_necktie": 0.6418}, "stage3_selected_ranks": {"anthro": 135, "simple_background": 50, "black_fur": 35, "vest": 3, "holding_cup": 10, "holding_beverage": 8, "grey_shirt": 13, "business_suit": 97, "holding_mug": 1, "white_t-shirt": 26, "formal": 84, "teal_shirt": 19, "white_necktie": 66}, "stage3_selected_phrase_ranks": {"anthro": 9, "simple_background": 1, "black_fur": 1, "vest": 1, "holding_cup": 4, "holding_beverage": 3, "grey_shirt": 1, "business_suit": 8, "holding_mug": 1, "white_t-shirt": 3, "formal": 1, "teal_shirt": 4, "white_necktie": 10}, "extra_evidence": {"beverage": {"source": "implied"}, "black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7183}, "business_suit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5746}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "formal": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5993}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "holding_beverage": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7721}, "holding_container": {"source": "implied"}, "holding_cup": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.7667}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "implied"}, "necktie": {"source": "implied"}, "shirt": {"source": "implied"}, "suit": {"source": "implied"}, "t-shirt": {"source": "implied"}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8403}, "white_clothing": {"source": "implied"}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6418}, "white_shirt": {"source": "implied"}, "white_t-shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7329}, "white_topwear": {"source": "implied"}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 3.14, "t2": 7.11, "t3": 95.33, "t3s": 5.66, "t3p": 3.88, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=156 entity=1 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 161, "n_selected": 24, "n_implied": 4, "n_structural": 5, "n_probe": 6, "ret_R": 0.5714, "P": 0.4167, "R": 0.7143, "F1": 0.5263, "leaf_P": 0.2, "leaf_R": 0.3, "leaf_F1": 0.24, "n_leaf_sel": 15, "n_leaf_gt": 10, "ret_P": 0.0497, "sel_given_ret": 1.25, "over_sel": 1.71, "why": {"explicit": 8, "strong_implied": 5}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 4, "attempt_errors": 2, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 42, "attempts_by_n_local": {"100": {"attempts": 3, "parse_ok": 1, "parse_fail": 0, "errors": 2}, "62": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.5, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4167, "gen_R": 0.7143, "gen_F1": 0.5263, "missed": ["ape", "fur", "hair", "human"], "extra": ["anthro", "bottomwear", "chasing", "cloth", "duo", "flash", "interactive", "laugh", "loincloth", "monkey", "raised_arms", "topless", "trio", "ursine"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "bear", "bottomwear", "chasing", "cloth", "clothed", "clothing", "dancing", "duo", "flash", "group", "haplorhine", "interactive", "laugh", "loincloth", "looking_at_viewer", "male", "mammal", "monkey", "primate", "raised_arms", "topless", "trio", "ursine"], "stage3_selected": ["bear", "chasing", "cloth", "dancing", "interactive", "laugh", "loincloth", "looking_at_viewer", "male", "monkey", "primate", "raised_arms", "ursine"], "stage3_selected_scores": {"looking_at_viewer": 0.5455, "bear": 0.5731, "primate": 0.89, "ursine": 
0.4377, "loincloth": 0.5677, "monkey": 0.7553, "raised_arms": 0.5437, "cloth": 0.325, "male": 0.5579, "dancing": 0.5556, "laugh": 0.5253, "chasing": 0.3326, "interactive": 0.4063}, "stage3_selected_ranks": {"looking_at_viewer": 18, "bear": 12, "primate": 2, "ursine": 45, "loincloth": 13, "monkey": 6, "raised_arms": 19, "cloth": 155, "male": 15, "dancing": 16, "laugh": 22, "chasing": 148, "interactive": 73}, "stage3_selected_phrase_ranks": {"looking_at_viewer": 1, "bear": 1, "primate": 1, "ursine": 8, "loincloth": 1, "monkey": 6, "raised_arms": 1, "cloth": 9, "male": 1, "dancing": 1, "laugh": 1, "chasing": 9, "interactive": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "bottomwear": {"source": "implied"}, "chasing": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.3326}, "cloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.325}, "duo": {"source": "probe"}, "flash": {"source": "implied"}, "interactive": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4063}, "laugh": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5253}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5677}, "monkey": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7553}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5437}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "ursine": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4377}}, "structural": ["trio", "anthro", "male", "clothed", "topless"], "probe": ["clothing", "simple_background", "anthro", "duo", "group", "bear"], "t1": 2.5, "t2": 5.44, "t3": 54.43, "t3s": 4.59, "t3p": 8.2, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=162 entity=5 copyright_filtered=2 generic_char_to_general=1 unknown_type=3", "Stage3 general_chunk_0: attempt 1 error: Failed to parse 
Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 40, \"why\": \"style_or_meta\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"style_or_meta\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"explicit\"}, {\"i\": 71, \"why\": \"style_or_meta\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 79, \"why\": \"other\"}, {\"i\": 81, \"why\": \"weak_implied\"}, {\"i\": 83, \"why\": \"style_or_meta\"}, {}]}. 
Got: 2 validation errors for Stage3SelectionResponse\nselections.35.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.35.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"other\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"explicit\"}, {\"i\": 71, \"why\": \"other\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 79, \"\": null}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 79, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 149, "n_selected": 17, "n_implied": 2, "n_structural": 3, "n_probe": 5, "ret_R": 0.7143, "P": 0.2941, "R": 0.3571, "F1": 0.3226, "leaf_P": 0.2857, "leaf_R": 0.4444, "leaf_F1": 0.3478, "n_leaf_sel": 14, "n_leaf_gt": 9, "ret_P": 0.0671, "sel_given_ret": 0.5, "over_sel": 1.21, "why": {"explicit": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 1, "calls_exhausted_retries": 1, "attempts_total": 4, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"100": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}, "46": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.75, "call_exhaustion_rate": 0.5}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2941, "gen_R": 0.3571, "gen_F1": 0.3226, "missed": ["blue_eyes", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "extra": ["<3", "blue_hair", "blushing_profusely", "confident", "fur_coat", "hair", "overcoat", "relaxed_expression", "teal_clothing", "topwear", "vest", "winter_coat"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_hair", "blush", "blushing_profusely", "clothed", "clothing", "confident", "duo", "fur_coat", "hair", "overcoat", "relaxed_expression", "teal_clothing", "topwear", "vest", "winter_coat"], "stage3_selected": ["anthro", "blue_hair", "blushing_profusely", "confident", "fur_coat", "overcoat", "relaxed_expression", "teal_clothing", "vest", "winter_coat"], "stage3_selected_scores": {"anthro": 0.4155, "blue_hair": 0.4164, "vest": 
0.4931, "blushing_profusely": 0.4371, "confident": 0.5026, "fur_coat": 0.4929, "winter_coat": 0.4685, "teal_clothing": 0.4159, "overcoat": 0.4981, "relaxed_expression": 0.5032}, "stage3_selected_ranks": {"anthro": 119, "blue_hair": 115, "vest": 57, "blushing_profusely": 94, "confident": 46, "fur_coat": 58, "winter_coat": 71, "teal_clothing": 117, "overcoat": 52, "relaxed_expression": 44}, "stage3_selected_phrase_ranks": {"anthro": 8, "blue_hair": 7, "vest": 8, "blushing_profusely": 4, "confident": 5, "fur_coat": 9, "winter_coat": 10, "teal_clothing": 9, "overcoat": 6, "relaxed_expression": 1}, "extra_evidence": {"<3": {"source": "probe"}, "blue_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4164}, "blushing_profusely": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4371}, "confident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5026}, "fur_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4929}, "hair": {"source": "implied"}, "overcoat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4981}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5032}, "teal_clothing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4159}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4931}, "winter_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4685}}, "structural": ["duo", "anthro", "clothed"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 3.51, "t2": 7.03, "t3": 55.27, "t3s": 5.85, "t3p": 4.92, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=146 entity=3 copyright_filtered=1 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, 
{\"i\": 7, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"explicit\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 66}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 66}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"strong_implied\"}, {\"i\": 12, \"why\": \"strong_implied\"}, {\"i\": 68, \"why\": \"other\"}, {\"i\": 27, \"why\": \"style_or_meta\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 1, \"why\": \"other\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 6, \"why\": \"other\"}, {\"i\": 9, \"why\": \"other\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 19, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"other\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 41, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 44, \"why\": \"other\"}, {\"i\": 47, \"why\": \"other\"}, {\"i\": 48, \"\": null}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.37.why\n Field required [type=missing, input_value={'i': 48, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"explicit\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 63}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 63}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: gave up after 3 attempts"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 111, "n_selected": 13, "n_implied": 1, "n_structural": 5, "n_probe": 3, "ret_R": 0.75, "P": 0.1538, "R": 0.5, "F1": 0.2353, "leaf_P": 0.1667, "leaf_R": 0.5, "leaf_F1": 0.25, "n_leaf_sel": 12, "n_leaf_gt": 4, "ret_P": 0.027, "sel_given_ret": 0.6667, "over_sel": 3.25, "why": {"explicit": 6, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 27, "attempts_by_n_local": {"100": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "12": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1538, "gen_R": 0.5, "gen_F1": 0.2353, "missed": ["smile", "tan_body"], "extra": ["ambiguous_gender", "anthro", "big_eyes", "feral", "floating", "looking_at_viewer", "nose", "nude", "red_spots", "spots", "toony"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "big_eyes", "feral", "floating", "looking_at_viewer", "nose", "nude", "red_nose", "red_spots", "solo", "spots", "toony"], "stage3_selected": ["big_eyes", "floating", "nose", "red_nose", "red_spots", "toony", "white_background"], "stage3_selected_scores": {"white_background": 0.6138, "toony": 0.6021, "red_nose": 0.7501, "floating": 0.6519, "red_spots": 0.6068, "nose": 0.8607, "big_eyes": 0.6961}, "stage3_selected_ranks": {"white_background": 31, "toony": 35, "red_nose": 3, "floating": 16, "red_spots": 34, "nose": 2, "big_eyes": 6}, "stage3_selected_phrase_ranks": {"white_background": 1, "toony": 1, "red_nose": 1, "floating": 1, "red_spots": 
10, "nose": 1, "big_eyes": 1}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6961}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6519}, "looking_at_viewer": {"source": "structural"}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8607}, "nude": {"source": "structural"}, "red_spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6068}, "spots": {"source": "implied"}, "toony": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6021}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["simple_background", "anthro", "solo"], "t1": 2.03, "t2": 5.34, "t3": 22.63, "t3s": 3.79, "t3p": 7.0, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=112 entity=0 copyright_filtered=4 generic_char_to_general=0 unknown_type=5"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 192, "n_selected": 29, "n_implied": 9, "n_structural": 4, "n_probe": 3, "ret_R": 0.4545, "P": 0.4483, "R": 0.5909, "F1": 0.5098, "leaf_P": 0.1579, "leaf_R": 0.25, "leaf_F1": 0.1935, "n_leaf_sel": 19, "n_leaf_gt": 12, "ret_P": 0.0521, "sel_given_ret": 1.3, "over_sel": 1.32, "why": {"explicit": 15}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 63, "attempts_by_n_local": {"100": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "88": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.3333333333333333, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4483, "gen_R": 0.5909, "gen_F1": 0.5098, "missed": ["chest_tuft", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["backlighting", "belly", "confident", "countershade_belly", "countershade_body", "eyes", "flexing_both_biceps", "flexing_muscles", "gesture", "light", "lighting", "raised_hand", "striped_body", "striped_fur", "warm_lighting", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "backlighting", "belly", "blue_eyes", "bottomwear", "clothed", "clothing", "confident", "countershade_belly", "countershade_body", "countershading", "eyes", "felid", "flexing_both_biceps", "flexing_muscles", "fur", "gesture", "light", 
"lighting", "male", "mammal", "raised_hand", "shorts", "solo", "striped_body", "striped_fur", "stripes", "warm_lighting", "white_chest"], "stage3_selected": ["backlighting", "blue_eyes", "confident", "countershade_belly", "countershade_body", "eyes", "flexing_both_biceps", "flexing_muscles", "gesture", "raised_hand", "shorts", "striped_body", "striped_fur", "warm_lighting", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5973, "shorts": 0.6091, "gesture": 0.6156, "striped_body": 0.6187, "striped_fur": 0.6688, "raised_hand": 0.7153, "backlighting": 0.5866, "confident": 0.5188, "white_chest": 0.9284, "countershade_body": 0.8712, "flexing_both_biceps": 0.5618, "countershade_belly": 0.835, "warm_lighting": 0.901, "flexing_muscles": 0.6008, "eyes": 0.9788}, "stage3_selected_ranks": {"blue_eyes": 75, "shorts": 70, "gesture": 64, "striped_body": 63, "striped_fur": 46, "raised_hand": 28, "backlighting": 77, "confident": 115, "white_chest": 2, "countershade_body": 4, "flexing_both_biceps": 84, "countershade_belly": 7, "warm_lighting": 3, "flexing_muscles": 72, "eyes": 1}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "shorts": 1, "gesture": 1, "striped_body": 1, "striped_fur": 2, "raised_hand": 1, "backlighting": 6, "confident": 1, "white_chest": 1, "countershade_body": 1, "flexing_both_biceps": 8, "countershade_belly": 2, "warm_lighting": 1, "flexing_muscles": 6, "eyes": 1}, "extra_evidence": {"backlighting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5866}, "belly": {"source": "implied"}, "confident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5188}, "countershade_belly": {"source": "stage3", "why": "explicit", "retrieval_score": 0.835}, "countershade_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8712}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9788}, "flexing_both_biceps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5618}, "flexing_muscles": {"source": 
"stage3", "why": "explicit", "retrieval_score": 0.6008}, "gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6156}, "light": {"source": "implied"}, "lighting": {"source": "implied"}, "raised_hand": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7153}, "striped_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6187}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6688}, "warm_lighting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.901}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9284}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "felid", "solo"], "t1": 3.19, "t2": 1.89, "t3": 43.33, "t3s": 3.06, "t3p": 5.17, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=188 entity=2 copyright_filtered=3 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"style_or_meta\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"explicit\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"explicit\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, 
\"why\": \"style_or_meta\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"explicit\"}, {\"i\": 67, \"why\": \"explicit\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"style_or_meta\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"weak_implied\"}, {\"i\": 74, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.35.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.35.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 202, "n_selected": 7, "n_implied": 0, "n_structural": 5, "n_probe": 4, "ret_R": 0.6923, "P": 0.1429, "R": 0.0769, "F1": 0.1, "leaf_P": 0.1667, "leaf_R": 0.1667, "leaf_F1": 0.1667, "n_leaf_sel": 6, "n_leaf_gt": 6, "ret_P": 0.0446, "sel_given_ret": 0.1111, "over_sel": 0.54, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 0, "calls_exhausted_retries": 2, "attempts_total": 6, "attempt_errors": 6, "attempt_parse_fail": 0, "attempt_parse_ok": 0, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"100": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}, "91": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}}, "attempt_failure_rate": 1.0, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1429, "gen_R": 0.0769, "gen_F1": 0.1, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothed", "clothing", "duo", "group", "solo"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothed", "clothing", "duo", "group", "solo", "text"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"anthro": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "duo": {"source": "structural"}, "group": {"source": "structural"}, "solo": {"source": "structural"}}, "structural": ["solo", "duo", "group", "clothed", "text"], "probe": ["clothing", "anthro", "text", "group"], "t1": 1.89, "t2": 2.2, "t3": 68.08, "t3s": 1.55, "t3p": 1.68, "err": 
null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=191 entity=5 copyright_filtered=6 generic_char_to_general=2 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 42, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 66, \"why\": \"weak_implied\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"weak_implied\"}, {\"i\": 74, \"why\": \"weak_implied\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"weak_implied\"}, {\"i\": 84}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 84}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"style_or_meta\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 66, \"why\": \"weak_implied\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"weak_implied\"}, {\"i\": 74, \"why\": \"weak_implied\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 78, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"weak_implied\"}, {\"i\": 84}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.37.why\n Field required [type=missing, input_value={'i': 84}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 66, \"why\": \"weak_implied\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"weak_implied\"}, {\"i\": 74, \"why\": \"weak_implied\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"weak_implied\"}, {\"i\": 84, \"why\": \"weak_implied\"}, {\"i\": 86, \"why\": \"weak_implied\"}, {\"i\": 88, \"why\": \"weak_implied\"}, {\"i\": 90, \"why\": \"weak_implied\"}, {\"i\": 92, \"why\": \"weak_implied\"}, {\"i\": 94, \"why\": \"weak_implied\"}, {\"i\": 96, \"why\": \"weak_implied\"}, {\"i\": 98, \"why\": \"weak_implied\"}, {}]}. 
Got: 2 validation errors for Stage3SelectionResponse\nselections.34.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.34.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: gave up after 3 attempts", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 79, \"why\": \"weak_implied\"}, {\"i\": 81, \"why\": \"weak_implied\"}, {\"i\": 83}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 83}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 79, \"why\": \"weak_implied\"}, {\"i\": 81}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 81}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 79, \"why\": \"weak_implied\"}, {\"i\": 81}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 81}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: gave up after 3 attempts"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 191, "n_selected": 13, "n_implied": 3, "n_structural": 4, "n_probe": 4, "ret_R": 0.6667, "P": 0.3077, "R": 0.2667, "F1": 0.2857, "leaf_P": 0.3, "leaf_R": 0.25, "leaf_F1": 0.2727, "n_leaf_sel": 10, "n_leaf_gt": 12, "ret_P": 0.0524, "sel_given_ret": 0.4, "over_sel": 0.87, "why": {"explicit": 4}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 1, "calls_exhausted_retries": 1, "attempts_total": 4, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 36, "attempts_by_n_local": {"100": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}, "94": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.75, "call_exhaustion_rate": 0.5}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3077, "gen_R": 0.2667, "gen_F1": 0.2857, "missed": ["angry", "bed", "blonde_hair", "blue_eyes", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "purple_hair", "sleeping"], "extra": ["anthro", "bedding", "blush", "clothed", "clothing", "humanoid", "lipstick", "red_lipstick", "x_eyes"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["anthro", "bedding", "blush", "clothed", "clothing", "duo", "humanoid", "lipstick", "lying", "makeup", "red_lipstick", "text", "x_eyes"], "stage3_selected": ["bedding", "lying", "red_lipstick", "x_eyes"], "stage3_selected_scores": {"lying": 0.4413, "bedding": 0.3914, "red_lipstick": 0.4712, "x_eyes": 0.4222}, "stage3_selected_ranks": {"lying": 75, "bedding": 139, "red_lipstick": 53, "x_eyes": 91}, "stage3_selected_phrase_ranks": {"lying": 7, "bedding": 7, "red_lipstick": 5, 
"x_eyes": 5}, "extra_evidence": {"anthro": {"source": "probe"}, "bedding": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3914}, "blush": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "humanoid": {"source": "structural"}, "lipstick": {"source": "implied"}, "red_lipstick": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4712}, "x_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4222}}, "structural": ["duo", "humanoid", "clothed", "text"], "probe": ["simple_background", "anthro", "blush", "duo"], "t1": 2.63, "t2": 2.17, "t3": 66.86, "t3s": 1.42, "t3p": 0.61, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=194 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"other\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 10, \"why\": \"style_or_meta\"}, {\"i\": 12, \"why\": \"other\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"style_or_meta\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 44, \"why\": \"other\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 47, \"why\": \"style_or_meta\"}, {\"i\": 49, \"why\": 
\"explicit\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 52, \"why\": \"other\"}, {\"i\": 54, \"why\": \"other\"}, {\"i\": 56, \"why\": \"other\"}, {\"i\": 58, \"why\": \"other\"}, {\"i\": 60, \"why\": \"other\"}, {\"i\": 62, \"why\": \"other\"}, {\"i\": 64, \"why\": \"other\"}, {\"i\": 66, \"why\": \"other\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.38.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.38.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"other\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"style_or_meta\"}, {\"i\": 12, \"why\": \"other\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"style_or_meta\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"other\"}, {\"i\": 19, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 44, \"why\": \"other\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 47, \"why\": \"style_or_meta\"}, 
{\"i\": 49, \"why\": \"explicit\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 52, \"why\": \"other\"}, {\"i\": 54, \"why\": \"other\"}, {\"i\": 56, \"why\": \"other\"}, {\"i\": 58, \"why\": \"other\"}, {\"i\": 60, \"why\": \"other\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.38.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.38.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"other\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 10, \"why\": \"style_or_meta\"}, {\"i\": 12, \"why\": \"other\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"style_or_meta\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 44, \"why\": \"other\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 47, \"why\": \"style_or_meta\"}, {\"i\": 49, \"why\": \"explicit\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 52, \"why\": \"other\"}, {\"i\": 54, \"why\": \"other\"}, {\"i\": 56, \"why\": \"other\"}, {\"i\": 58, 
\"why\": \"other\"}, {\"i\": 60, \"why\": \"other\"}, {\"i\": 62, \"why\": \"other\"}, {\"i\": 64, \"why\": \"other\"}, {\"i\": 66, \"why\": \"style_or_meta\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.38.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.38.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: gave up after 3 attempts"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 177, "n_selected": 41, "n_implied": 16, "n_structural": 3, "n_probe": 3, "ret_R": 0.5909, "P": 0.439, "R": 0.8182, "F1": 0.5714, "leaf_P": 0.3889, "leaf_R": 0.5385, "leaf_F1": 0.4516, "n_leaf_sel": 18, "n_leaf_gt": 13, "ret_P": 0.0734, "sel_given_ret": 1.3846, "over_sel": 1.86, "why": {"explicit": 22}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 64, "attempts_by_n_local": {"100": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "77": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.3333333333333333, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.439, "gen_R": 0.8182, "gen_F1": 0.5714, "missed": ["fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["acoustic_guitar", "action_pose", "bottomwear", "canis", "denim", "denim_clothing", "domestic_dog", "flowing_hair", "holding_guitar", "hybrid", "jeans", "pants", "pastel_background", "playing_guitar", "playing_music", "pose", "posed", "spitz", "torn_bottomwear", "torn_jeans", "torn_pants", "wolf", "wolfdog"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["acoustic_guitar", "action_pose", "anthro", "bass_guitar", "bottomwear", "canid", "canine", "canis", "claws", "clothed", "clothing", "denim", "denim_clothing", "domestic_dog", "fingers", 
"flowing_hair", "guitar", "hair", "holding_guitar", "hybrid", "jeans", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "pose", "posed", "solo", "spade_tail", "spitz", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "wolf", "wolfdog"], "stage3_selected": ["acoustic_guitar", "action_pose", "bass_guitar", "canid", "claws", "fingers", "flowing_hair", "guitar", "holding_guitar", "pastel_background", "playing_guitar", "playing_music", "posed", "spade_tail", "spitz", "string_instrument", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "wolf", "wolfdog"], "stage3_selected_scores": {"canid": 0.422, "claws": 0.5637, "fingers": 0.4356, "wolf": 0.458, "torn_clothing": 0.4105, "spitz": 0.4401, "spade_tail": 0.6167, "string_instrument": 0.8611, "torn_bottomwear": 0.4339, "guitar": 0.9617, "action_pose": 0.5789, "torn_pants": 0.4619, "wolfdog": 0.4194, "playing_music": 0.8725, "playing_guitar": 0.9311, "torn_jeans": 0.481, "bass_guitar": 0.9112, "flowing_hair": 0.5655, "holding_guitar": 0.8441, "acoustic_guitar": 0.8647, "posed": 0.4462, "pastel_background": 0.56}, "stage3_selected_ranks": {"canid": 105, "claws": 20, "fingers": 82, "wolf": 60, "torn_clothing": 114, "spitz": 78, "spade_tail": 13, "string_instrument": 8, "torn_bottomwear": 84, "guitar": 1, "action_pose": 15, "torn_pants": 54, "wolfdog": 109, "playing_music": 4, "playing_guitar": 2, "torn_jeans": 43, "bass_guitar": 3, "flowing_hair": 19, "holding_guitar": 11, "acoustic_guitar": 7, "posed": 72, "pastel_background": 22}, "stage3_selected_phrase_ranks": {"canid": 7, "claws": 1, "fingers": 1, "wolf": 1, "torn_clothing": 6, "spitz": 7, "spade_tail": 1, "string_instrument": 7, "torn_bottomwear": 3, "guitar": 1, "action_pose": 1, "torn_pants": 2, "wolfdog": 8, "playing_music": 3, "playing_guitar": 1, "torn_jeans": 1, "bass_guitar": 2, "flowing_hair": 1, "holding_guitar": 10, 
"acoustic_guitar": 5, "posed": 7, "pastel_background": 1}, "extra_evidence": {"acoustic_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8647}, "action_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5789}, "bottomwear": {"source": "implied"}, "canis": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "domestic_dog": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5655}, "holding_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8441}, "hybrid": {"source": "implied"}, "jeans": {"source": "implied"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.56}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9311}, "playing_music": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8725}, "pose": {"source": "implied"}, "posed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4462}, "spitz": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4401}, "torn_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4339}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.481}, "torn_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4619}, "wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.458}, "wolfdog": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4194}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.63, "t2": 2.03, "t3": 31.06, "t3s": 0.91, "t3p": 1.4, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=177 entity=2 copyright_filtered=4 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from 
completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"other\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 44, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"style_or_meta\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"explicit\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"explicit\"}, {\"i\": 62, \"\": null}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 62, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 161, "n_selected": 26, "n_implied": 15, "n_structural": 4, "n_probe": 3, "ret_R": 0.64, "P": 0.4615, "R": 0.48, "F1": 0.4706, "leaf_P": 0.4, "leaf_R": 0.2667, "leaf_F1": 0.32, "n_leaf_sel": 10, "n_leaf_gt": 15, "ret_P": 0.0994, "sel_given_ret": 0.75, "over_sel": 1.04, "why": {"explicit": 8}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 74, "attempts_by_n_local": {"100": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "61": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.3333333333333333, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4615, "gen_R": 0.48, "gen_F1": 0.4706, "missed": ["claws", "crossed_arms", "facial_markings", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "markings", "overalls", "rabbit", "standing"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "holding_object", "holding_tool", "looking_at_viewer", "tools", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_shirt", "blue_topwear", "bottomwear", "canid", "canine", 
"clothed", "clothing", "duo", "fox", "holding_object", "holding_tool", "looking_at_viewer", "mammal", "pants", "shirt", "tools", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", "blue_overalls", "blue_shirt", "fox", "holding_tool", "shirt", "simple_background", "white_shirt"], "stage3_selected_scores": {"simple_background": 0.4161, "fox": 0.638, "shirt": 0.7484, "white_shirt": 0.8198, "black_pants": 0.8331, "blue_shirt": 0.7656, "holding_tool": 0.5163, "blue_overalls": 0.9203}, "stage3_selected_ranks": {"simple_background": 163, "fox": 47, "shirt": 18, "white_shirt": 4, "black_pants": 3, "blue_shirt": 7, "holding_tool": 124, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"simple_background": 8, "fox": 1, "shirt": 1, "white_shirt": 1, "black_pants": 1, "blue_shirt": 3, "holding_tool": 10, "blue_overalls": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8331}, "blue_clothing": {"source": "implied"}, "blue_overalls": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9203}, "blue_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7656}, "blue_topwear": {"source": "implied"}, "holding_object": {"source": "implied"}, "holding_tool": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5163}, "looking_at_viewer": {"source": "structural"}, "tools": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8198}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.36, "t2": 1.82, "t3": 49.39, "t3s": 1.59, "t3p": 5.34, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: 
general=161 entity=5 copyright_filtered=0 generic_char_to_general=0 unknown_type=3", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 44, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"style_or_meta\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 66, \"why\": \"weak_implied\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 70}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 70}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 183, "n_selected": 64, "n_implied": 27, "n_structural": 4, "n_probe": 3, "ret_R": 0.6364, "P": 0.0781, "R": 0.4545, "F1": 0.1333, "leaf_P": 0.0294, "leaf_R": 0.1429, "leaf_F1": 0.0488, "n_leaf_sel": 34, "n_leaf_gt": 7, "ret_P": 0.0383, "sel_given_ret": 0.7143, "over_sel": 5.82, "why": {"explicit": 32}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 1, "calls_exhausted_retries": 1, "attempts_total": 4, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 32, "attempts_by_n_local": {"100": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}, "86": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.75, "call_exhaustion_rate": 0.5}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.0781, "gen_R": 0.4545, "gen_F1": 0.1333, "missed": ["blue_eyes", "blue_nose", "open_mouth", "purple_body", "white_body", "white_fur"], "extra": ["amphibian", "amphibian_humanoid", "animal_humanoid", "anthro", "blinking", "blue_ears", "blue_pawpads", "blue_paws", "blue_stripes", "body_hair", "border", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "corpse", "countershading", "crosslegged_pose", "curled_up", "facial_markings", "fennec_humanoid", "fetal_pose", "fighting_pose", "fox_humanoid", "frog_humanoid", "glistening", "glistening_nose", "grey_nose", "head_markings", "heterochromia", "humanoid", "male", "male_humanoid", "mammal_humanoid", "markings", "mouth_full", "muscular", "muscular_male", "pawpads", "pink_body", "pink_countershading", "pose", "purple_border", "purple_tongue", "striped_face", "stripes", "swinging", "tail", "tail_tuft", "tan_body", "tan_fur", "tan_tail", "tanuki_humanoid", "tongue", "tuft", "two_tone_tail", "wavy_tail", 
"white_stripes", "white_tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["amphibian", "amphibian_humanoid", "animal_humanoid", "anthro", "blinking", "blue_ears", "blue_pawpads", "blue_paws", "blue_stripes", "body_hair", "border", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "corpse", "countershading", "crosslegged_pose", "curled_up", "facial_markings", "fennec_humanoid", "fetal_pose", "fighting_pose", "fox_humanoid", "frog_humanoid", "fur", "glistening", "glistening_nose", "grey_nose", "head_markings", "heterochromia", "humanoid", "male", "male_humanoid", "mammal", "mammal_humanoid", "markings", "mouth_full", "muscular", "muscular_male", "pawpads", "pink_body", "pink_countershading", "pose", "purple_border", "purple_tongue", "solo", "striped_face", "stripes", "swinging", "tail", "tail_tuft", "tan_body", "tan_fur", "tan_tail", "tanuki_humanoid", "tongue", "tuft", "two_tone_tail", "wavy_tail", "white_stripes", "white_tail"], "stage3_selected": ["blinking", "blue_ears", "blue_pawpads", "blue_paws", "blue_stripes", "body_hair", "corpse", "crosslegged_pose", "fennec_humanoid", "fetal_pose", "fighting_pose", "frog_humanoid", "glistening_nose", "grey_nose", "heterochromia", "male_humanoid", "mouth_full", "muscular_male", "pink_countershading", "purple_border", "purple_tongue", "striped_face", "swinging", "tail_tuft", "tan_fur", "tan_tail", "tanuki_humanoid", "tongue", "two_tone_tail", "wavy_tail", "white_stripes", "white_tail"], "stage3_selected_scores": {"tongue": 0.3351, "muscular_male": 0.2998, "tan_fur": 0.4046, "body_hair": 0.2971, "tail_tuft": 0.4789, "white_tail": 0.4815, "heterochromia": 0.4217, "two_tone_tail": 0.4801, "blue_pawpads": 0.4689, "tan_tail": 0.4917, "blue_ears": 0.4793, "purple_tongue": 0.4737, "grey_nose": 0.4276, "glistening_nose": 0.4234, "blue_stripes": 0.5367, "corpse": 0.3037, 
"fighting_pose": 0.4379, "white_stripes": 0.532, "purple_border": 0.513, "blue_paws": 0.4653, "striped_face": 0.5338, "tanuki_humanoid": 0.77, "wavy_tail": 0.4877, "frog_humanoid": 0.5239, "pink_countershading": 0.493, "male_humanoid": 0.5449, "crosslegged_pose": 0.4261, "swinging": 0.3357, "blinking": 0.3357, "fetal_pose": 0.4288, "mouth_full": 0.4434, "fennec_humanoid": 0.7856}, "stage3_selected_ranks": {"tongue": 183, "muscular_male": 189, "tan_fur": 148, "body_hair": 190, "tail_tuft": 91, "white_tail": 85, "heterochromia": 142, "two_tone_tail": 88, "blue_pawpads": 98, "tan_tail": 71, "blue_ears": 89, "purple_tongue": 94, "grey_nose": 138, "glistening_nose": 141, "blue_stripes": 47, "corpse": 188, "fighting_pose": 128, "white_stripes": 51, "purple_border": 63, "blue_paws": 101, "striped_face": 50, "tanuki_humanoid": 7, "wavy_tail": 75, "frog_humanoid": 54, "pink_countershading": 70, "male_humanoid": 43, "crosslegged_pose": 139, "swinging": 181, "blinking": 182, "fetal_pose": 137, "mouth_full": 126, "fennec_humanoid": 6}, "stage3_selected_phrase_ranks": {"tongue": 10, "muscular_male": 9, "tan_fur": 8, "body_hair": 10, "tail_tuft": 3, "white_tail": 8, "heterochromia": 6, "two_tone_tail": 9, "blue_pawpads": 6, "tan_tail": 6, "blue_ears": 7, "purple_tongue": 9, "grey_nose": 7, "glistening_nose": 9, "blue_stripes": 2, "corpse": 8, "fighting_pose": 5, "white_stripes": 4, "purple_border": 6, "blue_paws": 8, "striped_face": 6, "tanuki_humanoid": 7, "wavy_tail": 7, "frog_humanoid": 10, "pink_countershading": 9, "male_humanoid": 9, "crosslegged_pose": 10, "swinging": 8, "blinking": 9, "fetal_pose": 9, "mouth_full": 9, "fennec_humanoid": 6}, "extra_evidence": {"amphibian": {"source": "implied"}, "amphibian_humanoid": {"source": "implied"}, "animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "blinking": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3357}, "blue_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 
0.4793}, "blue_pawpads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4689}, "blue_paws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4653}, "blue_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5367}, "body_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.2971}, "border": {"source": "implied"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "implied"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "corpse": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3037}, "countershading": {"source": "implied"}, "crosslegged_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4261}, "curled_up": {"source": "implied"}, "facial_markings": {"source": "implied"}, "fennec_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7856}, "fetal_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4288}, "fighting_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4379}, "fox_humanoid": {"source": "implied"}, "frog_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5239}, "glistening": {"source": "implied"}, "glistening_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4234}, "grey_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4276}, "head_markings": {"source": "implied"}, "heterochromia": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4217}, "humanoid": {"source": "implied"}, "male": {"source": "structural"}, "male_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5449}, "mammal_humanoid": {"source": "implied"}, "markings": {"source": "implied"}, "mouth_full": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4434}, "muscular": {"source": "implied"}, "muscular_male": {"source": "stage3", "why": "explicit", "retrieval_score": 0.2998}, "pawpads": {"source": "implied"}, "pink_body": 
{"source": "implied"}, "pink_countershading": {"source": "stage3", "why": "explicit", "retrieval_score": 0.493}, "pose": {"source": "implied"}, "purple_border": {"source": "stage3", "why": "explicit", "retrieval_score": 0.513}, "purple_tongue": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4737}, "striped_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5338}, "stripes": {"source": "implied"}, "swinging": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3357}, "tail": {"source": "implied"}, "tail_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4789}, "tan_body": {"source": "implied"}, "tan_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4046}, "tan_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4917}, "tanuki_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.77}, "tongue": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3351}, "tuft": {"source": "implied"}, "two_tone_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4801}, "wavy_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4877}, "white_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.532}, "white_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4815}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 1.95, "t2": 1.8, "t3": 64.08, "t3s": 1.95, "t3p": 4.82, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=186 entity=2 copyright_filtered=2 generic_char_to_general=4 unknown_type=12", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, 
\"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"weak_implied\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 42, \"why\": \"style_or_meta\"}, {\"i\": 44, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"\": null}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 59, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"explicit\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 57}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"other\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 65}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: gave up after 3 attempts"]}
|
data/eval_results/latency_chunk60_k6_seed42.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T05:59:20.226851", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 6, "temperature": 0.0, "shuffle": false, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 27}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 6, "n_selected": 9, "n_implied": 1, "n_structural": 4, "n_probe": 5, "ret_R": 0.0, "P": 0.7778, "R": 0.5833, "F1": 0.6667, "leaf_P": 0.5714, "leaf_R": 0.4444, "leaf_F1": 0.5, "n_leaf_sel": 7, "n_leaf_gt": 9, "ret_P": 0.0, "sel_given_ret": 0.0, "over_sel": 0.75, "why": {"strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 2, "attempts_by_n_local": {"6": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7778, "gen_R": 0.5833, "gen_F1": 0.6667, "missed": ["alpha_channel", "feline", "fingers", "fur", "hair"], "extra": ["humor", "text"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "clothed", "clothing", "felid", "humor", "male", "mammal", "solo", "text"], "stage3_selected": ["humor"], "stage3_selected_scores": {"humor": 0.4396}, "stage3_selected_ranks": {"humor": 3}, "stage3_selected_phrase_ranks": {"humor": 3}, "extra_evidence": {"humor": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4396}, "text": {"source": "probe"}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 5.4, "t2": 2.04, "t3": 0.67, "t3s": 3.86, "t3p": 4.31, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=6 entity=0 copyright_filtered=0 generic_char_to_general=0 
unknown_type=0"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 6, "n_selected": 14, "n_implied": 2, "n_structural": 7, "n_probe": 5, "ret_R": 0.0, "P": 0.4286, "R": 0.4286, "F1": 0.4286, "leaf_P": 0.2, "leaf_R": 0.2, "leaf_F1": 0.2, "n_leaf_sel": 10, "n_leaf_gt": 10, "ret_P": 0.0, "sel_given_ret": 0.0, "over_sel": 1.0, "why": {"strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 2, "attempts_by_n_local": {"6": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4286, "gen_R": 0.4286, "gen_F1": 0.4286, "missed": ["ape", "dancing", "fur", "hair", "haplorhine", "human", "male", "primate"], "extra": ["anthro", "crossover", "duo", "feral", "humor", "text", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "bear", "clothed", "clothing", "crossover", "duo", "feral", "group", "humor", "looking_at_viewer", "mammal", "text", "topless", "trio"], "stage3_selected": ["crossover", "humor"], "stage3_selected_scores": {"humor": 0.3489, "crossover": 0.3287}, "stage3_selected_ranks": {"humor": 4, "crossover": 6}, "stage3_selected_phrase_ranks": {"humor": 4, "crossover": 6}, "extra_evidence": {"anthro": {"source": "structural"}, "crossover": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.3287}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "humor": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.3489}, "text": 
{"source": "structural"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "feral", "clothed", "topless", "looking_at_viewer", "text"], "probe": ["anthro", "duo", "group", "bear", "simple_background"], "t1": 6.1, "t2": 1.48, "t3": 0.78, "t3s": 6.44, "t3p": 4.25, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=6 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 83, "n_selected": 23, "n_implied": 3, "n_structural": 4, "n_probe": 5, "ret_R": 0.6429, "P": 0.4348, "R": 0.7143, "F1": 0.5405, "leaf_P": 0.4118, "leaf_R": 0.7778, "leaf_F1": 0.5385, "n_leaf_sel": 17, "n_leaf_gt": 9, "ret_P": 0.1084, "sel_given_ret": 1.1111, "over_sel": 1.64, "why": {"explicit": 13, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 42, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "24": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4348, "gen_R": 0.7143, "gen_F1": 0.5405, "missed": ["lagomorph", "leporid", "mammal", "rabbit"], "extra": ["<3", "coat", "expressions", "holding_object", "holding_plushie", "intimate", "lab_coat", "looking_at_viewer", "love", "relationship", "shocked_expression", "topwear", "vest"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "expressions", "holding_object", "holding_plushie", "intimate", "lab_coat", "looking_at_viewer", "love", "plushie", "relationship", "romantic", "romantic_couple", "shocked_expression", "teal_eyes", "topwear", "vest"], "stage3_selected": ["blue_eyes", "coat", "duo", "expressions", "holding_plushie", "intimate", "lab_coat", "love", "plushie", "relationship", "romantic_couple", "shocked_expression", 
"teal_eyes", "vest"], "stage3_selected_scores": {"duo": 0.3628, "blue_eyes": 0.6151, "romantic_couple": 0.5621, "coat": 0.6383, "plushie": 0.7455, "vest": 0.5028, "love": 0.4693, "teal_eyes": 0.6283, "lab_coat": 0.516, "intimate": 0.4403, "expressions": 0.5454, "holding_plushie": 0.7793, "relationship": 0.6206, "shocked_expression": 0.5745}, "stage3_selected_ranks": {"duo": 81, "blue_eyes": 12, "romantic_couple": 18, "coat": 7, "plushie": 3, "vest": 42, "love": 55, "teal_eyes": 8, "lab_coat": 38, "intimate": 70, "expressions": 23, "holding_plushie": 2, "relationship": 9, "shocked_expression": 16}, "stage3_selected_phrase_ranks": {"duo": 3, "blue_eyes": 1, "romantic_couple": 1, "coat": 1, "plushie": 1, "vest": 6, "love": 5, "teal_eyes": 1, "lab_coat": 5, "intimate": 6, "expressions": 2, "holding_plushie": 1, "relationship": 1, "shocked_expression": 4}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5454}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "intimate": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4403}, "lab_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.516}, "looking_at_viewer": {"source": "structural"}, "love": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4693}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "shocked_expression": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5745}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5028}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 2.83, "t2": 1.55, "t3": 14.48, "t3s": 3.46, "t3p": 8.24, "err": null, "issues": ["Stage3 split: 
general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=84 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 80, "n_selected": 10, "n_implied": 0, "n_structural": 4, "n_probe": 3, "ret_R": 0.75, "P": 0.3, "R": 0.75, "F1": 0.4286, "leaf_P": 0.3, "leaf_R": 0.75, "leaf_F1": 0.4286, "n_leaf_sel": 10, "n_leaf_gt": 4, "ret_P": 0.0375, "sel_given_ret": 1.0, "over_sel": 2.5, "why": {"explicit": 6}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 1, "calls_exhausted_retries": 1, "attempts_total": 4, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 4, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "24": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.5}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3, "gen_R": 0.75, "gen_F1": 0.4286, "missed": ["tan_body"], "extra": ["ambiguous_gender", "anthro", "eyes", "feral", "floating", "nude", "round_eyes"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "eyes", "feral", "floating", "nude", "red_nose", "round_eyes", "smile", "solo"], "stage3_selected": ["eyes", "floating", "red_nose", "round_eyes", "simple_background", "smile"], "stage3_selected_scores": {"simple_background": 0.5334, "smile": 0.6084, "red_nose": 0.7451, "floating": 0.6767, "round_eyes": 0.8853, "eyes": 0.929}, "stage3_selected_ranks": {"simple_background": 54, "smile": 34, "red_nose": 4, "floating": 18, "round_eyes": 2, "eyes": 1}, "stage3_selected_phrase_ranks": {"simple_background": 6, "smile": 2, "red_nose": 1, "floating": 1, "round_eyes": 1, "eyes": 1}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "eyes": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.929}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6767}, "nude": {"source": "structural"}, "round_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8853}}, "structural": ["solo", "feral", "ambiguous_gender", "nude"], "probe": ["simple_background", "anthro", "solo"], "t1": 2.71, "t2": 1.42, "t3": 8.73, "t3s": 3.78, "t3p": 8.48, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=84 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4", "Stage3 general_chunk_1: gave up after 3 attempts"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 108, "n_selected": 34, "n_implied": 12, "n_structural": 4, "n_probe": 3, "ret_R": 0.3636, "P": 0.5, "R": 0.7727, "F1": 0.6071, "leaf_P": 0.1579, "leaf_R": 0.25, "leaf_F1": 0.1935, "n_leaf_sel": 19, "n_leaf_gt": 12, "ret_P": 0.0741, "sel_given_ret": 2.125, "over_sel": 1.55, "why": {"explicit": 10, "strong_implied": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 36, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "47": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.7727, "gen_F1": 0.6071, "missed": ["chest_tuft", "muscular", "muscular_anthro", "muscular_male", "topless"], "extra": ["belly", "bengal_tiger", "countershade_belly", "countershade_body", "gesture", "glistening", "glistening_body", "glistening_eyes", "glistening_fur", "hand_on_own_head", "hotpants", "minishorts", "muscular_legs", "quads", "striped_body", "striped_fur", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "belly", "bengal_tiger", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_belly", "countershade_body", "countershading", "felid", "fur", "gesture", "glistening", "glistening_body", "glistening_eyes", "glistening_fur", "hand_on_head", 
"hand_on_own_head", "hotpants", "male", "mammal", "minishorts", "muscular_legs", "pantherine", "quads", "shorts", "solo", "striped_body", "striped_fur", "stripes", "tiger", "tuft", "white_chest"], "stage3_selected": ["bengal_tiger", "blue_eyes", "countershade_belly", "countershade_body", "gesture", "glistening_eyes", "glistening_fur", "hand_on_head", "hand_on_own_head", "minishorts", "muscular_legs", "quads", "shorts", "striped_fur", "stripes", "tuft", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5727, "tuft": 0.4907, "stripes": 0.469, "shorts": 0.58, "gesture": 0.5868, "hand_on_head": 0.5941, "quads": 0.6704, "bengal_tiger": 0.4385, "minishorts": 0.5208, "hand_on_own_head": 0.526, "striped_fur": 0.6394, "glistening_eyes": 0.4754, "glistening_fur": 0.4988, "muscular_legs": 0.7895, "white_chest": 0.92, "countershade_body": 0.8756, "countershade_belly": 0.8307}, "stage3_selected_ranks": {"blue_eyes": 47, "tuft": 68, "stripes": 85, "shorts": 46, "gesture": 44, "hand_on_head": 40, "quads": 23, "bengal_tiger": 94, "minishorts": 55, "hand_on_own_head": 51, "striped_fur": 29, "glistening_eyes": 82, "glistening_fur": 65, "muscular_legs": 11, "white_chest": 2, "countershade_body": 3, "countershade_belly": 9}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "tuft": 4, "stripes": 3, "shorts": 1, "gesture": 1, "hand_on_head": 2, "quads": 4, "bengal_tiger": 5, "minishorts": 6, "hand_on_own_head": 4, "striped_fur": 2, "glistening_eyes": 3, "glistening_fur": 3, "muscular_legs": 2, "white_chest": 1, "countershade_body": 1, "countershade_belly": 2}, "extra_evidence": {"belly": {"source": "implied"}, "bengal_tiger": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4385}, "countershade_belly": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8307}, "countershade_body": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8756}, "gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5868}, "glistening": 
{"source": "implied"}, "glistening_body": {"source": "implied"}, "glistening_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4754}, "glistening_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4988}, "hand_on_own_head": {"source": "stage3", "why": "explicit", "retrieval_score": 0.526}, "hotpants": {"source": "implied"}, "minishorts": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5208}, "muscular_legs": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7895}, "quads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6704}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6394}, "white_chest": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.92}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "felid", "solo"], "t1": 1.55, "t2": 1.88, "t3": 13.52, "t3s": 2.53, "t3p": 1.33, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=107 entity=1 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 124, "n_selected": 15, "n_implied": 2, "n_structural": 0, "n_probe": 4, "ret_R": 0.6154, "P": 0.4, "R": 0.4615, "F1": 0.4286, "leaf_P": 0.2727, "leaf_R": 0.5, "leaf_F1": 0.3529, "n_leaf_sel": 11, "n_leaf_gt": 6, "ret_P": 0.0645, "sel_given_ret": 0.75, "over_sel": 1.15, "why": {"explicit": 4, "strong_implied": 6}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 1, "calls_exhausted_retries": 1, "attempts_total": 5, "attempt_errors": 4, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 24, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "58": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}}, "attempt_failure_rate": 0.8, "call_exhaustion_rate": 0.5}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4, "gen_R": 0.4615, "gen_F1": 0.4286, "missed": ["dialogue", "fur", "lizard", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothing", "dark_theme", "darkness", "group", "knife", "light", "medieval_fantasy", "trio"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "bovid", "caprine", "clothing", "dark_theme", "darkness", "goat", "group", "human", "knife", "light", "mammal", "medieval_fantasy", "text", "trio"], "stage3_selected": ["bovid", "dark_theme", "darkness", "goat", "group", "human", "knife", "light", "medieval_fantasy", "trio"], "stage3_selected_scores": {"light": 0.7782, "darkness": 0.8346, "dark_theme": 0.5937, "medieval_fantasy": 0.4783, "group": 0.6233, "human": 0.6639, "bovid": 0.5984, "trio": 0.5291, "goat": 0.7749, "knife": 0.5268}, "stage3_selected_ranks": {"light": 4, "darkness": 2, 
"dark_theme": 41, "medieval_fantasy": 114, "group": 29, "human": 19, "bovid": 39, "trio": 82, "goat": 5, "knife": 84}, "stage3_selected_phrase_ranks": {"light": 1, "darkness": 1, "dark_theme": 2, "medieval_fantasy": 5, "group": 1, "human": 1, "bovid": 3, "trio": 2, "goat": 1, "knife": 3}, "extra_evidence": {"anthro": {"source": "probe"}, "clothing": {"source": "probe"}, "dark_theme": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5937}, "darkness": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8346}, "group": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6233}, "knife": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5268}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7782}, "medieval_fantasy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4783}, "trio": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5291}}, "structural": [], "probe": ["clothing", "anthro", "text", "group"], "t1": 1.75, "t2": 1.89, "t3": 79.86, "t3s": 0.66, "t3p": 1.77, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=118 entity=1 copyright_filtered=5 generic_char_to_general=2 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"other\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"other\"}, {\"i\": 12, \"why\": \"other\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 
17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"other\"}, {\"i\": 19, \"why\": \"other\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"other\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.36.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.36.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"other\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"other\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 8, \"why\": \"style_or_meta\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 11, \"why\": \"other\"}, {\"i\": 12, \"why\": \"style_or_meta\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 14, \"why\": \"other\"}, {\"i\": 15, \"why\": \"other\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 18, \"why\": \"style_or_meta\"}, {\"i\": 19, \"why\": 
\"other\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 24, \"why\": \"style_or_meta\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"style_or_meta\"}, {\"i\": 29, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"style_or_meta\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 37}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 37}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"other\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"other\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"style_or_meta\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 14, \"why\": \"other\"}, {\"i\": 15, \"why\": \"other\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"style_or_meta\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"style_or_meta\"}, {\"i\": 25, \"why\": 
\"other\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"style_or_meta\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"\": null}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 36, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"other\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"other\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 8, \"why\": \"style_or_meta\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 11, \"why\": \"other\"}, {\"i\": 12, \"why\": \"style_or_meta\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 14, \"why\": \"other\"}, {\"i\": 15, \"why\": \"other\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 18, \"why\": \"style_or_meta\"}, {\"i\": 19, \"why\": \"other\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 24, \"why\": \"style_or_meta\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"style_or_meta\"}, {\"i\": 29, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"style_or_meta\"}, {\"i\": 32, \"why\": 
\"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 37}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 37}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: gave up after 3 attempts"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 128, "n_selected": 36, "n_implied": 7, "n_structural": 3, "n_probe": 3, "ret_R": 0.6, "P": 0.3333, "R": 0.8, "F1": 0.4706, "leaf_P": 0.2692, "leaf_R": 0.5833, "leaf_F1": 0.3684, "n_leaf_sel": 26, "n_leaf_gt": 12, "ret_P": 0.0703, "sel_given_ret": 1.3333, "over_sel": 2.4, "why": {"explicit": 26}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 56, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "11": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3333, "gen_R": 0.8, "gen_F1": 0.4706, "missed": ["angry", "eyes_closed", "sleeping"], "extra": ["annoyed", "annoyed_expression", "anthro", "bed_covers", "bedding", "bedroom", "big_eyes", "clothing", "english_text", "expressions", "eyebrows", "eyes", "green_eyebrows", "humanoid", "lipstick", "lying_on_bed", "on_bed", "pajamas", "red_lipstick", "relaxed_expression", "scenery", "sleepover", "sleepwear", "watermark"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed", "annoyed_expression", "anthro", "bed", "bed_covers", "bedding", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "clothing", "duo", "english_text", "expressions", "eyebrows", "eyes", "eyeshadow", "furniture", "green_eyebrows", "green_eyes", "hair", "humanoid", "lipstick", "lying", "lying_on_bed", "makeup", "on_bed", "pajamas", "purple_hair", 
"red_lipstick", "relaxed_expression", "scenery", "sleepover", "sleepwear", "text", "watermark"], "stage3_selected": ["annoyed", "annoyed_expression", "bed_covers", "bedroom", "big_eyes", "blonde_hair", "blue_eyes", "english_text", "expressions", "eyes", "eyeshadow", "green_eyebrows", "green_eyes", "hair", "lipstick", "lying_on_bed", "makeup", "pajamas", "purple_hair", "red_lipstick", "relaxed_expression", "scenery", "sleepover", "sleepwear", "text", "watermark"], "stage3_selected_scores": {"hair": 0.6031, "text": 0.6007, "blue_eyes": 0.6014, "green_eyes": 0.5989, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "eyeshadow": 0.4763, "watermark": 0.6042, "lipstick": 0.4874, "bedroom": 0.4901, "big_eyes": 0.4289, "annoyed": 0.5727, "lying_on_bed": 0.4093, "pajamas": 0.3753, "red_lipstick": 0.4709, "scenery": 0.4936, "annoyed_expression": 0.7251, "bed_covers": 0.4145, "expressions": 0.5439, "green_eyebrows": 0.5014, "sleepover": 0.5269, "sleepwear": 0.4462, "relaxed_expression": 0.4534, "eyes": 0.8951, "english_text": 0.4189}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 14, "makeup": 11, "eyeshadow": 46, "watermark": 4, "lipstick": 40, "bedroom": 38, "big_eyes": 74, "annoyed": 13, "lying_on_bed": 94, "pajamas": 112, "red_lipstick": 49, "scenery": 36, "annoyed_expression": 2, "bed_covers": 89, "expressions": 18, "green_eyebrows": 31, "sleepover": 26, "sleepwear": 62, "relaxed_expression": 55, "eyes": 1, "english_text": 85}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "watermark": 1, "lipstick": 2, "bedroom": 1, "big_eyes": 3, "annoyed": 2, "lying_on_bed": 4, "pajamas": 3, "red_lipstick": 5, "scenery": 2, "annoyed_expression": 1, "bed_covers": 3, "expressions": 3, "green_eyebrows": 2, "sleepover": 1, "sleepwear": 6, "relaxed_expression": 6, "eyes": 1, "english_text": 4}, 
"extra_evidence": {"annoyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5727}, "annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "bed_covers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4145}, "bedding": {"source": "implied"}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4289}, "clothing": {"source": "implied"}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4189}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5439}, "eyebrows": {"source": "implied"}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8951}, "green_eyebrows": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5014}, "humanoid": {"source": "structural"}, "lipstick": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4874}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4093}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3753}, "red_lipstick": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4709}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4534}, "scenery": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4936}, "sleepover": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5269}, "sleepwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4462}, "watermark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6042}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 3.37, "t2": 1.96, "t3": 29.39, "t3s": 2.54, "t3p": 6.41, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: 
general=131 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 93, "n_selected": 26, "n_implied": 10, "n_structural": 4, "n_probe": 3, "ret_R": 0.2727, "P": 0.4615, "R": 0.5455, "F1": 0.5, "leaf_P": 0.4, "leaf_R": 0.4615, "leaf_F1": 0.4286, "n_leaf_sel": 15, "n_leaf_gt": 13, "ret_P": 0.0645, "sel_given_ret": 2.0, "over_sel": 1.18, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 57, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "38": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4615, "gen_R": 0.5455, "gen_F1": 0.5, "missed": ["bass_guitar", "canine", "fur", "guitar", "holding_musical_instrument", "holding_object", "music", "musical_instrument", "plucked_string_instrument", "string_instrument"], "extra": ["accessory", "bottomwear", "flowing_hair", "hair_accessory", "hair_tie", "holding_hair", "long_tail", "looking_at_viewer", "playful", "playing_bass", "shorts", "torn_bottomwear", "torn_shorts", "touching_hair"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["accessory", "anthro", "bottomwear", "canid", "claws", "clothed", "clothing", "fingers", "flowing_hair", "hair", "hair_accessory", "hair_tie", "holding_hair", "long_tail", "looking_at_viewer", "mammal", 
"playful", "playing_bass", "shorts", "solo", "spade_tail", "tail", "torn_bottomwear", "torn_clothing", "torn_shorts", "touching_hair"], "stage3_selected": ["claws", "fingers", "flowing_hair", "hair_tie", "holding_hair", "long_tail", "playful", "playing_bass", "spade_tail", "torn_clothing", "torn_shorts"], "stage3_selected_scores": {"claws": 0.6306, "fingers": 0.5071, "torn_clothing": 0.4536, "long_tail": 0.502, "spade_tail": 0.8721, "hair_tie": 0.4654, "playful": 0.3766, "torn_shorts": 0.4535, "flowing_hair": 0.702, "holding_hair": 0.4924, "playing_bass": 0.5052}, "stage3_selected_ranks": {"claws": 5, "fingers": 41, "torn_clothing": 68, "long_tail": 45, "spade_tail": 1, "hair_tie": 62, "playful": 88, "torn_shorts": 69, "flowing_hair": 2, "holding_hair": 51, "playing_bass": 42}, "stage3_selected_phrase_ranks": {"claws": 1, "fingers": 4, "torn_clothing": 3, "long_tail": 5, "spade_tail": 1, "hair_tie": 3, "playful": 4, "torn_shorts": 4, "flowing_hair": 1, "holding_hair": 3, "playing_bass": 1}, "extra_evidence": {"accessory": {"source": "implied"}, "bottomwear": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.702}, "hair_accessory": {"source": "implied"}, "hair_tie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4654}, "holding_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4924}, "long_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.502}, "looking_at_viewer": {"source": "structural"}, "playful": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3766}, "playing_bass": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5052}, "shorts": {"source": "implied"}, "torn_bottomwear": {"source": "implied"}, "torn_shorts": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4535}, "touching_hair": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 8.84, "t2": 
1.31, "t3": 17.56, "t3s": 0.65, "t3p": 5.44, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=98 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=3"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 59, "n_selected": 17, "n_implied": 6, "n_structural": 4, "n_probe": 3, "ret_R": 0.36, "P": 0.6471, "R": 0.44, "F1": 0.5238, "leaf_P": 0.5556, "leaf_R": 0.3333, "leaf_F1": 0.4167, "n_leaf_sel": 9, "n_leaf_gt": 15, "ret_P": 0.1525, "sel_given_ret": 1.2222, "over_sel": 0.68, "why": {"explicit": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 23, "attempts_by_n_local": {"58": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6471, "gen_R": 0.44, "gen_F1": 0.5238, "missed": ["bottomwear", "canid", "canine", "crossed_arms", "fox", "grey_background", "lagomorph", "leporid", "looking_at_another", "mammal", "overalls", "pants", "rabbit", "standing"], "extra": ["4_claws", "5_claws", "looking_at_viewer", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["4_claws", "5_claws", "anthro", "claws", "clothed", "clothing", "duo", "facial_markings", "fur", "head_markings", "looking_at_viewer", "markings", "shirt", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["4_claws", "5_claws", "claws", "facial_markings", "fur", "shirt", "white_shirt"], "stage3_selected_scores": {"fur": 0.7019, "claws": 0.6694, "shirt": 
0.7044, "facial_markings": 0.9019, "white_shirt": 0.5527, "4_claws": 0.6125, "5_claws": 0.6238}, "stage3_selected_ranks": {"fur": 11, "claws": 13, "shirt": 10, "facial_markings": 1, "white_shirt": 48, "4_claws": 21, "5_claws": 20}, "stage3_selected_phrase_ranks": {"fur": 1, "claws": 1, "shirt": 1, "facial_markings": 1, "white_shirt": 4, "4_claws": 3, "5_claws": 1}, "extra_evidence": {"4_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6125}, "5_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6238}, "looking_at_viewer": {"source": "structural"}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5527}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 31.27, "t2": 0.92, "t3": 15.93, "t3s": 0.43, "t3p": 11.26, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "LLM rewrite: fallback (error: ReadTimeout: The read operation timed out)", "Stage3 split: general=58 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 127, "n_selected": 34, "n_implied": 9, "n_structural": 5, "n_probe": 3, "ret_R": 0.5455, "P": 0.2647, "R": 0.8182, "F1": 0.4, "leaf_P": 0.2, "leaf_R": 0.5714, "leaf_F1": 0.2963, "n_leaf_sel": 20, "n_leaf_gt": 7, "ret_P": 0.0472, "sel_given_ret": 1.5, "over_sel": 3.09, "why": {"explicit": 8, "strong_implied": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 55, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "12": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2647, "gen_R": 0.8182, "gen_F1": 0.4, "missed": ["blue_nose", "open_mouth"], "extra": ["animal_humanoid", "anime_eyes", "anthro", "big_eyes", "blue_ears", "blue_stripes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "cute_expression", "fennec_humanoid", "fluffy_fur", "fox_humanoid", "humanoid", "intersex", "jumper", "looking_at_viewer", "mammal_humanoid", "multi_tone_fur", "multicolored_body", "multicolored_fur", "pink_ears", "pink_stripes", "stripes"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anime_eyes", "anthro", "big_eyes", "blue_ears", "blue_eyes", "blue_stripes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "cute_expression", "fennec_humanoid", "fluffy_fur", "fox_humanoid", "fur", "humanoid", "intersex", "jumper", "looking_at_viewer", "mammal", 
"mammal_humanoid", "multi_tone_fur", "multicolored_body", "multicolored_fur", "pink_ears", "pink_stripes", "purple_body", "solo", "stripes", "white_body", "white_fur"], "stage3_selected": ["animal_humanoid", "anime_eyes", "big_eyes", "blue_ears", "blue_eyes", "blue_stripes", "canid_humanoid", "canine_humanoid", "cute_expression", "fennec_humanoid", "fluffy_fur", "fox_humanoid", "jumper", "multi_tone_fur", "multicolored_fur", "pink_ears", "pink_stripes", "purple_body", "simple_background", "white_fur"], "stage3_selected_scores": {"simple_background": 0.5994, "blue_eyes": 0.6045, "white_fur": 0.6039, "purple_body": 0.5693, "canine_humanoid": 0.9013, "blue_stripes": 0.6786, "pink_stripes": 0.6846, "fennec_humanoid": 0.7743, "multicolored_fur": 0.5035, "animal_humanoid": 0.6191, "canid_humanoid": 0.863, "fox_humanoid": 0.8214, "big_eyes": 0.4219, "blue_ears": 0.5093, "cute_expression": 0.4501, "pink_ears": 0.5282, "multi_tone_fur": 0.5185, "fluffy_fur": 0.5591, "anime_eyes": 0.4933, "jumper": 0.4075}, "stage3_selected_ranks": {"simple_background": 29, "blue_eyes": 24, "white_fur": 25, "purple_body": 41, "canine_humanoid": 1, "blue_stripes": 10, "pink_stripes": 9, "fennec_humanoid": 6, "multicolored_fur": 76, "animal_humanoid": 15, "canid_humanoid": 2, "fox_humanoid": 3, "big_eyes": 112, "blue_ears": 73, "cute_expression": 107, "pink_ears": 62, "multi_tone_fur": 67, "fluffy_fur": 48, "anime_eyes": 82, "jumper": 116}, "stage3_selected_phrase_ranks": {"simple_background": 1, "blue_eyes": 1, "white_fur": 1, "purple_body": 2, "canine_humanoid": 1, "blue_stripes": 1, "pink_stripes": 1, "fennec_humanoid": 6, "multicolored_fur": 6, "animal_humanoid": 2, "canid_humanoid": 2, "fox_humanoid": 3, "big_eyes": 6, "blue_ears": 5, "cute_expression": 6, "pink_ears": 5, "multi_tone_fur": 5, "fluffy_fur": 2, "anime_eyes": 2, "jumper": 3}, "extra_evidence": {"animal_humanoid": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6191}, "anime_eyes": {"source": "stage3", 
"why": "strong_implied", "retrieval_score": 0.4933}, "anthro": {"source": "structural"}, "big_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4219}, "blue_ears": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5093}, "blue_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6786}, "canid_humanoid": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.863}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9013}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "cute_expression": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4501}, "fennec_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7743}, "fluffy_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5591}, "fox_humanoid": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8214}, "humanoid": {"source": "implied"}, "intersex": {"source": "structural"}, "jumper": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4075}, "looking_at_viewer": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "multi_tone_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5185}, "multicolored_body": {"source": "implied"}, "multicolored_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5035}, "pink_ears": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5282}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6846}, "stripes": {"source": "implied"}}, "structural": ["solo", "anthro", "intersex", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 7.78, "t2": 1.99, "t3": 32.33, "t3s": 0.94, "t3p": 3.05, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=132 entity=0 copyright_filtered=0 
generic_char_to_general=0 unknown_type=4"]}
|
data/eval_results/latency_chunk60_k6_seed43.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T06:01:27.775437", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 6, "temperature": 0.0, "shuffle": false, "seed": 43, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 26}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 92, "n_selected": 51, "n_implied": 23, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.1765, "R": 0.75, "F1": 0.2857, "leaf_P": 0.12, "leaf_R": 0.3333, "leaf_F1": 0.1765, "n_leaf_sel": 25, "n_leaf_gt": 9, "ret_P": 0.0326, "sel_given_ret": 3.0, "over_sel": 4.25, "why": {"explicit": 14, "strong_implied": 8}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 43, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "35": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1765, "gen_R": 0.75, "gen_F1": 0.2857, "missed": ["alpha_channel", "fingers", "male"], "extra": ["beer_mug", "black_hands", "bottom_heavy", "brown_clothing", "brown_coat", "brown_topwear", "business_attire", "coat", "container", "cup", "domestic_cat", "dress_shirt", "felis", "formal", "gesture", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "handshake", "holding_container", "holding_cup", "holding_object", "huge_hips", "looking_at_viewer", "necktie", "pockets", "shirt", "sweater", "sweater_vest", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_body", "white_clothing", "white_dress_shirt", "white_fur", "white_necktie", "white_shirt", "white_topwear"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "beer_mug", "black_hands", "bottom_heavy", "brown_clothing", "brown_coat", "brown_topwear", "business_attire", 
"clothed", "clothing", "coat", "container", "cup", "domestic_cat", "dress_shirt", "felid", "feline", "felis", "formal", "fur", "gesture", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "handshake", "holding_container", "holding_cup", "holding_object", "huge_hips", "looking_at_viewer", "mammal", "necktie", "pockets", "shirt", "solo", "sweater", "sweater_vest", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_body", "white_clothing", "white_dress_shirt", "white_fur", "white_necktie", "white_shirt", "white_topwear"], "stage3_selected": ["beer_mug", "black_hands", "bottom_heavy", "brown_coat", "business_attire", "domestic_cat", "dress_shirt", "formal", "grey_shirt", "hair_bun", "handshake", "holding_container", "holding_cup", "huge_hips", "pockets", "simple_background", "sweater_vest", "teal_shirt", "white_dress_shirt", "white_fur", "white_necktie", "white_shirt"], "stage3_selected_scores": {"simple_background": 0.6979, "white_shirt": 0.738, "holding_container": 0.76, "dress_shirt": 0.7242, "hair_bun": 0.6927, "bottom_heavy": 0.4664, "grey_shirt": 0.7582, "sweater_vest": 0.7533, "white_dress_shirt": 0.6881, "handshake": 0.5512, "formal": 0.5993, "business_attire": 0.5658, "teal_shirt": 0.7475, "white_necktie": 0.6418, "white_fur": 0.598, "domestic_cat": 0.633, "huge_hips": 0.4406, "pockets": 0.6095, "holding_cup": 0.7668, "black_hands": 0.4563, "beer_mug": 0.6599, "brown_coat": 0.7267}, "stage3_selected_ranks": {"simple_background": 35, "white_shirt": 20, "holding_container": 11, "dress_shirt": 25, "hair_bun": 37, "bottom_heavy": 90, "grey_shirt": 13, "sweater_vest": 16, "white_dress_shirt": 38, "handshake": 75, "formal": 59, "business_attire": 70, "teal_shirt": 19, "white_necktie": 44, "white_fur": 60, "domestic_cat": 48, "huge_hips": 93, "pockets": 56, "holding_cup": 10, "black_hands": 91, "beer_mug": 41, "brown_coat": 24}, "stage3_selected_phrase_ranks": {"simple_background": 1, "white_shirt": 4, "holding_container": 5, 
"dress_shirt": 2, "hair_bun": 1, "bottom_heavy": 4, "grey_shirt": 1, "sweater_vest": 3, "white_dress_shirt": 5, "handshake": 3, "formal": 1, "business_attire": 1, "teal_shirt": 4, "white_necktie": 6, "white_fur": 3, "domestic_cat": 4, "huge_hips": 5, "pockets": 3, "holding_cup": 4, "black_hands": 6, "beer_mug": 5, "brown_coat": 5}, "extra_evidence": {"beer_mug": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6599}, "black_hands": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4563}, "bottom_heavy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4664}, "brown_clothing": {"source": "implied"}, "brown_coat": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7267}, "brown_topwear": {"source": "implied"}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5658}, "coat": {"source": "implied"}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "domestic_cat": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.633}, "dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7242}, "felis": {"source": "implied"}, "formal": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5993}, "gesture": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7582}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6927}, "handshake": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5512}, "holding_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.76}, "holding_cup": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7668}, "holding_object": {"source": "implied"}, "huge_hips": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4406}, "looking_at_viewer": {"source": "structural"}, "necktie": {"source": "implied"}, "pockets": 
{"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6095}, "shirt": {"source": "implied"}, "sweater": {"source": "implied"}, "sweater_vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7533}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7475}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "implied"}, "white_body": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6881}, "white_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.598}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6418}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.738}, "white_topwear": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 3.57, "t2": 4.55, "t3": 14.76, "t3s": 6.34, "t3p": 5.75, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=95 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 98, "n_selected": 19, "n_implied": 6, "n_structural": 0, "n_probe": 6, "ret_R": 0.5714, "P": 0.4737, "R": 0.6429, "F1": 0.5455, "leaf_P": 0.3636, "leaf_R": 0.4, "leaf_F1": 0.381, "n_leaf_sel": 11, "n_leaf_gt": 10, "ret_P": 0.0816, "sel_given_ret": 1.125, "over_sel": 1.36, "why": {"explicit": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 71, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "41": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5294, "gen_R": 0.6429, "gen_F1": 0.5806, "missed": ["clothed", "fur", "hair", "human", "male"], "extra": ["anthro", "bottomwear", "cheeky", "donkey_kong_(series)", "duo", "gorilla", "kong", "loincloth", "nintendo", "raised_arms"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "bottomwear", "cheeky", "clothing", "dancing", "donkey_kong_(series)", "duo", "gorilla", "group", "haplorhine", "kong", "loincloth", "looking_at_viewer", "mammal", "nintendo", "primate", "raised_arms"], "stage3_selected": ["bear", "cheeky", "dancing", "gorilla", "kong", "loincloth", "looking_at_viewer", "primate", "raised_arms", "simple_background"], "stage3_selected_scores": {"simple_background": 0.5459, "looking_at_viewer": 0.5448, "bear": 0.5727, "primate": 0.8898, "loincloth": 0.5668, "dancing": 0.5549, "raised_arms": 0.543, "gorilla": 
0.8294, "kong": 0.7484, "cheeky": 0.3883}, "stage3_selected_ranks": {"simple_background": 13, "looking_at_viewer": 14, "bear": 8, "primate": 2, "loincloth": 9, "dancing": 12, "raised_arms": 15, "gorilla": 4, "kong": 7, "cheeky": 64}, "stage3_selected_phrase_ranks": {"simple_background": 1, "looking_at_viewer": 1, "bear": 1, "primate": 1, "loincloth": 1, "dancing": 1, "raised_arms": 1, "gorilla": 2, "kong": 5, "cheeky": 1}, "extra_evidence": {"anthro": {"source": "probe"}, "bottomwear": {"source": "implied"}, "cheeky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3883}, "donkey_kong_(series)": {"source": "implied"}, "duo": {"source": "probe"}, "gorilla": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8294}, "kong": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7484}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5668}, "nintendo": {"source": "implied"}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.543}}, "structural": [], "probe": ["clothing", "simple_background", "anthro", "duo", "group", "bear"], "t1": 5.98, "t2": 5.27, "t3": 11.94, "t3s": 5.13, "t3p": 5.56, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=101 entity=2 copyright_filtered=1 generic_char_to_general=1 unknown_type=3"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 79, "n_selected": 20, "n_implied": 3, "n_structural": 4, "n_probe": 5, "ret_R": 0.7143, "P": 0.4, "R": 0.5714, "F1": 0.4706, "leaf_P": 0.4, "leaf_R": 0.6667, "leaf_F1": 0.5, "n_leaf_sel": 15, "n_leaf_gt": 9, "ret_P": 0.1266, "sel_given_ret": 0.8, "over_sel": 1.43, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 36, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4, "gen_R": 0.5714, "gen_F1": 0.4706, "missed": ["lagomorph", "leporid", "mammal", "rabbit", "romantic", "romantic_couple"], "extra": ["<3", "coat", "holding_object", "holding_plushie", "intimate", "lab_coat", "looking_at_viewer", "love", "surprised_expression", "teal_clothing", "topwear", "vest"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "holding_object", "holding_plushie", "intimate", "lab_coat", "looking_at_viewer", "love", "plushie", "surprised_expression", "teal_clothing", "teal_eyes", "topwear", "vest"], "stage3_selected": ["blue_eyes", "blush", "coat", "duo", "holding_plushie", "intimate", "lab_coat", "love", "surprised_expression", "teal_clothing", "teal_eyes", "vest"], "stage3_selected_scores": {"duo": 0.3632, "blush": 0.6084, "blue_eyes": 
0.6154, "coat": 0.6386, "vest": 0.503, "love": 0.4696, "teal_eyes": 0.6285, "surprised_expression": 0.6393, "lab_coat": 0.5162, "intimate": 0.4406, "holding_plushie": 0.7794, "teal_clothing": 0.4339}, "stage3_selected_ranks": {"duo": 77, "blush": 13, "blue_eyes": 12, "coat": 7, "vest": 43, "love": 51, "teal_eyes": 8, "surprised_expression": 6, "lab_coat": 37, "intimate": 63, "holding_plushie": 2, "teal_clothing": 65}, "stage3_selected_phrase_ranks": {"duo": 3, "blush": 1, "blue_eyes": 1, "coat": 1, "vest": 6, "love": 5, "teal_eyes": 1, "surprised_expression": 2, "lab_coat": 5, "intimate": 6, "holding_plushie": 1, "teal_clothing": 6}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6386}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7794}, "intimate": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4406}, "lab_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5162}, "looking_at_viewer": {"source": "structural"}, "love": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4696}, "surprised_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6393}, "teal_clothing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4339}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.503}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 3.56, "t2": 1.54, "t3": 8.42, "t3s": 5.22, "t3p": 12.79, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=79 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 67, "n_selected": 15, "n_implied": 0, "n_structural": 3, "n_probe": 4, "ret_R": 0.75, "P": 0.2667, "R": 1.0, "F1": 0.4211, "leaf_P": 0.2667, "leaf_R": 1.0, "leaf_F1": 0.4211, "n_leaf_sel": 15, "n_leaf_gt": 4, "ret_P": 0.0448, "sel_given_ret": 1.3333, "over_sel": 3.75, "why": {"explicit": 9, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 27, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "8": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2667, "gen_R": 1.0, "gen_F1": 0.4211, "missed": [], "extra": ["anthro", "big_eyes", "clothing", "feral", "floating", "floating_hands", "floating_limbs", "nose", "nude", "spots", "toony"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["anthro", "big_eyes", "clothing", "feral", "floating", "floating_hands", "floating_limbs", "nose", "nude", "red_nose", "smile", "solo", "spots", "tan_body", "toony"], "stage3_selected": ["big_eyes", "floating", "floating_hands", "floating_limbs", "nose", "red_nose", "smile", "spots", "tan_body", "toony", "white_background"], "stage3_selected_scores": {"smile": 0.6007, "white_background": 0.6129, "tan_body": 0.6622, "spots": 0.6267, "toony": 0.6012, "big_eyes": 0.6955, "red_nose": 0.7496, "floating": 0.6508, "nose": 0.8608, "floating_hands": 0.4342, "floating_limbs": 0.4364}, "stage3_selected_ranks": {"smile": 28, "white_background": 25, "tan_body": 14, "spots": 22, "toony": 27, "big_eyes": 6, 
"red_nose": 3, "floating": 16, "nose": 2, "floating_hands": 66, "floating_limbs": 65}, "stage3_selected_phrase_ranks": {"smile": 2, "white_background": 1, "tan_body": 6, "spots": 6, "toony": 1, "big_eyes": 1, "red_nose": 1, "floating": 1, "nose": 1, "floating_hands": 5, "floating_limbs": 4}, "extra_evidence": {"anthro": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6955}, "clothing": {"source": "probe"}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6508}, "floating_hands": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4342}, "floating_limbs": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4364}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8608}, "nude": {"source": "structural"}, "spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6267}, "toony": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6012}}, "structural": ["solo", "feral", "nude"], "probe": ["clothing", "simple_background", "anthro", "solo"], "t1": 4.99, "t2": 1.38, "t3": 3.29, "t3s": 5.69, "t3p": 12.8, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=68 entity=0 copyright_filtered=2 generic_char_to_general=0 unknown_type=5"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 104, "n_selected": 26, "n_implied": 7, "n_structural": 5, "n_probe": 3, "ret_R": 0.3636, "P": 0.5, "R": 0.5909, "F1": 0.5417, "leaf_P": 0.2105, "leaf_R": 0.3333, "leaf_F1": 0.2581, "n_leaf_sel": 19, "n_leaf_gt": 12, "ret_P": 0.0769, "sel_given_ret": 1.625, "over_sel": 1.18, "why": {"explicit": 13}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 63, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "42": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.5909, "gen_F1": 0.5417, "missed": ["chest_tuft", "countershading", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["countershade_body", "facial_tuft", "flexing_bicep", "fluffy_fur", "hand_on_arm", "hand_on_own_head", "looking_at_viewer", "playing", "striped_body", "striped_fur", "white_chest", "yellow_bottomwear", "yellow_clothing"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "facial_tuft", "felid", "flexing_bicep", "fluffy_fur", "fur", "hand_on_arm", "hand_on_head", "hand_on_own_head", "looking_at_viewer", "male", "mammal", "playing", "shorts", "solo", "striped_body", 
"striped_fur", "stripes", "white_chest", "yellow_bottomwear", "yellow_clothing"], "stage3_selected": ["blue_eyes", "countershade_body", "facial_tuft", "flexing_bicep", "fluffy_fur", "hand_on_arm", "hand_on_head", "hand_on_own_head", "playing", "shorts", "striped_fur", "white_chest", "yellow_bottomwear"], "stage3_selected_scores": {"blue_eyes": 0.5717, "shorts": 0.5785, "facial_tuft": 0.4821, "striped_fur": 0.6385, "hand_on_head": 0.5932, "hand_on_arm": 0.608, "playing": 0.3366, "flexing_bicep": 0.6624, "yellow_bottomwear": 0.652, "white_chest": 0.9198, "countershade_body": 0.8754, "fluffy_fur": 0.6693, "hand_on_own_head": 0.525}, "stage3_selected_ranks": {"blue_eyes": 45, "shorts": 44, "facial_tuft": 64, "striped_fur": 29, "hand_on_head": 40, "hand_on_arm": 34, "playing": 99, "flexing_bicep": 26, "yellow_bottomwear": 28, "white_chest": 2, "countershade_body": 3, "fluffy_fur": 24, "hand_on_own_head": 48}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "shorts": 1, "facial_tuft": 6, "striped_fur": 2, "hand_on_head": 2, "hand_on_arm": 1, "playing": 3, "flexing_bicep": 5, "yellow_bottomwear": 1, "white_chest": 1, "countershade_body": 1, "fluffy_fur": 1, "hand_on_own_head": 4}, "extra_evidence": {"countershade_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8754}, "facial_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4821}, "flexing_bicep": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6624}, "fluffy_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6693}, "hand_on_arm": {"source": "stage3", "why": "explicit", "retrieval_score": 0.608}, "hand_on_own_head": {"source": "stage3", "why": "explicit", "retrieval_score": 0.525}, "looking_at_viewer": {"source": "structural"}, "playing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3366}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6385}, "white_chest": {"source": 
"stage3", "why": "explicit", "retrieval_score": 0.9198}, "yellow_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.652}, "yellow_clothing": {"source": "implied"}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "felid", "solo"], "t1": 1.93, "t2": 1.48, "t3": 34.51, "t3s": 1.74, "t3p": 5.82, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=102 entity=1 copyright_filtered=2 generic_char_to_general=0 unknown_type=2"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 118, "n_selected": 7, "n_implied": 0, "n_structural": 5, "n_probe": 4, "ret_R": 0.4615, "P": 0.1429, "R": 0.0769, "F1": 0.1, "leaf_P": 0.1429, "leaf_R": 0.1667, "leaf_F1": 0.1538, "n_leaf_sel": 7, "n_leaf_gt": 6, "ret_P": 0.0508, "sel_given_ret": 0.1667, "over_sel": 0.54, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 0, "calls_exhausted_retries": 2, "attempts_total": 6, "attempt_errors": 4, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"60": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}, "50": {"attempts": 3, "parse_ok": 2, "parse_fail": 0, "errors": 1}}, "attempt_failure_rate": 0.6666666666666666, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1429, "gen_R": 0.0769, "gen_F1": 0.1, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothing", "duo", "group", "solo", "taur"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothing", "duo", "group", "solo", "taur", "text"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"anthro": {"source": "probe"}, "clothing": {"source": "probe"}, "duo": {"source": "structural"}, "group": {"source": "structural"}, "solo": {"source": "structural"}, "taur": {"source": "structural"}}, "structural": ["solo", "duo", "group", "taur", "text"], "probe": ["clothing", "anthro", "text", "group"], "t1": 2.11, "t2": 1.85, "t3": 80.12, "t3s": 2.32, "t3p": 5.81, "err": 
null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=110 entity=4 copyright_filtered=4 generic_char_to_general=0 unknown_type=1", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"style_or_meta\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"style_or_meta\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"style_or_meta\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"style_or_meta\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"\": null}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 36, '': None}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"style_or_meta\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"style_or_meta\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"style_or_meta\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"style_or_meta\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_0: gave up after 3 attempts", "Stage3 general_chunk_1: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 6, \"why\": \"other\"}, {\"i\": 7, \"why\": \"other\"}, {\"i\": 8, \"why\": \"other\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"other\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 18, \"why\": \"other\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"other\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"explicit\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {}]}. 
Got: 2 validation errors for Stage3SelectionResponse\nselections.36.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.36.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_chunk_1: gave up after 3 attempts"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 128, "n_selected": 28, "n_implied": 5, "n_structural": 3, "n_probe": 3, "ret_R": 0.6, "P": 0.3929, "R": 0.7333, "F1": 0.5116, "leaf_P": 0.25, "leaf_R": 0.4167, "leaf_F1": 0.3125, "n_leaf_sel": 20, "n_leaf_gt": 12, "ret_P": 0.0703, "sel_given_ret": 1.2222, "over_sel": 1.87, "why": {"explicit": 20}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 12, "kept_total": 53, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "11": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3929, "gen_R": 0.7333, "gen_F1": 0.5116, "missed": ["angry", "eyes_closed", "green_eyes", "sleeping"], "extra": ["annoyed_expression", "anthro", "bedroom", "clothing", "english_text", "expressions", "eyes", "humanoid", "lipstick", "lying_on_bed", "on_bed", "pajamas", "relaxed_expression", "resting", "sleepover", "watermark", "yellow_eyeshadow"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "bed", "bedroom", "blonde_hair", "blue_eyes", "clothing", "duo", "english_text", "expressions", "eyes", "eyeshadow", "furniture", "hair", "humanoid", "lipstick", "lying", "lying_on_bed", "makeup", "on_bed", "pajamas", "purple_hair", "relaxed_expression", "resting", "sleepover", "text", "watermark", "yellow_eyeshadow"], "stage3_selected": ["annoyed_expression", "bedroom", "blonde_hair", 
"blue_eyes", "english_text", "expressions", "eyes", "eyeshadow", "hair", "lipstick", "lying_on_bed", "makeup", "pajamas", "purple_hair", "relaxed_expression", "resting", "sleepover", "text", "watermark", "yellow_eyeshadow"], "stage3_selected_scores": {"hair": 0.6031, "text": 0.6007, "blue_eyes": 0.6014, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "eyeshadow": 0.4763, "watermark": 0.6042, "lipstick": 0.4874, "bedroom": 0.4901, "lying_on_bed": 0.4093, "pajamas": 0.3753, "resting": 0.5144, "annoyed_expression": 0.7251, "expressions": 0.5439, "sleepover": 0.5269, "yellow_eyeshadow": 0.4551, "relaxed_expression": 0.4534, "eyes": 0.8951, "english_text": 0.4189}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "blonde_hair": 10, "purple_hair": 14, "makeup": 11, "eyeshadow": 46, "watermark": 4, "lipstick": 40, "bedroom": 38, "lying_on_bed": 92, "pajamas": 112, "resting": 28, "annoyed_expression": 2, "expressions": 18, "sleepover": 25, "yellow_eyeshadow": 54, "relaxed_expression": 55, "eyes": 1, "english_text": 83}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "watermark": 1, "lipstick": 2, "bedroom": 1, "lying_on_bed": 4, "pajamas": 3, "resting": 1, "annoyed_expression": 1, "expressions": 3, "sleepover": 1, "yellow_eyeshadow": 6, "relaxed_expression": 6, "eyes": 1, "english_text": 4}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "clothing": {"source": "implied"}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4189}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5439}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8951}, "humanoid": {"source": "structural"}, "lipstick": {"source": "stage3", "why": 
"explicit", "retrieval_score": 0.4874}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4093}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3753}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4534}, "resting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5144}, "sleepover": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5269}, "watermark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6042}, "yellow_eyeshadow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4551}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.43, "t2": 2.06, "t3": 16.44, "t3s": 1.29, "t3p": 5.82, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=131 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 74, "n_selected": 36, "n_implied": 11, "n_structural": 3, "n_probe": 3, "ret_R": 0.2727, "P": 0.4167, "R": 0.6818, "F1": 0.5172, "leaf_P": 0.2222, "leaf_R": 0.3077, "leaf_F1": 0.2581, "n_leaf_sel": 18, "n_leaf_gt": 13, "ret_P": 0.0811, "sel_given_ret": 2.5, "over_sel": 1.64, "why": {"explicit": 21}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 41, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "17": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4167, "gen_R": 0.6818, "gen_F1": 0.5172, "missed": ["fingers", "fur", "holding_musical_instrument", "holding_object", "music", "spade_tail", "tail"], "extra": ["3_claws", "acoustic_guitar", "blonde_hair", "canis", "crosslegged_pose", "dire_wolf", "electric_guitar", "finger_claws", "flowing_hair", "holding_hair", "leggings", "legwear", "mexican_wolf", "pastel_background", "playing_guitar", "playing_music", "toe_claws", "torn_leggings", "torn_legwear", "touching_hair", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["3_claws", "acoustic_guitar", "anthro", "bass_guitar", "blonde_hair", "canid", "canine", "canis", "claws", "clothed", "clothing", "crosslegged_pose", "dire_wolf", 
"electric_guitar", "finger_claws", "flowing_hair", "guitar", "hair", "holding_hair", "leggings", "legwear", "mammal", "mexican_wolf", "musical_instrument", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "solo", "string_instrument", "toe_claws", "torn_clothing", "torn_leggings", "torn_legwear", "touching_hair", "wolf"], "stage3_selected": ["3_claws", "acoustic_guitar", "bass_guitar", "blonde_hair", "claws", "crosslegged_pose", "dire_wolf", "electric_guitar", "finger_claws", "flowing_hair", "guitar", "holding_hair", "mexican_wolf", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "toe_claws", "torn_clothing", "torn_leggings", "wolf"], "stage3_selected_scores": {"claws": 0.5504, "wolf": 0.5691, "blonde_hair": 0.3645, "toe_claws": 0.4749, "torn_clothing": 0.3951, "finger_claws": 0.422, "plucked_string_instrument": 0.8817, "guitar": 0.9788, "playing_music": 0.8891, "playing_guitar": 0.9494, "3_claws": 0.4218, "dire_wolf": 0.4342, "electric_guitar": 0.8829, "bass_guitar": 0.9286, "flowing_hair": 0.5466, "crosslegged_pose": 0.445, "mexican_wolf": 0.4285, "torn_leggings": 0.3987, "holding_hair": 0.3725, "acoustic_guitar": 0.8816, "pastel_background": 0.5453}, "stage3_selected_ranks": {"claws": 13, "wolf": 8, "blonde_hair": 70, "toe_claws": 30, "torn_clothing": 62, "finger_claws": 52, "plucked_string_instrument": 6, "guitar": 1, "playing_music": 4, "playing_guitar": 2, "3_claws": 53, "dire_wolf": 48, "electric_guitar": 5, "bass_guitar": 3, "flowing_hair": 15, "crosslegged_pose": 42, "mexican_wolf": 50, "torn_leggings": 60, "holding_hair": 67, "acoustic_guitar": 7, "pastel_background": 16}, "stage3_selected_phrase_ranks": {"claws": 1, "wolf": 1, "blonde_hair": 6, "toe_claws": 2, "torn_clothing": 6, "finger_claws": 5, "plucked_string_instrument": 5, "guitar": 1, "playing_music": 3, "playing_guitar": 1, "3_claws": 6, "dire_wolf": 5, "electric_guitar": 4, "bass_guitar": 2, "flowing_hair": 1, 
"crosslegged_pose": 6, "mexican_wolf": 6, "torn_leggings": 5, "holding_hair": 4, "acoustic_guitar": 5, "pastel_background": 1}, "extra_evidence": {"3_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4218}, "acoustic_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8816}, "blonde_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3645}, "canis": {"source": "implied"}, "crosslegged_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.445}, "dire_wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4342}, "electric_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8829}, "finger_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.422}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5466}, "holding_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3725}, "leggings": {"source": "implied"}, "legwear": {"source": "implied"}, "mexican_wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4285}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5453}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9494}, "playing_music": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8891}, "toe_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4749}, "torn_leggings": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3987}, "torn_legwear": {"source": "implied"}, "touching_hair": {"source": "implied"}, "wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5691}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.51, "t2": 1.43, "t3": 7.44, "t3s": 1.39, "t3p": 2.51, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=77 entity=0 copyright_filtered=1 
generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 91, "n_selected": 34, "n_implied": 17, "n_structural": 4, "n_probe": 3, "ret_R": 0.56, "P": 0.4706, "R": 0.64, "F1": 0.5424, "leaf_P": 0.3333, "leaf_R": 0.3333, "leaf_F1": 0.3333, "n_leaf_sel": 15, "n_leaf_gt": 15, "ret_P": 0.1538, "sel_given_ret": 1.1429, "over_sel": 1.36, "why": {"explicit": 11, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 56, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "31": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4706, "gen_R": 0.64, "gen_F1": 0.5424, "missed": ["claws", "crossed_arms", "facial_markings", "fur", "head_markings", "looking_at_another", "markings", "overalls", "standing"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_topwear", "cross_fox", "dress_shirt", "grey_clothing", "grey_shirt", "grey_topwear", "looking_at_viewer", "mouth_closed", "open_mouth", "red_fox", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_clothing", "blue_overalls", "blue_topwear", "bottomwear", "canid", "canine", 
"clothed", "clothing", "cross_fox", "dress_shirt", "duo", "fox", "grey_background", "grey_clothing", "grey_shirt", "grey_topwear", "lagomorph", "leporid", "looking_at_viewer", "mammal", "mouth_closed", "open_mouth", "pants", "rabbit", "red_fox", "shirt", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_bottomwear", "black_pants", "blue_overalls", "blue_topwear", "cross_fox", "dress_shirt", "fox", "grey_background", "grey_shirt", "mouth_closed", "open_mouth", "rabbit", "white_shirt"], "stage3_selected_scores": {"open_mouth": 0.633, "fox": 0.638, "rabbit": 0.6511, "grey_background": 0.6784, "black_bottomwear": 0.7384, "blue_topwear": 0.666, "white_shirt": 0.8197, "dress_shirt": 0.6688, "black_pants": 0.833, "grey_shirt": 0.6923, "blue_overalls": 0.9203, "mouth_closed": 0.5678, "cross_fox": 0.4688}, "stage3_selected_ranks": {"open_mouth": 34, "fox": 32, "rabbit": 31, "grey_background": 24, "black_bottomwear": 15, "blue_topwear": 27, "white_shirt": 4, "dress_shirt": 26, "black_pants": 3, "grey_shirt": 21, "blue_overalls": 1, "mouth_closed": 58, "cross_fox": 87}, "stage3_selected_phrase_ranks": {"open_mouth": 1, "fox": 1, "rabbit": 1, "grey_background": 1, "black_bottomwear": 5, "blue_topwear": 4, "white_shirt": 1, "dress_shirt": 5, "black_pants": 1, "grey_shirt": 4, "blue_overalls": 1, "mouth_closed": 1, "cross_fox": 4}, "extra_evidence": {"black_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7384}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.833}, "blue_clothing": {"source": "implied"}, "blue_overalls": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9203}, "blue_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.666}, "cross_fox": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4688}, "dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6688}, 
"grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6923}, "grey_topwear": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}, "mouth_closed": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5678}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.633}, "red_fox": {"source": "implied"}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8197}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 1.99, "t2": 1.49, "t3": 17.7, "t3s": 1.15, "t3p": 1.25, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=91 entity=2 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 127, "n_selected": 58, "n_implied": 18, "n_structural": 5, "n_probe": 3, "ret_R": 0.5455, "P": 0.1379, "R": 0.7273, "F1": 0.2319, "leaf_P": 0.0541, "leaf_R": 0.2857, "leaf_F1": 0.0909, "n_leaf_sel": 37, "n_leaf_gt": 7, "ret_P": 0.0472, "sel_given_ret": 1.3333, "over_sel": 5.27, "why": {"explicit": 31, "strong_implied": 4}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 3, "calls_with_selection": 3, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 61, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 2, "parse_fail": 0, "errors": 0}, "12": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1379, "gen_R": 0.7273, "gen_F1": 0.2319, "missed": ["blue_eyes", "blue_nose", "purple_body"], "extra": ["actual_fur", "angry_expression", "animal_humanoid", "anthro", "belly", "big_eyes", "blue_fingers", "blue_inner_ear_fluff", "blue_toes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "cute_expression", "expressions", "facial_stripes", "fighting_pose", "fluffy_fur", "fox_humanoid", "half_body", "humanoid", "inner_ear_fluff", "jumper", "jumping", "light_tail", "looking_at_viewer", "male", "mammal_humanoid", "mouth_closed", "multi_tone_fur", "open_smile", "pink_ears", "pink_legs", "pink_stripes", "pink_tongue", "pose", "purple_belly", "purple_face", "scales", "scaly_tail", "smile", "stripes", "tail", "tail_tuft", "tan_nose", "teal_nose", "tongue", "tuft", "wolf_humanoid"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], 
"selected_tags": ["actual_fur", "angry_expression", "animal_humanoid", "anthro", "belly", "big_eyes", "blue_fingers", "blue_inner_ear_fluff", "blue_toes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "cute_expression", "expressions", "facial_stripes", "fighting_pose", "fluffy_fur", "fox_humanoid", "fur", "half_body", "humanoid", "inner_ear_fluff", "jumper", "jumping", "light_tail", "looking_at_viewer", "male", "mammal", "mammal_humanoid", "mouth_closed", "multi_tone_fur", "open_mouth", "open_smile", "pink_ears", "pink_legs", "pink_stripes", "pink_tongue", "pose", "purple_belly", "purple_face", "scales", "scaly_tail", "smile", "solo", "stripes", "tail", "tail_tuft", "tan_nose", "teal_nose", "tongue", "tuft", "white_body", "white_fur", "wolf_humanoid"], "stage3_selected": ["actual_fur", "angry_expression", "big_eyes", "blue_fingers", "blue_inner_ear_fluff", "blue_toes", "canine_humanoid", "curved_tail", "cute_expression", "expressions", "facial_stripes", "fighting_pose", "fluffy_fur", "fox_humanoid", "half_body", "jumper", "jumping", "light_tail", "mouth_closed", "multi_tone_fur", "open_mouth", "open_smile", "pink_ears", "pink_legs", "pink_stripes", "pink_tongue", "purple_belly", "purple_face", "scaly_tail", "simple_background", "tail_tuft", "tan_nose", "teal_nose", "white_fur", "wolf_humanoid"], "stage3_selected_scores": {"simple_background": 0.5948, "white_fur": 0.5995, "tail_tuft": 0.4995, "pink_tongue": 0.4215, "canine_humanoid": 0.9003, "fox_humanoid": 0.8204, "big_eyes": 0.4207, "mouth_closed": 0.5218, "wolf_humanoid": 0.819, "cute_expression": 0.4486, "pink_ears": 0.5255, "fighting_pose": 0.4594, "multi_tone_fur": 0.5135, "tan_nose": 0.473, "expressions": 0.4957, "light_tail": 0.5671, "pink_stripes": 0.682, "blue_inner_ear_fluff": 0.4727, "purple_face": 0.5577, "fluffy_fur": 0.5593, "angry_expression": 0.4879, "purple_belly": 0.5454, "scaly_tail": 0.4822, "blue_fingers": 0.5077, "blue_toes": 0.5148, 
"facial_stripes": 0.5968, "half_body": 0.4115, "teal_nose": 0.4695, "jumper": 0.4077, "pink_legs": 0.5285, "actual_fur": 0.4563, "open_mouth": 0.6008, "open_smile": 0.4868, "jumping": 0.6014, "curved_tail": 0.637}, "stage3_selected_ranks": {"simple_background": 31, "white_fur": 26, "tail_tuft": 77, "pink_tongue": 111, "canine_humanoid": 1, "fox_humanoid": 3, "big_eyes": 112, "mouth_closed": 66, "wolf_humanoid": 4, "cute_expression": 106, "pink_ears": 62, "fighting_pose": 101, "multi_tone_fur": 70, "tan_nose": 97, "expressions": 79, "light_tail": 41, "pink_stripes": 9, "blue_inner_ear_fluff": 98, "purple_face": 48, "fluffy_fur": 46, "angry_expression": 86, "purple_belly": 50, "scaly_tail": 89, "blue_fingers": 73, "blue_toes": 68, "facial_stripes": 29, "half_body": 115, "teal_nose": 99, "jumper": 116, "pink_legs": 60, "actual_fur": 104, "open_mouth": 24, "open_smile": 87, "jumping": 23, "curved_tail": 12}, "stage3_selected_phrase_ranks": {"simple_background": 1, "white_fur": 1, "tail_tuft": 4, "pink_tongue": 3, "canine_humanoid": 1, "fox_humanoid": 3, "big_eyes": 6, "mouth_closed": 3, "wolf_humanoid": 4, "cute_expression": 6, "pink_ears": 4, "fighting_pose": 5, "multi_tone_fur": 5, "tan_nose": 4, "expressions": 3, "light_tail": 4, "pink_stripes": 1, "blue_inner_ear_fluff": 4, "purple_face": 3, "fluffy_fur": 2, "angry_expression": 4, "purple_belly": 4, "scaly_tail": 5, "blue_fingers": 5, "blue_toes": 4, "facial_stripes": 3, "half_body": 4, "teal_nose": 5, "jumper": 3, "pink_legs": 3, "actual_fur": 3, "open_mouth": 1, "open_smile": 2, "jumping": 1, "curved_tail": 1}, "extra_evidence": {"actual_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4563}, "angry_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4879}, "animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "belly": {"source": "implied"}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4207}, "blue_fingers": {"source": 
"stage3", "why": "explicit", "retrieval_score": 0.5077}, "blue_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4727}, "blue_toes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5148}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9003}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.637}, "cute_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4486}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4957}, "facial_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5968}, "fighting_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4594}, "fluffy_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5593}, "fox_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8204}, "half_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4115}, "humanoid": {"source": "implied"}, "inner_ear_fluff": {"source": "implied"}, "jumper": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4077}, "jumping": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6014}, "light_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5671}, "looking_at_viewer": {"source": "structural"}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "mouth_closed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5218}, "multi_tone_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5135}, "open_smile": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4868}, "pink_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5255}, "pink_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5285}, "pink_stripes": {"source": 
"stage3", "why": "explicit", "retrieval_score": 0.682}, "pink_tongue": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4215}, "pose": {"source": "implied"}, "purple_belly": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5454}, "purple_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5577}, "scales": {"source": "implied"}, "scaly_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4822}, "smile": {"source": "implied"}, "stripes": {"source": "implied"}, "tail": {"source": "implied"}, "tail_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4995}, "tan_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.473}, "teal_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4695}, "tongue": {"source": "implied"}, "tuft": {"source": "implied"}, "wolf_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.819}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 4.15, "t2": 2.03, "t3": 36.05, "t3s": 1.41, "t3p": 5.23, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=132 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
data/eval_results/latency_k1_seed42.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T08:44:10.811021", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": false, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 16, "n_selected": 25, "n_implied": 7, "n_structural": 4, "n_probe": 5, "ret_R": 0.1667, "P": 0.4, "R": 0.8333, "F1": 0.5405, "leaf_P": 0.3571, "leaf_R": 0.5556, "leaf_F1": 0.4348, "n_leaf_sel": 14, "n_leaf_gt": 9, "ret_P": 0.125, "sel_given_ret": 5.0, "over_sel": 2.08, "why": {"explicit": 11, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4, "gen_R": 0.8333, "gen_F1": 0.5405, "missed": ["alpha_channel", "fingers"], "extra": ["black_body", "black_fur", "business_attire", "formal", "hair_bun", "holding_mug", "holding_object", "mug", "necktie", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "black_body", "black_fur", "business_attire", "clothed", "clothing", "felid", "feline", "formal", "fur", "hair", "hair_bun", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "solo", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "business_attire", "feline", "formal", "fur", "hair_bun", "holding_mug", "invalid_background", "necktie", "simple_background", "teal_shirt", "vest", "white_necktie"], "stage3_selected_scores": {"fur": 0.7146, "simple_background": 0.6978, "black_fur": 0.7183, "necktie": 
0.7314, "vest": 0.8403, "hair_bun": 0.6926, "holding_mug": 0.916, "formal": 0.5993, "business_attire": 0.5558, "teal_shirt": 0.7474, "white_necktie": 0.6418, "feline": 0.7062, "invalid_background": 0.6495}, "stage3_selected_ranks": {"fur": 9, "simple_background": 11, "black_fur": 8, "necktie": 7, "vest": 3, "hair_bun": 12, "holding_mug": 1, "formal": 16, "business_attire": 18, "teal_shirt": 6, "white_necktie": 14, "feline": 10, "invalid_background": 13}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "black_fur": 1, "necktie": 1, "vest": 1, "hair_bun": 1, "holding_mug": 1, "formal": 1, "business_attire": 1, "teal_shirt": 1, "white_necktie": 1, "feline": 1, "invalid_background": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7183}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5558}, "formal": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5993}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6926}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "implied"}, "necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7314}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8403}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 4.11, "t2": 3.46, "t3": 3.15, "t3s": 5.64, "t3p": 8.43, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 
entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 18, "n_selected": 24, "n_implied": 5, "n_structural": 7, "n_probe": 5, "ret_R": 0.5, "P": 0.4583, "R": 0.7857, "F1": 0.5789, "leaf_P": 0.3125, "leaf_R": 0.5, "leaf_F1": 0.3846, "n_leaf_sel": 16, "n_leaf_gt": 10, "ret_P": 0.3889, "sel_given_ret": 1.5714, "over_sel": 1.71, "why": {"explicit": 13}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4583, "gen_R": 0.7857, "gen_F1": 0.5789, "missed": ["fur", "hair", "human"], "extra": ["anthro", "bottomwear", "cheeky", "duo", "feral", "grin", "laugh", "loincloth", "raised_arms", "smile", "topless", "trio", "wide_grin"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "bottomwear", "cheeky", "clothed", "clothing", "dancing", "duo", "feral", "grin", "group", "haplorhine", "laugh", "loincloth", "looking_at_viewer", "male", "mammal", "primate", "raised_arms", "smile", "topless", "trio", "wide_grin"], "stage3_selected": ["ape", "bear", "cheeky", "dancing", "grin", "laugh", "loincloth", "looking_at_viewer", "male", "primate", "raised_arms", "simple_background", "wide_grin"], "stage3_selected_scores": {"male": 0.5604, "simple_background": 0.5491, "looking_at_viewer": 0.5475, "bear": 0.5735, "grin": 0.5653, "primate": 0.8905, "loincloth": 0.5685, "dancing": 0.5568, "laugh": 0.5259, 
"ape": 0.9767, "raised_arms": 0.5445, "cheeky": 0.3903, "wide_grin": 0.5267}, "stage3_selected_ranks": {"male": 6, "simple_background": 8, "looking_at_viewer": 9, "bear": 3, "grin": 5, "primate": 2, "loincloth": 4, "dancing": 7, "laugh": 13, "ape": 1, "raised_arms": 10, "cheeky": 20, "wide_grin": 12}, "stage3_selected_phrase_ranks": {"male": 1, "simple_background": 1, "looking_at_viewer": 1, "bear": 1, "grin": 1, "primate": 1, "loincloth": 1, "dancing": 1, "laugh": 1, "ape": 1, "raised_arms": 1, "cheeky": 1, "wide_grin": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "bottomwear": {"source": "implied"}, "cheeky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3903}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "grin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5653}, "laugh": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5259}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5685}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5445}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5267}}, "structural": ["trio", "anthro", "feral", "male", "clothed", "topless", "looking_at_viewer"], "probe": ["anthro", "duo", "group", "bear", "simple_background"], "t1": 3.2, "t2": 4.2, "t3": 9.91, "t3s": 4.45, "t3p": 5.29, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 15, "n_selected": 23, "n_implied": 5, "n_structural": 4, "n_probe": 5, "ret_R": 0.3571, "P": 0.6087, "R": 1.0, "F1": 0.7568, "leaf_P": 0.5294, "leaf_R": 1.0, "leaf_F1": 0.6923, "n_leaf_sel": 17, "n_leaf_gt": 9, "ret_P": 0.3333, "sel_given_ret": 2.8, "over_sel": 1.64, "why": {"explicit": 5, "strong_implied": 6}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6087, "gen_R": 1.0, "gen_F1": 0.7568, "missed": [], "extra": ["<3", "coat", "eyes", "intimate", "looking_at_viewer", "relationship", "round_eyes", "setting", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "eyes", "intimate", "lagomorph", "leporid", "looking_at_viewer", "mammal", "plushie", "rabbit", "relationship", "romantic", "romantic_couple", "round_eyes", "setting", "teal_eyes", "topwear"], "stage3_selected": ["blue_eyes", "coat", "eyes", "intimate", "plushie", "rabbit", "relationship", "romantic_couple", "round_eyes", "setting", "teal_eyes"], "stage3_selected_scores": {"blue_eyes": 0.6105, "coat": 0.6317, "plushie": 0.6568, "teal_eyes": 0.6345, "relationship": 0.6088, "rabbit": 0.5844, "romantic_couple": 0.5619, "intimate": 0.4706, "round_eyes": 0.4982, "setting": 0.5515, "eyes": 
0.913}, "stage3_selected_ranks": {"blue_eyes": 7, "coat": 5, "plushie": 3, "teal_eyes": 4, "relationship": 8, "rabbit": 9, "romantic_couple": 11, "intimate": 16, "round_eyes": 15, "setting": 12, "eyes": 1}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "coat": 1, "plushie": 1, "teal_eyes": 1, "relationship": 1, "rabbit": 1, "romantic_couple": 1, "intimate": 1, "round_eyes": 1, "setting": 1, "eyes": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6317}, "eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.913}, "intimate": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4706}, "looking_at_viewer": {"source": "structural"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6088}, "round_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4982}, "setting": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5515}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 3.09, "t2": 4.27, "t3": 2.74, "t3s": 4.59, "t3p": 7.55, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 12, "n_selected": 13, "n_implied": 1, "n_structural": 3, "n_probe": 3, "ret_R": 0.75, "P": 0.3077, "R": 1.0, "F1": 0.4706, "leaf_P": 0.3333, "leaf_R": 1.0, "leaf_F1": 0.5, "n_leaf_sel": 12, "n_leaf_gt": 4, "ret_P": 0.25, "sel_given_ret": 1.3333, "over_sel": 3.25, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"15": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3077, "gen_R": 1.0, "gen_F1": 0.4706, "missed": [], "extra": ["anthro", "clothed", "clothing", "eyes", "floating", "nose", "spots", "toony", "unknown_species"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["anthro", "clothed", "clothing", "eyes", "floating", "nose", "red_nose", "smile", "solo", "spots", "tan_body", "toony", "unknown_species"], "stage3_selected": ["eyes", "floating", "invalid_tag", "nose", "red_nose", "smile", "spots", "tan_body", "toony", "unknown_species", "white_background"], "stage3_selected_scores": {"smile": 0.5956, "white_background": 0.6072, "tan_body": 0.6582, "spots": 0.6224, "toony": 0.5172, "unknown_species": 0.5802, "red_nose": 0.7475, "floating": 0.6454, "invalid_tag": 0.5285, "nose": 0.8611, "eyes": 0.9242}, "stage3_selected_ranks": {"smile": 10, "white_background": 9, "tan_body": 5, "spots": 8, "toony": 14, "unknown_species": 11, "red_nose": 3, "floating": 6, "invalid_tag": 13, "nose": 2, "eyes": 1}, "stage3_selected_phrase_ranks": {"smile": 1, "white_background": 1, "tan_body": 1, 
"spots": 1, "toony": 1, "unknown_species": 1, "red_nose": 1, "floating": 1, "invalid_tag": 1, "nose": 1, "eyes": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9242}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6454}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8611}, "spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6224}, "toony": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5172}, "unknown_species": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5802}}, "structural": ["solo", "anthro", "clothed"], "probe": ["simple_background", "anthro", "solo"], "t1": 2.91, "t2": 4.8, "t3": 6.45, "t3s": 4.59, "t3p": 5.4, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=15 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 18, "n_selected": 22, "n_implied": 6, "n_structural": 5, "n_probe": 3, "ret_R": 0.1818, "P": 0.6818, "R": 0.6818, "F1": 0.6818, "leaf_P": 0.3846, "leaf_R": 0.4167, "leaf_F1": 0.4, "n_leaf_sel": 13, "n_leaf_gt": 12, "ret_P": 0.2222, "sel_given_ret": 3.75, "over_sel": 1.0, "why": {"explicit": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6818, "gen_R": 0.6818, "gen_F1": 0.6818, "missed": ["chest_tuft", "countershading", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "tuft"], "extra": ["countershade_body", "playful", "pose", "raised_hand", "striped_body", "striped_fur", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "felid", "fur", "male", "mammal", "pantherine", "playful", "pose", "raised_hand", "shorts", "solo", "striped_body", "striped_fur", "stripes", "tiger", "topless", "white_chest"], "stage3_selected": ["blue_eyes", "countershade_body", "fur", "playful", "pose", "raised_hand", "shorts", "striped_fur", "tiger", "white_chest"], "stage3_selected_scores": {"fur": 0.5959, "blue_eyes": 0.5842, "pose": 0.6367, 
"shorts": 0.5939, "tiger": 0.6053, "striped_fur": 0.655, "raised_hand": 0.7024, "playful": 0.4435, "white_chest": 0.9243, "countershade_body": 0.8719}, "stage3_selected_ranks": {"fur": 12, "blue_eyes": 14, "pose": 10, "shorts": 13, "tiger": 11, "striped_fur": 9, "raised_hand": 6, "playful": 19, "white_chest": 2, "countershade_body": 3}, "stage3_selected_phrase_ranks": {"fur": 1, "blue_eyes": 1, "pose": 1, "shorts": 1, "tiger": 1, "striped_fur": 1, "raised_hand": 1, "playful": 1, "white_chest": 1, "countershade_body": 1}, "extra_evidence": {"countershade_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8719}, "playful": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4435}, "pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6367}, "raised_hand": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7024}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.655}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9243}}, "structural": ["solo", "anthro", "male", "clothed", "topless"], "probe": ["anthro", "felid", "solo"], "t1": 5.38, "t2": 1.5, "t3": 1.51, "t3s": 0.61, "t3p": 6.85, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 19, "n_selected": 9, "n_implied": 0, "n_structural": 5, "n_probe": 4, "ret_R": 0.2308, "P": 0.1111, "R": 0.0769, "F1": 0.0909, "leaf_P": 0.125, "leaf_R": 0.1667, "leaf_F1": 0.1429, "n_leaf_sel": 8, "n_leaf_gt": 6, "ret_P": 0.1579, "sel_given_ret": 0.3333, "over_sel": 0.69, "why": {"explicit": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 8, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1111, "gen_R": 0.0769, "gen_F1": 0.0909, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothed", "clothing", "group", "intersex", "light", "speech_bubble", "taur"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothed", "clothing", "group", "intersex", "light", "speech_bubble", "taur", "text"], "stage3_selected": ["light", "speech_bubble"], "stage3_selected_scores": {"speech_bubble": 0.5783, "light": 0.5852}, "stage3_selected_ranks": {"speech_bubble": 9, "light": 6}, "stage3_selected_phrase_ranks": {"speech_bubble": 1, "light": 1}, "extra_evidence": {"anthro": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "group": {"source": "structural"}, "intersex": {"source": "structural"}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5852}, 
"speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5783}, "taur": {"source": "structural"}}, "structural": ["group", "taur", "intersex", "clothed", "text"], "probe": ["clothing", "anthro", "text", "group"], "t1": 3.11, "t2": 1.63, "t3": 1.05, "t3s": 1.38, "t3p": 4.03, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 21, "n_selected": 16, "n_implied": 5, "n_structural": 3, "n_probe": 3, "ret_R": 0.5333, "P": 0.6875, "R": 0.7333, "F1": 0.7097, "leaf_P": 0.5455, "leaf_R": 0.5, "leaf_F1": 0.5217, "n_leaf_sel": 11, "n_leaf_gt": 12, "ret_P": 0.381, "sel_given_ret": 1.375, "over_sel": 1.07, "why": {"explicit": 8}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6875, "gen_R": 0.7333, "gen_F1": 0.7097, "missed": ["angry", "eyes_closed", "eyeshadow", "sleeping"], "extra": ["annoyed_expression", "anthro", "humanoid", "lying_on_bed", "on_bed"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "bed", "blonde_hair", "blue_eyes", "duo", "furniture", "green_eyes", "hair", "humanoid", "lying", "lying_on_bed", "makeup", "on_bed", "purple_hair", "text"], "stage3_selected": ["annoyed_expression", "blonde_hair", "blue_eyes", "green_eyes", "lying_on_bed", "makeup", "purple_hair", "text"], "stage3_selected_scores": {"text": 0.6007, "blue_eyes": 0.6014, "green_eyes": 0.5989, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "lying_on_bed": 0.4241, "annoyed_expression": 0.7251}, "stage3_selected_ranks": {"text": 8, "blue_eyes": 7, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 13, "makeup": 11, "lying_on_bed": 
21, "annoyed_expression": 2}, "stage3_selected_phrase_ranks": {"text": 1, "blue_eyes": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "lying_on_bed": 1, "annoyed_expression": 1}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "humanoid": {"source": "structural"}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4241}, "on_bed": {"source": "implied"}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 3.99, "t2": 1.96, "t3": 4.0, "t3s": 2.23, "t3p": 1.34, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 16, "n_selected": 17, "n_implied": 6, "n_structural": 3, "n_probe": 3, "ret_R": 0.1818, "P": 0.6471, "R": 0.5, "F1": 0.5641, "leaf_P": 0.5, "leaf_R": 0.3846, "leaf_F1": 0.4348, "n_leaf_sel": 10, "n_leaf_gt": 13, "ret_P": 0.25, "sel_given_ret": 2.75, "over_sel": 0.77, "why": {"explicit": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6471, "gen_R": 0.5, "gen_F1": 0.5641, "missed": ["bass_guitar", "fingers", "fur", "guitar", "holding_musical_instrument", "holding_object", "music", "musical_instrument", "plucked_string_instrument", "string_instrument", "torn_clothing"], "extra": ["bass_(disambiguation)", "canis", "flowing_hair", "pastel_background", "playing_bass", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bass_(disambiguation)", "canid", "canine", "canis", "claws", "clothed", "clothing", "flowing_hair", "hair", "mammal", "pastel_background", "playing_bass", "solo", "spade_tail", "tail", "wolf"], "stage3_selected": ["bass_(disambiguation)", "claws", "flowing_hair", "pastel_background", "playing_bass", "spade_tail", "wolf"], "stage3_selected_scores": {"claws": 0.6305, 
"wolf": 0.5983, "spade_tail": 0.872, "flowing_hair": 0.7019, "bass_(disambiguation)": 0.5206, "playing_bass": 0.5052, "pastel_background": 0.6263}, "stage3_selected_ranks": {"claws": 5, "wolf": 9, "spade_tail": 1, "flowing_hair": 2, "bass_(disambiguation)": 12, "playing_bass": 13, "pastel_background": 6}, "stage3_selected_phrase_ranks": {"claws": 1, "wolf": 1, "spade_tail": 1, "flowing_hair": 1, "bass_(disambiguation)": 1, "playing_bass": 1, "pastel_background": 1}, "extra_evidence": {"bass_(disambiguation)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5206}, "canis": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7019}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6263}, "playing_bass": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5052}, "wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5983}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 1.3, "t2": 1.38, "t3": 2.28, "t3s": 1.42, "t3p": 3.55, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=3"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 17, "n_selected": 29, "n_implied": 13, "n_structural": 4, "n_probe": 3, "ret_R": 0.44, "P": 0.6897, "R": 0.8, "F1": 0.7407, "leaf_P": 0.6667, "leaf_R": 0.6667, "leaf_F1": 0.6667, "n_leaf_sel": 15, "n_leaf_gt": 15, "ret_P": 0.6471, "sel_given_ret": 1.8182, "over_sel": 1.16, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6897, "gen_R": 0.8, "gen_F1": 0.7407, "missed": ["grey_background", "lagomorph", "leporid", "looking_at_another", "rabbit"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "looking_at_viewer", "open_mouth", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "head_markings", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "shirt", "standing", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", 
"blue_overalls", "claws", "crossed_arms", "facial_markings", "fox", "fur", "open_mouth", "overalls", "shirt", "standing", "white_shirt"], "stage3_selected_scores": {"fur": 0.647, "open_mouth": 0.6268, "claws": 0.5818, "standing": 0.681, "fox": 0.634, "shirt": 0.7434, "facial_markings": 0.6877, "crossed_arms": 0.7223, "white_shirt": 0.8155, "overalls": 0.8759, "black_pants": 0.8282, "blue_overalls": 0.9189}, "stage3_selected_ranks": {"fur": 12, "open_mouth": 14, "claws": 17, "standing": 9, "fox": 13, "shirt": 6, "facial_markings": 8, "crossed_arms": 7, "white_shirt": 4, "overalls": 2, "black_pants": 3, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "standing": 1, "fox": 1, "shirt": 1, "facial_markings": 1, "crossed_arms": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1, "blue_overalls": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8282}, "blue_overalls": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9189}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6268}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8155}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 1.05, "t2": 1.43, "t3": 2.54, "t3s": 0.62, "t3p": 2.58, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 21, "n_selected": 21, "n_implied": 9, "n_structural": 4, "n_probe": 3, "ret_R": 0.5455, "P": 0.381, "R": 0.7273, "F1": 0.5, "leaf_P": 0.3636, "leaf_R": 0.5714, "leaf_F1": 0.4444, "n_leaf_sel": 11, "n_leaf_gt": 7, "ret_P": 0.2857, "sel_given_ret": 1.3333, "over_sel": 1.91, "why": {"explicit": 6, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 16, "attempts_by_n_local": {"24": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.381, "gen_R": 0.7273, "gen_F1": 0.5, "missed": ["open_mouth", "white_body", "white_fur"], "extra": ["action_pose", "ambiguous_gender", "animal_humanoid", "anthro", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "humanoid", "jumping", "mammal_humanoid", "pose", "topless"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["action_pose", "ambiguous_gender", "animal_humanoid", "anthro", "blue_eyes", "blue_nose", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "fur", "humanoid", "jumping", "mammal", "mammal_humanoid", "pose", "purple_body", "solo", "topless"], "stage3_selected": ["action_pose", "blue_eyes", "blue_nose", "canine_humanoid", "fur", "jumping", "purple_body", "simple_background"], "stage3_selected_scores": {"fur": 0.5679, "simple_background": 0.5795, "blue_eyes": 0.5832, "purple_body": 0.5484, "canine_humanoid": 0.9129, "blue_nose": 0.5927, "action_pose": 
0.5954, "jumping": 0.5819}, "stage3_selected_ranks": {"fur": 16, "simple_background": 13, "blue_eyes": 11, "purple_body": 18, "canine_humanoid": 1, "blue_nose": 8, "action_pose": 7, "jumping": 12}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "blue_eyes": 1, "purple_body": 1, "canine_humanoid": 1, "blue_nose": 1, "action_pose": 1, "jumping": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5954}, "ambiguous_gender": {"source": "structural"}, "animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9129}, "clothed": {"source": "implied"}, "clothing": {"source": "implied"}, "humanoid": {"source": "implied"}, "jumping": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5819}, "mammal_humanoid": {"source": "implied"}, "pose": {"source": "implied"}, "topless": {"source": "structural"}}, "structural": ["solo", "anthro", "ambiguous_gender", "topless"], "probe": ["anthro", "canid", "solo"], "t1": 1.41, "t2": 1.68, "t3": 5.47, "t3s": 1.42, "t3p": 4.61, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=24 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
data/eval_results/latency_k1_seed43.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T08:44:09.015467", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": false, "seed": 43, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 15, "n_selected": 24, "n_implied": 6, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.375, "R": 0.75, "F1": 0.5, "leaf_P": 0.3571, "leaf_R": 0.5556, "leaf_F1": 0.4348, "n_leaf_sel": 14, "n_leaf_gt": 9, "ret_P": 0.2, "sel_given_ret": 3.0, "over_sel": 2.0, "why": {"explicit": 11, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.375, "gen_R": 0.75, "gen_F1": 0.5, "missed": ["alpha_channel", "fingers", "hair"], "extra": ["black_body", "black_fur", "business_attire", "formal", "holding_mug", "holding_object", "mug", "necktie", "shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "black_body", "black_fur", "business_attire", "clothed", "clothing", "felid", "feline", "formal", "fur", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "shirt", "solo", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "business_attire", "feline", "formal", "holding_mug", "mug", "necktie", "shirt", "simple_background", "teal_shirt", "vest", "white_necktie"], "stage3_selected_scores": {"simple_background": 0.6978, "shirt": 0.7998, "black_fur": 0.7183, "necktie": 0.7314, "vest": 0.8403, "mug": 0.8841, 
"holding_mug": 0.916, "formal": 0.5993, "business_attire": 0.5558, "teal_shirt": 0.7474, "white_necktie": 0.6418, "feline": 0.7062}, "stage3_selected_ranks": {"simple_background": 11, "shirt": 5, "black_fur": 8, "necktie": 7, "vest": 3, "mug": 2, "holding_mug": 1, "formal": 16, "business_attire": 18, "teal_shirt": 6, "white_necktie": 14, "feline": 10}, "stage3_selected_phrase_ranks": {"simple_background": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "vest": 1, "mug": 1, "holding_mug": 1, "formal": 1, "business_attire": 1, "teal_shirt": 1, "white_necktie": 1, "feline": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7183}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5558}, "formal": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5993}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8841}, "necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7314}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7998}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8403}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 2.86, "t2": 5.08, "t3": 0.66, "t3s": 4.7, "t3p": 7.96, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 18, "n_selected": 19, "n_implied": 5, "n_structural": 6, "n_probe": 4, "ret_R": 0.5, "P": 0.5263, "R": 0.7143, "F1": 0.6061, "leaf_P": 0.3333, "leaf_R": 0.4, "leaf_F1": 0.3636, "n_leaf_sel": 12, "n_leaf_gt": 10, "ret_P": 0.3889, "sel_given_ret": 1.4286, "over_sel": 1.36, "why": {"explicit": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5263, "gen_R": 0.7143, "gen_F1": 0.6061, "missed": ["fur", "hair", "human", "male"], "extra": ["anthro", "cheeky", "grin", "humanoid", "laugh", "raised_arms", "smile", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "cheeky", "clothed", "clothing", "dancing", "grin", "group", "haplorhine", "humanoid", "laugh", "looking_at_viewer", "mammal", "primate", "raised_arms", "smile", "topless", "trio"], "stage3_selected": ["ape", "bear", "cheeky", "dancing", "grin", "laugh", "looking_at_viewer", "raised_arms", "simple_background"], "stage3_selected_scores": {"simple_background": 0.5491, "looking_at_viewer": 0.5483, "bear": 0.5736, "grin": 0.5653, "dancing": 0.5576, "laugh": 0.526, "ape": 0.9767, "raised_arms": 0.5461, "cheeky": 0.3905}, "stage3_selected_ranks": {"simple_background": 8, "looking_at_viewer": 9, "bear": 3, "grin": 5, "dancing": 7, "laugh": 13, "ape": 1, 
"raised_arms": 10, "cheeky": 20}, "stage3_selected_phrase_ranks": {"simple_background": 1, "looking_at_viewer": 1, "bear": 1, "grin": 1, "dancing": 1, "laugh": 1, "ape": 1, "raised_arms": 1, "cheeky": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "cheeky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3905}, "grin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5653}, "humanoid": {"source": "structural"}, "laugh": {"source": "stage3", "why": "explicit", "retrieval_score": 0.526}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5461}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "humanoid", "clothed", "topless", "looking_at_viewer"], "probe": ["simple_background", "anthro", "group", "bear"], "t1": 5.6, "t2": 7.33, "t3": 5.32, "t3s": 4.65, "t3p": 5.08, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 14, "n_selected": 15, "n_implied": 3, "n_structural": 4, "n_probe": 4, "ret_R": 0.3571, "P": 0.6667, "R": 0.7143, "F1": 0.6897, "leaf_P": 0.6667, "leaf_R": 0.8889, "leaf_F1": 0.7619, "n_leaf_sel": 12, "n_leaf_gt": 9, "ret_P": 0.3571, "sel_given_ret": 2.0, "over_sel": 1.07, "why": {"explicit": 4, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 12, "attempts_by_n_local": {"15": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6667, "gen_R": 0.7143, "gen_F1": 0.6897, "missed": ["lagomorph", "leporid", "mammal", "rabbit"], "extra": ["<3", "coat", "looking_at_viewer", "relationship", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "looking_at_viewer", "plushie", "relationship", "romantic", "romantic_couple", "teal_eyes", "topwear"], "stage3_selected": ["blue_eyes", "coat", "plushie", "relationship", "romantic_couple", "teal_eyes"], "stage3_selected_scores": {"blue_eyes": 0.6105, "coat": 0.6317, "plushie": 0.6568, "teal_eyes": 0.6345, "romantic_couple": 0.5619, "relationship": 0.6088}, "stage3_selected_ranks": {"blue_eyes": 7, "coat": 5, "plushie": 3, "teal_eyes": 4, "romantic_couple": 11, "relationship": 8}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "coat": 1, "plushie": 1, "teal_eyes": 1, 
"romantic_couple": 1, "relationship": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6317}, "looking_at_viewer": {"source": "structural"}, "relationship": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6088}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["anthro", "blush", "duo", "<3"], "t1": 3.45, "t2": 6.07, "t3": 3.22, "t3s": 6.61, "t3p": 5.75, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=15 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 13, "n_selected": 15, "n_implied": 0, "n_structural": 4, "n_probe": 4, "ret_R": 0.75, "P": 0.2, "R": 0.75, "F1": 0.3158, "leaf_P": 0.2, "leaf_R": 0.75, "leaf_F1": 0.3158, "n_leaf_sel": 15, "n_leaf_gt": 4, "ret_P": 0.2308, "sel_given_ret": 1.0, "over_sel": 3.75, "why": {"explicit": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 10, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2, "gen_R": 0.75, "gen_F1": 0.3158, "missed": ["smile"], "extra": ["<3", "ambiguous_gender", "anthro", "cartoon", "clothing", "eyes", "feral", "floating", "nose", "nude", "round_eyes", "spots"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["<3", "ambiguous_gender", "anthro", "cartoon", "clothing", "eyes", "feral", "floating", "nose", "nude", "red_nose", "round_eyes", "solo", "spots", "tan_body"], "stage3_selected": ["cartoon", "eyes", "floating", "nose", "red_nose", "round_eyes", "spots", "tan_body", "white_background"], "stage3_selected_scores": {"white_background": 0.6267, "tan_body": 0.6777, "spots": 0.6331, "red_nose": 0.7461, "floating": 0.6778, "round_eyes": 0.8856, "cartoon": 0.514, "nose": 0.8851, "eyes": 0.929}, "stage3_selected_ranks": {"white_background": 11, "tan_body": 7, "spots": 10, "red_nose": 4, "floating": 6, "round_eyes": 2, "cartoon": 15, "nose": 3, "eyes": 1}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 1, "spots": 1, "red_nose": 1, "floating": 1, "round_eyes": 1, 
"cartoon": 1, "nose": 1, "eyes": 1}, "extra_evidence": {"<3": {"source": "probe"}, "ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "cartoon": {"source": "stage3", "why": "explicit", "retrieval_score": 0.514}, "clothing": {"source": "probe"}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.929}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6778}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8851}, "nude": {"source": "structural"}, "round_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8856}, "spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6331}}, "structural": ["solo", "feral", "ambiguous_gender", "nude"], "probe": ["anthro", "simple_background", "clothing", "<3"], "t1": 3.09, "t2": 6.6, "t3": 6.66, "t3s": 5.75, "t3p": 5.14, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 18, "n_selected": 20, "n_implied": 6, "n_structural": 5, "n_probe": 3, "ret_R": 0.2273, "P": 0.65, "R": 0.5909, "F1": 0.619, "leaf_P": 0.2857, "leaf_R": 0.3333, "leaf_F1": 0.3077, "n_leaf_sel": 14, "n_leaf_gt": 12, "ret_P": 0.2778, "sel_given_ret": 2.6, "over_sel": 0.91, "why": {"explicit": 5, "strong_implied": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.65, "gen_R": 0.5909, "gen_F1": 0.619, "missed": ["chest_tuft", "countershading", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["looking_at_viewer", "muscular_arms", "playful", "pose", "striped_body", "striped_fur", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_eyes", "bottomwear", "clothed", "clothing", "felid", "fur", "hand_on_head", "looking_at_viewer", "male", "mammal", "muscular_arms", "playful", "pose", "shorts", "solo", "striped_body", "striped_fur", "stripes", "white_chest"], "stage3_selected": ["blue_eyes", "hand_on_head", "muscular_arms", "playful", "pose", "shorts", "striped_fur", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5717, "shorts": 0.5785, "striped_fur": 
0.6385, "hand_on_head": 0.5932, "white_chest": 0.9198, "pose": 0.6235, "muscular_arms": 0.7948, "playful": 0.4236}, "stage3_selected_ranks": {"blue_eyes": 14, "shorts": 13, "striped_fur": 8, "hand_on_head": 11, "white_chest": 2, "pose": 9, "muscular_arms": 4, "playful": 19}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "shorts": 1, "striped_fur": 1, "hand_on_head": 1, "white_chest": 1, "pose": 1, "muscular_arms": 1, "playful": 1}, "extra_evidence": {"looking_at_viewer": {"source": "structural"}, "muscular_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7948}, "playful": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4236}, "pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6235}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6385}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9198}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "felid", "solo"], "t1": 1.21, "t2": 1.33, "t3": 0.77, "t3s": 1.08, "t3p": 1.28, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 16, "n_selected": 7, "n_implied": 0, "n_structural": 3, "n_probe": 4, "ret_R": 0.2308, "P": 0.1429, "R": 0.0769, "F1": 0.1, "leaf_P": 0.1429, "leaf_R": 0.1667, "leaf_F1": 0.1538, "n_leaf_sel": 7, "n_leaf_gt": 6, "ret_P": 0.1875, "sel_given_ret": 0.3333, "over_sel": 0.54, "why": {"explicit": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 12, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1429, "gen_R": 0.0769, "gen_F1": 0.1, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothing", "darkness", "group", "light", "solo"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothing", "darkness", "group", "light", "solo", "text"], "stage3_selected": ["darkness", "light"], "stage3_selected_scores": {"light": 0.7785, "darkness": 0.8348}, "stage3_selected_ranks": {"light": 4, "darkness": 2}, "stage3_selected_phrase_ranks": {"light": 1, "darkness": 1}, "extra_evidence": {"anthro": {"source": "probe"}, "clothing": {"source": "probe"}, "darkness": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8348}, "group": {"source": "structural"}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7785}, "solo": {"source": "structural"}}, "structural": ["solo", "group", 
"text"], "probe": ["clothing", "anthro", "text", "group"], "t1": 2.27, "t2": 1.61, "t3": 6.21, "t3s": 1.43, "t3p": 3.67, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 22, "n_selected": 14, "n_implied": 1, "n_structural": 3, "n_probe": 3, "ret_R": 0.5333, "P": 0.6429, "R": 0.6, "F1": 0.6207, "leaf_P": 0.5385, "leaf_R": 0.5833, "leaf_F1": 0.56, "n_leaf_sel": 13, "n_leaf_gt": 12, "ret_P": 0.3636, "sel_given_ret": 1.125, "over_sel": 0.93, "why": {"explicit": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 19, "attempts_by_n_local": {"23": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6429, "gen_R": 0.6, "gen_F1": 0.6207, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture", "lying"], "extra": ["annoyed_expression", "anthro", "bedroom", "humanoid", "resting"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "bedroom", "blonde_hair", "blue_eyes", "duo", "green_eyes", "hair", "humanoid", "makeup", "purple_hair", "resting", "sleeping", "text"], "stage3_selected": ["annoyed_expression", "bedroom", "blonde_hair", "blue_eyes", "green_eyes", "makeup", "purple_hair", "resting", "sleeping", "text"], "stage3_selected_scores": {"text": 0.6007, "blue_eyes": 0.6014, "green_eyes": 0.5989, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "bedroom": 0.4901, "sleeping": 0.6027, "resting": 0.5144, "annoyed_expression": 0.7251}, "stage3_selected_ranks": {"text": 8, "blue_eyes": 7, "green_eyes": 9, "blonde_hair": 10, 
"purple_hair": 13, "makeup": 11, "bedroom": 19, "sleeping": 6, "resting": 17, "annoyed_expression": 2}, "stage3_selected_phrase_ranks": {"text": 1, "blue_eyes": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "bedroom": 1, "sleeping": 1, "resting": 1, "annoyed_expression": 1}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "humanoid": {"source": "structural"}, "resting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5144}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.06, "t2": 2.13, "t3": 6.29, "t3s": 1.01, "t3p": 1.25, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=23 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 16, "n_selected": 18, "n_implied": 7, "n_structural": 4, "n_probe": 3, "ret_R": 0.2273, "P": 0.7778, "R": 0.6364, "F1": 0.7, "leaf_P": 0.4545, "leaf_R": 0.3846, "leaf_F1": 0.4167, "n_leaf_sel": 11, "n_leaf_gt": 13, "ret_P": 0.3125, "sel_given_ret": 2.8, "over_sel": 0.82, "why": {"explicit": 5, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 10, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7778, "gen_R": 0.6364, "gen_F1": 0.7, "missed": ["bass_guitar", "canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music", "torn_clothing"], "extra": ["flowing_hair", "male", "pastel_background", "pose"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "canid", "claws", "clothed", "clothing", "flowing_hair", "guitar", "hair", "male", "mammal", "musical_instrument", "pastel_background", "plucked_string_instrument", "pose", "solo", "spade_tail", "string_instrument", "tail"], "stage3_selected": ["claws", "flowing_hair", "guitar", "pastel_background", "pose", "spade_tail"], "stage3_selected_scores": {"claws": 0.5684, "spade_tail": 0.618, "guitar": 0.9623, "flowing_hair": 0.5669, "pastel_background": 0.5632, 
"pose": 0.5761}, "stage3_selected_ranks": {"claws": 8, "spade_tail": 3, "guitar": 1, "flowing_hair": 9, "pastel_background": 11, "pose": 6}, "stage3_selected_phrase_ranks": {"claws": 1, "spade_tail": 1, "guitar": 1, "flowing_hair": 1, "pastel_background": 1, "pose": 1}, "extra_evidence": {"flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5669}, "male": {"source": "structural"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5632}, "pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5761}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 3.66, "t2": 1.37, "t3": 2.75, "t3s": 1.5, "t3p": 1.9, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 18, "n_selected": 32, "n_implied": 14, "n_structural": 4, "n_probe": 3, "ret_R": 0.44, "P": 0.7188, "R": 0.92, "F1": 0.807, "leaf_P": 0.6875, "leaf_R": 0.7333, "leaf_F1": 0.7097, "n_leaf_sel": 16, "n_leaf_gt": 15, "ret_P": 0.6111, "sel_given_ret": 2.0909, "over_sel": 1.28, "why": {"explicit": 14}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 19, "attempts_by_n_local": {"20": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7188, "gen_R": 0.92, "gen_F1": 0.807, "missed": ["looking_at_another", "standing"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "looking_at_viewer", "open_mouth", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "rabbit", "shirt", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", 
"blue_overalls", "claws", "crossed_arms", "facial_markings", "fox", "fur", "grey_background", "open_mouth", "overalls", "pants", "rabbit", "shirt", "white_shirt"], "stage3_selected_scores": {"fur": 0.6531, "open_mouth": 0.633, "claws": 0.6303, "fox": 0.638, "shirt": 0.7483, "rabbit": 0.6511, "pants": 0.7589, "grey_background": 0.6784, "facial_markings": 0.6945, "crossed_arms": 0.7285, "white_shirt": 0.8197, "overalls": 0.8776, "black_pants": 0.833, "blue_overalls": 0.9203}, "stage3_selected_ranks": {"fur": 11, "open_mouth": 14, "claws": 15, "fox": 13, "shirt": 6, "rabbit": 12, "pants": 5, "grey_background": 10, "facial_markings": 8, "crossed_arms": 7, "white_shirt": 4, "overalls": 2, "black_pants": 3, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "fox": 1, "shirt": 1, "rabbit": 1, "pants": 1, "grey_background": 1, "facial_markings": 1, "crossed_arms": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1, "blue_overalls": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.833}, "blue_overalls": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9203}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.633}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8197}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.68, "t2": 1.57, "t3": 1.21, "t3s": 0.41, "t3p": 3.87, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=20 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 23, "n_selected": 21, "n_implied": 8, "n_structural": 4, "n_probe": 3, "ret_R": 0.5455, "P": 0.381, "R": 0.7273, "F1": 0.5, "leaf_P": 0.3333, "leaf_R": 0.5714, "leaf_F1": 0.4211, "n_leaf_sel": 12, "n_leaf_gt": 7, "ret_P": 0.2609, "sel_given_ret": 1.3333, "over_sel": 1.91, "why": {"explicit": 6, "strong_implied": 3}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 13, "attempts_by_n_local": {"26": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.381, "gen_R": 0.7273, "gen_F1": 0.5, "missed": ["open_mouth", "white_body", "white_fur"], "extra": ["action_pose", "animal_humanoid", "anthro", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "humanoid", "male", "mammal_humanoid", "pose", "tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["action_pose", "animal_humanoid", "anthro", "blue_eyes", "blue_nose", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "fur", "humanoid", "male", "mammal", "mammal_humanoid", "pose", "purple_body", "solo", "tail"], "stage3_selected": ["action_pose", "blue_eyes", "blue_nose", "canine_humanoid", "curved_tail", "fur", "purple_body", "simple_background", "tail"], "stage3_selected_scores": {"fur": 0.5841, "simple_background": 0.5948, "blue_eyes": 0.5995, "purple_body": 0.564, "canine_humanoid": 0.9003, "blue_nose": 0.6032, "tail": 0.6107, 
"action_pose": 0.617, "curved_tail": 0.637}, "stage3_selected_ranks": {"fur": 18, "simple_background": 17, "blue_eyes": 15, "purple_body": 21, "canine_humanoid": 1, "blue_nose": 12, "tail": 10, "action_pose": 9, "curved_tail": 7}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "blue_eyes": 1, "purple_body": 1, "canine_humanoid": 1, "blue_nose": 1, "tail": 1, "action_pose": 1, "curved_tail": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.617}, "animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9003}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.637}, "humanoid": {"source": "implied"}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "pose": {"source": "implied"}, "tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6107}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 3.87, "t2": 1.89, "t3": 3.1, "t3s": 1.78, "t3p": 0.98, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=26 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
data/eval_results/latency_k4_seed43.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T08:45:18.372193", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 4, "temperature": 0.0, "shuffle": false, "seed": 43, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 22}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 65, "n_selected": 39, "n_implied": 17, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.2051, "R": 0.6667, "F1": 0.3137, "leaf_P": 0.1111, "leaf_R": 0.2222, "leaf_F1": 0.1481, "n_leaf_sel": 18, "n_leaf_gt": 9, "ret_P": 0.0462, "sel_given_ret": 2.6667, "over_sel": 3.25, "why": {"explicit": 15, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 43, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "9": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.3333333333333333, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2051, "gen_R": 0.6667, "gen_F1": 0.3137, "missed": ["alpha_channel", "fingers", "fur", "male"], "extra": ["bottom_heavy", "business_attire", "container", "cup", "domestic_cat", "felis", "formal", "gesture", "grey_clothing", "grey_shirt", "grey_topwear", "hair_bun", "handshake", "holding_container", "holding_cup", "holding_mug", "holding_object", "mug", "necktie", "ranged_weapon", "raygun", "shirt", "suit_jacket", "teal_shirt", "teal_topwear", "text", "topless", "topwear", "vest", "weapon", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "bottom_heavy", "business_attire", "clothed", "clothing", "container", "cup", "domestic_cat", "felid", "feline", "felis", "formal", "gesture", "grey_clothing", "grey_shirt", "grey_topwear", "hair", "hair_bun", "handshake", "holding_container", "holding_cup", 
"holding_mug", "holding_object", "mammal", "mug", "necktie", "ranged_weapon", "raygun", "shirt", "solo", "suit_jacket", "teal_shirt", "teal_topwear", "text", "topless", "topwear", "vest", "weapon", "white_necktie"], "stage3_selected": ["bottom_heavy", "business_attire", "domestic_cat", "feline", "formal", "grey_shirt", "hair_bun", "handshake", "holding_cup", "holding_mug", "invalid_background", "raygun", "simple_background", "suit_jacket", "teal_shirt", "vest", "white_necktie"], "stage3_selected_scores": {"simple_background": 0.7012, "feline": 0.7092, "vest": 0.8437, "holding_cup": 0.7694, "hair_bun": 0.6946, "bottom_heavy": 0.468, "grey_shirt": 0.7606, "holding_mug": 0.9184, "suit_jacket": 0.5953, "handshake": 0.5545, "formal": 0.601, "business_attire": 0.5683, "teal_shirt": 0.7483, "white_necktie": 0.644, "invalid_background": 0.6512, "domestic_cat": 0.6355, "raygun": 0.4506}, "stage3_selected_ranks": {"simple_background": 28, "feline": 26, "vest": 3, "holding_cup": 10, "hair_bun": 29, "bottom_heavy": 68, "grey_shirt": 12, "holding_mug": 1, "suit_jacket": 48, "handshake": 59, "formal": 46, "business_attire": 54, "teal_shirt": 17, "white_necktie": 33, "invalid_background": 31, "domestic_cat": 37, "raygun": 69}, "stage3_selected_phrase_ranks": {"simple_background": 1, "feline": 1, "vest": 1, "holding_cup": 4, "hair_bun": 1, "bottom_heavy": 4, "grey_shirt": 1, "holding_mug": 1, "suit_jacket": 4, "handshake": 3, "formal": 1, "business_attire": 1, "teal_shirt": 4, "white_necktie": 4, "invalid_background": 1, "domestic_cat": 4, "raygun": 4}, "extra_evidence": {"bottom_heavy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.468}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5683}, "container": {"source": "implied"}, "cup": {"source": "implied"}, "domestic_cat": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6355}, "felis": {"source": "implied"}, "formal": {"source": "stage3", "why": "explicit", 
"retrieval_score": 0.601}, "gesture": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7606}, "grey_topwear": {"source": "implied"}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6946}, "handshake": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5545}, "holding_container": {"source": "implied"}, "holding_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7694}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9184}, "holding_object": {"source": "implied"}, "mug": {"source": "implied"}, "necktie": {"source": "implied"}, "ranged_weapon": {"source": "implied"}, "raygun": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4506}, "shirt": {"source": "implied"}, "suit_jacket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5953}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7483}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topless": {"source": "structural"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8437}, "weapon": {"source": "implied"}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.644}}, "structural": ["solo", "anthro", "clothed", "topless"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 3.76, "t2": 5.25, "t3": 16.36, "t3s": 3.62, "t3p": 5.07, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=69 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": 
\"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"other\"}, {\"i\": 13, \"why\": \"other\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"other\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"other\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 45, \"why\": \"other\"}, {\"i\": 46, \"why\": \"other\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 48, \"why\": \"other\"}, {\"i\": 49, \"why\": \"explicit\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 51, \"why\": \"other\"}, {\"i\": 52}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.38.why\n Field required [type=missing, input_value={'i': 52}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 66, "n_selected": 30, "n_implied": 10, "n_structural": 7, "n_probe": 6, "ret_R": 0.5714, "P": 0.3667, "R": 0.7857, "F1": 0.5, "leaf_P": 0.1875, "leaf_R": 0.3, "leaf_F1": 0.2308, "n_leaf_sel": 16, "n_leaf_gt": 10, "ret_P": 0.1212, "sel_given_ret": 1.375, "over_sel": 2.14, "why": {"explicit": 13}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 28, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "9": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3667, "gen_R": 0.7857, "gen_F1": 0.5, "missed": ["fur", "hair", "human"], "extra": ["<3", "anthro", "chimpanzee", "duo", "feral", "gorilla", "grinning_at_viewer", "one_eye_closed", "pan_(genus)", "raised_arms", "smile", "smiling_at_viewer", "smirk", "smirking_at_viewer", "topless", "trio", "wide_grin", "wink", "winking_at_viewer"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["<3", "anthro", "ape", "bear", "chimpanzee", "clothed", "clothing", "dancing", "duo", "feral", "gorilla", "grinning_at_viewer", "group", "haplorhine", "looking_at_viewer", "male", "mammal", "one_eye_closed", "pan_(genus)", "primate", "raised_arms", "smile", "smiling_at_viewer", "smirk", "smirking_at_viewer", "topless", "trio", "wide_grin", "wink", "winking_at_viewer"], "stage3_selected": ["bear", "chimpanzee", "dancing", "gorilla", "grinning_at_viewer", 
"looking_at_viewer", "male", "primate", "raised_arms", "simple_background", "smirking_at_viewer", "wide_grin", "winking_at_viewer"], "stage3_selected_scores": {"male": 0.5604, "simple_background": 0.5491, "looking_at_viewer": 0.5475, "bear": 0.5735, "primate": 0.8905, "dancing": 0.5568, "raised_arms": 0.5445, "gorilla": 0.8299, "winking_at_viewer": 0.404, "chimpanzee": 0.8275, "smirking_at_viewer": 0.4352, "grinning_at_viewer": 0.442, "wide_grin": 0.5267}, "stage3_selected_ranks": {"male": 9, "simple_background": 11, "looking_at_viewer": 12, "bear": 6, "primate": 2, "dancing": 10, "raised_arms": 13, "gorilla": 4, "winking_at_viewer": 47, "chimpanzee": 5, "smirking_at_viewer": 34, "grinning_at_viewer": 32, "wide_grin": 15}, "stage3_selected_phrase_ranks": {"male": 1, "simple_background": 1, "looking_at_viewer": 1, "bear": 1, "primate": 1, "dancing": 1, "raised_arms": 1, "gorilla": 2, "winking_at_viewer": 4, "chimpanzee": 3, "smirking_at_viewer": 4, "grinning_at_viewer": 3, "wide_grin": 1}, "extra_evidence": {"<3": {"source": "probe"}, "anthro": {"source": "structural"}, "chimpanzee": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8275}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "gorilla": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8299}, "grinning_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.442}, "one_eye_closed": {"source": "implied"}, "pan_(genus)": {"source": "implied"}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5445}, "smile": {"source": "implied"}, "smiling_at_viewer": {"source": "implied"}, "smirk": {"source": "implied"}, "smirking_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4352}, "topless": {"source": "structural"}, "trio": {"source": "structural"}, "wide_grin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5267}, "wink": {"source": "implied"}, "winking_at_viewer": {"source": "stage3", "why": 
"explicit", "retrieval_score": 0.404}}, "structural": ["trio", "anthro", "feral", "male", "clothed", "topless", "looking_at_viewer"], "probe": ["anthro", "duo", "group", "bear", "simple_background", "<3"], "t1": 2.82, "t2": 5.79, "t3": 12.87, "t3s": 3.36, "t3p": 4.98, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=69 entity=1 copyright_filtered=1 generic_char_to_general=1 unknown_type=2"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 62, "n_selected": 29, "n_implied": 9, "n_structural": 3, "n_probe": 4, "ret_R": 0.6429, "P": 0.4828, "R": 1.0, "F1": 0.6512, "leaf_P": 0.4444, "leaf_R": 0.8889, "leaf_F1": 0.5926, "n_leaf_sel": 18, "n_leaf_gt": 9, "ret_P": 0.1452, "sel_given_ret": 1.5556, "over_sel": 2.07, "why": {"explicit": 14, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 3, "dupe_indices_total": 0, "kept_total": 26, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "3": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4828, "gen_R": 1.0, "gen_F1": 0.6512, "missed": [], "extra": ["<3", "coat", "cuddling", "holding_object", "holding_plushie", "padding", "raincoat", "red_clothing", "red_coat", "red_topwear", "relationship", "rosy_cheeks", "teal_body", "topwear", "wide_eyed"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "cuddling", "duo", "holding_object", "holding_plushie", "lagomorph", "leporid", "mammal", "padding", "plushie", "rabbit", "raincoat", "red_clothing", "red_coat", "red_topwear", "relationship", "romantic", "romantic_couple", "rosy_cheeks", "teal_body", "teal_eyes", "topwear", "wide_eyed"], "stage3_selected": ["blue_eyes", "coat", "cuddling", "holding_plushie", "padding", "rabbit", "raincoat", "red_coat", "relationship", "romantic", 
"romantic_couple", "rosy_cheeks", "teal_body", "teal_eyes", "wide_eyed"], "stage3_selected_scores": {"blue_eyes": 0.615, "rabbit": 0.5939, "romantic": 0.5602, "romantic_couple": 0.562, "coat": 0.6383, "wide_eyed": 0.4616, "cuddling": 0.4804, "teal_eyes": 0.6283, "rosy_cheeks": 0.472, "teal_body": 0.4519, "holding_plushie": 0.7793, "raincoat": 0.5262, "red_coat": 0.5207, "relationship": 0.6206, "padding": 0.4927}, "stage3_selected_ranks": {"blue_eyes": 12, "rabbit": 13, "romantic": 18, "romantic_couple": 17, "coat": 7, "wide_eyed": 49, "cuddling": 42, "teal_eyes": 8, "rosy_cheeks": 45, "teal_body": 54, "holding_plushie": 2, "raincoat": 29, "red_coat": 32, "relationship": 9, "padding": 38}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "rabbit": 1, "romantic": 2, "romantic_couple": 1, "coat": 1, "wide_eyed": 4, "cuddling": 4, "teal_eyes": 1, "rosy_cheeks": 2, "teal_body": 4, "holding_plushie": 1, "raincoat": 2, "red_coat": 4, "relationship": 1, "padding": 4}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "cuddling": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4804}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "padding": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4927}, "raincoat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5262}, "red_clothing": {"source": "implied"}, "red_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5207}, "red_topwear": {"source": "implied"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "rosy_cheeks": {"source": "stage3", "why": "explicit", "retrieval_score": 0.472}, "teal_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4519}, "topwear": {"source": "implied"}, "wide_eyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4616}}, 
"structural": ["duo", "anthro", "clothed"], "probe": ["anthro", "blush", "duo", "<3"], "t1": 2.97, "t2": 6.21, "t3": 10.05, "t3s": 3.13, "t3p": 4.08, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=63 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 50, "n_selected": 11, "n_implied": 0, "n_structural": 5, "n_probe": 3, "ret_R": 0.75, "P": 0.2727, "R": 0.75, "F1": 0.4, "leaf_P": 0.2727, "leaf_R": 0.75, "leaf_F1": 0.4, "n_leaf_sel": 11, "n_leaf_gt": 4, "ret_P": 0.06, "sel_given_ret": 1.0, "over_sel": 2.75, "why": {"explicit": 6}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 10, "attempts_by_n_local": {"52": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2727, "gen_R": 0.75, "gen_F1": 0.4, "missed": ["smile"], "extra": ["ambiguous_gender", "anthro", "big_eyes", "feral", "floating", "looking_at_viewer", "nude", "spotted_face"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "big_eyes", "feral", "floating", "looking_at_viewer", "nude", "red_nose", "solo", "spotted_face", "tan_body"], "stage3_selected": ["big_eyes", "floating", "red_nose", "spotted_face", "tan_body", "white_background"], "stage3_selected_scores": {"white_background": 0.6138, "tan_body": 0.6628, "big_eyes": 0.6961, "red_nose": 0.7501, "floating": 0.6519, "spotted_face": 0.6967}, "stage3_selected_ranks": {"white_background": 21, "tan_body": 11, "big_eyes": 6, "red_nose": 3, "floating": 13, "spotted_face": 5}, "stage3_selected_phrase_ranks": {"white_background": 1, "tan_body": 4, "big_eyes": 1, "red_nose": 1, "floating": 1, "spotted_face": 2}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": 
"explicit", "retrieval_score": 0.6961}, "feral": {"source": "structural"}, "floating": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6519}, "looking_at_viewer": {"source": "structural"}, "nude": {"source": "structural"}, "spotted_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6967}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["anthro", "simple_background", "solo"], "t1": 2.62, "t2": 5.68, "t3": 2.6, "t3s": 3.81, "t3p": 4.34, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=52 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=4"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 80, "n_selected": 26, "n_implied": 8, "n_structural": 4, "n_probe": 3, "ret_R": 0.3182, "P": 0.5, "R": 0.5909, "F1": 0.5417, "leaf_P": 0.125, "leaf_R": 0.1667, "leaf_F1": 0.1429, "n_leaf_sel": 16, "n_leaf_gt": 12, "ret_P": 0.0875, "sel_given_ret": 1.8571, "over_sel": 1.18, "why": {"explicit": 6, "strong_implied": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 38, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "20": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.5909, "gen_F1": 0.5417, "missed": ["chest_tuft", "countershading", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless"], "extra": ["blue_bottomwear", "blue_clothing", "blue_shorts", "firelight", "gesture", "hand_on_own_head", "light", "lighting", "muscular_legs", "raised_hand", "striped_body", "striped_fur", "white_chest"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_bottomwear", "blue_clothing", "blue_eyes", "blue_shorts", "bottomwear", "clothed", "clothing", "felid", "firelight", "fur", "gesture", "hand_on_own_head", "light", "lighting", "male", "mammal", "muscular_legs", "raised_hand", "shorts", "solo", "striped_body", 
"striped_fur", "stripes", "tuft", "white_chest"], "stage3_selected": ["blue_eyes", "blue_shorts", "firelight", "gesture", "hand_on_own_head", "lighting", "muscular_legs", "raised_hand", "shorts", "striped_body", "striped_fur", "tuft", "white_chest"], "stage3_selected_scores": {"blue_eyes": 0.5973, "tuft": 0.5246, "gesture": 0.6156, "striped_body": 0.4667, "raised_hand": 0.7153, "hand_on_own_head": 0.5995, "shorts": 0.6091, "striped_fur": 0.6688, "lighting": 0.7417, "muscular_legs": 0.7909, "blue_shorts": 0.6425, "white_chest": 0.9284, "firelight": 0.6667}, "stage3_selected_ranks": {"blue_eyes": 45, "tuft": 59, "gesture": 37, "striped_body": 77, "raised_hand": 21, "hand_on_own_head": 44, "shorts": 42, "striped_fur": 28, "lighting": 16, "muscular_legs": 10, "blue_shorts": 33, "white_chest": 2, "firelight": 29}, "stage3_selected_phrase_ranks": {"blue_eyes": 2, "tuft": 4, "gesture": 1, "striped_body": 1, "raised_hand": 1, "hand_on_own_head": 4, "shorts": 1, "striped_fur": 2, "lighting": 4, "muscular_legs": 2, "blue_shorts": 3, "white_chest": 1, "firelight": 4}, "extra_evidence": {"blue_bottomwear": {"source": "implied"}, "blue_clothing": {"source": "implied"}, "blue_shorts": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6425}, "firelight": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6667}, "gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6156}, "hand_on_own_head": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5995}, "light": {"source": "implied"}, "lighting": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7417}, "muscular_legs": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7909}, "raised_hand": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7153}, "striped_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4667}, "striped_fur": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6688}, "white_chest": 
{"source": "stage3", "why": "strong_implied", "retrieval_score": 0.9284}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "felid", "solo"], "t1": 1.09, "t2": 1.69, "t3": 6.94, "t3s": 0.9, "t3p": 2.79, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=80 entity=1 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 71, "n_selected": 36, "n_implied": 7, "n_structural": 5, "n_probe": 6, "ret_R": 0.3846, "P": 0.25, "R": 0.6923, "F1": 0.3673, "leaf_P": 0.125, "leaf_R": 0.5, "leaf_F1": 0.2, "n_leaf_sel": 24, "n_leaf_gt": 6, "ret_P": 0.0704, "sel_given_ret": 1.8, "over_sel": 2.77, "why": {"explicit": 21, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 47, "attempts_by_n_local": {"60": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}, "9": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.3333333333333333, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.25, "gen_R": 0.6923, "gen_F1": 0.3673, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["action_figure", "agamid", "anthro", "clothed", "clothing", "dark", "darkness", "emote", "frilled_lizard", "gecko", "group", "guardian", "light", "lying_on_ground", "mask", "medical_instrument", "note", "note_pad", "on_ground", "pointy_speech_bubble", "scientific_instrument", "solo", "speech_bubble", "standing_over", "surgical_mask", "taur", "yuman"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["action_figure", "agamid", "anthro", "bovid", "caprine", "clothed", "clothing", "dark", "darkness", "emote", "frilled_lizard", "gecko", "goat", "group", "guardian", "human", "light", "lizard", "lying_on_ground", "mammal", "mask", "medical_instrument", "note", "note_pad", "on_ground", "pointy_speech_bubble", "reptile", 
"scalie", "scientific_instrument", "solo", "speech_bubble", "standing_over", "surgical_mask", "taur", "text", "yuman"], "stage3_selected": ["action_figure", "bovid", "caprine", "dark", "darkness", "emote", "frilled_lizard", "gecko", "goat", "guardian", "human", "light", "lizard", "lying_on_ground", "note", "note_pad", "on_ground", "pointy_speech_bubble", "speech_bubble", "standing_over", "surgical_mask", "yuman"], "stage3_selected_scores": {"human": 0.5572, "speech_bubble": 0.5746, "bovid": 0.4536, "caprine": 0.4677, "lizard": 0.5943, "goat": 0.5777, "light": 0.5824, "on_ground": 0.4822, "dark": 0.4091, "gecko": 0.4436, "pointy_speech_bubble": 0.4666, "lying_on_ground": 0.5929, "darkness": 0.5977, "note": 0.5658, "emote": 0.3803, "yuman": 0.3939, "frilled_lizard": 0.4581, "standing_over": 0.5799, "surgical_mask": 0.369, "note_pad": 0.4164, "guardian": 0.3707, "action_figure": 0.4064}, "stage3_selected_ranks": {"human": 12, "speech_bubble": 9, "bovid": 28, "caprine": 23, "lizard": 3, "goat": 8, "light": 6, "on_ground": 18, "dark": 42, "gecko": 31, "pointy_speech_bubble": 25, "lying_on_ground": 4, "darkness": 2, "note": 11, "emote": 53, "yuman": 51, "frilled_lizard": 26, "standing_over": 7, "surgical_mask": 63, "note_pad": 38, "guardian": 62, "action_figure": 44}, "stage3_selected_phrase_ranks": {"human": 1, "speech_bubble": 1, "bovid": 4, "caprine": 3, "lizard": 1, "goat": 1, "light": 1, "on_ground": 3, "dark": 3, "gecko": 4, "pointy_speech_bubble": 4, "lying_on_ground": 1, "darkness": 1, "note": 1, "emote": 4, "yuman": 4, "frilled_lizard": 2, "standing_over": 1, "surgical_mask": 4, "note_pad": 2, "guardian": 3, "action_figure": 4}, "extra_evidence": {"action_figure": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4064}, "agamid": {"source": "implied"}, "anthro": {"source": "structural"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "dark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4091}, "darkness": 
{"source": "stage3", "why": "explicit", "retrieval_score": 0.5977}, "emote": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3803}, "frilled_lizard": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4581}, "gecko": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4436}, "group": {"source": "structural"}, "guardian": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3707}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5824}, "lying_on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5929}, "mask": {"source": "implied"}, "medical_instrument": {"source": "implied"}, "note": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5658}, "note_pad": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4164}, "on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4822}, "pointy_speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4666}, "scientific_instrument": {"source": "implied"}, "solo": {"source": "probe"}, "speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5746}, "standing_over": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5799}, "surgical_mask": {"source": "stage3", "why": "explicit", "retrieval_score": 0.369}, "taur": {"source": "structural"}, "yuman": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3939}}, "structural": ["group", "anthro", "taur", "clothed", "text"], "probe": ["clothing", "simple_background", "anthro", "text", "solo", "group"], "t1": 2.16, "t2": 1.47, "t3": 30.46, "t3s": 0.7, "t3p": 2.35, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=69 entity=0 copyright_filtered=2 generic_char_to_general=0 unknown_type=1", "Stage3 general_chunk_0: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": 
\"other\"}, {\"i\": 2, \"why\": \"other\"}, {\"i\": 3, \"why\": \"other\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"strong_implied\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"other\"}, {\"i\": 10, \"why\": \"other\"}, {\"i\": 11, \"why\": \"strong_implied\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"strong_implied\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"other\"}, {\"i\": 19, \"why\": \"other\"}, {\"i\": 20, \"why\": \"other\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 24, \"why\": \"other\"}, {\"i\": 25, \"why\": \"other\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 27, \"why\": \"other\"}, {\"i\": 28, \"why\": \"other\"}, {\"i\": 29, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"strong_implied\"}, {\"i\": 32, \"why\": \"other\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"other\"}, {\"i\": 37, \"why\": \"explicit\"}, {\"i\": 38}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.37.why\n Field required [type=missing, input_value={'i': 38}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 94, "n_selected": 23, "n_implied": 5, "n_structural": 4, "n_probe": 2, "ret_R": 0.7333, "P": 0.5217, "R": 0.8, "F1": 0.6316, "leaf_P": 0.4375, "leaf_R": 0.5833, "leaf_F1": 0.5, "n_leaf_sel": 16, "n_leaf_gt": 12, "ret_P": 0.117, "sel_given_ret": 1.0909, "over_sel": 1.53, "why": {"explicit": 16}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 31, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "36": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5217, "gen_R": 0.8, "gen_F1": 0.6316, "missed": ["angry", "eyes_closed", "sleeping"], "extra": ["annoyed_expression", "anthro", "atmosphere", "calm", "clothed", "clothing", "english_text", "lying_on_bed", "on_bed", "pajamas", "sphere"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "atmosphere", "bed", "blonde_hair", "blue_eyes", "calm", "clothed", "clothing", "duo", "english_text", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "lying_on_bed", "makeup", "on_bed", "pajamas", "purple_hair", "sphere", "text"], "stage3_selected": ["annoyed_expression", "atmosphere", "blonde_hair", "blue_eyes", "calm", "duo", "english_text", "eyeshadow", "green_eyes", "lying", "lying_on_bed", "makeup", "pajamas", "purple_hair", "sphere", "text"], "stage3_selected_scores": {"duo": 0.4298, "text": 
0.594, "blue_eyes": 0.595, "lying": 0.4445, "green_eyes": 0.5934, "blonde_hair": 0.5873, "purple_hair": 0.5592, "makeup": 0.5894, "eyeshadow": 0.4713, "lying_on_bed": 0.4059, "pajamas": 0.371, "annoyed_expression": 0.7219, "calm": 0.3466, "sphere": 0.4546, "atmosphere": 0.5039, "english_text": 0.4128}, "stage3_selected_ranks": {"duo": 54, "text": 7, "blue_eyes": 6, "lying": 46, "green_eyes": 9, "blonde_hair": 11, "purple_hair": 14, "makeup": 10, "eyeshadow": 39, "lying_on_bed": 73, "pajamas": 84, "annoyed_expression": 2, "calm": 90, "sphere": 43, "atmosphere": 26, "english_text": 67}, "stage3_selected_phrase_ranks": {"duo": 2, "text": 1, "blue_eyes": 1, "lying": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "lying_on_bed": 4, "pajamas": 4, "annoyed_expression": 1, "calm": 4, "sphere": 2, "atmosphere": 1, "english_text": 4}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7219}, "anthro": {"source": "structural"}, "atmosphere": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5039}, "calm": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3466}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4128}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4059}, "on_bed": {"source": "implied"}, "pajamas": {"source": "stage3", "why": "explicit", "retrieval_score": 0.371}, "sphere": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4546}}, "structural": ["duo", "anthro", "clothed", "text"], "probe": ["anthro", "duo"], "t1": 2.14, "t2": 2.07, "t3": 11.58, "t3s": 1.41, "t3p": 1.3, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=96 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 45, "n_selected": 28, "n_implied": 13, "n_structural": 4, "n_probe": 3, "ret_R": 0.1818, "P": 0.5, "R": 0.6364, "F1": 0.56, "leaf_P": 0.4167, "leaf_R": 0.3846, "leaf_F1": 0.4, "n_leaf_sel": 12, "n_leaf_gt": 13, "ret_P": 0.0889, "sel_given_ret": 3.5, "over_sel": 1.27, "why": {"explicit": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 24, "attempts_by_n_local": {"48": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.6364, "gen_F1": 0.56, "missed": ["canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music", "spade_tail", "tail"], "extra": ["bottomwear", "denim", "denim_clothing", "electric_guitar", "flowing_hair", "jeans", "looking_at_viewer", "pants", "pastel_background", "playing_guitar", "playing_music", "torn_bottomwear", "torn_jeans", "torn_pants"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bass_guitar", "bottomwear", "canid", "claws", "clothed", "clothing", "denim", "denim_clothing", "electric_guitar", "flowing_hair", "guitar", "hair", "jeans", "looking_at_viewer", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "solo", 
"string_instrument", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants"], "stage3_selected": ["bass_guitar", "claws", "electric_guitar", "flowing_hair", "guitar", "pastel_background", "playing_guitar", "torn_bottomwear", "torn_jeans", "torn_pants"], "stage3_selected_scores": {"claws": 0.5504, "torn_bottomwear": 0.4254, "guitar": 0.9788, "torn_pants": 0.4559, "playing_guitar": 0.9494, "torn_jeans": 0.4784, "electric_guitar": 0.8829, "bass_guitar": 0.9286, "flowing_hair": 0.5466, "pastel_background": 0.5453}, "stage3_selected_ranks": {"claws": 11, "torn_bottomwear": 37, "guitar": 1, "torn_pants": 30, "playing_guitar": 2, "torn_jeans": 24, "electric_guitar": 5, "bass_guitar": 3, "flowing_hair": 13, "pastel_background": 14}, "stage3_selected_phrase_ranks": {"claws": 1, "torn_bottomwear": 3, "guitar": 1, "torn_pants": 2, "playing_guitar": 1, "torn_jeans": 1, "electric_guitar": 4, "bass_guitar": 2, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "electric_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8829}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5466}, "jeans": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5453}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9494}, "playing_music": {"source": "implied"}, "torn_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4254}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4784}, "torn_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4559}}, "structural": ["solo", "anthro", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 2.07, "t2": 0.98, "t3": 4.74, "t3s": 1.05, "t3p": 
2.05, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=48 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 71, "n_selected": 12, "n_implied": 6, "n_structural": 4, "n_probe": 3, "ret_R": 0.56, "P": 0.5, "R": 0.24, "F1": 0.3243, "leaf_P": 0.5, "leaf_R": 0.2, "leaf_F1": 0.2857, "n_leaf_sel": 6, "n_leaf_gt": 15, "ret_P": 0.1972, "sel_given_ret": 0.4286, "over_sel": 0.48, "why": {"explicit": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 1, "dupe_indices_total": 0, "kept_total": 43, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "11": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.24, "gen_F1": 0.3243, "missed": ["bottomwear", "canid", "canine", "claws", "crossed_arms", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "standing"], "extra": ["blue_clothing", "blue_topwear", "grey_clothing", "grey_shirt", "grey_topwear", "looking_at_viewer"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "blue_clothing", "blue_topwear", "clothed", "clothing", "duo", "grey_clothing", "grey_shirt", "grey_topwear", "looking_at_viewer", "shirt", "topwear"], "stage3_selected": ["blue_topwear", "grey_shirt"], "stage3_selected_scores": {"blue_topwear": 
0.6595, "grey_shirt": 0.6862}, "stage3_selected_ranks": {"blue_topwear": 21, "grey_shirt": 16}, "stage3_selected_phrase_ranks": {"blue_topwear": 4, "grey_shirt": 4}, "extra_evidence": {"blue_clothing": {"source": "implied"}, "blue_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6595}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6862}, "grey_topwear": {"source": "implied"}, "looking_at_viewer": {"source": "structural"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 1.76, "t2": 1.7, "t3": 10.45, "t3s": 0.81, "t3p": 2.01, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=71 entity=2 copyright_filtered=0 generic_char_to_general=2 unknown_type=3"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 81, "n_selected": 26, "n_implied": 8, "n_structural": 4, "n_probe": 3, "ret_R": 0.5455, "P": 0.3077, "R": 0.7273, "F1": 0.4324, "leaf_P": 0.25, "leaf_R": 0.4286, "leaf_F1": 0.3158, "n_leaf_sel": 12, "n_leaf_gt": 7, "ret_P": 0.0741, "sel_given_ret": 1.3333, "over_sel": 2.36, "why": {"explicit": 13, "strong_implied": 1}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 2, "calls_with_selection": 2, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 2, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 49, "attempts_by_n_local": {"60": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}, "26": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3077, "gen_R": 0.7273, "gen_F1": 0.4324, "missed": ["blue_eyes", "blue_nose", "open_mouth"], "extra": ["animal_humanoid", "anthro", "blue_eyebrows", "blue_stripes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "eyebrows", "fox_humanoid", "humanoid", "jumper", "male", "mammal_humanoid", "pink_stripes", "pink_tail", "stripes", "tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anthro", "blue_eyebrows", "blue_stripes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "eyebrows", "fox_humanoid", "fur", "humanoid", "jumper", "male", "mammal", "mammal_humanoid", "pink_stripes", "pink_tail", "purple_body", "solo", "stripes", "tail", "white_body", "white_fur"], "stage3_selected": ["blue_eyebrows", "blue_stripes", "canid_humanoid", "canine_humanoid", "fox_humanoid", "fur", 
"jumper", "pink_stripes", "pink_tail", "purple_body", "simple_background", "stripes", "tail", "white_fur"], "stage3_selected_scores": {"fur": 0.5666, "simple_background": 0.5782, "tail": 0.5897, "white_fur": 0.5773, "stripes": 0.578, "purple_body": 0.5476, "canid_humanoid": 0.8744, "canine_humanoid": 0.9128, "pink_tail": 0.5166, "blue_stripes": 0.5367, "blue_eyebrows": 0.4546, "pink_stripes": 0.5444, "jumper": 0.4005, "fox_humanoid": 0.8327}, "stage3_selected_ranks": {"fur": 22, "simple_background": 19, "tail": 14, "white_fur": 21, "stripes": 20, "purple_body": 29, "canid_humanoid": 2, "canine_humanoid": 1, "pink_tail": 42, "blue_stripes": 35, "blue_eyebrows": 64, "pink_stripes": 34, "jumper": 74, "fox_humanoid": 4}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "tail": 1, "white_fur": 1, "stripes": 1, "purple_body": 1, "canid_humanoid": 2, "canine_humanoid": 1, "pink_tail": 1, "blue_stripes": 2, "blue_eyebrows": 2, "pink_stripes": 1, "jumper": 2, "fox_humanoid": 4}, "extra_evidence": {"animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "blue_eyebrows": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4546}, "blue_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5367}, "canid_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8744}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9128}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "eyebrows": {"source": "implied"}, "fox_humanoid": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8327}, "humanoid": {"source": "implied"}, "jumper": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4005}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5444}, "pink_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5166}, 
"stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.578}, "tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5897}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 1.63, "t2": 1.69, "t3": 4.7, "t3s": 0.61, "t3p": 3.1, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=86 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
data/eval_results/latency_single_shot_seed42.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T05:57:34.402565", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "single_shot", "chunk_size": 60, "eval_path": "data/eval_samples/e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 10, "temperature": 0.0, "shuffle": false, "seed": 42, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 43}
|
| 2 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 153, "n_selected": 8, "n_implied": 1, "n_structural": 4, "n_probe": 5, "ret_R": 0.3333, "P": 0.875, "R": 0.5833, "F1": 0.7, "leaf_P": 0.6667, "leaf_R": 0.4444, "leaf_F1": 0.5333, "n_leaf_sel": 6, "n_leaf_gt": 9, "ret_P": 0.0261, "sel_given_ret": 1.75, "over_sel": 0.67, "why": {}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 0, "calls_exhausted_retries": 1, "attempts_total": 3, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 0, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"156": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}}, "attempt_failure_rate": 1.0, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.875, "gen_R": 0.5833, "gen_F1": 0.7, "missed": ["alpha_channel", "feline", "fingers", "fur", "hair"], "extra": ["text"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "clothed", "clothing", "felid", "male", "mammal", "solo", "text"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"text": {"source": "probe"}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 3.18, "t2": 1.54, "t3": 60.6, "t3s": 5.79, "t3p": 8.12, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=156 entity=1 copyright_filtered=1 generic_char_to_general=0 unknown_type=2", "Stage3 general_single_shot: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": 
\"strong_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"strong_implied\"}, {\"i\": 22, \"why\": \"strong_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"style_or_meta\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"explicit\"}, {\"i\": 42, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"explicit\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"style_or_meta\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"explicit\"}, {\"i\": 74, \"why\": \"explicit\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 78}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 78}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"strong_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"strong_implied\"}, {\"i\": 22, \"why\": \"strong_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"style_or_meta\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 39, \"why\": \"explicit\"}, {\"i\": 42, \"why\": \"other\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"style_or_meta\"}, {\"i\": 56, \"why\": \"explicit\"}, {\"i\": 58, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"other\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"explicit\"}, {\"i\": 74, \"why\": \"explicit\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"style_or_meta\"}, {\"i\": 82, \"why\": \"weak_implied\"}, {\"i\": 84}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 84}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"explicit\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"explicit\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"weak_implied\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"explicit\"}, {\"i\": 57, \"why\": \"explicit\"}, {\"i\": 58, \"why\": \"explicit\"}, {\"i\": 59, \"why\": \"explicit\"}, {\"i\": 62, \"why\": \"explicit\"}, {\"i\": 65, \"why\": \"other\"}, {\"i\": 67, \"why\": \"other\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 70, \"why\": \"other\"}, {\"i\": 72, \"why\": \"explicit\"}, {\"i\": 73, \"why\": \"explicit\"}, {\"i\": 74, \"why\": \"explicit\"}, {\"i\": 76, \"why\": \"explicit\"}, {\"i\": 77, \"why\": \"explicit\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"weak_implied\"}, {\"i\": 84, \"why\": \"other\"}, {\"i\": 87, \"why\": \"weak_implied\"}, {\"i\": 88, \"why\": \"other\"}, {\"i\": 90, \"why\": \"explicit\"}, {\"i\": 91, \"why\": \"weak_implied\"}, {\"i\": 93}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.37.why\n Field required [type=missing, input_value={'i': 93}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: gave up after 3 attempts"]}
|
| 3 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 160, "n_selected": 11, "n_implied": 1, "n_structural": 6, "n_probe": 6, "ret_R": 0.5714, "P": 0.5455, "R": 0.4286, "F1": 0.48, "leaf_P": 0.2857, "leaf_R": 0.2, "leaf_F1": 0.2353, "n_leaf_sel": 7, "n_leaf_gt": 10, "ret_P": 0.05, "sel_given_ret": 0.75, "over_sel": 0.79, "why": {}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 0, "calls_exhausted_retries": 1, "attempts_total": 3, "attempt_errors": 2, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"161": {"attempts": 3, "parse_ok": 1, "parse_fail": 0, "errors": 2}}, "attempt_failure_rate": 0.6666666666666666, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5455, "gen_R": 0.4286, "gen_F1": 0.48, "missed": ["ape", "dancing", "fur", "hair", "haplorhine", "human", "male", "primate"], "extra": ["anthro", "duo", "humanoid", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "bear", "clothed", "clothing", "duo", "group", "humanoid", "looking_at_viewer", "mammal", "topless", "trio"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"anthro": {"source": "structural"}, "duo": {"source": "probe"}, "humanoid": {"source": "structural"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "humanoid", "clothed", "topless", "looking_at_viewer"], "probe": ["clothing", "simple_background", "anthro", "duo", "group", "bear"], "t1": 3.09, "t2": 1.9, "t3": 11.5, "t3s": 3.64, "t3p": 8.74, "err": null, "issues": ["Stage3 
split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=161 entity=5 copyright_filtered=2 generic_char_to_general=1 unknown_type=3", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"style_or_meta\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"style_or_meta\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"explicit\"}, {\"i\": 71, \"why\": \"style_or_meta\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 75, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 81, \"why\": \"weak_implied\"}, {\"i\": 83, \"why\": \"style_or_meta\"}, {\"i\": 85, \"why\": \"weak_implied\"}, {\"i\": 87, \"why\": \"weak_implied\"}, {\"i\": 89, \"why\": \"style_or_meta\"}, {\"i\": 91, \"why\": \"weak_implied\"}, {\"i\": 93, \"why\": \"weak_implied\"}, {\"i\": 95}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 95}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"style_or_meta\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"style_or_meta\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 31, \"why\": \"other\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"weak_implied\"}, {\"i\": 41, \"why\": \"weak_implied\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"style_or_meta\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 69}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: gave up after 3 attempts"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 123, "n_selected": 27, "n_implied": 9, "n_structural": 5, "n_probe": 4, "ret_R": 0.7143, "P": 0.5185, "R": 1.0, "F1": 0.6829, "leaf_P": 0.5, "leaf_R": 0.8889, "leaf_F1": 0.64, "n_leaf_sel": 16, "n_leaf_gt": 9, "ret_P": 0.0813, "sel_given_ret": 1.4, "over_sel": 1.93, "why": {"explicit": 9, "strong_implied": 2}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 35, "attempts_by_n_local": {"121": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}}, "attempt_failure_rate": 0.5, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5185, "gen_R": 1.0, "gen_F1": 0.6829, "missed": [], "extra": ["<3", "coat", "holding_object", "holding_plushie", "looking_at_viewer", "love", "male", "raincoat", "topwear", "vest", "white_clothing", "white_coat", "white_topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "holding_object", "holding_plushie", "lagomorph", "leporid", "looking_at_viewer", "love", "male", "mammal", "plushie", "rabbit", "raincoat", "romantic", "romantic_couple", "teal_eyes", "topwear", "vest", "white_clothing", "white_coat", "white_topwear"], "stage3_selected": ["blue_eyes", "coat", "holding_plushie", "love", "plushie", "rabbit", "raincoat", "romantic_couple", "teal_eyes", "vest", "white_coat"], "stage3_selected_scores": {"blue_eyes": 0.4246, "rabbit": 0.5842, "coat": 0.6315, "plushie": 0.6566, "vest": 0.4922, 
"teal_eyes": 0.6344, "holding_plushie": 0.5459, "raincoat": 0.5029, "white_coat": 0.5129, "romantic_couple": 0.5616, "love": 0.4648}, "stage3_selected_ranks": {"blue_eyes": 86, "rabbit": 9, "coat": 5, "plushie": 3, "vest": 47, "teal_eyes": 4, "holding_plushie": 17, "raincoat": 32, "white_coat": 27, "romantic_couple": 13, "love": 56}, "stage3_selected_phrase_ranks": {"blue_eyes": 6, "rabbit": 1, "coat": 1, "plushie": 1, "vest": 9, "teal_eyes": 1, "holding_plushie": 2, "raincoat": 5, "white_coat": 4, "romantic_couple": 1, "love": 5}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6315}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5459}, "looking_at_viewer": {"source": "structural"}, "love": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4648}, "male": {"source": "structural"}, "raincoat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5029}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4922}, "white_clothing": {"source": "implied"}, "white_coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5129}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "blush", "duo", "<3"], "t1": 2.97, "t2": 3.17, "t3": 56.28, "t3s": 4.35, "t3p": 3.9, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=121 entity=3 copyright_filtered=0 generic_char_to_general=0 unknown_type=2", "Stage3 general_single_shot: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": 
\"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 37, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"style_or_meta\"}, {\"i\": 40, \"why\": \"weak_implied\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 50, \"why\": \"weak_implied\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"weak_implied\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 73, \"why\": \"weak_implied\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {}]}. Got: 2 validation errors for Stage3SelectionResponse\nselections.34.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.34.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 5 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 66, "n_selected": 9, "n_implied": 0, "n_structural": 5, "n_probe": 3, "ret_R": 0.0, "P": 0.1111, "R": 0.25, "F1": 0.1538, "leaf_P": 0.1111, "leaf_R": 0.25, "leaf_F1": 0.1538, "n_leaf_sel": 9, "n_leaf_gt": 4, "ret_P": 0.0, "sel_given_ret": 0.0, "over_sel": 2.25, "why": {"explicit": 5}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"62": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1111, "gen_R": 0.25, "gen_F1": 0.1538, "missed": ["red_nose", "smile", "tan_body"], "extra": ["ambiguous_gender", "anthro", "feral", "looking_at_viewer", "nude", "sky_background", "toony", "wide_eyed"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "feral", "looking_at_viewer", "nude", "sky_background", "solo", "toony", "wide_eyed"], "stage3_selected": ["simple_background", "sky_background", "toony", "white_background", "wide_eyed"], "stage3_selected_scores": {"simple_background": 0.5582, "white_background": 0.5301, "toony": 0.5337, "wide_eyed": 0.4535, "sky_background": 0.5476}, "stage3_selected_ranks": {"simple_background": 5, "white_background": 11, "toony": 9, "wide_eyed": 29, "sky_background": 6}, "stage3_selected_phrase_ranks": {"simple_background": 1, "white_background": 8, "toony": 2, "wide_eyed": 1, "sky_background": 4}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "feral": {"source": "structural"}, "looking_at_viewer": {"source": 
"structural"}, "nude": {"source": "structural"}, "sky_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5476}, "toony": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5337}, "wide_eyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4535}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["anthro", "simple_background", "solo"], "t1": 31.72, "t2": 0.61, "t3": 1.85, "t3s": 5.36, "t3p": 4.34, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "LLM rewrite: fallback (error: ReadTimeout: The read operation timed out)", "Stage3 split: general=62 entity=1 copyright_filtered=9 generic_char_to_general=0 unknown_type=1"]}
|
| 6 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 192, "n_selected": 8, "n_implied": 2, "n_structural": 5, "n_probe": 3, "ret_R": 0.4091, "P": 0.875, "R": 0.3182, "F1": 0.4667, "leaf_P": 0.1667, "leaf_R": 0.0833, "leaf_F1": 0.1111, "n_leaf_sel": 6, "n_leaf_gt": 12, "ret_P": 0.0469, "sel_given_ret": 0.7778, "over_sel": 0.36, "why": {}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 0, "calls_exhausted_retries": 1, "attempts_total": 3, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 0, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"193": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}}, "attempt_failure_rate": 1.0, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.875, "gen_R": 0.3182, "gen_F1": 0.4667, "missed": ["blue_eyes", "bottomwear", "chest_tuft", "countershading", "fur", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "stripes", "tiger", "topless", "tuft"], "extra": ["looking_at_viewer"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "clothed", "clothing", "felid", "looking_at_viewer", "male", "mammal", "solo"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"looking_at_viewer": {"source": "structural"}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "felid", "solo"], "t1": 1.93, "t2": 1.89, "t3": 38.4, "t3s": 0.87, "t3p": 1.34, "err": null, "issues": ["Stage3 split: 
general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=193 entity=1 copyright_filtered=2 generic_char_to_general=0 unknown_type=3", "Stage3 general_single_shot: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"strong_implied\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"style_or_meta\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"style_or_meta\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"style_or_meta\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 41, \"why\": \"explicit\"}, {\"i\": 43, \"why\": \"other\"}, {\"i\": 46, \"why\": \"explicit\"}, {\"i\": 51, \"why\": \"style_or_meta\"}, {\"i\": 55, \"why\": \"style_or_meta\"}, {\"i\": 57, \"why\": \"other\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"style_or_meta\"}, {\"i\": 68, \"why\": \"other\"}, {\"i\": 71, \"why\": \"weak_implied\"}, {\"i\": 74, \"why\": \"style_or_meta\"}, {\"i\": 77, \"why\": \"weak_implied\"}, {\"i\": 81, \"why\": \"other\"}, {\"i\": 84, \"why\": \"weak_implied\"}, {\"i\": 87, \"why\": \"strong_implied\"}, {\"i\": 91, \"why\": \"explicit\"}, {\"i\": 94, \"why\": \"weak_implied\"}, {\"i\": 97, \"why\": \"style_or_meta\"}, {\"i\": 100, \"why\": \"weak_implied\"}, {\"i\": 103, \"why\": \"other\"}, {\"i\": 106, \"why\": \"weak_implied\"}, {\"i\": 109, \"why\": \"strong_implied\"}, {\"i\": 113, \"why\": \"other\"}, {\"i\": 116, \"why\": \"weak_implied\"}, {\"i\": 119}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 119}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 25, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"explicit\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 37, \"why\": \"explicit\"}, {\"i\": 39, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"explicit\"}, {\"i\": 43, \"why\": \"explicit\"}, {\"i\": 45, \"why\": \"explicit\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 51, \"why\": \"explicit\"}, {\"i\": 53, \"why\": \"explicit\"}, {\"i\": 55, \"why\": \"explicit\"}, {\"i\": 57, \"why\": \"explicit\"}, {\"i\": 59, \"why\": \"explicit\"}, {\"i\": 61, \"why\": \"explicit\"}, {\"i\": 63, \"why\": \"explicit\"}, {\"i\": 65, \"why\": \"explicit\"}, {\"i\": 67, \"why\": \"explicit\"}, {\"i\": 69, \"why\": \"explicit\"}, {\"i\": 71, \"why\": \"explicit\"}, {\"i\": 73, \"why\": \"explicit\"}, {\"i\": 75, \"why\": \"explicit\"}, {\"i\": 77, \"why\": \"explicit\"}, {\"i\": 79, \"why\": \"explicit\"}, {\"i\": 81, \"why\": \"explicit\"}, {\"i\": 83, \"why\": \"explicit\"}, {\"i\": 85, \"why\": \"explicit\"}, {\"i\": 87, \"why\": \"explicit\"}, {\"i\": 89, \"why\": \"explicit\"}, {\"i\": 91, \"why\": \"explicit\"}, {\"i\": 93}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.38.why\n Field required [type=missing, input_value={'i': 93}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"explicit\"}, {\"i\": 68, \"why\": \"weak_implied\"}, {\"i\": 71, \"why\": \"explicit\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 87, \"why\": \"strong_implied\"}, {\"i\": 91, \"why\": \"weak_implied\"}, {\"i\": 95, \"why\": \"weak_implied\"}, {\"i\": 97, \"why\": \"weak_implied\"}, {\"i\": 109, \"why\": \"strong_implied\"}, {\"i\": 113, \"why\": \"other\"}, {\"i\": 115, \"why\": \"weak_implied\"}, {\"i\": 119, \"why\": \"style_or_meta\"}, {\"i\": 128, \"why\": \"weak_implied\"}, {\"i\": 131, \"why\": \"weak_implied\"}, {\"i\": 137, \"why\": \"weak_implied\"}, {\"i\": 141, \"why\": \"weak_implied\"}, {\"i\": 145, \"why\": \"weak_implied\"}, {\"i\": 149, \"why\": \"weak_implied\"}, {\"i\": 153, \"why\": \"style_or_meta\"}, {\"i\": 157, \"why\": \"weak_implied\"}, {\"i\": 161, \"why\": \"weak_implied\"}, {\"i\": 165, \"why\": \"weak_implied\"}, {\"i\": 169, \"why\": \"weak_implied\"}, {\"i\": 173, \"why\": \"weak_implied\"}, {\"i\": 177, \"why\": \"weak_implied\"}, {\"i\": 181, \"why\": \"weak_implied\"}, {\"i\": 185, \"why\": \"weak_implied\"}, {\"i\": 189, \"why\": \"weak_implied\"}, {\"i\": 193}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 193}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: gave up after 3 attempts"]}
|
| 7 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 177, "n_selected": 7, "n_implied": 0, "n_structural": 5, "n_probe": 4, "ret_R": 0.6923, "P": 0.1429, "R": 0.0769, "F1": 0.1, "leaf_P": 0.1667, "leaf_R": 0.1667, "leaf_F1": 0.1667, "n_leaf_sel": 6, "n_leaf_gt": 6, "ret_P": 0.0508, "sel_given_ret": 0.1111, "over_sel": 0.54, "why": {}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 0, "calls_exhausted_retries": 1, "attempts_total": 3, "attempt_errors": 3, "attempt_parse_fail": 0, "attempt_parse_ok": 0, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"168": {"attempts": 3, "parse_ok": 0, "parse_fail": 0, "errors": 3}}, "attempt_failure_rate": 1.0, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1429, "gen_R": 0.0769, "gen_F1": 0.1, "missed": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "white_body", "white_fur"], "extra": ["anthro", "clothed", "clothing", "group", "intersex", "taur"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "clothed", "clothing", "group", "intersex", "taur", "text"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"anthro": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "group": {"source": "structural"}, "intersex": {"source": "structural"}, "taur": {"source": "structural"}}, "structural": ["group", "taur", "intersex", "clothed", "text"], "probe": ["clothing", "simple_background", "anthro", "text"], "t1": 2.38, "t2": 1.56, "t3": 37.47, "t3s": 2.28, "t3p": 1.41, "err": null, "issues": ["Stage3 split: general=13 
entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=168 entity=3 copyright_filtered=6 generic_char_to_general=0 unknown_type=2", "Stage3 general_single_shot: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"weak_implied\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"weak_implied\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"weak_implied\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"weak_implied\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"weak_implied\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.34.why\n Field required [type=missing, input_value={'i': 35}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 2, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"weak_implied\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 15, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 17, \"why\": \"weak_implied\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"weak_implied\"}, {\"i\": 20, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"weak_implied\"}, {\"i\": 22, \"why\": \"weak_implied\"}, {\"i\": 23, \"why\": \"weak_implied\"}, {\"i\": 24, \"why\": \"explicit\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 36}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: gave up after 3 attempts"]}
|
| 8 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 208, "n_selected": 18, "n_implied": 1, "n_structural": 3, "n_probe": 3, "ret_R": 0.6667, "P": 0.4444, "R": 0.5333, "F1": 0.4848, "leaf_P": 0.4, "leaf_R": 0.5, "leaf_F1": 0.4444, "n_leaf_sel": 15, "n_leaf_gt": 12, "ret_P": 0.0481, "sel_given_ret": 0.8, "over_sel": 1.2, "why": {"explicit": 14}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 2, "attempt_errors": 1, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 36, "attempts_by_n_local": {"211": {"attempts": 2, "parse_ok": 1, "parse_fail": 0, "errors": 1}}, "attempt_failure_rate": 0.5, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4444, "gen_R": 0.5333, "gen_F1": 0.4848, "missed": ["angry", "bed", "eyes_closed", "furniture", "green_eyes", "lying", "sleeping"], "extra": ["annoyed", "annoyed_expression", "anthro", "bed_covers", "bedding", "bedroom", "blanket", "expressions", "eyes", "humanoid"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed", "annoyed_expression", "anthro", "bed_covers", "bedding", "bedroom", "blanket", "blonde_hair", "blue_eyes", "duo", "expressions", "eyes", "eyeshadow", "hair", "humanoid", "makeup", "purple_hair", "text"], "stage3_selected": ["annoyed", "annoyed_expression", "bed_covers", "bedroom", "blanket", "blonde_hair", "blue_eyes", "expressions", "eyes", "eyeshadow", "hair", "makeup", "purple_hair", "text"], "stage3_selected_scores": {"hair": 0.6031, "text": 0.6007, "blue_eyes": 0.6014, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "eyeshadow": 0.4763, "bedroom": 
0.4901, "annoyed": 0.5727, "blanket": 0.4205, "annoyed_expression": 0.7251, "bed_covers": 0.4145, "expressions": 0.5439, "eyes": 0.8951}, "stage3_selected_ranks": {"hair": 5, "text": 8, "blue_eyes": 7, "blonde_hair": 10, "purple_hair": 14, "makeup": 11, "eyeshadow": 53, "bedroom": 43, "annoyed": 13, "blanket": 98, "annoyed_expression": 2, "bed_covers": 108, "expressions": 18, "eyes": 1}, "stage3_selected_phrase_ranks": {"hair": 1, "text": 1, "blue_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "eyeshadow": 3, "bedroom": 1, "annoyed": 2, "blanket": 7, "annoyed_expression": 1, "bed_covers": 3, "expressions": 3, "eyes": 1}, "extra_evidence": {"annoyed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5727}, "annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "bed_covers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4145}, "bedding": {"source": "implied"}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "blanket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4205}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5439}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8951}, "humanoid": {"source": "structural"}}, "structural": ["duo", "humanoid", "text"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.3, "t2": 1.92, "t3": 28.93, "t3s": 0.87, "t3p": 7.59, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=211 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4", "Stage3 general_single_shot: attempt 1 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 2, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 8, \"why\": 
\"explicit\"}, {\"i\": 10, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"style_or_meta\"}, {\"i\": 20, \"why\": \"style_or_meta\"}, {\"i\": 24, \"why\": \"explicit\"}, {\"i\": 26, \"why\": \"weak_implied\"}, {\"i\": 28, \"why\": \"explicit\"}, {\"i\": 30, \"why\": \"explicit\"}, {\"i\": 33, \"why\": \"explicit\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 39, \"why\": \"explicit\"}, {\"i\": 41, \"why\": \"style_or_meta\"}, {\"i\": 43, \"why\": \"style_or_meta\"}, {\"i\": 45, \"why\": \"style_or_meta\"}, {\"i\": 47, \"why\": \"explicit\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"style_or_meta\"}, {\"i\": 53, \"why\": \"weak_implied\"}, {\"i\": 55, \"why\": \"weak_implied\"}, {\"i\": 57, \"why\": \"weak_implied\"}, {\"i\": 59, \"why\": \"weak_implied\"}, {\"i\": 61, \"why\": \"weak_implied\"}, {\"i\": 63, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"style_or_meta\"}, {\"i\": 71, \"why\": \"style_or_meta\"}, {\"i\": 73, \"why\": \"style_or_meta\"}, {\"i\": 75}]}. Got: 1 validation error for Stage3SelectionResponse\nselections.35.why\n Field required [type=missing, input_value={'i': 75}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 9 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 106, "n_selected": 40, "n_implied": 17, "n_structural": 3, "n_probe": 3, "ret_R": 0.3636, "P": 0.375, "R": 0.6818, "F1": 0.4839, "leaf_P": 0.25, "leaf_R": 0.3077, "leaf_F1": 0.2759, "n_leaf_sel": 16, "n_leaf_gt": 13, "ret_P": 0.0755, "sel_given_ret": 1.875, "over_sel": 1.82, "why": {"explicit": 18, "strong_implied": 1}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 2, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 28, "attempts_by_n_local": {"109": {"attempts": 3, "parse_ok": 1, "parse_fail": 0, "errors": 2}}, "attempt_failure_rate": 0.6666666666666666, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.375, "gen_R": 0.6818, "gen_F1": 0.4839, "missed": ["fingers", "fur", "holding_musical_instrument", "holding_object", "music", "spade_tail", "tail"], "extra": ["action_pose", "blonde_hair", "bottomwear", "canis", "crosslegged_pose", "denim", "denim_clothing", "electric_guitar", "finger_claws", "flowing_hair", "jeans", "pants", "pastel_background", "playing_guitar", "playing_music", "pose", "shirt", "topwear", "torn_body", "torn_bottomwear", "torn_jeans", "torn_pants", "torn_shirt", "torn_topwear", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["action_pose", "anthro", "bass_guitar", "blonde_hair", "bottomwear", "canid", "canine", "canis", "claws", "clothed", "clothing", "crosslegged_pose", "denim", 
"denim_clothing", "electric_guitar", "finger_claws", "flowing_hair", "guitar", "hair", "jeans", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "pose", "shirt", "solo", "string_instrument", "topwear", "torn_body", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "torn_shirt", "torn_topwear", "wolf"], "stage3_selected": ["action_pose", "bass_guitar", "blonde_hair", "claws", "crosslegged_pose", "electric_guitar", "finger_claws", "flowing_hair", "guitar", "pastel_background", "playing_guitar", "playing_music", "torn_body", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "torn_shirt", "wolf"], "stage3_selected_scores": {"claws": 0.5504, "wolf": 0.5691, "blonde_hair": 0.3645, "torn_clothing": 0.3951, "finger_claws": 0.422, "torn_bottomwear": 0.4254, "guitar": 0.9788, "torn_pants": 0.4559, "playing_music": 0.8891, "torn_shirt": 0.3906, "playing_guitar": 0.9494, "torn_jeans": 0.4784, "electric_guitar": 0.8829, "bass_guitar": 0.9286, "flowing_hair": 0.5466, "crosslegged_pose": 0.445, "torn_body": 0.388, "pastel_background": 0.5453, "action_pose": 0.5685}, "stage3_selected_ranks": {"claws": 17, "wolf": 12, "blonde_hair": 101, "torn_clothing": 83, "finger_claws": 65, "torn_bottomwear": 64, "guitar": 1, "torn_pants": 43, "playing_music": 4, "torn_shirt": 85, "playing_guitar": 2, "torn_jeans": 30, "electric_guitar": 5, "bass_guitar": 3, "flowing_hair": 19, "crosslegged_pose": 49, "torn_body": 87, "pastel_background": 20, "action_pose": 13}, "stage3_selected_phrase_ranks": {"claws": 1, "wolf": 1, "blonde_hair": 6, "torn_clothing": 6, "finger_claws": 5, "torn_bottomwear": 3, "guitar": 1, "torn_pants": 2, "playing_music": 3, "torn_shirt": 7, "playing_guitar": 1, "torn_jeans": 1, "electric_guitar": 4, "bass_guitar": 2, "flowing_hair": 1, "crosslegged_pose": 6, "torn_body": 8, "pastel_background": 1, "action_pose": 1}, "extra_evidence": {"action_pose": {"source": "stage3", 
"why": "strong_implied", "retrieval_score": 0.5685}, "blonde_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3645}, "bottomwear": {"source": "implied"}, "canis": {"source": "implied"}, "crosslegged_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.445}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "electric_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8829}, "finger_claws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.422}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5466}, "jeans": {"source": "implied"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5453}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9494}, "playing_music": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8891}, "pose": {"source": "implied"}, "shirt": {"source": "implied"}, "topwear": {"source": "implied"}, "torn_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.388}, "torn_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4254}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4784}, "torn_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4559}, "torn_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3906}, "torn_topwear": {"source": "implied"}, "wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5691}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.21, "t2": 1.05, "t3": 33.71, "t3s": 1.77, "t3p": 4.53, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=109 entity=2 copyright_filtered=0 generic_char_to_general=0 unknown_type=2", "Stage3 general_single_shot: attempt 1 error: Failed to parse 
Stage3SelectionResponse from completion {\"selections\": [{\"i\": 3, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 35, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"explicit\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"other\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"explicit\"}, {\"i\": 60, \"why\": \"explicit\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 67, \"why\": \"weak_implied\"}, {\"i\": 69, \"why\": \"style_or_meta\"}, {\"i\": 71, \"why\": \"explicit\"}, {\"i\": 73, \"why\": \"other\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"explicit\"}, {\"i\": 85, \"why\": \"weak_implied\"}, {\"i\": 87, \"why\": \"other\"}, {\"i\": 90, \"why\": \"style_or_meta\"}, {\"i\": 92, \"why\": \"explicit\"}, {\"i\": 94, \"why\": \"weak_implied\"}, {\"i\": 96}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 96}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 3, \"why\": \"explicit\"}, {\"i\": 6, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 13, \"why\": \"explicit\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"weak_implied\"}, {\"i\": 21, \"why\": \"explicit\"}, {\"i\": 24, \"why\": \"weak_implied\"}, {\"i\": 26, \"why\": \"explicit\"}, {\"i\": 27, \"why\": \"explicit\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"explicit\"}, {\"i\": 36, \"why\": \"weak_implied\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"other\"}, {\"i\": 43, \"why\": \"weak_implied\"}, {\"i\": 46, \"why\": \"explicit\"}, {\"i\": 48, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"other\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 58, \"why\": \"explicit\"}, {\"i\": 60, \"why\": \"explicit\"}, {\"i\": 62, \"why\": \"weak_implied\"}, {\"i\": 65, \"why\": \"weak_implied\"}, {\"i\": 68, \"why\": \"explicit\"}, {\"i\": 70, \"why\": \"weak_implied\"}, {\"i\": 72, \"why\": \"explicit\"}, {\"i\": 74, \"why\": \"weak_implied\"}, {\"i\": 76, \"why\": \"weak_implied\"}, {\"i\": 80, \"why\": \"weak_implied\"}, {\"i\": 82, \"why\": \"explicit\"}, {\"i\": 84, \"why\": \"weak_implied\"}, {\"i\": 86, \"why\": \"weak_implied\"}, {\"i\": 88, \"why\": \"other\"}, {\"i\": 90}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 90}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE "]}
|
| 10 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 161, "n_selected": 5, "n_implied": 1, "n_structural": 4, "n_probe": 3, "ret_R": 0.64, "P": 0.8, "R": 0.16, "F1": 0.2667, "leaf_P": 0.75, "leaf_R": 0.2, "leaf_F1": 0.3158, "n_leaf_sel": 4, "n_leaf_gt": 15, "ret_P": 0.0994, "sel_given_ret": 0.25, "over_sel": 0.2, "why": {}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 0, "calls_exhausted_retries": 1, "attempts_total": 3, "attempt_errors": 2, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 0, "attempts_by_n_local": {"161": {"attempts": 3, "parse_ok": 1, "parse_fail": 0, "errors": 2}}, "attempt_failure_rate": 0.6666666666666666, "call_exhaustion_rate": 1.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.8, "gen_R": 0.16, "gen_F1": 0.2667, "missed": ["bottomwear", "canid", "canine", "claws", "crossed_arms", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "extra": ["looking_at_viewer"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "clothed", "clothing", "duo", "looking_at_viewer"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"looking_at_viewer": {"source": "structural"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.22, "t2": 1.6, "t3": 
26.78, "t3s": 0.46, "t3p": 8.01, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=161 entity=5 copyright_filtered=0 generic_char_to_general=0 unknown_type=3", "Stage3 general_single_shot: attempt 2 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"weak_implied\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 4, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"weak_implied\"}, {\"i\": 7, \"why\": \"weak_implied\"}, {\"i\": 8, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"weak_implied\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 11, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 13, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 17, \"why\": \"other\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 19, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 21, \"why\": \"other\"}, {\"i\": 23, \"why\": \"other\"}, {\"i\": 25, \"why\": \"weak_implied\"}, {\"i\": 27, \"why\": \"weak_implied\"}, {\"i\": 29, \"why\": \"weak_implied\"}, {\"i\": 31, \"why\": \"weak_implied\"}, {\"i\": 33, \"why\": \"other\"}, {\"i\": 35, \"why\": \"other\"}, {\"i\": 37, \"why\": \"other\"}, {\"i\": 39, \"why\": \"other\"}, {\"i\": 41, \"why\": \"other\"}, {\"i\": 43, \"why\": \"explicit\"}, {\"i\": 45, \"why\": \"weak_implied\"}, {\"i\": 47, \"why\": \"weak_implied\"}, {\"i\": 49, \"why\": \"weak_implied\"}, {\"i\": 51, \"why\": \"other\"}, {\"i\": 53, \"why\": \"other\"}, {\"i\": 55, \"why\": \"other\"}, {\"i\": 57, \"why\": \"other\"}, {\"i\": 59}]}. 
Got: 1 validation error for Stage3SelectionResponse\nselections.36.why\n Field required [type=missing, input_value={'i': 59}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: attempt 3 error: Failed to parse Stage3SelectionResponse from completion {\"selections\": [{\"i\": 1, \"why\": \"explicit\"}, {\"i\": 3, \"why\": \"explicit\"}, {\"i\": 5, \"why\": \"explicit\"}, {\"i\": 7, \"why\": \"explicit\"}, {\"i\": 9, \"why\": \"explicit\"}, {\"i\": 10, \"why\": \"explicit\"}, {\"i\": 12, \"why\": \"weak_implied\"}, {\"i\": 14, \"why\": \"explicit\"}, {\"i\": 16, \"why\": \"explicit\"}, {\"i\": 18, \"why\": \"explicit\"}, {\"i\": 20, \"why\": \"explicit\"}, {\"i\": 22, \"why\": \"other\"}, {\"i\": 24, \"why\": \"explicit\"}, {\"i\": 26, \"why\": \"other\"}, {\"i\": 28, \"why\": \"other\"}, {\"i\": 30, \"why\": \"other\"}, {\"i\": 32, \"why\": \"weak_implied\"}, {\"i\": 34, \"why\": \"weak_implied\"}, {\"i\": 36, \"why\": \"explicit\"}, {\"i\": 38, \"why\": \"explicit\"}, {\"i\": 40, \"why\": \"explicit\"}, {\"i\": 42, \"why\": \"weak_implied\"}, {\"i\": 44, \"why\": \"explicit\"}, {\"i\": 46, \"why\": \"weak_implied\"}, {\"i\": 48, \"why\": \"other\"}, {\"i\": 50, \"why\": \"other\"}, {\"i\": 52, \"why\": \"weak_implied\"}, {\"i\": 54, \"why\": \"weak_implied\"}, {\"i\": 56, \"why\": \"explicit\"}, {\"i\": 58, \"why\": \"other\"}, {\"i\": 60, \"why\": \"weak_implied\"}, {\"i\": 62, \"why\": \"other\"}, {\"i\": 64, \"why\": \"weak_implied\"}, {\"i\": 66, \"why\": \"other\"}, {\"i\": 68, \"why\": \"other\"}, {\"i\": 70, \"why\": \"other\"}, {\"i\": 72, \"why\": \"weak_implied\"}, {}]}. 
Got: 2 validation errors for Stage3SelectionResponse\nselections.37.i\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nselections.37.why\n Field required [type=missing, input_value={}, input_type=dict]\n For further information visit https://errors.pydantic.dev/2.12/v/missing\nFor troubleshooting, visit: https://docs.langchain.com/oss/python/langchain/errors/OUTPUT_PARSING_FAILURE ", "Stage3 general_single_shot: gave up after 3 attempts"]}
|
| 11 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 204, "n_selected": 87, "n_implied": 31, "n_structural": 4, "n_probe": 3, "ret_R": 0.6364, "P": 0.1034, "R": 0.8182, "F1": 0.1837, "leaf_P": 0.0577, "leaf_R": 0.4286, "leaf_F1": 0.1017, "n_leaf_sel": 52, "n_leaf_gt": 7, "ret_P": 0.0343, "sel_given_ret": 1.2857, "over_sel": 7.91, "why": {"explicit": 52}, "stage3_diag": {"mode": "single_shot", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 54, "attempts_by_n_local": {"205": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1034, "gen_R": 0.8182, "gen_F1": 0.1837, "missed": ["blue_eyes", "blue_nose"], "extra": ["2_tails", "abs", "action_pose", "amber_eyes", "animal_humanoid", "anthro", "blue_background", "blue_fingers", "blue_neck", "blue_pawpads", "blue_paws", "blue_toes", "body_hair", "bovid", "bovid_humanoid", "canid_humanoid", "canine_humanoid", "caprine", "caprine_humanoid", "clothed", "clothing", "female_humanoid", "fighting_pose", "goat_humanoid", "grey_nose", "half-closed_eyes", "half_body", "heterochromia", "holding_tail", "humanoid", "inner_ear_fluff", "jagged_mouth", "male", "male_humanoid", "mammal_humanoid", "melee_weapon", "mouth_full", "multi_tail", "muscular", "muscular_male", "narrow_tail", "narrowed_eyes", "nervous_expression", "no_irises", "open_smile", "paw_pose", "pawpads", "pig_humanoid", "pink_ears", "pink_legs", "pink_mouth", "pink_stripes", "pink_tongue", "polearm", "pose", "pupils", "purple_background", "purple_face", "rider", "slim_humanoid", "slit_pupils", "smile", "striped_neck", "stripes", "suid", "suid_humanoid", "suina", 
"suina_humanoid", "tail", "teeth", "tongue", "trident", "tuft", "two_tone_tail", "wave", "weapon", "white_inner_ear_fluff", "white_tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["2_tails", "abs", "action_pose", "amber_eyes", "animal_humanoid", "anthro", "blue_background", "blue_fingers", "blue_neck", "blue_pawpads", "blue_paws", "blue_toes", "body_hair", "bovid", "bovid_humanoid", "canid", "canid_humanoid", "canine", "canine_humanoid", "caprine", "caprine_humanoid", "clothed", "clothing", "female_humanoid", "fighting_pose", "fur", "goat_humanoid", "grey_nose", "half-closed_eyes", "half_body", "heterochromia", "holding_tail", "humanoid", "inner_ear_fluff", "jagged_mouth", "male", "male_humanoid", "mammal", "mammal_humanoid", "melee_weapon", "mouth_full", "multi_tail", "muscular", "muscular_male", "narrow_tail", "narrowed_eyes", "nervous_expression", "no_irises", "open_mouth", "open_smile", "paw_pose", "pawpads", "pig_humanoid", "pink_ears", "pink_legs", "pink_mouth", "pink_stripes", "pink_tongue", "polearm", "pose", "pupils", "purple_background", "purple_body", "purple_face", "rider", "slim_humanoid", "slit_pupils", "smile", "solo", "striped_neck", "stripes", "suid", "suid_humanoid", "suina", "suina_humanoid", "tail", "teeth", "tongue", "trident", "tuft", "two_tone_tail", "wave", "weapon", "white_body", "white_fur", "white_inner_ear_fluff", "white_tail"], "stage3_selected": ["2_tails", "abs", "action_pose", "amber_eyes", "animal_humanoid", "blue_background", "blue_fingers", "blue_neck", "blue_pawpads", "blue_paws", "blue_toes", "body_hair", "canine_humanoid", "female_humanoid", "fighting_pose", "goat_humanoid", "grey_nose", "half-closed_eyes", "half_body", "heterochromia", "holding_tail", "jagged_mouth", "male_humanoid", "mouth_full", "muscular_male", "narrow_tail", "nervous_expression", "no_irises", "open_smile", "paw_pose", 
"pig_humanoid", "pink_ears", "pink_legs", "pink_mouth", "pink_stripes", "pink_tongue", "purple_background", "purple_body", "purple_face", "rider", "simple_background", "slim_humanoid", "slit_pupils", "striped_neck", "teeth", "trident", "two_tone_tail", "wave", "white_body", "white_fur", "white_inner_ear_fluff", "white_tail"], "stage3_selected_scores": {"simple_background": 0.5948, "teeth": 0.3603, "white_body": 0.4875, "white_fur": 0.5995, "muscular_male": 0.3102, "abs": 0.3223, "half-closed_eyes": 0.3629, "animal_humanoid": 0.6159, "purple_body": 0.564, "open_smile": 0.4868, "body_hair": 0.305, "slit_pupils": 0.396, "pink_tongue": 0.4215, "blue_background": 0.48, "canine_humanoid": 0.9003, "white_tail": 0.5202, "heterochromia": 0.4423, "two_tone_tail": 0.5197, "amber_eyes": 0.4076, "purple_background": 0.5414, "blue_pawpads": 0.4891, "2_tails": 0.4672, "white_inner_ear_fluff": 0.597, "action_pose": 0.617, "grey_nose": 0.4662, "pink_ears": 0.5255, "holding_tail": 0.5079, "fighting_pose": 0.4593, "wave": 0.3632, "narrow_tail": 0.5074, "paw_pose": 0.5582, "trident": 0.2683, "goat_humanoid": 0.5534, "blue_paws": 0.4986, "no_irises": 0.4008, "pink_stripes": 0.682, "female_humanoid": 0.563, "purple_face": 0.5577, "jagged_mouth": 0.5168, "male_humanoid": 0.5627, "pink_mouth": 0.5127, "striped_neck": 0.5948, "blue_fingers": 0.5077, "blue_toes": 0.5148, "mouth_full": 0.458, "slim_humanoid": 0.588, "rider": 0.2712, "half_body": 0.4115, "pig_humanoid": 0.5894, "blue_neck": 0.5222, "nervous_expression": 0.4772, "pink_legs": 0.5285}, "stage3_selected_ranks": {"simple_background": 36, "teeth": 190, "white_body": 114, "white_fur": 30, "muscular_male": 202, "abs": 199, "half-closed_eyes": 187, "animal_humanoid": 20, "purple_body": 49, "open_smile": 116, "body_hair": 203, "slit_pupils": 178, "pink_tongue": 166, "blue_background": 122, "canine_humanoid": 1, "white_tail": 82, "heterochromia": 158, "two_tone_tail": 83, "amber_eyes": 174, "purple_background": 64, "blue_pawpads": 109, 
"2_tails": 136, "white_inner_ear_fluff": 32, "action_pose": 19, "grey_nose": 137, "pink_ears": 75, "holding_tail": 92, "fighting_pose": 140, "wave": 186, "narrow_tail": 94, "paw_pose": 56, "trident": 209, "goat_humanoid": 58, "blue_paws": 102, "no_irises": 176, "pink_stripes": 13, "female_humanoid": 50, "purple_face": 57, "jagged_mouth": 86, "male_humanoid": 52, "pink_mouth": 90, "striped_neck": 35, "blue_fingers": 93, "blue_toes": 87, "mouth_full": 143, "slim_humanoid": 38, "rider": 207, "half_body": 172, "pig_humanoid": 37, "blue_neck": 80, "nervous_expression": 127, "pink_legs": 73}, "stage3_selected_phrase_ranks": {"simple_background": 1, "teeth": 7, "white_body": 8, "white_fur": 1, "muscular_male": 8, "abs": 7, "half-closed_eyes": 6, "animal_humanoid": 2, "purple_body": 2, "open_smile": 2, "body_hair": 9, "slit_pupils": 10, "pink_tongue": 3, "blue_background": 7, "canine_humanoid": 1, "white_tail": 7, "heterochromia": 6, "two_tone_tail": 8, "amber_eyes": 9, "purple_background": 5, "blue_pawpads": 10, "2_tails": 10, "white_inner_ear_fluff": 2, "action_pose": 1, "grey_nose": 7, "pink_ears": 4, "holding_tail": 8, "fighting_pose": 5, "wave": 6, "narrow_tail": 9, "paw_pose": 2, "trident": 8, "goat_humanoid": 8, "blue_paws": 9, "no_irises": 8, "pink_stripes": 1, "female_humanoid": 5, "purple_face": 3, "jagged_mouth": 5, "male_humanoid": 7, "pink_mouth": 6, "striped_neck": 4, "blue_fingers": 5, "blue_toes": 4, "mouth_full": 9, "slim_humanoid": 4, "rider": 6, "half_body": 4, "pig_humanoid": 3, "blue_neck": 3, "nervous_expression": 5, "pink_legs": 3}, "extra_evidence": {"2_tails": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4672}, "abs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3223}, "action_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.617}, "amber_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4076}, "animal_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6159}, 
"anthro": {"source": "structural"}, "blue_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.48}, "blue_fingers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5077}, "blue_neck": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5222}, "blue_pawpads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4891}, "blue_paws": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4986}, "blue_toes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5148}, "body_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.305}, "bovid": {"source": "implied"}, "bovid_humanoid": {"source": "implied"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9003}, "caprine": {"source": "implied"}, "caprine_humanoid": {"source": "implied"}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "female_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.563}, "fighting_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4593}, "goat_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5534}, "grey_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4662}, "half-closed_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3629}, "half_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4115}, "heterochromia": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4423}, "holding_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5079}, "humanoid": {"source": "implied"}, "inner_ear_fluff": {"source": "implied"}, "jagged_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5168}, "male": {"source": "structural"}, "male_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5627}, "mammal_humanoid": {"source": "implied"}, "melee_weapon": {"source": "implied"}, 
"mouth_full": {"source": "stage3", "why": "explicit", "retrieval_score": 0.458}, "multi_tail": {"source": "implied"}, "muscular": {"source": "implied"}, "muscular_male": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3102}, "narrow_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5074}, "narrowed_eyes": {"source": "implied"}, "nervous_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4772}, "no_irises": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4008}, "open_smile": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4868}, "paw_pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5582}, "pawpads": {"source": "implied"}, "pig_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5894}, "pink_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5255}, "pink_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5285}, "pink_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5127}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.682}, "pink_tongue": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4215}, "polearm": {"source": "implied"}, "pose": {"source": "implied"}, "pupils": {"source": "implied"}, "purple_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5414}, "purple_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5577}, "rider": {"source": "stage3", "why": "explicit", "retrieval_score": 0.2712}, "slim_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.588}, "slit_pupils": {"source": "stage3", "why": "explicit", "retrieval_score": 0.396}, "smile": {"source": "implied"}, "striped_neck": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5948}, "stripes": {"source": "implied"}, "suid": {"source": "implied"}, "suid_humanoid": {"source": "implied"}, "suina": {"source": "implied"}, 
"suina_humanoid": {"source": "implied"}, "tail": {"source": "implied"}, "teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3603}, "tongue": {"source": "implied"}, "trident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.2683}, "tuft": {"source": "implied"}, "two_tone_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5197}, "wave": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3632}, "weapon": {"source": "implied"}, "white_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.597}, "white_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5202}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 1.23, "t2": 2.07, "t3": 0.74, "t3s": 2.63, "t3p": 2.06, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=205 entity=4 copyright_filtered=2 generic_char_to_general=0 unknown_type=5"]}
|
data/eval_results/smoke_no_why_explicit_only_n1.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T04:05:15.045798", "n_samples": 1, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 1, "n_issues_total": 2}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 16, "n_selected": 32, "n_implied": 16, "n_structural": 3, "n_probe": 4, "ret_R": 0.2727, "P": 0.5, "R": 0.7273, "F1": 0.5926, "leaf_P": 0.4167, "leaf_R": 0.3846, "leaf_F1": 0.4, "n_leaf_sel": 12, "n_leaf_gt": 13, "ret_P": 0.375, "sel_given_ret": 2.6667, "over_sel": 1.45, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.7273, "gen_F1": 0.5926, "missed": ["bass_guitar", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["bottomwear", "canis", "denim", "denim_clothing", "flowing_hair", "jeans", "pants", "pastel_background", "playing_guitar", "playing_music", "pose", "string", "torn_bottomwear", "torn_jeans", "torn_pants", "wolf"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bottomwear", "canid", "canine", "canis", "claws", "clothed", "clothing", "denim", "denim_clothing", "flowing_hair", "guitar", "hair", "jeans", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "pose", "solo", "spade_tail", "string", "string_instrument", "tail", 
"torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants", "wolf"], "stage3_selected": ["claws", "flowing_hair", "guitar", "pastel_background", "playing_guitar", "pose", "spade_tail", "string", "tail", "torn_jeans", "wolf"], "stage3_selected_scores": {"tail": 0.5423, "claws": 0.5488, "wolf": 0.564, "pose": 0.5518, "spade_tail": 0.5579, "guitar": 0.9729, "playing_guitar": 0.9849, "torn_jeans": 0.4765, "string": 0.5804, "flowing_hair": 0.5336, "pastel_background": 0.542}, "stage3_selected_ranks": {"tail": 10, "claws": 9, "wolf": 4, "pose": 7, "spade_tail": 6, "guitar": 2, "playing_guitar": 1, "torn_jeans": 16, "string": 3, "flowing_hair": 13, "pastel_background": 11}, "stage3_selected_phrase_ranks": {"tail": 1, "claws": 1, "wolf": 1, "pose": 1, "spade_tail": 1, "guitar": 1, "playing_guitar": 1, "torn_jeans": 1, "string": 1, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "canis": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5336}, "jeans": {"source": "implied"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.542}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9849}, "playing_music": {"source": "implied"}, "pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5518}, "string": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5804}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4765}, "torn_pants": {"source": "implied"}, "wolf": {"source": "stage3", "why": "explicit", "retrieval_score": 0.564}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "clothing", "canid", "anthro"], "t1": 2.43, "t2": 3.78, "t3": 1.97, "t3s": 7.61, "t3p": 3.89, "err": null, "issues": ["Stage3 split: 
general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
data/eval_results/smoke_no_why_explicit_only_n1_v2.jsonl
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-03T04:05:53.753317", "n_samples": 1, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 1, "n_issues_total": 2}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 20, "n_selected": 27, "n_implied": 16, "n_structural": 3, "n_probe": 3, "ret_R": 0.2727, "P": 0.5556, "R": 0.6818, "F1": 0.6122, "leaf_P": 0.5, "leaf_R": 0.3846, "leaf_F1": 0.4348, "n_leaf_sel": 10, "n_leaf_gt": 13, "ret_P": 0.3, "sel_given_ret": 2.5, "over_sel": 1.23, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 7, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5556, "gen_R": 0.6818, "gen_F1": 0.6122, "missed": ["bass_guitar", "canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["bottomwear", "denim", "denim_clothing", "flowing_hair", "jeans", "pants", "pastel_background", "playing_guitar", "playing_music", "torn_bottomwear", "torn_jeans", "torn_pants"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bottomwear", "canid", "claws", "clothed", "clothing", "denim", "denim_clothing", "flowing_hair", "guitar", "hair", "jeans", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants"], "stage3_selected": 
["claws", "flowing_hair", "guitar", "pastel_background", "playing_guitar", "spade_tail", "torn_jeans"], "stage3_selected_scores": {"claws": 0.5465, "spade_tail": 0.5572, "guitar": 0.9726, "playing_guitar": 0.9847, "torn_jeans": 0.4758, "flowing_hair": 0.5328, "pastel_background": 0.5404}, "stage3_selected_ranks": {"claws": 9, "spade_tail": 5, "guitar": 2, "playing_guitar": 1, "torn_jeans": 18, "flowing_hair": 14, "pastel_background": 12}, "stage3_selected_phrase_ranks": {"claws": 1, "spade_tail": 1, "guitar": 1, "playing_guitar": 1, "torn_jeans": 1, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5328}, "jeans": {"source": "implied"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "unknown", "retrieval_score": 0.5404}, "playing_guitar": {"source": "stage3", "why": "unknown", "retrieval_score": 0.9847}, "playing_music": {"source": "implied"}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "unknown", "retrieval_score": 0.4758}, "torn_pants": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed"], "probe": ["solo", "canid", "anthro"], "t1": 2.25, "t2": 3.46, "t3": 3.76, "t3s": 4.18, "t3p": 6.08, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
data/eval_results/why_gate_compare_explicit_n10.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T12:34:17.390682", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "explicit", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 20, "n_selected": 27, "n_implied": 16, "n_structural": 3, "n_probe": 3, "ret_R": 0.2727, "P": 0.5556, "R": 0.6818, "F1": 0.6122, "leaf_P": 0.5, "leaf_R": 0.3846, "leaf_F1": 0.4348, "n_leaf_sel": 10, "n_leaf_gt": 13, "ret_P": 0.3, "sel_given_ret": 2.5, "over_sel": 1.23, "why": {"explicit": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"22": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5556, "gen_R": 0.6818, "gen_F1": 0.6122, "missed": ["bass_guitar", "canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["bottomwear", "denim", "denim_clothing", "flowing_hair", "jeans", "pants", "pastel_background", "playing_guitar", "playing_music", "torn_bottomwear", "torn_jeans", "torn_pants"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bottomwear", "canid", "claws", "clothed", "clothing", "denim", "denim_clothing", "flowing_hair", "guitar", "hair", "jeans", "mammal", "musical_instrument", "pants", "pastel_background", "playing_guitar", "playing_music", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants"], 
"stage3_selected": ["claws", "flowing_hair", "pastel_background", "playing_guitar", "spade_tail", "tail", "torn_jeans"], "stage3_selected_scores": {"tail": 0.5404, "claws": 0.5465, "spade_tail": 0.5572, "playing_guitar": 0.9847, "torn_jeans": 0.4758, "flowing_hair": 0.5328, "pastel_background": 0.5404}, "stage3_selected_ranks": {"tail": 11, "claws": 9, "spade_tail": 5, "playing_guitar": 1, "torn_jeans": 18, "flowing_hair": 14, "pastel_background": 12}, "stage3_selected_phrase_ranks": {"tail": 1, "claws": 1, "spade_tail": 1, "playing_guitar": 1, "torn_jeans": 1, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5328}, "jeans": {"source": "implied"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5404}, "playing_guitar": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9847}, "playing_music": {"source": "implied"}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4758}, "torn_pants": {"source": "implied"}}, "structural": ["solo", "anthro", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.88, "t2": 3.41, "t3": 4.5, "t3s": 3.76, "t3p": 5.46, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=22 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 11, "n_selected": 13, "n_implied": 0, "n_structural": 4, "n_probe": 3, "ret_R": 0.75, "P": 0.3077, "R": 1.0, "F1": 0.4706, "leaf_P": 0.3077, "leaf_R": 1.0, "leaf_F1": 0.4706, "n_leaf_sel": 13, "n_leaf_gt": 4, "ret_P": 0.2727, "sel_given_ret": 1.3333, "over_sel": 3.25, "why": {"explicit": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"14": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3077, "gen_R": 1.0, "gen_F1": 0.4706, "missed": [], "extra": ["ambiguous_gender", "anthro", "big_eyes", "cartoon", "eyes", "feral", "nose", "nude", "spots"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "big_eyes", "cartoon", "eyes", "feral", "nose", "nude", "red_nose", "smile", "solo", "spots", "tan_body"], "stage3_selected": ["big_eyes", "cartoon", "eyes", "nose", "red_nose", "smile", "spots", "tan_body", "white_background"], "stage3_selected_scores": {"smile": 0.6013, "white_background": 0.6138, "tan_body": 0.6627, "spots": 0.6272, "big_eyes": 0.696, "red_nose": 0.7501, "cartoon": 0.5003, "nose": 0.8607, "eyes": 0.9241}, "stage3_selected_ranks": {"smile": 10, "white_background": 9, "tan_body": 5, "spots": 8, "big_eyes": 4, "red_nose": 3, "cartoon": 13, "nose": 2, "eyes": 1}, "stage3_selected_phrase_ranks": {"smile": 1, "white_background": 1, "tan_body": 1, "spots": 1, "big_eyes": 1, "red_nose": 1, "cartoon": 1, "nose": 1, "eyes": 1}, "extra_evidence": {"ambiguous_gender": 
{"source": "structural"}, "anthro": {"source": "probe"}, "big_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.696}, "cartoon": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5003}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9241}, "feral": {"source": "structural"}, "nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8607}, "nude": {"source": "structural"}, "spots": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6272}}, "structural": ["solo", "feral", "ambiguous_gender", "nude"], "probe": ["simple_background", "anthro", "solo"], "t1": 1.45, "t2": 1.09, "t3": 2.54, "t3s": 0.76, "t3p": 0.94, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=14 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 15, "n_selected": 15, "n_implied": 3, "n_structural": 4, "n_probe": 5, "ret_R": 0.3571, "P": 0.5333, "R": 0.5714, "F1": 0.5517, "leaf_P": 0.5455, "leaf_R": 0.6667, "leaf_F1": 0.6, "n_leaf_sel": 11, "n_leaf_gt": 9, "ret_P": 0.3333, "sel_given_ret": 1.6, "over_sel": 1.07, "why": {"explicit": 5}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5333, "gen_R": 0.5714, "gen_F1": 0.5517, "missed": ["lagomorph", "leporid", "mammal", "rabbit", "romantic", "romantic_couple"], "extra": ["<3", "coat", "holding_object", "holding_plushie", "looking_at_viewer", "relationship", "topwear"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "holding_object", "holding_plushie", "looking_at_viewer", "plushie", "relationship", "teal_eyes", "topwear"], "stage3_selected": ["blue_eyes", "coat", "holding_plushie", "relationship", "teal_eyes"], "stage3_selected_scores": {"blue_eyes": 0.6151, "coat": 0.6383, "teal_eyes": 0.6283, "holding_plushie": 0.7793, "relationship": 0.6206}, "stage3_selected_ranks": {"blue_eyes": 9, "coat": 5, "teal_eyes": 6, "holding_plushie": 2, "relationship": 7}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "coat": 1, "teal_eyes": 1, "holding_plushie": 
1, "relationship": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "looking_at_viewer": {"source": "structural"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 2.38, "t2": 1.53, "t3": 3.67, "t3s": 0.93, "t3p": 1.03, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 17, "n_selected": 28, "n_implied": 13, "n_structural": 4, "n_probe": 3, "ret_R": 0.48, "P": 0.7143, "R": 0.8, "F1": 0.7547, "leaf_P": 0.7143, "leaf_R": 0.6667, "leaf_F1": 0.6897, "n_leaf_sel": 14, "n_leaf_gt": 15, "ret_P": 0.7059, "sel_given_ret": 1.6667, "over_sel": 1.12, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 16, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7143, "gen_R": 0.8, "gen_F1": 0.7547, "missed": ["canid", "canine", "crossed_arms", "fox", "looking_at_another"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "looking_at_viewer", "open_mouth", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "bottomwear", "claws", "clothed", "clothing", "duo", "facial_markings", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "rabbit", "shirt", "standing", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": ["black_pants", "claws", "facial_markings", "fur", 
"grey_background", "open_mouth", "overalls", "rabbit", "shirt", "standing", "white_shirt"], "stage3_selected_scores": {"fur": 0.6532, "open_mouth": 0.6331, "claws": 0.6304, "standing": 0.6879, "shirt": 0.7484, "rabbit": 0.6511, "grey_background": 0.6785, "facial_markings": 0.6946, "white_shirt": 0.8198, "overalls": 0.8776, "black_pants": 0.8331}, "stage3_selected_ranks": {"fur": 12, "open_mouth": 15, "claws": 16, "standing": 9, "shirt": 6, "rabbit": 13, "grey_background": 11, "facial_markings": 8, "white_shirt": 4, "overalls": 2, "black_pants": 3}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "standing": 1, "shirt": 1, "rabbit": 1, "grey_background": 1, "facial_markings": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8331}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6331}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8198}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 1.78, "t2": 1.56, "t3": 3.98, "t3s": 0.93, "t3p": 3.06, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 21, "n_selected": 19, "n_implied": 5, "n_structural": 4, "n_probe": 4, "ret_R": 0.3077, "P": 0.5263, "R": 0.7692, "F1": 0.625, "leaf_P": 0.3846, "leaf_R": 0.8333, "leaf_F1": 0.5263, "n_leaf_sel": 13, "n_leaf_gt": 6, "ret_P": 0.1905, "sel_given_ret": 2.5, "over_sel": 1.46, "why": {"explicit": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 20, "attempts_by_n_local": {"20": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5263, "gen_R": 0.7692, "gen_F1": 0.625, "missed": ["fur", "white_body", "white_fur"], "extra": ["anthro", "clothed", "clothing", "darkness", "group", "light", "lying_on_ground", "note", "solo"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "bovid", "caprine", "clothed", "clothing", "darkness", "dialogue", "goat", "group", "human", "light", "lizard", "lying_on_ground", "mammal", "note", "reptile", "scalie", "solo", "text"], "stage3_selected": ["darkness", "dialogue", "goat", "group", "human", "light", "lizard", "lying_on_ground", "note"], "stage3_selected_scores": {"dialogue": 0.7405, "group": 0.6236, "human": 0.664, "lizard": 0.8364, "goat": 0.775, "light": 0.7785, "lying_on_ground": 0.7876, "darkness": 0.8348, "note": 0.7377}, "stage3_selected_ranks": {"dialogue": 10, "group": 15, "human": 13, "lizard": 1, "goat": 5, "light": 4, "lying_on_ground": 3, "darkness": 2, "note": 11}, 
"stage3_selected_phrase_ranks": {"dialogue": 1, "group": 1, "human": 1, "lizard": 1, "goat": 1, "light": 1, "lying_on_ground": 1, "darkness": 1, "note": 1}, "extra_evidence": {"anthro": {"source": "probe"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "darkness": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8348}, "group": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6236}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7785}, "lying_on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7876}, "note": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7377}, "solo": {"source": "structural"}}, "structural": ["solo", "group", "clothed", "text"], "probe": ["clothing", "simple_background", "anthro", "text"], "t1": 2.79, "t2": 1.87, "t3": 5.37, "t3s": 0.99, "t3p": 3.86, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=20 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=0"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 24, "n_selected": 15, "n_implied": 1, "n_structural": 3, "n_probe": 2, "ret_R": 0.6, "P": 0.6667, "R": 0.6667, "F1": 0.6667, "leaf_P": 0.5714, "leaf_R": 0.6667, "leaf_F1": 0.6154, "n_leaf_sel": 14, "n_leaf_gt": 12, "ret_P": 0.375, "sel_given_ret": 1.1111, "over_sel": 1.0, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 18, "attempts_by_n_local": {"25": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.6667, "gen_R": 0.6667, "gen_F1": 0.6667, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture"], "extra": ["annoyed_expression", "anthro", "atmosphere", "humanoid", "playful"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "atmosphere", "blonde_hair", "blue_eyes", "duo", "green_eyes", "hair", "humanoid", "lying", "makeup", "playful", "purple_hair", "sleeping", "text"], "stage3_selected": ["annoyed_expression", "atmosphere", "blonde_hair", "blue_eyes", "green_eyes", "lying", "makeup", "playful", "purple_hair", "sleeping", "text"], "stage3_selected_scores": {"text": 0.6007, "blue_eyes": 0.6013, "lying": 0.4494, "green_eyes": 0.5989, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "sleeping": 0.6027, "playful": 0.4463, "annoyed_expression": 0.7251, "atmosphere": 0.5048}, "stage3_selected_ranks": {"text": 8, "blue_eyes": 7, 
"lying": 22, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 13, "makeup": 11, "sleeping": 6, "playful": 23, "annoyed_expression": 2, "atmosphere": 18}, "stage3_selected_phrase_ranks": {"text": 1, "blue_eyes": 1, "lying": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "sleeping": 1, "playful": 1, "annoyed_expression": 1, "atmosphere": 1}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "atmosphere": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5048}, "humanoid": {"source": "structural"}, "playful": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4463}}, "structural": ["duo", "humanoid", "text"], "probe": ["anthro", "duo"], "t1": 1.99, "t2": 2.09, "t3": 3.25, "t3s": 0.99, "t3p": 2.55, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=25 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 23, "n_selected": 23, "n_implied": 7, "n_structural": 5, "n_probe": 3, "ret_R": 0.5455, "P": 0.3478, "R": 0.7273, "F1": 0.4706, "leaf_P": 0.2857, "leaf_R": 0.5714, "leaf_F1": 0.381, "n_leaf_sel": 14, "n_leaf_gt": 7, "ret_P": 0.2609, "sel_given_ret": 1.3333, "over_sel": 2.09, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 12, "attempts_by_n_local": {"26": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3478, "gen_R": 0.7273, "gen_F1": 0.4706, "missed": ["open_mouth", "white_body", "white_fur"], "extra": ["animal_humanoid", "anthro", "blue_stripes", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "humanoid", "intersex", "looking_at_viewer", "mammal_humanoid", "pink_stripes", "stripes", "tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["animal_humanoid", "anthro", "blue_eyes", "blue_nose", "blue_stripes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "fur", "humanoid", "intersex", "looking_at_viewer", "mammal", "mammal_humanoid", "pink_stripes", "purple_body", "solo", "stripes", "tail"], "stage3_selected": ["blue_eyes", "blue_nose", "blue_stripes", "canine_humanoid", "curved_tail", "fur", "humanoid", "pink_stripes", "purple_body", "simple_background", "tail"], "stage3_selected_scores": {"fur": 0.5887, "simple_background": 0.5994, "tail": 
0.6162, "blue_eyes": 0.6045, "humanoid": 0.675, "purple_body": 0.5693, "canine_humanoid": 0.9013, "blue_nose": 0.6049, "blue_stripes": 0.6786, "pink_stripes": 0.6846, "curved_tail": 0.6409}, "stage3_selected_ranks": {"fur": 18, "simple_background": 17, "tail": 10, "blue_eyes": 14, "humanoid": 6, "purple_body": 20, "canine_humanoid": 1, "blue_nose": 13, "blue_stripes": 5, "pink_stripes": 4, "curved_tail": 7}, "stage3_selected_phrase_ranks": {"fur": 1, "simple_background": 1, "tail": 1, "blue_eyes": 1, "humanoid": 1, "purple_body": 1, "canine_humanoid": 1, "blue_nose": 1, "blue_stripes": 1, "pink_stripes": 1, "curved_tail": 1}, "extra_evidence": {"animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "blue_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6786}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9013}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6409}, "humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.675}, "intersex": {"source": "structural"}, "looking_at_viewer": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "pink_stripes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6846}, "stripes": {"source": "implied"}, "tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6162}}, "structural": ["solo", "anthro", "intersex", "clothed", "looking_at_viewer"], "probe": ["anthro", "canid", "solo"], "t1": 8.0, "t2": 2.08, "t3": 3.24, "t3s": 1.22, "t3p": 1.73, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=26 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=4"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 20, "n_selected": 23, "n_implied": 6, "n_structural": 5, "n_probe": 3, "ret_R": 0.1818, "P": 0.5652, "R": 0.5909, "F1": 0.5778, "leaf_P": 0.2667, "leaf_R": 0.3333, "leaf_F1": 0.2963, "n_leaf_sel": 15, "n_leaf_gt": 12, "ret_P": 0.2, "sel_given_ret": 3.25, "over_sel": 1.05, "why": {"explicit": 11}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 15, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5652, "gen_R": 0.5909, "gen_F1": 0.5778, "missed": ["chest_tuft", "countershading", "hand_on_head", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "tuft"], "extra": ["countershade_body", "eyes", "gesture", "pose", "raised_hand", "striped_body", "striped_fur", "white_chest", "yellow_bottomwear", "yellow_clothing"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "eyes", "felid", "fur", "gesture", "male", "mammal", "pose", "raised_hand", "shorts", "solo", "striped_body", "striped_fur", "stripes", "topless", "white_chest", "yellow_bottomwear", "yellow_clothing"], "stage3_selected": ["blue_eyes", "countershade_body", "eyes", "fur", "gesture", "pose", "raised_hand", "shorts", "striped_fur", 
"white_chest", "yellow_bottomwear"], "stage3_selected_scores": {"fur": 0.597, "blue_eyes": 0.5852, "pose": 0.638, "shorts": 0.5953, "gesture": 0.6013, "striped_fur": 0.6559, "raised_hand": 0.7033, "yellow_bottomwear": 0.6671, "white_chest": 0.9245, "countershade_body": 0.872, "eyes": 0.9776}, "stage3_selected_ranks": {"fur": 14, "blue_eyes": 16, "pose": 10, "shorts": 15, "gesture": 13, "striped_fur": 9, "raised_hand": 6, "yellow_bottomwear": 8, "white_chest": 2, "countershade_body": 3, "eyes": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "blue_eyes": 1, "pose": 1, "shorts": 1, "gesture": 1, "striped_fur": 1, "raised_hand": 1, "yellow_bottomwear": 1, "white_chest": 1, "countershade_body": 1, "eyes": 1}, "extra_evidence": {"countershade_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.872}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9776}, "gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6013}, "pose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.638}, "raised_hand": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7033}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6559}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9245}, "yellow_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6671}, "yellow_clothing": {"source": "implied"}}, "structural": ["solo", "anthro", "male", "clothed", "topless"], "probe": ["anthro", "felid", "solo"], "t1": 1.82, "t2": 1.77, "t3": 2.75, "t3s": 0.94, "t3p": 1.67, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 15, "n_selected": 24, "n_implied": 7, "n_structural": 4, "n_probe": 5, "ret_R": 0.25, "P": 0.4167, "R": 0.8333, "F1": 0.5556, "leaf_P": 0.3846, "leaf_R": 0.5556, "leaf_F1": 0.4545, "n_leaf_sel": 13, "n_leaf_gt": 9, "ret_P": 0.2, "sel_given_ret": 3.3333, "over_sel": 2.0, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 18, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4167, "gen_R": 0.8333, "gen_F1": 0.5556, "missed": ["fingers", "male"], "extra": ["black_body", "black_fur", "holding_mug", "holding_object", "mug", "necktie", "shirt", "teal_shirt", "teal_topwear", "text", "topless", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["alpha_channel", "anthro", "black_body", "black_fur", "clothed", "clothing", "felid", "feline", "fur", "hair", "holding_mug", "holding_object", "mammal", "mug", "necktie", "shirt", "solo", "teal_shirt", "teal_topwear", "text", "topless", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "feline", "fur", "hair", "holding_mug", "mug", "necktie", "shirt", "teal_shirt", "transparent_background", "vest", "white_necktie"], "stage3_selected_scores": {"hair": 0.7279, "fur": 0.7575, "feline": 0.7328, "shirt": 0.8216, "black_fur": 0.7477, "necktie": 0.7525, "transparent_background": 0.7407, "vest": 0.8646, "mug": 0.8935, 
"holding_mug": 0.9171, "teal_shirt": 0.7462, "white_necktie": 0.6377}, "stage3_selected_ranks": {"hair": 13, "fur": 6, "feline": 12, "shirt": 4, "black_fur": 9, "necktie": 8, "transparent_background": 11, "vest": 3, "mug": 2, "holding_mug": 1, "teal_shirt": 10, "white_necktie": 16}, "stage3_selected_phrase_ranks": {"hair": 1, "fur": 1, "feline": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "transparent_background": 1, "vest": 1, "mug": 1, "holding_mug": 1, "teal_shirt": 1, "white_necktie": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7477}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9171}, "holding_object": {"source": "implied"}, "mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8935}, "necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7525}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8216}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7462}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topless": {"source": "structural"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8646}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6377}}, "structural": ["solo", "anthro", "clothed", "topless"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 1.76, "t2": 1.82, "t3": 2.02, "t3s": 2.36, "t3p": 1.38, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 18, "n_selected": 22, "n_implied": 5, "n_structural": 5, "n_probe": 5, "ret_R": 0.5, "P": 0.5, "R": 0.7857, "F1": 0.6111, "leaf_P": 0.3571, "leaf_R": 0.5, "leaf_F1": 0.4167, "n_leaf_sel": 14, "n_leaf_gt": 10, "ret_P": 0.3889, "sel_given_ret": 1.5714, "over_sel": 1.57, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"21": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5, "gen_R": 0.7857, "gen_F1": 0.6111, "missed": ["fur", "hair", "human"], "extra": ["anthro", "bottomwear", "cheeky", "duo", "grin", "laugh", "loincloth", "raised_arms", "smile", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "bottomwear", "cheeky", "clothed", "clothing", "dancing", "duo", "grin", "group", "haplorhine", "laugh", "loincloth", "looking_at_viewer", "male", "mammal", "primate", "raised_arms", "smile", "topless", "trio"], "stage3_selected": ["ape", "bear", "cheeky", "dancing", "grin", "laugh", "loincloth", "looking_at_viewer", "male", "primate", "raised_arms", "simple_background"], "stage3_selected_scores": {"male": 0.5579, "simple_background": 0.5466, "looking_at_viewer": 0.5455, "bear": 0.5731, "grin": 0.5635, "primate": 0.89, "loincloth": 0.5677, "dancing": 0.5556, "laugh": 0.5253, "ape": 0.9764, "raised_arms": 0.5437, "cheeky": 0.3888}, 
"stage3_selected_ranks": {"male": 6, "simple_background": 8, "looking_at_viewer": 9, "bear": 3, "grin": 5, "primate": 2, "loincloth": 4, "dancing": 7, "laugh": 13, "ape": 1, "raised_arms": 10, "cheeky": 20}, "stage3_selected_phrase_ranks": {"male": 1, "simple_background": 1, "looking_at_viewer": 1, "bear": 1, "grin": 1, "primate": 1, "loincloth": 1, "dancing": 1, "laugh": 1, "ape": 1, "raised_arms": 1, "cheeky": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "bottomwear": {"source": "implied"}, "cheeky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3888}, "duo": {"source": "probe"}, "grin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5635}, "laugh": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5253}, "loincloth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5677}, "raised_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5437}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "male", "clothed", "topless"], "probe": ["anthro", "duo", "group", "bear", "simple_background"], "t1": 8.77, "t2": 2.21, "t3": 4.46, "t3s": 1.39, "t3p": 1.6, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=21 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
data/eval_results/why_gate_compare_strong_implied_n10.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-03-02T12:32:32.397015", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "eval_path": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl", "per_phrase_k": 2, "per_phrase_final_k": 1, "temperature": 0.0, "shuffle": true, "seed": 42, "workers": 1, "min_why": "strong_implied", "expand_implications": true, "infer_structural": true, "infer_probe": true, "n_errors": 0, "n_issue_samples": 10, "n_issues_total": 20}
|
| 2 |
+
{"id": 17482, "n_gt": 22, "n_retrieved": 14, "n_selected": 26, "n_implied": 15, "n_structural": 4, "n_probe": 3, "ret_R": 0.2273, "P": 0.5769, "R": 0.6818, "F1": 0.625, "leaf_P": 0.4545, "leaf_R": 0.3846, "leaf_F1": 0.4167, "n_leaf_sel": 11, "n_leaf_gt": 13, "ret_P": 0.3571, "sel_given_ret": 3.0, "over_sel": 1.18, "why": {"explicit": 6}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 11, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5769, "gen_R": 0.6818, "gen_F1": 0.625, "missed": ["bass_guitar", "canine", "fingers", "fur", "holding_musical_instrument", "holding_object", "music"], "extra": ["bottomwear", "denim", "denim_clothing", "flowing_hair", "jeans", "male", "pants", "pastel_background", "torn_bottomwear", "torn_jeans", "torn_pants"], "ground_truth_tags": ["anthro", "bass_guitar", "canid", "canine", "claws", "clothed", "clothing", "fingers", "fur", "guitar", "hair", "holding_musical_instrument", "holding_object", "mammal", "music", "musical_instrument", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_clothing"], "selected_tags": ["anthro", "bottomwear", "canid", "claws", "clothed", "clothing", "denim", "denim_clothing", "flowing_hair", "guitar", "hair", "jeans", "male", "mammal", "musical_instrument", "pants", "pastel_background", "plucked_string_instrument", "solo", "spade_tail", "string_instrument", "tail", "torn_bottomwear", "torn_clothing", "torn_jeans", "torn_pants"], "stage3_selected": ["claws", "flowing_hair", "guitar", 
"pastel_background", "spade_tail", "torn_jeans"], "stage3_selected_scores": {"claws": 0.5684, "spade_tail": 0.618, "guitar": 0.9623, "torn_jeans": 0.4824, "flowing_hair": 0.5669, "pastel_background": 0.5632}, "stage3_selected_ranks": {"claws": 8, "spade_tail": 3, "guitar": 1, "torn_jeans": 15, "flowing_hair": 9, "pastel_background": 11}, "stage3_selected_phrase_ranks": {"claws": 1, "spade_tail": 1, "guitar": 1, "torn_jeans": 1, "flowing_hair": 1, "pastel_background": 1}, "extra_evidence": {"bottomwear": {"source": "implied"}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "flowing_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5669}, "jeans": {"source": "implied"}, "male": {"source": "structural"}, "pants": {"source": "implied"}, "pastel_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5632}, "torn_bottomwear": {"source": "implied"}, "torn_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4824}, "torn_pants": {"source": "implied"}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.17, "t2": 2.75, "t3": 4.15, "t3s": 4.67, "t3p": 4.24, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 3 |
+
{"id": 1624724, "n_gt": 4, "n_retrieved": 1, "n_selected": 6, "n_implied": 0, "n_structural": 5, "n_probe": 3, "ret_R": 0.0, "P": 0.1667, "R": 0.25, "F1": 0.2, "leaf_P": 0.1667, "leaf_R": 0.25, "leaf_F1": 0.2, "n_leaf_sel": 6, "n_leaf_gt": 4, "ret_P": 0.0, "sel_given_ret": 0.0, "over_sel": 1.5, "why": {}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 18, "dupe_indices_total": 0, "kept_total": 1, "attempts_by_n_local": {"1": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1667, "gen_R": 0.25, "gen_F1": 0.2, "missed": ["red_nose", "smile", "tan_body"], "extra": ["ambiguous_gender", "anthro", "feral", "looking_at_viewer", "nude"], "ground_truth_tags": ["red_nose", "smile", "solo", "tan_body"], "selected_tags": ["ambiguous_gender", "anthro", "feral", "looking_at_viewer", "nude", "solo"], "stage3_selected": [], "stage3_selected_scores": {}, "stage3_selected_ranks": {}, "stage3_selected_phrase_ranks": {}, "extra_evidence": {"ambiguous_gender": {"source": "structural"}, "anthro": {"source": "probe"}, "feral": {"source": "structural"}, "looking_at_viewer": {"source": "structural"}, "nude": {"source": "structural"}}, "structural": ["solo", "feral", "ambiguous_gender", "nude", "looking_at_viewer"], "probe": ["simple_background", "anthro", "solo"], "t1": 8.13, "t2": 0.12, "t3": 6.49, "t3s": 1.65, "t3p": 4.77, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=1 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 4 |
+
{"id": 1078019, "n_gt": 14, "n_retrieved": 15, "n_selected": 27, "n_implied": 6, "n_structural": 4, "n_probe": 5, "ret_R": 0.3571, "P": 0.5185, "R": 1.0, "F1": 0.6829, "leaf_P": 0.4211, "leaf_R": 0.8889, "leaf_F1": 0.5714, "n_leaf_sel": 19, "n_leaf_gt": 9, "ret_P": 0.3333, "sel_given_ret": 2.8, "over_sel": 1.93, "why": {"explicit": 5, "strong_implied": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"16": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5185, "gen_R": 1.0, "gen_F1": 0.6829, "missed": [], "extra": ["<3", "coat", "holding_object", "holding_plushie", "looking_at_viewer", "relationship", "relaxed_expression", "round_eyes", "setting", "small_eyes", "surprised_expression", "topwear", "worried"], "ground_truth_tags": ["anthro", "blue_eyes", "blush", "clothed", "clothing", "duo", "lagomorph", "leporid", "mammal", "plushie", "rabbit", "romantic", "romantic_couple", "teal_eyes"], "selected_tags": ["<3", "anthro", "blue_eyes", "blush", "clothed", "clothing", "coat", "duo", "holding_object", "holding_plushie", "lagomorph", "leporid", "looking_at_viewer", "mammal", "plushie", "rabbit", "relationship", "relaxed_expression", "romantic", "romantic_couple", "round_eyes", "setting", "small_eyes", "surprised_expression", "teal_eyes", "topwear", "worried"], "stage3_selected": ["blue_eyes", "coat", "holding_plushie", "plushie", "rabbit", "relationship", "relaxed_expression", "romantic_couple", "round_eyes", "setting", "small_eyes", "surprised_expression", "teal_eyes", 
"worried"], "stage3_selected_scores": {"blue_eyes": 0.6151, "coat": 0.6383, "teal_eyes": 0.6283, "holding_plushie": 0.7793, "relationship": 0.6206, "rabbit": 0.5939, "romantic_couple": 0.5621, "plushie": 0.7455, "worried": 0.5495, "surprised_expression": 0.639, "small_eyes": 0.6187, "round_eyes": 0.4887, "relaxed_expression": 0.5218, "setting": 0.5567}, "stage3_selected_ranks": {"blue_eyes": 9, "coat": 5, "teal_eyes": 6, "holding_plushie": 2, "relationship": 7, "rabbit": 10, "romantic_couple": 12, "plushie": 3, "worried": 14, "surprised_expression": 4, "small_eyes": 8, "round_eyes": 16, "relaxed_expression": 15, "setting": 13}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "coat": 1, "teal_eyes": 1, "holding_plushie": 1, "relationship": 1, "rabbit": 1, "romantic_couple": 1, "plushie": 1, "worried": 1, "surprised_expression": 1, "small_eyes": 1, "round_eyes": 1, "relaxed_expression": 1, "setting": 1}, "extra_evidence": {"<3": {"source": "probe"}, "coat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6383}, "holding_object": {"source": "implied"}, "holding_plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7793}, "looking_at_viewer": {"source": "structural"}, "relationship": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6206}, "relaxed_expression": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5218}, "round_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4887}, "setting": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5567}, "small_eyes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6187}, "surprised_expression": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.639}, "topwear": {"source": "implied"}, "worried": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5495}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["clothing", "anthro", "blush", "duo", "<3"], "t1": 
1.8, "t2": 1.52, "t3": 7.56, "t3s": 1.82, "t3p": 1.0, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=16 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 5 |
+
{"id": 2021552, "n_gt": 25, "n_retrieved": 17, "n_selected": 30, "n_implied": 13, "n_structural": 4, "n_probe": 3, "ret_R": 0.48, "P": 0.7, "R": 0.84, "F1": 0.7636, "leaf_P": 0.6875, "leaf_R": 0.7333, "leaf_F1": 0.7097, "n_leaf_sel": 16, "n_leaf_gt": 15, "ret_P": 0.7059, "sel_given_ret": 1.75, "over_sel": 1.2, "why": {"explicit": 11, "strong_implied": 2}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 16, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.7, "gen_R": 0.84, "gen_F1": 0.7636, "missed": ["canid", "canine", "fox", "looking_at_another"], "extra": ["black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "looking_at_viewer", "open_mouth", "white_clothing", "white_shirt", "white_topwear"], "ground_truth_tags": ["anthro", "bottomwear", "canid", "canine", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fox", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_another", "mammal", "markings", "overalls", "pants", "rabbit", "shirt", "standing", "topwear"], "selected_tags": ["anthro", "black_bottomwear", "black_clothing", "black_pants", "blue_overalls", "bottomwear", "claws", "clothed", "clothing", "crossed_arms", "duo", "facial_markings", "fur", "grey_background", "head_markings", "lagomorph", "leporid", "looking_at_viewer", "mammal", "markings", "open_mouth", "overalls", "pants", "rabbit", "shirt", "standing", "topwear", "white_clothing", "white_shirt", "white_topwear"], "stage3_selected": 
["black_pants", "blue_overalls", "claws", "crossed_arms", "facial_markings", "fur", "grey_background", "open_mouth", "overalls", "rabbit", "shirt", "standing", "white_shirt"], "stage3_selected_scores": {"fur": 0.6532, "open_mouth": 0.6331, "claws": 0.6304, "standing": 0.6879, "shirt": 0.7484, "rabbit": 0.6511, "grey_background": 0.6785, "facial_markings": 0.6946, "white_shirt": 0.8198, "overalls": 0.8776, "black_pants": 0.8331, "crossed_arms": 0.7286, "blue_overalls": 0.9203}, "stage3_selected_ranks": {"fur": 12, "open_mouth": 15, "claws": 16, "standing": 9, "shirt": 6, "rabbit": 13, "grey_background": 11, "facial_markings": 8, "white_shirt": 4, "overalls": 2, "black_pants": 3, "crossed_arms": 7, "blue_overalls": 1}, "stage3_selected_phrase_ranks": {"fur": 1, "open_mouth": 1, "claws": 1, "standing": 1, "shirt": 1, "rabbit": 1, "grey_background": 1, "facial_markings": 1, "white_shirt": 1, "overalls": 1, "black_pants": 1, "crossed_arms": 1, "blue_overalls": 1}, "extra_evidence": {"black_bottomwear": {"source": "implied"}, "black_clothing": {"source": "implied"}, "black_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8331}, "blue_overalls": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.9203}, "looking_at_viewer": {"source": "structural"}, "open_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6331}, "white_clothing": {"source": "implied"}, "white_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8198}, "white_topwear": {"source": "implied"}}, "structural": ["duo", "anthro", "clothed", "looking_at_viewer"], "probe": ["simple_background", "anthro", "duo"], "t1": 2.43, "t2": 1.59, "t3": 4.84, "t3s": 1.56, "t3p": 3.66, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 6 |
+
{"id": 1023509, "n_gt": 13, "n_retrieved": 17, "n_selected": 21, "n_implied": 5, "n_structural": 5, "n_probe": 6, "ret_R": 0.2308, "P": 0.4286, "R": 0.6923, "F1": 0.5294, "leaf_P": 0.2667, "leaf_R": 0.6667, "leaf_F1": 0.381, "n_leaf_sel": 15, "n_leaf_gt": 6, "ret_P": 0.1765, "sel_given_ret": 3.0, "over_sel": 1.62, "why": {"explicit": 9}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 3, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 3, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 17, "attempts_by_n_local": {"17": {"attempts": 3, "parse_ok": 3, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4286, "gen_R": 0.6923, "gen_F1": 0.5294, "missed": ["dialogue", "fur", "white_body", "white_fur"], "extra": ["anthro", "clothed", "clothing", "darkness", "group", "light", "lying_on_ground", "note", "solo", "speech_bubble", "standing_over", "taur"], "ground_truth_tags": ["bovid", "caprine", "dialogue", "fur", "goat", "human", "lizard", "mammal", "reptile", "scalie", "text", "white_body", "white_fur"], "selected_tags": ["anthro", "bovid", "caprine", "clothed", "clothing", "darkness", "goat", "group", "human", "light", "lizard", "lying_on_ground", "mammal", "note", "reptile", "scalie", "solo", "speech_bubble", "standing_over", "taur", "text"], "stage3_selected": ["darkness", "goat", "human", "light", "lizard", "lying_on_ground", "note", "speech_bubble", "standing_over"], "stage3_selected_scores": {"human": 0.5572, "speech_bubble": 0.5746, "lizard": 0.5943, "goat": 0.5777, "light": 0.5824, "lying_on_ground": 0.5929, "darkness": 0.5977, "note": 0.5658, "standing_over": 0.5799}, "stage3_selected_ranks": {"human": 12, "speech_bubble": 9, "lizard": 
3, "goat": 8, "light": 6, "lying_on_ground": 4, "darkness": 2, "note": 11, "standing_over": 7}, "stage3_selected_phrase_ranks": {"human": 1, "speech_bubble": 1, "lizard": 1, "goat": 1, "light": 1, "lying_on_ground": 1, "darkness": 1, "note": 1, "standing_over": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "clothed": {"source": "structural"}, "clothing": {"source": "probe"}, "darkness": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5977}, "group": {"source": "structural"}, "light": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5824}, "lying_on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5929}, "note": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5658}, "solo": {"source": "probe"}, "speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5746}, "standing_over": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5799}, "taur": {"source": "structural"}}, "structural": ["group", "anthro", "taur", "clothed", "text"], "probe": ["clothing", "simple_background", "anthro", "text", "solo", "group"], "t1": 3.15, "t2": 1.7, "t3": 6.84, "t3s": 1.68, "t3p": 3.68, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=17 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0"]}
|
| 7 |
+
{"id": 335343, "n_gt": 15, "n_retrieved": 22, "n_selected": 17, "n_implied": 2, "n_structural": 3, "n_probe": 2, "ret_R": 0.6, "P": 0.5882, "R": 0.6667, "F1": 0.625, "leaf_P": 0.5333, "leaf_R": 0.6667, "leaf_F1": 0.5926, "n_leaf_sel": 15, "n_leaf_gt": 12, "ret_P": 0.4091, "sel_given_ret": 1.1111, "over_sel": 1.13, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 20, "attempts_by_n_local": {"23": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5882, "gen_R": 0.6667, "gen_F1": 0.625, "missed": ["angry", "bed", "eyes_closed", "eyeshadow", "furniture"], "extra": ["annoyed_expression", "anthro", "bedroom", "humanoid", "membrane_(anatomy)", "purple_membrane", "resting"], "ground_truth_tags": ["angry", "bed", "blonde_hair", "blue_eyes", "duo", "eyes_closed", "eyeshadow", "furniture", "green_eyes", "hair", "lying", "makeup", "purple_hair", "sleeping", "text"], "selected_tags": ["annoyed_expression", "anthro", "bedroom", "blonde_hair", "blue_eyes", "duo", "green_eyes", "hair", "humanoid", "lying", "makeup", "membrane_(anatomy)", "purple_hair", "purple_membrane", "resting", "sleeping", "text"], "stage3_selected": ["annoyed_expression", "bedroom", "blonde_hair", "blue_eyes", "green_eyes", "lying", "makeup", "purple_hair", "purple_membrane", "resting", "sleeping", "text"], "stage3_selected_scores": {"text": 0.6007, "blue_eyes": 0.6013, "lying": 0.4494, "green_eyes": 0.5989, "blonde_hair": 0.5986, "purple_hair": 0.5642, "makeup": 0.5965, "bedroom": 0.4901, "sleeping": 0.6027, "resting": 
0.5034, "annoyed_expression": 0.7251, "purple_membrane": 0.5791}, "stage3_selected_ranks": {"text": 8, "blue_eyes": 7, "lying": 21, "green_eyes": 9, "blonde_hair": 10, "purple_hair": 13, "makeup": 11, "bedroom": 18, "sleeping": 6, "resting": 17, "annoyed_expression": 2, "purple_membrane": 12}, "stage3_selected_phrase_ranks": {"text": 1, "blue_eyes": 1, "lying": 1, "green_eyes": 1, "blonde_hair": 1, "purple_hair": 1, "makeup": 1, "bedroom": 1, "sleeping": 1, "resting": 1, "annoyed_expression": 1, "purple_membrane": 1}, "extra_evidence": {"annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7251}, "anthro": {"source": "probe"}, "bedroom": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4901}, "humanoid": {"source": "structural"}, "membrane_(anatomy)": {"source": "implied"}, "purple_membrane": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5791}, "resting": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5034}}, "structural": ["duo", "humanoid", "text"], "probe": ["anthro", "duo"], "t1": 2.35, "t2": 2.12, "t3": 4.3, "t3s": 0.92, "t3p": 2.36, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=23 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 8 |
+
{"id": 2034167, "n_gt": 11, "n_retrieved": 20, "n_selected": 23, "n_implied": 7, "n_structural": 4, "n_probe": 3, "ret_R": 0.4545, "P": 0.3043, "R": 0.6364, "F1": 0.4118, "leaf_P": 0.3333, "leaf_R": 0.5714, "leaf_F1": 0.4211, "n_leaf_sel": 12, "n_leaf_gt": 7, "ret_P": 0.25, "sel_given_ret": 1.4, "over_sel": 2.09, "why": {"explicit": 4, "strong_implied": 7}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 20, "attempts_by_n_local": {"23": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3043, "gen_R": 0.6364, "gen_F1": 0.4118, "missed": ["blue_nose", "fur", "white_body", "white_fur"], "extra": ["action_pose", "animal_humanoid", "anthro", "canid_humanoid", "canine_humanoid", "clothed", "clothing", "curved_tail", "humanoid", "male", "mammal_humanoid", "pink_stripes", "pink_tail", "pose", "stripes", "tail"], "ground_truth_tags": ["blue_eyes", "blue_nose", "canid", "canine", "fur", "mammal", "open_mouth", "purple_body", "solo", "white_body", "white_fur"], "selected_tags": ["action_pose", "animal_humanoid", "anthro", "blue_eyes", "canid", "canid_humanoid", "canine", "canine_humanoid", "clothed", "clothing", "curved_tail", "humanoid", "male", "mammal", "mammal_humanoid", "open_mouth", "pink_stripes", "pink_tail", "pose", "purple_body", "solo", "stripes", "tail"], "stage3_selected": ["action_pose", "blue_eyes", "canine_humanoid", "curved_tail", "open_mouth", "pink_stripes", "pink_tail", "pose", "purple_body", "stripes", "tail"], "stage3_selected_scores": {"open_mouth": 0.561, "blue_eyes": 0.5539, "purple_body": 
0.5189, "canine_humanoid": 0.9365, "tail": 0.5573, "stripes": 0.5407, "pose": 0.5688, "pink_tail": 0.4493, "action_pose": 0.5588, "pink_stripes": 0.4579, "curved_tail": 0.4919}, "stage3_selected_ranks": {"open_mouth": 5, "blue_eyes": 9, "purple_body": 14, "canine_humanoid": 1, "tail": 7, "stripes": 11, "pose": 4, "pink_tail": 19, "action_pose": 6, "pink_stripes": 18, "curved_tail": 17}, "stage3_selected_phrase_ranks": {"open_mouth": 1, "blue_eyes": 1, "purple_body": 1, "canine_humanoid": 1, "tail": 1, "stripes": 1, "pose": 1, "pink_tail": 1, "action_pose": 1, "pink_stripes": 1, "curved_tail": 1}, "extra_evidence": {"action_pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5588}, "animal_humanoid": {"source": "implied"}, "anthro": {"source": "structural"}, "canid_humanoid": {"source": "implied"}, "canine_humanoid": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9365}, "clothed": {"source": "structural"}, "clothing": {"source": "implied"}, "curved_tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4919}, "humanoid": {"source": "implied"}, "male": {"source": "structural"}, "mammal_humanoid": {"source": "implied"}, "pink_stripes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4579}, "pink_tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.4493}, "pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5688}, "stripes": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5407}, "tail": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5573}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["anthro", "canid", "solo"], "t1": 2.31, "t2": 2.1, "t3": 4.57, "t3s": 1.52, "t3p": 2.17, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=23 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=5"]}
|
| 9 |
+
{"id": 1325009, "n_gt": 22, "n_retrieved": 18, "n_selected": 22, "n_implied": 6, "n_structural": 5, "n_probe": 3, "ret_R": 0.2273, "P": 0.5909, "R": 0.5909, "F1": 0.5909, "leaf_P": 0.2667, "leaf_R": 0.3333, "leaf_F1": 0.2963, "n_leaf_sel": 15, "n_leaf_gt": 12, "ret_P": 0.2778, "sel_given_ret": 2.6, "over_sel": 1.0, "why": {"explicit": 6, "strong_implied": 4}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"18": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.5909, "gen_R": 0.5909, "gen_F1": 0.5909, "missed": ["chest_tuft", "countershading", "muscular", "muscular_anthro", "muscular_male", "pantherine", "tiger", "topless", "tuft"], "extra": ["countershade_body", "looking_at_viewer", "muscular_arms", "pose", "striped_body", "striped_fur", "white_chest", "yellow_bottomwear", "yellow_clothing"], "ground_truth_tags": ["anthro", "blue_eyes", "bottomwear", "chest_tuft", "clothed", "clothing", "countershading", "felid", "fur", "hand_on_head", "male", "mammal", "muscular", "muscular_anthro", "muscular_male", "pantherine", "shorts", "solo", "stripes", "tiger", "topless", "tuft"], "selected_tags": ["anthro", "blue_eyes", "bottomwear", "clothed", "clothing", "countershade_body", "felid", "fur", "hand_on_head", "looking_at_viewer", "male", "mammal", "muscular_arms", "pose", "shorts", "solo", "striped_body", "striped_fur", "stripes", "white_chest", "yellow_bottomwear", "yellow_clothing"], "stage3_selected": ["blue_eyes", "countershade_body", "fur", "hand_on_head", "muscular_arms", "pose", 
"shorts", "striped_fur", "white_chest", "yellow_bottomwear"], "stage3_selected_scores": {"blue_eyes": 0.5717, "shorts": 0.5785, "striped_fur": 0.6385, "hand_on_head": 0.5932, "yellow_bottomwear": 0.652, "white_chest": 0.9198, "fur": 0.5838, "pose": 0.6235, "muscular_arms": 0.7948, "countershade_body": 0.8754}, "stage3_selected_ranks": {"blue_eyes": 14, "shorts": 13, "striped_fur": 8, "hand_on_head": 11, "yellow_bottomwear": 7, "white_chest": 2, "fur": 12, "pose": 9, "muscular_arms": 4, "countershade_body": 3}, "stage3_selected_phrase_ranks": {"blue_eyes": 1, "shorts": 1, "striped_fur": 1, "hand_on_head": 1, "yellow_bottomwear": 1, "white_chest": 1, "fur": 1, "pose": 1, "muscular_arms": 1, "countershade_body": 1}, "extra_evidence": {"countershade_body": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.8754}, "looking_at_viewer": {"source": "structural"}, "muscular_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.7948}, "pose": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6235}, "striped_body": {"source": "implied"}, "striped_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6385}, "white_chest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9198}, "yellow_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.652}, "yellow_clothing": {"source": "implied"}}, "structural": ["solo", "anthro", "male", "clothed", "looking_at_viewer"], "probe": ["anthro", "felid", "solo"], "t1": 2.58, "t2": 1.76, "t3": 3.35, "t3s": 1.91, "t3p": 2.8, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=18 entity=0 copyright_filtered=1 generic_char_to_general=0 unknown_type=2"]}
|
| 10 |
+
{"id": 3285630, "n_gt": 12, "n_retrieved": 16, "n_selected": 26, "n_implied": 8, "n_structural": 4, "n_probe": 5, "ret_R": 0.1667, "P": 0.3846, "R": 0.8333, "F1": 0.5263, "leaf_P": 0.3333, "leaf_R": 0.5556, "leaf_F1": 0.4167, "n_leaf_sel": 15, "n_leaf_gt": 9, "ret_P": 0.125, "sel_given_ret": 5.0, "over_sel": 2.17, "why": {"explicit": 12}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 18, "attempts_by_n_local": {"19": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3846, "gen_R": 0.8333, "gen_F1": 0.5263, "missed": ["alpha_channel", "fingers"], "extra": ["black_body", "black_fur", "business_attire", "formal", "hair_bun", "holding_mug", "holding_object", "mug", "necktie", "shirt", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "ground_truth_tags": ["alpha_channel", "anthro", "clothed", "clothing", "felid", "feline", "fingers", "fur", "hair", "male", "mammal", "solo"], "selected_tags": ["anthro", "black_body", "black_fur", "business_attire", "clothed", "clothing", "felid", "feline", "formal", "fur", "hair", "hair_bun", "holding_mug", "holding_object", "male", "mammal", "mug", "necktie", "shirt", "solo", "teal_shirt", "teal_topwear", "text", "topwear", "vest", "white_necktie"], "stage3_selected": ["black_fur", "business_attire", "feline", "formal", "hair_bun", "holding_mug", "necktie", "shirt", "simple_background", "teal_shirt", "vest", "white_necktie"], "stage3_selected_scores": {"simple_background": 0.6978, "feline": 0.7062, "shirt": 0.7998, "black_fur": 0.7183, "necktie": 
0.7314, "vest": 0.8403, "hair_bun": 0.6926, "holding_mug": 0.916, "formal": 0.5993, "business_attire": 0.5558, "teal_shirt": 0.7474, "white_necktie": 0.6418}, "stage3_selected_ranks": {"simple_background": 11, "feline": 10, "shirt": 5, "black_fur": 8, "necktie": 7, "vest": 3, "hair_bun": 12, "holding_mug": 1, "formal": 16, "business_attire": 18, "teal_shirt": 6, "white_necktie": 14}, "stage3_selected_phrase_ranks": {"simple_background": 1, "feline": 1, "shirt": 1, "black_fur": 1, "necktie": 1, "vest": 1, "hair_bun": 1, "holding_mug": 1, "formal": 1, "business_attire": 1, "teal_shirt": 1, "white_necktie": 1}, "extra_evidence": {"black_body": {"source": "implied"}, "black_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7183}, "business_attire": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5558}, "formal": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5993}, "hair_bun": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6926}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.916}, "holding_object": {"source": "implied"}, "mug": {"source": "implied"}, "necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7314}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7998}, "teal_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7474}, "teal_topwear": {"source": "implied"}, "text": {"source": "probe"}, "topwear": {"source": "implied"}, "vest": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8403}, "white_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6418}}, "structural": ["solo", "anthro", "male", "clothed"], "probe": ["clothing", "anthro", "text", "felid", "solo"], "t1": 1.81, "t2": 1.64, "t3": 1.67, "t3s": 1.52, "t3p": 2.57, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=19 entity=0 
copyright_filtered=0 generic_char_to_general=0 unknown_type=2"]}
|
| 11 |
+
{"id": 260449, "n_gt": 14, "n_retrieved": 21, "n_selected": 21, "n_implied": 4, "n_structural": 6, "n_probe": 6, "ret_R": 0.5, "P": 0.4762, "R": 0.7143, "F1": 0.5714, "leaf_P": 0.3077, "leaf_R": 0.4, "leaf_F1": 0.3478, "n_leaf_sel": 13, "n_leaf_gt": 10, "ret_P": 0.3333, "sel_given_ret": 1.4286, "over_sel": 1.5, "why": {"strong_implied": 10}, "stage3_diag": {"mode": "chunked_map_union", "chunk_strategy": "interleave", "chunk_passes": 1, "chunk_shuffle_within_call": false, "calls_total": 1, "calls_with_selection": 1, "calls_exhausted_retries": 0, "attempts_total": 1, "attempt_errors": 0, "attempt_parse_fail": 0, "attempt_parse_ok": 1, "invalid_items_total": 0, "oob_indices_total": 0, "dupe_indices_total": 0, "kept_total": 14, "attempts_by_n_local": {"24": {"attempts": 1, "parse_ok": 1, "parse_fail": 0, "errors": 0}}, "attempt_failure_rate": 0.0, "call_exhaustion_rate": 0.0}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.4762, "gen_R": 0.7143, "gen_F1": 0.5714, "missed": ["fur", "hair", "human", "male"], "extra": ["anthro", "bottomwear", "duo", "feral", "grin", "loincloth", "mischievous", "raised_arms", "smile", "topless", "trio"], "ground_truth_tags": ["ape", "bear", "clothed", "clothing", "dancing", "fur", "group", "hair", "haplorhine", "human", "looking_at_viewer", "male", "mammal", "primate"], "selected_tags": ["anthro", "ape", "bear", "bottomwear", "clothed", "clothing", "dancing", "duo", "feral", "grin", "group", "haplorhine", "loincloth", "looking_at_viewer", "mammal", "mischievous", "primate", "raised_arms", "smile", "topless", "trio"], "stage3_selected": ["ape", "bear", "dancing", "grin", "loincloth", "looking_at_viewer", "mischievous", "primate", "raised_arms", "simple_background"], "stage3_selected_scores": {"simple_background": 0.5541, "looking_at_viewer": 0.5522, "bear": 0.5757, "grin": 0.5711, "primate": 0.8911, "loincloth": 0.5719, "dancing": 0.562, "ape": 0.9769, "raised_arms": 0.551, "mischievous": 0.545}, "stage3_selected_ranks": 
{"simple_background": 8, "looking_at_viewer": 9, "bear": 3, "grin": 5, "primate": 2, "loincloth": 4, "dancing": 7, "ape": 1, "raised_arms": 10, "mischievous": 12}, "stage3_selected_phrase_ranks": {"simple_background": 1, "looking_at_viewer": 1, "bear": 1, "grin": 1, "primate": 1, "loincloth": 1, "dancing": 1, "ape": 1, "raised_arms": 1, "mischievous": 1}, "extra_evidence": {"anthro": {"source": "structural"}, "bottomwear": {"source": "implied"}, "duo": {"source": "probe"}, "feral": {"source": "structural"}, "grin": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5711}, "loincloth": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5719}, "mischievous": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.545}, "raised_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.551}, "smile": {"source": "implied"}, "topless": {"source": "structural"}, "trio": {"source": "structural"}}, "structural": ["trio", "anthro", "feral", "clothed", "topless", "looking_at_viewer"], "probe": ["clothing", "simple_background", "anthro", "duo", "group", "bear"], "t1": 2.67, "t2": 2.18, "t3": 5.74, "t3s": 1.29, "t3p": 1.13, "err": null, "issues": ["Stage3 split: general=13 entity=0 copyright_filtered=0 generic_char_to_general=0 unknown_type=0", "Stage3 split: general=24 entity=0 copyright_filtered=0 generic_char_to_general=1 unknown_type=2"]}
|
data/runtime_debug/eval_no_why_explicit_instruction_n10_20260303T005633Z.json
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp_utc": "2026-03-03T00:56:33Z",
|
| 3 |
+
"config": {
|
| 4 |
+
"dataset": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl",
|
| 5 |
+
"n": 10,
|
| 6 |
+
"mode": "chunked_map_union",
|
| 7 |
+
"chunk_size": 60,
|
| 8 |
+
"retrieval_per_phrase_k": 2,
|
| 9 |
+
"retrieval_per_phrase_final_k": 1,
|
| 10 |
+
"selection_per_phrase_k": 2,
|
| 11 |
+
"selection_schema": "no_why_explicit_instruction",
|
| 12 |
+
"structural": true,
|
| 13 |
+
"probe": true,
|
| 14 |
+
"expand_implications": true
|
| 15 |
+
},
|
| 16 |
+
"summary": {
|
| 17 |
+
"n": 10,
|
| 18 |
+
"avg_P": 0.49907896773114163,
|
| 19 |
+
"avg_R": 0.7686679986679986,
|
| 20 |
+
"avg_F1": 0.5949358584013316,
|
| 21 |
+
"avg_t1": 9.02932870388031,
|
| 22 |
+
"avg_t2": 2.8720282554626464,
|
| 23 |
+
"avg_t3": 4.9426744937896725,
|
| 24 |
+
"stage3_calls_total": 11,
|
| 25 |
+
"stage3_attempts_total": 11,
|
| 26 |
+
"stage3_parse_fail_total": 0,
|
| 27 |
+
"stage3_errors_total": 0,
|
| 28 |
+
"stage3_calls_exhausted": 0
|
| 29 |
+
},
|
| 30 |
+
"results": [
|
| 31 |
+
{
|
| 32 |
+
"id": 3285630,
|
| 33 |
+
"P": 0.4642857142857143,
|
| 34 |
+
"R": 0.9285714285714286,
|
| 35 |
+
"F1": 0.6190476190476191,
|
| 36 |
+
"n_gt": 14,
|
| 37 |
+
"n_sel": 28,
|
| 38 |
+
"t1": 9.331122159957886,
|
| 39 |
+
"t2": 13.36060380935669,
|
| 40 |
+
"t3": 6.582068920135498,
|
| 41 |
+
"stage3_diag": {
|
| 42 |
+
"calls_total": 1,
|
| 43 |
+
"calls_exhausted_retries": 0,
|
| 44 |
+
"attempts_total": 1,
|
| 45 |
+
"attempt_errors": 0,
|
| 46 |
+
"attempt_parse_fail": 0,
|
| 47 |
+
"attempt_parse_ok": 1
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"id": 260449,
|
| 52 |
+
"P": 0.52,
|
| 53 |
+
"R": 0.8666666666666667,
|
| 54 |
+
"F1": 0.65,
|
| 55 |
+
"n_gt": 15,
|
| 56 |
+
"n_sel": 25,
|
| 57 |
+
"t1": 8.170901536941528,
|
| 58 |
+
"t2": 2.0571630001068115,
|
| 59 |
+
"t3": 4.041555881500244,
|
| 60 |
+
"stage3_diag": {
|
| 61 |
+
"calls_total": 1,
|
| 62 |
+
"calls_exhausted_retries": 0,
|
| 63 |
+
"attempts_total": 1,
|
| 64 |
+
"attempt_errors": 0,
|
| 65 |
+
"attempt_parse_fail": 0,
|
| 66 |
+
"attempt_parse_ok": 1
|
| 67 |
+
}
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"id": 1078019,
|
| 71 |
+
"P": 0.6363636363636364,
|
| 72 |
+
"R": 1.0,
|
| 73 |
+
"F1": 0.7777777777777778,
|
| 74 |
+
"n_gt": 14,
|
| 75 |
+
"n_sel": 22,
|
| 76 |
+
"t1": 12.34386157989502,
|
| 77 |
+
"t2": 1.5099613666534424,
|
| 78 |
+
"t3": 1.325575828552246,
|
| 79 |
+
"stage3_diag": {
|
| 80 |
+
"calls_total": 1,
|
| 81 |
+
"calls_exhausted_retries": 0,
|
| 82 |
+
"attempts_total": 1,
|
| 83 |
+
"attempt_errors": 0,
|
| 84 |
+
"attempt_parse_fail": 0,
|
| 85 |
+
"attempt_parse_ok": 1
|
| 86 |
+
}
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"id": 1624724,
|
| 90 |
+
"P": 0.3333333333333333,
|
| 91 |
+
"R": 0.3333333333333333,
|
| 92 |
+
"F1": 0.3333333333333333,
|
| 93 |
+
"n_gt": 6,
|
| 94 |
+
"n_sel": 6,
|
| 95 |
+
"t1": 13.713356494903564,
|
| 96 |
+
"t2": 0.1162874698638916,
|
| 97 |
+
"t3": 1.2268812656402588,
|
| 98 |
+
"stage3_diag": {
|
| 99 |
+
"calls_total": 1,
|
| 100 |
+
"calls_exhausted_retries": 0,
|
| 101 |
+
"attempts_total": 1,
|
| 102 |
+
"attempt_errors": 0,
|
| 103 |
+
"attempt_parse_fail": 0,
|
| 104 |
+
"attempt_parse_ok": 1
|
| 105 |
+
}
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"id": 1325009,
|
| 109 |
+
"P": 0.5833333333333334,
|
| 110 |
+
"R": 0.6363636363636364,
|
| 111 |
+
"F1": 0.6086956521739131,
|
| 112 |
+
"n_gt": 22,
|
| 113 |
+
"n_sel": 24,
|
| 114 |
+
"t1": 8.598191976547241,
|
| 115 |
+
"t2": 1.8005964756011963,
|
| 116 |
+
"t3": 4.540030479431152,
|
| 117 |
+
"stage3_diag": {
|
| 118 |
+
"calls_total": 1,
|
| 119 |
+
"calls_exhausted_retries": 0,
|
| 120 |
+
"attempts_total": 1,
|
| 121 |
+
"attempt_errors": 0,
|
| 122 |
+
"attempt_parse_fail": 0,
|
| 123 |
+
"attempt_parse_ok": 1
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"id": 1023509,
|
| 128 |
+
"P": 0.391304347826087,
|
| 129 |
+
"R": 0.6923076923076923,
|
| 130 |
+
"F1": 0.5,
|
| 131 |
+
"n_gt": 13,
|
| 132 |
+
"n_sel": 23,
|
| 133 |
+
"t1": 4.089767932891846,
|
| 134 |
+
"t2": 1.7381300926208496,
|
| 135 |
+
"t3": 3.972919464111328,
|
| 136 |
+
"stage3_diag": {
|
| 137 |
+
"calls_total": 1,
|
| 138 |
+
"calls_exhausted_retries": 0,
|
| 139 |
+
"attempts_total": 1,
|
| 140 |
+
"attempt_errors": 0,
|
| 141 |
+
"attempt_parse_fail": 0,
|
| 142 |
+
"attempt_parse_ok": 1
|
| 143 |
+
}
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"id": 335343,
|
| 147 |
+
"P": 0.35714285714285715,
|
| 148 |
+
"R": 0.7142857142857143,
|
| 149 |
+
"F1": 0.4761904761904762,
|
| 150 |
+
"n_gt": 14,
|
| 151 |
+
"n_sel": 28,
|
| 152 |
+
"t1": 4.67448353767395,
|
| 153 |
+
"t2": 2.3359692096710205,
|
| 154 |
+
"t3": 8.36922836303711,
|
| 155 |
+
"stage3_diag": {
|
| 156 |
+
"calls_total": 1,
|
| 157 |
+
"calls_exhausted_retries": 0,
|
| 158 |
+
"attempts_total": 1,
|
| 159 |
+
"attempt_errors": 0,
|
| 160 |
+
"attempt_parse_fail": 0,
|
| 161 |
+
"attempt_parse_ok": 1
|
| 162 |
+
}
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"id": 17482,
|
| 166 |
+
"P": 0.5357142857142857,
|
| 167 |
+
"R": 0.6818181818181818,
|
| 168 |
+
"F1": 0.6,
|
| 169 |
+
"n_gt": 22,
|
| 170 |
+
"n_sel": 28,
|
| 171 |
+
"t1": 4.954836368560791,
|
| 172 |
+
"t2": 1.8676352500915527,
|
| 173 |
+
"t3": 5.0674896240234375,
|
| 174 |
+
"stage3_diag": {
|
| 175 |
+
"calls_total": 1,
|
| 176 |
+
"calls_exhausted_retries": 0,
|
| 177 |
+
"attempts_total": 1,
|
| 178 |
+
"attempt_errors": 0,
|
| 179 |
+
"attempt_parse_fail": 0,
|
| 180 |
+
"attempt_parse_ok": 1
|
| 181 |
+
}
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"id": 2021552,
|
| 185 |
+
"P": 0.7407407407407407,
|
| 186 |
+
"R": 0.8333333333333334,
|
| 187 |
+
"F1": 0.7843137254901961,
|
| 188 |
+
"n_gt": 24,
|
| 189 |
+
"n_sel": 27,
|
| 190 |
+
"t1": 8.012149810791016,
|
| 191 |
+
"t2": 1.6340866088867188,
|
| 192 |
+
"t3": 5.091134548187256,
|
| 193 |
+
"stage3_diag": {
|
| 194 |
+
"calls_total": 1,
|
| 195 |
+
"calls_exhausted_retries": 0,
|
| 196 |
+
"attempts_total": 1,
|
| 197 |
+
"attempt_errors": 0,
|
| 198 |
+
"attempt_parse_fail": 0,
|
| 199 |
+
"attempt_parse_ok": 1
|
| 200 |
+
}
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"id": 2034167,
|
| 204 |
+
"P": 0.42857142857142855,
|
| 205 |
+
"R": 1.0,
|
| 206 |
+
"F1": 0.6,
|
| 207 |
+
"n_gt": 12,
|
| 208 |
+
"n_sel": 28,
|
| 209 |
+
"t1": 16.40461564064026,
|
| 210 |
+
"t2": 2.299849271774292,
|
| 211 |
+
"t3": 9.209860563278198,
|
| 212 |
+
"stage3_diag": {
|
| 213 |
+
"calls_total": 2,
|
| 214 |
+
"calls_exhausted_retries": 0,
|
| 215 |
+
"attempts_total": 2,
|
| 216 |
+
"attempt_errors": 0,
|
| 217 |
+
"attempt_parse_fail": 0,
|
| 218 |
+
"attempt_parse_ok": 2
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
]
|
| 222 |
+
}
|
data/runtime_debug/eval_no_why_n10_20260302T210359Z.json
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp_utc": "2026-03-02T21:03:59Z",
|
| 3 |
+
"config": {
|
| 4 |
+
"dataset": "data\\eval_samples\\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl",
|
| 5 |
+
"n": 10,
|
| 6 |
+
"mode": "chunked_map_union",
|
| 7 |
+
"chunk_size": 60,
|
| 8 |
+
"retrieval_per_phrase_k": 2,
|
| 9 |
+
"retrieval_per_phrase_final_k": 1,
|
| 10 |
+
"selection_per_phrase_k": 2,
|
| 11 |
+
"selection_schema": "no_why",
|
| 12 |
+
"structural": true,
|
| 13 |
+
"probe": true,
|
| 14 |
+
"expand_implications": true
|
| 15 |
+
},
|
| 16 |
+
"summary": {
|
| 17 |
+
"n": 10,
|
| 18 |
+
"avg_P": 0.4951581196581197,
|
| 19 |
+
"avg_R": 0.7464818514818515,
|
| 20 |
+
"avg_F1": 0.5645442382676424,
|
| 21 |
+
"avg_t1": 9.060380721092224,
|
| 22 |
+
"avg_t2": 1.6346927881240845,
|
| 23 |
+
"avg_t3": 2.9281704664230346,
|
| 24 |
+
"stage3_calls_total": 11,
|
| 25 |
+
"stage3_attempts_total": 11,
|
| 26 |
+
"stage3_parse_fail_total": 0,
|
| 27 |
+
"stage3_errors_total": 0,
|
| 28 |
+
"stage3_calls_exhausted": 0
|
| 29 |
+
},
|
| 30 |
+
"results": [
|
| 31 |
+
{
|
| 32 |
+
"id": 3285630,
|
| 33 |
+
"P": 0.875,
|
| 34 |
+
"R": 0.5,
|
| 35 |
+
"F1": 0.6363636363636364,
|
| 36 |
+
"n_gt": 14,
|
| 37 |
+
"n_sel": 8,
|
| 38 |
+
"t1": 9.5723135471344,
|
| 39 |
+
"t2": 3.2603888511657715,
|
| 40 |
+
"t3": 1.1662352085113525,
|
| 41 |
+
"stage3_diag": {
|
| 42 |
+
"calls_total": 1,
|
| 43 |
+
"calls_exhausted_retries": 0,
|
| 44 |
+
"attempts_total": 1,
|
| 45 |
+
"attempt_errors": 0,
|
| 46 |
+
"attempt_parse_fail": 0,
|
| 47 |
+
"attempt_parse_ok": 1,
|
| 48 |
+
"attempts_by_n_local": {
|
| 49 |
+
"20": {
|
| 50 |
+
"attempts": 1,
|
| 51 |
+
"parse_ok": 1,
|
| 52 |
+
"parse_fail": 0,
|
| 53 |
+
"errors": 0
|
| 54 |
+
}
|
| 55 |
+
}
|
| 56 |
+
}
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"id": 260449,
|
| 60 |
+
"P": 0.43333333333333335,
|
| 61 |
+
"R": 0.8666666666666667,
|
| 62 |
+
"F1": 0.5777777777777778,
|
| 63 |
+
"n_gt": 15,
|
| 64 |
+
"n_sel": 30,
|
| 65 |
+
"t1": 7.419761419296265,
|
| 66 |
+
"t2": 1.5911827087402344,
|
| 67 |
+
"t3": 2.060990333557129,
|
| 68 |
+
"stage3_diag": {
|
| 69 |
+
"calls_total": 1,
|
| 70 |
+
"calls_exhausted_retries": 0,
|
| 71 |
+
"attempts_total": 1,
|
| 72 |
+
"attempt_errors": 0,
|
| 73 |
+
"attempt_parse_fail": 0,
|
| 74 |
+
"attempt_parse_ok": 1,
|
| 75 |
+
"attempts_by_n_local": {
|
| 76 |
+
"20": {
|
| 77 |
+
"attempts": 1,
|
| 78 |
+
"parse_ok": 1,
|
| 79 |
+
"parse_fail": 0,
|
| 80 |
+
"errors": 0
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
}
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"id": 1078019,
|
| 87 |
+
"P": 0.5555555555555556,
|
| 88 |
+
"R": 0.7142857142857143,
|
| 89 |
+
"F1": 0.6250000000000001,
|
| 90 |
+
"n_gt": 14,
|
| 91 |
+
"n_sel": 18,
|
| 92 |
+
"t1": 8.502500295639038,
|
| 93 |
+
"t2": 1.3456428050994873,
|
| 94 |
+
"t3": 2.0789365768432617,
|
| 95 |
+
"stage3_diag": {
|
| 96 |
+
"calls_total": 1,
|
| 97 |
+
"calls_exhausted_retries": 0,
|
| 98 |
+
"attempts_total": 1,
|
| 99 |
+
"attempt_errors": 0,
|
| 100 |
+
"attempt_parse_fail": 0,
|
| 101 |
+
"attempt_parse_ok": 1,
|
| 102 |
+
"attempts_by_n_local": {
|
| 103 |
+
"16": {
|
| 104 |
+
"attempts": 1,
|
| 105 |
+
"parse_ok": 1,
|
| 106 |
+
"parse_fail": 0,
|
| 107 |
+
"errors": 0
|
| 108 |
+
}
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"id": 1624724,
|
| 114 |
+
"P": 0.4,
|
| 115 |
+
"R": 1.0,
|
| 116 |
+
"F1": 0.5714285714285715,
|
| 117 |
+
"n_gt": 6,
|
| 118 |
+
"n_sel": 15,
|
| 119 |
+
"t1": 5.102054595947266,
|
| 120 |
+
"t2": 1.01362943649292,
|
| 121 |
+
"t3": 2.029695749282837,
|
| 122 |
+
"stage3_diag": {
|
| 123 |
+
"calls_total": 1,
|
| 124 |
+
"calls_exhausted_retries": 0,
|
| 125 |
+
"attempts_total": 1,
|
| 126 |
+
"attempt_errors": 0,
|
| 127 |
+
"attempt_parse_fail": 0,
|
| 128 |
+
"attempt_parse_ok": 1,
|
| 129 |
+
"attempts_by_n_local": {
|
| 130 |
+
"14": {
|
| 131 |
+
"attempts": 1,
|
| 132 |
+
"parse_ok": 1,
|
| 133 |
+
"parse_fail": 0,
|
| 134 |
+
"errors": 0
|
| 135 |
+
}
|
| 136 |
+
}
|
| 137 |
+
}
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"id": 1325009,
|
| 141 |
+
"P": 0.48,
|
| 142 |
+
"R": 0.5454545454545454,
|
| 143 |
+
"F1": 0.5106382978723404,
|
| 144 |
+
"n_gt": 22,
|
| 145 |
+
"n_sel": 25,
|
| 146 |
+
"t1": 10.626267194747925,
|
| 147 |
+
"t2": 1.750549554824829,
|
| 148 |
+
"t3": 2.414820432662964,
|
| 149 |
+
"stage3_diag": {
|
| 150 |
+
"calls_total": 1,
|
| 151 |
+
"calls_exhausted_retries": 0,
|
| 152 |
+
"attempts_total": 1,
|
| 153 |
+
"attempt_errors": 0,
|
| 154 |
+
"attempt_parse_fail": 0,
|
| 155 |
+
"attempt_parse_ok": 1,
|
| 156 |
+
"attempts_by_n_local": {
|
| 157 |
+
"23": {
|
| 158 |
+
"attempts": 1,
|
| 159 |
+
"parse_ok": 1,
|
| 160 |
+
"parse_fail": 0,
|
| 161 |
+
"errors": 0
|
| 162 |
+
}
|
| 163 |
+
}
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"id": 1023509,
|
| 168 |
+
"P": 0.23076923076923078,
|
| 169 |
+
"R": 0.6923076923076923,
|
| 170 |
+
"F1": 0.34615384615384615,
|
| 171 |
+
"n_gt": 13,
|
| 172 |
+
"n_sel": 39,
|
| 173 |
+
"t1": 15.900179386138916,
|
| 174 |
+
"t2": 1.3188576698303223,
|
| 175 |
+
"t3": 3.010589361190796,
|
| 176 |
+
"stage3_diag": {
|
| 177 |
+
"calls_total": 1,
|
| 178 |
+
"calls_exhausted_retries": 0,
|
| 179 |
+
"attempts_total": 1,
|
| 180 |
+
"attempt_errors": 0,
|
| 181 |
+
"attempt_parse_fail": 0,
|
| 182 |
+
"attempt_parse_ok": 1,
|
| 183 |
+
"attempts_by_n_local": {
|
| 184 |
+
"20": {
|
| 185 |
+
"attempts": 1,
|
| 186 |
+
"parse_ok": 1,
|
| 187 |
+
"parse_fail": 0,
|
| 188 |
+
"errors": 0
|
| 189 |
+
}
|
| 190 |
+
}
|
| 191 |
+
}
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"id": 335343,
|
| 195 |
+
"P": 0.4,
|
| 196 |
+
"R": 0.7142857142857143,
|
| 197 |
+
"F1": 0.5128205128205129,
|
| 198 |
+
"n_gt": 14,
|
| 199 |
+
"n_sel": 25,
|
| 200 |
+
"t1": 6.280893087387085,
|
| 201 |
+
"t2": 1.8548295497894287,
|
| 202 |
+
"t3": 2.6963677406311035,
|
| 203 |
+
"stage3_diag": {
|
| 204 |
+
"calls_total": 1,
|
| 205 |
+
"calls_exhausted_retries": 0,
|
| 206 |
+
"attempts_total": 1,
|
| 207 |
+
"attempt_errors": 0,
|
| 208 |
+
"attempt_parse_fail": 0,
|
| 209 |
+
"attempt_parse_ok": 1,
|
| 210 |
+
"attempts_by_n_local": {
|
| 211 |
+
"23": {
|
| 212 |
+
"attempts": 1,
|
| 213 |
+
"parse_ok": 1,
|
| 214 |
+
"parse_fail": 0,
|
| 215 |
+
"errors": 0
|
| 216 |
+
}
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"id": 17482,
|
| 222 |
+
"P": 0.5769230769230769,
|
| 223 |
+
"R": 0.6818181818181818,
|
| 224 |
+
"F1": 0.6249999999999999,
|
| 225 |
+
"n_gt": 22,
|
| 226 |
+
"n_sel": 26,
|
| 227 |
+
"t1": 3.7739036083221436,
|
| 228 |
+
"t2": 1.246765375137329,
|
| 229 |
+
"t3": 2.3435256481170654,
|
| 230 |
+
"stage3_diag": {
|
| 231 |
+
"calls_total": 1,
|
| 232 |
+
"calls_exhausted_retries": 0,
|
| 233 |
+
"attempts_total": 1,
|
| 234 |
+
"attempt_errors": 0,
|
| 235 |
+
"attempt_parse_fail": 0,
|
| 236 |
+
"attempt_parse_ok": 1,
|
| 237 |
+
"attempts_by_n_local": {
|
| 238 |
+
"18": {
|
| 239 |
+
"attempts": 1,
|
| 240 |
+
"parse_ok": 1,
|
| 241 |
+
"parse_fail": 0,
|
| 242 |
+
"errors": 0
|
| 243 |
+
}
|
| 244 |
+
}
|
| 245 |
+
}
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"id": 2021552,
|
| 249 |
+
"P": 0.6875,
|
| 250 |
+
"R": 0.9166666666666666,
|
| 251 |
+
"F1": 0.7857142857142857,
|
| 252 |
+
"n_gt": 24,
|
| 253 |
+
"n_sel": 32,
|
| 254 |
+
"t1": 11.655076026916504,
|
| 255 |
+
"t2": 1.3419077396392822,
|
| 256 |
+
"t3": 3.532601833343506,
|
| 257 |
+
"stage3_diag": {
|
| 258 |
+
"calls_total": 1,
|
| 259 |
+
"calls_exhausted_retries": 0,
|
| 260 |
+
"attempts_total": 1,
|
| 261 |
+
"attempt_errors": 0,
|
| 262 |
+
"attempt_parse_fail": 0,
|
| 263 |
+
"attempt_parse_ok": 1,
|
| 264 |
+
"attempts_by_n_local": {
|
| 265 |
+
"18": {
|
| 266 |
+
"attempts": 1,
|
| 267 |
+
"parse_ok": 1,
|
| 268 |
+
"parse_fail": 0,
|
| 269 |
+
"errors": 0
|
| 270 |
+
}
|
| 271 |
+
}
|
| 272 |
+
}
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"id": 2034167,
|
| 276 |
+
"P": 0.3125,
|
| 277 |
+
"R": 0.8333333333333334,
|
| 278 |
+
"F1": 0.45454545454545453,
|
| 279 |
+
"n_gt": 12,
|
| 280 |
+
"n_sel": 32,
|
| 281 |
+
"t1": 11.7708580493927,
|
| 282 |
+
"t2": 1.6231741905212402,
|
| 283 |
+
"t3": 7.947941780090332,
|
| 284 |
+
"stage3_diag": {
|
| 285 |
+
"calls_total": 2,
|
| 286 |
+
"calls_exhausted_retries": 0,
|
| 287 |
+
"attempts_total": 2,
|
| 288 |
+
"attempt_errors": 0,
|
| 289 |
+
"attempt_parse_fail": 0,
|
| 290 |
+
"attempt_parse_ok": 2,
|
| 291 |
+
"attempts_by_n_local": {
|
| 292 |
+
"23": {
|
| 293 |
+
"attempts": 1,
|
| 294 |
+
"parse_ok": 1,
|
| 295 |
+
"parse_fail": 0,
|
| 296 |
+
"errors": 0
|
| 297 |
+
},
|
| 298 |
+
"1": {
|
| 299 |
+
"attempts": 1,
|
| 300 |
+
"parse_ok": 1,
|
| 301 |
+
"parse_fail": 0,
|
| 302 |
+
"errors": 0
|
| 303 |
+
}
|
| 304 |
+
}
|
| 305 |
+
}
|
| 306 |
+
}
|
| 307 |
+
]
|
| 308 |
+
}
|
data/runtime_debug/false_positive_case_review_looking_anthro_bear_20260304.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# False-Positive Review: looking_at_viewer, anthro, bear
|
| 2 |
+
|
| 3 |
+
- Source run detail: `data\eval_results\eval_caption_cogvlm_n10_seed42_20260303_071022_detail.jsonl`
|
| 4 |
+
- Source eval set: `data\eval_samples\e621_sfw_sample_1000_seed123_buffer10000_caption_evident.jsonl`
|
| 5 |
+
|
| 6 |
+
## Tag Prompt Definitions Used
|
| 7 |
+
- `looking_at_viewer` structural definition: Select only when explicit gaze wording appears (looking at viewer, looking at camera, looking directly at us, direct eye contact). Do not infer from front view, pose, or expression.
|
| 8 |
+
- `looking_at_viewer` probe display text: looking at viewer
|
| 9 |
+
- `anthro` structural definition: An animal/furry character with BOTH human-like body plan (upright stance, human-like torso/arms/hands) and clear animal traits (fur, muzzle, tail, animal ears, paws, or species cues).
|
| 10 |
+
- `anthro` probe display text: anthro - This tag is short for "anthropomorphic animals".
|
| 11 |
+
- `bear` structural definition: (not in structural config)
|
| 12 |
+
- `bear` probe display text: bear
|
| 13 |
+
|
| 14 |
+
## Cases: false-positive `looking_at_viewer`
|
| 15 |
+
### sample_id `17482`
|
| 16 |
+
- GT has `looking_at_viewer`: no
|
| 17 |
+
- Selected source for `looking_at_viewer`: structural
|
| 18 |
+
- Full caption: The image showcases an anthropomorphic creature, possibly a wolf or a dog, with a spade tail and claws, playing a bass guitar. The creature is depicted in a dynamic pose, with its hair flowing and fingers poised on the guitar strings. The background is a blend of pastel colors, giving the artwork a dreamy and ethereal feel. The creature's attire appears torn, and it holds the guitar with a sense of passion and dedication.
|
| 19 |
+
- Rewrite phrases: ['anthropomorphic creature', 'wolf or dog', 'spade tail', 'claws', 'playing bass guitar', 'dynamic pose', 'flowing hair', 'fingers on strings', 'pastel background', 'dreamy atmosphere', 'torn attire', 'passionate expression']
|
| 20 |
+
- Structural tags: ['solo', 'anthro', 'clothed', 'looking_at_viewer']
|
| 21 |
+
- Probe tags: ['solo', 'canid', 'anthro']
|
| 22 |
+
- Stage3-selected tags: ['claws', 'flowing_hair', 'pastel_background', 'playing_guitar', 'pose', 'spade_tail', 'torn_jeans']
|
| 23 |
+
- Speculation:
|
| 24 |
+
- Expression/pose cues may be treated as proxy for viewer-facing gaze despite explicit no-inference instruction.
|
| 25 |
+
- Tag came from structural stage where the classifier is biased toward common portrait framing defaults.
|
| 26 |
+
|
| 27 |
+
### sample_id `1078019`
|
| 28 |
+
- GT has `looking_at_viewer`: no
|
| 29 |
+
- Selected source for `looking_at_viewer`: structural
|
| 30 |
+
- Full caption: The image showcases two anthropomorphic rabbits. The one on the left has a confident and slightly playful expression, with teal eyes and a blush on its cheeks. It's wearing a coat and holding a small plushie. The rabbit on the right appears to be more surprised or taken aback, with wide open blue eyes. Both rabbits seem to be in a close and intimate setting, suggesting a romantic or close relationship between them.
|
| 31 |
+
- Rewrite phrases: ['anthropomorphic rabbits', 'close relationship', 'romantic setting', 'teal eyes', 'blush cheeks', 'confident expression', 'playful expression', 'holding plushie', 'coat', 'wide open eyes', 'blue eyes', 'surprised expression']
|
| 32 |
+
- Structural tags: ['duo', 'anthro', 'clothed', 'looking_at_viewer']
|
| 33 |
+
- Probe tags: ['duo', 'clothing', 'blush', 'anthro', '<3']
|
| 34 |
+
- Stage3-selected tags: ['blue_eyes', 'coat', 'holding_plushie', 'plushie', 'relationship', 'teal_eyes']
|
| 35 |
+
- Speculation:
|
| 36 |
+
- Eye-related words appear, which can be over-read as direct gaze.
|
| 37 |
+
- Expression/pose cues may be treated as proxy for viewer-facing gaze despite explicit no-inference instruction.
|
| 38 |
+
- Tag came from structural stage where the classifier is biased toward common portrait framing defaults.
|
| 39 |
+
|
| 40 |
+
### sample_id `2021552`
|
| 41 |
+
- GT has `looking_at_viewer`: no
|
| 42 |
+
- Selected source for `looking_at_viewer`: structural
|
| 43 |
+
- Full caption: The image showcases two anthropomorphic characters. On the left is a rabbit-like creature dressed in a white shirt and black pants, standing with crossed arms. On the right is a fox-like character wearing blue overalls and a white shirt, looking towards the rabbit with a slightly open mouth. The background is a simple grey, and both characters have distinct features such as fur, facial markings, and claws.
|
| 44 |
+
- Rewrite phrases: ['rabbit', 'crossed arms', 'white shirt', 'black pants', 'fox', 'blue overalls', 'white shirt', 'looking at rabbit', 'open mouth', 'grey background', 'fur', 'facial markings', 'claws']
|
| 45 |
+
- Structural tags: ['duo', 'anthro', 'clothed', 'looking_at_viewer']
|
| 46 |
+
- Probe tags: ['simple_background', 'felid', 'duo', 'clothing', 'canid', 'blush', 'anthro']
|
| 47 |
+
- Stage3-selected tags: ['black_pants', 'claws', 'crossed_arms', 'facial_markings', 'fur', 'grey_background', 'open_mouth', 'overalls', 'rabbit', 'shirt', 'white_shirt']
|
| 48 |
+
- Speculation:
|
| 49 |
+
- Tag came from structural stage where the classifier is biased toward common portrait framing defaults.
|
| 50 |
+
|
| 51 |
+
### sample_id `1325009`
|
| 52 |
+
- GT has `looking_at_viewer`: no
|
| 53 |
+
- Selected source for `looking_at_viewer`: structural
|
| 54 |
+
- Full caption: The image showcases an anthropomorphic tiger with striking blue eyes. He is depicted in a muscular and confident pose, with one hand raised to his head in a thoughtful or playful gesture. The tiger has a white chest with a tuft of fur, and his fur is striped in the traditional tiger pattern. He is wearing dark blue shorts, and his muscular physique is accentuated by the lighting in the background, which creates a countershading effect. The overall mood of the image is one of confidence and playfulness.
|
| 55 |
+
- Rewrite phrases: ['anthropomorphic tiger', 'blue eyes', 'muscular pose', 'raised hand', 'white chest', 'tuft of fur', 'striped fur', 'dark blue shorts', 'countershading effect', 'confident expression', 'playful gesture', 'forest background']
|
| 56 |
+
- Structural tags: ['solo', 'anthro', 'male', 'clothed', 'looking_at_viewer']
|
| 57 |
+
- Probe tags: ['solo', 'felid', 'clothing', 'bear', 'anthro']
|
| 58 |
+
- Stage3-selected tags: ['blue_eyes', 'countershade_body', 'fluffy_fur', 'forest_background', 'gesture', 'raised_hand', 'shorts', 'striped_fur', 'tiger', 'white_chest']
|
| 59 |
+
- Speculation:
|
| 60 |
+
- Eye-related words appear, which can be over-read as direct gaze.
|
| 61 |
+
- Expression/pose cues may be treated as proxy for viewer-facing gaze despite explicit no-inference instruction.
|
| 62 |
+
- Tag came from structural stage where the classifier is biased toward common portrait framing defaults.
|
| 63 |
+
|
| 64 |
+
## Cases: false-positive `anthro`
|
| 65 |
+
### sample_id `1624724`
|
| 66 |
+
- GT has `anthro`: no
|
| 67 |
+
- Selected source for `anthro`: structural
|
| 68 |
+
- Full caption: The image showcases a cartoonish, smiling creature with large, round eyes and a prominent red nose. It has a tan body with spots and possesses a unique, crosshaped mouth. The creature appears to be floating or hovering against a simple white background.
|
| 69 |
+
- Rewrite phrases: ['cartoon character', 'smiling', 'large eyes', 'red nose', 'tan body', 'spots', 'cross-shaped mouth', 'floating', 'white background']
|
| 70 |
+
- Structural tags: ['solo', 'anthro', 'ambiguous_gender', 'topless']
|
| 71 |
+
- Probe tags: ['solo', 'simple_background', 'bear']
|
| 72 |
+
- Stage3-selected tags: ['big_eyes', 'cartoon_character', 'eyes', 'floating', 'nose', 'pink_mouth', 'red_nose', 'spots', 'tan_body', 'white_background']
|
| 73 |
+
- Speculation:
|
| 74 |
+
- Generic character/creature wording without strict body-plan cues may still trigger anthro in structural/probe stages.
|
| 75 |
+
- Probe list contains anthro with glossary text, increasing its prior when any animal-like terms are present.
|
| 76 |
+
|
| 77 |
+
### sample_id `1023509`
|
| 78 |
+
- GT has `anthro`: no
|
| 79 |
+
- Selected source for `anthro`: probe
|
| 80 |
+
- Full caption: The image is a multi-panel comic strip. The first panel shows a character lying on the ground, surrounded by darkness, with a speech bubble saying 'I'm done for...'. The next panel depicts a hooded figure standing over the character, with a speech bubble saying 'You're not done for, you're just beginning.'. The following panels show a conversation between the hooded figure and another character, where the hooded figure mentions 'I'm the guardian of the realm of darkness'. The dialogue continues with the hooded figure expressing that the character has been chosen for a task. The final panels depict a group of characters, including a white-furred creature, a goat, a human, and a lizard, discussing a plan to 'defeat the darkness'. The comic ends with a textual note saying 'there is light'.
|
| 81 |
+
- Rewrite phrases: ['darkness', 'lying on ground', 'speech bubble', 'hooded figure', 'standing over', 'speech bubble', 'guardian of realm of darkness', 'chosen for task', 'white-furred creature', 'goat', 'human', 'lizard', 'defeat darkness', 'textual note', 'light']
|
| 82 |
+
- Structural tags: ['solo', 'duo', 'group', 'text']
|
| 83 |
+
- Probe tags: ['group', 'felid', 'bear', 'anthro', '<3']
|
| 84 |
+
- Stage3-selected tags: ['bubble', 'darkness', 'face_mask', 'figurine', 'goat', 'human', 'light', 'lizard', 'lying_on_ground', 'note', 'pear-shaped_figure', 'power_lines', 'speech_bubble', 'standing_over', 'texting', 'underground', 'unknown_species']
|
| 85 |
+
- Speculation:
|
| 86 |
+
- Generic character/creature wording without strict body-plan cues may still trigger anthro in structural/probe stages.
|
| 87 |
+
- Probe list contains anthro with glossary text, increasing its prior when any animal-like terms are present.
|
| 88 |
+
|
| 89 |
+
### sample_id `335343`
|
| 90 |
+
- GT has `anthro`: no
|
| 91 |
+
- Selected source for `anthro`: probe
|
| 92 |
+
- Full caption: The image showcases two animated characters lying on a bed, seemingly in a resting state. The character on the left has blonde hair, green eyes, and is wearing makeup, with a slightly annoyed or disgruntled expression. The character on the right has purple hair, blue eyes, and a more relaxed or sleeping expression. Between them, there's a text that reads 'Look Before You Sleep', written in a playful font. The image also has a watermark at the bottom left corner that says 'SkyPony'. The overall color palette is dominated by shades of blue and purple, creating a serene and calming ambiance.
|
| 93 |
+
- Rewrite phrases: ['blonde hair', 'green eyes', 'makeup', 'annoyed expression', 'purple hair', 'blue eyes', 'sleeping expression', 'text', 'playful font', 'Look Before You Sleep', 'SkyPony watermark', 'blue and purple color palette', 'serene ambiance', 'bedroom scene', 'two characters lying down', 'resting state', 'calm atmosphere']
|
| 94 |
+
- Structural tags: ['duo', 'humanoid', 'text']
|
| 95 |
+
- Probe tags: ['text', 'simple_background', 'felid', 'duo', 'blush', 'anthro']
|
| 96 |
+
- Stage3-selected tags: ['annoyed_expression', 'atmosphere', 'bedroom', 'blonde_hair', 'blue_eyes', 'distracting_watermark', 'eyes', 'font', 'green_eyes', 'hair', 'lying', 'makeup', 'palette', 'playful', 'purple_hair', 'purple_membrane', 'resting', 'romantic_ambiance', 'sleeping', 'stats', 'text', 'walking', 'watermark']
|
| 97 |
+
- Speculation:
|
| 98 |
+
- Generic character/creature wording without strict body-plan cues may still trigger anthro in structural/probe stages.
|
| 99 |
+
- Probe list contains anthro with glossary text, increasing its prior when any animal-like terms are present.
|
| 100 |
+
|
| 101 |
+
### sample_id `2034167`
|
| 102 |
+
- GT has `anthro`: no
|
| 103 |
+
- Selected source for `anthro`: structural, probe
|
| 104 |
+
- Full caption: The image showcases a vibrant, animated character that appears to be a fusion of a canine and a humanoid. The character has striking blue eyes, a blue nose, and a purple body with white fur. The character's fur is adorned with vivid pink and blue stripes, and it has a playful, open-mouthed expression. The character's tail is long and curved, with a mix of blue and pink hues. The background is simple, allowing the character to be the focal point. The character appears to be in a dynamic pose, possibly mid-stride or jump.
|
| 105 |
+
- Rewrite phrases: ['blue eyes', 'purple body', 'white fur', 'pink and blue stripes', 'long curved tail', 'blue and pink tail', 'open mouth', 'dynamic pose', 'simple background', 'animated character', 'canine humanoid', 'blue nose']
|
| 106 |
+
- Structural tags: ['solo', 'anthro', 'ambiguous_gender', 'topless']
|
| 107 |
+
- Probe tags: ['solo', 'simple_background', 'canid', 'anthro']
|
| 108 |
+
- Stage3-selected tags: ['action_pose', 'animated_png', 'blue_eyes', 'blue_nose', 'canine_humanoid', 'curved_tail', 'eyes', 'fur', 'half-length_portrait', 'humanoid', 'invalid_background', 'nose', 'open_mouth', 'pink_stripes', 'pink_tail', 'pose', 'purple_body', 'simple_background', 'stripes', 'tail', 'white_fur']
|
| 109 |
+
- Speculation:
|
| 110 |
+
- Generic character/creature wording without strict body-plan cues may still trigger anthro in structural/probe stages.
|
| 111 |
+
- Probe list contains anthro with glossary text, increasing its prior when any animal-like terms are present.
|
| 112 |
+
|
| 113 |
+
### sample_id `260449`
|
| 114 |
+
- GT has `anthro`: no
|
| 115 |
+
- Selected source for `anthro`: structural, probe
|
| 116 |
+
- Full caption: The image showcases a group of animated characters. On the left, there's a large, jovial ape with a wide grin, raised arms, and a playful expression. In the center, a large, jovial bear is seen laughing and playfully interacting with a young boy, who is dancing with his arms raised. The boy has a cheerful expression and is wearing a loincloth. On the right, there's a smaller, mischievous-looking primate with a tuft of hair on its head, looking directly at the viewer with a cheeky grin. The background is simple, emphasizing the characters.
|
| 117 |
+
- Rewrite phrases: ['ape', 'raised arms', 'wide grin', 'playful expression', 'bear', 'laughing', 'interacting with boy', 'boy', 'dancing', 'arms raised', 'cheerful expression', 'loincloth', 'primate', 'tuft of hair', 'looking at viewer', 'cheeky grin', 'simple background']
|
| 118 |
+
- Structural tags: ['trio', 'anthro', 'feral', 'male', 'clothed', 'topless', 'looking_at_viewer']
|
| 119 |
+
- Probe tags: ['simple_background', 'group', 'duo', 'bear', 'anthro']
|
| 120 |
+
- Stage3-selected tags: ['ape', 'bear', 'cheeky', 'dancing', 'grin', 'hair', 'laugh', 'loincloth', 'looking_at_viewer', 'male', 'primate', 'raised_arm', 'raised_arms', 'simple_background', 'wide_grin']
|
| 121 |
+
- Speculation:
|
| 122 |
+
- Generic character/creature wording without strict body-plan cues may still trigger anthro in structural/probe stages.
|
| 123 |
+
- Probe list contains anthro with glossary text, increasing its prior when any animal-like terms are present.
|
| 124 |
+
|
| 125 |
+
## Cases: false-positive `bear`
|
| 126 |
+
### sample_id `1624724`
|
| 127 |
+
- GT has `bear`: no
|
| 128 |
+
- Selected source for `bear`: probe
|
| 129 |
+
- Full caption: The image showcases a cartoonish, smiling creature with large, round eyes and a prominent red nose. It has a tan body with spots and possesses a unique, crosshaped mouth. The creature appears to be floating or hovering against a simple white background.
|
| 130 |
+
- Rewrite phrases: ['cartoon character', 'smiling', 'large eyes', 'red nose', 'tan body', 'spots', 'cross-shaped mouth', 'floating', 'white background']
|
| 131 |
+
- Structural tags: ['solo', 'anthro', 'ambiguous_gender', 'topless']
|
| 132 |
+
- Probe tags: ['solo', 'simple_background', 'bear']
|
| 133 |
+
- Stage3-selected tags: ['big_eyes', 'cartoon_character', 'eyes', 'floating', 'nose', 'pink_mouth', 'red_nose', 'spots', 'tan_body', 'white_background']
|
| 134 |
+
- Speculation:
|
| 135 |
+
- Broad animal appearance cues can match bear weakly when species is underspecified.
|
| 136 |
+
- Bear is injected by probe stage as a standalone species guess (not structural), so one mistaken probe decision adds it directly.
|
| 137 |
+
|
| 138 |
+
### sample_id `1023509`
|
| 139 |
+
- GT has `bear`: no
|
| 140 |
+
- Selected source for `bear`: probe
|
| 141 |
+
- Full caption: The image is a multi-panel comic strip. The first panel shows a character lying on the ground, surrounded by darkness, with a speech bubble saying 'I'm done for...'. The next panel depicts a hooded figure standing over the character, with a speech bubble saying 'You're not done for, you're just beginning.'. The following panels show a conversation between the hooded figure and another character, where the hooded figure mentions 'I'm the guardian of the realm of darkness'. The dialogue continues with the hooded figure expressing that the character has been chosen for a task. The final panels depict a group of characters, including a white-furred creature, a goat, a human, and a lizard, discussing a plan to 'defeat the darkness'. The comic ends with a textual note saying 'there is light'.
|
| 142 |
+
- Rewrite phrases: ['darkness', 'lying on ground', 'speech bubble', 'hooded figure', 'standing over', 'speech bubble', 'guardian of realm of darkness', 'chosen for task', 'white-furred creature', 'goat', 'human', 'lizard', 'defeat darkness', 'textual note', 'light']
|
| 143 |
+
- Structural tags: ['solo', 'duo', 'group', 'text']
|
| 144 |
+
- Probe tags: ['group', 'felid', 'bear', 'anthro', '<3']
|
| 145 |
+
- Stage3-selected tags: ['bubble', 'darkness', 'face_mask', 'figurine', 'goat', 'human', 'light', 'lizard', 'lying_on_ground', 'note', 'pear-shaped_figure', 'power_lines', 'speech_bubble', 'standing_over', 'texting', 'underground', 'unknown_species']
|
| 146 |
+
- Speculation:
|
| 147 |
+
- Broad animal appearance cues can match bear weakly when species is underspecified.
|
| 148 |
+
- Bear is injected by probe stage as a standalone species guess (not structural), so one mistaken probe decision adds it directly.
|
| 149 |
+
|
| 150 |
+
### sample_id `1325009`
|
| 151 |
+
- GT has `bear`: no
|
| 152 |
+
- Selected source for `bear`: probe
|
| 153 |
+
- Full caption: The image showcases an anthropomorphic tiger with striking blue eyes. He is depicted in a muscular and confident pose, with one hand raised to his head in a thoughtful or playful gesture. The tiger has a white chest with a tuft of fur, and his fur is striped in the traditional tiger pattern. He is wearing dark blue shorts, and his muscular physique is accentuated by the lighting in the background, which creates a countershading effect. The overall mood of the image is one of confidence and playfulness.
|
| 154 |
+
- Rewrite phrases: ['anthropomorphic tiger', 'blue eyes', 'muscular pose', 'raised hand', 'white chest', 'tuft of fur', 'striped fur', 'dark blue shorts', 'countershading effect', 'confident expression', 'playful gesture', 'forest background']
|
| 155 |
+
- Structural tags: ['solo', 'anthro', 'male', 'clothed', 'looking_at_viewer']
|
| 156 |
+
- Probe tags: ['solo', 'felid', 'clothing', 'bear', 'anthro']
|
| 157 |
+
- Stage3-selected tags: ['blue_eyes', 'countershade_body', 'fluffy_fur', 'forest_background', 'gesture', 'raised_hand', 'shorts', 'striped_fur', 'tiger', 'white_chest']
|
| 158 |
+
- Speculation:
|
| 159 |
+
- Bear is injected by probe stage as a standalone species guess (not structural), so one mistaken probe decision adds it directly.
|
data/runtime_debug/llm_capture_20260302T162119Z/input_prompt.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup
|
data/runtime_debug/llm_capture_20260302T162202Z/input_prompt.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup
|
data/runtime_debug/llm_capture_20260302T162202Z/structural_request.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"messages": [
|
| 3 |
+
{
|
| 4 |
+
"role": "system",
|
| 5 |
+
"content": "You classify image descriptions by selecting true statements from a numbered list.\n\nThe statements are organized into GROUPS. Each group header tells you how many to pick:\n- \"pick EXACTLY ONE\" = choose the single best match in that group\n- \"pick ALL that apply\" = choose every statement that is true\n\nIMPORTANT RULES:\n1. ONLY select a statement if the description directly says it or makes it very obvious.\n2. Do NOT guess or assume things the description does not mention.\n3. For body type: \"anthro\" means an ANIMAL with a human-shaped body (walks upright, has hands, but still has fur/tail/muzzle). \"humanoid\" means HUMAN or human-like with NO animal features. A wolf standing on two legs = anthro, NOT humanoid.\n4. For gender: only select male/female/intersex when there is explicit textual evidence (such as gender words or pronouns). Do not infer gender from species, body shape, clothing, or style. If no reliable gender cue is present, do not select male/female/intersex; use ambiguous_gender instead.\n5. For clothing state: READ CAREFULLY! \"topless\" = bare chest, wearing pants. \"bottomless\" = wearing shirt, no pants. If unsure, re-read the description.\n6. If clothing is not mentioned, do NOT pick any clothing statement.\n\nReturn JSON ONLY:\n{\"selections\": [{\"i\": 1}, {\"i\": 5}]}\n\nEXAMPLE:\nDescription: \"A muscular male wolf standing in a forest, wearing jeans, giving a thumbs up\"\nAnswer: {\"selections\": [{\"i\": 2}, {\"i\": 6}, {\"i\": 10}, {\"i\": 14}]}\nWhy: One character = solo (2). Wolf standing upright with hands = anthro (6), NOT humanoid because it is a wolf. Male (10). Wearing jeans = clothed (14)."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"role": "human",
|
| 9 |
+
"content": "Read this image description and select which statements are true.\n\nIMAGE DESCRIPTION:\nA young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup\n\nSTATEMENTS (pick by number):\n--- CHARACTER COUNT (pick EXACTLY ONE) ---\n1. No characters or living beings are visible in the image.\n2. Exactly one character is visible in the image.\n3. Exactly two characters are visible in the image.\n4. Exactly three characters are visible in the image; select only when the count is clearly three.\n5. Four or more characters are visible in the image; do not use for one, two, or three.\n\n--- BODY TYPE (pick ALL that apply) ---\n6. An animal/furry character with BOTH human-like body plan (upright stance, human-like torso/arms/hands) and clear animal traits (fur, muzzle, tail, animal ears, paws, or species cues).\n7. A non-humanoid animal body plan (typically quadruped or otherwise animal-shaped, without human-like torso/hands). Do not select if explicitly anthropomorphic.\n8. A human or near-human character with no explicit animal-species traits. Do not select if animal species words or animal traits (muzzle, tail, paws, animal ears, heavy fur coat, scales) are present.\n9. Select only for an explicit centaur-like body plan: a humanoid upper torso attached to a separate four-legged lower body.\n\n--- GENDER (pick ALL that apply) ---\n10. Select only when the description explicitly indicates male presentation or identity, such as male/man/boy/he/him/his/father/husband/boyfriend. 'boy' and male pronouns count as explicit evidence.\n11. Select only when the description explicitly indicates female presentation or identity, such as female/woman/girl/she/her/hers/mother/wife/girlfriend. 'girl' and female pronouns count as explicit evidence.\n12. 
Select only when the description explicitly says gender is unknown, ambiguous, androgynous, mixed, or not determinable. Do not use this as a default fallback when gender is simply unmentioned.\n13. Select only when intersex or mixed-sex-traits wording is explicit in the description.\n\n--- CLOTHING STATE (pick ALL that apply) ---\n14. At least one character is explicitly described as wearing clothing or a garment (for example shirt, pants, shorts, dress, coat, loincloth, armor, uniform).\n15. Select only when the description explicitly indicates no clothing (nude/naked/unclothed). Do not infer nude just because clothing is not mentioned.\n16. The upper body/chest is uncovered while lower body has clothing. This includes descriptions with shorts/pants/loincloth and no shirt/top.\n17. The lower body is uncovered while the upper body has clothing.\n\n--- VISUAL ELEMENTS (pick ALL that apply) ---\n18. Select only when explicit gaze wording appears (looking at viewer, looking at camera, looking directly at us, direct eye contact). Do not infer from front view, pose, or expression.\n19. Visible written text, dialogue, signs, or lettering appear in the image.\n"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
"n_statements": 19
|
| 13 |
+
}
|
data/runtime_debug/llm_capture_20260302T162202Z/structural_response_parsed.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"selections": [
|
| 3 |
+
{
|
| 4 |
+
"i": 2
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"i": 6
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"i": 10
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"i": 14
|
| 14 |
+
}
|
| 15 |
+
]
|
| 16 |
+
}
|
data/runtime_debug/llm_capture_20260302T162202Z/structural_response_raw.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"selections": [{"i": 2}, {"i": 6}, {"i": 10}, {"i": 14}]}
|
data/runtime_debug/llm_capture_20260302T162249Z/input_prompt.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup
|
data/runtime_debug/llm_capture_20260302T162249Z/probe_request.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"messages": [
|
| 3 |
+
{
|
| 4 |
+
"role": "system",
|
| 5 |
+
"content": "You are given a description of an image and a list of imageboard tags.\n\nSelect the tags that correspond to content that would be visible or depicted in the described image.\n\nThe list contains only valid tags; many of them are irrelevant to the image.\n\nReturn JSON ONLY matching this schema:\n\n{\n \"selections\": [\n {\"i\": <int>, \"why\": \"<one of: explicit|strong_implied|weak_implied|style_or_meta|other>\"},\n ...\n ]\n}\n\nRules:\n- Choose ONLY from indices 1..13.\n- Do NOT output tag text.\n- Do NOT output any keys other than \"selections\", and inside each item only the item index \"i\" and \"why\".\n- Do select both a general tag and a more specific tag when both apply (for example, \"shirt\" and \"grey shirt\").\n\nDefine \"why\" as:\n- explicit: directly stated in the image description\n- strong_implied: very likely given the description, even if not literally stated\n- weak_implied: plausible but not strongly supported by the description\n- style_or_meta: stylistic or presentation-related tags only if clearly indicated\n- other: fallback category; use sparingly\n"
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"role": "human",
|
| 9 |
+
"content": "IMAGE DESCRIPTION:\nA young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup\n\nCANDIDATES (choose by index only):\n1. clothing\n2. simple background\n3. anthro - This tag is short for \"anthropomorphic animals\".\n4. text\n5. blush\n6. canid - The entirety of the dog family, Canidae.\n7. solo - When a scene only features one character.\n8. duo - Tagged for posts that feature two characters, no more and no less than two.\n9. group\n10. felid - The entirety of the cat family.\n11. bird\n12. bear\n13. <3 - The symbol of love, and pleasure.\n\nSelect up to 13 indices. Output fewer if uncertain.\n"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
"n_candidates": 13,
|
| 13 |
+
"per_call_budget": 13
|
| 14 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/probe_response_parsed.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"selections": [
|
| 3 |
+
{
|
| 4 |
+
"i": 3,
|
| 5 |
+
"why": "explicit"
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"i": 7,
|
| 9 |
+
"why": "explicit"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"i": 1,
|
| 13 |
+
"why": "explicit"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"i": 13,
|
| 17 |
+
"why": "explicit"
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"i": 6,
|
| 21 |
+
"why": "explicit"
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"i": 4,
|
| 25 |
+
"why": "style_or_meta"
|
| 26 |
+
}
|
| 27 |
+
]
|
| 28 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/probe_response_raw.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"selections": [
|
| 3 |
+
{"i": 3, "why": "explicit"},
|
| 4 |
+
{"i": 7, "why": "explicit"},
|
| 5 |
+
{"i": 1, "why": "explicit"},
|
| 6 |
+
{"i": 13, "why": "explicit"},
|
| 7 |
+
{"i": 6, "why": "explicit"},
|
| 8 |
+
{"i": 4, "why": "style_or_meta"}
|
| 9 |
+
]
|
| 10 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/selection_request.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"messages": [
|
| 3 |
+
{
|
| 4 |
+
"role": "system",
|
| 5 |
+
"content": "You are given a description of an image and a list of imageboard tags.\n\nSelect the tags that correspond to content that would be visible or depicted in the described image.\n\nThe list contains only valid tags; many of them are irrelevant to the image.\n\nReturn JSON ONLY matching this schema:\n\n{\n \"selections\": [\n {\"i\": <int>, \"why\": \"<one of: explicit|strong_implied|weak_implied|style_or_meta|other>\"},\n ...\n ]\n}\n\nRules:\n- Choose ONLY from indices 1..19.\n- Do NOT output tag text.\n- Do NOT output any keys other than \"selections\", and inside each item only the item index \"i\" and \"why\".\n- Do select both a general tag and a more specific tag when both apply (for example, \"shirt\" and \"grey shirt\").\n\nDefine \"why\" as:\n- explicit: directly stated in the image description\n- strong_implied: very likely given the description, even if not literally stated\n- weak_implied: plausible but not strongly supported by the description\n- style_or_meta: stylistic or presentation-related tags only if clearly indicated\n- other: fallback category; use sparingly\n"
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"role": "human",
|
| 9 |
+
"content": "IMAGE DESCRIPTION:\nIMAGE DESCRIPTION: A young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup\nREWRITE PHRASES: young male, anthro fox, red fur, white chest fluff, black hoodie, jeans, standing, city street, night, looking at viewer, slight smile, holding coffee cup\nINFERRED TAG HINTS (context only): <3, anthro, canid, clothed, clothing, looking_at_viewer, male, solo\n\nCANDIDATES (choose by index only):\n1. black hoodie\n2. hoodie\n3. jeans\n4. young male\n5. street\n6. holding coffee cup\n7. cup\n8. fluffy\n9. standing\n10. fur\n11. slight smile\n12. smile\n13. night\n14. looking at viewer\n15. male\n16. viewer\n17. fox\n18. red fur\n19. white inner ear fluff\n\nSelect up to 42 indices. Output fewer if uncertain.\n"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
"n_candidates": 19,
|
| 13 |
+
"per_call_budget": 42,
|
| 14 |
+
"mode": "chunked_map_union",
|
| 15 |
+
"chunk_size": 60,
|
| 16 |
+
"selection_per_phrase_k": 2,
|
| 17 |
+
"retrieved_candidate_tags": [
|
| 18 |
+
"black_hoodie",
|
| 19 |
+
"hoodie",
|
| 20 |
+
"jeans",
|
| 21 |
+
"young_male",
|
| 22 |
+
"street",
|
| 23 |
+
"holding_coffee_cup",
|
| 24 |
+
"cup",
|
| 25 |
+
"fluffy",
|
| 26 |
+
"standing",
|
| 27 |
+
"fur",
|
| 28 |
+
"slight_smile",
|
| 29 |
+
"smile",
|
| 30 |
+
"night",
|
| 31 |
+
"looking_at_viewer",
|
| 32 |
+
"male",
|
| 33 |
+
"viewer",
|
| 34 |
+
"fox",
|
| 35 |
+
"red_fur",
|
| 36 |
+
"white_inner_ear_fluff"
|
| 37 |
+
]
|
| 38 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/selection_response_parsed.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"selections": []
|
| 3 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/selection_response_raw.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"selections":[]}
|
data/runtime_debug/llm_capture_20260302T162249Z/structural_request.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"messages": [
|
| 3 |
+
{
|
| 4 |
+
"role": "system",
|
| 5 |
+
"content": "You classify image descriptions by selecting true statements from a numbered list.\n\nThe statements are organized into GROUPS. Each group header tells you how many to pick:\n- \"pick EXACTLY ONE\" = choose the single best match in that group\n- \"pick ALL that apply\" = choose every statement that is true\n\nIMPORTANT RULES:\n1. ONLY select a statement if the description directly says it or makes it very obvious.\n2. Do NOT guess or assume things the description does not mention.\n3. For body type: \"anthro\" means an ANIMAL with a human-shaped body (walks upright, has hands, but still has fur/tail/muzzle). \"humanoid\" means HUMAN or human-like with NO animal features. A wolf standing on two legs = anthro, NOT humanoid.\n4. For gender: only select male/female/intersex when there is explicit textual evidence (such as gender words or pronouns). Do not infer gender from species, body shape, clothing, or style. If no reliable gender cue is present, do not select male/female/intersex; use ambiguous_gender instead.\n5. For clothing state: READ CAREFULLY! \"topless\" = bare chest, wearing pants. \"bottomless\" = wearing shirt, no pants. If unsure, re-read the description.\n6. If clothing is not mentioned, do NOT pick any clothing statement.\n\nReturn JSON ONLY:\n{\"selections\": [{\"i\": 1}, {\"i\": 5}]}\n\nEXAMPLE:\nDescription: \"A muscular male wolf standing in a forest, wearing jeans, giving a thumbs up\"\nAnswer: {\"selections\": [{\"i\": 2}, {\"i\": 6}, {\"i\": 10}, {\"i\": 14}]}\nWhy: One character = solo (2). Wolf standing upright with hands = anthro (6), NOT humanoid because it is a wolf. Male (10). Wearing jeans = clothed (14)."
|
| 6 |
+
},
|
| 7 |
+
{
|
| 8 |
+
"role": "human",
|
| 9 |
+
"content": "Read this image description and select which statements are true.\n\nIMAGE DESCRIPTION:\nA young male anthro fox with red fur and white chest fluff wearing a black hoodie and jeans, standing in a city street at night, looking at viewer, slight smile, holding a coffee cup\n\nSTATEMENTS (pick by number):\n--- CHARACTER COUNT (pick EXACTLY ONE) ---\n1. No characters or living beings are visible in the image.\n2. Exactly one character is visible in the image.\n3. Exactly two characters are visible in the image.\n4. Exactly three characters are visible in the image; select only when the count is clearly three.\n5. Four or more characters are visible in the image; do not use for one, two, or three.\n\n--- BODY TYPE (pick ALL that apply) ---\n6. An animal/furry character with BOTH human-like body plan (upright stance, human-like torso/arms/hands) and clear animal traits (fur, muzzle, tail, animal ears, paws, or species cues).\n7. A non-humanoid animal body plan (typically quadruped or otherwise animal-shaped, without human-like torso/hands). Do not select if explicitly anthropomorphic.\n8. A human or near-human character with no explicit animal-species traits. Do not select if animal species words or animal traits (muzzle, tail, paws, animal ears, heavy fur coat, scales) are present.\n9. Select only for an explicit centaur-like body plan: a humanoid upper torso attached to a separate four-legged lower body.\n\n--- GENDER (pick ALL that apply) ---\n10. Select only when the description explicitly indicates male presentation or identity, such as male/man/boy/he/him/his/father/husband/boyfriend. 'boy' and male pronouns count as explicit evidence.\n11. Select only when the description explicitly indicates female presentation or identity, such as female/woman/girl/she/her/hers/mother/wife/girlfriend. 'girl' and female pronouns count as explicit evidence.\n12. 
Select only when the description explicitly says gender is unknown, ambiguous, androgynous, mixed, or not determinable. Do not use this as a default fallback when gender is simply unmentioned.\n13. Select only when intersex or mixed-sex-traits wording is explicit in the description.\n\n--- CLOTHING STATE (pick ALL that apply) ---\n14. At least one character is explicitly described as wearing clothing or a garment (for example shirt, pants, shorts, dress, coat, loincloth, armor, uniform).\n15. Select only when the description explicitly indicates no clothing (nude/naked/unclothed). Do not infer nude just because clothing is not mentioned.\n16. The upper body/chest is uncovered while lower body has clothing. This includes descriptions with shorts/pants/loincloth and no shirt/top.\n17. The lower body is uncovered while the upper body has clothing.\n\n--- VISUAL ELEMENTS (pick ALL that apply) ---\n18. Select only when explicit gaze wording appears (looking at viewer, looking at camera, looking directly at us, direct eye contact). Do not infer from front view, pose, or expression.\n19. Visible written text, dialogue, signs, or lettering appear in the image.\n"
|
| 10 |
+
}
|
| 11 |
+
],
|
| 12 |
+
"n_statements": 19
|
| 13 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/structural_response_parsed.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"selections": [
|
| 3 |
+
{
|
| 4 |
+
"i": 2
|
| 5 |
+
},
|
| 6 |
+
{
|
| 7 |
+
"i": 6
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"i": 10
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"i": 14
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"i": 18
|
| 17 |
+
}
|
| 18 |
+
]
|
| 19 |
+
}
|
data/runtime_debug/llm_capture_20260302T162249Z/structural_response_raw.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"selections": [{"i": 2}, {"i": 6}, {"i": 10}, {"i": 14}, {"i": 18}]}
|
data/runtime_debug/llm_capture_20260302T162249Z/summary.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"rewritten": "young male, anthro fox, red fur, white chest fluff, black hoodie, jeans, standing, city street, night, looking at viewer, slight smile, holding coffee cup",
|
| 3 |
+
"structural_tags": [
|
| 4 |
+
"solo",
|
| 5 |
+
"anthro",
|
| 6 |
+
"male",
|
| 7 |
+
"clothed",
|
| 8 |
+
"looking_at_viewer"
|
| 9 |
+
],
|
| 10 |
+
"probe_tags": [
|
| 11 |
+
"anthro",
|
| 12 |
+
"solo",
|
| 13 |
+
"clothing",
|
| 14 |
+
"<3",
|
| 15 |
+
"canid"
|
| 16 |
+
],
|
| 17 |
+
"n_retrieved_candidates": 19,
|
| 18 |
+
"retrieved_candidates": [
|
| 19 |
+
"black_hoodie",
|
| 20 |
+
"hoodie",
|
| 21 |
+
"jeans",
|
| 22 |
+
"young_male",
|
| 23 |
+
"street",
|
| 24 |
+
"holding_coffee_cup",
|
| 25 |
+
"cup",
|
| 26 |
+
"fluffy",
|
| 27 |
+
"standing",
|
| 28 |
+
"fur",
|
| 29 |
+
"slight_smile",
|
| 30 |
+
"smile",
|
| 31 |
+
"night",
|
| 32 |
+
"looking_at_viewer",
|
| 33 |
+
"male",
|
| 34 |
+
"viewer",
|
| 35 |
+
"fox",
|
| 36 |
+
"red_fur",
|
| 37 |
+
"white_inner_ear_fluff"
|
| 38 |
+
],
|
| 39 |
+
"files": [
|
| 40 |
+
"input_prompt.txt",
|
| 41 |
+
"probe_request.json",
|
| 42 |
+
"probe_response_parsed.json",
|
| 43 |
+
"probe_response_raw.txt",
|
| 44 |
+
"selection_request.json",
|
| 45 |
+
"selection_response_parsed.json",
|
| 46 |
+
"selection_response_raw.txt",
|
| 47 |
+
"structural_request.json",
|
| 48 |
+
"structural_response_parsed.json",
|
| 49 |
+
"structural_response_raw.txt"
|
| 50 |
+
]
|
| 51 |
+
}
|
data/runtime_debug/selection_why_vs_no_why_20260302T191813Z.json
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp_utc": "2026-03-02T19:15:40Z",
|
| 3 |
+
"model": "meta-llama/llama-3.1-8b-instruct",
|
| 4 |
+
"n_samples": 10,
|
| 5 |
+
"results": [
|
| 6 |
+
{
|
| 7 |
+
"id": 3285630,
|
| 8 |
+
"n_candidates": 18,
|
| 9 |
+
"N": 18,
|
| 10 |
+
"per_call_budget": 38,
|
| 11 |
+
"with_why": {
|
| 12 |
+
"label": "with_why",
|
| 13 |
+
"ok": true,
|
| 14 |
+
"failure_type": "ok",
|
| 15 |
+
"error": null,
|
| 16 |
+
"latency_s": 0.9805651999922702
|
| 17 |
+
},
|
| 18 |
+
"no_why": {
|
| 19 |
+
"label": "no_why",
|
| 20 |
+
"ok": true,
|
| 21 |
+
"failure_type": "ok",
|
| 22 |
+
"error": null,
|
| 23 |
+
"latency_s": 2.9448838000098476
|
| 24 |
+
}
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"id": 260449,
|
| 28 |
+
"n_candidates": 30,
|
| 29 |
+
"N": 30,
|
| 30 |
+
"per_call_budget": 74,
|
| 31 |
+
"with_why": {
|
| 32 |
+
"label": "with_why",
|
| 33 |
+
"ok": true,
|
| 34 |
+
"failure_type": "ok",
|
| 35 |
+
"error": null,
|
| 36 |
+
"latency_s": 5.376112800004194
|
| 37 |
+
},
|
| 38 |
+
"no_why": {
|
| 39 |
+
"label": "no_why",
|
| 40 |
+
"ok": true,
|
| 41 |
+
"failure_type": "ok",
|
| 42 |
+
"error": null,
|
| 43 |
+
"latency_s": 1.4645917000016198
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"id": 1078019,
|
| 48 |
+
"n_candidates": 16,
|
| 49 |
+
"N": 16,
|
| 50 |
+
"per_call_budget": 38,
|
| 51 |
+
"with_why": {
|
| 52 |
+
"label": "with_why",
|
| 53 |
+
"ok": true,
|
| 54 |
+
"failure_type": "ok",
|
| 55 |
+
"error": null,
|
| 56 |
+
"latency_s": 6.614833399988129
|
| 57 |
+
},
|
| 58 |
+
"no_why": {
|
| 59 |
+
"label": "no_why",
|
| 60 |
+
"ok": true,
|
| 61 |
+
"failure_type": "ok",
|
| 62 |
+
"error": null,
|
| 63 |
+
"latency_s": 0.9294685000058962
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"id": 1624724,
|
| 68 |
+
"n_candidates": 17,
|
| 69 |
+
"N": 17,
|
| 70 |
+
"per_call_budget": 36,
|
| 71 |
+
"with_why": {
|
| 72 |
+
"label": "with_why",
|
| 73 |
+
"ok": true,
|
| 74 |
+
"failure_type": "ok",
|
| 75 |
+
"error": null,
|
| 76 |
+
"latency_s": 3.396455699999933
|
| 77 |
+
},
|
| 78 |
+
"no_why": {
|
| 79 |
+
"label": "no_why",
|
| 80 |
+
"ok": true,
|
| 81 |
+
"failure_type": "ok",
|
| 82 |
+
"error": null,
|
| 83 |
+
"latency_s": 2.18461219999881
|
| 84 |
+
}
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"id": 1325009,
|
| 88 |
+
"n_candidates": 21,
|
| 89 |
+
"N": 21,
|
| 90 |
+
"per_call_budget": 46,
|
| 91 |
+
"with_why": {
|
| 92 |
+
"label": "with_why",
|
| 93 |
+
"ok": true,
|
| 94 |
+
"failure_type": "ok",
|
| 95 |
+
"error": null,
|
| 96 |
+
"latency_s": 5.769819000008283
|
| 97 |
+
},
|
| 98 |
+
"no_why": {
|
| 99 |
+
"label": "no_why",
|
| 100 |
+
"ok": true,
|
| 101 |
+
"failure_type": "ok",
|
| 102 |
+
"error": null,
|
| 103 |
+
"latency_s": 2.0555457000009483
|
| 104 |
+
}
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"id": 1023509,
|
| 108 |
+
"n_candidates": 23,
|
| 109 |
+
"N": 23,
|
| 110 |
+
"per_call_budget": 58,
|
| 111 |
+
"with_why": {
|
| 112 |
+
"label": "with_why",
|
| 113 |
+
"ok": true,
|
| 114 |
+
"failure_type": "ok",
|
| 115 |
+
"error": null,
|
| 116 |
+
"latency_s": 5.662558600000921
|
| 117 |
+
},
|
| 118 |
+
"no_why": {
|
| 119 |
+
"label": "no_why",
|
| 120 |
+
"ok": true,
|
| 121 |
+
"failure_type": "ok",
|
| 122 |
+
"error": null,
|
| 123 |
+
"latency_s": 5.300095500002499
|
| 124 |
+
}
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"id": 335343,
|
| 128 |
+
"n_candidates": 26,
|
| 129 |
+
"N": 26,
|
| 130 |
+
"per_call_budget": 56,
|
| 131 |
+
"with_why": {
|
| 132 |
+
"label": "with_why",
|
| 133 |
+
"ok": true,
|
| 134 |
+
"failure_type": "ok",
|
| 135 |
+
"error": null,
|
| 136 |
+
"latency_s": 1.9454404999996768
|
| 137 |
+
},
|
| 138 |
+
"no_why": {
|
| 139 |
+
"label": "no_why",
|
| 140 |
+
"ok": true,
|
| 141 |
+
"failure_type": "ok",
|
| 142 |
+
"error": null,
|
| 143 |
+
"latency_s": 4.1100776999956
|
| 144 |
+
}
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"id": 17482,
|
| 148 |
+
"n_candidates": 14,
|
| 149 |
+
"N": 14,
|
| 150 |
+
"per_call_budget": 28,
|
| 151 |
+
"with_why": {
|
| 152 |
+
"label": "with_why",
|
| 153 |
+
"ok": true,
|
| 154 |
+
"failure_type": "ok",
|
| 155 |
+
"error": null,
|
| 156 |
+
"latency_s": 3.7595577000029152
|
| 157 |
+
},
|
| 158 |
+
"no_why": {
|
| 159 |
+
"label": "no_why",
|
| 160 |
+
"ok": true,
|
| 161 |
+
"failure_type": "ok",
|
| 162 |
+
"error": null,
|
| 163 |
+
"latency_s": 3.8396145999868168
|
| 164 |
+
}
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"id": 2021552,
|
| 168 |
+
"n_candidates": 19,
|
| 169 |
+
"N": 19,
|
| 170 |
+
"per_call_budget": 42,
|
| 171 |
+
"with_why": {
|
| 172 |
+
"label": "with_why",
|
| 173 |
+
"ok": true,
|
| 174 |
+
"failure_type": "ok",
|
| 175 |
+
"error": null,
|
| 176 |
+
"latency_s": 4.009010099995066
|
| 177 |
+
},
|
| 178 |
+
"no_why": {
|
| 179 |
+
"label": "no_why",
|
| 180 |
+
"ok": true,
|
| 181 |
+
"failure_type": "ok",
|
| 182 |
+
"error": null,
|
| 183 |
+
"latency_s": 3.4596964999946067
|
| 184 |
+
}
|
| 185 |
+
},
|
| 186 |
+
{
|
| 187 |
+
"id": 2034167,
|
| 188 |
+
"n_candidates": 27,
|
| 189 |
+
"N": 27,
|
| 190 |
+
"per_call_budget": 58,
|
| 191 |
+
"with_why": {
|
| 192 |
+
"label": "with_why",
|
| 193 |
+
"ok": true,
|
| 194 |
+
"failure_type": "ok",
|
| 195 |
+
"error": null,
|
| 196 |
+
"latency_s": 2.2680347000132315
|
| 197 |
+
},
|
| 198 |
+
"no_why": {
|
| 199 |
+
"label": "no_why",
|
| 200 |
+
"ok": true,
|
| 201 |
+
"failure_type": "ok",
|
| 202 |
+
"error": null,
|
| 203 |
+
"latency_s": 6.154438600002322
|
| 204 |
+
}
|
| 205 |
+
}
|
| 206 |
+
],
|
| 207 |
+
"summary": {
|
| 208 |
+
"with_why": {
|
| 209 |
+
"ok": 10
|
| 210 |
+
},
|
| 211 |
+
"no_why": {
|
| 212 |
+
"ok": 10
|
| 213 |
+
},
|
| 214 |
+
"avg_latency_s_with_why": 3.9782387700004618,
|
| 215 |
+
"avg_latency_s_no_why": 3.2443024799998965
|
| 216 |
+
}
|
| 217 |
+
}
|
data/runtime_debug/whyless_replication_seeds_42_43_20260303T060318Z.json
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp_utc": "2026-03-03T05:51:00Z",
|
| 3 |
+
"seeds": [
|
| 4 |
+
42,
|
| 5 |
+
43
|
| 6 |
+
],
|
| 7 |
+
"variants": {
|
| 8 |
+
"with_why_explicit": {
|
| 9 |
+
"by_seed": {
|
| 10 |
+
"42": {
|
| 11 |
+
"n": 10,
|
| 12 |
+
"P": 0.5163875544168061,
|
| 13 |
+
"R": 0.6743697968697968,
|
| 14 |
+
"F1": 0.5748467389701281,
|
| 15 |
+
"t3_s": 8.31660475730896,
|
| 16 |
+
"attempts_total": 0,
|
| 17 |
+
"parse_fail_total": 0,
|
| 18 |
+
"errors_total": 0,
|
| 19 |
+
"calls_total": 0,
|
| 20 |
+
"calls_exhausted": 0
|
| 21 |
+
},
|
| 22 |
+
"43": {
|
| 23 |
+
"n": 10,
|
| 24 |
+
"P": 0.5410290148448043,
|
| 25 |
+
"R": 0.5713611388611388,
|
| 26 |
+
"F1": 0.5253254924582543,
|
| 27 |
+
"t3_s": 10.785722708702087,
|
| 28 |
+
"attempts_total": 0,
|
| 29 |
+
"parse_fail_total": 0,
|
| 30 |
+
"errors_total": 0,
|
| 31 |
+
"calls_total": 0,
|
| 32 |
+
"calls_exhausted": 0
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"avg": {
|
| 36 |
+
"P": 0.5287082846308051,
|
| 37 |
+
"R": 0.6228654678654678,
|
| 38 |
+
"F1": 0.5500861157141912,
|
| 39 |
+
"t3_s": 9.551163733005524,
|
| 40 |
+
"parse_fail_total": 0,
|
| 41 |
+
"errors_total": 0,
|
| 42 |
+
"attempts_total": 0,
|
| 43 |
+
"calls_exhausted": 0
|
| 44 |
+
}
|
| 45 |
+
},
|
| 46 |
+
"no_why": {
|
| 47 |
+
"by_seed": {
|
| 48 |
+
"42": {
|
| 49 |
+
"n": 10,
|
| 50 |
+
"P": 0.526974765974766,
|
| 51 |
+
"R": 0.6552372627372628,
|
| 52 |
+
"F1": 0.5574913389066418,
|
| 53 |
+
"t3_s": 3.2179250478744508,
|
| 54 |
+
"attempts_total": 11,
|
| 55 |
+
"parse_fail_total": 0,
|
| 56 |
+
"errors_total": 0,
|
| 57 |
+
"calls_total": 11,
|
| 58 |
+
"calls_exhausted": 0
|
| 59 |
+
},
|
| 60 |
+
"43": {
|
| 61 |
+
"n": 10,
|
| 62 |
+
"P": 0.5616956032473274,
|
| 63 |
+
"R": 0.6611355311355311,
|
| 64 |
+
"F1": 0.5434100633858101,
|
| 65 |
+
"t3_s": 10.192648196220398,
|
| 66 |
+
"attempts_total": 11,
|
| 67 |
+
"parse_fail_total": 0,
|
| 68 |
+
"errors_total": 0,
|
| 69 |
+
"calls_total": 11,
|
| 70 |
+
"calls_exhausted": 0
|
| 71 |
+
}
|
| 72 |
+
},
|
| 73 |
+
"avg": {
|
| 74 |
+
"P": 0.5443351846110467,
|
| 75 |
+
"R": 0.658186396936397,
|
| 76 |
+
"F1": 0.550450701146226,
|
| 77 |
+
"t3_s": 6.705286622047424,
|
| 78 |
+
"parse_fail_total": 0,
|
| 79 |
+
"errors_total": 0,
|
| 80 |
+
"attempts_total": 22,
|
| 81 |
+
"calls_exhausted": 0
|
| 82 |
+
}
|
| 83 |
+
},
|
| 84 |
+
"no_why_explicit_instruction": {
|
| 85 |
+
"by_seed": {
|
| 86 |
+
"42": {
|
| 87 |
+
"n": 10,
|
| 88 |
+
"P": 0.5039213382541718,
|
| 89 |
+
"R": 0.7805727605727606,
|
| 90 |
+
"F1": 0.5978319552325569,
|
| 91 |
+
"t3_s": 5.193069648742676,
|
| 92 |
+
"attempts_total": 11,
|
| 93 |
+
"parse_fail_total": 0,
|
| 94 |
+
"errors_total": 0,
|
| 95 |
+
"calls_total": 11,
|
| 96 |
+
"calls_exhausted": 0
|
| 97 |
+
},
|
| 98 |
+
"43": {
|
| 99 |
+
"n": 10,
|
| 100 |
+
"P": 0.5479474309215688,
|
| 101 |
+
"R": 0.7035164835164835,
|
| 102 |
+
"F1": 0.5839503345959894,
|
| 103 |
+
"t3_s": 6.33277850151062,
|
| 104 |
+
"attempts_total": 11,
|
| 105 |
+
"parse_fail_total": 0,
|
| 106 |
+
"errors_total": 0,
|
| 107 |
+
"calls_total": 11,
|
| 108 |
+
"calls_exhausted": 0
|
| 109 |
+
}
|
| 110 |
+
},
|
| 111 |
+
"avg": {
|
| 112 |
+
"P": 0.5259343845878703,
|
| 113 |
+
"R": 0.7420446220446221,
|
| 114 |
+
"F1": 0.5908911449142732,
|
| 115 |
+
"t3_s": 5.762924075126648,
|
| 116 |
+
"parse_fail_total": 0,
|
| 117 |
+
"errors_total": 0,
|
| 118 |
+
"attempts_total": 22,
|
| 119 |
+
"calls_exhausted": 0
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
}
|
data/runtime_metrics/ui_pipeline_timings.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"timestamp_utc": "2026-03-02T12:44:26Z", "stages_s": {"preprocess": 7.90999984019436e-05, "rewrite": 1.9136111999978311, "structural": 1.0946640000038315, "probe": 0.5859509000001708, "retrieval": 4.595289600001706, "selection": 37.53351300000213, "implication_expansion": 0.15133090000017546, "prompt_composition": 6.299999949987978e-05, "group_display": 0.04701460000069346}, "total_s": 45.927563900004316, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 2 |
+
{"timestamp_utc": "2026-03-02T16:08:08Z", "stages_s": {"preprocess": 6.989999383222312e-05, "rewrite": 3.0064916999981506, "structural": 4.2000028770416975e-06, "probe": 3.01228209999681, "retrieval": 3.3860946000058902, "selection": 5.285027000005357, "implication_expansion": 0.147530000002007, "prompt_composition": 3.850000211969018e-05, "group_display": 0.10624819999793544}, "total_s": 14.949083599989535, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 3 |
+
{"timestamp_utc": "2026-03-02T16:08:37Z", "stages_s": {"preprocess": 7.179999374784529e-05, "rewrite": 4.608368299988797, "structural": 3.6999990697950125e-06, "probe": 1.5999976312741637e-06, "retrieval": 3.4574174999870593, "selection": 8.8562099999981, "implication_expansion": 0.14937499999359716, "prompt_composition": 3.650000144261867e-05, "group_display": 0.04632819999824278}, "total_s": 17.122792900001514, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
data/structural_tag_definitions.csv
CHANGED
|
@@ -2,18 +2,18 @@ enabled,group_name,constraint,tag,definition
|
|
| 2 |
1,character_count,exclusive,zero_pictured,"No characters or living beings are visible in the image."
|
| 3 |
1,character_count,exclusive,solo,"Exactly one character is visible in the image."
|
| 4 |
1,character_count,exclusive,duo,"Exactly two characters are visible in the image."
|
| 5 |
-
1,character_count,exclusive,trio,"Exactly three characters are visible in the image; select only when the count is clearly three."
|
| 6 |
1,character_count,exclusive,group,"Four or more characters are visible in the image; do not use for one, two, or three."
|
| 7 |
-
1,body_type,multi,anthro,"An animal/furry character with BOTH human-like body plan (upright stance, human-like torso/arms/hands) and clear animal traits (fur, muzzle, tail, animal ears, paws, or species cues)."
|
| 8 |
-
1,body_type,multi,feral,"
|
| 9 |
-
1,body_type,multi,humanoid,"
|
| 10 |
1,body_type,multi,taur,"Select only for an explicit centaur-like body plan: a humanoid upper torso attached to a separate four-legged lower body."
|
| 11 |
-
1,gender,multi,male,"Select only when the description explicitly indicates male presentation or identity, such as male/man/boy/he/him/his/father/husband/boyfriend. 'boy' and male pronouns count as explicit evidence."
|
| 12 |
-
1,gender,multi,female,"Select only when the description explicitly indicates female presentation or identity, such as female/woman/girl/she/her/hers/mother/wife/girlfriend. 'girl' and female pronouns count as explicit evidence."
|
| 13 |
-
1,gender,multi,ambiguous_gender,"Select only when the description explicitly says gender is unknown, ambiguous, androgynous, mixed, or not determinable. Do not use this as a default fallback when gender is simply unmentioned."
|
| 14 |
1,gender,multi,intersex,"Select only when intersex or mixed-sex-traits wording is explicit in the description."
|
| 15 |
1,clothing_state,multi,clothed,"At least one character is explicitly described as wearing clothing or a garment (for example shirt, pants, shorts, dress, coat, loincloth, armor, uniform)."
|
| 16 |
-
1,clothing_state,multi,nude,"Select only when the description explicitly indicates no clothing (nude/naked/unclothed). Do not infer nude just because clothing is not mentioned."
|
| 17 |
1,clothing_state,multi,topless,"The upper body/chest is uncovered while lower body has clothing. This includes descriptions with shorts/pants/loincloth and no shirt/top."
|
| 18 |
1,clothing_state,multi,bottomless,"The lower body is uncovered while the upper body has clothing."
|
| 19 |
1,visual_elements,multi,looking_at_viewer,"Select only when explicit gaze wording appears (looking at viewer, looking at camera, looking directly at us, direct eye contact). Do not infer from front view, pose, or expression."
|
|
|
|
| 2 |
1,character_count,exclusive,zero_pictured,"No characters or living beings are visible in the image."
|
| 3 |
1,character_count,exclusive,solo,"Exactly one character is visible in the image."
|
| 4 |
1,character_count,exclusive,duo,"Exactly two characters are visible in the image."
|
| 5 |
+
1,character_count,exclusive,trio,"Exactly three characters are visible in the image; select only when the count is clearly three."
|
| 6 |
1,character_count,exclusive,group,"Four or more characters are visible in the image; do not use for one, two, or three."
|
| 7 |
+
1,body_type,multi,anthro,"An animal/furry character with BOTH human-like body plan (upright stance, human-like torso/arms/hands) and clear animal traits (fur, muzzle, tail, animal ears, paws, or species cues)."
|
| 8 |
+
1,body_type,multi,feral,"A non-humanoid animal body plan (typically quadruped or otherwise animal-shaped, without human-like torso/hands). Do not select if explicitly anthropomorphic."
|
| 9 |
+
1,body_type,multi,humanoid,"A human or near-human character with no explicit animal-species traits. Do not select if animal species words or animal traits (muzzle, tail, paws, animal ears, heavy fur coat, scales) are present."
|
| 10 |
1,body_type,multi,taur,"Select only for an explicit centaur-like body plan: a humanoid upper torso attached to a separate four-legged lower body."
|
| 11 |
+
1,gender,multi,male,"Select only when the description explicitly indicates male presentation or identity, such as male/man/boy/he/him/his/father/husband/boyfriend. 'boy' and male pronouns count as explicit evidence."
|
| 12 |
+
1,gender,multi,female,"Select only when the description explicitly indicates female presentation or identity, such as female/woman/girl/she/her/hers/mother/wife/girlfriend. 'girl' and female pronouns count as explicit evidence."
|
| 13 |
+
1,gender,multi,ambiguous_gender,"Select only when the description explicitly says gender is unknown, ambiguous, androgynous, mixed, or not determinable. Do not use this as a default fallback when gender is simply unmentioned."
|
| 14 |
1,gender,multi,intersex,"Select only when intersex or mixed-sex-traits wording is explicit in the description."
|
| 15 |
1,clothing_state,multi,clothed,"At least one character is explicitly described as wearing clothing or a garment (for example shirt, pants, shorts, dress, coat, loincloth, armor, uniform)."
|
| 16 |
+
1,clothing_state,multi,nude,"Select only when the description explicitly indicates no clothing (nude/naked/unclothed). Do not infer nude just because clothing is not mentioned."
|
| 17 |
1,clothing_state,multi,topless,"The upper body/chest is uncovered while lower body has clothing. This includes descriptions with shorts/pants/loincloth and no shirt/top."
|
| 18 |
1,clothing_state,multi,bottomless,"The lower body is uncovered while the upper body has clothing."
|
| 19 |
1,visual_elements,multi,looking_at_viewer,"Select only when explicit gaze wording appears (looking at viewer, looking at camera, looking directly at us, direct eye contact). Do not infer from front view, pose, or expression."
|
psq_rag/llm/select.py
CHANGED
|
@@ -1,80 +1,59 @@
|
|
| 1 |
-
# psq_rag/llm/select.py
|
| 2 |
-
# Stage 3: Closed-Set Selection (LangChain-only implementation)
|
| 3 |
-
#
|
| 4 |
-
# This module intentionally uses LangChain for:
|
| 5 |
-
# - prompt templating (including {N})
|
| 6 |
-
# - LLM call orchestration
|
| 7 |
-
# - JSON parsing
|
| 8 |
-
#
|
| 9 |
-
# There is NO fallback path. If LangChain dependencies are missing, this module
|
| 10 |
-
# should fail loudly so you install them.
|
| 11 |
-
|
| 12 |
-
import os
|
| 13 |
-
import re
|
| 14 |
-
import csv
|
| 15 |
from dataclasses import dataclass
|
| 16 |
from pathlib import Path
|
| 17 |
-
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union, cast,
|
| 18 |
-
|
| 19 |
-
from langchain_openai import ChatOpenAI
|
| 20 |
-
from langchain_core.prompts import ChatPromptTemplate
|
| 21 |
-
from langchain_core.output_parsers import PydanticOutputParser
|
| 22 |
-
from pydantic import BaseModel, Field, SecretStr
|
| 23 |
-
from rapidfuzz import fuzz
|
| 24 |
-
|
| 25 |
-
from psq_rag.retrieval.psq_retrieval import Candidate # Candidate(tag, score_*, count, sources)
|
| 26 |
-
from psq_rag.retrieval.state import get_tag_type_name, get_tag2aliases
|
| 27 |
-
|
| 28 |
-
# Character-typed tags that are generic categories, not actual named characters.
|
| 29 |
-
# These leak through the alias filter because they match common words in captions.
|
| 30 |
-
# They are excluded from the entity pipeline and instead routed to general selection.
|
| 31 |
-
_GENERIC_CHARACTER_TAGS = frozenset({
|
| 32 |
-
"fan_character",
|
| 33 |
-
"background_character",
|
| 34 |
-
"unnamed_character",
|
| 35 |
-
"unknown_character",
|
| 36 |
-
"anonymous_character",
|
| 37 |
-
"viewer",
|
| 38 |
-
"original_character",
|
| 39 |
-
})
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
WHY_ENUM = ["explicit", "strong_implied", "weak_implied", "style_or_meta", "other"]
|
| 43 |
-
|
| 44 |
-
# Ordinal rank: lower = more confident. Used for threshold filtering.
|
| 45 |
-
WHY_RANK: Dict[str, int] = {
|
| 46 |
-
"explicit": 0,
|
| 47 |
-
"strong_implied": 1,
|
| 48 |
-
"weak_implied": 2,
|
| 49 |
-
"style_or_meta": 3,
|
| 50 |
-
"other": 4,
|
| 51 |
-
}
|
| 52 |
-
|
| 53 |
-
# Deterministic mapping: ordinal "why" -> numeric score for ordering/debug.
|
| 54 |
-
WHY_TO_SCORE: Dict[str, float] = {
|
| 55 |
-
"explicit": 0.90,
|
| 56 |
-
"strong_implied": 0.70,
|
| 57 |
-
"weak_implied": 0.45,
|
| 58 |
-
"style_or_meta": 0.35,
|
| 59 |
-
"other": 0.25,
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
|
| 63 |
# IMPORTANT ABOUT TEMPLATING:
|
| 64 |
# - This string is rendered by LangChain's f-string template engine.
|
| 65 |
# - Literal JSON braces must be escaped as {{ and }}.
|
| 66 |
# - {N} is a real template variable and MUST be provided.
|
| 67 |
SELECT_SYSTEM_TEMPLATE = """You are given a description of an image and a list of imageboard tags.
|
| 68 |
|
| 69 |
-
Select
|
| 70 |
-
|
| 71 |
-
|
| 72 |
|
| 73 |
Return JSON ONLY matching this schema:
|
| 74 |
|
| 75 |
{{
|
| 76 |
\"selections\": [
|
| 77 |
-
{{\"i\": <int>
|
| 78 |
...
|
| 79 |
]
|
| 80 |
}}
|
|
@@ -82,34 +61,27 @@ Return JSON ONLY matching this schema:
|
|
| 82 |
Rules:
|
| 83 |
- Choose ONLY from indices 1..{N}.
|
| 84 |
- Do NOT output tag text.
|
| 85 |
-
- Do NOT output any keys other than \"selections\", and inside each item only the item index \"i\"
|
| 86 |
- Do select both a general tag and a more specific tag when both apply (for example, \"shirt\" and \"grey shirt\").
|
| 87 |
-
|
| 88 |
-
Define \"why\" as:
|
| 89 |
-
- explicit: directly stated in the image description
|
| 90 |
-
- strong_implied: very likely given the description, even if not literally stated
|
| 91 |
-
- weak_implied: plausible but not strongly supported by the description
|
| 92 |
-
- style_or_meta: stylistic or presentation-related tags only if clearly indicated
|
| 93 |
-
- other: fallback category; use sparingly
|
| 94 |
"""
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
def _get_select_system_template() -> str:
|
| 98 |
-
"""Return Stage 3 selection prompt text."""
|
| 99 |
-
return SELECT_SYSTEM_TEMPLATE
|
| 100 |
-
|
| 101 |
-
|
| 102 |
ENTITY_SYSTEM_TEMPLATE = """You are given a description of an image and a list of CHARACTER tags.
|
| 103 |
-
|
| 104 |
-
These character tags have already been pre-filtered to only include characters whose names
|
| 105 |
-
(or known aliases) appear in the image description. Your job is to confirm which of these
|
| 106 |
-
pre-filtered candidates are the correct match for the character mentioned by the user.
|
| 107 |
-
|
| 108 |
Return JSON ONLY matching this schema:
|
| 109 |
|
| 110 |
{{
|
| 111 |
\"selections\": [
|
| 112 |
-
{{\"i\": <int>
|
| 113 |
...
|
| 114 |
]
|
| 115 |
}}
|
|
@@ -117,213 +89,205 @@ Return JSON ONLY matching this schema:
|
|
| 117 |
Rules for character selection:
|
| 118 |
- Choose ONLY from indices 1..{N}.
|
| 119 |
- Do NOT output tag text.
|
| 120 |
-
- Always use \"why\": \"explicit\" for all selections.
|
| 121 |
- Select the tag that best represents the character as described.
|
| 122 |
- If the user described a specific variant (e.g. \"pikachu libre\", \"detective pikachu\"),
|
| 123 |
select that specific variant tag.
|
| 124 |
-
- If the user described only the base character (e.g. just \"pikachu\"), select only the
|
| 125 |
-
base/default tag, NOT costume or variant tags.
|
| 126 |
-
- When uncertain between variants, prefer the simplest/most general tag.
|
| 127 |
-
"""
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
USER_TEMPLATE = """IMAGE DESCRIPTION:
|
| 131 |
-
{image_description}
|
| 132 |
-
|
| 133 |
-
CANDIDATES (choose by index only):
|
| 134 |
-
{candidate_lines}
|
| 135 |
-
|
| 136 |
-
Select up to {per_call_budget} indices. Output fewer if uncertain.
|
| 137 |
-
"""
|
| 138 |
-
|
| 139 |
-
|
| 140 |
@dataclass(frozen=True)
|
| 141 |
class Selected:
|
| 142 |
i: int
|
| 143 |
tag: str # canonical tag (underscore form)
|
| 144 |
-
why: str
|
| 145 |
-
score: float
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
WhyLiteral = Literal["explicit", "strong_implied", "weak_implied", "style_or_meta", "other"]
|
| 149 |
|
| 150 |
|
| 151 |
class Stage3SelectionItem(BaseModel):
|
| 152 |
i: int = Field(..., description="1-based index into the candidate list.")
|
| 153 |
-
why: WhyLiteral = Field(..., description="Rationale code from the allowed set.")
|
| 154 |
|
| 155 |
|
| 156 |
class Stage3SelectionResponse(BaseModel):
|
| 157 |
-
selections: List[Stage3SelectionItem] = Field(default_factory=list)
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
def _build_response_format() -> Dict[str, Any]:
|
| 161 |
-
# Strict JSON Schema structured output.
|
| 162 |
-
schema = {
|
| 163 |
-
"type": "object",
|
| 164 |
-
"properties": {
|
| 165 |
-
"selections": {
|
| 166 |
-
"type": "array",
|
| 167 |
"items": {
|
| 168 |
"type": "object",
|
| 169 |
"properties": {
|
| 170 |
"i": {"type": "integer"},
|
| 171 |
-
"why": {"type": "string", "enum": WHY_ENUM},
|
| 172 |
},
|
| 173 |
-
"required": ["i"
|
| 174 |
"additionalProperties": False,
|
| 175 |
},
|
| 176 |
}
|
| 177 |
-
},
|
| 178 |
-
"required": ["selections"],
|
| 179 |
-
"additionalProperties": False,
|
| 180 |
-
}
|
| 181 |
-
|
| 182 |
-
return {
|
| 183 |
-
"type": "json_schema",
|
| 184 |
-
"json_schema": {
|
| 185 |
-
"name": "stage3_selection",
|
| 186 |
-
"strict": True,
|
| 187 |
-
"schema": schema,
|
| 188 |
-
},
|
| 189 |
-
}
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
def _get_llm(*, temperature: float, max_tokens: int, response_format: Dict[str, Any]) -> ChatOpenAI:
|
| 193 |
-
api_key = os.getenv("OPENROUTER_API_KEY")
|
| 194 |
-
if not api_key:
|
| 195 |
-
raise RuntimeError(
|
| 196 |
-
"OPENROUTER_API_KEY is not set.\n"
|
| 197 |
-
"Set it in your environment before running Stage 3."
|
| 198 |
-
)
|
| 199 |
-
api_key = SecretStr(cast(str, api_key))
|
| 200 |
-
|
| 201 |
-
model = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
|
| 202 |
-
headers: Dict[str, str] = {}
|
| 203 |
-
if referer := os.getenv("OPENROUTER_HTTP_REFERER"):
|
| 204 |
-
headers["HTTP-Referer"] = referer
|
| 205 |
-
if title := os.getenv("OPENROUTER_X_TITLE"):
|
| 206 |
-
headers["X-Title"] = title
|
| 207 |
-
|
| 208 |
-
# OpenRouter OpenAI-compatible endpoint.
|
| 209 |
-
return ChatOpenAI(
|
| 210 |
-
model=model,
|
| 211 |
-
base_url="https://openrouter.ai/api/v1",
|
| 212 |
-
api_key=api_key,
|
| 213 |
-
temperature=temperature,
|
| 214 |
-
max_completion_tokens=max_tokens,
|
| 215 |
-
default_headers=headers,
|
| 216 |
-
# Provider-specific request body fields (OpenAI-compatible).
|
| 217 |
-
# Response Healing plugin reduces malformed-JSON failures (syntax only).
|
| 218 |
-
extra_body={
|
| 219 |
-
"response_format": response_format,
|
| 220 |
-
"plugins": [{"id": "response-healing"}],
|
| 221 |
-
},
|
| 222 |
-
)
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
def _phrase_key_for_candidate(c: Candidate) -> str:
|
| 226 |
-
# Deterministic "primary phrase" for grouping.
|
| 227 |
-
if c.sources:
|
| 228 |
-
return sorted(c.sources)[0]
|
| 229 |
-
return ""
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
def _interleave_round_robin(cands: Sequence[Candidate]) -> List[Candidate]:
|
| 233 |
-
"""Round-robin interleave by primary source phrase.
|
| 234 |
-
|
| 235 |
-
NOTE: counts are used only for ordering; they are NOT shown to the LLM.
|
| 236 |
-
"""
|
| 237 |
-
groups: Dict[str, List[Candidate]] = {}
|
| 238 |
-
for c in cands:
|
| 239 |
-
k = _phrase_key_for_candidate(c)
|
| 240 |
-
groups.setdefault(k, []).append(c)
|
| 241 |
-
|
| 242 |
-
for k in groups:
|
| 243 |
-
groups[k].sort(key=lambda x: (x.score_combined, (x.count or -1)), reverse=True)
|
| 244 |
-
|
| 245 |
-
keys = sorted(groups.keys())
|
| 246 |
-
|
| 247 |
-
out: List[Candidate] = []
|
| 248 |
-
idx = 0
|
| 249 |
-
while True:
|
| 250 |
-
progressed = False
|
| 251 |
-
for k in keys:
|
| 252 |
-
if idx < len(groups[k]):
|
| 253 |
-
out.append(groups[k][idx])
|
| 254 |
-
progressed = True
|
| 255 |
-
if not progressed:
|
| 256 |
-
break
|
| 257 |
-
idx += 1
|
| 258 |
-
|
| 259 |
-
return out
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
def _build_chunks(cands: Sequence[Candidate], chunk_size: int) -> List[List[Candidate]]:
|
| 263 |
-
if chunk_size <= 0:
|
| 264 |
-
raise ValueError(f"chunk_size must be > 0, got {chunk_size}")
|
| 265 |
-
ordered = _interleave_round_robin(cands)
|
| 266 |
-
return [ordered[i:i + chunk_size] for i in range(0, len(ordered), chunk_size)]
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
def _display_tag(tag: str) -> str:
|
| 270 |
-
# Display tags with spaces for the LLM, but keep canonical underscores internally.
|
| 271 |
-
return tag.replace("_", " ")
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
def _format_candidates_local(
|
| 275 |
-
cands: Sequence[Candidate],
|
| 276 |
-
candidate_display: Optional[Mapping[str, str]] = None,
|
| 277 |
-
) -> Tuple[str, Dict[int, str], Dict[int, Candidate]]:
|
| 278 |
-
lines: List[str] = []
|
| 279 |
-
idx_to_tag: Dict[int, str] = {}
|
| 280 |
-
idx_to_candidate: Dict[int, Candidate] = {}
|
| 281 |
-
for j, c in enumerate(cands, start=1):
|
| 282 |
-
idx_to_tag[j] = c.tag
|
| 283 |
-
idx_to_candidate[j] = c
|
| 284 |
-
display = candidate_display.get(c.tag) if candidate_display else None
|
| 285 |
-
if not display:
|
| 286 |
-
display = _display_tag(c.tag)
|
| 287 |
-
lines.append(f"{j}. {display}")
|
| 288 |
-
return "\n".join(lines), idx_to_tag, idx_to_candidate
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
def _phrases_in_call(cands: Sequence[Candidate]) -> int:
|
| 292 |
-
s = set()
|
| 293 |
-
for c in cands:
|
| 294 |
-
for src in c.sources:
|
| 295 |
-
s.add(src)
|
| 296 |
-
return len(s)
|
| 297 |
-
|
| 298 |
-
|
| 299 |
def _parse_validate_map(
|
| 300 |
parsed: Any,
|
| 301 |
idx_to_tag: Dict[int, str],
|
| 302 |
per_call_budget: int,
|
| 303 |
-
) -> Tuple[List[Selected], Dict[str, Any]]:
|
| 304 |
-
diag = {
|
| 305 |
-
"parse_ok": isinstance(parsed, dict),
|
| 306 |
-
"invalid_items": 0,
|
| 307 |
-
"oob_indices": 0,
|
| 308 |
-
"dupe_indices": 0,
|
| 309 |
-
"kept": 0,
|
| 310 |
-
}
|
| 311 |
-
|
| 312 |
-
if isinstance(parsed, BaseModel):
|
| 313 |
-
parsed = parsed.model_dump() if hasattr(parsed, "model_dump") else parsed.dict()
|
| 314 |
-
diag["parse_ok"] = isinstance(parsed, dict)
|
| 315 |
-
|
| 316 |
-
if not isinstance(parsed, dict):
|
| 317 |
-
return [], diag
|
| 318 |
-
|
| 319 |
-
selections = parsed.get("selections", [])
|
| 320 |
-
if not isinstance(selections, list):
|
| 321 |
-
diag["parse_ok"] = False
|
| 322 |
-
return [], diag
|
| 323 |
-
|
| 324 |
-
out: List[Selected] = []
|
| 325 |
-
seen_i = set()
|
| 326 |
-
|
| 327 |
for item in selections:
|
| 328 |
if len(out) >= per_call_budget:
|
| 329 |
break
|
|
@@ -332,7 +296,6 @@ def _parse_validate_map(
|
|
| 332 |
continue
|
| 333 |
|
| 334 |
i = item.get("i")
|
| 335 |
-
why = item.get("why")
|
| 336 |
|
| 337 |
if isinstance(i, bool) or not isinstance(i, int):
|
| 338 |
diag["invalid_items"] += 1
|
|
@@ -343,1057 +306,1046 @@ def _parse_validate_map(
|
|
| 343 |
if i not in idx_to_tag:
|
| 344 |
diag["oob_indices"] += 1
|
| 345 |
continue
|
| 346 |
-
|
|
|
|
| 347 |
diag["invalid_items"] += 1
|
| 348 |
continue
|
| 349 |
seen_i.add(i)
|
| 350 |
tag = idx_to_tag[i]
|
| 351 |
-
out.append(Selected(i=i, tag=tag
|
| 352 |
-
|
| 353 |
-
diag["kept"] = len(out)
|
| 354 |
-
return out, diag
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
def _split_candidates_by_type(
|
| 358 |
-
candidates: List[Candidate],
|
| 359 |
-
log,
|
| 360 |
-
) -> Tuple[List[Tuple[int, Candidate]], List[Tuple[int, Candidate]]]:
|
| 361 |
-
"""Split candidates into general vs entity (character only) lists.
|
| 362 |
-
|
| 363 |
-
Returns:
|
| 364 |
-
(general_list, entity_list) where each item is (original_index, candidate)
|
| 365 |
-
|
| 366 |
-
Tag types:
|
| 367 |
-
- General: 0 (general), 1 (artist), 5 (species), 7 (meta)
|
| 368 |
-
- Entity: 4 (character) only
|
| 369 |
-
- Filtered: 3 (copyright) - too broad for image generation
|
| 370 |
-
"""
|
| 371 |
-
general_with_idx: List[Tuple[int, Candidate]] = []
|
| 372 |
-
entity_with_idx: List[Tuple[int, Candidate]] = []
|
| 373 |
-
|
| 374 |
-
unknown_count = 0
|
| 375 |
-
copyright_count = 0
|
| 376 |
-
|
| 377 |
-
generic_char_count = 0
|
| 378 |
-
|
| 379 |
-
for idx, cand in enumerate(candidates):
|
| 380 |
-
type_name = get_tag_type_name(cand.tag)
|
| 381 |
-
|
| 382 |
-
if type_name == "character":
|
| 383 |
-
if cand.tag in _GENERIC_CHARACTER_TAGS:
|
| 384 |
-
# Route generic character-category tags to general selection
|
| 385 |
-
general_with_idx.append((idx, cand))
|
| 386 |
-
generic_char_count += 1
|
| 387 |
-
else:
|
| 388 |
-
entity_with_idx.append((idx, cand))
|
| 389 |
-
elif type_name == "copyright":
|
| 390 |
-
# Filter out copyright/series tags - too broad for image generation
|
| 391 |
-
copyright_count += 1
|
| 392 |
-
elif type_name in ("general", "artist", "species", "meta"):
|
| 393 |
-
general_with_idx.append((idx, cand))
|
| 394 |
-
else:
|
| 395 |
-
# Unknown or None - treat as general by default
|
| 396 |
-
general_with_idx.append((idx, cand))
|
| 397 |
-
unknown_count += 1
|
| 398 |
-
|
| 399 |
-
if log:
|
| 400 |
-
log(
|
| 401 |
-
f"Stage3 split: "
|
| 402 |
-
f"general={len(general_with_idx)} "
|
| 403 |
-
f"entity={len(entity_with_idx)} "
|
| 404 |
-
f"copyright_filtered={copyright_count} "
|
| 405 |
-
f"generic_char_to_general={generic_char_count} "
|
| 406 |
-
f"unknown_type={unknown_count}"
|
| 407 |
-
)
|
| 408 |
-
|
| 409 |
-
return general_with_idx, entity_with_idx
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
# Regex to strip series/franchise suffixes from aliases, e.g. _(sonic), _(mlp), _(character)
|
| 413 |
-
_SERIES_SUFFIX_RE = re.compile(r"_\([^)]+\)$")
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
def _normalize_for_matching(text: str) -> str:
|
| 417 |
-
"""Lowercase, replace underscores with spaces, strip series suffixes."""
|
| 418 |
-
text = text.lower().strip()
|
| 419 |
-
text = _SERIES_SUFFIX_RE.sub("", text)
|
| 420 |
-
text = text.replace("_", " ")
|
| 421 |
-
return text
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
def _query_words(query: str) -> Set[str]:
|
| 425 |
-
"""Extract individual words from the user query for matching."""
|
| 426 |
-
return set(_normalize_for_matching(query).split())
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
def _alias_matches_query(alias_norm: str, query_words: Set[str], query_norm: str,
|
| 430 |
-
fuzzy_threshold: int = 85) -> bool:
|
| 431 |
-
"""Check if an alias matches the user query.
|
| 432 |
-
|
| 433 |
-
Matching logic:
|
| 434 |
-
1. Exact substring: alias appears as a substring of the query
|
| 435 |
-
2. Word subset: all words in the alias appear in the query words
|
| 436 |
-
3. Fuzzy: alias is close to a word in the query (handles typos)
|
| 437 |
-
"""
|
| 438 |
-
# Exact substring match
|
| 439 |
-
if alias_norm in query_norm:
|
| 440 |
-
return True
|
| 441 |
-
|
| 442 |
-
alias_words = alias_norm.split()
|
| 443 |
-
if not alias_words:
|
| 444 |
-
return False
|
| 445 |
-
|
| 446 |
-
# Word subset match: all alias words must appear in query
|
| 447 |
-
if all(w in query_words for w in alias_words):
|
| 448 |
-
return True
|
| 449 |
-
|
| 450 |
-
# For single-word aliases, try fuzzy matching against each query word
|
| 451 |
-
if len(alias_words) == 1:
|
| 452 |
-
for qw in query_words:
|
| 453 |
-
if fuzz.ratio(alias_words[0], qw) >= fuzzy_threshold:
|
| 454 |
-
return True
|
| 455 |
-
|
| 456 |
-
# For multi-word aliases, try fuzzy partial ratio against whole query
|
| 457 |
-
if len(alias_words) > 1:
|
| 458 |
-
if fuzz.partial_ratio(alias_norm, query_norm) >= fuzzy_threshold:
|
| 459 |
-
return True
|
| 460 |
-
|
| 461 |
-
return False
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
def _character_matches_via_aliases(
|
| 465 |
-
tag: str,
|
| 466 |
-
query: str,
|
| 467 |
-
tag2aliases: Dict[str, List[str]],
|
| 468 |
-
query_words: Set[str],
|
| 469 |
-
query_norm: str,
|
| 470 |
-
fuzzy_threshold: int = 85,
|
| 471 |
-
) -> bool:
|
| 472 |
-
"""Check if a character tag matches the user query via its aliases.
|
| 473 |
-
|
| 474 |
-
For a character tag to match:
|
| 475 |
-
- The tag name itself (normalized) must match, OR
|
| 476 |
-
- At least one of its registered aliases must match.
|
| 477 |
-
|
| 478 |
-
Empty aliases list means no known aliases; still check the tag name itself.
|
| 479 |
-
"""
|
| 480 |
-
# Check the tag name itself
|
| 481 |
-
tag_norm = _normalize_for_matching(tag)
|
| 482 |
-
if _alias_matches_query(tag_norm, query_words, query_norm, fuzzy_threshold):
|
| 483 |
-
return True
|
| 484 |
-
|
| 485 |
-
# Check all registered aliases
|
| 486 |
-
aliases = tag2aliases.get(tag, [])
|
| 487 |
-
for alias in aliases:
|
| 488 |
-
alias_norm = _normalize_for_matching(alias)
|
| 489 |
-
if not alias_norm:
|
| 490 |
-
continue
|
| 491 |
-
if _alias_matches_query(alias_norm, query_words, query_norm, fuzzy_threshold):
|
| 492 |
-
return True
|
| 493 |
-
|
| 494 |
-
return False
|
| 495 |
-
|
| 496 |
-
|
| 497 |
def llm_select_indices(
|
| 498 |
-
query_text: str, # kept for compatibility; treated as IMAGE DESCRIPTION
|
| 499 |
-
candidates: Union[
|
| 500 |
-
Sequence[Candidate],
|
| 501 |
-
Sequence[str],
|
| 502 |
-
Sequence[Tuple[str, float]],
|
| 503 |
-
],
|
| 504 |
-
max_pick: int, # legacy param; applied after union + ordering (optional)
|
| 505 |
-
log,
|
| 506 |
-
retries: int = 2,
|
| 507 |
-
*,
|
| 508 |
-
mode: str = "chunked_map_union", # "single_shot" or "chunked_map_union"
|
| 509 |
-
chunk_size: int = 60,
|
| 510 |
-
per_phrase_k: int = 2, # per-call budget = per_phrase_k * phrases_in_call
|
| 511 |
-
temperature: float = 0.0,
|
| 512 |
-
max_tokens: int = 512,
|
| 513 |
-
return_metadata: bool = False,
|
| 514 |
-
return_diagnostics: bool = False,
|
| 515 |
-
min_why: Optional[str] =
|
| 516 |
-
candidate_display: Optional[Mapping[str, str]] = None,
|
| 517 |
-
) -> Union[
|
| 518 |
-
List[int],
|
| 519 |
-
Tuple[List[int], Dict[str, str]],
|
| 520 |
-
Tuple[List[int], Dict[str, str], Dict[str, Any]],
|
| 521 |
-
]:
|
| 522 |
-
"""Return indices into the ORIGINAL candidates list (legacy interface).
|
| 523 |
-
|
| 524 |
-
min_why:
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
if not
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
"
|
| 593 |
-
f"
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
"
|
| 615 |
-
"
|
| 616 |
-
"
|
| 617 |
-
"
|
| 618 |
-
"
|
| 619 |
-
"
|
| 620 |
-
"
|
| 621 |
-
"
|
| 622 |
-
"
|
| 623 |
-
"
|
| 624 |
-
"
|
| 625 |
-
"
|
| 626 |
-
"
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
"
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
)
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
f"
|
| 689 |
-
f"
|
| 690 |
-
f"
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
"
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
)
|
| 704 |
-
|
| 705 |
-
diagnostics["
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
diagnostics["
|
| 711 |
-
_record_attempt_for_n(N_local, parse_ok=
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
f"N={N_local} selected={len(selected)} per_call_budget={per_call_budget}"
|
| 721 |
-
)
|
| 722 |
-
summary_logged = True
|
| 723 |
if selected:
|
| 724 |
lines = [
|
| 725 |
f"Stage3 {label} selections:",
|
| 726 |
*[
|
| 727 |
(
|
| 728 |
f' - i={s.i} tag="{s.tag}" '
|
| 729 |
-
f"why={s.why} score={s.score:.2f} "
|
| 730 |
f"sources={idx_to_candidate.get(s.i).sources if idx_to_candidate.get(s.i) else []}"
|
| 731 |
)
|
| 732 |
for s in selected
|
| 733 |
],
|
| 734 |
]
|
| 735 |
-
log("\n".join(lines))
|
| 736 |
-
else:
|
| 737 |
-
log(f"Stage3 {label} selections: (none)")
|
| 738 |
-
|
| 739 |
if selected:
|
| 740 |
diagnostics["calls_with_selection"] += 1
|
| 741 |
for s in selected:
|
| 742 |
-
|
| 743 |
-
if prev is None or s.score > prev[0]:
|
| 744 |
-
best[s.tag] = (s.score, s.why)
|
| 745 |
return
|
| 746 |
-
|
| 747 |
-
except Exception as e:
|
| 748 |
-
diagnostics["attempt_errors"] += 1
|
| 749 |
-
_record_attempt_for_n(N_local, parse_ok=False, error=True)
|
| 750 |
-
if log:
|
| 751 |
-
log(f"Stage3 {label}: attempt {att+1} error: {e}")
|
| 752 |
-
|
| 753 |
-
if log:
|
| 754 |
-
log(f"Stage3 {label}: gave up after {retries+1} attempts")
|
| 755 |
-
diagnostics["calls_exhausted_retries"] += 1
|
| 756 |
-
|
| 757 |
-
# Split candidates by type (general vs entity)
|
| 758 |
-
general_with_idx, entity_with_idx = _split_candidates_by_type(norm, log)
|
| 759 |
-
|
| 760 |
-
# Extract just the candidates for LLM calls
|
| 761 |
-
general_cands = [cand for _, cand in general_with_idx]
|
| 762 |
-
entity_cands = [cand for _, cand in entity_with_idx]
|
| 763 |
-
|
| 764 |
-
# Process general candidates (attributes, actions, species, etc.)
|
| 765 |
-
if general_cands:
|
| 766 |
-
if mode == "single_shot":
|
| 767 |
-
run_call(general_cands, "general_single_shot", select_system_template)
|
| 768 |
-
else:
|
| 769 |
-
base_chunks = _build_chunks(general_cands, chunk_size)
|
| 770 |
-
for chunk_idx, chunk in enumerate(base_chunks):
|
| 771 |
-
run_call(chunk, f"general_chunk_{chunk_idx}", select_system_template)
|
| 772 |
-
|
| 773 |
-
# Process entity candidates (characters only) with alias-based pre-filtering
|
| 774 |
-
if entity_cands:
|
| 775 |
-
tag2aliases = get_tag2aliases()
|
| 776 |
-
qwords = _query_words(image_description)
|
| 777 |
-
qnorm = _normalize_for_matching(image_description)
|
| 778 |
-
|
| 779 |
-
filtered_entity_cands: List[Candidate] = []
|
| 780 |
-
filtered_out: List[str] = []
|
| 781 |
-
|
| 782 |
-
for cand in entity_cands:
|
| 783 |
-
if _character_matches_via_aliases(
|
| 784 |
-
cand.tag, image_description, tag2aliases, qwords, qnorm
|
| 785 |
-
):
|
| 786 |
-
filtered_entity_cands.append(cand)
|
| 787 |
-
else:
|
| 788 |
-
filtered_out.append(cand.tag)
|
| 789 |
-
|
| 790 |
-
if log:
|
| 791 |
-
log(
|
| 792 |
-
f"Stage3 entity alias filter: "
|
| 793 |
-
f"before={len(entity_cands)} "
|
| 794 |
-
f"after={len(filtered_entity_cands)} "
|
| 795 |
-
f"removed={len(filtered_out)}"
|
| 796 |
-
)
|
| 797 |
-
if filtered_out:
|
| 798 |
-
log(f"Stage3 entity alias filter removed: {filtered_out[:20]}")
|
| 799 |
-
|
| 800 |
-
if filtered_entity_cands:
|
| 801 |
-
if mode == "single_shot":
|
| 802 |
-
run_call(filtered_entity_cands, "entity_single_shot", ENTITY_SYSTEM_TEMPLATE)
|
| 803 |
-
else:
|
| 804 |
-
base_chunks = _build_chunks(filtered_entity_cands, chunk_size)
|
| 805 |
-
for chunk_idx, chunk in enumerate(base_chunks):
|
| 806 |
-
run_call(chunk, f"entity_chunk_{chunk_idx}", ENTITY_SYSTEM_TEMPLATE)
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
best = {t: v for t, v in best.items() if WHY_RANK.get(v[1], 4) <= max_rank}
|
| 813 |
-
if log:
|
| 814 |
-
log(f"Stage3 why filter: min_why={min_why} (rank<={max_rank}), "
|
| 815 |
-
f"before={before} after={len(best)} dropped={before - len(best)}")
|
| 816 |
-
|
| 817 |
-
# Deterministic ordering: derived score desc, tie-break by count desc (count not shown to LLM).
|
| 818 |
count_by_tag = {c.tag: (c.count if c.count is not None else -1) for c in norm}
|
| 819 |
-
ordered_tags = sorted(
|
| 820 |
-
|
| 821 |
-
# Legacy cap: apply AFTER union + ordering.
|
| 822 |
-
if isinstance(max_pick, int) and max_pick > 0:
|
| 823 |
-
ordered_tags = ordered_tags[:max_pick]
|
| 824 |
-
|
| 825 |
-
# Map back to original indices
|
| 826 |
-
out_idx: List[int] = []
|
| 827 |
tag_why: Dict[str, str] = {}
|
| 828 |
for t in ordered_tags:
|
| 829 |
if t in tag_to_first_index:
|
| 830 |
out_idx.append(tag_to_first_index[t])
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
if diagnostics["attempts_total"] > 0:
|
| 834 |
-
diagnostics["attempt_failure_rate"] = (
|
| 835 |
-
diagnostics["attempt_parse_fail"] + diagnostics["attempt_errors"]
|
| 836 |
-
) / diagnostics["attempts_total"]
|
| 837 |
-
else:
|
| 838 |
-
diagnostics["attempt_failure_rate"] = 0.0
|
| 839 |
-
|
| 840 |
-
if diagnostics["calls_total"] > 0:
|
| 841 |
-
diagnostics["call_exhaustion_rate"] = (
|
| 842 |
-
diagnostics["calls_exhausted_retries"] / diagnostics["calls_total"]
|
| 843 |
-
)
|
| 844 |
-
else:
|
| 845 |
-
diagnostics["call_exhaustion_rate"] = 0.0
|
| 846 |
-
|
| 847 |
-
if return_metadata:
|
| 848 |
-
if return_diagnostics:
|
| 849 |
-
return out_idx, tag_why, diagnostics
|
| 850 |
-
return out_idx, tag_why
|
| 851 |
-
|
| 852 |
-
return out_idx
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
# ---------------------------------------------------------------------------
|
| 856 |
-
# Stage 3s: Structural tag inference (solo/duo/male/female/anthro/… )
|
| 857 |
-
# ---------------------------------------------------------------------------
|
| 858 |
-
# Group-based approach: tags are organized into semantic groups loaded from
|
| 859 |
-
# tag_groups.json / tag_wiki_defs.json where possible, with curated fallback
|
| 860 |
-
# definitions for tags whose wiki entries are only thumbnail references.
|
| 861 |
-
#
|
| 862 |
-
# Each group specifies a constraint mode:
|
| 863 |
-
# "exclusive" = pick exactly one (e.g. character count)
|
| 864 |
-
# "multi" = pick all that apply (e.g. body type, gender)
|
| 865 |
-
|
| 866 |
-
import json as _json
|
| 867 |
-
|
| 868 |
-
@dataclass
|
| 869 |
-
class StructuralGroup:
|
| 870 |
-
"""One category of structural tags to probe."""
|
| 871 |
-
name: str
|
| 872 |
-
constraint: str # "exclusive" or "multi"
|
| 873 |
-
tags: List[Tuple[str, str]] # (tag, definition) pairs
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
def _load_structural_groups_from_csv() -> List[StructuralGroup]:
|
| 877 |
-
"""Load structural groups from data/structural_tag_definitions.csv."""
|
| 878 |
-
data_dir = Path(__file__).resolve().parents[2] / "data"
|
| 879 |
-
csv_path = data_dir / "structural_tag_definitions.csv"
|
| 880 |
-
if not csv_path.is_file():
|
| 881 |
-
return []
|
| 882 |
-
|
| 883 |
-
groups_by_name: Dict[str, List[Tuple[str, str]]] = {}
|
| 884 |
-
constraints_by_name: Dict[str, str] = {}
|
| 885 |
-
|
| 886 |
-
with csv_path.open("r", encoding="utf-8", newline="") as f:
|
| 887 |
-
reader = csv.DictReader(f)
|
| 888 |
-
for row in reader:
|
| 889 |
-
enabled = (row.get("enabled") or "1").strip().lower()
|
| 890 |
-
if enabled in {"0", "false", "no"}:
|
| 891 |
-
continue
|
| 892 |
-
|
| 893 |
-
group_name = (row.get("group_name") or "").strip()
|
| 894 |
-
constraint = (row.get("constraint") or "multi").strip().lower()
|
| 895 |
-
tag = (row.get("tag") or "").strip()
|
| 896 |
-
definition = " ".join((row.get("definition") or "").split())
|
| 897 |
-
|
| 898 |
-
if not group_name or not tag or not definition:
|
| 899 |
-
continue
|
| 900 |
-
if constraint not in {"exclusive", "multi"}:
|
| 901 |
-
constraint = "multi"
|
| 902 |
-
|
| 903 |
-
if group_name not in groups_by_name:
|
| 904 |
-
groups_by_name[group_name] = []
|
| 905 |
-
constraints_by_name[group_name] = constraint
|
| 906 |
-
groups_by_name[group_name].append((tag, definition))
|
| 907 |
-
|
| 908 |
-
out: List[StructuralGroup] = []
|
| 909 |
-
for group_name, tags in groups_by_name.items():
|
| 910 |
-
if not tags:
|
| 911 |
-
continue
|
| 912 |
-
out.append(
|
| 913 |
-
StructuralGroup(
|
| 914 |
-
name=group_name,
|
| 915 |
-
constraint=constraints_by_name.get(group_name, "multi"),
|
| 916 |
-
tags=tags,
|
| 917 |
-
)
|
| 918 |
-
)
|
| 919 |
-
return out
|
| 920 |
-
|
| 921 |
-
def _load_structural_groups() -> List[StructuralGroup]:
|
| 922 |
-
"""Build structural groups from local config file with legacy fallback.
|
| 923 |
-
|
| 924 |
-
Preferred source:
|
| 925 |
-
data/structural_tag_definitions.csv
|
| 926 |
-
Fallback:
|
| 927 |
-
tag_wiki_defs.json + curated hardcoded defaults
|
| 928 |
-
"""
|
| 929 |
-
csv_groups = _load_structural_groups_from_csv()
|
| 930 |
-
if csv_groups:
|
| 931 |
-
return csv_groups
|
| 932 |
-
|
| 933 |
-
data_dir = Path(__file__).resolve().parents[2] / "data"
|
| 934 |
-
|
| 935 |
-
# Load wiki definitions (may not exist yet)
|
| 936 |
-
wiki_defs: Dict[str, str] = {}
|
| 937 |
-
wiki_path = data_dir / "tag_wiki_defs.json"
|
| 938 |
-
if wiki_path.is_file():
|
| 939 |
-
with wiki_path.open("r", encoding="utf-8") as f:
|
| 940 |
-
wiki_defs = _json.load(f)
|
| 941 |
-
|
| 942 |
-
def _def(tag: str, fallback: str) -> str:
|
| 943 |
-
"""Get wiki definition if it's real text, otherwise use fallback."""
|
| 944 |
-
d = wiki_defs.get(tag, "")
|
| 945 |
-
# Skip thumbnail-only definitions
|
| 946 |
-
if not d or d.startswith("thumb ") or len(d) < 15:
|
| 947 |
-
return fallback
|
| 948 |
-
return d[:200] # cap length for prompt
|
| 949 |
-
|
| 950 |
-
groups: List[StructuralGroup] = []
|
| 951 |
-
|
| 952 |
-
# ── Group A: Character Count (exclusive) ──
|
| 953 |
-
groups.append(StructuralGroup(
|
| 954 |
-
name="character_count",
|
| 955 |
-
constraint="exclusive",
|
| 956 |
-
tags=[
|
| 957 |
-
("zero_pictured", _def("zero_pictured",
|
| 958 |
-
"No characters or living beings appear in the image")),
|
| 959 |
-
("solo", _def("solo",
|
| 960 |
-
"Exactly one character appears in the image")),
|
| 961 |
-
("duo", _def("duo",
|
| 962 |
-
"Exactly two characters appear in the image")),
|
| 963 |
-
("trio", _def("trio",
|
| 964 |
-
"Exactly three characters appear in the image")),
|
| 965 |
-
("group", _def("group",
|
| 966 |
-
"Four or more characters appear in the image")),
|
| 967 |
-
],
|
| 968 |
-
))
|
| 969 |
-
|
| 970 |
-
# ── Group B: Body Type (multi — per character) ──
|
| 971 |
-
# Key distinction the LLM must learn:
|
| 972 |
-
# anthro = ANIMAL with human body shape (upright, hands)
|
| 973 |
-
# humanoid = HUMAN or near-human (elf, dwarf) with NO animal features
|
| 974 |
-
# feral = normal animal shape, on all fours
|
| 975 |
-
groups.append(StructuralGroup(
|
| 976 |
-
name="body_type",
|
| 977 |
-
constraint="multi",
|
| 978 |
-
tags=[
|
| 979 |
-
("anthro", _def("anthro",
|
| 980 |
-
"An animal character with a human-like body: walks upright on two legs, "
|
| 981 |
-
"has arms and hands. Examples: a wolf-person, a fox standing up. "
|
| 982 |
-
"Still has animal features like fur, tail, muzzle")),
|
| 983 |
-
("feral", _def("feral",
|
| 984 |
-
"A regular animal in its natural body shape. Walks on all fours (or "
|
| 985 |
-
"flies/swims naturally). NOT standing upright, NOT humanized")),
|
| 986 |
-
("humanoid", _def("humanoid",
|
| 987 |
-
"A human or human-like character with NO animal features. Includes "
|
| 988 |
-
"humans, elves, dwarves, and fantasy races that look human. "
|
| 989 |
-
"Does NOT include animal-people — those are anthro")),
|
| 990 |
-
("taur", _def("taur",
|
| 991 |
-
"A centaur-like body: human or anthro upper body attached to a "
|
| 992 |
-
"four-legged animal lower body")),
|
| 993 |
-
],
|
| 994 |
-
))
|
| 995 |
-
|
| 996 |
-
# ── Group C: Gender (multi — per character) ──
|
| 997 |
-
groups.append(StructuralGroup(
|
| 998 |
-
name="gender",
|
| 999 |
-
constraint="multi",
|
| 1000 |
-
tags=[
|
| 1001 |
-
("male", _def("male",
|
| 1002 |
-
"A character described as male, a boy, or with he/him pronouns")),
|
| 1003 |
-
("female", _def("female",
|
| 1004 |
-
"A character described as female, a girl, or with she/her pronouns")),
|
| 1005 |
-
("ambiguous_gender", _def("ambiguous_gender",
|
| 1006 |
-
"A character whose gender is not stated or cannot be determined")),
|
| 1007 |
-
("intersex", _def("intersex",
|
| 1008 |
-
"A character explicitly described as intersex or hermaphrodite")),
|
| 1009 |
-
],
|
| 1010 |
-
))
|
| 1011 |
-
|
| 1012 |
-
# ── Group D: Clothing State (multi) ──
|
| 1013 |
-
groups.append(StructuralGroup(
|
| 1014 |
-
name="clothing_state",
|
| 1015 |
-
constraint="multi",
|
| 1016 |
-
tags=[
|
| 1017 |
-
("clothed", _def("clothed",
|
| 1018 |
-
"Wearing clothes on BOTH chest/torso AND legs/waist. "
|
| 1019 |
-
"Examples: shirt and pants, dress, full outfit")),
|
| 1020 |
-
("nude", _def("nude",
|
| 1021 |
-
"Wearing NO clothes at all. Completely naked, no shirt and no pants")),
|
| 1022 |
-
("topless", _def("topless",
|
| 1023 |
-
"NO shirt/top (bare chest), BUT wearing pants/bottoms. "
|
| 1024 |
-
"Upper body exposed, lower body covered")),
|
| 1025 |
-
("bottomless", _def("bottomless",
|
| 1026 |
-
"Wearing shirt/top on chest, BUT NO pants/bottoms. "
|
| 1027 |
-
"Upper body covered, lower body exposed")),
|
| 1028 |
-
],
|
| 1029 |
-
))
|
| 1030 |
-
|
| 1031 |
-
# ── Group E: Common Visual Elements (multi) ──
|
| 1032 |
-
groups.append(StructuralGroup(
|
| 1033 |
-
name="visual_elements",
|
| 1034 |
-
constraint="multi",
|
| 1035 |
-
tags=[
|
| 1036 |
-
("looking_at_viewer", _def("looking_at_viewer",
|
| 1037 |
-
"A character is looking directly at the camera or viewer")),
|
| 1038 |
-
("text", _def("text",
|
| 1039 |
-
"The image contains visible writing, words, or lettering")),
|
| 1040 |
-
],
|
| 1041 |
-
))
|
| 1042 |
-
|
| 1043 |
-
return groups
|
| 1044 |
-
|
| 1045 |
-
|
| 1046 |
-
def _build_structural_prompt(groups: List[StructuralGroup]) -> Tuple[str, List[Tuple[str, str]]]:
|
| 1047 |
-
"""Build numbered statement list from structural groups.
|
| 1048 |
-
|
| 1049 |
-
Returns (formatted_text, flat_list_of_(tag, definition)_pairs).
|
| 1050 |
-
The flat list maps 1-based statement numbers to tags.
|
| 1051 |
-
"""
|
| 1052 |
-
lines: List[str] = []
|
| 1053 |
-
flat: List[Tuple[str, str]] = []
|
| 1054 |
-
idx = 1
|
| 1055 |
-
|
| 1056 |
-
for g in groups:
|
| 1057 |
-
constraint_label = "pick EXACTLY ONE" if g.constraint == "exclusive" else "pick ALL that apply"
|
| 1058 |
-
group_header = f"--- {g.name.replace('_', ' ').upper()} ({constraint_label}) ---"
|
| 1059 |
-
lines.append(group_header)
|
| 1060 |
-
for tag, defn in g.tags:
|
| 1061 |
-
lines.append(f"{idx}. {defn}")
|
| 1062 |
-
flat.append((tag, defn))
|
| 1063 |
-
idx += 1
|
| 1064 |
-
lines.append("") # blank line between groups
|
| 1065 |
-
|
| 1066 |
-
return "\n".join(lines), flat
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
STRUCTURAL_SYSTEM_TEMPLATE = """You classify image descriptions by selecting true statements from a numbered list.
|
| 1070 |
-
|
| 1071 |
-
The statements are organized into GROUPS. Each group header tells you how many to pick:
|
| 1072 |
-
- "pick EXACTLY ONE" = choose the single best match in that group
|
| 1073 |
-
- "pick ALL that apply" = choose every statement that is true
|
| 1074 |
-
|
| 1075 |
-
IMPORTANT RULES:
|
| 1076 |
-
1. ONLY select a statement if the description directly says it or makes it very obvious.
|
| 1077 |
-
2. Do NOT guess or assume things the description does not mention.
|
| 1078 |
-
3. For body type: "anthro" means an ANIMAL with a human-shaped body (walks upright, has hands, but still has fur/tail/muzzle). "humanoid" means HUMAN or human-like with NO animal features. A wolf standing on two legs = anthro, NOT humanoid.
|
| 1079 |
-
4. For gender: only select male/female/intersex when there is explicit textual evidence (such as gender words or pronouns). Do not infer gender from species, body shape, clothing, or style. If no reliable gender cue is present, do not select male/female/intersex; use ambiguous_gender instead.
|
| 1080 |
-
5. For clothing state: READ CAREFULLY! "topless" = bare chest, wearing pants. "bottomless" = wearing shirt, no pants. If unsure, re-read the description.
|
| 1081 |
-
6. If clothing is not mentioned, do NOT pick any clothing statement.
|
| 1082 |
-
|
| 1083 |
-
Return JSON ONLY:
|
| 1084 |
-
{{"selections": [{{"i": 1}}, {{"i": 5}}]}}
|
| 1085 |
-
|
| 1086 |
-
EXAMPLE:
|
| 1087 |
-
Description: "A muscular male wolf standing in a forest, wearing jeans, giving a thumbs up"
|
| 1088 |
-
Answer: {{"selections": [{{"i": 2}}, {{"i": 6}}, {{"i": 10}}, {{"i": 14}}]}}
|
| 1089 |
-
Why: One character = solo (2). Wolf standing upright with hands = anthro (6), NOT humanoid because it is a wolf. Male (10). Wearing jeans = clothed (14)."""
|
| 1090 |
-
|
| 1091 |
-
STRUCTURAL_USER_TEMPLATE = """Read this image description and select which statements are true.
|
| 1092 |
-
|
| 1093 |
-
IMAGE DESCRIPTION:
|
| 1094 |
-
{image_description}
|
| 1095 |
-
|
| 1096 |
-
STATEMENTS (pick by number):
|
| 1097 |
-
{statement_lines}"""
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
class StructuralSelectionItem(BaseModel):
|
| 1101 |
-
i: int = Field(..., description="1-based index into the statement list.")
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
class StructuralSelectionResponse(BaseModel):
|
| 1105 |
-
selections: List[StructuralSelectionItem] = Field(default_factory=list)
|
| 1106 |
-
|
| 1107 |
-
|
| 1108 |
-
def _build_structural_response_format() -> Dict[str, Any]:
|
| 1109 |
-
schema = {
|
| 1110 |
-
"type": "object",
|
| 1111 |
-
"properties": {
|
| 1112 |
-
"selections": {
|
| 1113 |
-
"type": "array",
|
| 1114 |
-
"items": {
|
| 1115 |
-
"type": "object",
|
| 1116 |
-
"properties": {
|
| 1117 |
-
"i": {"type": "integer"},
|
| 1118 |
-
},
|
| 1119 |
-
"required": ["i"],
|
| 1120 |
-
"additionalProperties": False,
|
| 1121 |
-
},
|
| 1122 |
-
}
|
| 1123 |
-
},
|
| 1124 |
-
"required": ["selections"],
|
| 1125 |
-
"additionalProperties": False,
|
| 1126 |
-
}
|
| 1127 |
-
return {
|
| 1128 |
-
"type": "json_schema",
|
| 1129 |
-
"json_schema": {
|
| 1130 |
-
"name": "structural_selection",
|
| 1131 |
-
"strict": True,
|
| 1132 |
-
"schema": schema,
|
| 1133 |
-
},
|
| 1134 |
-
}
|
| 1135 |
-
|
| 1136 |
-
|
| 1137 |
-
# Cache the loaded groups so we only read JSON files once per process.
|
| 1138 |
-
_cached_structural_groups: Optional[List[StructuralGroup]] = None
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
def _get_structural_groups() -> List[StructuralGroup]:
|
| 1142 |
-
global _cached_structural_groups
|
| 1143 |
-
if _cached_structural_groups is None:
|
| 1144 |
-
_cached_structural_groups = _load_structural_groups()
|
| 1145 |
-
return _cached_structural_groups
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
def llm_infer_structural_tags(
|
| 1149 |
-
query_text: str,
|
| 1150 |
-
log=None,
|
| 1151 |
-
*,
|
| 1152 |
-
temperature: float = 0.0,
|
| 1153 |
-
max_tokens: int = 512,
|
| 1154 |
-
retries: int = 2,
|
| 1155 |
-
) -> List[str]:
|
| 1156 |
-
"""Infer structural tags via LLM using group-based statement agreement.
|
| 1157 |
-
|
| 1158 |
-
Probes multiple semantic groups (character count, body type, gender,
|
| 1159 |
-
clothing state, visual elements) with definitions loaded from wiki data
|
| 1160 |
-
where available.
|
| 1161 |
-
|
| 1162 |
-
Returns a list of e621 tag strings (e.g. ["solo", "anthro", "male", "clothed"]).
|
| 1163 |
-
"""
|
| 1164 |
-
if log:
|
| 1165 |
-
log("Stage3s (structural): inferring structural tags via group-based statement agreement")
|
| 1166 |
-
|
| 1167 |
-
groups = _get_structural_groups()
|
| 1168 |
-
statement_lines, flat_tags = _build_structural_prompt(groups)
|
| 1169 |
-
N = len(flat_tags)
|
| 1170 |
-
|
| 1171 |
-
response_format = _build_structural_response_format()
|
| 1172 |
-
llm = _get_llm(temperature=temperature, max_tokens=max_tokens,
|
| 1173 |
-
response_format=response_format)
|
| 1174 |
-
model_name = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
|
| 1175 |
-
|
| 1176 |
-
parser = PydanticOutputParser(pydantic_object=StructuralSelectionResponse)
|
| 1177 |
-
|
| 1178 |
-
prompt = ChatPromptTemplate.from_messages(
|
| 1179 |
-
[
|
| 1180 |
-
("system", STRUCTURAL_SYSTEM_TEMPLATE),
|
| 1181 |
-
("human", STRUCTURAL_USER_TEMPLATE),
|
| 1182 |
-
],
|
| 1183 |
-
template_format="f-string",
|
| 1184 |
-
)
|
| 1185 |
-
chain = prompt | llm | parser
|
| 1186 |
-
|
| 1187 |
-
if log:
|
| 1188 |
-
group_summary = ", ".join(f"{g.name}({len(g.tags)})" for g in groups)
|
| 1189 |
-
log(f"Stage3s: model={model_name} groups=[{group_summary}] total_statements={N}")
|
| 1190 |
-
|
| 1191 |
-
for att in range(retries + 1):
|
| 1192 |
-
try:
|
| 1193 |
-
parsed = chain.invoke({
|
| 1194 |
-
"N": N,
|
| 1195 |
-
"image_description": query_text,
|
| 1196 |
-
"statement_lines": statement_lines,
|
| 1197 |
-
})
|
| 1198 |
-
|
| 1199 |
-
if isinstance(parsed, BaseModel):
|
| 1200 |
-
parsed = parsed.model_dump() if hasattr(parsed, "model_dump") else parsed.dict()
|
| 1201 |
-
|
| 1202 |
-
sels = parsed.get("selections", []) if isinstance(parsed, dict) else []
|
| 1203 |
-
chosen_tags: List[str] = []
|
| 1204 |
-
seen: Set[str] = set()
|
| 1205 |
-
for item in sels:
|
| 1206 |
-
idx = item.get("i") if isinstance(item, dict) else None
|
| 1207 |
-
if not isinstance(idx, int) or idx < 1 or idx > N:
|
| 1208 |
-
continue
|
| 1209 |
-
tag = flat_tags[idx - 1][0]
|
| 1210 |
-
if tag not in seen:
|
| 1211 |
-
chosen_tags.append(tag)
|
| 1212 |
-
seen.add(tag)
|
| 1213 |
-
|
| 1214 |
-
if log:
|
| 1215 |
-
tag_str = ", ".join(chosen_tags) if chosen_tags else "(none)"
|
| 1216 |
-
log(f"Stage3s: attempt {att+1} selected {len(chosen_tags)} tags: {tag_str}")
|
| 1217 |
-
|
| 1218 |
-
return chosen_tags
|
| 1219 |
-
|
| 1220 |
-
except Exception as e:
|
| 1221 |
-
if log:
|
| 1222 |
-
log(f"Stage3s: attempt {att+1} error: {e}")
|
| 1223 |
-
|
| 1224 |
-
if log:
|
| 1225 |
-
log(f"Stage3s: gave up after {retries+1} attempts")
|
| 1226 |
-
return []
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
-
# ---------------------------------------------------------------------------
|
| 1230 |
-
# Stage 3p: Simplified high-precision probe tags
|
| 1231 |
-
# ---------------------------------------------------------------------------
|
| 1232 |
-
_cached_runtime_probe_tags: Optional[List[str]] = None
|
| 1233 |
-
_cached_runtime_probe_rows: Optional[List[Dict[str, str]]] = None
|
| 1234 |
-
_cached_runtime_probe_wiki_defs: Optional[Dict[str, str]] = None
|
| 1235 |
-
|
| 1236 |
-
_PROBE_GLOSSARY_FALLBACKS: Dict[str, str] = {
|
| 1237 |
-
"anthro": "Animal character with human-like body shape, usually upright with arms and hands.",
|
| 1238 |
-
"canid": "Member of dog-family species (wolves, foxes, dogs, coyotes).",
|
| 1239 |
-
"felid": "Member of cat-family species (cats, lions, tigers, leopards).",
|
| 1240 |
-
"solo": "Exactly one character is present in the image.",
|
| 1241 |
-
"duo": "Exactly two characters are present in the image.",
|
| 1242 |
-
"group": "Four or more characters are present in the image.",
|
| 1243 |
-
"<3": "Visible heart symbol in text or icon form.",
|
| 1244 |
-
}
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
def _load_runtime_probe_rows(log=None) -> List[Dict[str, str]]:
    """Return the rows of ``simplified_probe_tags.csv``, reading the file once.

    The CSV is looked up under ``data/analysis`` relative to the third parent
    directory of this module. A missing or unreadable file yields an empty
    list; that empty list is cached too, so the lookup is not retried.

    Args:
        log: Optional callable taking one message string; used to report a
            missing file or a read failure.

    Returns:
        One dict per CSV row, keyed by column header.
    """
    global _cached_runtime_probe_rows
    cached = _cached_runtime_probe_rows
    if cached is not None:
        return cached

    base_dir = Path(__file__).resolve().parents[2]
    probe_csv = base_dir / "data" / "analysis" / "simplified_probe_tags.csv"

    loaded: List[Dict[str, str]] = []
    if probe_csv.is_file():
        try:
            with probe_csv.open("r", encoding="utf-8", newline="") as handle:
                loaded = list(csv.DictReader(handle))
        except Exception as exc:
            # Best-effort load: a broken CSV disables the probe step rather
            # than failing the whole pipeline.
            if log:
                log(f"Stage3p: failed reading probe CSV: {exc}")
            loaded = []
    elif log:
        log(f"Stage3p: probe CSV not found at {probe_csv}; skipping probe step")

    _cached_runtime_probe_rows = loaded
    return loaded
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
def _load_runtime_probe_wiki_defs() -> Dict[str, str]:
    """Return the tag -> wiki definition mapping, reading the JSON file once.

    The file ``tag_wiki_defs.json`` is looked up under ``data`` relative to the
    third parent directory of this module. A missing file, a read/parse
    failure, or a payload that is not a JSON object all yield an empty dict.
    The result (including the empty dict) is cached for later calls.

    Returns:
        Mapping of tag to definition text; empty when nothing usable exists.
    """
    global _cached_runtime_probe_wiki_defs
    if _cached_runtime_probe_wiki_defs is not None:
        return _cached_runtime_probe_wiki_defs

    wiki_path = Path(__file__).resolve().parents[2] / "data" / "tag_wiki_defs.json"
    defs: Dict[str, str] = {}
    if wiki_path.is_file():
        try:
            with wiki_path.open("r", encoding="utf-8") as f:
                loaded = _json.load(f)
        except Exception:
            # Best-effort: glossary text is optional, so an unreadable file
            # simply means "no wiki definitions".
            loaded = None
        # json.load may legally return a list, string, number or None. Only a
        # dict supports the .get() lookups callers perform, and caching None
        # would make every later call re-read the file.
        if isinstance(loaded, dict):
            defs = loaded
    _cached_runtime_probe_wiki_defs = defs
    return defs
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
def _load_runtime_probe_tags(log=None) -> List[str]:
    """Return the probe tags to use at runtime, computed once and cached.

    Rows come from the probe CSV loader. Tags flagged ``selected_final`` are
    preferred; if no row carries that flag, tags flagged ``selected_initial``
    are used instead. A flag counts as set when its stripped value is
    ``"1"``, ``"true"`` or ``"True"``. Blank tag names are dropped.

    Args:
        log: Optional callable taking one message string.

    Returns:
        Probe tag names in CSV row order (possibly empty).
    """
    global _cached_runtime_probe_tags
    if _cached_runtime_probe_tags is not None:
        return _cached_runtime_probe_tags

    probe_rows = _load_runtime_probe_rows(log=log)
    on_values = {"1", "true", "True"}

    def _tags_flagged(column: str) -> List[str]:
        # Tag names of every row whose `column` flag is switched on.
        return [
            row.get("tag", "").strip()
            for row in probe_rows
            if (row.get(column, "") or "").strip() in on_values
        ]

    # Both lists are built up front, exactly as the flag columns dictate.
    reliability_gated = _tags_flagged("selected_final")
    first_pass = _tags_flagged("selected_initial")
    chosen = reliability_gated or first_pass
    result = [name for name in chosen if name]

    _cached_runtime_probe_tags = result
    if log and result:
        log(f"Stage3p: loaded {len(result)} probe tags")
    return result
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
def _is_real_wiki_def(text: str) -> bool:
    """Tell whether ``text`` looks like a usable wiki definition.

    After stripping surrounding whitespace the text must be at least 20
    characters long and must not start with ``"thumb "`` (case-insensitive).
    ``None`` and empty input are rejected.
    """
    body = (text or "").strip()
    looks_like_thumb_stub = body.lower().startswith("thumb ")
    return len(body) >= 20 and not looks_like_thumb_stub
|
| 1324 |
-
|
| 1325 |
-
|
| 1326 |
-
def _clean_glossary_text(text: str) -> str:
    """Collapse ``text`` to a single line of at most 160 characters.

    Every run of whitespace (including line breaks) becomes one space and the
    ends are trimmed. Longer results are cut to their first 157 characters,
    right-stripped, and suffixed with ``"..."``. ``None`` yields ``""``.
    """
    # str.split() with no argument already treats "\n" and "\r" as separators.
    single_line = " ".join((text or "").split())
    if len(single_line) <= 160:
        return single_line
    return single_line[:157].rstrip() + "..."
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
def _build_probe_candidate_display(probe_tags: Sequence[str], log=None) -> Dict[str, str]:
    """Build the label shown to the LLM for each probe tag.

    A tag whose CSV row has ``needs_glossary`` switched on (``"1"``,
    ``"true"`` or ``"True"``) is rendered as ``"<display tag> - <glossary>"``.
    The glossary is the wiki definition when it passes ``_is_real_wiki_def``,
    otherwise the entry from ``_PROBE_GLOSSARY_FALLBACKS``; if neither gives
    any text the plain display tag is used. All other tags map to their plain
    display form.

    Args:
        probe_tags: Canonical probe tag names.
        log: Optional callable taking one message string, forwarded to the
            CSV loader.

    Returns:
        Mapping of canonical tag to display label.
    """
    csv_rows = _load_runtime_probe_rows(log=log)
    row_for_tag = {row.get("tag", "").strip(): row for row in csv_rows}
    wiki_defs = _load_runtime_probe_wiki_defs()
    on_values = {"1", "true", "True"}

    def _label(tag: str) -> str:
        # Plain label unless the CSV asks for a glossary and one can be found.
        plain = _display_tag(tag)
        flag = row_for_tag.get(tag, {}).get("needs_glossary", "") or ""
        if flag.strip() not in on_values:
            return plain

        definition = wiki_defs.get(tag, "")
        if not _is_real_wiki_def(definition):
            definition = _PROBE_GLOSSARY_FALLBACKS.get(tag, "")
        glossary = _clean_glossary_text(definition)
        if not glossary:
            return plain
        return f"{plain} - {glossary}"

    return {tag: _label(tag) for tag in probe_tags}
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
def llm_infer_probe_tags(
|
| 1357 |
-
query_text: str,
|
| 1358 |
-
log=None,
|
| 1359 |
-
*,
|
| 1360 |
-
temperature: float = 0.0,
|
| 1361 |
-
max_tokens: int = 512,
|
| 1362 |
-
retries: int = 2,
|
| 1363 |
-
min_why: Optional[str] =
|
| 1364 |
) -> List[str]:
|
| 1365 |
-
"""Infer
|
| 1366 |
-
probe_tags = _load_runtime_probe_tags(log=log)
|
| 1367 |
-
if not probe_tags:
|
| 1368 |
-
return []
|
| 1369 |
-
|
| 1370 |
if log:
|
| 1371 |
-
log(f"Stage3p: probing {len(probe_tags)} tags
|
| 1372 |
-
candidate_display = _build_probe_candidate_display(probe_tags, log=log)
|
| 1373 |
-
|
| 1374 |
-
out = llm_select_indices(
|
| 1375 |
-
query_text=query_text,
|
| 1376 |
-
candidates=probe_tags,
|
| 1377 |
-
max_pick=len(probe_tags),
|
| 1378 |
-
log=log,
|
| 1379 |
-
retries=retries,
|
| 1380 |
-
mode="single_shot",
|
| 1381 |
-
chunk_size=max(1, len(probe_tags)),
|
| 1382 |
-
per_phrase_k=max(1, len(probe_tags)),
|
| 1383 |
-
temperature=temperature,
|
| 1384 |
-
max_tokens=max_tokens,
|
| 1385 |
-
return_metadata=False,
|
| 1386 |
-
return_diagnostics=False,
|
| 1387 |
-
min_why=
|
| 1388 |
candidate_display=candidate_display,
|
| 1389 |
)
|
| 1390 |
-
|
| 1391 |
-
selected: List[str] = []
|
| 1392 |
-
for i in out:
|
| 1393 |
-
if 0 <= i < len(probe_tags):
|
| 1394 |
-
selected.append(probe_tags[i])
|
| 1395 |
-
|
| 1396 |
-
if log:
|
| 1397 |
-
shown = ", ".join(selected) if selected else "(none)"
|
| 1398 |
-
log(f"Stage3p: selected {len(selected)} probe tags: {shown}")
|
| 1399 |
-
return selected
|
|
|
|
| 1 |
+
# psq_rag/llm/select.py
|
| 2 |
+
# Stage 3: Closed-Set Selection (LangChain-only implementation)
|
| 3 |
+
#
|
| 4 |
+
# This module intentionally uses LangChain for:
|
| 5 |
+
# - prompt templating (including {N})
|
| 6 |
+
# - LLM call orchestration
|
| 7 |
+
# - JSON parsing
|
| 8 |
+
#
|
| 9 |
+
# There is NO fallback path. If LangChain dependencies are missing, this module
|
| 10 |
+
# should fail loudly so you install them.
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import re
|
| 14 |
+
import csv
|
| 15 |
from dataclasses import dataclass
|
| 16 |
from pathlib import Path
|
| 17 |
+
from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union, cast, Mapping
|
| 18 |
+
|
| 19 |
+
from langchain_openai import ChatOpenAI
|
| 20 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 21 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
| 22 |
+
from pydantic import BaseModel, Field, SecretStr
|
| 23 |
+
from rapidfuzz import fuzz
|
| 24 |
+
|
| 25 |
+
from psq_rag.retrieval.psq_retrieval import Candidate # Candidate(tag, score_*, count, sources)
|
| 26 |
+
from psq_rag.retrieval.state import get_tag_type_name, get_tag2aliases
|
| 27 |
+
|
| 28 |
+
# Character-typed tags that are generic categories, not actual named characters.
# These leak through the alias filter because they match common words in captions.
# They are excluded from the entity pipeline and instead routed to general selection.
# Consulted by _split_candidates_by_type via membership tests; frozenset keeps the
# collection immutable.
_GENERIC_CHARACTER_TAGS = frozenset({
    "fan_character",
    "background_character",
    "unnamed_character",
    "unknown_character",
    "anonymous_character",
    "viewer",
    "original_character",
})
|
| 40 |
+
|
| 41 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# IMPORTANT ABOUT TEMPLATING:
|
| 43 |
# - This string is rendered by LangChain's f-string template engine.
|
| 44 |
# - Literal JSON braces must be escaped as {{ and }}.
|
| 45 |
# - {N} is a real template variable and MUST be provided.
|
| 46 |
SELECT_SYSTEM_TEMPLATE = """You are given a description of an image and a list of imageboard tags.
|
| 47 |
|
| 48 |
+
Select tags ONLY when they are explicitly stated in the image description text.
|
| 49 |
+
Do NOT select tags based on implication, plausibility, style assumptions, or world knowledge.
|
| 50 |
+
If a tag is not directly supported by explicit wording in the description, do not select it.
|
| 51 |
|
| 52 |
Return JSON ONLY matching this schema:
|
| 53 |
|
| 54 |
{{
|
| 55 |
\"selections\": [
|
| 56 |
+
{{\"i\": <int>}},
|
| 57 |
...
|
| 58 |
]
|
| 59 |
}}
|
|
|
|
| 61 |
Rules:
|
| 62 |
- Choose ONLY from indices 1..{N}.
|
| 63 |
- Do NOT output tag text.
|
| 64 |
+
- Do NOT output any keys other than \"selections\", and inside each item only the item index \"i\".
|
| 65 |
- Do select both a general tag and a more specific tag when both apply (for example, \"shirt\" and \"grey shirt\").
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
"""
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _get_select_system_template() -> str:
    """Return Stage 3 selection prompt text.

    Plain accessor for ``SELECT_SYSTEM_TEMPLATE``. The returned string is the
    unrendered template: it still contains the ``{N}`` placeholder and escaped
    ``{{``/``}}`` braces.
    """
    return SELECT_SYSTEM_TEMPLATE
|
| 72 |
+
|
| 73 |
+
|
| 74 |
ENTITY_SYSTEM_TEMPLATE = """You are given a description of an image and a list of CHARACTER tags.
|
| 75 |
+
|
| 76 |
+
These character tags have already been pre-filtered to only include characters whose names
|
| 77 |
+
(or known aliases) appear in the image description. Your job is to confirm which of these
|
| 78 |
+
pre-filtered candidates are the correct match for the character mentioned by the user.
|
| 79 |
+
|
| 80 |
Return JSON ONLY matching this schema:
|
| 81 |
|
| 82 |
{{
|
| 83 |
\"selections\": [
|
| 84 |
+
{{\"i\": <int>}},
|
| 85 |
...
|
| 86 |
]
|
| 87 |
}}
|
|
|
|
| 89 |
Rules for character selection:
|
| 90 |
- Choose ONLY from indices 1..{N}.
|
| 91 |
- Do NOT output tag text.
|
|
|
|
| 92 |
- Select the tag that best represents the character as described.
|
| 93 |
- If the user described a specific variant (e.g. \"pikachu libre\", \"detective pikachu\"),
|
| 94 |
select that specific variant tag.
|
| 95 |
+
- If the user described only the base character (e.g. just \"pikachu\"), select only the
|
| 96 |
+
base/default tag, NOT costume or variant tags.
|
| 97 |
+
- When uncertain between variants, prefer the simplest/most general tag.
|
| 98 |
+
"""
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
USER_TEMPLATE = """IMAGE DESCRIPTION:
|
| 102 |
+
{image_description}
|
| 103 |
+
|
| 104 |
+
CANDIDATES (choose by index only):
|
| 105 |
+
{candidate_lines}
|
| 106 |
+
|
| 107 |
+
Select up to {per_call_budget} indices. Output fewer if uncertain.
|
| 108 |
+
"""
|
| 109 |
+
|
| 110 |
+
|
| 111 |
@dataclass(frozen=True)
class Selected:
    """One validated pick from an LLM response: an index and the tag it maps to."""

    i: int  # per-call candidate index as returned by the model
    tag: str  # canonical tag (underscore form)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
|
| 117 |
# One entry of the model's "selections" array. Only the candidate index is
# carried; tag text is resolved locally from the index.
# (Documented with comments rather than a docstring so the generated pydantic
# schema description stays unchanged.)
class Stage3SelectionItem(BaseModel):
    i: int = Field(..., description="1-based index into the candidate list.")
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
# Top-level shape of a Stage 3 reply: {"selections": [{"i": <int>}, ...]}.
# A reply without "selections" parses as an empty list thanks to the
# default_factory.
class Stage3SelectionResponse(BaseModel):
    selections: List[Stage3SelectionItem] = Field(default_factory=list)
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def _build_response_format() -> Dict[str, Any]:
    """Build the strict JSON-Schema ``response_format`` payload for Stage 3.

    The schema admits exactly ``{"selections": [{"i": <integer>}, ...]}`` and
    forbids additional properties at both levels.

    Returns:
        A fresh dict of the form
        ``{"type": "json_schema", "json_schema": {...}}`` on every call.
    """
    # Innermost first: one selection item carries only the index "i".
    selection_item = {
        "type": "object",
        "properties": {"i": {"type": "integer"}},
        "required": ["i"],
        "additionalProperties": False,
    }
    envelope = {
        "type": "object",
        "properties": {
            "selections": {"type": "array", "items": selection_item},
        },
        "required": ["selections"],
        "additionalProperties": False,
    }
    json_schema_spec = {
        "name": "stage3_selection",
        "strict": True,
        "schema": envelope,
    }
    return {"type": "json_schema", "json_schema": json_schema_spec}
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def _get_llm(*, temperature: float, max_tokens: int, response_format: Dict[str, Any]) -> ChatOpenAI:
    """Create the chat client used for Stage 3, pointed at OpenRouter.

    Environment variables read:
        OPENROUTER_API_KEY: required.
        OPENROUTER_MODEL: optional; defaults to
            ``meta-llama/llama-3.1-8b-instruct``.
        OPENROUTER_HTTP_REFERER / OPENROUTER_X_TITLE: optional; when set and
            non-empty they are sent as the ``HTTP-Referer`` / ``X-Title``
            headers.

    Args:
        temperature: Sampling temperature passed to the client.
        max_tokens: Passed as ``max_completion_tokens``.
        response_format: Structured-output spec placed in the request body.

    Raises:
        RuntimeError: If ``OPENROUTER_API_KEY`` is unset or empty.
    """
    raw_key = os.getenv("OPENROUTER_API_KEY")
    if not raw_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY is not set.\n"
            "Set it in your environment before running Stage 3."
        )
    secret_key = SecretStr(cast(str, raw_key))

    model_id = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")

    # (header name, environment variable) pairs; only non-empty values are sent.
    header_sources = (
        ("HTTP-Referer", "OPENROUTER_HTTP_REFERER"),
        ("X-Title", "OPENROUTER_X_TITLE"),
    )
    extra_headers: Dict[str, str] = {
        header: value
        for header, env_name in header_sources
        if (value := os.getenv(env_name))
    }

    # Provider-specific request body fields (OpenAI-compatible).
    # Response Healing plugin reduces malformed-JSON failures (syntax only).
    request_extras = {
        "response_format": response_format,
        "plugins": [{"id": "response-healing"}],
    }

    # OpenRouter OpenAI-compatible endpoint.
    return ChatOpenAI(
        model=model_id,
        base_url="https://openrouter.ai/api/v1",
        api_key=secret_key,
        temperature=temperature,
        max_completion_tokens=max_tokens,
        default_headers=extra_headers,
        extra_body=request_extras,
    )
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _phrase_key_for_candidate(c: Candidate) -> str:
    """Return the candidate's grouping key: its smallest source phrase.

    Taking the minimum makes the choice independent of the order in which the
    sources were recorded. A candidate without sources gets ``""``.
    """
    return min(c.sources) if c.sources else ""
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def _interleave_round_robin(cands: Sequence[Candidate]) -> List[Candidate]:
    """Order candidates by dealing them out one phrase group at a time.

    Candidates are grouped by their primary source phrase. Each group is
    ranked by ``(score_combined, count)`` descending, with a missing or zero
    count ranking as ``-1``. The output takes the best of every group (groups
    visited in sorted key order), then the second best of every group, and so
    on until all groups are exhausted.

    NOTE: counts are used only for ordering; they are NOT shown to the LLM.
    """
    by_phrase: Dict[str, List[Candidate]] = {}
    for cand in cands:
        by_phrase.setdefault(_phrase_key_for_candidate(cand), []).append(cand)

    # One ranked list per phrase, in deterministic (sorted-key) order.
    ranked_groups = [
        sorted(
            by_phrase[phrase],
            key=lambda item: (item.score_combined, (item.count or -1)),
            reverse=True,
        )
        for phrase in sorted(by_phrase)
    ]

    deepest = max((len(group) for group in ranked_groups), default=0)
    return [
        group[rank]
        for rank in range(deepest)
        for group in ranked_groups
        if rank < len(group)
    ]
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def _build_chunks(cands: Sequence[Candidate], chunk_size: int) -> List[List[Candidate]]:
    """Split the round-robin-ordered candidates into consecutive chunks.

    Args:
        cands: Candidates to order and split.
        chunk_size: Maximum number of candidates per chunk; must be positive.

    Returns:
        Chunks in order; the last one may be shorter. Empty input gives ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be > 0, got {chunk_size}")

    interleaved = _interleave_round_robin(cands)
    chunks: List[List[Candidate]] = []
    for start in range(0, len(interleaved), chunk_size):
        chunks.append(interleaved[start:start + chunk_size])
    return chunks
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def _display_tag(tag: str) -> str:
    """Return ``tag`` with every underscore turned into a space.

    This is the form shown to the LLM; the canonical underscore form is kept
    everywhere else.
    """
    return " ".join(tag.split("_"))
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def _format_candidates_local(
    cands: Sequence[Candidate],
    candidate_display: Optional[Mapping[str, str]] = None,
) -> Tuple[str, Dict[int, str], Dict[int, Candidate]]:
    """Number the candidates from 1 and render them for the prompt.

    Args:
        cands: Candidates in the order they should be shown.
        candidate_display: Optional per-tag label overrides. A missing or
            empty label falls back to the tag with spaces for underscores.

    Returns:
        A tuple of:
        - the candidate list text, one ``"<index>. <label>"`` line each,
        - a mapping of index to canonical tag,
        - a mapping of index to the candidate object.
    """
    overrides = candidate_display if candidate_display else {}
    idx_to_candidate: Dict[int, Candidate] = dict(enumerate(cands, start=1))
    idx_to_tag: Dict[int, str] = {
        position: cand.tag for position, cand in idx_to_candidate.items()
    }
    rendered = [
        f"{position}. {overrides.get(tag) or _display_tag(tag)}"
        for position, tag in idx_to_tag.items()
    ]
    return "\n".join(rendered), idx_to_tag, idx_to_candidate
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def _phrases_in_call(cands: Sequence[Candidate]) -> int:
    """Count the distinct source phrases across all given candidates."""
    return len({phrase for cand in cands for phrase in cand.sources})
|
| 261 |
+
|
| 262 |
+
|
| 263 |
def _parse_validate_map(
|
| 264 |
parsed: Any,
|
| 265 |
idx_to_tag: Dict[int, str],
|
| 266 |
per_call_budget: int,
|
| 267 |
+
) -> Tuple[List[Selected], Dict[str, Any]]:
|
| 268 |
+
diag = {
|
| 269 |
+
"parse_ok": isinstance(parsed, dict),
|
| 270 |
+
"invalid_items": 0,
|
| 271 |
+
"oob_indices": 0,
|
| 272 |
+
"dupe_indices": 0,
|
| 273 |
+
"kept": 0,
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
if isinstance(parsed, BaseModel):
|
| 277 |
+
parsed = parsed.model_dump() if hasattr(parsed, "model_dump") else parsed.dict()
|
| 278 |
+
diag["parse_ok"] = isinstance(parsed, dict)
|
| 279 |
+
|
| 280 |
+
if not isinstance(parsed, dict):
|
| 281 |
+
return [], diag
|
| 282 |
+
|
| 283 |
+
selections = parsed.get("selections", [])
|
| 284 |
+
if not isinstance(selections, list):
|
| 285 |
+
diag["parse_ok"] = False
|
| 286 |
+
return [], diag
|
| 287 |
+
|
| 288 |
+
out: List[Selected] = []
|
| 289 |
+
seen_i = set()
|
| 290 |
+
|
| 291 |
for item in selections:
|
| 292 |
if len(out) >= per_call_budget:
|
| 293 |
break
|
|
|
|
| 296 |
continue
|
| 297 |
|
| 298 |
i = item.get("i")
|
|
|
|
| 299 |
|
| 300 |
if isinstance(i, bool) or not isinstance(i, int):
|
| 301 |
diag["invalid_items"] += 1
|
|
|
|
| 306 |
if i not in idx_to_tag:
|
| 307 |
diag["oob_indices"] += 1
|
| 308 |
continue
|
| 309 |
+
extra_keys = set(item.keys()) - {"i"}
|
| 310 |
+
if extra_keys:
|
| 311 |
diag["invalid_items"] += 1
|
| 312 |
continue
|
| 313 |
seen_i.add(i)
|
| 314 |
tag = idx_to_tag[i]
|
| 315 |
+
out.append(Selected(i=i, tag=tag))
|
| 316 |
+
|
| 317 |
+
diag["kept"] = len(out)
|
| 318 |
+
return out, diag
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _split_candidates_by_type(
    candidates: List[Candidate],
    log,
) -> Tuple[List[Tuple[int, Candidate]], List[Tuple[int, Candidate]]]:
    """Partition candidates into general and entity (character) lists.

    Routing by tag type name:
      - ``character``: entity list, unless the tag is one of the generic
        character categories, which go to the general list.
      - ``copyright``: dropped (too broad for image generation).
      - ``general`` / ``artist`` / ``species`` / ``meta``: general list.
      - anything else, including an unknown type: general list, counted as
        unknown.

    Args:
        candidates: Candidates to split.
        log: Optional callable taking one message string; receives a one-line
            summary of the split.

    Returns:
        ``(general_list, entity_list)`` where each item is
        ``(original_index, candidate)`` and original order is preserved.
    """
    general_bucket: List[Tuple[int, Candidate]] = []
    entity_bucket: List[Tuple[int, Candidate]] = []
    dropped_copyright = 0
    rerouted_generic = 0
    untyped = 0
    general_type_names = ("general", "artist", "species", "meta")

    for position, candidate in enumerate(candidates):
        kind = get_tag_type_name(candidate.tag)
        entry = (position, candidate)

        if kind == "copyright":
            # Filter out copyright/series tags - too broad for image generation
            dropped_copyright += 1
            continue

        if kind == "character":
            if candidate.tag not in _GENERIC_CHARACTER_TAGS:
                entity_bucket.append(entry)
                continue
            # Route generic character-category tags to general selection
            rerouted_generic += 1
        elif kind not in general_type_names:
            # Unknown or None - treat as general by default
            untyped += 1

        general_bucket.append(entry)

    if log:
        log(
            f"Stage3 split: "
            f"general={len(general_bucket)} "
            f"entity={len(entity_bucket)} "
            f"copyright_filtered={dropped_copyright} "
            f"generic_char_to_general={rerouted_generic} "
            f"unknown_type={untyped}"
        )

    return general_bucket, entity_bucket
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
# Matches a trailing series/franchise qualifier on a tag or alias, such as
# _(sonic), _(mlp) or _(character). Anchored to the end of the string.
_SERIES_SUFFIX_RE = re.compile(r"_\([^)]+\)$")


def _normalize_for_matching(text: str) -> str:
    """Put ``text`` into the form used for alias/query comparison.

    Steps, in order: lowercase and trim, drop one trailing ``_(...)`` series
    suffix if present, then turn the remaining underscores into spaces.
    """
    trimmed = text.lower().strip()
    without_series = _SERIES_SUFFIX_RE.sub("", trimmed)
    return without_series.replace("_", " ")
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def _query_words(query: str) -> Set[str]:
    """Return the distinct whitespace-separated words of the normalized query."""
    normalized_query = _normalize_for_matching(query)
    return set(normalized_query.split())
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def _alias_matches_query(alias_norm: str, query_words: Set[str], query_norm: str,
                         fuzzy_threshold: int = 85) -> bool:
    """Check if an alias matches the user query.

    An empty or whitespace-only alias never matches. Otherwise the first
    rule that succeeds wins:
      1. Exact substring: alias appears as a substring of the query. This is
         a plain substring test, not a whole-word test.
      2. Word subset: all words in the alias appear in the query words.
      3. Fuzzy (handles typos): a single-word alias is compared to each query
         word with ``fuzz.ratio``; a multi-word alias is compared to the whole
         query with ``fuzz.partial_ratio``.

    Args:
        alias_norm: Alias, already normalized for matching.
        query_words: Distinct words of the normalized query.
        query_norm: The normalized query as one string.
        fuzzy_threshold: Minimum fuzzy score (inclusive) that counts as a match.

    Returns:
        True if any rule matches, else False.
    """
    alias_words = alias_norm.split()
    if not alias_words:
        # Must run before the substring test: "" is a substring of every
        # string, so an empty alias would otherwise match any query.
        return False

    # Exact substring match
    if alias_norm in query_norm:
        return True

    # Word subset match: all alias words must appear in query
    if all(w in query_words for w in alias_words):
        return True

    # For single-word aliases, try fuzzy matching against each query word
    if len(alias_words) == 1:
        word = alias_words[0]
        return any(fuzz.ratio(word, qw) >= fuzzy_threshold for qw in query_words)

    # For multi-word aliases, try fuzzy partial ratio against whole query
    return fuzz.partial_ratio(alias_norm, query_norm) >= fuzzy_threshold
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def _character_matches_via_aliases(
    tag: str,
    query: str,
    tag2aliases: Dict[str, List[str]],
    query_words: Set[str],
    query_norm: str,
    fuzzy_threshold: int = 85,
) -> bool:
    """Decide whether a character tag is mentioned in the user query.

    The normalized tag name is tried first, then each registered alias in
    order; the first match wins. Aliases that normalize to an empty string
    are skipped. A tag with no registered aliases is judged on its name alone.

    Args:
        tag: Canonical character tag.
        query: Raw query text. Not read here; matching uses the pre-normalized
            ``query_words`` and ``query_norm``.
        tag2aliases: Mapping of tag to its known aliases.
        query_words: Distinct words of the normalized query.
        query_norm: The normalized query as one string.
        fuzzy_threshold: Minimum fuzzy score passed through to the matcher.

    Returns:
        True if the tag name or any alias matches, else False.
    """
    def _hits(name_norm: str) -> bool:
        return _alias_matches_query(name_norm, query_words, query_norm, fuzzy_threshold)

    # Check the tag name itself
    if _hits(_normalize_for_matching(tag)):
        return True

    # Check all registered aliases
    normalized_aliases = (
        _normalize_for_matching(alias) for alias in tag2aliases.get(tag, [])
    )
    return any(name and _hits(name) for name in normalized_aliases)
|
| 459 |
+
|
| 460 |
+
|
| 461 |
def llm_select_indices(
|
| 462 |
+
query_text: str, # kept for compatibility; treated as IMAGE DESCRIPTION
|
| 463 |
+
candidates: Union[
|
| 464 |
+
Sequence[Candidate],
|
| 465 |
+
Sequence[str],
|
| 466 |
+
Sequence[Tuple[str, float]],
|
| 467 |
+
],
|
| 468 |
+
max_pick: int, # legacy param; applied after union + ordering (optional)
|
| 469 |
+
log,
|
| 470 |
+
retries: int = 2,
|
| 471 |
+
*,
|
| 472 |
+
mode: str = "chunked_map_union", # "single_shot" or "chunked_map_union"
|
| 473 |
+
chunk_size: int = 60,
|
| 474 |
+
per_phrase_k: int = 2, # per-call budget = per_phrase_k * phrases_in_call
|
| 475 |
+
temperature: float = 0.0,
|
| 476 |
+
max_tokens: int = 512,
|
| 477 |
+
return_metadata: bool = False,
|
| 478 |
+
return_diagnostics: bool = False,
|
| 479 |
+
min_why: Optional[str] = None,
|
| 480 |
+
candidate_display: Optional[Mapping[str, str]] = None,
|
| 481 |
+
) -> Union[
|
| 482 |
+
List[int],
|
| 483 |
+
Tuple[List[int], Dict[str, str]],
|
| 484 |
+
Tuple[List[int], Dict[str, str], Dict[str, Any]],
|
| 485 |
+
]:
|
| 486 |
+
"""Return indices into the ORIGINAL candidates list (legacy interface).
|
| 487 |
+
|
| 488 |
+
min_why: legacy compatibility argument; ignored in explicit-only mode.
|
| 489 |
+
|
| 490 |
+
This implementation uses LangChain ONLY.
|
| 491 |
+
|
| 492 |
+
NOTE: query_text is treated as the image description (original prompt).
|
| 493 |
+
"""
|
| 494 |
+
|
| 495 |
+
image_description = query_text
|
| 496 |
+
|
| 497 |
+
# Normalize candidates:
|
| 498 |
+
# - preferred: List[Candidate]
|
| 499 |
+
# - legacy: List[(tag, sim)] (count/sources unavailable)
|
| 500 |
+
norm: List[Candidate] = []
|
| 501 |
+
tag_to_first_index: Dict[str, int] = {}
|
| 502 |
+
|
| 503 |
+
branch = "empty"
|
| 504 |
+
cand0_type = type(candidates[0]).__name__ if candidates else "none"
|
| 505 |
+
|
| 506 |
+
if candidates and isinstance(candidates[0], Candidate):
|
| 507 |
+
branch = "candidate"
|
| 508 |
+
typed_candidates = cast(Sequence[Candidate], candidates)
|
| 509 |
+
for idx, c in enumerate(typed_candidates):
|
| 510 |
+
if c.tag not in tag_to_first_index:
|
| 511 |
+
tag_to_first_index[c.tag] = idx
|
| 512 |
+
norm.append(c)
|
| 513 |
+
elif candidates and isinstance(candidates[0], str):
|
| 514 |
+
branch = "string"
|
| 515 |
+
typed_candidates = cast(Sequence[str], candidates)
|
| 516 |
+
for idx, tag in enumerate(typed_candidates):
|
| 517 |
+
if tag not in tag_to_first_index:
|
| 518 |
+
tag_to_first_index[tag] = idx
|
| 519 |
+
norm.append(
|
| 520 |
+
Candidate(
|
| 521 |
+
tag=tag,
|
| 522 |
+
score_combined=0.0,
|
| 523 |
+
score_fasttext=None,
|
| 524 |
+
score_context=None,
|
| 525 |
+
count=None,
|
| 526 |
+
sources=[],
|
| 527 |
+
)
|
| 528 |
+
)
|
| 529 |
+
else:
|
| 530 |
+
if candidates:
|
| 531 |
+
branch = "tuple"
|
| 532 |
+
typed_candidates = cast(Sequence[Tuple[str, float]], candidates)
|
| 533 |
+
for idx, row in enumerate(typed_candidates):
|
| 534 |
+
if not isinstance(row, (list, tuple)) or len(row) < 2:
|
| 535 |
+
raise ValueError("Stage 3 candidates must be Candidate, tag strings, or (tag, score) tuples.")
|
| 536 |
+
tag, sim = row[0], row[1]
|
| 537 |
+
if tag not in tag_to_first_index:
|
| 538 |
+
tag_to_first_index[tag] = idx
|
| 539 |
+
norm.append(
|
| 540 |
+
Candidate(
|
| 541 |
+
tag=tag,
|
| 542 |
+
score_combined=float(sim),
|
| 543 |
+
score_fasttext=None,
|
| 544 |
+
score_context=None,
|
| 545 |
+
count=None,
|
| 546 |
+
sources=[],
|
| 547 |
+
)
|
| 548 |
+
)
|
| 549 |
+
|
| 550 |
+
if log:
|
| 551 |
+
if norm:
|
| 552 |
+
log(
|
| 553 |
+
"Stage3 input: "
|
| 554 |
+
f"type0={cand0_type} "
|
| 555 |
+
f"branch={branch} "
|
| 556 |
+
f"norm0_score={norm[0].score_combined!r} "
|
| 557 |
+
f"norm0_sources_empty={not bool(norm[0].sources)}"
|
| 558 |
+
)
|
| 559 |
+
else:
|
| 560 |
+
log(f"Stage3 input: type0={cand0_type} branch={branch} (no candidates)")
|
| 561 |
+
|
| 562 |
+
if mode not in ("single_shot", "chunked_map_union"):
|
| 563 |
+
raise ValueError(f"Invalid mode: {mode}")
|
| 564 |
+
|
| 565 |
+
response_format = _build_response_format()
|
| 566 |
+
llm = _get_llm(temperature=temperature, max_tokens=max_tokens, response_format=response_format)
|
| 567 |
+
model_name = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")
|
| 568 |
+
|
| 569 |
+
parser = PydanticOutputParser(pydantic_object=Stage3SelectionResponse)
|
| 570 |
+
select_system_template = _get_select_system_template()
|
| 571 |
+
|
| 572 |
+
# Global union of selected tags across calls.
|
| 573 |
+
best_tags: Set[str] = set()
|
| 574 |
+
diagnostics: Dict[str, Any] = {
|
| 575 |
+
"mode": mode,
|
| 576 |
+
"chunk_strategy": "interleave",
|
| 577 |
+
"chunk_passes": 1,
|
| 578 |
+
"chunk_shuffle_within_call": False,
|
| 579 |
+
"calls_total": 0,
|
| 580 |
+
"calls_with_selection": 0,
|
| 581 |
+
"calls_exhausted_retries": 0,
|
| 582 |
+
"attempts_total": 0,
|
| 583 |
+
"attempt_errors": 0,
|
| 584 |
+
"attempt_parse_fail": 0,
|
| 585 |
+
"attempt_parse_ok": 0,
|
| 586 |
+
"invalid_items_total": 0,
|
| 587 |
+
"oob_indices_total": 0,
|
| 588 |
+
"dupe_indices_total": 0,
|
| 589 |
+
"kept_total": 0,
|
| 590 |
+
"attempts_by_n_local": {},
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
def _record_attempt_for_n(n_local: int, *, parse_ok: bool, error: bool) -> None:
|
| 594 |
+
by_n = diagnostics["attempts_by_n_local"]
|
| 595 |
+
key = str(n_local)
|
| 596 |
+
if key not in by_n:
|
| 597 |
+
by_n[key] = {
|
| 598 |
+
"attempts": 0,
|
| 599 |
+
"parse_ok": 0,
|
| 600 |
+
"parse_fail": 0,
|
| 601 |
+
"errors": 0,
|
| 602 |
+
}
|
| 603 |
+
by_n[key]["attempts"] += 1
|
| 604 |
+
if error:
|
| 605 |
+
by_n[key]["errors"] += 1
|
| 606 |
+
elif parse_ok:
|
| 607 |
+
by_n[key]["parse_ok"] += 1
|
| 608 |
+
else:
|
| 609 |
+
by_n[key]["parse_fail"] += 1
|
| 610 |
+
|
| 611 |
+
def run_call(call_cands: Sequence[Candidate], label: str, system_template: str) -> None:
|
| 612 |
+
# Create chain with the provided system template
|
| 613 |
+
prompt = ChatPromptTemplate.from_messages(
|
| 614 |
+
[
|
| 615 |
+
("system", system_template),
|
| 616 |
+
("human", USER_TEMPLATE),
|
| 617 |
+
],
|
| 618 |
+
template_format="f-string",
|
| 619 |
+
)
|
| 620 |
+
chain = prompt | llm | parser
|
| 621 |
+
|
| 622 |
+
ordered = _interleave_round_robin(call_cands) if mode == "single_shot" else list(call_cands)
|
| 623 |
+
candidate_lines, idx_to_tag, idx_to_candidate = _format_candidates_local(
|
| 624 |
+
ordered,
|
| 625 |
+
candidate_display=candidate_display,
|
| 626 |
+
)
|
| 627 |
+
N_local = len(idx_to_tag)
|
| 628 |
+
diagnostics["calls_total"] += 1
|
| 629 |
+
|
| 630 |
+
phrases = _phrases_in_call(call_cands)
|
| 631 |
+
per_call_budget = max(1, per_phrase_k * phrases) if phrases > 0 else per_phrase_k
|
| 632 |
+
summary_logged = False
|
| 633 |
+
|
| 634 |
+
if log:
|
| 635 |
+
log(f"Stage3 {label}: candidates (local indices):\n{candidate_lines}")
|
| 636 |
+
if phrases > 0:
|
| 637 |
+
distinct_phrases = sorted({src for c in call_cands for src in c.sources})
|
| 638 |
+
log(
|
| 639 |
+
f"Stage3 {label}: distinct_phrases={len(distinct_phrases)} "
|
| 640 |
+
f"phrases={', '.join(distinct_phrases)}"
|
| 641 |
+
)
|
| 642 |
+
|
| 643 |
+
# Invoke LangChain chain (templating fills {N} and other vars)
|
| 644 |
+
for att in range(retries + 1):
|
| 645 |
+
try:
|
| 646 |
+
diagnostics["attempts_total"] += 1
|
| 647 |
+
if log:
|
| 648 |
+
log(
|
| 649 |
+
f"Stage3 {label}: "
|
| 650 |
+
f"model={model_name} "
|
| 651 |
+
f"N={N_local} "
|
| 652 |
+
f"phrases={phrases} "
|
| 653 |
+
f"per_call_budget={per_call_budget} "
|
| 654 |
+
f"response_healing=on"
|
| 655 |
+
)
|
| 656 |
+
|
| 657 |
+
parsed = chain.invoke(
|
| 658 |
+
{
|
| 659 |
+
"N": N_local,
|
| 660 |
+
"image_description": image_description,
|
| 661 |
+
"candidate_lines": candidate_lines,
|
| 662 |
+
"per_call_budget": per_call_budget,
|
| 663 |
+
}
|
| 664 |
+
)
|
| 665 |
+
selected, diag = _parse_validate_map(parsed, idx_to_tag, per_call_budget=per_call_budget)
|
| 666 |
+
diagnostics["invalid_items_total"] += int(diag.get("invalid_items", 0))
|
| 667 |
+
diagnostics["oob_indices_total"] += int(diag.get("oob_indices", 0))
|
| 668 |
+
diagnostics["dupe_indices_total"] += int(diag.get("dupe_indices", 0))
|
| 669 |
+
diagnostics["kept_total"] += int(diag.get("kept", 0))
|
| 670 |
+
if bool(diag.get("parse_ok", False)):
|
| 671 |
+
diagnostics["attempt_parse_ok"] += 1
|
| 672 |
+
_record_attempt_for_n(N_local, parse_ok=True, error=False)
|
| 673 |
+
else:
|
| 674 |
+
diagnostics["attempt_parse_fail"] += 1
|
| 675 |
+
_record_attempt_for_n(N_local, parse_ok=False, error=False)
|
| 676 |
+
if log:
|
| 677 |
+
log(f"Stage3 {label}: attempt {att+1} diag={diag}")
|
| 678 |
+
if not summary_logged and (selected or att == retries):
|
| 679 |
+
log(
|
| 680 |
+
f"Stage3 {label}: summary "
|
| 681 |
+
f"N={N_local} selected={len(selected)} per_call_budget={per_call_budget}"
|
| 682 |
+
)
|
| 683 |
+
summary_logged = True
|
|
|
|
|
|
|
|
|
|
| 684 |
if selected:
|
| 685 |
lines = [
|
| 686 |
f"Stage3 {label} selections:",
|
| 687 |
*[
|
| 688 |
(
|
| 689 |
f' - i={s.i} tag="{s.tag}" '
|
|
|
|
| 690 |
f"sources={idx_to_candidate.get(s.i).sources if idx_to_candidate.get(s.i) else []}"
|
| 691 |
)
|
| 692 |
for s in selected
|
| 693 |
],
|
| 694 |
]
|
| 695 |
+
log("\n".join(lines))
|
| 696 |
+
else:
|
| 697 |
+
log(f"Stage3 {label} selections: (none)")
|
| 698 |
+
|
| 699 |
if selected:
|
| 700 |
diagnostics["calls_with_selection"] += 1
|
| 701 |
for s in selected:
|
| 702 |
+
best_tags.add(s.tag)
|
|
|
|
|
|
|
| 703 |
return
|
| 704 |
+
|
| 705 |
+
except Exception as e:
|
| 706 |
+
diagnostics["attempt_errors"] += 1
|
| 707 |
+
_record_attempt_for_n(N_local, parse_ok=False, error=True)
|
| 708 |
+
if log:
|
| 709 |
+
log(f"Stage3 {label}: attempt {att+1} error: {e}")
|
| 710 |
+
|
| 711 |
+
if log:
|
| 712 |
+
log(f"Stage3 {label}: gave up after {retries+1} attempts")
|
| 713 |
+
diagnostics["calls_exhausted_retries"] += 1
|
| 714 |
+
|
| 715 |
+
# Split candidates by type (general vs entity)
|
| 716 |
+
general_with_idx, entity_with_idx = _split_candidates_by_type(norm, log)
|
| 717 |
+
|
| 718 |
+
# Extract just the candidates for LLM calls
|
| 719 |
+
general_cands = [cand for _, cand in general_with_idx]
|
| 720 |
+
entity_cands = [cand for _, cand in entity_with_idx]
|
| 721 |
+
|
| 722 |
+
# Process general candidates (attributes, actions, species, etc.)
|
| 723 |
+
if general_cands:
|
| 724 |
+
if mode == "single_shot":
|
| 725 |
+
run_call(general_cands, "general_single_shot", select_system_template)
|
| 726 |
+
else:
|
| 727 |
+
base_chunks = _build_chunks(general_cands, chunk_size)
|
| 728 |
+
for chunk_idx, chunk in enumerate(base_chunks):
|
| 729 |
+
run_call(chunk, f"general_chunk_{chunk_idx}", select_system_template)
|
| 730 |
+
|
| 731 |
+
# Process entity candidates (characters only) with alias-based pre-filtering
|
| 732 |
+
if entity_cands:
|
| 733 |
+
tag2aliases = get_tag2aliases()
|
| 734 |
+
qwords = _query_words(image_description)
|
| 735 |
+
qnorm = _normalize_for_matching(image_description)
|
| 736 |
+
|
| 737 |
+
filtered_entity_cands: List[Candidate] = []
|
| 738 |
+
filtered_out: List[str] = []
|
| 739 |
+
|
| 740 |
+
for cand in entity_cands:
|
| 741 |
+
if _character_matches_via_aliases(
|
| 742 |
+
cand.tag, image_description, tag2aliases, qwords, qnorm
|
| 743 |
+
):
|
| 744 |
+
filtered_entity_cands.append(cand)
|
| 745 |
+
else:
|
| 746 |
+
filtered_out.append(cand.tag)
|
| 747 |
+
|
| 748 |
+
if log:
|
| 749 |
+
log(
|
| 750 |
+
f"Stage3 entity alias filter: "
|
| 751 |
+
f"before={len(entity_cands)} "
|
| 752 |
+
f"after={len(filtered_entity_cands)} "
|
| 753 |
+
f"removed={len(filtered_out)}"
|
| 754 |
+
)
|
| 755 |
+
if filtered_out:
|
| 756 |
+
log(f"Stage3 entity alias filter removed: {filtered_out[:20]}")
|
| 757 |
+
|
| 758 |
+
if filtered_entity_cands:
|
| 759 |
+
if mode == "single_shot":
|
| 760 |
+
run_call(filtered_entity_cands, "entity_single_shot", ENTITY_SYSTEM_TEMPLATE)
|
| 761 |
+
else:
|
| 762 |
+
base_chunks = _build_chunks(filtered_entity_cands, chunk_size)
|
| 763 |
+
for chunk_idx, chunk in enumerate(base_chunks):
|
| 764 |
+
run_call(chunk, f"entity_chunk_{chunk_idx}", ENTITY_SYSTEM_TEMPLATE)
|
| 765 |
+
|
| 766 |
+
if min_why is not None and log:
|
| 767 |
+
log("Stage3: min_why is ignored in explicit-only no-why mode")
|
| 768 |
+
|
| 769 |
+
# Deterministic ordering: count desc (count not shown to LLM), then tag desc (reverse=True applies to the whole sort key).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
count_by_tag = {c.tag: (c.count if c.count is not None else -1) for c in norm}
|
| 771 |
+
ordered_tags = sorted(best_tags, key=lambda t: (count_by_tag.get(t, -1), t), reverse=True)
|
| 772 |
+
|
| 773 |
+
# Legacy cap: apply AFTER union + ordering.
|
| 774 |
+
if isinstance(max_pick, int) and max_pick > 0:
|
| 775 |
+
ordered_tags = ordered_tags[:max_pick]
|
| 776 |
+
|
| 777 |
+
# Map back to original indices
|
| 778 |
+
out_idx: List[int] = []
|
| 779 |
tag_why: Dict[str, str] = {}
|
| 780 |
for t in ordered_tags:
|
| 781 |
if t in tag_to_first_index:
|
| 782 |
out_idx.append(tag_to_first_index[t])
|
| 783 |
+
# Why labels removed in explicit-only no-why mode.
|
| 784 |
+
|
| 785 |
+
if diagnostics["attempts_total"] > 0:
|
| 786 |
+
diagnostics["attempt_failure_rate"] = (
|
| 787 |
+
diagnostics["attempt_parse_fail"] + diagnostics["attempt_errors"]
|
| 788 |
+
) / diagnostics["attempts_total"]
|
| 789 |
+
else:
|
| 790 |
+
diagnostics["attempt_failure_rate"] = 0.0
|
| 791 |
+
|
| 792 |
+
if diagnostics["calls_total"] > 0:
|
| 793 |
+
diagnostics["call_exhaustion_rate"] = (
|
| 794 |
+
diagnostics["calls_exhausted_retries"] / diagnostics["calls_total"]
|
| 795 |
+
)
|
| 796 |
+
else:
|
| 797 |
+
diagnostics["call_exhaustion_rate"] = 0.0
|
| 798 |
+
|
| 799 |
+
if return_metadata:
|
| 800 |
+
if return_diagnostics:
|
| 801 |
+
return out_idx, tag_why, diagnostics
|
| 802 |
+
return out_idx, tag_why
|
| 803 |
+
|
| 804 |
+
return out_idx
|
| 805 |
+
|
| 806 |
+
|
| 807 |
+
# ---------------------------------------------------------------------------
|
| 808 |
+
# Stage 3s: Structural tag inference (solo/duo/male/female/anthro/… )
|
| 809 |
+
# ---------------------------------------------------------------------------
|
| 810 |
+
# Group-based approach: tags are organized into semantic groups loaded from
|
| 811 |
+
# tag_groups.json / tag_wiki_defs.json where possible, with curated fallback
|
| 812 |
+
# definitions for tags whose wiki entries are only thumbnail references.
|
| 813 |
+
#
|
| 814 |
+
# Each group specifies a constraint mode:
|
| 815 |
+
# "exclusive" = pick exactly one (e.g. character count)
|
| 816 |
+
# "multi" = pick all that apply (e.g. body type, gender)
|
| 817 |
+
|
| 818 |
+
import json as _json
|
| 819 |
+
|
| 820 |
+
@dataclass
class StructuralGroup:
    """One category of structural tags to probe."""
    # Group identifier; the prompt builder renders it upper-cased with
    # underscores replaced by spaces in the group header.
    name: str
    # "exclusive" is rendered as "pick EXACTLY ONE"; any other value is
    # rendered as "pick ALL that apply".
    constraint: str  # "exclusive" or "multi"
    # Order matters: statements are numbered in this order in the prompt.
    tags: List[Tuple[str, str]]  # (tag, definition) pairs
|
| 826 |
+
|
| 827 |
+
|
| 828 |
+
def _load_structural_groups_from_csv() -> List[StructuralGroup]:
    """Load structural groups from data/structural_tag_definitions.csv.

    Rows are skipped when their ``enabled`` column is ``0``/``false``/``no``
    (case-insensitive) or when ``group_name``, ``tag`` or ``definition`` is
    blank.  Whitespace inside a definition is collapsed to single spaces.
    A group's constraint is taken from the first accepted row of that group;
    unknown constraint values fall back to ``"multi"``.

    Returns:
        Groups in first-seen order, or an empty list when the CSV is missing
        or yields no usable rows.
    """
    source = Path(__file__).resolve().parents[2] / "data" / "structural_tag_definitions.csv"
    if not source.is_file():
        return []

    members: Dict[str, List[Tuple[str, str]]] = {}
    modes: Dict[str, str] = {}

    with source.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle):
            flag = (record.get("enabled") or "1").strip().lower()
            if flag in {"0", "false", "no"}:
                continue

            name = (record.get("group_name") or "").strip()
            tag = (record.get("tag") or "").strip()
            text = " ".join((record.get("definition") or "").split())
            if not (name and tag and text):
                continue

            mode = (record.get("constraint") or "multi").strip().lower()
            if mode not in {"exclusive", "multi"}:
                mode = "multi"

            if name not in members:
                # First accepted row of a group fixes its constraint.
                members[name] = []
                modes[name] = mode
            members[name].append((tag, text))

    return [
        StructuralGroup(name=name, constraint=modes.get(name, "multi"), tags=pairs)
        for name, pairs in members.items()
        if pairs
    ]
|
| 872 |
+
|
| 873 |
+
def _load_structural_groups() -> List[StructuralGroup]:
    """Build structural groups from local config file with legacy fallback.

    Preferred source:
        data/structural_tag_definitions.csv
    Fallback:
        tag_wiki_defs.json + curated hardcoded defaults

    The fallback never fails because of the wiki file: if it is missing,
    unreadable, not valid JSON, or not a JSON object, the curated default
    definitions below are used for every tag.

    Returns:
        The CSV-defined groups when the CSV yields any; otherwise the five
        built-in groups (character_count, body_type, gender, clothing_state,
        visual_elements) in that order.
    """
    csv_groups = _load_structural_groups_from_csv()
    if csv_groups:
        return csv_groups

    data_dir = Path(__file__).resolve().parents[2] / "data"

    # Load wiki definitions (may not exist yet).  A broken file must not take
    # the whole structural stage down, so fall back to the curated text.
    wiki_defs: Dict[str, str] = {}
    wiki_path = data_dir / "tag_wiki_defs.json"
    if wiki_path.is_file():
        try:
            with wiki_path.open("r", encoding="utf-8") as f:
                loaded = _json.load(f)
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            loaded = None
        if isinstance(loaded, dict):
            wiki_defs = loaded

    def _def(tag: str, fallback: str) -> str:
        """Get wiki definition if it's real text, otherwise use fallback."""
        d = wiki_defs.get(tag, "")
        # Skip non-text, empty, thumbnail-only or too-short definitions
        if not isinstance(d, str) or not d or d.startswith("thumb ") or len(d) < 15:
            return fallback
        return d[:200]  # cap length for prompt

    # (group name, constraint, [(tag, curated fallback definition), ...])
    curated: List[Tuple[str, str, List[Tuple[str, str]]]] = [
        # ── Group A: Character Count (exclusive) ──
        ("character_count", "exclusive", [
            ("zero_pictured",
             "No characters or living beings appear in the image"),
            ("solo",
             "Exactly one character appears in the image"),
            ("duo",
             "Exactly two characters appear in the image"),
            ("trio",
             "Exactly three characters appear in the image"),
            ("group",
             "Four or more characters appear in the image"),
        ]),
        # ── Group B: Body Type (multi — per character) ──
        # Key distinction the LLM must learn:
        #   anthro   = ANIMAL with human body shape (upright, hands)
        #   humanoid = HUMAN or near-human (elf, dwarf) with NO animal features
        #   feral    = normal animal shape, on all fours
        ("body_type", "multi", [
            ("anthro",
             "An animal character with a human-like body: walks upright on two legs, "
             "has arms and hands. Examples: a wolf-person, a fox standing up. "
             "Still has animal features like fur, tail, muzzle"),
            ("feral",
             "A regular animal in its natural body shape. Walks on all fours (or "
             "flies/swims naturally). NOT standing upright, NOT humanized"),
            ("humanoid",
             "A human or human-like character with NO animal features. Includes "
             "humans, elves, dwarves, and fantasy races that look human. "
             "Does NOT include animal-people — those are anthro"),
            ("taur",
             "A centaur-like body: human or anthro upper body attached to a "
             "four-legged animal lower body"),
        ]),
        # ── Group C: Gender (multi — per character) ──
        ("gender", "multi", [
            ("male",
             "A character described as male, a boy, or with he/him pronouns"),
            ("female",
             "A character described as female, a girl, or with she/her pronouns"),
            ("ambiguous_gender",
             "A character whose gender is not stated or cannot be determined"),
            ("intersex",
             "A character explicitly described as intersex or hermaphrodite"),
        ]),
        # ── Group D: Clothing State (multi) ──
        ("clothing_state", "multi", [
            ("clothed",
             "Wearing clothes on BOTH chest/torso AND legs/waist. "
             "Examples: shirt and pants, dress, full outfit"),
            ("nude",
             "Wearing NO clothes at all. Completely naked, no shirt and no pants"),
            ("topless",
             "NO shirt/top (bare chest), BUT wearing pants/bottoms. "
             "Upper body exposed, lower body covered"),
            ("bottomless",
             "Wearing shirt/top on chest, BUT NO pants/bottoms. "
             "Upper body covered, lower body exposed"),
        ]),
        # ── Group E: Common Visual Elements (multi) ──
        ("visual_elements", "multi", [
            ("looking_at_viewer",
             "A character is looking directly at the camera or viewer"),
            ("text",
             "The image contains visible writing, words, or lettering"),
        ]),
    ]

    return [
        StructuralGroup(
            name=name,
            constraint=constraint,
            tags=[(tag, _def(tag, fallback)) for tag, fallback in entries],
        )
        for name, constraint, entries in curated
    ]
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
def _build_structural_prompt(groups: List[StructuralGroup]) -> Tuple[str, List[Tuple[str, str]]]:
    """Render the structural groups as one numbered statement list.

    Each group contributes a header line naming the group and how many
    statements to pick, then one numbered line per definition, then a blank
    line.  Numbering is 1-based and continues across groups.

    Returns:
        ``(text, flat)`` where ``text`` is the newline-joined prompt section
        and ``flat[k - 1]`` is the ``(tag, definition)`` pair for statement
        number ``k``.
    """
    rendered: List[str] = []
    numbered: List[Tuple[str, str]] = []

    for group in groups:
        if group.constraint == "exclusive":
            how_many = "pick EXACTLY ONE"
        else:
            how_many = "pick ALL that apply"
        title = group.name.replace("_", " ").upper()
        rendered.append(f"--- {title} ({how_many}) ---")

        for tag, definition in group.tags:
            numbered.append((tag, definition))
            # The statement number is the pair's 1-based position in `numbered`.
            rendered.append(f"{len(numbered)}. {definition}")

        rendered.append("")  # blank line between groups

    return "\n".join(rendered), numbered
|
| 1019 |
+
|
| 1020 |
+
|
| 1021 |
+
# System prompt for the structural-tag classifier.  It is used as an f-string
# style chat template, so literal JSON braces are doubled ({{ }}).
# NOTE(review): the worked EXAMPLE hard-codes statement numbers 2 / 6 / 10 / 14.
# Those match the built-in fallback group order (solo, anthro, male, clothed);
# when groups come from structural_tag_definitions.csv the numbering may differ,
# which would make the example misleading — confirm against the CSV.
STRUCTURAL_SYSTEM_TEMPLATE = """You classify image descriptions by selecting true statements from a numbered list.

The statements are organized into GROUPS. Each group header tells you how many to pick:
- "pick EXACTLY ONE" = choose the single best match in that group
- "pick ALL that apply" = choose every statement that is true

IMPORTANT RULES:
1. ONLY select a statement if the description directly says it or makes it very obvious.
2. Do NOT guess or assume things the description does not mention.
3. For body type: "anthro" means an ANIMAL with a human-shaped body (walks upright, has hands, but still has fur/tail/muzzle). "humanoid" means HUMAN or human-like with NO animal features. A wolf standing on two legs = anthro, NOT humanoid.
4. For gender: only select male/female/intersex when there is explicit textual evidence (such as gender words or pronouns). Do not infer gender from species, body shape, clothing, or style. If no reliable gender cue is present, do not select male/female/intersex; use ambiguous_gender instead.
5. For clothing state: READ CAREFULLY! "topless" = bare chest, wearing pants. "bottomless" = wearing shirt, no pants. If unsure, re-read the description.
6. If clothing is not mentioned, do NOT pick any clothing statement.

Return JSON ONLY:
{{"selections": [{{"i": 1}}, {{"i": 5}}]}}

EXAMPLE:
Description: "A muscular male wolf standing in a forest, wearing jeans, giving a thumbs up"
Answer: {{"selections": [{{"i": 2}}, {{"i": 6}}, {{"i": 10}}, {{"i": 14}}]}}
Why: One character = solo (2). Wolf standing upright with hands = anthro (6), NOT humanoid because it is a wolf. Male (10). Wearing jeans = clothed (14)."""

# Human-turn template; {image_description} and {statement_lines} are filled in
# when the chain is invoked.
STRUCTURAL_USER_TEMPLATE = """Read this image description and select which statements are true.

IMAGE DESCRIPTION:
{image_description}

STATEMENTS (pick by number):
{statement_lines}"""
|
| 1050 |
+
|
| 1051 |
+
|
| 1052 |
+
# One selected statement in the structural LLM response.  Documented with a
# comment rather than a docstring so the generated pydantic schema is unchanged.
class StructuralSelectionItem(BaseModel):
    i: int = Field(..., description="1-based index into the statement list.")
|
| 1054 |
+
|
| 1055 |
+
|
| 1056 |
+
# Top-level shape of the structural LLM response: {"selections": [{"i": ...}, ...]}.
# A missing "selections" key parses as an empty list.
class StructuralSelectionResponse(BaseModel):
    selections: List[StructuralSelectionItem] = Field(default_factory=list)
|
| 1058 |
+
|
| 1059 |
+
|
| 1060 |
+
def _build_structural_response_format() -> Dict[str, Any]:
    """Return the strict ``json_schema`` response-format payload for Stage3s.

    The schema describes an object with a single required ``selections``
    array whose items are objects with a single required integer ``i``;
    additional properties are rejected at both levels.  A fresh dict is
    built on every call.
    """
    selection_item = {
        "type": "object",
        "properties": {
            "i": {"type": "integer"},
        },
        "required": ["i"],
        "additionalProperties": False,
    }
    payload_schema = {
        "type": "object",
        "properties": {
            "selections": {
                "type": "array",
                "items": selection_item,
            }
        },
        "required": ["selections"],
        "additionalProperties": False,
    }
    return {
        "type": "json_schema",
        "json_schema": {
            "name": "structural_selection",
            "strict": True,
            "schema": payload_schema,
        },
    }
|
| 1087 |
+
|
| 1088 |
+
|
| 1089 |
+
# Cache the loaded groups so the definition files (the CSV, or the JSON wiki
# fallback) are only read once per process.
_cached_structural_groups: Optional[List[StructuralGroup]] = None


def _get_structural_groups() -> List[StructuralGroup]:
    """Return the structural groups, loading them on first use.

    The result of ``_load_structural_groups()`` is stored in the module-level
    cache and the same list object is returned on every later call.
    """
    global _cached_structural_groups
    if _cached_structural_groups is None:
        _cached_structural_groups = _load_structural_groups()
    return _cached_structural_groups
|
| 1098 |
+
|
| 1099 |
+
|
| 1100 |
+
def llm_infer_structural_tags(
    query_text: str,
    log=None,
    *,
    temperature: float = 0.0,
    max_tokens: int = 512,
    retries: int = 2,
) -> List[str]:
    """Infer structural tags via LLM using group-based statement agreement.

    Probes multiple semantic groups (character count, body type, gender,
    clothing state, visual elements) with definitions loaded from wiki data
    where available.

    Args:
        query_text: Image description to classify.
        log: Optional callable taking one string; used for progress messages.
        temperature: Sampling temperature passed to the LLM factory.
        max_tokens: Completion token cap passed to the LLM factory.
        retries: Number of extra attempts after the first failed one.

    Returns:
        A list of e621 tag strings (e.g. ["solo", "anthro", "male", "clothed"]),
        de-duplicated in the order the model selected them.  Returns ``[]``
        when no statements are configured or every attempt raised.
    """
    if log:
        log("Stage3s (structural): inferring structural tags via group-based statement agreement")

    groups = _get_structural_groups()
    statement_lines, flat_tags = _build_structural_prompt(groups)
    N = len(flat_tags)

    if N == 0:
        # With no statements every returned index is out of range, so the
        # answer can only be empty; skip the (up to retries + 1) LLM calls.
        if log:
            log("Stage3s: no structural statements configured; skipping LLM call")
        return []

    response_format = _build_structural_response_format()
    llm = _get_llm(temperature=temperature, max_tokens=max_tokens,
                   response_format=response_format)
    # Only used for the log line below.
    model_name = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")

    parser = PydanticOutputParser(pydantic_object=StructuralSelectionResponse)

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", STRUCTURAL_SYSTEM_TEMPLATE),
            ("human", STRUCTURAL_USER_TEMPLATE),
        ],
        template_format="f-string",
    )
    chain = prompt | llm | parser

    if log:
        group_summary = ", ".join(f"{g.name}({len(g.tags)})" for g in groups)
        log(f"Stage3s: model={model_name} groups=[{group_summary}] total_statements={N}")

    for att in range(retries + 1):
        try:
            parsed = chain.invoke({
                "N": N,
                "image_description": query_text,
                "statement_lines": statement_lines,
            })

            # Normalise the parser output to a plain dict (pydantic v2 or v1).
            if isinstance(parsed, BaseModel):
                parsed = parsed.model_dump() if hasattr(parsed, "model_dump") else parsed.dict()

            sels = parsed.get("selections", []) if isinstance(parsed, dict) else []
            chosen_tags: List[str] = []
            seen: Set[str] = set()
            for item in sels:
                idx = item.get("i") if isinstance(item, dict) else None
                # Drop anything that is not a valid 1-based statement number.
                if not isinstance(idx, int) or idx < 1 or idx > N:
                    continue
                tag = flat_tags[idx - 1][0]
                if tag not in seen:
                    chosen_tags.append(tag)
                    seen.add(tag)

            if log:
                tag_str = ", ".join(chosen_tags) if chosen_tags else "(none)"
                log(f"Stage3s: attempt {att+1} selected {len(chosen_tags)} tags: {tag_str}")

            # The first attempt that completes wins, even with zero selections.
            return chosen_tags

        except Exception as e:
            if log:
                log(f"Stage3s: attempt {att+1} error: {e}")

    if log:
        log(f"Stage3s: gave up after {retries+1} attempts")
    return []
|
| 1179 |
+
|
| 1180 |
+
|
| 1181 |
+
# ---------------------------------------------------------------------------
|
| 1182 |
+
# Stage 3p: Simplified high-precision probe tags
|
| 1183 |
+
# ---------------------------------------------------------------------------
|
| 1184 |
+
# Per-process caches for the Stage 3p loaders; None means "not loaded yet".
_cached_runtime_probe_tags: Optional[List[str]] = None
_cached_runtime_probe_rows: Optional[List[Dict[str, str]]] = None
_cached_runtime_probe_wiki_defs: Optional[Dict[str, str]] = None

# Curated glossary text used for a probe tag when its wiki definition is
# missing or not usable (see _is_real_wiki_def).
_PROBE_GLOSSARY_FALLBACKS: Dict[str, str] = {
    "anthro": "Animal character with human-like body shape, usually upright with arms and hands.",
    "canid": "Member of dog-family species (wolves, foxes, dogs, coyotes).",
    "felid": "Member of cat-family species (cats, lions, tigers, leopards).",
    "solo": "Exactly one character is present in the image.",
    "duo": "Exactly two characters are present in the image.",
    "group": "Four or more characters are present in the image.",
    "<3": "Visible heart symbol in text or icon form.",
}
|
| 1197 |
+
|
| 1198 |
+
|
| 1199 |
+
def _load_runtime_probe_rows(log=None) -> List[Dict[str, str]]:
    """Return the rows of data/analysis/simplified_probe_tags.csv, cached.

    The first call reads the CSV and stores the result in the module-level
    cache; later calls return the cached list.  A missing or unreadable file
    is cached as an empty list, so it is not retried within the process.

    Args:
        log: Optional callable taking one string; told about a missing file
            or a read failure.
    """
    global _cached_runtime_probe_rows
    if _cached_runtime_probe_rows is not None:
        return _cached_runtime_probe_rows

    csv_path = Path(__file__).resolve().parents[2] / "data" / "analysis" / "simplified_probe_tags.csv"
    loaded: List[Dict[str, str]] = []

    if csv_path.is_file():
        try:
            with csv_path.open("r", encoding="utf-8", newline="") as handle:
                loaded = list(csv.DictReader(handle))
        except Exception as e:
            # Best effort: an unreadable CSV just disables the probe step.
            if log:
                log(f"Stage3p: failed reading probe CSV: {e}")
            loaded = []
    elif log:
        log(f"Stage3p: probe CSV not found at {csv_path}; skipping probe step")

    _cached_runtime_probe_rows = loaded
    return loaded
|
| 1222 |
+
|
| 1223 |
+
|
| 1224 |
+
def _load_runtime_probe_wiki_defs() -> Dict[str, str]:
    """Return the tag -> wiki definition mapping from data/tag_wiki_defs.json, cached.

    Loading is best effort: a missing file, a read/parse failure, or a JSON
    payload that is not an object all yield an empty mapping.  Entries whose
    value is not a string are dropped so callers can treat every value as
    text.  The result (including an empty one) is cached for the process.
    """
    global _cached_runtime_probe_wiki_defs
    if _cached_runtime_probe_wiki_defs is not None:
        return _cached_runtime_probe_wiki_defs

    data_dir = Path(__file__).resolve().parents[2] / "data"
    wiki_path = data_dir / "tag_wiki_defs.json"
    defs: Dict[str, str] = {}
    if wiki_path.is_file():
        try:
            with wiki_path.open("r", encoding="utf-8") as f:
                loaded = _json.load(f)
        except Exception:
            # Deliberate best effort: glossary text is optional.
            loaded = None
        if isinstance(loaded, dict):
            # Keep only textual definitions; anything else would break the
            # string handling done by the glossary helpers.
            defs = {k: v for k, v in loaded.items() if isinstance(v, str)}
    _cached_runtime_probe_wiki_defs = defs
    return defs
|
| 1240 |
+
|
| 1241 |
+
|
| 1242 |
+
def _load_runtime_probe_tags(log=None) -> List[str]:
    """Load runtime probe tags from analysis output.

    Preference order:
      1) selected_final=1 (reliability-gated list)
      2) selected_initial=1 (fallback if reliability file not built)

    Flag columns are read case-insensitively and accept ``1``/``true``/``yes``.
    Rows with a missing or blank ``tag`` are ignored, so they can neither
    crash the loader nor block the fallback to ``selected_initial``.
    The result is cached for the process.

    Args:
        log: Optional callable taking one string; forwarded to the row loader
            and told how many tags were loaded.
    """
    global _cached_runtime_probe_tags
    if _cached_runtime_probe_tags is not None:
        return _cached_runtime_probe_tags

    rows = _load_runtime_probe_rows(log=log)

    def _is_on(v) -> bool:
        return (v or "").strip().lower() in {"1", "true", "yes"}

    def _tags_where(column: str) -> List[str]:
        # DictReader fills short rows with None, hence the `or ""` guards.
        picked = ((r.get("tag") or "").strip() for r in rows if _is_on(r.get(column)))
        return [t for t in picked if t]

    tags = _tags_where("selected_final") or _tags_where("selected_initial")

    _cached_runtime_probe_tags = tags
    if log and tags:
        log(f"Stage3p: loaded {len(tags)} probe tags")
    return tags
|
| 1267 |
+
|
| 1268 |
+
|
| 1269 |
+
def _is_real_wiki_def(text: str) -> bool:
    """Tell whether ``text`` looks like an actual wiki definition.

    After trimming, the text must be non-empty, must not start with
    ``"thumb "`` (case-insensitive), and must be at least 20 characters long.
    ``None`` is treated like an empty string.
    """
    stripped = (text or "").strip()
    return (
        bool(stripped)
        and not stripped.lower().startswith("thumb ")
        and len(stripped) >= 20
    )
|
| 1276 |
+
|
| 1277 |
+
|
| 1278 |
+
def _clean_glossary_text(text: str) -> str:
    """Collapse whitespace in ``text`` and cap it at 160 characters.

    Every run of whitespace (including newlines and carriage returns) becomes
    a single space and the ends are trimmed.  Longer results are cut to the
    first 157 characters, right-stripped, and suffixed with ``"..."``.
    ``None`` is treated like an empty string.
    """
    # str.split() with no argument already splits on \n and \r.
    collapsed = " ".join((text or "").split())
    if len(collapsed) <= 160:
        return collapsed
    return collapsed[:157].rstrip() + "..."
|
| 1283 |
+
|
| 1284 |
+
|
| 1285 |
+
def _build_probe_candidate_display(probe_tags: Sequence[str], log=None) -> Dict[str, str]:
    """Build the text shown to the LLM for each probe tag.

    Every tag maps to its display form.  When the tag's CSV row has
    ``needs_glossary`` switched on (``1``/``true``/``yes``, case-insensitive),
    a short glossary is appended as ``"<display> - <glossary>"``; the glossary
    comes from the wiki definition when it is usable and otherwise from the
    curated fallbacks.  With no glossary text the plain display form is used.

    Args:
        probe_tags: Tags to build display strings for.
        log: Optional callable taking one string; forwarded to the row loader.

    Returns:
        Mapping of tag -> display string with one entry per distinct tag.
    """
    rows = _load_runtime_probe_rows(log=log)
    # DictReader fills short rows with None, hence `or ""` before strip().
    rows_by_tag = {(r.get("tag") or "").strip(): r for r in rows}
    wiki_defs = _load_runtime_probe_wiki_defs()

    display: Dict[str, str] = {}
    for tag in probe_tags:
        base = _display_tag(tag)
        row = rows_by_tag.get(tag, {})
        flag = (row.get("needs_glossary") or "").strip().lower()
        if flag not in {"1", "true", "yes"}:
            display[tag] = base
            continue

        raw_def = wiki_defs.get(tag, "")
        # Non-text wiki values are treated the same as unusable definitions.
        if not isinstance(raw_def, str) or not _is_real_wiki_def(raw_def):
            raw_def = _PROBE_GLOSSARY_FALLBACKS.get(tag, "")
        gloss = _clean_glossary_text(raw_def)
        display[tag] = f"{base} - {gloss}" if gloss else base

    return display
|
| 1306 |
+
|
| 1307 |
+
|
| 1308 |
def llm_infer_probe_tags(
    query_text: str,
    log=None,
    *,
    temperature: float = 0.0,
    max_tokens: int = 512,
    retries: int = 2,
    min_why: Optional[str] = None,
) -> List[str]:
    """Infer probe tags from a fixed reliability-gated tag list.

    Runs one single-shot selection call over every probe tag and maps the
    returned indices back to tag strings.

    Args:
        query_text: Image description to classify.
        log: Optional callable taking one string; used for progress messages.
        temperature: Sampling temperature forwarded to the selector.
        max_tokens: Completion token cap forwarded to the selector.
        retries: Retry count forwarded to the selector.
        min_why: Accepted for compatibility; not forwarded (the selector is
            always called with ``min_why=None``).

    Returns:
        The selected probe tags, or ``[]`` when no probe tags are configured.
    """
    probe_tags = _load_runtime_probe_tags(log=log)
    if not probe_tags:
        return []

    if log:
        log(f"Stage3p: probing {len(probe_tags)} tags")
    candidate_display = _build_probe_candidate_display(probe_tags, log=log)

    total = len(probe_tags)
    # One call over the whole list: chunk size and per-phrase budget both
    # span every probe tag.
    picked_indices = llm_select_indices(
        query_text=query_text,
        candidates=probe_tags,
        max_pick=total,
        log=log,
        retries=retries,
        mode="single_shot",
        chunk_size=max(1, total),
        per_phrase_k=max(1, total),
        temperature=temperature,
        max_tokens=max_tokens,
        return_metadata=False,
        return_diagnostics=False,
        min_why=None,
        candidate_display=candidate_display,
    )

    # Ignore any index that does not address a probe tag.
    selected: List[str] = [probe_tags[i] for i in picked_indices if 0 <= i < total]

    if log:
        shown = ", ".join(selected) if selected else "(none)"
        log(f"Stage3p: selected {len(selected)} probe tags: {shown}")
    return selected
|
psq_rag/retrieval/psq_retrieval.py
CHANGED
|
@@ -146,7 +146,7 @@ def psq_candidates_from_rewrite_phrases(
|
|
| 146 |
context_tag_weight: float = 1.0,
|
| 147 |
context_weight: float = 0.5,
|
| 148 |
per_phrase_k: int = 50,
|
| 149 |
-
per_phrase_final_k: int =
|
| 150 |
global_k: int = 300,
|
| 151 |
return_phrase_ranks: bool = False,
|
| 152 |
verbose: bool = False,
|
|
|
|
| 146 |
context_tag_weight: float = 1.0,
|
| 147 |
context_weight: float = 0.5,
|
| 148 |
per_phrase_k: int = 50,
|
| 149 |
+
per_phrase_final_k: int = 1,
|
| 150 |
global_k: int = 300,
|
| 151 |
return_phrase_ranks: bool = False,
|
| 152 |
verbose: bool = False,
|
psq_rag/ui/group_ranked_display.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import csv
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, List, Sequence, Tuple
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
|
| 11 |
+
from psq_rag.retrieval.psq_retrieval import construct_pseudo_vector, _norm_tag_for_lookup
|
| 12 |
+
from psq_rag.retrieval.state import get_tfidf_components, get_tfidf_tag_vectors
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# One display row: a named tag group with its ranked tags.
@dataclass
class GroupRankingRow:
    # Name of the tag group this row describes.
    group_name: str
    # NOTE(review): presumably the expected number of applicable tags in the
    # group (e.g. a sum of per-tag probabilities) — confirm against the producer.
    expected_count: float
    # (tag, value) pairs; NOTE(review): the float looks like a per-tag
    # score/probability — confirm against the producer.
    tags: List[Tuple[str, float]]
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@lru_cache(maxsize=1)
def _load_enabled_groups() -> Dict[str, List[str]]:
    """Load enabled tag groups from the category registry CSV.

    Reads ``data/analysis/category_registry.csv`` (relative to the current
    working directory) and keeps a row only when it has a non-empty ``tag``,
    a truthy ``category_enabled`` ("1", "true" or "yes", case-insensitive),
    a ``category_status`` other than "excluded", and a non-empty
    ``category_name``.

    Returns:
        Mapping of group name to its normalised tags, de-duplicated with
        first-seen order preserved. An empty dict when the CSV is missing.
        The result is memoised by ``lru_cache``, so callers share one dict.
    """
    registry = Path("data/analysis/category_registry.csv")
    if not registry.exists():
        return {}

    truthy = {"1", "true", "yes"}
    # Inner dicts serve as insertion-ordered sets: duplicates collapse while
    # the first occurrence keeps its position.
    members: Dict[str, Dict[str, None]] = {}
    with registry.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle):
            raw_tag = (record.get("tag") or "").strip()
            flag = str(record.get("category_enabled") or "").strip().lower()
            state = (record.get("category_status") or "").strip().lower()
            name = (record.get("category_name") or "").strip()
            if not raw_tag or flag not in truthy or state == "excluded" or not name:
                continue
            members.setdefault(name, {})[_norm_tag_for_lookup(raw_tag)] = None

    return {name: list(tag_set) for name, tag_set in members.items()}
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _calibrate_probabilities(scores: Dict[str, float]) -> Dict[str, float]:
    """Squash raw scores into [0, 1] with a robustly scaled logistic.

    Scores are centred on their median and divided by the interquartile
    range; when that spread is at most 1e-6 the standard deviation is used
    instead, and when that is also at most 1e-6 the scale falls back to 1.0.
    The standardised value is then passed through a sigmoid.

    Args:
        scores: Mapping of tag to raw score.

    Returns:
        Mapping of the same keys (same order) to Python floats in [0, 1].
        Empty input yields an empty dict.
    """
    if not scores:
        return {}

    # Centre/scale statistics are taken over a float32 copy of the scores.
    vals = np.asarray(list(scores.values()), dtype=np.float32)
    center = float(np.median(vals))
    q25 = float(np.percentile(vals, 25))
    q75 = float(np.percentile(vals, 75))
    scale = q75 - q25
    if scale <= 1e-6:
        scale = float(np.std(vals))
    if scale <= 1e-6:
        scale = 1.0

    # Standardise the original (double precision) scores.
    raw = np.asarray([float(s) for s in scores.values()], dtype=np.float64)
    z = (raw - center) / scale

    # Overflow-free sigmoid: exp(-|z|) is always <= 1, so an outlier combined
    # with a tiny scale can no longer overflow exp() the way the naive
    # 1 / (1 + exp(-z)) form does for strongly negative z.
    decay = np.exp(-np.abs(z))
    probs = np.where(z >= 0.0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return dict(zip(scores, probs.tolist()))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def rank_groups_from_tfidf(
    seed_terms: Sequence[str],
    *,
    top_groups: int,
    top_tags_per_group: int,
    group_rank_top_k: int,
) -> List[GroupRankingRow]:
    """Rank enabled tag groups against a pseudo-document of seed terms.

    The seed terms that exist in the TF-IDF vocabulary are counted into a
    pseudo-document, projected with the stored SVD model, L2-normalised and
    scored against every group tag that has a row in the reduced tag matrix.
    Scores are calibrated to probabilities; each group's score is the sum of
    its ``group_rank_top_k`` highest probabilities.

    Args:
        seed_terms: Terms used to build the query; each is normalised with
            ``_norm_tag_for_lookup`` before the vocabulary lookup.
        top_groups: Maximum number of groups returned (clamped to >= 1).
        top_tags_per_group: Number of tags kept per group (clamped to >= 1).
        group_rank_top_k: Number of top tag probabilities summed into
            ``expected_count`` (clamped to >= 1).

    Returns:
        Rows sorted by ``expected_count`` descending. An empty list when no
        groups are enabled, no seed term is in the vocabulary, the projected
        query has zero norm, or no group tag has a vector.
    """
    enabled = _load_enabled_groups()
    if not enabled:
        return []

    components = get_tfidf_components()
    tag_vectors = get_tfidf_tag_vectors()
    idf = components["idf"]
    vocab = components["tag_to_column_index"]
    svd = components["svd_model"]
    row_of = tag_vectors["tag_to_row_index"]
    # NOTE(review): presumably row-normalised, which would make the dot
    # product below a cosine similarity -- confirm against the state module.
    mat_norm = tag_vectors["reduced_matrix_norm"]

    # Term-count pseudo-document restricted to in-vocabulary seed terms.
    counts: Dict[str, float] = {}
    for raw_term in seed_terms:
        lookup = _norm_tag_for_lookup(str(raw_term))
        if lookup not in vocab:
            continue
        counts[lookup] = counts.get(lookup, 0.0) + 1.0
    if not counts:
        return []

    query = svd.transform(construct_pseudo_vector(counts, idf, vocab))
    query = query.reshape(-1).astype(np.float32)
    norm = float(np.linalg.norm(query))
    if norm <= 0.0:
        return []
    query = query / norm

    # Every distinct group tag, first-seen order, that has a reduced vector.
    candidates = list(dict.fromkeys(t for tags in enabled.values() for t in tags))
    located: List[Tuple[str, int]] = []
    for candidate in candidates:
        row_idx = row_of.get(candidate)
        if row_idx is not None:
            located.append((candidate, int(row_idx)))
    if not located:
        return []

    row_ids = np.asarray([idx for _, idx in located], dtype=np.int32)
    sims = (mat_norm[row_ids] @ query).astype(np.float32)
    raw_scores: Dict[str, float] = {
        name: float(sim) for (name, _), sim in zip(located, sims)
    }
    prob_by_tag = _calibrate_probabilities(raw_scores)

    rank_k = max(1, int(group_rank_top_k))
    display_k = max(1, int(top_tags_per_group))

    ranked: List[GroupRankingRow] = []
    for group_name, members in enabled.items():
        # sorted() is stable, so ties keep their registry order.
        best = sorted(
            ((t, prob_by_tag[t]) for t in members if t in prob_by_tag),
            key=lambda pair: pair[1],
            reverse=True,
        )
        if not best:
            continue
        ranked.append(
            GroupRankingRow(
                group_name=group_name,
                expected_count=float(sum(prob for _, prob in best[:rank_k])),
                tags=best[:display_k],
            )
        )

    ranked.sort(key=lambda entry: entry.expected_count, reverse=True)
    limit = max(1, int(top_groups))
    return ranked[:limit]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _fmt_tag_cell(tag: str, p: float) -> str:
    """Format one tag and its probability as a Markdown table cell.

    Pipes in the tag are backslash-escaped so they do not end the table
    cell; the probability is printed to two decimals as both ``p`` and ``E``.
    """
    escaped = "\\|".join(tag.split("|"))
    shown = format(p, ".2f")
    return "`" + escaped + "` (p=" + shown + ", E=" + shown + ")"
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def render_group_rankings_markdown(
    seed_terms: Sequence[str],
    *,
    top_groups: int,
    top_tags_per_group: int,
    group_rank_top_k: int,
) -> str:
    """Render the ranked tag groups for ``seed_terms`` as a Markdown table.

    Args:
        seed_terms: Terms forwarded to ``rank_groups_from_tfidf``.
        top_groups: Maximum number of group rows to show.
        top_tags_per_group: Number of "Tag i" columns (clamped to >= 1).
        group_rank_top_k: Number of top tags summed into the expected-count
            column; also shown in that column's header (clamped to >= 1).

    Returns:
        A Markdown table with one row per ranked group, or a fixed
        explanatory sentence when the ranker returns no rows.
    """
    rows = rank_groups_from_tfidf(
        seed_terms,
        top_groups=top_groups,
        top_tags_per_group=top_tags_per_group,
        group_rank_top_k=group_rank_top_k,
    )
    if not rows:
        return "No ranked group display available (insufficient TF-IDF context)."

    k = max(1, int(top_tags_per_group))
    headers = ["Group/Category", f"Expected Tags (top {max(1, int(group_rank_top_k))})"]
    headers.extend(f"Tag {i}" for i in range(1, k + 1))
    table = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join(["---"] * len(headers)) + " |",
    ]

    for row in rows:
        # Group names come from the registry CSV; escape pipes the same way
        # tag cells do, otherwise a "|" in a name splits the row into extra
        # columns and breaks the table.
        safe_group = row.group_name.replace("|", "\\|")
        cells = [safe_group, f"{row.expected_count:.2f}"]
        tag_cells = [_fmt_tag_cell(tag, p) for tag, p in row.tags]
        # Pad short groups so every row has the same number of columns.
        tag_cells.extend([""] * (k - len(tag_cells)))
        cells.extend(tag_cells)
        table.append("| " + " | ".join(cells) + " |")
    return "\n".join(table)
|
scripts/eval_pipeline.py
CHANGED
|
@@ -582,7 +582,7 @@ def run_eval(
|
|
| 582 |
mode: str = "chunked_map_union",
|
| 583 |
chunk_size: int = 60,
|
| 584 |
per_phrase_k: int = 2,
|
| 585 |
-
per_phrase_final_k: int =
|
| 586 |
temperature: float = 0.0,
|
| 587 |
max_tokens: int = 512,
|
| 588 |
verbose: bool = False,
|
|
@@ -982,7 +982,7 @@ def main(argv=None) -> int:
|
|
| 982 |
choices=["single_shot", "chunked_map_union"])
|
| 983 |
ap.add_argument("--chunk-size", type=int, default=60)
|
| 984 |
ap.add_argument("--per-phrase-k", type=int, default=2)
|
| 985 |
-
ap.add_argument("--per-phrase-final-k", type=int, default=
|
| 986 |
help="Top-K candidates per phrase after scoring (retrieval cap)")
|
| 987 |
ap.add_argument("--temperature", type=float, default=0.0)
|
| 988 |
ap.add_argument("--max-tokens", type=int, default=512)
|
|
|
|
| 582 |
mode: str = "chunked_map_union",
|
| 583 |
chunk_size: int = 60,
|
| 584 |
per_phrase_k: int = 2,
|
| 585 |
+
per_phrase_final_k: int = 1,
|
| 586 |
temperature: float = 0.0,
|
| 587 |
max_tokens: int = 512,
|
| 588 |
verbose: bool = False,
|
|
|
|
| 982 |
choices=["single_shot", "chunked_map_union"])
|
| 983 |
ap.add_argument("--chunk-size", type=int, default=60)
|
| 984 |
ap.add_argument("--per-phrase-k", type=int, default=2)
|
| 985 |
+
ap.add_argument("--per-phrase-final-k", type=int, default=1,
|
| 986 |
help="Top-K candidates per phrase after scoring (retrieval cap)")
|
| 987 |
ap.add_argument("--temperature", type=float, default=0.0)
|
| 988 |
ap.add_argument("--max-tokens", type=int, default=512)
|