Spaces:
Running
Running
Food Desert committed on
Commit ·
33fc1b0
1
Parent(s): a48a025
Refine tag toggle UI ordering/colors and add category assignment analysis artifacts
Browse files
app.py
CHANGED
|
@@ -1,144 +1,409 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import os
|
| 3 |
-
import logging
|
| 4 |
-
import time
|
| 5 |
-
import json
|
| 6 |
-
|
| 7 |
-
from
|
| 8 |
-
from
|
| 9 |
-
from
|
| 10 |
-
from
|
| 11 |
-
|
| 12 |
-
from
|
| 13 |
-
|
| 14 |
-
from psq_rag.
|
| 15 |
-
from psq_rag.llm.
|
| 16 |
-
from psq_rag.retrieval.
|
| 17 |
-
from psq_rag.
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
def _display_tag_text(tag: str) -> str:
    """Return *tag* with every underscore shown as a space."""
    return " ".join(tag.split("_"))
|
| 45 |
|
| 46 |
|
| 47 |
-
def
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
seen: Set[str] = set()
|
| 58 |
-
for row in row_defs:
|
| 59 |
-
for tag in row.get("tags", []):
|
| 60 |
-
if tag in selected and tag not in seen:
|
| 61 |
-
out.append(tag)
|
| 62 |
-
seen.add(tag)
|
| 63 |
-
# Fallback for any selected tags not present in current rows.
|
| 64 |
-
for tag in sorted(selected):
|
| 65 |
-
if tag not in seen:
|
| 66 |
-
out.append(tag)
|
| 67 |
-
seen.add(tag)
|
| 68 |
-
return out
|
| 69 |
-
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def _build_toggle_rows(
|
| 78 |
*,
|
| 79 |
seed_terms: List[str],
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
top_groups: int,
|
| 82 |
top_tags_per_group: int,
|
| 83 |
group_rank_top_k: int,
|
| 84 |
) -> List[Dict[str, Any]]:
|
| 85 |
-
ranked_rows = rank_groups_from_tfidf(
|
| 86 |
-
seed_terms=seed_terms,
|
| 87 |
-
top_groups=max(1, int(top_groups)),
|
| 88 |
-
top_tags_per_group=max(1, int(top_tags_per_group)),
|
| 89 |
-
group_rank_top_k=max(1, int(group_rank_top_k)),
|
| 90 |
-
)
|
| 91 |
groups_map = _load_enabled_groups()
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
}
|
| 99 |
-
tags_in_any_displayed_group: Set[str] = set()
|
| 100 |
-
for tag_set in displayed_group_tag_sets.values():
|
| 101 |
-
tags_in_any_displayed_group.update(tag_set)
|
| 102 |
-
|
| 103 |
-
llm_other = [t for t in llm_selected if t not in tags_in_any_displayed_group]
|
| 104 |
row_defs.append(
|
| 105 |
{
|
| 106 |
-
"name": "
|
| 107 |
-
"label": "
|
| 108 |
-
"tags":
|
|
|
|
| 109 |
}
|
| 110 |
)
|
| 111 |
|
| 112 |
for row in ranked_rows:
|
| 113 |
group_name = row.group_name
|
| 114 |
group_tag_set = displayed_group_tag_sets.get(group_name, set())
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
merged = selected_in_group + [t for t in ranked_tags if t not in selected_in_group]
|
| 118 |
keep_n = max(max(1, int(top_tags_per_group)), len(selected_in_group))
|
| 119 |
merged = merged[:keep_n]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
row_defs.append(
|
| 121 |
{
|
| 122 |
"name": group_name,
|
| 123 |
"label": f"{group_name} (E={row.expected_count:.2f})",
|
| 124 |
"tags": merged,
|
|
|
|
| 125 |
}
|
| 126 |
)
|
| 127 |
-
|
| 128 |
-
return row_defs
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
def
|
| 132 |
-
row_defs: List[Dict[str, Any]],
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
if idx < len(row_defs):
|
| 143 |
row = row_defs[idx]
|
| 144 |
tags = list(dict.fromkeys(row.get("tags", [])))
|
|
@@ -146,716 +411,993 @@ def _build_row_component_updates(
|
|
| 146 |
row_values_state.append(values)
|
| 147 |
visible = bool(tags)
|
| 148 |
header_updates.append(gr.update(value=f"**{row.get('label', '')}**", visible=visible))
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
checkbox_updates.append(
|
| 151 |
gr.update(
|
| 152 |
choices=choices,
|
| 153 |
value=values,
|
| 154 |
visible=visible,
|
| 155 |
-
)
|
| 156 |
-
)
|
| 157 |
-
else:
|
| 158 |
-
header_updates.append(gr.update(value="", visible=False))
|
| 159 |
-
checkbox_updates.append(gr.update(choices=[], value=[], visible=False))
|
| 160 |
-
|
| 161 |
-
prompt_text = _compose_toggle_prompt_text(list(selected), row_defs)
|
| 162 |
-
return prompt_text, row_values_state, header_updates, checkbox_updates
|
| 163 |
-
|
| 164 |
-
|
| 165 |
def _on_toggle_row(
|
| 166 |
row_idx: int,
|
| 167 |
changed_values: List[str],
|
| 168 |
selected_tags_state: List[str],
|
| 169 |
-
row_defs_state: List[Dict[str, Any]],
|
| 170 |
-
row_values_state: List[List[str]],
|
| 171 |
-
max_rows: int,
|
| 172 |
):
|
| 173 |
row_defs = row_defs_state or []
|
| 174 |
selected = set(selected_tags_state or [])
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
console_text,
|
| 208 |
-
legacy_prompt_text,
|
| 209 |
-
prompt_text,
|
| 210 |
-
sorted(set(selected_tags or [])),
|
| 211 |
-
row_defs,
|
| 212 |
-
row_values_state,
|
| 213 |
-
*header_updates,
|
| 214 |
-
*checkbox_updates,
|
| 215 |
-
]
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
def _build_selection_query(
    prompt_in: str,
    rewritten: str,
    structural_tags: List[str],
    probe_tags: List[str],
) -> str:
    """Assemble the multi-line query text passed to the index-selection step.

    Args:
        prompt_in: The user's image description; ``None`` is treated as "".
        rewritten: Comma-separated rewrite phrases; omitted when blank.
        structural_tags: Inferred structural tags, used only as hints.
        probe_tags: Inferred probe tags, used only as hints.

    Returns:
        Newline-joined text with an ``IMAGE DESCRIPTION`` line, an optional
        ``REWRITE PHRASES`` line and an optional ``INFERRED TAG HINTS`` line
        listing the de-duplicated hint tags in sorted order.
    """
    description = (prompt_in or "").strip()
    lines = [f"IMAGE DESCRIPTION: {description}"]

    rewrite_text = (rewritten or "").strip()
    if rewrite_text:
        lines.append(f"REWRITE PHRASES: {rewrite_text}")

    # Blank entries carry no information and would render as stray commas.
    hint_tags = {
        tag.strip()
        for tag in (*(structural_tags or ()), *(probe_tags or ()))
        if tag and tag.strip()
    }
    if hint_tags:
        # Keep hints as context only; selection still must choose by candidate indices.
        lines.append(
            "INFERRED TAG HINTS (context only): " + ", ".join(sorted(hint_tags))
        )
    return "\n".join(lines)
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
# Set up logging
# Minimal prod logging: warnings+ to stderr, no file by default
import logging
import os

LOG_LEVEL = os.environ.get("PSQ_LOG_LEVEL", "WARNING").upper()
# Accept only names that resolve to a numeric logging level. A typo, or a name
# that matches some other attribute of the logging module (e.g. BASIC_FORMAT),
# falls back to WARNING instead of making basicConfig() raise at import time.
_resolved_log_level = getattr(logging, LOG_LEVEL, None)
if not isinstance(_resolved_log_level, int):
    _resolved_log_level = logging.WARNING
logging.basicConfig(
    level=_resolved_log_level,
    format="%(asctime)s %(levelname)s:%(message)s",
    handlers=[logging.StreamHandler()],  # no file -> avoids huge logs on Spaces
)

# Quiet down common noisy libs (optional)
for _name in ("gensim", "gradio", "hnswlib", "httpx", "uvicorn"):
    logging.getLogger(_name).setLevel(logging.ERROR)

# Turn off Gradio analytics phone-home to avoid those background thread errors (optional)
os.environ["GRADIO_ANALYTICS_ENABLED"] = "0"
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
MASCOT_DIR = Path(__file__).parent / "mascotimages"
MASCOT_FILE = MASCOT_DIR / "transparentsquirrel.png"


def _load_mascot_image():
    """Load mascot image if available; return None when missing/unreadable.

    Returns:
        The mascot converted to RGBA mode, or ``None`` (with a logged
        warning) when the file does not exist or cannot be opened/decoded.
    """
    if not MASCOT_FILE.exists():
        logging.warning("Mascot image missing: %s", MASCOT_FILE)
        return None
    try:
        # The context manager closes the file handle even if convert() fails;
        # only the converted image is returned, not the opened one.
        with Image.open(MASCOT_FILE) as source:
            return source.convert("RGBA")
    except Exception as e:
        # Deliberately best-effort: the UI can run without a mascot.
        logging.warning("Failed to load mascot image (%s): %s", MASCOT_FILE, e)
        return None
|
| 273 |
-
|
| 274 |
-
# Hotfix: wrap gradio_client's schema helpers so that a non-dict schema is
# reported as "any" instead of being passed to the original implementation.
# Applied best-effort; a failure here is logged and otherwise ignored.
try:
    from gradio_client import utils as _gc_utils

    _orig_get_type = _gc_utils.get_type
    _orig_j2p = _gc_utils._json_schema_to_python_type
    _orig_pub = _gc_utils.json_schema_to_python_type

    def _get_type_safe(schema):
        # Sometimes schema is a bare True/False (JSON Schema boolean form)
        if not isinstance(schema, dict):
            return "any"
        return _orig_get_type(schema)

    def _j2p_safe(schema, defs=None):
        # Accept non-dict schemas (True/False/None) and treat as "any"
        if not isinstance(schema, dict):
            return "any"
        return _orig_j2p(schema, defs or schema.get("$defs"))

    def _pub_safe(schema):
        # Public wrapper used by Gradio; keep it resilient too
        if not isinstance(schema, dict):
            return "any"
        return _j2p_safe(schema, schema.get("$defs"))

    _gc_utils.get_type = _get_type_safe
    _gc_utils._json_schema_to_python_type = _j2p_safe
    _gc_utils.json_schema_to_python_type = _pub_safe

except Exception as e:
    # Report through logging like the rest of this module rather than print().
    logging.warning("gradio_client hotfix not applied: %s", e)
|
| 305 |
-
# -------------------------------------------------------------------------------
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
allow_nsfw_tags = False


def _is_production_runtime() -> bool:
    """Best-effort detection for deployed runtime (HF Spaces or explicit env).

    Returns True when ``PSQ_PRODUCTION`` is set to 1/true/yes (any case,
    surrounding whitespace ignored), when ``SPACE_ID`` or ``HF_SPACE_ID`` is
    non-empty, or when ``SYSTEM`` equals ``"spaces"``; otherwise False.
    """
    env = os.environ
    explicit_flag = env.get("PSQ_PRODUCTION", "").strip().lower()
    if explicit_flag in {"1", "true", "yes"}:
        return True
    has_space_id = bool(env.get("SPACE_ID")) or bool(env.get("HF_SPACE_ID"))
    return has_space_id or env.get("SYSTEM") == "spaces"
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
# Runtime configuration, read once from environment variables at import time.

# Verbose retrieval logging defaults to off in production and on elsewhere.
verbose_retrieval_default = "0" if _is_production_runtime() else "1"
verbose_retrieval = os.environ.get("PSQ_VERBOSE_RETRIEVAL", verbose_retrieval_default).strip().lower() in {"1", "true", "yes"}
verbose_retrieval_all = False
verbose_retrieval_limit = 20
# Parsed case-insensitively, like PSQ_VERBOSE_RETRIEVAL above, so "FALSE" or
# "no" also turns the probe stage off.
enable_probe_tags = os.environ.get("PSQ_ENABLE_PROBE", "1").strip().lower() not in {"0", "false", "no"}

# Defaults for the ranked group/category display.
display_top_groups_default = int(os.environ.get("PSQ_DISPLAY_TOP_GROUPS", "10"))
display_top_tags_per_group_default = int(os.environ.get("PSQ_DISPLAY_TOP_TAGS_PER_GROUP", "5"))
display_rank_top_k_default = int(os.environ.get("PSQ_DISPLAY_GROUP_RANK_TOP_K", "5"))
display_max_rows_default = int(os.environ.get("PSQ_DISPLAY_MAX_ROWS", "14"))

# Retrieval and selection sizing.
retrieval_global_k = int(os.environ.get("PSQ_RETRIEVAL_GLOBAL_K", "300"))
retrieval_per_phrase_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_K", "10"))
retrieval_per_phrase_final_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_FINAL_K", "1"))
selection_mode = os.environ.get("PSQ_SELECTION_MODE", "chunked_map_union").strip()
selection_chunk_size = int(os.environ.get("PSQ_SELECTION_CHUNK_SIZE", "60"))
selection_per_phrase_k = int(os.environ.get("PSQ_SELECTION_PER_PHRASE_K", "2"))
# The pipeline applies the candidate cap only when it is greater than zero.
selection_candidate_cap = int(os.environ.get("PSQ_SELECTION_CANDIDATE_CAP", "0"))

# Per-stage timeouts, in seconds, for the LLM futures.
stage1_rewrite_timeout_s = float(os.environ.get("PSQ_TIMEOUT_REWRITE_S", "45"))
stage1_struct_timeout_s = float(os.environ.get("PSQ_TIMEOUT_STRUCT_S", "45"))
stage1_probe_timeout_s = float(os.environ.get("PSQ_TIMEOUT_PROBE_S", "45"))
stage3_select_timeout_s = float(os.environ.get("PSQ_TIMEOUT_SELECT_S", "45"))

# JSONL file that per-run stage timings are appended to.
timing_log_path = Path(os.environ.get("PSQ_TIMING_LOG_PATH", "data/runtime_metrics/ui_pipeline_timings.jsonl"))
|
| 343 |
|
| 344 |
-
|
| 345 |
-
.
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
}
|
| 357 |
|
| 358 |
-
/*
|
| 359 |
-
.
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
max-height: 610px; /* was 420px; tweak to taste */
|
| 367 |
}
|
| 368 |
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
display: flex;
|
| 371 |
flex-wrap: wrap;
|
| 372 |
gap: 8px;
|
|
|
|
| 373 |
}
|
| 374 |
|
| 375 |
-
.
|
| 376 |
-
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
}
|
| 379 |
|
| 380 |
-
.
|
| 381 |
-
|
|
|
|
|
|
|
|
|
|
| 382 |
}
|
| 383 |
|
| 384 |
-
.
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
line-height: 1.2;
|
| 393 |
-
cursor: pointer;
|
| 394 |
-
user-select: none;
|
| 395 |
-
box-shadow: 0 1px 0 rgba(0,0,0,0.12), inset 0 1px 0 rgba(255,255,255,0.7);
|
| 396 |
-
}
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
}
|
| 404 |
"""
|
| 405 |
|
| 406 |
|
| 407 |
def rag_pipeline_ui(
|
| 408 |
-
user_prompt: str,
|
| 409 |
-
display_top_groups: float,
|
| 410 |
-
display_top_tags_per_group: float,
|
| 411 |
-
display_rank_top_k: float,
|
| 412 |
-
):
|
| 413 |
-
logs = []
|
| 414 |
-
def log(s): logs.append(s)
|
| 415 |
-
|
| 416 |
-
try:
|
| 417 |
-
stage_timings = {}
|
| 418 |
-
|
| 419 |
-
def _record_timing(stage: str, dt_s: float):
|
| 420 |
-
stage_timings[stage] = float(dt_s)
|
| 421 |
-
|
| 422 |
-
def _emit_timing_summary(total_s: float):
|
| 423 |
-
summary_order = [
|
| 424 |
-
"preprocess",
|
| 425 |
-
"rewrite",
|
| 426 |
-
"structural",
|
| 427 |
-
"probe",
|
| 428 |
-
"retrieval",
|
| 429 |
-
"selection",
|
| 430 |
-
"implication_expansion",
|
| 431 |
-
"prompt_composition",
|
| 432 |
-
"group_display",
|
| 433 |
-
]
|
| 434 |
-
lines = []
|
| 435 |
-
for k in summary_order:
|
| 436 |
-
if k in stage_timings:
|
| 437 |
-
lines.append(f"{k}={stage_timings[k]:.2f}s")
|
| 438 |
-
slowest = max(stage_timings.items(), key=lambda kv: kv[1])[0] if stage_timings else "n/a"
|
| 439 |
-
log("Timing Summary: " + ", ".join(lines))
|
| 440 |
-
log(f"Timing Slowest Stage: {slowest}")
|
| 441 |
-
log(f"Timing Total: {total_s:.2f}s")
|
| 442 |
-
|
| 443 |
-
def _append_timing_jsonl(total_s: float):
|
| 444 |
-
try:
|
| 445 |
-
timing_log_path.parent.mkdir(parents=True, exist_ok=True)
|
| 446 |
-
rec = {
|
| 447 |
-
"timestamp_utc": datetime.utcnow().isoformat(timespec="seconds") + "Z",
|
| 448 |
-
"stages_s": stage_timings,
|
| 449 |
-
"total_s": float(total_s),
|
| 450 |
-
"config": {
|
| 451 |
-
"timeout_rewrite_s": stage1_rewrite_timeout_s,
|
| 452 |
-
"timeout_struct_s": stage1_struct_timeout_s,
|
| 453 |
-
"timeout_probe_s": stage1_probe_timeout_s,
|
| 454 |
-
"timeout_select_s": stage3_select_timeout_s,
|
| 455 |
-
},
|
| 456 |
-
}
|
| 457 |
-
with timing_log_path.open("a", encoding="utf-8") as f:
|
| 458 |
-
f.write(json.dumps(rec, ensure_ascii=True) + "\n")
|
| 459 |
-
log(f"Timing Log: wrote {timing_log_path}")
|
| 460 |
-
except Exception as e:
|
| 461 |
-
log(f"Timing Log: failed ({type(e).__name__}: {e})")
|
| 462 |
-
|
| 463 |
-
def _future_with_timeout(fut, timeout_s: float, stage_name: str, fallback):
|
| 464 |
-
t0 = time.perf_counter()
|
| 465 |
-
try:
|
| 466 |
-
out = fut.result(timeout=max(1.0, float(timeout_s)))
|
| 467 |
-
dt = time.perf_counter() - t0
|
| 468 |
-
log(f"{stage_name}: {dt:.2f}s")
|
| 469 |
-
stage_key = {
|
| 470 |
-
"Rewrite": "rewrite",
|
| 471 |
-
"Structural inference": "structural",
|
| 472 |
-
"Probe inference": "probe",
|
| 473 |
-
"Index selection": "selection",
|
| 474 |
-
}.get(stage_name)
|
| 475 |
-
if stage_key:
|
| 476 |
-
_record_timing(stage_key, dt)
|
| 477 |
-
return out
|
| 478 |
-
except FutureTimeoutError:
|
| 479 |
-
fut.cancel()
|
| 480 |
-
log(f"{stage_name}: timed out after {timeout_s:.0f}s; using fallback")
|
| 481 |
-
return fallback
|
| 482 |
-
except Exception as e:
|
| 483 |
-
log(f"{stage_name}: failed ({type(e).__name__}: {e}); using fallback")
|
| 484 |
-
return fallback
|
| 485 |
-
|
| 486 |
-
t_total0 = time.perf_counter()
|
| 487 |
-
log("Start: received prompt")
|
| 488 |
-
prompt_in = (user_prompt or "").strip()
|
| 489 |
-
if not prompt_in:
|
| 490 |
-
return _build_ui_payload(
|
| 491 |
-
console_text="Error: empty prompt",
|
| 492 |
-
legacy_prompt_text="",
|
| 493 |
-
row_defs=[],
|
| 494 |
-
selected_tags=[],
|
| 495 |
-
)
|
| 496 |
-
|
| 497 |
-
log("Input:")
|
| 498 |
-
log(prompt_in)
|
| 499 |
-
log("")
|
| 500 |
-
log(
|
| 501 |
-
"Runtime config: "
|
| 502 |
-
f"retrieval_global_k={retrieval_global_k} "
|
| 503 |
-
f"retrieval_per_phrase_k={retrieval_per_phrase_k} "
|
| 504 |
-
f"retrieval_per_phrase_final_k={retrieval_per_phrase_final_k} "
|
| 505 |
-
f"selection_mode={selection_mode} "
|
| 506 |
-
f"selection_chunk_size={selection_chunk_size} "
|
| 507 |
-
f"selection_per_phrase_k={selection_per_phrase_k}"
|
| 508 |
-
)
|
| 509 |
-
log("")
|
| 510 |
-
|
| 511 |
-
t0 = time.perf_counter()
|
| 512 |
-
user_tags = extract_user_provided_tags_upto_3_words(prompt_in)
|
| 513 |
-
dt = time.perf_counter()-t0
|
| 514 |
-
_record_timing("preprocess", dt)
|
| 515 |
-
log(f"Preprocess (user tag extraction): {dt:.2f}s")
|
| 516 |
-
log("Heuristically extracted user tags:")
|
| 517 |
-
if user_tags:
|
| 518 |
-
log(", ".join(user_tags))
|
| 519 |
-
else:
|
| 520 |
-
log("(none)")
|
| 521 |
-
log("")
|
| 522 |
-
|
| 523 |
-
log("Step 1: LLM rewrite + structural inference + probe (concurrent)")
|
| 524 |
-
max_workers = 3 if enable_probe_tags else 2
|
| 525 |
-
with ThreadPoolExecutor(max_workers=max_workers) as ex:
|
| 526 |
-
fut_rewrite = ex.submit(llm_rewrite_prompt, prompt_in, log)
|
| 527 |
-
fut_struct = ex.submit(llm_infer_structural_tags, prompt_in, log=log)
|
| 528 |
-
fut_probe = ex.submit(llm_infer_probe_tags, prompt_in, log=log) if enable_probe_tags else None
|
| 529 |
-
|
| 530 |
-
rewritten = _future_with_timeout(
|
| 531 |
-
fut_rewrite, stage1_rewrite_timeout_s, "Rewrite", prompt_in
|
| 532 |
-
)
|
| 533 |
-
structural_tags = _future_with_timeout(
|
| 534 |
-
fut_struct, stage1_struct_timeout_s, "Structural inference", []
|
| 535 |
-
)
|
| 536 |
-
probe_tags = (
|
| 537 |
-
_future_with_timeout(fut_probe, stage1_probe_timeout_s, "Probe inference", [])
|
| 538 |
-
if fut_probe else []
|
| 539 |
-
)
|
| 540 |
-
|
| 541 |
-
log("Rewrite:")
|
| 542 |
-
log(rewritten if rewritten else "(empty)")
|
| 543 |
-
log("")
|
| 544 |
-
|
| 545 |
-
rewrite_for_retrieval = rewritten
|
| 546 |
-
if user_tags:
|
| 547 |
-
# keep them separate in logs, but allow them to help retrieval
|
| 548 |
-
rewrite_for_retrieval = (rewrite_for_retrieval + ", " + ", ".join(user_tags)).strip(", ").strip()
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
log("Step 2: Prompt Squirrel retrieval (hidden)")
|
| 552 |
-
try:
|
| 553 |
-
t0 = time.perf_counter()
|
| 554 |
-
retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
|
| 555 |
-
rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
|
| 556 |
-
retrieval_result = psq_candidates_from_rewrite_phrases(
|
| 557 |
-
rewrite_phrases=rewrite_phrases,
|
| 558 |
-
allow_nsfw_tags=allow_nsfw_tags,
|
| 559 |
-
context_tags=retrieval_context_tags,
|
| 560 |
-
global_k=max(1, retrieval_global_k),
|
| 561 |
-
per_phrase_k=max(1, retrieval_per_phrase_k),
|
| 562 |
-
per_phrase_final_k=max(1, retrieval_per_phrase_final_k),
|
| 563 |
-
verbose=verbose_retrieval,
|
| 564 |
-
)
|
| 565 |
-
if isinstance(retrieval_result, tuple):
|
| 566 |
-
candidates, phrase_reports = retrieval_result
|
| 567 |
-
else:
|
| 568 |
-
candidates, phrase_reports = retrieval_result, []
|
| 569 |
-
if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
|
| 570 |
-
candidates = candidates[:selection_candidate_cap]
|
| 571 |
-
log(f"Selection candidate cap applied: {selection_candidate_cap}")
|
| 572 |
-
dt = time.perf_counter()-t0
|
| 573 |
-
_record_timing("retrieval", dt)
|
| 574 |
-
log(f"Retrieval: {dt:.2f}s")
|
| 575 |
-
log(f"Retrieved {len(candidates)} candidate tags")
|
| 576 |
-
if verbose_retrieval:
|
| 577 |
-
log(f"Total unique candidates: {len(candidates)}")
|
| 578 |
-
limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
|
| 579 |
-
for report in phrase_reports:
|
| 580 |
-
phrase = report.get("normalized") or report.get("phrase") or ""
|
| 581 |
-
lookup = report.get("lookup") or ""
|
| 582 |
-
tfidf_vocab = report.get("tfidf_vocab")
|
| 583 |
-
log(f"Phrase: {phrase} (lookup={lookup}) tfidf_vocab={tfidf_vocab}")
|
| 584 |
-
rows = report.get("candidates", [])
|
| 585 |
-
shown = rows if limit is None else rows[:limit]
|
| 586 |
-
for row in shown:
|
| 587 |
-
tag = row.get("tag")
|
| 588 |
-
alias_token = row.get("alias_token")
|
| 589 |
-
score_fasttext = row.get("score_fasttext")
|
| 590 |
-
score_context = row.get("score_context")
|
| 591 |
-
score_combined = row.get("score_combined")
|
| 592 |
-
count = row.get("count")
|
| 593 |
-
alias_part = ""
|
| 594 |
-
if alias_token and alias_token != tag:
|
| 595 |
-
alias_part = f" [alias_token={alias_token}]"
|
| 596 |
-
fasttext_str = (
|
| 597 |
-
f"{score_fasttext:.3f}" if isinstance(score_fasttext, (int, float)) else score_fasttext
|
| 598 |
-
)
|
| 599 |
-
if score_context is None:
|
| 600 |
-
context_str = "None"
|
| 601 |
-
else:
|
| 602 |
-
context_str = (
|
| 603 |
-
f"{score_context:.3f}" if isinstance(score_context, (int, float)) else score_context
|
| 604 |
-
)
|
| 605 |
-
combined_str = (
|
| 606 |
-
f"{score_combined:.3f}" if isinstance(score_combined, (int, float)) else score_combined
|
| 607 |
-
)
|
| 608 |
-
log(
|
| 609 |
-
f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
|
| 610 |
-
f"combined={combined_str} count={count}"
|
| 611 |
-
)
|
| 612 |
-
if limit is not None and len(rows) > limit:
|
| 613 |
-
log(f" ... ({len(rows) - limit} more)")
|
| 614 |
-
except Exception as e:
|
| 615 |
-
log(f"Retrieval fallback: {type(e).__name__}: {e}")
|
| 616 |
-
candidates = []
|
| 617 |
-
|
| 618 |
-
log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
|
| 619 |
-
selection_query = _build_selection_query(
|
| 620 |
-
prompt_in=prompt_in,
|
| 621 |
-
rewritten=rewritten,
|
| 622 |
-
structural_tags=structural_tags,
|
| 623 |
-
probe_tags=probe_tags,
|
| 624 |
-
)
|
| 625 |
-
with ThreadPoolExecutor(max_workers=1) as ex:
|
| 626 |
-
fut_sel = ex.submit(
|
| 627 |
-
llm_select_indices,
|
| 628 |
-
query_text=selection_query,
|
| 629 |
-
candidates=candidates,
|
| 630 |
-
max_pick=0,
|
| 631 |
-
log=log,
|
| 632 |
-
mode=selection_mode,
|
| 633 |
-
chunk_size=max(1, selection_chunk_size),
|
| 634 |
-
per_phrase_k=max(1, selection_per_phrase_k),
|
| 635 |
-
)
|
| 636 |
-
picked_indices = _future_with_timeout(
|
| 637 |
-
fut_sel, stage3_select_timeout_s, "Index selection", []
|
| 638 |
-
)
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
log("Step 5: Build ranked group/category display")
|
| 682 |
t0 = time.perf_counter()
|
| 683 |
seed_terms = []
|
| 684 |
seed_terms.extend(user_tags)
|
| 685 |
seed_terms.extend([p.strip() for p in (rewritten or "").split(",") if p.strip()])
|
| 686 |
-
seed_terms.extend(structural_tags or [])
|
| 687 |
-
seed_terms.extend(probe_tags or [])
|
| 688 |
-
seed_terms.extend(selected_tags)
|
| 689 |
-
seed_terms = list(dict.fromkeys(seed_terms))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 690 |
|
| 691 |
toggle_rows = _build_toggle_rows(
|
| 692 |
seed_terms=seed_terms,
|
| 693 |
-
|
|
|
|
|
|
|
| 694 |
top_groups=max(1, int(display_top_groups)),
|
| 695 |
top_tags_per_group=max(1, int(display_top_tags_per_group)),
|
| 696 |
group_rank_top_k=max(1, int(display_rank_top_k)),
|
| 697 |
)
|
| 698 |
-
dt = time.perf_counter()-t0
|
| 699 |
-
_record_timing("group_display", dt)
|
| 700 |
-
log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
)
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
mascot_img = gr.
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
placeholder="
|
| 768 |
-
)
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
selected_tags_state = gr.State([])
|
| 779 |
row_defs_state = gr.State([])
|
| 780 |
row_values_state = gr.State([])
|
| 781 |
|
| 782 |
gr.Markdown("### Toggle Tag Rows")
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
)
|
| 796 |
-
)
|
| 797 |
-
|
| 798 |
-
gr.Markdown(
|
| 799 |
-
"Toggling a tag in any row toggles it everywhere else that tag appears."
|
| 800 |
-
)
|
| 801 |
-
|
| 802 |
-
with gr.Accordion("Display Settings", open=False):
|
| 803 |
-
with gr.Row():
|
| 804 |
-
display_top_groups = gr.Number(
|
| 805 |
-
value=display_top_groups_default,
|
| 806 |
-
precision=0,
|
| 807 |
-
label="Rows (Top Groups/Categories)",
|
| 808 |
-
minimum=1,
|
| 809 |
-
)
|
| 810 |
-
display_top_tags_per_group = gr.Number(
|
| 811 |
-
value=display_top_tags_per_group_default,
|
| 812 |
-
precision=0,
|
| 813 |
-
label="Top Tags Shown Per Row",
|
| 814 |
-
minimum=1,
|
| 815 |
-
)
|
| 816 |
-
display_rank_top_k = gr.Number(
|
| 817 |
-
value=display_rank_top_k_default,
|
| 818 |
-
precision=0,
|
| 819 |
-
label="Top Tags Used for Row Ranking",
|
| 820 |
-
minimum=1,
|
| 821 |
-
)
|
| 822 |
-
|
| 823 |
-
run_outputs = [
|
| 824 |
-
console,
|
| 825 |
-
legacy_final_prompt,
|
| 826 |
-
suggested_prompt,
|
| 827 |
-
selected_tags_state,
|
| 828 |
-
row_defs_state,
|
| 829 |
-
row_values_state,
|
| 830 |
-
*row_headers,
|
| 831 |
-
*row_checkboxes,
|
| 832 |
-
]
|
| 833 |
-
|
| 834 |
-
submit_button.click(
|
| 835 |
-
rag_pipeline_ui,
|
| 836 |
-
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 837 |
-
outputs=run_outputs
|
| 838 |
)
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
inputs=[image_tags, display_top_groups, display_top_tags_per_group, display_rank_top_k],
|
| 843 |
-
outputs=run_outputs
|
| 844 |
)
|
| 845 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
for idx, row_cb in enumerate(row_checkboxes):
|
| 847 |
row_cb.change(
|
| 848 |
fn=lambda changed_values, selected_state, row_defs, row_values, i=idx: _on_toggle_row(
|
| 849 |
i,
|
| 850 |
changed_values,
|
| 851 |
-
selected_state,
|
| 852 |
-
row_defs,
|
| 853 |
-
row_values,
|
| 854 |
-
display_max_rows_default,
|
| 855 |
),
|
| 856 |
inputs=[row_cb, selected_tags_state, row_defs_state, row_values_state],
|
| 857 |
outputs=[selected_tags_state, row_values_state, suggested_prompt, *row_checkboxes],
|
|
|
|
|
|
|
| 858 |
)
|
| 859 |
-
|
| 860 |
-
if __name__ == "__main__":
|
| 861 |
-
app.queue().launch(allowed_paths=[str(MASCOT_DIR)])
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
import time
|
| 5 |
+
import json
|
| 6 |
+
import csv
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Any, Dict, List, Set, Tuple
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
|
| 13 |
+
|
| 14 |
+
from psq_rag.pipeline.preproc import extract_user_provided_tags_upto_3_words
|
| 15 |
+
from psq_rag.llm.rewrite import llm_rewrite_prompt
|
| 16 |
+
from psq_rag.retrieval.psq_retrieval import psq_candidates_from_rewrite_phrases, _norm_tag_for_lookup
|
| 17 |
+
from psq_rag.llm.select import llm_select_indices, llm_infer_structural_tags, llm_infer_probe_tags
|
| 18 |
+
from psq_rag.retrieval.state import expand_tags_via_implications, get_tag_type_name, get_tag_implications
|
| 19 |
+
from psq_rag.ui.group_ranked_display import rank_groups_from_tfidf, _load_enabled_groups
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _split_prompt_commas(s: str) -> List[str]:
    """Split a comma-separated prompt into trimmed, non-empty pieces.

    ``None`` or an empty string yields an empty list.
    """
    trimmed = (piece.strip() for piece in (s or "").split(","))
    return [piece for piece in trimmed if piece]
|
| 24 |
+
|
| 25 |
+
def _norm_for_dedupe(tag: str) -> str:
    """Return the canonical key used to detect duplicate prompt entries.

    The tag is lower-cased and then passed through ``_norm_tag_for_lookup``.
    """
    return _norm_tag_for_lookup(tag.lower())
|
| 28 |
+
|
| 29 |
+
def compose_final_prompt(rewritten_prompt: str, selected_tags: List[str]) -> str:
    """Join the rewritten prompt's phrases and the selected tags into one prompt.

    Entries are de-duplicated on the key produced by ``_norm_for_dedupe``; the
    first spelling encountered wins and the original order is preserved.

    Args:
        rewritten_prompt: Comma-separated phrases; ``None``/empty is allowed.
        selected_tags: Tags appended after the rewritten phrases. ``None`` is
            treated as "no tags" instead of raising.

    Returns:
        The surviving entries joined with ``", "``.
    """
    parts = _split_prompt_commas(rewritten_prompt)
    parts.extend(selected_tags or [])

    seen: Set[str] = set()
    unique_parts: List[str] = []
    for part in parts:
        key = _norm_for_dedupe(part)
        if key in seen:
            continue
        seen.add(key)
        unique_parts.append(part)

    return ", ".join(unique_parts)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
def _display_tag_text(tag: str) -> str:
    """Return the human-readable form of *tag*: underscores shown as spaces."""
    return tag.replace("_", " ")
|
| 47 |
|
| 48 |
|
| 49 |
+
def _normalize_selection_origin(origin: str) -> str:
    """Map an origin label onto the closed set of origins the UI understands.

    The label is trimmed and lower-cased. Anything outside the known set
    (including ``None`` or an empty string) falls back to ``"selection"``.

    ``"implied"`` is accepted because the stylesheet and legend in this file
    define a dedicated ``data-psq-origin="implied"`` colour; collapsing it to
    ``"selection"`` would make that rule unreachable.
    """
    known_origins = {
        "rewrite",
        "selection",
        "probe",
        "structural",
        "implied",
        "user",
        "candidate",
    }
    label = (origin or "").strip().lower()
    return label if label in known_origins else "selection"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _choice_label_with_source_meta(tag: str, *, origin: str, preselected: bool) -> str:
    """Build a checkbox label that carries a trailing ``[[psq:<origin>:<0|1>]]`` marker.

    Marker is stripped client-side and converted into data attributes for
    CSS-driven colors.

    Args:
        tag: Tag to display (shown via ``_display_tag_text``).
        origin: Where the tag came from; normalized before being embedded.
        preselected: Whether the pipeline selected the tag; encoded as 1/0.
    """
    origin_norm = _normalize_selection_origin(origin)
    flag = "1" if preselected else "0"
    return f"{_display_tag_text(tag)} [[psq:{origin_norm}:{flag}]]"
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _selection_source_rank(origin: str) -> int:
    """Return the row-ordering priority band for *origin* (lower sorts first).

    ``structural`` -> 0, ``probe`` -> 1, every other origin -> 2.
    """
    # Keep rewrite/user in the same priority band as general selection for row ordering.
    priority_by_origin = {"structural": 0, "probe": 1}
    return priority_by_origin.get(_normalize_selection_origin(origin), 2)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _build_implied_parent_map(
    direct_tags_ordered: List[str],
    implied_tags: List[str],
) -> Dict[str, str]:
    """Attribute each implied tag to the first direct tag that reaches it.

    For every direct tag, in the order given, the mapping returned by
    ``get_tag_implications()`` is followed transitively. The first direct tag
    from which an implied tag is reachable is recorded as its parent.

    Args:
        direct_tags_ordered: Directly selected tags, in priority order.
        implied_tags: Tags that were added through implications.

    Returns:
        ``{normalized implied tag: normalized direct tag}``. Empty when either
        argument is empty.
    """
    implied_set = {_norm_tag_for_lookup(t) for t in (implied_tags or []) if t}
    if not implied_set or not direct_tags_ordered:
        return {}

    implications = get_tag_implications()
    parent_by_implied: Dict[str, str] = {}
    for direct in direct_tags_ordered:
        root = _norm_tag_for_lookup(direct)
        if not root:
            continue
        # LIFO stack, i.e. a depth-first walk. Visit order is irrelevant to the
        # result: every implied tag reachable from `root` gets `root` as parent
        # unless an earlier direct tag already claimed it.
        pending = [root]
        visited = {root}
        while pending:
            current = pending.pop()
            for parent in implications.get(current, ()):
                node = _norm_tag_for_lookup(parent)
                if not node or node in visited:
                    continue
                visited.add(node)
                if node in implied_set:
                    # setdefault keeps the earliest direct tag as the owner.
                    parent_by_implied.setdefault(node, root)
                pending.append(node)
    return parent_by_implied
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _order_selected_tags_for_row(
    *,
    row_selected_tags: List[str],
    selected_index: Dict[str, int],
    tag_selection_origins: Dict[str, str],
    implied_parent_map: Dict[str, str],
) -> List[str]:
    """Order one row's selected tags: base tags first, each followed by its implied tags.

    Base tags (those without an entry in ``implied_parent_map``) are sorted by
    origin priority band, then by their position in ``selected_index``, then
    alphabetically. Implied tags whose parent is a base tag of this row are
    emitted right after that parent. Any tag still unplaced is appended at the
    end, ordered by its parent's band and position.

    Args:
        row_selected_tags: Selected tags belonging to the row.
        selected_index: Position of each selected tag in the overall selection.
        tag_selection_origins: Origin label per tag.
        implied_parent_map: Implied tag -> direct tag that produced it.

    Returns:
        Normalized tags in display order, without duplicates.
    """
    unranked = 10**9  # sorts tags missing from selected_index after ranked ones

    row_selected_norm = [_norm_tag_for_lookup(t) for t in (row_selected_tags or []) if t]
    implied_in_row = {t for t in row_selected_norm if t in implied_parent_map}
    base_tags = [t for t in row_selected_norm if t not in implied_in_row]

    base_tags.sort(
        key=lambda t: (
            _selection_source_rank(tag_selection_origins.get(t, "selection")),
            selected_index.get(t, unranked),
            t,
        )
    )

    children_by_parent: Dict[str, List[str]] = {}
    for implied in implied_in_row:
        parent = implied_parent_map.get(implied)
        if parent:
            children_by_parent.setdefault(parent, []).append(implied)

    for children in children_by_parent.values():
        children.sort(key=lambda t: (selected_index.get(t, unranked), t))

    ordered: List[str] = []
    emitted: Set[str] = set()
    for tag in base_tags:
        if tag in emitted:
            continue
        ordered.append(tag)
        emitted.add(tag)
        for child in children_by_parent.get(tag, []):
            if child not in emitted:
                ordered.append(child)
                emitted.add(child)

    # Whatever is left was not placed under a base tag of this row.
    remaining_implied = [t for t in row_selected_norm if t not in emitted]
    remaining_implied.sort(
        key=lambda t: (
            _selection_source_rank(
                tag_selection_origins.get(implied_parent_map.get(t, ""), "selection")
            ),
            selected_index.get(implied_parent_map.get(t, ""), unranked),
            selected_index.get(t, unranked),
            t,
        )
    )
    for tag in remaining_implied:
        if tag not in emitted:
            ordered.append(tag)
            emitted.add(tag)
    return ordered
|
| 155 |
|
| 156 |
|
| 157 |
+
def _escape_prompt_tag(tag: str) -> str:
    """Format *tag* for the prompt text.

    Underscores become spaces and each parenthesis is prefixed with a
    backslash, all in a single pass over the string.
    """
    return tag.translate(str.maketrans({"_": " ", "(": "\\(", ")": "\\)"}))
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _ordered_selected_for_prompt(selected: Set[str], row_defs: List[Dict[str, Any]]) -> List[str]:
    """Return the selected tags in on-screen order.

    Tags are emitted in the order they appear across ``row_defs`` (row by row,
    left to right), each at most once.

    Args:
        selected: Currently selected tags.
        row_defs: Row definitions; each row's ``"tags"`` list is consulted.
            ``None`` is treated as no rows.

    Returns:
        Ordered list of the selected tags without duplicates.
    """
    ordered: List[str] = []
    seen: Set[str] = set()
    for row in row_defs or []:
        for tag in row.get("tags", []):
            if tag in selected and tag not in seen:
                ordered.append(tag)
                seen.add(tag)
    # Fallback for any selected tags not present in current rows.
    ordered.extend(sorted(set(selected) - seen))
    return ordered
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def _compose_toggle_prompt_text(selected_tags: List[str], row_defs: List[Dict[str, Any]]) -> str:
    """Render the suggested prompt from the currently toggled-on tags.

    Falsy entries are dropped, the remainder is ordered by
    ``_ordered_selected_for_prompt`` and each tag is escaped with
    ``_escape_prompt_tag`` before joining with ``", "``.
    """
    selected = {tag for tag in (selected_tags or []) if tag}
    ordered = _ordered_selected_for_prompt(selected, row_defs or [])
    return ", ".join(map(_escape_prompt_tag, ordered))
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _is_artist_tag(tag: str) -> bool:
    """Return True when *tag* should be treated as an artist tag.

    A tag qualifies when ``get_tag_type_name`` reports ``"artist"`` or when its
    normalized form starts with ``by_``. Tags that normalize to an empty
    string are never artist tags.
    """
    normalized = _norm_tag_for_lookup(str(tag))
    if not normalized:
        return False
    # Keep a resilient fallback for malformed/missing tag typing metadata.
    return get_tag_type_name(normalized) == "artist" or normalized.startswith("by_")
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
@lru_cache(maxsize=1)
def _load_excluded_recommendation_tags() -> Set[str]:
    """Load the tags whose ``category_status`` is ``excluded`` in the registry CSV.

    Reads ``data/analysis/category_registry.csv`` (resolved against the current
    working directory) and collects the normalized ``tag`` of every row whose
    status equals ``excluded`` case-insensitively.

    The result is cached for the lifetime of the process, so the file is read
    at most once. Loading is best-effort: a missing file yields an empty set,
    and a read/parse failure is logged and also yields an empty set.
    """
    csv_path = Path("data/analysis/category_registry.csv")
    if not csv_path.exists():
        return set()

    excluded: Set[str] = set()
    try:
        with csv_path.open("r", encoding="utf-8", newline="") as handle:
            for row in csv.DictReader(handle):
                tag = _norm_tag_for_lookup(str(row.get("tag") or ""))
                if not tag:
                    continue
                status = str(row.get("category_status") or "").strip().lower()
                if status == "excluded":
                    excluded.add(tag)
    except Exception as exc:
        # Deliberately best-effort: never let a bad registry file break the UI,
        # but leave a trace, because the empty result stays cached.
        logging.warning("Failed to read excluded-tag registry (%s): %s", csv_path, exc)
        return set()
    return excluded
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
def _is_excluded_recommendation_tag(tag: str) -> bool:
    """Return True when the normalized *tag* is listed as excluded in the registry.

    Tags that normalize to an empty string are never considered excluded.
    """
    normalized = _norm_tag_for_lookup(str(tag))
    return bool(normalized) and normalized in _load_excluded_recommendation_tags()
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _filter_excluded_recommendation_tags(tags: List[str]) -> Tuple[List[str], List[str]]:
    """Split *tags* into those to keep and those removed by the exclusion registry.

    Every tag is normalized with ``_norm_tag_for_lookup``. Falsy inputs and
    tags that normalize to an empty string are dropped. The same rules apply
    whether or not the registry is empty, so both cases clean the input
    identically.

    Args:
        tags: Candidate tags; ``None`` is treated as an empty list.

    Returns:
        ``(keep, removed)``: ``keep`` holds the surviving normalized tags in
        first-seen order without duplicates; ``removed`` is the sorted list of
        distinct normalized tags that were excluded.
    """
    excluded = _load_excluded_recommendation_tags()

    keep: List[str] = []
    removed: Set[str] = set()
    seen: Set[str] = set()
    for raw in tags or []:
        if not raw:
            # Skip None/"" before str() so None never becomes the tag "None".
            continue
        tag = _norm_tag_for_lookup(str(raw))
        if not tag:
            continue
        if tag in excluded:
            removed.add(tag)
            continue
        if tag in seen:
            continue
        seen.add(tag)
        keep.append(tag)
    return keep, sorted(removed)
|
| 243 |
+
|
| 244 |
+
|
| 245 |
def _build_toggle_rows(
    *,
    seed_terms: List[str],
    selected_tags: List[str],
    tag_selection_origins: Dict[str, str],
    implied_parent_map: Dict[str, str],
    top_groups: int,
    top_tags_per_group: int,
    group_rank_top_k: int,
) -> List[Dict[str, Any]]:
    """Build the row definitions shown in the tag-toggle UI.

    The first row, ``selected_other``, holds selected tags that belong to none
    of the displayed groups. It is followed by one row per group returned by
    ``rank_groups_from_tfidf``; in each group row the already-selected tags
    come first (ordered by ``_order_selected_tags_for_row``) and the group's
    ranked tags fill the remaining slots.

    Artist tags and tags excluded by the category registry are never offered.

    Args:
        seed_terms: Terms handed to ``rank_groups_from_tfidf``.
        selected_tags: Tags selected by the pipeline; ``None`` means none.
        tag_selection_origins: Origin label per tag; ``None`` means no origins.
        implied_parent_map: Implied tag -> direct tag that produced it.
        top_groups: Number of groups to request (clamped to at least 1).
        top_tags_per_group: Tags per group row (clamped to at least 1). A row
            grows beyond this when it has more selected tags than slots.
        group_rank_top_k: Ranking depth forwarded to the ranker (at least 1).

    Returns:
        A list of dicts with keys ``name``, ``label``, ``tags`` and
        ``tag_meta`` (per tag: ``origin`` and ``preselected``).
    """
    origins = tag_selection_origins or {}
    parents = implied_parent_map or {}
    tags_per_group = max(1, int(top_tags_per_group))

    ranked_rows = rank_groups_from_tfidf(
        seed_terms=seed_terms,
        top_groups=max(1, int(top_groups)),
        top_tags_per_group=tags_per_group,
        group_rank_top_k=max(1, int(group_rank_top_k)),
    )
    groups_map = _load_enabled_groups()

    # Normalized, de-duplicated selection in first-seen order.
    selected_active = list(
        dict.fromkeys(
            _norm_tag_for_lookup(t)
            for t in (selected_tags or [])
            if t and not _is_artist_tag(t) and not _is_excluded_recommendation_tag(t)
        )
    )
    # Doubles as the O(1) "is this tag selected?" lookup further down.
    selected_index: Dict[str, int] = {t: i for i, t in enumerate(selected_active)}

    def _origin_of(tag: str) -> str:
        return _normalize_selection_origin(origins.get(tag, "selection"))

    displayed_group_tag_sets: Dict[str, Set[str]] = {
        row.group_name: {
            t for t in groups_map.get(row.group_name, []) if not _is_artist_tag(t)
        }
        for row in ranked_rows
    }
    tags_in_any_displayed_group: Set[str] = set()
    for tag_set in displayed_group_tag_sets.values():
        tags_in_any_displayed_group.update(tag_set)

    selected_other = _order_selected_tags_for_row(
        row_selected_tags=[
            t for t in selected_active if t not in tags_in_any_displayed_group
        ],
        selected_index=selected_index,
        tag_selection_origins=origins,
        implied_parent_map=parents,
    )
    row_defs: List[Dict[str, Any]] = [
        {
            "name": "selected_other",
            "label": "Selected (Other)",
            "tags": selected_other,
            "tag_meta": {
                t: {"origin": _origin_of(t), "preselected": True} for t in selected_other
            },
        }
    ]

    for row in ranked_rows:
        group_name = row.group_name
        group_tag_set = displayed_group_tag_sets.get(group_name, set())
        selected_in_group = _order_selected_tags_for_row(
            row_selected_tags=[t for t in selected_active if t in group_tag_set],
            selected_index=selected_index,
            tag_selection_origins=origins,
            implied_parent_map=parents,
        )
        already_listed = set(selected_in_group)
        # row.tags yields pairs; only the first element (the tag) is used here.
        ranked_tags = [
            t
            for t, _ in row.tags
            if not _is_artist_tag(t) and not _is_excluded_recommendation_tag(t)
        ]
        merged = selected_in_group + [t for t in ranked_tags if t not in already_listed]
        # Never truncate away a selected tag, even when the row is over budget.
        keep_n = max(tags_per_group, len(selected_in_group))
        merged = merged[:keep_n]
        row_defs.append(
            {
                "name": group_name,
                "label": f"{group_name} (E={row.expected_count:.2f})",
                "tags": merged,
                "tag_meta": {
                    t: {"origin": _origin_of(t), "preselected": t in selected_index}
                    for t in merged
                },
            }
        )

    return row_defs
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def _build_display_audit_line(
    row_defs: List[Dict[str, Any]],
    *,
    active_selected_tags: List[str],
    direct_selected_tags: List[str],
    implied_selected_tags: List[str],
) -> str:
    """Return a one-line JSON audit of every displayed tag and why it is shown.

    For each tag appearing in ``row_defs`` the audit records the labels of the
    rows containing it and a sorted list of source markers:
    ``selected_other_row`` / ``ranked_group_row`` (kind of row) plus
    ``selected_active`` / ``selected_direct`` / ``selected_implied`` when the
    tag is a member of the corresponding argument.

    Returns:
        ``"Display Tag Audit: "`` followed by the ASCII-only JSON payload.
    """

    def _norm_non_artist(tags: List[str]) -> Set[str]:
        # Normalized, non-empty, non-artist members of `tags`.
        return {
            _norm_tag_for_lookup(t)
            for t in (tags or [])
            if t and not _is_artist_tag(t)
        }

    membership = (
        ("selected_active", _norm_non_artist(active_selected_tags)),
        ("selected_direct", _norm_non_artist(direct_selected_tags)),
        ("selected_implied", _norm_non_artist(implied_selected_tags)),
    )

    info_by_tag: Dict[str, Dict[str, Any]] = {}
    for row in row_defs or []:
        row_name = row.get("name", "")
        row_label = row.get("label", row_name)
        row_source = (
            "selected_other_row" if row_name == "selected_other" else "ranked_group_row"
        )
        for tag in row.get("tags", []):
            rec = info_by_tag.setdefault(tag, {"rows": [], "sources": set()})
            rec["rows"].append(row_label)
            rec["sources"].add(row_source)
            rec["sources"].update(
                marker for marker, members in membership if tag in members
            )

    payload = {
        "n_tags": len(info_by_tag),
        "tags": [
            {
                "tag": tag,
                "rows": info_by_tag[tag]["rows"],
                "sources": sorted(info_by_tag[tag]["sources"]),
            }
            for tag in sorted(info_by_tag)
        ],
    }
    return "Display Tag Audit: " + json.dumps(payload, ensure_ascii=True)
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
def _build_row_component_updates(
    row_defs: List[Dict[str, Any]],
    selected_tags: List[str],
    max_rows: int,
):
    """Compute the Gradio updates for every header/checkbox row slot.

    Exactly ``max_rows`` header updates and ``max_rows`` checkbox updates are
    produced. Slots backed by a row definition show its label and choices
    (hidden when the row has no tags); surplus slots are cleared and hidden.

    Args:
        row_defs: Row definitions (``label``, ``tags``, optional ``tag_meta``).
        selected_tags: Currently selected tags; falsy entries are ignored.
        max_rows: Number of row slots that exist in the UI.

    Returns:
        ``(prompt_text, row_values_state, header_updates, checkbox_updates)``.
        ``row_values_state`` has one entry per row definition that fits into
        ``max_rows``.
    """
    row_defs = row_defs or []
    selected = {t for t in (selected_tags or []) if t}
    row_values_state: List[List[str]] = []
    header_updates = []
    checkbox_updates = []

    for idx in range(max_rows):
        if idx >= len(row_defs):
            header_updates.append(gr.update(value="", visible=False))
            checkbox_updates.append(gr.update(choices=[], value=[], visible=False))
            continue

        row = row_defs[idx]
        tags = list(dict.fromkeys(row.get("tags", [])))
        # NOTE(review): the statement defining `values` was not visible in the
        # reviewed text; reconstructed to mirror the per-row computation in
        # _on_toggle_row. Confirm against the repository.
        values = [t for t in tags if t in selected]
        row_values_state.append(values)
        visible = bool(tags)
        header_updates.append(
            gr.update(value=f"**{row.get('label', '')}**", visible=visible)
        )

        tag_meta = row.get("tag_meta", {})
        if not isinstance(tag_meta, dict):
            tag_meta = {}
        choices = []
        for tag in tags:
            meta = tag_meta.get(tag, {})
            if not isinstance(meta, dict):
                meta = {}
            origin = _normalize_selection_origin(str(meta.get("origin", "selection")))
            preselected = bool(meta.get("preselected", False))
            # (label shown to the user, value reported back by the checkbox group)
            label = _choice_label_with_source_meta(
                tag, origin=origin, preselected=preselected
            )
            choices.append((label, tag))
        checkbox_updates.append(
            gr.update(
                choices=choices,
                value=values,
                visible=visible,
            )
        )

    prompt_text = _compose_toggle_prompt_text(list(selected), row_defs)
    return prompt_text, row_values_state, header_updates, checkbox_updates
|
| 434 |
+
|
| 435 |
+
|
| 436 |
def _on_toggle_row(
    row_idx: int,
    changed_values: List[str],
    selected_tags_state: List[str],
    row_defs_state: List[Dict[str, Any]],
    row_values_state: List[List[str]],
    max_rows: int,
):
    """Handle a change of one checkbox row and propagate it to the other rows.

    The changed row's checked values replace that row's share of the global
    selection. Because a tag may appear in several rows, every row containing
    a toggled tag receives a value update; all other rows get a no-op update.

    Args:
        row_idx: Index of the row whose checkboxes changed.
        changed_values: Values now checked in that row.
        selected_tags_state: Global selection before the change.
        row_defs_state: Row definitions currently on screen.
        row_values_state: Previous per-row values. Accepted because the event
            wiring supplies it; the new state is recomputed from scratch.
        max_rows: Number of checkbox components to emit updates for.

    Returns:
        ``[sorted selection, per-row values, prompt text, *checkbox updates]``
        with exactly ``max_rows`` checkbox updates.
    """
    row_defs = row_defs_state or []
    selected = set(selected_tags_state or [])
    row = row_defs[row_idx] if 0 <= row_idx < len(row_defs) else {}
    row_tags = list(dict.fromkeys(row.get("tags", [])))
    row_tag_set = set(row_tags)
    row_tag_by_norm = {_norm_tag_for_lookup(t): t for t in row_tags}

    # Be tolerant to UI payload forms: canonical tag values, display labels, or normalized variants.
    new_set: Set[str] = set()
    for raw in changed_values or []:
        if raw in row_tag_set:
            new_set.add(raw)
            continue
        mapped = row_tag_by_norm.get(_norm_tag_for_lookup(str(raw)))
        if mapped:
            new_set.add(mapped)

    prev_row_selected = selected & row_tag_set
    selected -= row_tag_set
    selected |= new_set
    # Symmetric difference: tags that were switched on or off in this row.
    toggled_tags = prev_row_selected ^ new_set

    # Recompute row selections, but only push UI updates to rows touched by the toggled tags.
    new_row_values_state: List[List[str]] = []
    affected_rows: Set[int] = {row_idx}
    for idx, row_def in enumerate(row_defs):
        tags = list(dict.fromkeys(row_def.get("tags", [])))
        new_row_values_state.append([t for t in tags if t in selected])
        if not toggled_tags.isdisjoint(tags):
            affected_rows.add(idx)

    checkbox_updates = [
        gr.update(value=new_row_values_state[idx])
        if idx < len(row_defs) and idx in affected_rows
        else gr.update()
        for idx in range(max_rows)
    ]

    ordered_selection = sorted(selected)
    prompt_text = _compose_toggle_prompt_text(ordered_selection, row_defs)
    return [ordered_selection, new_row_values_state, prompt_text, *checkbox_updates]
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def _build_ui_payload(
    *,
    console_text: str,
    legacy_prompt_text: str,
    row_defs: List[Dict[str, Any]],
    selected_tags: List[str],
):
    """Assemble the flat list of values returned to the Gradio outputs.

    Order: console text, legacy prompt, suggested prompt, sorted unique
    selected tags, row definitions, per-row values, then one update per row
    header followed by one update per row checkbox group. The number of rows
    is taken from the module-level ``display_max_rows_default``.
    """
    prompt_text, row_values_state, header_updates, checkbox_updates = (
        _build_row_component_updates(
            row_defs=row_defs,
            selected_tags=selected_tags,
            max_rows=display_max_rows_default,
        )
    )
    payload = [
        console_text,
        legacy_prompt_text,
        prompt_text,
        sorted(set(selected_tags or [])),
        row_defs,
        row_values_state,
    ]
    payload.extend(header_updates)
    payload.extend(checkbox_updates)
    return payload
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
def _build_selection_query(
    prompt_in: str,
    rewritten: str,
    structural_tags: List[str],
    probe_tags: List[str],
) -> str:
    """Compose the multi-line query text for the selection stage.

    Lines, in order:
      * ``IMAGE DESCRIPTION: <prompt>`` (always present)
      * ``REWRITE PHRASES: <rewritten>`` (only when non-blank)
      * ``INFERRED TAG HINTS (context only): <tags>`` with the sorted, unique
        union of structural and probe tags (only when there are any)

    ``None`` is accepted for every argument and treated as empty.
    """
    lines = [f"IMAGE DESCRIPTION: {(prompt_in or '').strip()}"]

    rewritten_clean = (rewritten or "").strip()
    if rewritten_clean:
        lines.append(f"REWRITE PHRASES: {rewritten_clean}")

    hint_tags = set(structural_tags or ()) | set(probe_tags or ())
    if hint_tags:
        # Keep hints as context only; selection still must choose by candidate indices.
        lines.append(
            "INFERRED TAG HINTS (context only): " + ", ".join(sorted(hint_tags))
        )
    return "\n".join(lines)
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
# Set up logging
# Minimal prod logging: warnings+ to stderr, no file by default
import logging
import os

# Level name comes from PSQ_LOG_LEVEL; an unrecognised name falls back to WARNING.
LOG_LEVEL = os.environ.get("PSQ_LOG_LEVEL", "WARNING").upper()
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL, logging.WARNING),
    format="%(asctime)s %(levelname)s:%(message)s",
    handlers=[logging.StreamHandler()],  # no file -> avoids huge logs on Spaces
)

# Quiet down common noisy libs (optional)
for _name in ("gensim", "gradio", "hnswlib", "httpx", "uvicorn"):
    logging.getLogger(_name).setLevel(logging.ERROR)

# Turn off Gradio analytics phone-home to avoid those background thread errors (optional)
os.environ["GRADIO_ANALYTICS_ENABLED"] = "0"
|
| 551 |
+
|
| 552 |
+
|
| 553 |
+
# Mascot artwork lives next to this file.
MASCOT_DIR = Path(__file__).parent / "mascotimages"
MASCOT_FILE = MASCOT_DIR / "transparentsquirrel.png"


def _load_mascot_image():
    """Load mascot image if available; return None when missing/unreadable.

    The image is converted to RGBA. The source image is opened in a ``with``
    block so it is closed as soon as the converted copy exists.
    """
    if not MASCOT_FILE.exists():
        logging.warning("Mascot image missing: %s", MASCOT_FILE)
        return None
    try:
        with Image.open(MASCOT_FILE) as source:
            return source.convert("RGBA")
    except Exception as e:
        # Best-effort: the mascot is decorative, so a broken file must not stop the app.
        logging.warning("Failed to load mascot image (%s): %s", MASCOT_FILE, e)
        return None
|
| 567 |
+
|
| 568 |
+
# Hotfix: make gradio_client's JSON-schema helpers tolerate non-dict schemas.
try:
    from gradio_client import utils as _gc_utils

    _orig_get_type = _gc_utils.get_type
    _orig_j2p = _gc_utils._json_schema_to_python_type
    # Not called below; the lookup still aborts the patch (via the except
    # clause) when this attribute is absent from gradio_client.
    _orig_pub = _gc_utils.json_schema_to_python_type

    def _get_type_safe(schema):
        # Sometimes schema is a bare True/False (JSON Schema boolean form)
        if not isinstance(schema, dict):
            return "any"
        return _orig_get_type(schema)

    def _j2p_safe(schema, defs=None):
        # Accept non-dict schemas (True/False/None) and treat as "any"
        if not isinstance(schema, dict):
            return "any"
        return _orig_j2p(schema, defs or schema.get("$defs"))

    def _pub_safe(schema):
        # Public wrapper used by Gradio; keep it resilient too
        if not isinstance(schema, dict):
            return "any"
        return _j2p_safe(schema, schema.get("$defs"))

    _gc_utils.get_type = _get_type_safe
    _gc_utils._json_schema_to_python_type = _j2p_safe
    _gc_utils.json_schema_to_python_type = _pub_safe

except Exception as e:
    # Best-effort patch: report through logging like the rest of this file and continue.
    logging.warning("gradio_client hotfix not applied: %s", e)
|
| 599 |
+
# -------------------------------------------------------------------------------
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
allow_nsfw_tags = False


def _is_production_runtime() -> bool:
    """Best-effort detection for deployed runtime (HF Spaces or explicit env).

    Returns True when ``PSQ_PRODUCTION`` is ``1``/``true``/``yes`` (case and
    surrounding whitespace ignored), when ``SPACE_ID`` or ``HF_SPACE_ID`` is
    set to a non-empty value, or when ``SYSTEM`` equals ``spaces``.
    """
    explicit_flag = os.environ.get("PSQ_PRODUCTION", "").strip().lower()
    if explicit_flag in {"1", "true", "yes"}:
        return True
    if os.environ.get("SPACE_ID") or os.environ.get("HF_SPACE_ID"):
        return True
    return os.environ.get("SYSTEM") == "spaces"
|
| 614 |
+
|
| 615 |
+
|
| 616 |
+
# Verbose retrieval logging defaults to off in production and on elsewhere;
# PSQ_VERBOSE_RETRIEVAL (1/true/yes) overrides the default.
verbose_retrieval_default = "0" if _is_production_runtime() else "1"
verbose_retrieval = os.environ.get(
    "PSQ_VERBOSE_RETRIEVAL", verbose_retrieval_default
).strip().lower() in {"1", "true", "yes"}
|
| 618 |
+
verbose_retrieval_all = False
verbose_retrieval_limit = 20

# Probe tags are on unless PSQ_ENABLE_PROBE is an explicit "off" value. The
# value is compared case-insensitively, like the other boolean flags here.
enable_probe_tags = os.environ.get("PSQ_ENABLE_PROBE", "1").strip().lower() not in {
    "0",
    "false",
    "no",
}

# Display defaults (overridable via environment).
display_top_groups_default = int(os.environ.get("PSQ_DISPLAY_TOP_GROUPS", "10"))
display_top_tags_per_group_default = int(os.environ.get("PSQ_DISPLAY_TOP_TAGS_PER_GROUP", "5"))
display_rank_top_k_default = int(os.environ.get("PSQ_DISPLAY_GROUP_RANK_TOP_K", "5"))
display_max_rows_default = int(os.environ.get("PSQ_DISPLAY_MAX_ROWS", "14"))

# Retrieval sizing.
retrieval_global_k = int(os.environ.get("PSQ_RETRIEVAL_GLOBAL_K", "300"))
retrieval_per_phrase_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_K", "10"))
retrieval_per_phrase_final_k = int(os.environ.get("PSQ_RETRIEVAL_PER_PHRASE_FINAL_K", "1"))

# Selection stage.
selection_mode = os.environ.get("PSQ_SELECTION_MODE", "chunked_map_union").strip()
selection_chunk_size = int(os.environ.get("PSQ_SELECTION_CHUNK_SIZE", "60"))
selection_per_phrase_k = int(os.environ.get("PSQ_SELECTION_PER_PHRASE_K", "2"))
selection_candidate_cap = int(os.environ.get("PSQ_SELECTION_CANDIDATE_CAP", "0"))

# Per-stage timeouts, in seconds.
stage1_rewrite_timeout_s = float(os.environ.get("PSQ_TIMEOUT_REWRITE_S", "45"))
stage1_struct_timeout_s = float(os.environ.get("PSQ_TIMEOUT_STRUCT_S", "45"))
stage1_probe_timeout_s = float(os.environ.get("PSQ_TIMEOUT_PROBE_S", "45"))
stage3_select_timeout_s = float(os.environ.get("PSQ_TIMEOUT_SELECT_S", "45"))

timing_log_path = Path(
    os.environ.get("PSQ_TIMING_LOG_PATH", "data/runtime_metrics/ui_pipeline_timings.jsonl")
)
|
| 637 |
+
|
| 638 |
+
css = """
|
| 639 |
+
.scrollable-content{
|
| 640 |
+
max-height: 420px;
|
| 641 |
+
overflow-y: scroll; /* always show scrollbar */
|
| 642 |
+
overflow-x: hidden;
|
| 643 |
+
padding-right: 8px;
|
| 644 |
+
padding-bottom: 14px; /* <— add this */
|
| 645 |
+
scrollbar-gutter: stable; /* prevent layout shift as it fills */
|
| 646 |
+
|
| 647 |
+
/* Firefox */
|
| 648 |
+
scrollbar-width: auto;
|
| 649 |
+
scrollbar-color: rgba(180,180,180,.9) rgba(0,0,0,.15);
|
| 650 |
+
}
|
| 651 |
+
|
| 652 |
+
/* WebKit/Chromium (Chrome/Edge/Safari) */
|
| 653 |
+
.scrollable-content::-webkit-scrollbar{ width: 10px; }
|
| 654 |
+
.scrollable-content::-webkit-scrollbar-thumb{ background: rgba(180,180,180,.9); border-radius: 8px; }
|
| 655 |
+
.scrollable-content::-webkit-scrollbar-track{ background: rgba(0,0,0,.15); }
|
| 656 |
+
|
| 657 |
+
/* (Optional) make both scroll panes taller so they fill more of the column */
|
| 658 |
+
.pane-left .scrollable-content,
|
| 659 |
+
.pane-right .scrollable-content {
|
| 660 |
+
max-height: 610px; /* was 420px; tweak to taste */
|
| 661 |
+
}
|
| 662 |
+
|
| 663 |
+
.lego-tags .gr-checkboxgroup,
|
| 664 |
+
.lego-tags .wrap {
|
| 665 |
+
display: flex !important;
|
| 666 |
+
flex-wrap: wrap !important;
|
| 667 |
+
gap: 10px !important;
|
| 668 |
+
}
|
| 669 |
|
| 670 |
+
.lego-tags label {
|
| 671 |
+
margin: 0 !important;
|
| 672 |
+
padding: 0 !important;
|
| 673 |
+
position: relative !important;
|
| 674 |
+
}
|
| 675 |
|
| 676 |
+
/* Hide native checkbox visuals completely */
|
| 677 |
+
.lego-tags input[type="checkbox"] {
|
| 678 |
+
appearance: none !important;
|
| 679 |
+
-webkit-appearance: none !important;
|
| 680 |
+
-moz-appearance: none !important;
|
| 681 |
+
position: absolute !important;
|
| 682 |
+
width: 1px !important;
|
| 683 |
+
height: 1px !important;
|
| 684 |
+
opacity: 0 !important;
|
| 685 |
+
pointer-events: none !important;
|
| 686 |
+
display: none !important;
|
| 687 |
+
}
|
| 688 |
|
| 689 |
+
/* Brick button skin (works for both +span and ~span structures) */
|
| 690 |
+
.lego-tags input[type="checkbox"] + span,
|
| 691 |
+
.lego-tags input[type="checkbox"] ~ span {
|
| 692 |
+
--on-bg1: #ffd166;
|
| 693 |
+
--on-bg2: #f39c4a;
|
| 694 |
+
--on-border: #b86e21;
|
| 695 |
+
--on-text: #2e1706;
|
| 696 |
+
position: relative !important;
|
| 697 |
+
display: inline-flex !important;
|
| 698 |
+
align-items: center !important;
|
| 699 |
+
min-height: 40px !important;
|
| 700 |
+
padding: 10px 15px 9px !important;
|
| 701 |
+
border: 2px solid #7d8897 !important;
|
| 702 |
+
border-radius: 10px !important;
|
| 703 |
+
background: linear-gradient(180deg, #e8ecf2 0%, #c7ced8 100%) !important;
|
| 704 |
+
color: #2d3440 !important;
|
| 705 |
+
font-size: 0.97rem !important;
|
| 706 |
+
font-weight: 800 !important;
|
| 707 |
+
line-height: 1.15 !important;
|
| 708 |
+
cursor: pointer !important;
|
| 709 |
+
user-select: none !important;
|
| 710 |
+
letter-spacing: 0.01em !important;
|
| 711 |
+
box-shadow: 0 4px 0 rgba(0,0,0,0.22), inset 0 1px 0 rgba(255,255,255,0.72) !important;
|
| 712 |
+
transition: transform 0.08s ease, box-shadow 0.08s ease, filter 0.08s ease !important;
|
| 713 |
+
}
|
| 714 |
|
| 715 |
+
.lego-tags input[type="checkbox"] + span::before,
|
| 716 |
+
.lego-tags input[type="checkbox"] ~ span::before {
|
| 717 |
+
content: "" !important;
|
| 718 |
+
position: absolute !important;
|
| 719 |
+
top: 5px !important;
|
| 720 |
+
left: 8px !important;
|
| 721 |
+
width: 8px !important;
|
| 722 |
+
height: 8px !important;
|
| 723 |
+
border-radius: 50% !important;
|
| 724 |
+
background: rgba(255,255,255,0.58) !important;
|
| 725 |
+
box-shadow: 22px 0 0 rgba(255,255,255,0.58) !important;
|
| 726 |
+
pointer-events: none !important;
|
| 727 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
|
| 729 |
+
/* Bright color cycle used only when selected */
|
| 730 |
+
.lego-tags label:nth-child(8n+1) span { --on-bg1: #ffd166; --on-bg2: #f39c4a; --on-border: #b86e21; --on-text: #2e1706; }
|
| 731 |
+
.lego-tags label:nth-child(8n+2) span { --on-bg1: #6ee7ff; --on-bg2: #1fb7ff; --on-border: #157cb3; --on-text: #07263c; }
|
| 732 |
+
.lego-tags label:nth-child(8n+3) span { --on-bg1: #9dff8f; --on-bg2: #45c96f; --on-border: #2a8b4b; --on-text: #0d2917; }
|
| 733 |
+
.lego-tags label:nth-child(8n+4) span { --on-bg1: #ff8fab; --on-bg2: #ff5c7a; --on-border: #b83956; --on-text: #3f0f1d; }
|
| 734 |
+
.lego-tags label:nth-child(8n+5) span { --on-bg1: #d0a8ff; --on-bg2: #a46cff; --on-border: #7147b3; --on-text: #25143f; }
|
| 735 |
+
.lego-tags label:nth-child(8n+6) span { --on-bg1: #ffe27a; --on-bg2: #f7bf39; --on-border: #ad7f1f; --on-text: #332407; }
|
| 736 |
+
.lego-tags label:nth-child(8n+7) span { --on-bg1: #8effd5; --on-bg2: #2ed6b5; --on-border: #1e947d; --on-text: #0d2a25; }
|
| 737 |
+
.lego-tags label:nth-child(8n+8) span { --on-bg1: #ffb47e; --on-bg2: #ff8753; --on-border: #b95b2d; --on-text: #391a0a; }
|
| 738 |
+
|
| 739 |
+
/* Source-driven selected colors (applies when tags are preselected by the pipeline). */
|
| 740 |
+
.lego-tags label[data-psq-preselected="1"][data-psq-origin="rewrite"] span {
|
| 741 |
+
--on-bg1: #77f0d7;
|
| 742 |
+
--on-bg2: #26b9a3;
|
| 743 |
+
--on-border: #187869;
|
| 744 |
+
--on-text: #062923;
|
| 745 |
+
}
|
| 746 |
+
.lego-tags label[data-psq-preselected="1"][data-psq-origin="selection"] span {
|
| 747 |
+
--on-bg1: #ffd98a;
|
| 748 |
+
--on-bg2: #f0a93c;
|
| 749 |
+
--on-border: #a66f1f;
|
| 750 |
+
--on-text: #382206;
|
| 751 |
+
}
|
| 752 |
+
.lego-tags label[data-psq-preselected="1"][data-psq-origin="probe"] span {
|
| 753 |
+
--on-bg1: #d8b4ff;
|
| 754 |
+
--on-bg2: #9a6cff;
|
| 755 |
+
--on-border: #6745b0;
|
| 756 |
+
--on-text: #24143b;
|
| 757 |
+
}
|
| 758 |
+
.lego-tags label[data-psq-preselected="1"][data-psq-origin="structural"] span {
|
| 759 |
+
--on-bg1: #a6f79a;
|
| 760 |
+
--on-bg2: #53c368;
|
| 761 |
+
--on-border: #2f8442;
|
| 762 |
+
--on-text: #102d17;
|
| 763 |
+
}
|
| 764 |
+
.lego-tags label[data-psq-preselected="1"][data-psq-origin="implied"] span {
|
| 765 |
+
--on-bg1: #d7dde8;
|
| 766 |
+
--on-bg2: #a8b3c4;
|
| 767 |
+
--on-border: #6f7e95;
|
| 768 |
+
--on-text: #1d2633;
|
| 769 |
}
|
| 770 |
|
| 771 |
+
/* User-selected tags (not initially selected by the pipeline). */
|
| 772 |
+
.lego-tags label[data-psq-preselected="0"] span {
|
| 773 |
+
--on-bg1: #9ec5ff;
|
| 774 |
+
--on-bg2: #4f86ff;
|
| 775 |
+
--on-border: #2f5fbf;
|
| 776 |
+
--on-text: #0b1f42;
|
| 777 |
+
}
|
| 778 |
|
| 779 |
+
.lego-tags label:hover span {
|
| 780 |
+
filter: brightness(1.02) !important;
|
| 781 |
+
transform: translateY(1px) !important;
|
|
|
|
| 782 |
}
|
| 783 |
|
| 784 |
+
/* ON state: brighter + visibly recessed */
|
| 785 |
+
.lego-tags input[type="checkbox"]:checked + span,
|
| 786 |
+
.lego-tags input[type="checkbox"]:checked ~ span,
|
| 787 |
+
.lego-tags label:has(input[type="checkbox"]:checked) span {
|
| 788 |
+
background: linear-gradient(180deg, var(--on-bg1) 0%, var(--on-bg2) 100%) !important;
|
| 789 |
+
color: var(--on-text) !important;
|
| 790 |
+
border-color: var(--on-border) !important;
|
| 791 |
+
filter: saturate(1.2) brightness(1.12) !important;
|
| 792 |
+
transform: translateY(-2px) !important;
|
| 793 |
+
box-shadow:
|
| 794 |
+
inset 0 3px 6px rgba(0,0,0,0.20),
|
| 795 |
+
inset 0 -1px 0 rgba(255,255,255,0.36),
|
| 796 |
+
0 6px 0 rgba(0,0,0,0.32) !important;
|
| 797 |
+
}
|
| 798 |
+
|
| 799 |
+
.source-legend {
|
| 800 |
display: flex;
|
| 801 |
flex-wrap: wrap;
|
| 802 |
gap: 8px;
|
| 803 |
+
margin: 4px 0 10px 0;
|
| 804 |
}
|
| 805 |
|
| 806 |
+
.source-legend .chip {
|
| 807 |
+
display: inline-flex;
|
| 808 |
+
align-items: center;
|
| 809 |
+
gap: 8px;
|
| 810 |
+
border-radius: 999px;
|
| 811 |
+
border: 1px solid #8792a2;
|
| 812 |
+
padding: 5px 10px;
|
| 813 |
+
font-size: 0.85rem;
|
| 814 |
+
font-weight: 700;
|
| 815 |
+
color: #1f2430;
|
| 816 |
+
background: #f3f6fb;
|
| 817 |
}
|
| 818 |
|
| 819 |
+
.source-legend .swatch {
|
| 820 |
+
width: 12px;
|
| 821 |
+
height: 12px;
|
| 822 |
+
border-radius: 50%;
|
| 823 |
+
border: 1px solid rgba(0,0,0,0.2);
|
| 824 |
}
|
| 825 |
|
| 826 |
+
.source-legend .rewrite { background: #26b9a3; }
|
| 827 |
+
.source-legend .selection { background: #f0a93c; }
|
| 828 |
+
.source-legend .probe { background: #9a6cff; }
|
| 829 |
+
.source-legend .structural { background: #53c368; }
|
| 830 |
+
.source-legend .implied { background: #a8b3c4; }
|
| 831 |
+
.source-legend .user { background: #4f86ff; }
|
| 832 |
+
.source-legend .unselected { background: #c7ced8; }
|
| 833 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
|
| 835 |
+
client_js = """
|
| 836 |
+
() => {
|
| 837 |
+
const markerRe = /\\s*\\[\\[psq:([a-z_]+):(0|1)\\]\\]\\s*$/;
|
| 838 |
+
const applyTagMeta = () => {
|
| 839 |
+
const labels = document.querySelectorAll(".lego-tags label");
|
| 840 |
+
labels.forEach((label) => {
|
| 841 |
+
const span = label.querySelector("span");
|
| 842 |
+
if (!span) return;
|
| 843 |
+
const text = span.textContent || "";
|
| 844 |
+
const match = text.match(markerRe);
|
| 845 |
+
if (!match) return;
|
| 846 |
+
label.dataset.psqOrigin = match[1];
|
| 847 |
+
label.dataset.psqPreselected = match[2];
|
| 848 |
+
span.textContent = text.replace(markerRe, "");
|
| 849 |
+
});
|
| 850 |
+
};
|
| 851 |
+
|
| 852 |
+
applyTagMeta();
|
| 853 |
+
const observer = new MutationObserver(() => applyTagMeta());
|
| 854 |
+
observer.observe(document.body, { childList: true, subtree: true, characterData: true });
|
| 855 |
}
|
| 856 |
"""
|
| 857 |
|
| 858 |
|
| 859 |
def rag_pipeline_ui(
    user_prompt: str,
    display_top_groups: float,
    display_top_tags_per_group: float,
    display_rank_top_k: float,
):
    """Run the prompt-to-tags pipeline and build the payload shown in the UI.

    Stages, in order: heuristic user-tag extraction, concurrent LLM rewrite /
    structural inference / optional probe inference, candidate retrieval, LLM
    index selection, implication expansion, final prompt composition, and the
    ranked toggle-row display. Progress messages and per-stage timings are
    collected in an in-memory log that becomes the console text.

    Args:
        user_prompt: Raw prompt text. It is stripped; an empty prompt returns
            an error payload immediately.
        display_top_groups: Number of rows to build; coerced with
            ``max(1, int(...))``.
        display_top_tags_per_group: Tags shown per row; coerced the same way.
        display_rank_top_k: Tags considered when ranking rows; coerced the
            same way.

    Returns:
        Whatever ``_build_ui_payload`` returns for the console text, legacy
        prompt, row definitions and selected tags. Any exception that escapes
        a stage is logged and produces a payload with empty rows and tags.
    """
    logs = []

    def log(s):
        logs.append(s)

    try:
        stage_timings = {}

        def _record_timing(stage: str, dt_s: float):
            """Store the duration of ``stage`` in seconds."""
            stage_timings[stage] = float(dt_s)

        def _emit_timing_summary(total_s: float):
            """Log recorded stage timings, the slowest stage and the total."""
            summary_order = (
                "preprocess",
                "rewrite",
                "structural",
                "probe",
                "retrieval",
                "selection",
                "implication_expansion",
                "prompt_composition",
                "group_display",
            )
            lines = [
                f"{k}={stage_timings[k]:.2f}s"
                for k in summary_order
                if k in stage_timings
            ]
            slowest = max(stage_timings, key=stage_timings.get) if stage_timings else "n/a"
            log("Timing Summary: " + ", ".join(lines))
            log(f"Timing Slowest Stage: {slowest}")
            log(f"Timing Total: {total_s:.2f}s")

        def _append_timing_jsonl(total_s: float):
            """Append one JSON line with this run's timings; never raises."""
            try:
                # Imported here so the block does not rely on a file-level
                # `timezone` import.
                from datetime import timezone

                timing_log_path.parent.mkdir(parents=True, exist_ok=True)
                rec = {
                    # Same "YYYY-MM-DDTHH:MM:SSZ" text the deprecated
                    # datetime.utcnow() call used to produce.
                    "timestamp_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
                    "stages_s": stage_timings,
                    "total_s": float(total_s),
                    "config": {
                        "timeout_rewrite_s": stage1_rewrite_timeout_s,
                        "timeout_struct_s": stage1_struct_timeout_s,
                        "timeout_probe_s": stage1_probe_timeout_s,
                        "timeout_select_s": stage3_select_timeout_s,
                    },
                }
                with timing_log_path.open("a", encoding="utf-8") as f:
                    f.write(json.dumps(rec, ensure_ascii=True) + "\n")
                log(f"Timing Log: wrote {timing_log_path}")
            except Exception as e:
                # Timing persistence is best-effort; report and carry on.
                log(f"Timing Log: failed ({type(e).__name__}: {e})")

        # Log label -> key used in stage_timings.
        stage_keys = {
            "Rewrite": "rewrite",
            "Structural inference": "structural",
            "Probe inference": "probe",
            "Index selection": "selection",
        }

        def _future_with_timeout(fut, timeout_s: float, stage_name: str, fallback):
            """Wait for ``fut`` (at least 1s); return ``fallback`` on timeout or error."""
            t0 = time.perf_counter()
            try:
                out = fut.result(timeout=max(1.0, float(timeout_s)))
                dt = time.perf_counter() - t0
                log(f"{stage_name}: {dt:.2f}s")
                stage_key = stage_keys.get(stage_name)
                if stage_key:
                    _record_timing(stage_key, dt)
                return out
            except FutureTimeoutError:
                # cancel() only helps if the call has not started yet; a
                # running call keeps going in its worker thread.
                fut.cancel()
                log(f"{stage_name}: timed out after {timeout_s:.0f}s; using fallback")
                return fallback
            except Exception as e:
                log(f"{stage_name}: failed ({type(e).__name__}: {e}); using fallback")
                return fallback

        def _fmt_score(value):
            """Three-decimal text for numbers; anything else is passed through."""
            return f"{value:.3f}" if isinstance(value, (int, float)) else value

        t_total0 = time.perf_counter()
        log("Start: received prompt")
        prompt_in = (user_prompt or "").strip()
        if not prompt_in:
            return _build_ui_payload(
                console_text="Error: empty prompt",
                legacy_prompt_text="",
                row_defs=[],
                selected_tags=[],
            )

        log("Input:")
        log(prompt_in)
        log("")
        log(
            "Runtime config: "
            f"retrieval_global_k={retrieval_global_k} "
            f"retrieval_per_phrase_k={retrieval_per_phrase_k} "
            f"retrieval_per_phrase_final_k={retrieval_per_phrase_final_k} "
            f"selection_mode={selection_mode} "
            f"selection_chunk_size={selection_chunk_size} "
            f"selection_per_phrase_k={selection_per_phrase_k}"
        )
        log("")

        t0 = time.perf_counter()
        user_tags = extract_user_provided_tags_upto_3_words(prompt_in)
        dt = time.perf_counter() - t0
        _record_timing("preprocess", dt)
        log(f"Preprocess (user tag extraction): {dt:.2f}s")
        log("Heuristically extracted user tags:")
        if user_tags:
            log(", ".join(user_tags))
        else:
            log("(none)")
        log("")

        log("Step 1: LLM rewrite + structural inference + probe (concurrent)")
        max_workers = 3 if enable_probe_tags else 2
        # Deliberately not a `with` block: leaving a `with ThreadPoolExecutor`
        # waits for every running worker, which would make the stage timeouts
        # below ineffective when an LLM call hangs.
        ex = ThreadPoolExecutor(max_workers=max_workers)
        try:
            fut_rewrite = ex.submit(llm_rewrite_prompt, prompt_in, log)
            fut_struct = ex.submit(llm_infer_structural_tags, prompt_in, log=log)
            fut_probe = ex.submit(llm_infer_probe_tags, prompt_in, log=log) if enable_probe_tags else None

            rewritten = _future_with_timeout(
                fut_rewrite, stage1_rewrite_timeout_s, "Rewrite", prompt_in
            )
            structural_tags = _future_with_timeout(
                fut_struct, stage1_struct_timeout_s, "Structural inference", []
            )
            probe_tags = (
                _future_with_timeout(fut_probe, stage1_probe_timeout_s, "Probe inference", [])
                if fut_probe else []
            )
        finally:
            ex.shutdown(wait=False)

        log("Rewrite:")
        log(rewritten if rewritten else "(empty)")
        log("")

        rewrite_for_retrieval = rewritten
        if user_tags:
            # keep them separate in logs, but allow them to help retrieval
            # (`or ""` guards against a rewrite stage that returned None)
            rewrite_for_retrieval = ((rewritten or "") + ", " + ", ".join(user_tags)).strip(", ").strip()

        log("Step 2: Prompt Squirrel retrieval (hidden)")
        try:
            t0 = time.perf_counter()
            retrieval_context_tags = list(dict.fromkeys((structural_tags or []) + (probe_tags or [])))
            rewrite_phrases = [p.strip() for p in (rewrite_for_retrieval or "").split(",") if p.strip()]
            retrieval_result = psq_candidates_from_rewrite_phrases(
                rewrite_phrases=rewrite_phrases,
                allow_nsfw_tags=allow_nsfw_tags,
                context_tags=retrieval_context_tags,
                global_k=max(1, retrieval_global_k),
                per_phrase_k=max(1, retrieval_per_phrase_k),
                per_phrase_final_k=max(1, retrieval_per_phrase_final_k),
                verbose=verbose_retrieval,
            )
            if isinstance(retrieval_result, tuple):
                candidates, phrase_reports = retrieval_result
            else:
                candidates, phrase_reports = retrieval_result, []
            if selection_candidate_cap > 0 and len(candidates) > selection_candidate_cap:
                candidates = candidates[:selection_candidate_cap]
                log(f"Selection candidate cap applied: {selection_candidate_cap}")
            dt = time.perf_counter() - t0
            _record_timing("retrieval", dt)
            log(f"Retrieval: {dt:.2f}s")
            log(f"Retrieved {len(candidates)} candidate tags")
            if verbose_retrieval:
                log(f"Total unique candidates: {len(candidates)}")
                limit = None if verbose_retrieval_all else max(1, int(verbose_retrieval_limit))
                for report in phrase_reports:
                    phrase = report.get("normalized") or report.get("phrase") or ""
                    lookup = report.get("lookup") or ""
                    tfidf_vocab = report.get("tfidf_vocab")
                    log(f"Phrase: {phrase} (lookup={lookup}) tfidf_vocab={tfidf_vocab}")
                    rows = report.get("candidates", [])
                    shown = rows if limit is None else rows[:limit]
                    for row in shown:
                        tag = row.get("tag")
                        alias_token = row.get("alias_token")
                        count = row.get("count")
                        alias_part = ""
                        if alias_token and alias_token != tag:
                            alias_part = f" [alias_token={alias_token}]"
                        fasttext_str = _fmt_score(row.get("score_fasttext"))
                        context_str = _fmt_score(row.get("score_context"))
                        combined_str = _fmt_score(row.get("score_combined"))
                        log(
                            f" {tag}{alias_part} | fasttext={fasttext_str} context={context_str} "
                            f"combined={combined_str} count={count}"
                        )
                    if limit is not None and len(rows) > limit:
                        log(f" ... ({len(rows) - limit} more)")
        except Exception as e:
            log(f"Retrieval fallback: {type(e).__name__}: {e}")
            candidates = []

        log("Step 3: LLM index selection (uses rewrite + structural/probe context)")
        selection_query = _build_selection_query(
            prompt_in=prompt_in,
            rewritten=rewritten,
            structural_tags=structural_tags,
            probe_tags=probe_tags,
        )
        # Same reasoning as in step 1: do not block on a hung worker at exit.
        ex = ThreadPoolExecutor(max_workers=1)
        try:
            fut_sel = ex.submit(
                llm_select_indices,
                query_text=selection_query,
                candidates=candidates,
                max_pick=0,
                log=log,
                mode=selection_mode,
                chunk_size=max(1, selection_chunk_size),
                per_phrase_k=max(1, selection_per_phrase_k),
            )
            picked_indices = _future_with_timeout(
                fut_sel, stage3_select_timeout_s, "Index selection", []
            )
        finally:
            ex.shutdown(wait=False)

        # An index outside the candidate list would otherwise raise and throw
        # away the whole run; skip such indices and say so in the log.
        picked_indices = picked_indices or []
        n_candidates = len(candidates)
        valid_indices = [i for i in picked_indices if 0 <= i < n_candidates]
        dropped = len(picked_indices) - len(valid_indices)
        if dropped:
            log(f"Index selection: ignored {dropped} out-of-range indices")
        selection_selected_tags = [candidates[i].tag for i in valid_indices]
        selected_tags = list(selection_selected_tags)

        if structural_tags:
            # Add structural tags that aren't already selected
            existing = set(selected_tags)
            new_structural = [t for t in structural_tags if t not in existing]
            selected_tags.extend(new_structural)
            log(f" Added {len(new_structural)} structural tags: {', '.join(new_structural)}")
        else:
            log(" No structural tags inferred")

        if probe_tags:
            existing = set(selected_tags)
            new_probe = [t for t in probe_tags if t not in existing]
            selected_tags.extend(new_probe)
            log(f" Added {len(new_probe)} probe tags: {', '.join(new_probe)}")
        elif enable_probe_tags:
            log(" No probe tags inferred")

        selected_tags, removed_excluded_direct = _filter_excluded_recommendation_tags(selected_tags)
        if removed_excluded_direct:
            log(f" Removed {len(removed_excluded_direct)} excluded tags: {', '.join(removed_excluded_direct)}")

        # Snapshot of the tags chosen before implications are added.
        direct_selected_tags = list(dict.fromkeys(selected_tags))

        log("Step 3c: Expand via tag implications")
        t0 = time.perf_counter()
        tag_set = set(selected_tags)
        _, implied_only = expand_tags_via_implications(tag_set)
        dt = time.perf_counter() - t0
        _record_timing("implication_expansion", dt)
        log(f"Implication expansion: {dt:.2f}s")
        implied_selected_tags = sorted(implied_only) if implied_only else []
        if implied_only:
            selected_tags.extend(implied_selected_tags)
            log(f" Added {len(implied_only)} implied tags: {', '.join(implied_selected_tags)}")
        else:
            log(" No additional implied tags")

        selected_tags, removed_excluded_implied = _filter_excluded_recommendation_tags(selected_tags)
        implied_selected_tags = [
            t for t in implied_selected_tags if not _is_excluded_recommendation_tag(t)
        ]
        if removed_excluded_implied:
            log(
                f" Removed {len(removed_excluded_implied)} excluded tags after implications: "
                f"{', '.join(removed_excluded_implied)}"
            )

        log("Step 4: Compose final prompt")
        t0 = time.perf_counter()
        final_prompt = compose_final_prompt(rewritten, selected_tags)
        dt = time.perf_counter() - t0
        _record_timing("prompt_composition", dt)
        log(f"Prompt composition: {dt:.2f}s")

        log("Step 5: Build ranked group/category display")
        t0 = time.perf_counter()
        rewritten_phrases = [p.strip() for p in (rewritten or "").split(",") if p.strip()]
        seed_terms = []
        seed_terms.extend(user_tags or [])
        seed_terms.extend(rewritten_phrases)
        seed_terms.extend(structural_tags or [])
        seed_terms.extend(probe_tags or [])
        seed_terms.extend(selected_tags)
        seed_terms = list(dict.fromkeys(seed_terms))

        active_selected_tags = list(dict.fromkeys(selected_tags))
        structural_set = {_norm_tag_for_lookup(t) for t in (structural_tags or []) if t}
        probe_set = {_norm_tag_for_lookup(t) for t in (probe_tags or []) if t}
        implied_set = {_norm_tag_for_lookup(t) for t in (implied_selected_tags or []) if t}
        rewrite_set = {
            _norm_tag_for_lookup(t)
            for t in (list(user_tags or []) + rewritten_phrases)
            if t
        }
        selection_set = {_norm_tag_for_lookup(t) for t in (selection_selected_tags or []) if t}

        # First matching source wins: structural, probe, rewrite, selection,
        # implied.
        tag_selection_origins: Dict[str, str] = {}
        for tag in active_selected_tags:
            tag_norm = _norm_tag_for_lookup(tag)
            if tag_norm in structural_set:
                origin = "structural"
            elif tag_norm in probe_set:
                origin = "probe"
            elif tag_norm in rewrite_set:
                origin = "rewrite"
            elif tag_norm in selection_set:
                origin = "selection"
            elif tag_norm in implied_set:
                origin = "implied"
            else:
                # Unknown/fallback tags use selection color.
                origin = "selection"
            tag_selection_origins[tag] = origin
            if tag_norm and tag_norm != tag:
                tag_selection_origins[tag_norm] = origin

        direct_tags_for_implied = list(
            dict.fromkeys(_norm_tag_for_lookup(t) for t in (direct_selected_tags or []) if t)
        )
        direct_tags_for_implied_idx = {t: i for i, t in enumerate(direct_tags_for_implied)}
        # Order by source rank, then by original position.
        direct_tags_for_implied.sort(
            key=lambda t: (
                _selection_source_rank(tag_selection_origins.get(t, "selection")),
                direct_tags_for_implied_idx.get(t, 10**9),
            )
        )
        implied_parent_map = _build_implied_parent_map(
            direct_tags_ordered=direct_tags_for_implied,
            implied_tags=implied_selected_tags,
        )

        toggle_rows = _build_toggle_rows(
            seed_terms=seed_terms,
            selected_tags=active_selected_tags,
            tag_selection_origins=tag_selection_origins,
            implied_parent_map=implied_parent_map,
            top_groups=max(1, int(display_top_groups)),
            top_tags_per_group=max(1, int(display_top_tags_per_group)),
            group_rank_top_k=max(1, int(display_rank_top_k)),
        )
        dt = time.perf_counter() - t0
        _record_timing("group_display", dt)
        log(f"Ranked group display: {dt:.2f}s ({len(toggle_rows)} rows)")
        log(
            _build_display_audit_line(
                toggle_rows,
                active_selected_tags=active_selected_tags,
                direct_selected_tags=direct_selected_tags,
                implied_selected_tags=implied_selected_tags,
            )
        )

        total_dt = time.perf_counter() - t_total0
        _emit_timing_summary(total_dt)
        _append_timing_jsonl(total_dt)
        log("Done: final prompt ready")
        return _build_ui_payload(
            console_text="\n".join(logs),
            legacy_prompt_text=final_prompt,
            row_defs=toggle_rows,
            selected_tags=active_selected_tags,
        )

    except Exception as e:
        # Top-level boundary for the UI callback: report in the console
        # instead of surfacing a traceback to the user.
        log(f"Error: {type(e).__name__}: {e}")
        return _build_ui_payload(
            console_text="\n".join(logs),
            legacy_prompt_text="",
            row_defs=[],
            selected_tags=[],
        )
|
| 1244 |
+
|
| 1245 |
+
|
| 1246 |
+
|
| 1247 |
+
# Gradio UI. Components are created in the order they should appear; the
# pipeline callback fills the console, both prompt boxes, three state holders
# and a fixed pool of hidden header/checkbox rows.
with gr.Blocks(css=css, js=client_js) as app:
    with gr.Row():
        with gr.Column(scale=3, elem_classes=["prompt-col"]):
            image_tags = gr.Textbox(
                lines=1,
                label="Enter Prompt",
                placeholder="e.g. fox, outside, detailed background, .",
            )
        with gr.Column(scale=1):
            _mascot_pil = _load_mascot_image()
            if _mascot_pil is None:
                mascot_img = gr.Markdown("`(mascot image unavailable)`")
            else:
                mascot_img = gr.Image(
                    elem_id="mascot",
                    height=220,
                    interactive=False,
                    show_label=False,
                    value=_mascot_pil,
                )
            # NOTE(review): the button is assumed to sit in the mascot column,
            # directly under the image — confirm against the intended layout.
            submit_button = gr.Button("Run", variant="primary")

    gr.Markdown(
        """
### Prompt Squirrel RAG (pipeline version)

Type a rough prompt. This tool rewrites it and aligns it to an e621-style tag vocabulary using Prompt Squirrel internally,
then returns a cleaned, model-friendly prompt.
""".strip()
    )

    console = gr.Textbox(
        interactive=False,
        label="Console",
        lines=10,
        placeholder="Progress logs will appear here.",
    )

    suggested_prompt = gr.Textbox(
        interactive=False,
        label="Suggested Prompt (From Toggled Tags)",
        lines=3,
        placeholder="Comma-separated tags selected in the rows below.",
        show_copy_button=True,
    )

    with gr.Accordion("Legacy Pipeline Prompt (for reference)", open=False):
        legacy_final_prompt = gr.Textbox(
            interactive=False,
            label="Legacy Final Prompt",
            lines=3,
            show_copy_button=True,
        )

    # Per-session state shared between the pipeline run and the row toggles.
    selected_tags_state = gr.State([])
    row_defs_state = gr.State([])
    row_values_state = gr.State([])

    gr.Markdown("### Toggle Tag Rows")
    gr.HTML(
        """
<div class="source-legend">
<span class="chip"><span class="swatch rewrite"></span>Rewrite phrase</span>
<span class="chip"><span class="swatch selection"></span>General selection</span>
<span class="chip"><span class="swatch probe"></span>Probe query</span>
<span class="chip"><span class="swatch structural"></span>Structural query</span>
<span class="chip"><span class="swatch implied"></span>Implied</span>
<span class="chip"><span class="swatch user"></span>User-toggled</span>
<span class="chip"><span class="swatch unselected"></span>Unselected</span>
</div>
"""
    )
    gr.Markdown(
        "Rows are ranked by expected tag count (E). Within each row: structural -> probe -> selected, "
        "implied tags follow their triggering selected tag when possible, then unselected tags in confidence order."
    )

    # Pre-create a fixed pool of hidden rows (header, then its checkbox group).
    row_headers: List[gr.Markdown] = []
    row_checkboxes: List[gr.CheckboxGroup] = []
    for _ in range(display_max_rows_default):
        header = gr.Markdown(value="", visible=False)
        checkbox_group = gr.CheckboxGroup(
            choices=[],
            value=[],
            visible=False,
            interactive=True,
            container=False,
            elem_classes=["lego-tags"],
        )
        row_headers.append(header)
        row_checkboxes.append(checkbox_group)

    gr.Markdown(
        "Toggling a tag in any row toggles it everywhere else that tag appears."
    )

    def _count_input(default_value, label):
        """Create a whole-number input with a lower bound of 1."""
        return gr.Number(value=default_value, precision=0, label=label, minimum=1)

    with gr.Accordion("Display Settings", open=False):
        with gr.Row():
            display_top_groups = _count_input(
                display_top_groups_default, "Rows (Top Groups/Categories)"
            )
            display_top_tags_per_group = _count_input(
                display_top_tags_per_group_default, "Top Tags Shown Per Row"
            )
            display_rank_top_k = _count_input(
                display_rank_top_k_default, "Top Tags Used for Row Ranking"
            )

    pipeline_inputs = [
        image_tags,
        display_top_groups,
        display_top_tags_per_group,
        display_rank_top_k,
    ]
    run_outputs = [
        console,
        legacy_final_prompt,
        suggested_prompt,
        selected_tags_state,
        row_defs_state,
        row_values_state,
        *row_headers,
        *row_checkboxes,
    ]

    # The Run button and pressing Enter in the prompt box start the same run.
    for register in (submit_button.click, image_tags.submit):
        register(rag_pipeline_ui, inputs=pipeline_inputs, outputs=run_outputs)

    def _toggle_handler_for(row_index):
        """Return a change handler bound to one checkbox row's index."""
        def _handler(changed_values, selected_state, row_defs, row_values):
            return _on_toggle_row(
                row_index,
                changed_values,
                selected_state,
                row_defs,
                row_values,
                display_max_rows_default,
            )
        return _handler

    for idx, row_cb in enumerate(row_checkboxes):
        row_cb.change(
            fn=_toggle_handler_for(idx),
            inputs=[row_cb, selected_tags_state, row_defs_state, row_values_state],
            outputs=[selected_tags_state, row_values_state, suggested_prompt, *row_checkboxes],
            queue=False,
            show_progress="hidden",
        )

if __name__ == "__main__":
    app.queue().launch(allowed_paths=[str(MASCOT_DIR)])
|
data/analysis/category_registry.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/analysis/hybrid_category_assignment_preview.json
ADDED
|
@@ -0,0 +1,2753 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"tfidf_weight": 0.6,
|
| 4 |
+
"wiki_weight": 0.4,
|
| 5 |
+
"tfidf_temp": 0.08,
|
| 6 |
+
"single_top1_min": 0.55,
|
| 7 |
+
"single_margin_min": 0.18,
|
| 8 |
+
"single_top2_max": 0.35,
|
| 9 |
+
"multi_top1_min": 0.42,
|
| 10 |
+
"multi_top2_min": 0.3,
|
| 11 |
+
"multi_pair_min": 0.78,
|
| 12 |
+
"sample_size": 20,
|
| 13 |
+
"seed": 42
|
| 14 |
+
},
|
| 15 |
+
"inputs": {
|
| 16 |
+
"registry_csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\category_registry.csv",
|
| 17 |
+
"wiki_pages_csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\wiki_pages-2023-08-08.csv",
|
| 18 |
+
"uncategorized_tags": 6261,
|
| 19 |
+
"active_categories_for_centroids": 19,
|
| 20 |
+
"centroid_seed_sizes": {
|
| 21 |
+
"anatomy_features": 407,
|
| 22 |
+
"background_composition": 84,
|
| 23 |
+
"body_decor": 9,
|
| 24 |
+
"body_type": 7,
|
| 25 |
+
"clothing_detail": 418,
|
| 26 |
+
"color_markings": 234,
|
| 27 |
+
"count": 5,
|
| 28 |
+
"expression_detail": 31,
|
| 29 |
+
"franchise_series": 86,
|
| 30 |
+
"gaze_detail": 22,
|
| 31 |
+
"gender": 3,
|
| 32 |
+
"objects_props": 264,
|
| 33 |
+
"organization": 8,
|
| 34 |
+
"perspective": 6,
|
| 35 |
+
"pose_action_detail": 100,
|
| 36 |
+
"resolution": 3,
|
| 37 |
+
"species": 13,
|
| 38 |
+
"style": 9,
|
| 39 |
+
"text": 4
|
| 40 |
+
}
|
| 41 |
+
},
|
| 42 |
+
"summary": {
|
| 43 |
+
"counts": {
|
| 44 |
+
"uncategorized_total": 6261,
|
| 45 |
+
"scored_rows": 6261,
|
| 46 |
+
"has_tfidf_vector": 5089,
|
| 47 |
+
"has_wiki_page": 4368,
|
| 48 |
+
"has_wiki_category_votes": 1957,
|
| 49 |
+
"signals": {
|
| 50 |
+
"tfidf_only": 3176,
|
| 51 |
+
"both": 1913,
|
| 52 |
+
"none": 1128,
|
| 53 |
+
"wiki_only": 44
|
| 54 |
+
},
|
| 55 |
+
"assignments": {
|
| 56 |
+
"hold": 5997,
|
| 57 |
+
"multi": 31,
|
| 58 |
+
"single": 233
|
| 59 |
+
},
|
| 60 |
+
"newly_categorized": 264,
|
| 61 |
+
"remaining_uncategorized": 5997,
|
| 62 |
+
"multi_category_additions": 62
|
| 63 |
+
},
|
| 64 |
+
"top_single_categories": [
|
| 65 |
+
[
|
| 66 |
+
"franchise_series",
|
| 67 |
+
177
|
| 68 |
+
],
|
| 69 |
+
[
|
| 70 |
+
"clothing_detail",
|
| 71 |
+
13
|
| 72 |
+
],
|
| 73 |
+
[
|
| 74 |
+
"anatomy_features",
|
| 75 |
+
9
|
| 76 |
+
],
|
| 77 |
+
[
|
| 78 |
+
"text",
|
| 79 |
+
8
|
| 80 |
+
],
|
| 81 |
+
[
|
| 82 |
+
"organization",
|
| 83 |
+
7
|
| 84 |
+
],
|
| 85 |
+
[
|
| 86 |
+
"body_type",
|
| 87 |
+
6
|
| 88 |
+
],
|
| 89 |
+
[
|
| 90 |
+
"style",
|
| 91 |
+
2
|
| 92 |
+
],
|
| 93 |
+
[
|
| 94 |
+
"species",
|
| 95 |
+
2
|
| 96 |
+
],
|
| 97 |
+
[
|
| 98 |
+
"body_decor",
|
| 99 |
+
2
|
| 100 |
+
],
|
| 101 |
+
[
|
| 102 |
+
"objects_props",
|
| 103 |
+
2
|
| 104 |
+
],
|
| 105 |
+
[
|
| 106 |
+
"background_composition",
|
| 107 |
+
1
|
| 108 |
+
],
|
| 109 |
+
[
|
| 110 |
+
"color_markings",
|
| 111 |
+
1
|
| 112 |
+
],
|
| 113 |
+
[
|
| 114 |
+
"expression_detail",
|
| 115 |
+
1
|
| 116 |
+
],
|
| 117 |
+
[
|
| 118 |
+
"pose_action_detail",
|
| 119 |
+
1
|
| 120 |
+
],
|
| 121 |
+
[
|
| 122 |
+
"count",
|
| 123 |
+
1
|
| 124 |
+
]
|
| 125 |
+
],
|
| 126 |
+
"top_multi_category_pairs": [
|
| 127 |
+
{
|
| 128 |
+
"categories": [
|
| 129 |
+
"body_type",
|
| 130 |
+
"franchise_series"
|
| 131 |
+
],
|
| 132 |
+
"count": 7
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"categories": [
|
| 136 |
+
"objects_props",
|
| 137 |
+
"pose_action_detail"
|
| 138 |
+
],
|
| 139 |
+
"count": 2
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"categories": [
|
| 143 |
+
"franchise_series",
|
| 144 |
+
"gender"
|
| 145 |
+
],
|
| 146 |
+
"count": 2
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"categories": [
|
| 150 |
+
"body_type",
|
| 151 |
+
"species"
|
| 152 |
+
],
|
| 153 |
+
"count": 2
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"categories": [
|
| 157 |
+
"color_markings",
|
| 158 |
+
"franchise_series"
|
| 159 |
+
],
|
| 160 |
+
"count": 2
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"categories": [
|
| 164 |
+
"anatomy_features",
|
| 165 |
+
"color_markings"
|
| 166 |
+
],
|
| 167 |
+
"count": 2
|
| 168 |
+
},
|
| 169 |
+
{
|
| 170 |
+
"categories": [
|
| 171 |
+
"expression_detail",
|
| 172 |
+
"pose_action_detail"
|
| 173 |
+
],
|
| 174 |
+
"count": 1
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"categories": [
|
| 178 |
+
"expression_detail",
|
| 179 |
+
"text"
|
| 180 |
+
],
|
| 181 |
+
"count": 1
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"categories": [
|
| 185 |
+
"color_markings",
|
| 186 |
+
"style"
|
| 187 |
+
],
|
| 188 |
+
"count": 1
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"categories": [
|
| 192 |
+
"anatomy_features",
|
| 193 |
+
"objects_props"
|
| 194 |
+
],
|
| 195 |
+
"count": 1
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"categories": [
|
| 199 |
+
"franchise_series",
|
| 200 |
+
"species"
|
| 201 |
+
],
|
| 202 |
+
"count": 1
|
| 203 |
+
},
|
| 204 |
+
{
|
| 205 |
+
"categories": [
|
| 206 |
+
"perspective",
|
| 207 |
+
"pose_action_detail"
|
| 208 |
+
],
|
| 209 |
+
"count": 1
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"categories": [
|
| 213 |
+
"count",
|
| 214 |
+
"franchise_series"
|
| 215 |
+
],
|
| 216 |
+
"count": 1
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"categories": [
|
| 220 |
+
"clothing_detail",
|
| 221 |
+
"franchise_series"
|
| 222 |
+
],
|
| 223 |
+
"count": 1
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"categories": [
|
| 227 |
+
"body_decor",
|
| 228 |
+
"objects_props"
|
| 229 |
+
],
|
| 230 |
+
"count": 1
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"categories": [
|
| 234 |
+
"background_composition",
|
| 235 |
+
"franchise_series"
|
| 236 |
+
],
|
| 237 |
+
"count": 1
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"categories": [
|
| 241 |
+
"anatomy_features",
|
| 242 |
+
"species"
|
| 243 |
+
],
|
| 244 |
+
"count": 1
|
| 245 |
+
},
|
| 246 |
+
{
|
| 247 |
+
"categories": [
|
| 248 |
+
"anatomy_features",
|
| 249 |
+
"franchise_series"
|
| 250 |
+
],
|
| 251 |
+
"count": 1
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"categories": [
|
| 255 |
+
"body_type",
|
| 256 |
+
"gender"
|
| 257 |
+
],
|
| 258 |
+
"count": 1
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"categories": [
|
| 262 |
+
"clothing_detail",
|
| 263 |
+
"color_markings"
|
| 264 |
+
],
|
| 265 |
+
"count": 1
|
| 266 |
+
}
|
| 267 |
+
],
|
| 268 |
+
"samples": {
|
| 269 |
+
"single": [
|
| 270 |
+
{
|
| 271 |
+
"tag": "eeveelution",
|
| 272 |
+
"count": 58150,
|
| 273 |
+
"signal": "both",
|
| 274 |
+
"assigned_categories": [
|
| 275 |
+
"franchise_series"
|
| 276 |
+
],
|
| 277 |
+
"top_fused": [
|
| 278 |
+
[
|
| 279 |
+
"franchise_series",
|
| 280 |
+
0.721
|
| 281 |
+
],
|
| 282 |
+
[
|
| 283 |
+
"gender",
|
| 284 |
+
0.0689
|
| 285 |
+
],
|
| 286 |
+
[
|
| 287 |
+
"resolution",
|
| 288 |
+
0.0234
|
| 289 |
+
]
|
| 290 |
+
],
|
| 291 |
+
"top_tfidf": [
|
| 292 |
+
[
|
| 293 |
+
"franchise_series",
|
| 294 |
+
0.535
|
| 295 |
+
],
|
| 296 |
+
[
|
| 297 |
+
"gender",
|
| 298 |
+
0.1148
|
| 299 |
+
],
|
| 300 |
+
[
|
| 301 |
+
"resolution",
|
| 302 |
+
0.039
|
| 303 |
+
]
|
| 304 |
+
],
|
| 305 |
+
"top_wiki": [
|
| 306 |
+
[
|
| 307 |
+
"franchise_series",
|
| 308 |
+
1.0
|
| 309 |
+
],
|
| 310 |
+
[
|
| 311 |
+
"text",
|
| 312 |
+
0.0
|
| 313 |
+
],
|
| 314 |
+
[
|
| 315 |
+
"background_composition",
|
| 316 |
+
0.0
|
| 317 |
+
]
|
| 318 |
+
],
|
| 319 |
+
"wiki_vote_count": 2,
|
| 320 |
+
"wiki_link_count": 13
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"tag": "boss_monster",
|
| 324 |
+
"count": 19924,
|
| 325 |
+
"signal": "wiki_only",
|
| 326 |
+
"assigned_categories": [
|
| 327 |
+
"anatomy_features"
|
| 328 |
+
],
|
| 329 |
+
"top_fused": [
|
| 330 |
+
[
|
| 331 |
+
"anatomy_features",
|
| 332 |
+
1.0
|
| 333 |
+
],
|
| 334 |
+
[
|
| 335 |
+
"franchise_series",
|
| 336 |
+
0.0
|
| 337 |
+
],
|
| 338 |
+
[
|
| 339 |
+
"background_composition",
|
| 340 |
+
0.0
|
| 341 |
+
]
|
| 342 |
+
],
|
| 343 |
+
"top_tfidf": [],
|
| 344 |
+
"top_wiki": [
|
| 345 |
+
[
|
| 346 |
+
"anatomy_features",
|
| 347 |
+
1.0
|
| 348 |
+
],
|
| 349 |
+
[
|
| 350 |
+
"franchise_series",
|
| 351 |
+
0.0
|
| 352 |
+
],
|
| 353 |
+
[
|
| 354 |
+
"background_composition",
|
| 355 |
+
0.0
|
| 356 |
+
]
|
| 357 |
+
],
|
| 358 |
+
"wiki_vote_count": 4,
|
| 359 |
+
"wiki_link_count": 19
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"tag": "blaze_the_cat",
|
| 363 |
+
"count": 7169,
|
| 364 |
+
"signal": "both",
|
| 365 |
+
"assigned_categories": [
|
| 366 |
+
"franchise_series"
|
| 367 |
+
],
|
| 368 |
+
"top_fused": [
|
| 369 |
+
[
|
| 370 |
+
"franchise_series",
|
| 371 |
+
0.6294
|
| 372 |
+
],
|
| 373 |
+
[
|
| 374 |
+
"resolution",
|
| 375 |
+
0.0338
|
| 376 |
+
],
|
| 377 |
+
[
|
| 378 |
+
"expression_detail",
|
| 379 |
+
0.0334
|
| 380 |
+
]
|
| 381 |
+
],
|
| 382 |
+
"top_tfidf": [
|
| 383 |
+
[
|
| 384 |
+
"franchise_series",
|
| 385 |
+
0.3824
|
| 386 |
+
],
|
| 387 |
+
[
|
| 388 |
+
"resolution",
|
| 389 |
+
0.0563
|
| 390 |
+
],
|
| 391 |
+
[
|
| 392 |
+
"expression_detail",
|
| 393 |
+
0.0556
|
| 394 |
+
]
|
| 395 |
+
],
|
| 396 |
+
"top_wiki": [
|
| 397 |
+
[
|
| 398 |
+
"franchise_series",
|
| 399 |
+
1.0
|
| 400 |
+
],
|
| 401 |
+
[
|
| 402 |
+
"text",
|
| 403 |
+
0.0
|
| 404 |
+
],
|
| 405 |
+
[
|
| 406 |
+
"background_composition",
|
| 407 |
+
0.0
|
| 408 |
+
]
|
| 409 |
+
],
|
| 410 |
+
"wiki_vote_count": 2,
|
| 411 |
+
"wiki_link_count": 6
|
| 412 |
+
},
|
| 413 |
+
{
|
| 414 |
+
"tag": "espeon",
|
| 415 |
+
"count": 7029,
|
| 416 |
+
"signal": "tfidf_only",
|
| 417 |
+
"assigned_categories": [
|
| 418 |
+
"franchise_series"
|
| 419 |
+
],
|
| 420 |
+
"top_fused": [
|
| 421 |
+
[
|
| 422 |
+
"franchise_series",
|
| 423 |
+
0.6445
|
| 424 |
+
],
|
| 425 |
+
[
|
| 426 |
+
"gender",
|
| 427 |
+
0.0854
|
| 428 |
+
],
|
| 429 |
+
[
|
| 430 |
+
"resolution",
|
| 431 |
+
0.0296
|
| 432 |
+
]
|
| 433 |
+
],
|
| 434 |
+
"top_tfidf": [
|
| 435 |
+
[
|
| 436 |
+
"franchise_series",
|
| 437 |
+
0.6445
|
| 438 |
+
],
|
| 439 |
+
[
|
| 440 |
+
"gender",
|
| 441 |
+
0.0854
|
| 442 |
+
],
|
| 443 |
+
[
|
| 444 |
+
"resolution",
|
| 445 |
+
0.0296
|
| 446 |
+
]
|
| 447 |
+
],
|
| 448 |
+
"top_wiki": [],
|
| 449 |
+
"wiki_vote_count": 0,
|
| 450 |
+
"wiki_link_count": 11
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"tag": "zangoose",
|
| 454 |
+
"count": 6959,
|
| 455 |
+
"signal": "tfidf_only",
|
| 456 |
+
"assigned_categories": [
|
| 457 |
+
"franchise_series"
|
| 458 |
+
],
|
| 459 |
+
"top_fused": [
|
| 460 |
+
[
|
| 461 |
+
"franchise_series",
|
| 462 |
+
0.6316
|
| 463 |
+
],
|
| 464 |
+
[
|
| 465 |
+
"gender",
|
| 466 |
+
0.0872
|
| 467 |
+
],
|
| 468 |
+
[
|
| 469 |
+
"resolution",
|
| 470 |
+
0.0371
|
| 471 |
+
]
|
| 472 |
+
],
|
| 473 |
+
"top_tfidf": [
|
| 474 |
+
[
|
| 475 |
+
"franchise_series",
|
| 476 |
+
0.6316
|
| 477 |
+
],
|
| 478 |
+
[
|
| 479 |
+
"gender",
|
| 480 |
+
0.0872
|
| 481 |
+
],
|
| 482 |
+
[
|
| 483 |
+
"resolution",
|
| 484 |
+
0.0371
|
| 485 |
+
]
|
| 486 |
+
],
|
| 487 |
+
"top_wiki": [],
|
| 488 |
+
"wiki_vote_count": 0,
|
| 489 |
+
"wiki_link_count": 4
|
| 490 |
+
},
|
| 491 |
+
{
|
| 492 |
+
"tag": "snivy",
|
| 493 |
+
"count": 3315,
|
| 494 |
+
"signal": "tfidf_only",
|
| 495 |
+
"assigned_categories": [
|
| 496 |
+
"franchise_series"
|
| 497 |
+
],
|
| 498 |
+
"top_fused": [
|
| 499 |
+
[
|
| 500 |
+
"franchise_series",
|
| 501 |
+
0.7953
|
| 502 |
+
],
|
| 503 |
+
[
|
| 504 |
+
"gender",
|
| 505 |
+
0.0605
|
| 506 |
+
],
|
| 507 |
+
[
|
| 508 |
+
"resolution",
|
| 509 |
+
0.0192
|
| 510 |
+
]
|
| 511 |
+
],
|
| 512 |
+
"top_tfidf": [
|
| 513 |
+
[
|
| 514 |
+
"franchise_series",
|
| 515 |
+
0.7953
|
| 516 |
+
],
|
| 517 |
+
[
|
| 518 |
+
"gender",
|
| 519 |
+
0.0605
|
| 520 |
+
],
|
| 521 |
+
[
|
| 522 |
+
"resolution",
|
| 523 |
+
0.0192
|
| 524 |
+
]
|
| 525 |
+
],
|
| 526 |
+
"top_wiki": [],
|
| 527 |
+
"wiki_vote_count": 0,
|
| 528 |
+
"wiki_link_count": 6
|
| 529 |
+
},
|
| 530 |
+
{
|
| 531 |
+
"tag": "buizel",
|
| 532 |
+
"count": 3220,
|
| 533 |
+
"signal": "tfidf_only",
|
| 534 |
+
"assigned_categories": [
|
| 535 |
+
"franchise_series"
|
| 536 |
+
],
|
| 537 |
+
"top_fused": [
|
| 538 |
+
[
|
| 539 |
+
"franchise_series",
|
| 540 |
+
0.6631
|
| 541 |
+
],
|
| 542 |
+
[
|
| 543 |
+
"gender",
|
| 544 |
+
0.0802
|
| 545 |
+
],
|
| 546 |
+
[
|
| 547 |
+
"resolution",
|
| 548 |
+
0.0254
|
| 549 |
+
]
|
| 550 |
+
],
|
| 551 |
+
"top_tfidf": [
|
| 552 |
+
[
|
| 553 |
+
"franchise_series",
|
| 554 |
+
0.6631
|
| 555 |
+
],
|
| 556 |
+
[
|
| 557 |
+
"gender",
|
| 558 |
+
0.0802
|
| 559 |
+
],
|
| 560 |
+
[
|
| 561 |
+
"resolution",
|
| 562 |
+
0.0254
|
| 563 |
+
]
|
| 564 |
+
],
|
| 565 |
+
"top_wiki": [],
|
| 566 |
+
"wiki_vote_count": 0,
|
| 567 |
+
"wiki_link_count": 2
|
| 568 |
+
},
|
| 569 |
+
{
|
| 570 |
+
"tag": "floatzel",
|
| 571 |
+
"count": 2957,
|
| 572 |
+
"signal": "tfidf_only",
|
| 573 |
+
"assigned_categories": [
|
| 574 |
+
"franchise_series"
|
| 575 |
+
],
|
| 576 |
+
"top_fused": [
|
| 577 |
+
[
|
| 578 |
+
"franchise_series",
|
| 579 |
+
0.674
|
| 580 |
+
],
|
| 581 |
+
[
|
| 582 |
+
"gender",
|
| 583 |
+
0.0738
|
| 584 |
+
],
|
| 585 |
+
[
|
| 586 |
+
"resolution",
|
| 587 |
+
0.0261
|
| 588 |
+
]
|
| 589 |
+
],
|
| 590 |
+
"top_tfidf": [
|
| 591 |
+
[
|
| 592 |
+
"franchise_series",
|
| 593 |
+
0.674
|
| 594 |
+
],
|
| 595 |
+
[
|
| 596 |
+
"gender",
|
| 597 |
+
0.0738
|
| 598 |
+
],
|
| 599 |
+
[
|
| 600 |
+
"resolution",
|
| 601 |
+
0.0261
|
| 602 |
+
]
|
| 603 |
+
],
|
| 604 |
+
"top_wiki": [],
|
| 605 |
+
"wiki_vote_count": 0,
|
| 606 |
+
"wiki_link_count": 2
|
| 607 |
+
},
|
| 608 |
+
{
|
| 609 |
+
"tag": "charmeleon",
|
| 610 |
+
"count": 2899,
|
| 611 |
+
"signal": "tfidf_only",
|
| 612 |
+
"assigned_categories": [
|
| 613 |
+
"franchise_series"
|
| 614 |
+
],
|
| 615 |
+
"top_fused": [
|
| 616 |
+
[
|
| 617 |
+
"franchise_series",
|
| 618 |
+
0.6974
|
| 619 |
+
],
|
| 620 |
+
[
|
| 621 |
+
"gender",
|
| 622 |
+
0.0566
|
| 623 |
+
],
|
| 624 |
+
[
|
| 625 |
+
"count",
|
| 626 |
+
0.0233
|
| 627 |
+
]
|
| 628 |
+
],
|
| 629 |
+
"top_tfidf": [
|
| 630 |
+
[
|
| 631 |
+
"franchise_series",
|
| 632 |
+
0.6974
|
| 633 |
+
],
|
| 634 |
+
[
|
| 635 |
+
"gender",
|
| 636 |
+
0.0566
|
| 637 |
+
],
|
| 638 |
+
[
|
| 639 |
+
"count",
|
| 640 |
+
0.0233
|
| 641 |
+
]
|
| 642 |
+
],
|
| 643 |
+
"top_wiki": [],
|
| 644 |
+
"wiki_vote_count": 0,
|
| 645 |
+
"wiki_link_count": 9
|
| 646 |
+
},
|
| 647 |
+
{
|
| 648 |
+
"tag": "dragonite",
|
| 649 |
+
"count": 2477,
|
| 650 |
+
"signal": "tfidf_only",
|
| 651 |
+
"assigned_categories": [
|
| 652 |
+
"franchise_series"
|
| 653 |
+
],
|
| 654 |
+
"top_fused": [
|
| 655 |
+
[
|
| 656 |
+
"franchise_series",
|
| 657 |
+
0.6717
|
| 658 |
+
],
|
| 659 |
+
[
|
| 660 |
+
"gender",
|
| 661 |
+
0.0623
|
| 662 |
+
],
|
| 663 |
+
[
|
| 664 |
+
"resolution",
|
| 665 |
+
0.0246
|
| 666 |
+
]
|
| 667 |
+
],
|
| 668 |
+
"top_tfidf": [
|
| 669 |
+
[
|
| 670 |
+
"franchise_series",
|
| 671 |
+
0.6717
|
| 672 |
+
],
|
| 673 |
+
[
|
| 674 |
+
"gender",
|
| 675 |
+
0.0623
|
| 676 |
+
],
|
| 677 |
+
[
|
| 678 |
+
"resolution",
|
| 679 |
+
0.0246
|
| 680 |
+
]
|
| 681 |
+
],
|
| 682 |
+
"top_wiki": [],
|
| 683 |
+
"wiki_vote_count": 0,
|
| 684 |
+
"wiki_link_count": 3
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"tag": "ampharos",
|
| 688 |
+
"count": 2449,
|
| 689 |
+
"signal": "tfidf_only",
|
| 690 |
+
"assigned_categories": [
|
| 691 |
+
"franchise_series"
|
| 692 |
+
],
|
| 693 |
+
"top_fused": [
|
| 694 |
+
[
|
| 695 |
+
"franchise_series",
|
| 696 |
+
0.7534
|
| 697 |
+
],
|
| 698 |
+
[
|
| 699 |
+
"gender",
|
| 700 |
+
0.0559
|
| 701 |
+
],
|
| 702 |
+
[
|
| 703 |
+
"resolution",
|
| 704 |
+
0.0207
|
| 705 |
+
]
|
| 706 |
+
],
|
| 707 |
+
"top_tfidf": [
|
| 708 |
+
[
|
| 709 |
+
"franchise_series",
|
| 710 |
+
0.7534
|
| 711 |
+
],
|
| 712 |
+
[
|
| 713 |
+
"gender",
|
| 714 |
+
0.0559
|
| 715 |
+
],
|
| 716 |
+
[
|
| 717 |
+
"resolution",
|
| 718 |
+
0.0207
|
| 719 |
+
]
|
| 720 |
+
],
|
| 721 |
+
"top_wiki": [],
|
| 722 |
+
"wiki_vote_count": 0,
|
| 723 |
+
"wiki_link_count": 4
|
| 724 |
+
},
|
| 725 |
+
{
|
| 726 |
+
"tag": "pichu",
|
| 727 |
+
"count": 1980,
|
| 728 |
+
"signal": "tfidf_only",
|
| 729 |
+
"assigned_categories": [
|
| 730 |
+
"franchise_series"
|
| 731 |
+
],
|
| 732 |
+
"top_fused": [
|
| 733 |
+
[
|
| 734 |
+
"franchise_series",
|
| 735 |
+
0.7864
|
| 736 |
+
],
|
| 737 |
+
[
|
| 738 |
+
"gender",
|
| 739 |
+
0.0402
|
| 740 |
+
],
|
| 741 |
+
[
|
| 742 |
+
"resolution",
|
| 743 |
+
0.0197
|
| 744 |
+
]
|
| 745 |
+
],
|
| 746 |
+
"top_tfidf": [
|
| 747 |
+
[
|
| 748 |
+
"franchise_series",
|
| 749 |
+
0.7864
|
| 750 |
+
],
|
| 751 |
+
[
|
| 752 |
+
"gender",
|
| 753 |
+
0.0402
|
| 754 |
+
],
|
| 755 |
+
[
|
| 756 |
+
"resolution",
|
| 757 |
+
0.0197
|
| 758 |
+
]
|
| 759 |
+
],
|
| 760 |
+
"top_wiki": [],
|
| 761 |
+
"wiki_vote_count": 0,
|
| 762 |
+
"wiki_link_count": 5
|
| 763 |
+
},
|
| 764 |
+
{
|
| 765 |
+
"tag": "quiver",
|
| 766 |
+
"count": 1372,
|
| 767 |
+
"signal": "wiki_only",
|
| 768 |
+
"assigned_categories": [
|
| 769 |
+
"objects_props"
|
| 770 |
+
],
|
| 771 |
+
"top_fused": [
|
| 772 |
+
[
|
| 773 |
+
"objects_props",
|
| 774 |
+
1.0
|
| 775 |
+
],
|
| 776 |
+
[
|
| 777 |
+
"text",
|
| 778 |
+
0.0
|
| 779 |
+
],
|
| 780 |
+
[
|
| 781 |
+
"franchise_series",
|
| 782 |
+
0.0
|
| 783 |
+
]
|
| 784 |
+
],
|
| 785 |
+
"top_tfidf": [],
|
| 786 |
+
"top_wiki": [
|
| 787 |
+
[
|
| 788 |
+
"objects_props",
|
| 789 |
+
1.0
|
| 790 |
+
],
|
| 791 |
+
[
|
| 792 |
+
"text",
|
| 793 |
+
0.0
|
| 794 |
+
],
|
| 795 |
+
[
|
| 796 |
+
"franchise_series",
|
| 797 |
+
0.0
|
| 798 |
+
]
|
| 799 |
+
],
|
| 800 |
+
"wiki_vote_count": 1,
|
| 801 |
+
"wiki_link_count": 4
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"tag": "snorlax",
|
| 805 |
+
"count": 1079,
|
| 806 |
+
"signal": "tfidf_only",
|
| 807 |
+
"assigned_categories": [
|
| 808 |
+
"franchise_series"
|
| 809 |
+
],
|
| 810 |
+
"top_fused": [
|
| 811 |
+
[
|
| 812 |
+
"franchise_series",
|
| 813 |
+
0.6034
|
| 814 |
+
],
|
| 815 |
+
[
|
| 816 |
+
"gender",
|
| 817 |
+
0.0515
|
| 818 |
+
],
|
| 819 |
+
[
|
| 820 |
+
"resolution",
|
| 821 |
+
0.0309
|
| 822 |
+
]
|
| 823 |
+
],
|
| 824 |
+
"top_tfidf": [
|
| 825 |
+
[
|
| 826 |
+
"franchise_series",
|
| 827 |
+
0.6034
|
| 828 |
+
],
|
| 829 |
+
[
|
| 830 |
+
"gender",
|
| 831 |
+
0.0515
|
| 832 |
+
],
|
| 833 |
+
[
|
| 834 |
+
"resolution",
|
| 835 |
+
0.0309
|
| 836 |
+
]
|
| 837 |
+
],
|
| 838 |
+
"top_wiki": [],
|
| 839 |
+
"wiki_vote_count": 0,
|
| 840 |
+
"wiki_link_count": 2
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"tag": "blastoise",
|
| 844 |
+
"count": 1006,
|
| 845 |
+
"signal": "tfidf_only",
|
| 846 |
+
"assigned_categories": [
|
| 847 |
+
"franchise_series"
|
| 848 |
+
],
|
| 849 |
+
"top_fused": [
|
| 850 |
+
[
|
| 851 |
+
"franchise_series",
|
| 852 |
+
0.6609
|
| 853 |
+
],
|
| 854 |
+
[
|
| 855 |
+
"gender",
|
| 856 |
+
0.0453
|
| 857 |
+
],
|
| 858 |
+
[
|
| 859 |
+
"count",
|
| 860 |
+
0.0243
|
| 861 |
+
]
|
| 862 |
+
],
|
| 863 |
+
"top_tfidf": [
|
| 864 |
+
[
|
| 865 |
+
"franchise_series",
|
| 866 |
+
0.6609
|
| 867 |
+
],
|
| 868 |
+
[
|
| 869 |
+
"gender",
|
| 870 |
+
0.0453
|
| 871 |
+
],
|
| 872 |
+
[
|
| 873 |
+
"count",
|
| 874 |
+
0.0243
|
| 875 |
+
]
|
| 876 |
+
],
|
| 877 |
+
"top_wiki": [],
|
| 878 |
+
"wiki_vote_count": 0,
|
| 879 |
+
"wiki_link_count": 4
|
| 880 |
+
},
|
| 881 |
+
{
|
| 882 |
+
"tag": "roserade",
|
| 883 |
+
"count": 871,
|
| 884 |
+
"signal": "tfidf_only",
|
| 885 |
+
"assigned_categories": [
|
| 886 |
+
"franchise_series"
|
| 887 |
+
],
|
| 888 |
+
"top_fused": [
|
| 889 |
+
[
|
| 890 |
+
"franchise_series",
|
| 891 |
+
0.6325
|
| 892 |
+
],
|
| 893 |
+
[
|
| 894 |
+
"gender",
|
| 895 |
+
0.052
|
| 896 |
+
],
|
| 897 |
+
[
|
| 898 |
+
"resolution",
|
| 899 |
+
0.0336
|
| 900 |
+
]
|
| 901 |
+
],
|
| 902 |
+
"top_tfidf": [
|
| 903 |
+
[
|
| 904 |
+
"franchise_series",
|
| 905 |
+
0.6325
|
| 906 |
+
],
|
| 907 |
+
[
|
| 908 |
+
"gender",
|
| 909 |
+
0.052
|
| 910 |
+
],
|
| 911 |
+
[
|
| 912 |
+
"resolution",
|
| 913 |
+
0.0336
|
| 914 |
+
]
|
| 915 |
+
],
|
| 916 |
+
"top_wiki": [],
|
| 917 |
+
"wiki_vote_count": 0,
|
| 918 |
+
"wiki_link_count": 3
|
| 919 |
+
},
|
| 920 |
+
{
|
| 921 |
+
"tag": "alolan_raichu",
|
| 922 |
+
"count": 730,
|
| 923 |
+
"signal": "tfidf_only",
|
| 924 |
+
"assigned_categories": [
|
| 925 |
+
"franchise_series"
|
| 926 |
+
],
|
| 927 |
+
"top_fused": [
|
| 928 |
+
[
|
| 929 |
+
"franchise_series",
|
| 930 |
+
0.6985
|
| 931 |
+
],
|
| 932 |
+
[
|
| 933 |
+
"gender",
|
| 934 |
+
0.0612
|
| 935 |
+
],
|
| 936 |
+
[
|
| 937 |
+
"resolution",
|
| 938 |
+
0.0263
|
| 939 |
+
]
|
| 940 |
+
],
|
| 941 |
+
"top_tfidf": [
|
| 942 |
+
[
|
| 943 |
+
"franchise_series",
|
| 944 |
+
0.6985
|
| 945 |
+
],
|
| 946 |
+
[
|
| 947 |
+
"gender",
|
| 948 |
+
0.0612
|
| 949 |
+
],
|
| 950 |
+
[
|
| 951 |
+
"resolution",
|
| 952 |
+
0.0263
|
| 953 |
+
]
|
| 954 |
+
],
|
| 955 |
+
"top_wiki": [],
|
| 956 |
+
"wiki_vote_count": 0,
|
| 957 |
+
"wiki_link_count": 4
|
| 958 |
+
},
|
| 959 |
+
{
|
| 960 |
+
"tag": "nickit",
|
| 961 |
+
"count": 663,
|
| 962 |
+
"signal": "tfidf_only",
|
| 963 |
+
"assigned_categories": [
|
| 964 |
+
"franchise_series"
|
| 965 |
+
],
|
| 966 |
+
"top_fused": [
|
| 967 |
+
[
|
| 968 |
+
"franchise_series",
|
| 969 |
+
0.6143
|
| 970 |
+
],
|
| 971 |
+
[
|
| 972 |
+
"gender",
|
| 973 |
+
0.0972
|
| 974 |
+
],
|
| 975 |
+
[
|
| 976 |
+
"resolution",
|
| 977 |
+
0.0326
|
| 978 |
+
]
|
| 979 |
+
],
|
| 980 |
+
"top_tfidf": [
|
| 981 |
+
[
|
| 982 |
+
"franchise_series",
|
| 983 |
+
0.6143
|
| 984 |
+
],
|
| 985 |
+
[
|
| 986 |
+
"gender",
|
| 987 |
+
0.0972
|
| 988 |
+
],
|
| 989 |
+
[
|
| 990 |
+
"resolution",
|
| 991 |
+
0.0326
|
| 992 |
+
]
|
| 993 |
+
],
|
| 994 |
+
"top_wiki": [],
|
| 995 |
+
"wiki_vote_count": 0,
|
| 996 |
+
"wiki_link_count": 0
|
| 997 |
+
},
|
| 998 |
+
{
|
| 999 |
+
"tag": "linoone",
|
| 1000 |
+
"count": 628,
|
| 1001 |
+
"signal": "tfidf_only",
|
| 1002 |
+
"assigned_categories": [
|
| 1003 |
+
"franchise_series"
|
| 1004 |
+
],
|
| 1005 |
+
"top_fused": [
|
| 1006 |
+
[
|
| 1007 |
+
"franchise_series",
|
| 1008 |
+
0.7373
|
| 1009 |
+
],
|
| 1010 |
+
[
|
| 1011 |
+
"gender",
|
| 1012 |
+
0.0406
|
| 1013 |
+
],
|
| 1014 |
+
[
|
| 1015 |
+
"resolution",
|
| 1016 |
+
0.0196
|
| 1017 |
+
]
|
| 1018 |
+
],
|
| 1019 |
+
"top_tfidf": [
|
| 1020 |
+
[
|
| 1021 |
+
"franchise_series",
|
| 1022 |
+
0.7373
|
| 1023 |
+
],
|
| 1024 |
+
[
|
| 1025 |
+
"gender",
|
| 1026 |
+
0.0406
|
| 1027 |
+
],
|
| 1028 |
+
[
|
| 1029 |
+
"resolution",
|
| 1030 |
+
0.0196
|
| 1031 |
+
]
|
| 1032 |
+
],
|
| 1033 |
+
"top_wiki": [],
|
| 1034 |
+
"wiki_vote_count": 0,
|
| 1035 |
+
"wiki_link_count": 2
|
| 1036 |
+
},
|
| 1037 |
+
{
|
| 1038 |
+
"tag": "amped_toxtricity",
|
| 1039 |
+
"count": 568,
|
| 1040 |
+
"signal": "tfidf_only",
|
| 1041 |
+
"assigned_categories": [
|
| 1042 |
+
"franchise_series"
|
| 1043 |
+
],
|
| 1044 |
+
"top_fused": [
|
| 1045 |
+
[
|
| 1046 |
+
"franchise_series",
|
| 1047 |
+
0.6964
|
| 1048 |
+
],
|
| 1049 |
+
[
|
| 1050 |
+
"gender",
|
| 1051 |
+
0.0587
|
| 1052 |
+
],
|
| 1053 |
+
[
|
| 1054 |
+
"resolution",
|
| 1055 |
+
0.0257
|
| 1056 |
+
]
|
| 1057 |
+
],
|
| 1058 |
+
"top_tfidf": [
|
| 1059 |
+
[
|
| 1060 |
+
"franchise_series",
|
| 1061 |
+
0.6964
|
| 1062 |
+
],
|
| 1063 |
+
[
|
| 1064 |
+
"gender",
|
| 1065 |
+
0.0587
|
| 1066 |
+
],
|
| 1067 |
+
[
|
| 1068 |
+
"resolution",
|
| 1069 |
+
0.0257
|
| 1070 |
+
]
|
| 1071 |
+
],
|
| 1072 |
+
"top_wiki": [],
|
| 1073 |
+
"wiki_vote_count": 0,
|
| 1074 |
+
"wiki_link_count": 0
|
| 1075 |
+
}
|
| 1076 |
+
],
|
| 1077 |
+
"multi": [
|
| 1078 |
+
{
|
| 1079 |
+
"tag": "cub",
|
| 1080 |
+
"count": 147547,
|
| 1081 |
+
"signal": "wiki_only",
|
| 1082 |
+
"assigned_categories": [
|
| 1083 |
+
"species",
|
| 1084 |
+
"body_type"
|
| 1085 |
+
],
|
| 1086 |
+
"top_fused": [
|
| 1087 |
+
[
|
| 1088 |
+
"species",
|
| 1089 |
+
0.6
|
| 1090 |
+
],
|
| 1091 |
+
[
|
| 1092 |
+
"body_type",
|
| 1093 |
+
0.4
|
| 1094 |
+
],
|
| 1095 |
+
[
|
| 1096 |
+
"text",
|
| 1097 |
+
0.0
|
| 1098 |
+
]
|
| 1099 |
+
],
|
| 1100 |
+
"top_tfidf": [],
|
| 1101 |
+
"top_wiki": [
|
| 1102 |
+
[
|
| 1103 |
+
"species",
|
| 1104 |
+
0.6
|
| 1105 |
+
],
|
| 1106 |
+
[
|
| 1107 |
+
"body_type",
|
| 1108 |
+
0.4
|
| 1109 |
+
],
|
| 1110 |
+
[
|
| 1111 |
+
"text",
|
| 1112 |
+
0.0
|
| 1113 |
+
]
|
| 1114 |
+
],
|
| 1115 |
+
"wiki_vote_count": 5,
|
| 1116 |
+
"wiki_link_count": 11
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"tag": "dock",
|
| 1120 |
+
"count": 16478,
|
| 1121 |
+
"signal": "wiki_only",
|
| 1122 |
+
"assigned_categories": [
|
| 1123 |
+
"anatomy_features",
|
| 1124 |
+
"objects_props"
|
| 1125 |
+
],
|
| 1126 |
+
"top_fused": [
|
| 1127 |
+
[
|
| 1128 |
+
"anatomy_features",
|
| 1129 |
+
0.5
|
| 1130 |
+
],
|
| 1131 |
+
[
|
| 1132 |
+
"objects_props",
|
| 1133 |
+
0.5
|
| 1134 |
+
],
|
| 1135 |
+
[
|
| 1136 |
+
"franchise_series",
|
| 1137 |
+
0.0
|
| 1138 |
+
]
|
| 1139 |
+
],
|
| 1140 |
+
"top_tfidf": [],
|
| 1141 |
+
"top_wiki": [
|
| 1142 |
+
[
|
| 1143 |
+
"anatomy_features",
|
| 1144 |
+
0.5
|
| 1145 |
+
],
|
| 1146 |
+
[
|
| 1147 |
+
"objects_props",
|
| 1148 |
+
0.5
|
| 1149 |
+
],
|
| 1150 |
+
[
|
| 1151 |
+
"franchise_series",
|
| 1152 |
+
0.0
|
| 1153 |
+
]
|
| 1154 |
+
],
|
| 1155 |
+
"wiki_vote_count": 2,
|
| 1156 |
+
"wiki_link_count": 3
|
| 1157 |
+
},
|
| 1158 |
+
{
|
| 1159 |
+
"tag": "teenager",
|
| 1160 |
+
"count": 13700,
|
| 1161 |
+
"signal": "wiki_only",
|
| 1162 |
+
"assigned_categories": [
|
| 1163 |
+
"body_type",
|
| 1164 |
+
"gender"
|
| 1165 |
+
],
|
| 1166 |
+
"top_fused": [
|
| 1167 |
+
[
|
| 1168 |
+
"body_type",
|
| 1169 |
+
0.5
|
| 1170 |
+
],
|
| 1171 |
+
[
|
| 1172 |
+
"gender",
|
| 1173 |
+
0.5
|
| 1174 |
+
],
|
| 1175 |
+
[
|
| 1176 |
+
"text",
|
| 1177 |
+
0.0
|
| 1178 |
+
]
|
| 1179 |
+
],
|
| 1180 |
+
"top_tfidf": [],
|
| 1181 |
+
"top_wiki": [
|
| 1182 |
+
[
|
| 1183 |
+
"body_type",
|
| 1184 |
+
0.5
|
| 1185 |
+
],
|
| 1186 |
+
[
|
| 1187 |
+
"gender",
|
| 1188 |
+
0.5
|
| 1189 |
+
],
|
| 1190 |
+
[
|
| 1191 |
+
"text",
|
| 1192 |
+
0.0
|
| 1193 |
+
]
|
| 1194 |
+
],
|
| 1195 |
+
"wiki_vote_count": 4,
|
| 1196 |
+
"wiki_link_count": 19
|
| 1197 |
+
},
|
| 1198 |
+
{
|
| 1199 |
+
"tag": "ringtail",
|
| 1200 |
+
"count": 6643,
|
| 1201 |
+
"signal": "wiki_only",
|
| 1202 |
+
"assigned_categories": [
|
| 1203 |
+
"anatomy_features",
|
| 1204 |
+
"color_markings"
|
| 1205 |
+
],
|
| 1206 |
+
"top_fused": [
|
| 1207 |
+
[
|
| 1208 |
+
"anatomy_features",
|
| 1209 |
+
0.6667
|
| 1210 |
+
],
|
| 1211 |
+
[
|
| 1212 |
+
"color_markings",
|
| 1213 |
+
0.3333
|
| 1214 |
+
],
|
| 1215 |
+
[
|
| 1216 |
+
"franchise_series",
|
| 1217 |
+
0.0
|
| 1218 |
+
]
|
| 1219 |
+
],
|
| 1220 |
+
"top_tfidf": [],
|
| 1221 |
+
"top_wiki": [
|
| 1222 |
+
[
|
| 1223 |
+
"anatomy_features",
|
| 1224 |
+
0.6667
|
| 1225 |
+
],
|
| 1226 |
+
[
|
| 1227 |
+
"color_markings",
|
| 1228 |
+
0.3333
|
| 1229 |
+
],
|
| 1230 |
+
[
|
| 1231 |
+
"franchise_series",
|
| 1232 |
+
0.0
|
| 1233 |
+
]
|
| 1234 |
+
],
|
| 1235 |
+
"wiki_vote_count": 3,
|
| 1236 |
+
"wiki_link_count": 5
|
| 1237 |
+
},
|
| 1238 |
+
{
|
| 1239 |
+
"tag": "greninja",
|
| 1240 |
+
"count": 3805,
|
| 1241 |
+
"signal": "both",
|
| 1242 |
+
"assigned_categories": [
|
| 1243 |
+
"franchise_series",
|
| 1244 |
+
"body_type"
|
| 1245 |
+
],
|
| 1246 |
+
"top_fused": [
|
| 1247 |
+
[
|
| 1248 |
+
"franchise_series",
|
| 1249 |
+
0.4591
|
| 1250 |
+
],
|
| 1251 |
+
[
|
| 1252 |
+
"body_type",
|
| 1253 |
+
0.4072
|
| 1254 |
+
],
|
| 1255 |
+
[
|
| 1256 |
+
"gender",
|
| 1257 |
+
0.0343
|
| 1258 |
+
]
|
| 1259 |
+
],
|
| 1260 |
+
"top_tfidf": [
|
| 1261 |
+
[
|
| 1262 |
+
"franchise_series",
|
| 1263 |
+
0.7651
|
| 1264 |
+
],
|
| 1265 |
+
[
|
| 1266 |
+
"gender",
|
| 1267 |
+
0.0571
|
| 1268 |
+
],
|
| 1269 |
+
[
|
| 1270 |
+
"resolution",
|
| 1271 |
+
0.0246
|
| 1272 |
+
]
|
| 1273 |
+
],
|
| 1274 |
+
"top_wiki": [
|
| 1275 |
+
[
|
| 1276 |
+
"body_type",
|
| 1277 |
+
1.0
|
| 1278 |
+
],
|
| 1279 |
+
[
|
| 1280 |
+
"text",
|
| 1281 |
+
0.0
|
| 1282 |
+
],
|
| 1283 |
+
[
|
| 1284 |
+
"franchise_series",
|
| 1285 |
+
0.0
|
| 1286 |
+
]
|
| 1287 |
+
],
|
| 1288 |
+
"wiki_vote_count": 1,
|
| 1289 |
+
"wiki_link_count": 9
|
| 1290 |
+
},
|
| 1291 |
+
{
|
| 1292 |
+
"tag": "roxanne_wolf_(fnaf)",
|
| 1293 |
+
"count": 3637,
|
| 1294 |
+
"signal": "wiki_only",
|
| 1295 |
+
"assigned_categories": [
|
| 1296 |
+
"anatomy_features",
|
| 1297 |
+
"color_markings"
|
| 1298 |
+
],
|
| 1299 |
+
"top_fused": [
|
| 1300 |
+
[
|
| 1301 |
+
"anatomy_features",
|
| 1302 |
+
0.6
|
| 1303 |
+
],
|
| 1304 |
+
[
|
| 1305 |
+
"color_markings",
|
| 1306 |
+
0.4
|
| 1307 |
+
],
|
| 1308 |
+
[
|
| 1309 |
+
"franchise_series",
|
| 1310 |
+
0.0
|
| 1311 |
+
]
|
| 1312 |
+
],
|
| 1313 |
+
"top_tfidf": [],
|
| 1314 |
+
"top_wiki": [
|
| 1315 |
+
[
|
| 1316 |
+
"anatomy_features",
|
| 1317 |
+
0.6
|
| 1318 |
+
],
|
| 1319 |
+
[
|
| 1320 |
+
"color_markings",
|
| 1321 |
+
0.4
|
| 1322 |
+
],
|
| 1323 |
+
[
|
| 1324 |
+
"franchise_series",
|
| 1325 |
+
0.0
|
| 1326 |
+
]
|
| 1327 |
+
],
|
| 1328 |
+
"wiki_vote_count": 5,
|
| 1329 |
+
"wiki_link_count": 21
|
| 1330 |
+
},
|
| 1331 |
+
{
|
| 1332 |
+
"tag": "pet",
|
| 1333 |
+
"count": 3461,
|
| 1334 |
+
"signal": "wiki_only",
|
| 1335 |
+
"assigned_categories": [
|
| 1336 |
+
"body_decor",
|
| 1337 |
+
"objects_props"
|
| 1338 |
+
],
|
| 1339 |
+
"top_fused": [
|
| 1340 |
+
[
|
| 1341 |
+
"body_decor",
|
| 1342 |
+
0.5
|
| 1343 |
+
],
|
| 1344 |
+
[
|
| 1345 |
+
"objects_props",
|
| 1346 |
+
0.5
|
| 1347 |
+
],
|
| 1348 |
+
[
|
| 1349 |
+
"text",
|
| 1350 |
+
0.0
|
| 1351 |
+
]
|
| 1352 |
+
],
|
| 1353 |
+
"top_tfidf": [],
|
| 1354 |
+
"top_wiki": [
|
| 1355 |
+
[
|
| 1356 |
+
"body_decor",
|
| 1357 |
+
0.5
|
| 1358 |
+
],
|
| 1359 |
+
[
|
| 1360 |
+
"objects_props",
|
| 1361 |
+
0.5
|
| 1362 |
+
],
|
| 1363 |
+
[
|
| 1364 |
+
"text",
|
| 1365 |
+
0.0
|
| 1366 |
+
]
|
| 1367 |
+
],
|
| 1368 |
+
"wiki_vote_count": 2,
|
| 1369 |
+
"wiki_link_count": 18
|
| 1370 |
+
},
|
| 1371 |
+
{
|
| 1372 |
+
"tag": "zorua",
|
| 1373 |
+
"count": 3167,
|
| 1374 |
+
"signal": "both",
|
| 1375 |
+
"assigned_categories": [
|
| 1376 |
+
"franchise_series",
|
| 1377 |
+
"body_type"
|
| 1378 |
+
],
|
| 1379 |
+
"top_fused": [
|
| 1380 |
+
[
|
| 1381 |
+
"franchise_series",
|
| 1382 |
+
0.4565
|
| 1383 |
+
],
|
| 1384 |
+
[
|
| 1385 |
+
"body_type",
|
| 1386 |
+
0.4079
|
| 1387 |
+
],
|
| 1388 |
+
[
|
| 1389 |
+
"gender",
|
| 1390 |
+
0.0393
|
| 1391 |
+
]
|
| 1392 |
+
],
|
| 1393 |
+
"top_tfidf": [
|
| 1394 |
+
[
|
| 1395 |
+
"franchise_series",
|
| 1396 |
+
0.7608
|
| 1397 |
+
],
|
| 1398 |
+
[
|
| 1399 |
+
"gender",
|
| 1400 |
+
0.0655
|
| 1401 |
+
],
|
| 1402 |
+
[
|
| 1403 |
+
"resolution",
|
| 1404 |
+
0.0223
|
| 1405 |
+
]
|
| 1406 |
+
],
|
| 1407 |
+
"top_wiki": [
|
| 1408 |
+
[
|
| 1409 |
+
"body_type",
|
| 1410 |
+
1.0
|
| 1411 |
+
],
|
| 1412 |
+
[
|
| 1413 |
+
"text",
|
| 1414 |
+
0.0
|
| 1415 |
+
],
|
| 1416 |
+
[
|
| 1417 |
+
"franchise_series",
|
| 1418 |
+
0.0
|
| 1419 |
+
]
|
| 1420 |
+
],
|
| 1421 |
+
"wiki_vote_count": 1,
|
| 1422 |
+
"wiki_link_count": 3
|
| 1423 |
+
},
|
| 1424 |
+
{
|
| 1425 |
+
"tag": "kirlia",
|
| 1426 |
+
"count": 3140,
|
| 1427 |
+
"signal": "both",
|
| 1428 |
+
"assigned_categories": [
|
| 1429 |
+
"franchise_series",
|
| 1430 |
+
"body_type"
|
| 1431 |
+
],
|
| 1432 |
+
"top_fused": [
|
| 1433 |
+
[
|
| 1434 |
+
"franchise_series",
|
| 1435 |
+
0.4781
|
| 1436 |
+
],
|
| 1437 |
+
[
|
| 1438 |
+
"body_type",
|
| 1439 |
+
0.4081
|
| 1440 |
+
],
|
| 1441 |
+
[
|
| 1442 |
+
"gender",
|
| 1443 |
+
0.0273
|
| 1444 |
+
]
|
| 1445 |
+
],
|
| 1446 |
+
"top_tfidf": [
|
| 1447 |
+
[
|
| 1448 |
+
"franchise_series",
|
| 1449 |
+
0.7969
|
| 1450 |
+
],
|
| 1451 |
+
[
|
| 1452 |
+
"gender",
|
| 1453 |
+
0.0455
|
| 1454 |
+
],
|
| 1455 |
+
[
|
| 1456 |
+
"resolution",
|
| 1457 |
+
0.0237
|
| 1458 |
+
]
|
| 1459 |
+
],
|
| 1460 |
+
"top_wiki": [
|
| 1461 |
+
[
|
| 1462 |
+
"body_type",
|
| 1463 |
+
1.0
|
| 1464 |
+
],
|
| 1465 |
+
[
|
| 1466 |
+
"text",
|
| 1467 |
+
0.0
|
| 1468 |
+
],
|
| 1469 |
+
[
|
| 1470 |
+
"franchise_series",
|
| 1471 |
+
0.0
|
| 1472 |
+
]
|
| 1473 |
+
],
|
| 1474 |
+
"wiki_vote_count": 1,
|
| 1475 |
+
"wiki_link_count": 8
|
| 1476 |
+
},
|
| 1477 |
+
{
|
| 1478 |
+
"tag": "simba",
|
| 1479 |
+
"count": 2566,
|
| 1480 |
+
"signal": "wiki_only",
|
| 1481 |
+
"assigned_categories": [
|
| 1482 |
+
"franchise_series",
|
| 1483 |
+
"gender"
|
| 1484 |
+
],
|
| 1485 |
+
"top_fused": [
|
| 1486 |
+
[
|
| 1487 |
+
"franchise_series",
|
| 1488 |
+
0.5
|
| 1489 |
+
],
|
| 1490 |
+
[
|
| 1491 |
+
"gender",
|
| 1492 |
+
0.5
|
| 1493 |
+
],
|
| 1494 |
+
[
|
| 1495 |
+
"text",
|
| 1496 |
+
0.0
|
| 1497 |
+
]
|
| 1498 |
+
],
|
| 1499 |
+
"top_tfidf": [],
|
| 1500 |
+
"top_wiki": [
|
| 1501 |
+
[
|
| 1502 |
+
"franchise_series",
|
| 1503 |
+
0.5
|
| 1504 |
+
],
|
| 1505 |
+
[
|
| 1506 |
+
"gender",
|
| 1507 |
+
0.5
|
| 1508 |
+
],
|
| 1509 |
+
[
|
| 1510 |
+
"text",
|
| 1511 |
+
0.0
|
| 1512 |
+
]
|
| 1513 |
+
],
|
| 1514 |
+
"wiki_vote_count": 2,
|
| 1515 |
+
"wiki_link_count": 14
|
| 1516 |
+
},
|
| 1517 |
+
{
|
| 1518 |
+
"tag": "colorful",
|
| 1519 |
+
"count": 2402,
|
| 1520 |
+
"signal": "wiki_only",
|
| 1521 |
+
"assigned_categories": [
|
| 1522 |
+
"color_markings",
|
| 1523 |
+
"style"
|
| 1524 |
+
],
|
| 1525 |
+
"top_fused": [
|
| 1526 |
+
[
|
| 1527 |
+
"color_markings",
|
| 1528 |
+
0.6667
|
| 1529 |
+
],
|
| 1530 |
+
[
|
| 1531 |
+
"style",
|
| 1532 |
+
0.3333
|
| 1533 |
+
],
|
| 1534 |
+
[
|
| 1535 |
+
"text",
|
| 1536 |
+
0.0
|
| 1537 |
+
]
|
| 1538 |
+
],
|
| 1539 |
+
"top_tfidf": [],
|
| 1540 |
+
"top_wiki": [
|
| 1541 |
+
[
|
| 1542 |
+
"color_markings",
|
| 1543 |
+
0.6667
|
| 1544 |
+
],
|
| 1545 |
+
[
|
| 1546 |
+
"style",
|
| 1547 |
+
0.3333
|
| 1548 |
+
],
|
| 1549 |
+
[
|
| 1550 |
+
"text",
|
| 1551 |
+
0.0
|
| 1552 |
+
]
|
| 1553 |
+
],
|
| 1554 |
+
"wiki_vote_count": 3,
|
| 1555 |
+
"wiki_link_count": 5
|
| 1556 |
+
},
|
| 1557 |
+
{
|
| 1558 |
+
"tag": "mawile",
|
| 1559 |
+
"count": 2121,
|
| 1560 |
+
"signal": "both",
|
| 1561 |
+
"assigned_categories": [
|
| 1562 |
+
"franchise_series",
|
| 1563 |
+
"clothing_detail"
|
| 1564 |
+
],
|
| 1565 |
+
"top_fused": [
|
| 1566 |
+
[
|
| 1567 |
+
"franchise_series",
|
| 1568 |
+
0.4989
|
| 1569 |
+
],
|
| 1570 |
+
[
|
| 1571 |
+
"clothing_detail",
|
| 1572 |
+
0.4019
|
| 1573 |
+
],
|
| 1574 |
+
[
|
| 1575 |
+
"gender",
|
| 1576 |
+
0.0226
|
| 1577 |
+
]
|
| 1578 |
+
],
|
| 1579 |
+
"top_tfidf": [
|
| 1580 |
+
[
|
| 1581 |
+
"franchise_series",
|
| 1582 |
+
0.8316
|
| 1583 |
+
],
|
| 1584 |
+
[
|
| 1585 |
+
"gender",
|
| 1586 |
+
0.0377
|
| 1587 |
+
],
|
| 1588 |
+
[
|
| 1589 |
+
"resolution",
|
| 1590 |
+
0.0176
|
| 1591 |
+
]
|
| 1592 |
+
],
|
| 1593 |
+
"top_wiki": [
|
| 1594 |
+
[
|
| 1595 |
+
"clothing_detail",
|
| 1596 |
+
1.0
|
| 1597 |
+
],
|
| 1598 |
+
[
|
| 1599 |
+
"text",
|
| 1600 |
+
0.0
|
| 1601 |
+
],
|
| 1602 |
+
[
|
| 1603 |
+
"franchise_series",
|
| 1604 |
+
0.0
|
| 1605 |
+
]
|
| 1606 |
+
],
|
| 1607 |
+
"wiki_vote_count": 1,
|
| 1608 |
+
"wiki_link_count": 6
|
| 1609 |
+
},
|
| 1610 |
+
{
|
| 1611 |
+
"tag": "troll",
|
| 1612 |
+
"count": 1556,
|
| 1613 |
+
"signal": "wiki_only",
|
| 1614 |
+
"assigned_categories": [
|
| 1615 |
+
"species",
|
| 1616 |
+
"body_type"
|
| 1617 |
+
],
|
| 1618 |
+
"top_fused": [
|
| 1619 |
+
[
|
| 1620 |
+
"species",
|
| 1621 |
+
0.5
|
| 1622 |
+
],
|
| 1623 |
+
[
|
| 1624 |
+
"body_type",
|
| 1625 |
+
0.5
|
| 1626 |
+
],
|
| 1627 |
+
[
|
| 1628 |
+
"text",
|
| 1629 |
+
0.0
|
| 1630 |
+
]
|
| 1631 |
+
],
|
| 1632 |
+
"top_tfidf": [],
|
| 1633 |
+
"top_wiki": [
|
| 1634 |
+
[
|
| 1635 |
+
"species",
|
| 1636 |
+
0.5
|
| 1637 |
+
],
|
| 1638 |
+
[
|
| 1639 |
+
"body_type",
|
| 1640 |
+
0.5
|
| 1641 |
+
],
|
| 1642 |
+
[
|
| 1643 |
+
"text",
|
| 1644 |
+
0.0
|
| 1645 |
+
]
|
| 1646 |
+
],
|
| 1647 |
+
"wiki_vote_count": 2,
|
| 1648 |
+
"wiki_link_count": 10
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"tag": "squirtle",
|
| 1652 |
+
"count": 1167,
|
| 1653 |
+
"signal": "both",
|
| 1654 |
+
"assigned_categories": [
|
| 1655 |
+
"franchise_series",
|
| 1656 |
+
"body_type"
|
| 1657 |
+
],
|
| 1658 |
+
"top_fused": [
|
| 1659 |
+
[
|
| 1660 |
+
"franchise_series",
|
| 1661 |
+
0.4597
|
| 1662 |
+
],
|
| 1663 |
+
[
|
| 1664 |
+
"body_type",
|
| 1665 |
+
0.4074
|
| 1666 |
+
],
|
| 1667 |
+
[
|
| 1668 |
+
"gender",
|
| 1669 |
+
0.0188
|
| 1670 |
+
]
|
| 1671 |
+
],
|
| 1672 |
+
"top_tfidf": [
|
| 1673 |
+
[
|
| 1674 |
+
"franchise_series",
|
| 1675 |
+
0.7662
|
| 1676 |
+
],
|
| 1677 |
+
[
|
| 1678 |
+
"gender",
|
| 1679 |
+
0.0313
|
| 1680 |
+
],
|
| 1681 |
+
[
|
| 1682 |
+
"resolution",
|
| 1683 |
+
0.0187
|
| 1684 |
+
]
|
| 1685 |
+
],
|
| 1686 |
+
"top_wiki": [
|
| 1687 |
+
[
|
| 1688 |
+
"body_type",
|
| 1689 |
+
1.0
|
| 1690 |
+
],
|
| 1691 |
+
[
|
| 1692 |
+
"text",
|
| 1693 |
+
0.0
|
| 1694 |
+
],
|
| 1695 |
+
[
|
| 1696 |
+
"franchise_series",
|
| 1697 |
+
0.0
|
| 1698 |
+
]
|
| 1699 |
+
],
|
| 1700 |
+
"wiki_vote_count": 1,
|
| 1701 |
+
"wiki_link_count": 10
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"tag": "oshawott",
|
| 1705 |
+
"count": 1157,
|
| 1706 |
+
"signal": "both",
|
| 1707 |
+
"assigned_categories": [
|
| 1708 |
+
"franchise_series",
|
| 1709 |
+
"body_type"
|
| 1710 |
+
],
|
| 1711 |
+
"top_fused": [
|
| 1712 |
+
[
|
| 1713 |
+
"franchise_series",
|
| 1714 |
+
0.5151
|
| 1715 |
+
],
|
| 1716 |
+
[
|
| 1717 |
+
"body_type",
|
| 1718 |
+
0.4049
|
| 1719 |
+
],
|
| 1720 |
+
[
|
| 1721 |
+
"gender",
|
| 1722 |
+
0.019
|
| 1723 |
+
]
|
| 1724 |
+
],
|
| 1725 |
+
"top_tfidf": [
|
| 1726 |
+
[
|
| 1727 |
+
"franchise_series",
|
| 1728 |
+
0.8585
|
| 1729 |
+
],
|
| 1730 |
+
[
|
| 1731 |
+
"gender",
|
| 1732 |
+
0.0316
|
| 1733 |
+
],
|
| 1734 |
+
[
|
| 1735 |
+
"resolution",
|
| 1736 |
+
0.013
|
| 1737 |
+
]
|
| 1738 |
+
],
|
| 1739 |
+
"top_wiki": [
|
| 1740 |
+
[
|
| 1741 |
+
"body_type",
|
| 1742 |
+
1.0
|
| 1743 |
+
],
|
| 1744 |
+
[
|
| 1745 |
+
"text",
|
| 1746 |
+
0.0
|
| 1747 |
+
],
|
| 1748 |
+
[
|
| 1749 |
+
"franchise_series",
|
| 1750 |
+
0.0
|
| 1751 |
+
]
|
| 1752 |
+
],
|
| 1753 |
+
"wiki_vote_count": 2,
|
| 1754 |
+
"wiki_link_count": 9
|
| 1755 |
+
},
|
| 1756 |
+
{
|
| 1757 |
+
"tag": "cosplay_pikachu_(character)",
|
| 1758 |
+
"count": 1138,
|
| 1759 |
+
"signal": "both",
|
| 1760 |
+
"assigned_categories": [
|
| 1761 |
+
"gender",
|
| 1762 |
+
"franchise_series"
|
| 1763 |
+
],
|
| 1764 |
+
"top_fused": [
|
| 1765 |
+
[
|
| 1766 |
+
"gender",
|
| 1767 |
+
0.4429
|
| 1768 |
+
],
|
| 1769 |
+
[
|
| 1770 |
+
"franchise_series",
|
| 1771 |
+
0.343
|
| 1772 |
+
],
|
| 1773 |
+
[
|
| 1774 |
+
"resolution",
|
| 1775 |
+
0.0276
|
| 1776 |
+
]
|
| 1777 |
+
],
|
| 1778 |
+
"top_tfidf": [
|
| 1779 |
+
[
|
| 1780 |
+
"franchise_series",
|
| 1781 |
+
0.5717
|
| 1782 |
+
],
|
| 1783 |
+
[
|
| 1784 |
+
"gender",
|
| 1785 |
+
0.0715
|
| 1786 |
+
],
|
| 1787 |
+
[
|
| 1788 |
+
"resolution",
|
| 1789 |
+
0.046
|
| 1790 |
+
]
|
| 1791 |
+
],
|
| 1792 |
+
"top_wiki": [
|
| 1793 |
+
[
|
| 1794 |
+
"gender",
|
| 1795 |
+
1.0
|
| 1796 |
+
],
|
| 1797 |
+
[
|
| 1798 |
+
"text",
|
| 1799 |
+
0.0
|
| 1800 |
+
],
|
| 1801 |
+
[
|
| 1802 |
+
"franchise_series",
|
| 1803 |
+
0.0
|
| 1804 |
+
]
|
| 1805 |
+
],
|
| 1806 |
+
"wiki_vote_count": 1,
|
| 1807 |
+
"wiki_link_count": 5
|
| 1808 |
+
},
|
| 1809 |
+
{
|
| 1810 |
+
"tag": "legendary_duo",
|
| 1811 |
+
"count": 1059,
|
| 1812 |
+
"signal": "both",
|
| 1813 |
+
"assigned_categories": [
|
| 1814 |
+
"franchise_series",
|
| 1815 |
+
"count"
|
| 1816 |
+
],
|
| 1817 |
+
"top_fused": [
|
| 1818 |
+
[
|
| 1819 |
+
"franchise_series",
|
| 1820 |
+
0.477
|
| 1821 |
+
],
|
| 1822 |
+
[
|
| 1823 |
+
"count",
|
| 1824 |
+
0.4082
|
| 1825 |
+
],
|
| 1826 |
+
[
|
| 1827 |
+
"gender",
|
| 1828 |
+
0.0209
|
| 1829 |
+
]
|
| 1830 |
+
],
|
| 1831 |
+
"top_tfidf": [
|
| 1832 |
+
[
|
| 1833 |
+
"franchise_series",
|
| 1834 |
+
0.7951
|
| 1835 |
+
],
|
| 1836 |
+
[
|
| 1837 |
+
"gender",
|
| 1838 |
+
0.0348
|
| 1839 |
+
],
|
| 1840 |
+
[
|
| 1841 |
+
"resolution",
|
| 1842 |
+
0.0174
|
| 1843 |
+
]
|
| 1844 |
+
],
|
| 1845 |
+
"top_wiki": [
|
| 1846 |
+
[
|
| 1847 |
+
"count",
|
| 1848 |
+
1.0
|
| 1849 |
+
],
|
| 1850 |
+
[
|
| 1851 |
+
"text",
|
| 1852 |
+
0.0
|
| 1853 |
+
],
|
| 1854 |
+
[
|
| 1855 |
+
"franchise_series",
|
| 1856 |
+
0.0
|
| 1857 |
+
]
|
| 1858 |
+
],
|
| 1859 |
+
"wiki_vote_count": 1,
|
| 1860 |
+
"wiki_link_count": 26
|
| 1861 |
+
},
|
| 1862 |
+
{
|
| 1863 |
+
"tag": "sobble",
|
| 1864 |
+
"count": 762,
|
| 1865 |
+
"signal": "both",
|
| 1866 |
+
"assigned_categories": [
|
| 1867 |
+
"franchise_series",
|
| 1868 |
+
"anatomy_features"
|
| 1869 |
+
],
|
| 1870 |
+
"top_fused": [
|
| 1871 |
+
[
|
| 1872 |
+
"franchise_series",
|
| 1873 |
+
0.4854
|
| 1874 |
+
],
|
| 1875 |
+
[
|
| 1876 |
+
"anatomy_features",
|
| 1877 |
+
0.3047
|
| 1878 |
+
],
|
| 1879 |
+
[
|
| 1880 |
+
"color_markings",
|
| 1881 |
+
0.1033
|
| 1882 |
+
]
|
| 1883 |
+
],
|
| 1884 |
+
"top_tfidf": [
|
| 1885 |
+
[
|
| 1886 |
+
"franchise_series",
|
| 1887 |
+
0.8089
|
| 1888 |
+
],
|
| 1889 |
+
[
|
| 1890 |
+
"gender",
|
| 1891 |
+
0.0414
|
| 1892 |
+
],
|
| 1893 |
+
[
|
| 1894 |
+
"resolution",
|
| 1895 |
+
0.0166
|
| 1896 |
+
]
|
| 1897 |
+
],
|
| 1898 |
+
"top_wiki": [
|
| 1899 |
+
[
|
| 1900 |
+
"anatomy_features",
|
| 1901 |
+
0.75
|
| 1902 |
+
],
|
| 1903 |
+
[
|
| 1904 |
+
"color_markings",
|
| 1905 |
+
0.25
|
| 1906 |
+
],
|
| 1907 |
+
[
|
| 1908 |
+
"franchise_series",
|
| 1909 |
+
0.0
|
| 1910 |
+
]
|
| 1911 |
+
],
|
| 1912 |
+
"wiki_vote_count": 4,
|
| 1913 |
+
"wiki_link_count": 20
|
| 1914 |
+
},
|
| 1915 |
+
{
|
| 1916 |
+
"tag": "chesnaught",
|
| 1917 |
+
"count": 718,
|
| 1918 |
+
"signal": "both",
|
| 1919 |
+
"assigned_categories": [
|
| 1920 |
+
"franchise_series",
|
| 1921 |
+
"body_type"
|
| 1922 |
+
],
|
| 1923 |
+
"top_fused": [
|
| 1924 |
+
[
|
| 1925 |
+
"franchise_series",
|
| 1926 |
+
0.4713
|
| 1927 |
+
],
|
| 1928 |
+
[
|
| 1929 |
+
"body_type",
|
| 1930 |
+
0.406
|
| 1931 |
+
],
|
| 1932 |
+
[
|
| 1933 |
+
"gender",
|
| 1934 |
+
0.0226
|
| 1935 |
+
]
|
| 1936 |
+
],
|
| 1937 |
+
"top_tfidf": [
|
| 1938 |
+
[
|
| 1939 |
+
"franchise_series",
|
| 1940 |
+
0.7856
|
| 1941 |
+
],
|
| 1942 |
+
[
|
| 1943 |
+
"gender",
|
| 1944 |
+
0.0377
|
| 1945 |
+
],
|
| 1946 |
+
[
|
| 1947 |
+
"resolution",
|
| 1948 |
+
0.0181
|
| 1949 |
+
]
|
| 1950 |
+
],
|
| 1951 |
+
"top_wiki": [
|
| 1952 |
+
[
|
| 1953 |
+
"body_type",
|
| 1954 |
+
1.0
|
| 1955 |
+
],
|
| 1956 |
+
[
|
| 1957 |
+
"text",
|
| 1958 |
+
0.0
|
| 1959 |
+
],
|
| 1960 |
+
[
|
| 1961 |
+
"franchise_series",
|
| 1962 |
+
0.0
|
| 1963 |
+
]
|
| 1964 |
+
],
|
| 1965 |
+
"wiki_vote_count": 1,
|
| 1966 |
+
"wiki_link_count": 7
|
| 1967 |
+
},
|
| 1968 |
+
{
|
| 1969 |
+
"tag": "</3",
|
| 1970 |
+
"count": 712,
|
| 1971 |
+
"signal": "wiki_only",
|
| 1972 |
+
"assigned_categories": [
|
| 1973 |
+
"expression_detail",
|
| 1974 |
+
"pose_action_detail"
|
| 1975 |
+
],
|
| 1976 |
+
"top_fused": [
|
| 1977 |
+
[
|
| 1978 |
+
"expression_detail",
|
| 1979 |
+
0.6667
|
| 1980 |
+
],
|
| 1981 |
+
[
|
| 1982 |
+
"pose_action_detail",
|
| 1983 |
+
0.3333
|
| 1984 |
+
],
|
| 1985 |
+
[
|
| 1986 |
+
"text",
|
| 1987 |
+
0.0
|
| 1988 |
+
]
|
| 1989 |
+
],
|
| 1990 |
+
"top_tfidf": [],
|
| 1991 |
+
"top_wiki": [
|
| 1992 |
+
[
|
| 1993 |
+
"expression_detail",
|
| 1994 |
+
0.6667
|
| 1995 |
+
],
|
| 1996 |
+
[
|
| 1997 |
+
"pose_action_detail",
|
| 1998 |
+
0.3333
|
| 1999 |
+
],
|
| 2000 |
+
[
|
| 2001 |
+
"text",
|
| 2002 |
+
0.0
|
| 2003 |
+
]
|
| 2004 |
+
],
|
| 2005 |
+
"wiki_vote_count": 3,
|
| 2006 |
+
"wiki_link_count": 7
|
| 2007 |
+
}
|
| 2008 |
+
],
|
| 2009 |
+
"hold": [
|
| 2010 |
+
{
|
| 2011 |
+
"tag": "helmet",
|
| 2012 |
+
"count": 24793,
|
| 2013 |
+
"signal": "both",
|
| 2014 |
+
"assigned_categories": [],
|
| 2015 |
+
"top_fused": [
|
| 2016 |
+
[
|
| 2017 |
+
"clothing_detail",
|
| 2018 |
+
0.2934
|
| 2019 |
+
],
|
| 2020 |
+
[
|
| 2021 |
+
"body_decor",
|
| 2022 |
+
0.0901
|
| 2023 |
+
],
|
| 2024 |
+
[
|
| 2025 |
+
"resolution",
|
| 2026 |
+
0.0744
|
| 2027 |
+
]
|
| 2028 |
+
],
|
| 2029 |
+
"top_tfidf": [
|
| 2030 |
+
[
|
| 2031 |
+
"resolution",
|
| 2032 |
+
0.124
|
| 2033 |
+
],
|
| 2034 |
+
[
|
| 2035 |
+
"count",
|
| 2036 |
+
0.1153
|
| 2037 |
+
],
|
| 2038 |
+
[
|
| 2039 |
+
"body_type",
|
| 2040 |
+
0.0904
|
| 2041 |
+
]
|
| 2042 |
+
],
|
| 2043 |
+
"top_wiki": [
|
| 2044 |
+
[
|
| 2045 |
+
"clothing_detail",
|
| 2046 |
+
0.7143
|
| 2047 |
+
],
|
| 2048 |
+
[
|
| 2049 |
+
"franchise_series",
|
| 2050 |
+
0.1429
|
| 2051 |
+
],
|
| 2052 |
+
[
|
| 2053 |
+
"body_decor",
|
| 2054 |
+
0.1429
|
| 2055 |
+
]
|
| 2056 |
+
],
|
| 2057 |
+
"wiki_vote_count": 7,
|
| 2058 |
+
"wiki_link_count": 87
|
| 2059 |
+
},
|
| 2060 |
+
{
|
| 2061 |
+
"tag": "poster",
|
| 2062 |
+
"count": 6434,
|
| 2063 |
+
"signal": "both",
|
| 2064 |
+
"assigned_categories": [],
|
| 2065 |
+
"top_fused": [
|
| 2066 |
+
[
|
| 2067 |
+
"organization",
|
| 2068 |
+
0.4334
|
| 2069 |
+
],
|
| 2070 |
+
[
|
| 2071 |
+
"objects_props",
|
| 2072 |
+
0.1036
|
| 2073 |
+
],
|
| 2074 |
+
[
|
| 2075 |
+
"pose_action_detail",
|
| 2076 |
+
0.0671
|
| 2077 |
+
]
|
| 2078 |
+
],
|
| 2079 |
+
"top_tfidf": [
|
| 2080 |
+
[
|
| 2081 |
+
"objects_props",
|
| 2082 |
+
0.1727
|
| 2083 |
+
],
|
| 2084 |
+
[
|
| 2085 |
+
"pose_action_detail",
|
| 2086 |
+
0.1119
|
| 2087 |
+
],
|
| 2088 |
+
[
|
| 2089 |
+
"background_composition",
|
| 2090 |
+
0.078
|
| 2091 |
+
]
|
| 2092 |
+
],
|
| 2093 |
+
"top_wiki": [
|
| 2094 |
+
[
|
| 2095 |
+
"organization",
|
| 2096 |
+
1.0
|
| 2097 |
+
],
|
| 2098 |
+
[
|
| 2099 |
+
"text",
|
| 2100 |
+
0.0
|
| 2101 |
+
],
|
| 2102 |
+
[
|
| 2103 |
+
"franchise_series",
|
| 2104 |
+
0.0
|
| 2105 |
+
]
|
| 2106 |
+
],
|
| 2107 |
+
"wiki_vote_count": 2,
|
| 2108 |
+
"wiki_link_count": 19
|
| 2109 |
+
},
|
| 2110 |
+
{
|
| 2111 |
+
"tag": "bottomless_female",
|
| 2112 |
+
"count": 4337,
|
| 2113 |
+
"signal": "both",
|
| 2114 |
+
"assigned_categories": [],
|
| 2115 |
+
"top_fused": [
|
| 2116 |
+
[
|
| 2117 |
+
"clothing_detail",
|
| 2118 |
+
0.4343
|
| 2119 |
+
],
|
| 2120 |
+
[
|
| 2121 |
+
"pose_action_detail",
|
| 2122 |
+
0.0644
|
| 2123 |
+
],
|
| 2124 |
+
[
|
| 2125 |
+
"gaze_detail",
|
| 2126 |
+
0.0573
|
| 2127 |
+
]
|
| 2128 |
+
],
|
| 2129 |
+
"top_tfidf": [
|
| 2130 |
+
[
|
| 2131 |
+
"pose_action_detail",
|
| 2132 |
+
0.1074
|
| 2133 |
+
],
|
| 2134 |
+
[
|
| 2135 |
+
"gaze_detail",
|
| 2136 |
+
0.0954
|
| 2137 |
+
],
|
| 2138 |
+
[
|
| 2139 |
+
"expression_detail",
|
| 2140 |
+
0.0898
|
| 2141 |
+
]
|
| 2142 |
+
],
|
| 2143 |
+
"top_wiki": [
|
| 2144 |
+
[
|
| 2145 |
+
"clothing_detail",
|
| 2146 |
+
1.0
|
| 2147 |
+
],
|
| 2148 |
+
[
|
| 2149 |
+
"text",
|
| 2150 |
+
0.0
|
| 2151 |
+
],
|
| 2152 |
+
[
|
| 2153 |
+
"franchise_series",
|
| 2154 |
+
0.0
|
| 2155 |
+
]
|
| 2156 |
+
],
|
| 2157 |
+
"wiki_vote_count": 11,
|
| 2158 |
+
"wiki_link_count": 17
|
| 2159 |
+
},
|
| 2160 |
+
{
|
| 2161 |
+
"tag": "guardians_of_the_galaxy",
|
| 2162 |
+
"count": 3013,
|
| 2163 |
+
"signal": "tfidf_only",
|
| 2164 |
+
"assigned_categories": [],
|
| 2165 |
+
"top_fused": [
|
| 2166 |
+
[
|
| 2167 |
+
"style",
|
| 2168 |
+
0.0935
|
| 2169 |
+
],
|
| 2170 |
+
[
|
| 2171 |
+
"pose_action_detail",
|
| 2172 |
+
0.0824
|
| 2173 |
+
],
|
| 2174 |
+
[
|
| 2175 |
+
"expression_detail",
|
| 2176 |
+
0.0768
|
| 2177 |
+
]
|
| 2178 |
+
],
|
| 2179 |
+
"top_tfidf": [
|
| 2180 |
+
[
|
| 2181 |
+
"style",
|
| 2182 |
+
0.0935
|
| 2183 |
+
],
|
| 2184 |
+
[
|
| 2185 |
+
"pose_action_detail",
|
| 2186 |
+
0.0824
|
| 2187 |
+
],
|
| 2188 |
+
[
|
| 2189 |
+
"expression_detail",
|
| 2190 |
+
0.0768
|
| 2191 |
+
]
|
| 2192 |
+
],
|
| 2193 |
+
"top_wiki": [],
|
| 2194 |
+
"wiki_vote_count": 0,
|
| 2195 |
+
"wiki_link_count": 5
|
| 2196 |
+
},
|
| 2197 |
+
{
|
| 2198 |
+
"tag": "barbel_(anatomy)",
|
| 2199 |
+
"count": 2627,
|
| 2200 |
+
"signal": "tfidf_only",
|
| 2201 |
+
"assigned_categories": [],
|
| 2202 |
+
"top_fused": [
|
| 2203 |
+
[
|
| 2204 |
+
"anatomy_features",
|
| 2205 |
+
0.1965
|
| 2206 |
+
],
|
| 2207 |
+
[
|
| 2208 |
+
"color_markings",
|
| 2209 |
+
0.1306
|
| 2210 |
+
],
|
| 2211 |
+
[
|
| 2212 |
+
"perspective",
|
| 2213 |
+
0.0843
|
| 2214 |
+
]
|
| 2215 |
+
],
|
| 2216 |
+
"top_tfidf": [
|
| 2217 |
+
[
|
| 2218 |
+
"anatomy_features",
|
| 2219 |
+
0.1965
|
| 2220 |
+
],
|
| 2221 |
+
[
|
| 2222 |
+
"color_markings",
|
| 2223 |
+
0.1306
|
| 2224 |
+
],
|
| 2225 |
+
[
|
| 2226 |
+
"perspective",
|
| 2227 |
+
0.0843
|
| 2228 |
+
]
|
| 2229 |
+
],
|
| 2230 |
+
"top_wiki": [],
|
| 2231 |
+
"wiki_vote_count": 0,
|
| 2232 |
+
"wiki_link_count": 5
|
| 2233 |
+
},
|
| 2234 |
+
{
|
| 2235 |
+
"tag": "millie_(helluva_boss)",
|
| 2236 |
+
"count": 2009,
|
| 2237 |
+
"signal": "both",
|
| 2238 |
+
"assigned_categories": [],
|
| 2239 |
+
"top_fused": [
|
| 2240 |
+
[
|
| 2241 |
+
"clothing_detail",
|
| 2242 |
+
0.4248
|
| 2243 |
+
],
|
| 2244 |
+
[
|
| 2245 |
+
"expression_detail",
|
| 2246 |
+
0.0566
|
| 2247 |
+
],
|
| 2248 |
+
[
|
| 2249 |
+
"text",
|
| 2250 |
+
0.0562
|
| 2251 |
+
]
|
| 2252 |
+
],
|
| 2253 |
+
"top_tfidf": [
|
| 2254 |
+
[
|
| 2255 |
+
"expression_detail",
|
| 2256 |
+
0.0944
|
| 2257 |
+
],
|
| 2258 |
+
[
|
| 2259 |
+
"text",
|
| 2260 |
+
0.0936
|
| 2261 |
+
],
|
| 2262 |
+
[
|
| 2263 |
+
"body_decor",
|
| 2264 |
+
0.09
|
| 2265 |
+
]
|
| 2266 |
+
],
|
| 2267 |
+
"top_wiki": [
|
| 2268 |
+
[
|
| 2269 |
+
"clothing_detail",
|
| 2270 |
+
1.0
|
| 2271 |
+
],
|
| 2272 |
+
[
|
| 2273 |
+
"text",
|
| 2274 |
+
0.0
|
| 2275 |
+
],
|
| 2276 |
+
[
|
| 2277 |
+
"franchise_series",
|
| 2278 |
+
0.0
|
| 2279 |
+
]
|
| 2280 |
+
],
|
| 2281 |
+
"wiki_vote_count": 1,
|
| 2282 |
+
"wiki_link_count": 13
|
| 2283 |
+
},
|
| 2284 |
+
{
|
| 2285 |
+
"tag": "hill",
|
| 2286 |
+
"count": 1443,
|
| 2287 |
+
"signal": "tfidf_only",
|
| 2288 |
+
"assigned_categories": [],
|
| 2289 |
+
"top_fused": [
|
| 2290 |
+
[
|
| 2291 |
+
"background_composition",
|
| 2292 |
+
0.1306
|
| 2293 |
+
],
|
| 2294 |
+
[
|
| 2295 |
+
"objects_props",
|
| 2296 |
+
0.0996
|
| 2297 |
+
],
|
| 2298 |
+
[
|
| 2299 |
+
"style",
|
| 2300 |
+
0.0904
|
| 2301 |
+
]
|
| 2302 |
+
],
|
| 2303 |
+
"top_tfidf": [
|
| 2304 |
+
[
|
| 2305 |
+
"background_composition",
|
| 2306 |
+
0.1306
|
| 2307 |
+
],
|
| 2308 |
+
[
|
| 2309 |
+
"objects_props",
|
| 2310 |
+
0.0996
|
| 2311 |
+
],
|
| 2312 |
+
[
|
| 2313 |
+
"style",
|
| 2314 |
+
0.0904
|
| 2315 |
+
]
|
| 2316 |
+
],
|
| 2317 |
+
"top_wiki": [],
|
| 2318 |
+
"wiki_vote_count": 0,
|
| 2319 |
+
"wiki_link_count": 3
|
| 2320 |
+
},
|
| 2321 |
+
{
|
| 2322 |
+
"tag": "electric_fan",
|
| 2323 |
+
"count": 1093,
|
| 2324 |
+
"signal": "tfidf_only",
|
| 2325 |
+
"assigned_categories": [],
|
| 2326 |
+
"top_fused": [
|
| 2327 |
+
[
|
| 2328 |
+
"objects_props",
|
| 2329 |
+
0.1294
|
| 2330 |
+
],
|
| 2331 |
+
[
|
| 2332 |
+
"pose_action_detail",
|
| 2333 |
+
0.0944
|
| 2334 |
+
],
|
| 2335 |
+
[
|
| 2336 |
+
"expression_detail",
|
| 2337 |
+
0.0939
|
| 2338 |
+
]
|
| 2339 |
+
],
|
| 2340 |
+
"top_tfidf": [
|
| 2341 |
+
[
|
| 2342 |
+
"objects_props",
|
| 2343 |
+
0.1294
|
| 2344 |
+
],
|
| 2345 |
+
[
|
| 2346 |
+
"pose_action_detail",
|
| 2347 |
+
0.0944
|
| 2348 |
+
],
|
| 2349 |
+
[
|
| 2350 |
+
"expression_detail",
|
| 2351 |
+
0.0939
|
| 2352 |
+
]
|
| 2353 |
+
],
|
| 2354 |
+
"top_wiki": [],
|
| 2355 |
+
"wiki_vote_count": 0,
|
| 2356 |
+
"wiki_link_count": 4
|
| 2357 |
+
},
|
| 2358 |
+
{
|
| 2359 |
+
"tag": "gammamon",
|
| 2360 |
+
"count": 1023,
|
| 2361 |
+
"signal": "both",
|
| 2362 |
+
"assigned_categories": [],
|
| 2363 |
+
"top_fused": [
|
| 2364 |
+
[
|
| 2365 |
+
"franchise_series",
|
| 2366 |
+
0.4428
|
| 2367 |
+
],
|
| 2368 |
+
[
|
| 2369 |
+
"body_type",
|
| 2370 |
+
0.0619
|
| 2371 |
+
],
|
| 2372 |
+
[
|
| 2373 |
+
"gender",
|
| 2374 |
+
0.0359
|
| 2375 |
+
]
|
| 2376 |
+
],
|
| 2377 |
+
"top_tfidf": [
|
| 2378 |
+
[
|
| 2379 |
+
"body_type",
|
| 2380 |
+
0.1032
|
| 2381 |
+
],
|
| 2382 |
+
[
|
| 2383 |
+
"franchise_series",
|
| 2384 |
+
0.0714
|
| 2385 |
+
],
|
| 2386 |
+
[
|
| 2387 |
+
"gender",
|
| 2388 |
+
0.0598
|
| 2389 |
+
]
|
| 2390 |
+
],
|
| 2391 |
+
"top_wiki": [
|
| 2392 |
+
[
|
| 2393 |
+
"franchise_series",
|
| 2394 |
+
1.0
|
| 2395 |
+
],
|
| 2396 |
+
[
|
| 2397 |
+
"text",
|
| 2398 |
+
0.0
|
| 2399 |
+
],
|
| 2400 |
+
[
|
| 2401 |
+
"background_composition",
|
| 2402 |
+
0.0
|
| 2403 |
+
]
|
| 2404 |
+
],
|
| 2405 |
+
"wiki_vote_count": 2,
|
| 2406 |
+
"wiki_link_count": 13
|
| 2407 |
+
},
|
| 2408 |
+
{
|
| 2409 |
+
"tag": "zazush-una",
|
| 2410 |
+
"count": 971,
|
| 2411 |
+
"signal": "none",
|
| 2412 |
+
"assigned_categories": [],
|
| 2413 |
+
"top_fused": [
|
| 2414 |
+
[
|
| 2415 |
+
"text",
|
| 2416 |
+
0.0
|
| 2417 |
+
],
|
| 2418 |
+
[
|
| 2419 |
+
"franchise_series",
|
| 2420 |
+
0.0
|
| 2421 |
+
],
|
| 2422 |
+
[
|
| 2423 |
+
"background_composition",
|
| 2424 |
+
0.0
|
| 2425 |
+
]
|
| 2426 |
+
],
|
| 2427 |
+
"top_tfidf": [],
|
| 2428 |
+
"top_wiki": [],
|
| 2429 |
+
"wiki_vote_count": 0,
|
| 2430 |
+
"wiki_link_count": 0
|
| 2431 |
+
},
|
| 2432 |
+
{
|
| 2433 |
+
"tag": "radio",
|
| 2434 |
+
"count": 842,
|
| 2435 |
+
"signal": "tfidf_only",
|
| 2436 |
+
"assigned_categories": [],
|
| 2437 |
+
"top_fused": [
|
| 2438 |
+
[
|
| 2439 |
+
"objects_props",
|
| 2440 |
+
0.2006
|
| 2441 |
+
],
|
| 2442 |
+
[
|
| 2443 |
+
"pose_action_detail",
|
| 2444 |
+
0.1172
|
| 2445 |
+
],
|
| 2446 |
+
[
|
| 2447 |
+
"background_composition",
|
| 2448 |
+
0.0925
|
| 2449 |
+
]
|
| 2450 |
+
],
|
| 2451 |
+
"top_tfidf": [
|
| 2452 |
+
[
|
| 2453 |
+
"objects_props",
|
| 2454 |
+
0.2006
|
| 2455 |
+
],
|
| 2456 |
+
[
|
| 2457 |
+
"pose_action_detail",
|
| 2458 |
+
0.1172
|
| 2459 |
+
],
|
| 2460 |
+
[
|
| 2461 |
+
"background_composition",
|
| 2462 |
+
0.0925
|
| 2463 |
+
]
|
| 2464 |
+
],
|
| 2465 |
+
"top_wiki": [],
|
| 2466 |
+
"wiki_vote_count": 0,
|
| 2467 |
+
"wiki_link_count": 5
|
| 2468 |
+
},
|
| 2469 |
+
{
|
| 2470 |
+
"tag": "by_bambii_dog",
|
| 2471 |
+
"count": 811,
|
| 2472 |
+
"signal": "none",
|
| 2473 |
+
"assigned_categories": [],
|
| 2474 |
+
"top_fused": [
|
| 2475 |
+
[
|
| 2476 |
+
"text",
|
| 2477 |
+
0.0
|
| 2478 |
+
],
|
| 2479 |
+
[
|
| 2480 |
+
"franchise_series",
|
| 2481 |
+
0.0
|
| 2482 |
+
],
|
| 2483 |
+
[
|
| 2484 |
+
"background_composition",
|
| 2485 |
+
0.0
|
| 2486 |
+
]
|
| 2487 |
+
],
|
| 2488 |
+
"top_tfidf": [],
|
| 2489 |
+
"top_wiki": [],
|
| 2490 |
+
"wiki_vote_count": 0,
|
| 2491 |
+
"wiki_link_count": 0
|
| 2492 |
+
},
|
| 2493 |
+
{
|
| 2494 |
+
"tag": "cabin",
|
| 2495 |
+
"count": 694,
|
| 2496 |
+
"signal": "tfidf_only",
|
| 2497 |
+
"assigned_categories": [],
|
| 2498 |
+
"top_fused": [
|
| 2499 |
+
[
|
| 2500 |
+
"style",
|
| 2501 |
+
0.0748
|
| 2502 |
+
],
|
| 2503 |
+
[
|
| 2504 |
+
"organization",
|
| 2505 |
+
0.074
|
| 2506 |
+
],
|
| 2507 |
+
[
|
| 2508 |
+
"background_composition",
|
| 2509 |
+
0.0733
|
| 2510 |
+
]
|
| 2511 |
+
],
|
| 2512 |
+
"top_tfidf": [
|
| 2513 |
+
[
|
| 2514 |
+
"style",
|
| 2515 |
+
0.0748
|
| 2516 |
+
],
|
| 2517 |
+
[
|
| 2518 |
+
"organization",
|
| 2519 |
+
0.074
|
| 2520 |
+
],
|
| 2521 |
+
[
|
| 2522 |
+
"background_composition",
|
| 2523 |
+
0.0733
|
| 2524 |
+
]
|
| 2525 |
+
],
|
| 2526 |
+
"top_wiki": [],
|
| 2527 |
+
"wiki_vote_count": 0,
|
| 2528 |
+
"wiki_link_count": 0
|
| 2529 |
+
},
|
| 2530 |
+
{
|
| 2531 |
+
"tag": "by_luckypan",
|
| 2532 |
+
"count": 681,
|
| 2533 |
+
"signal": "none",
|
| 2534 |
+
"assigned_categories": [],
|
| 2535 |
+
"top_fused": [
|
| 2536 |
+
[
|
| 2537 |
+
"text",
|
| 2538 |
+
0.0
|
| 2539 |
+
],
|
| 2540 |
+
[
|
| 2541 |
+
"franchise_series",
|
| 2542 |
+
0.0
|
| 2543 |
+
],
|
| 2544 |
+
[
|
| 2545 |
+
"background_composition",
|
| 2546 |
+
0.0
|
| 2547 |
+
]
|
| 2548 |
+
],
|
| 2549 |
+
"top_tfidf": [],
|
| 2550 |
+
"top_wiki": [],
|
| 2551 |
+
"wiki_vote_count": 0,
|
| 2552 |
+
"wiki_link_count": 0
|
| 2553 |
+
},
|
| 2554 |
+
{
|
| 2555 |
+
"tag": "silverstream_(mlp)",
|
| 2556 |
+
"count": 635,
|
| 2557 |
+
"signal": "tfidf_only",
|
| 2558 |
+
"assigned_categories": [],
|
| 2559 |
+
"top_fused": [
|
| 2560 |
+
[
|
| 2561 |
+
"body_type",
|
| 2562 |
+
0.37
|
| 2563 |
+
],
|
| 2564 |
+
[
|
| 2565 |
+
"resolution",
|
| 2566 |
+
0.141
|
| 2567 |
+
],
|
| 2568 |
+
[
|
| 2569 |
+
"count",
|
| 2570 |
+
0.0959
|
| 2571 |
+
]
|
| 2572 |
+
],
|
| 2573 |
+
"top_tfidf": [
|
| 2574 |
+
[
|
| 2575 |
+
"body_type",
|
| 2576 |
+
0.37
|
| 2577 |
+
],
|
| 2578 |
+
[
|
| 2579 |
+
"resolution",
|
| 2580 |
+
0.141
|
| 2581 |
+
],
|
| 2582 |
+
[
|
| 2583 |
+
"count",
|
| 2584 |
+
0.0959
|
| 2585 |
+
]
|
| 2586 |
+
],
|
| 2587 |
+
"top_wiki": [],
|
| 2588 |
+
"wiki_vote_count": 0,
|
| 2589 |
+
"wiki_link_count": 13
|
| 2590 |
+
},
|
| 2591 |
+
{
|
| 2592 |
+
"tag": "by_angrboda",
|
| 2593 |
+
"count": 622,
|
| 2594 |
+
"signal": "none",
|
| 2595 |
+
"assigned_categories": [],
|
| 2596 |
+
"top_fused": [
|
| 2597 |
+
[
|
| 2598 |
+
"text",
|
| 2599 |
+
0.0
|
| 2600 |
+
],
|
| 2601 |
+
[
|
| 2602 |
+
"franchise_series",
|
| 2603 |
+
0.0
|
| 2604 |
+
],
|
| 2605 |
+
[
|
| 2606 |
+
"background_composition",
|
| 2607 |
+
0.0
|
| 2608 |
+
]
|
| 2609 |
+
],
|
| 2610 |
+
"top_tfidf": [],
|
| 2611 |
+
"top_wiki": [],
|
| 2612 |
+
"wiki_vote_count": 0,
|
| 2613 |
+
"wiki_link_count": 0
|
| 2614 |
+
},
|
| 2615 |
+
{
|
| 2616 |
+
"tag": "glistening_arms",
|
| 2617 |
+
"count": 599,
|
| 2618 |
+
"signal": "tfidf_only",
|
| 2619 |
+
"assigned_categories": [],
|
| 2620 |
+
"top_fused": [
|
| 2621 |
+
[
|
| 2622 |
+
"color_markings",
|
| 2623 |
+
0.3229
|
| 2624 |
+
],
|
| 2625 |
+
[
|
| 2626 |
+
"gaze_detail",
|
| 2627 |
+
0.196
|
| 2628 |
+
],
|
| 2629 |
+
[
|
| 2630 |
+
"anatomy_features",
|
| 2631 |
+
0.1557
|
| 2632 |
+
]
|
| 2633 |
+
],
|
| 2634 |
+
"top_tfidf": [
|
| 2635 |
+
[
|
| 2636 |
+
"color_markings",
|
| 2637 |
+
0.3229
|
| 2638 |
+
],
|
| 2639 |
+
[
|
| 2640 |
+
"gaze_detail",
|
| 2641 |
+
0.196
|
| 2642 |
+
],
|
| 2643 |
+
[
|
| 2644 |
+
"anatomy_features",
|
| 2645 |
+
0.1557
|
| 2646 |
+
]
|
| 2647 |
+
],
|
| 2648 |
+
"top_wiki": [],
|
| 2649 |
+
"wiki_vote_count": 0,
|
| 2650 |
+
"wiki_link_count": 0
|
| 2651 |
+
},
|
| 2652 |
+
{
|
| 2653 |
+
"tag": "by_evilymasterful",
|
| 2654 |
+
"count": 571,
|
| 2655 |
+
"signal": "none",
|
| 2656 |
+
"assigned_categories": [],
|
| 2657 |
+
"top_fused": [
|
| 2658 |
+
[
|
| 2659 |
+
"text",
|
| 2660 |
+
0.0
|
| 2661 |
+
],
|
| 2662 |
+
[
|
| 2663 |
+
"franchise_series",
|
| 2664 |
+
0.0
|
| 2665 |
+
],
|
| 2666 |
+
[
|
| 2667 |
+
"background_composition",
|
| 2668 |
+
0.0
|
| 2669 |
+
]
|
| 2670 |
+
],
|
| 2671 |
+
"top_tfidf": [],
|
| 2672 |
+
"top_wiki": [],
|
| 2673 |
+
"wiki_vote_count": 0,
|
| 2674 |
+
"wiki_link_count": 0
|
| 2675 |
+
},
|
| 2676 |
+
{
|
| 2677 |
+
"tag": "by_0laffson",
|
| 2678 |
+
"count": 563,
|
| 2679 |
+
"signal": "none",
|
| 2680 |
+
"assigned_categories": [],
|
| 2681 |
+
"top_fused": [
|
| 2682 |
+
[
|
| 2683 |
+
"text",
|
| 2684 |
+
0.0
|
| 2685 |
+
],
|
| 2686 |
+
[
|
| 2687 |
+
"franchise_series",
|
| 2688 |
+
0.0
|
| 2689 |
+
],
|
| 2690 |
+
[
|
| 2691 |
+
"background_composition",
|
| 2692 |
+
0.0
|
| 2693 |
+
]
|
| 2694 |
+
],
|
| 2695 |
+
"top_tfidf": [],
|
| 2696 |
+
"top_wiki": [],
|
| 2697 |
+
"wiki_vote_count": 0,
|
| 2698 |
+
"wiki_link_count": 0
|
| 2699 |
+
},
|
| 2700 |
+
{
|
| 2701 |
+
"tag": "daffy_duck",
|
| 2702 |
+
"count": 562,
|
| 2703 |
+
"signal": "both",
|
| 2704 |
+
"assigned_categories": [],
|
| 2705 |
+
"top_fused": [
|
| 2706 |
+
[
|
| 2707 |
+
"body_type",
|
| 2708 |
+
0.4204
|
| 2709 |
+
],
|
| 2710 |
+
[
|
| 2711 |
+
"objects_props",
|
| 2712 |
+
0.0542
|
| 2713 |
+
],
|
| 2714 |
+
[
|
| 2715 |
+
"species",
|
| 2716 |
+
0.0486
|
| 2717 |
+
]
|
| 2718 |
+
],
|
| 2719 |
+
"top_tfidf": [
|
| 2720 |
+
[
|
| 2721 |
+
"objects_props",
|
| 2722 |
+
0.0904
|
| 2723 |
+
],
|
| 2724 |
+
[
|
| 2725 |
+
"species",
|
| 2726 |
+
0.081
|
| 2727 |
+
],
|
| 2728 |
+
[
|
| 2729 |
+
"pose_action_detail",
|
| 2730 |
+
0.0674
|
| 2731 |
+
]
|
| 2732 |
+
],
|
| 2733 |
+
"top_wiki": [
|
| 2734 |
+
[
|
| 2735 |
+
"body_type",
|
| 2736 |
+
1.0
|
| 2737 |
+
],
|
| 2738 |
+
[
|
| 2739 |
+
"text",
|
| 2740 |
+
0.0
|
| 2741 |
+
],
|
| 2742 |
+
[
|
| 2743 |
+
"franchise_series",
|
| 2744 |
+
0.0
|
| 2745 |
+
]
|
| 2746 |
+
],
|
| 2747 |
+
"wiki_vote_count": 1,
|
| 2748 |
+
"wiki_link_count": 6
|
| 2749 |
+
}
|
| 2750 |
+
]
|
| 2751 |
+
}
|
| 2752 |
+
}
|
| 2753 |
+
}
|
data/runtime_metrics/ui_pipeline_timings.jsonl
CHANGED
|
@@ -1,3 +1,16 @@
|
|
| 1 |
{"timestamp_utc": "2026-03-02T12:44:26Z", "stages_s": {"preprocess": 7.90999984019436e-05, "rewrite": 1.9136111999978311, "structural": 1.0946640000038315, "probe": 0.5859509000001708, "retrieval": 4.595289600001706, "selection": 37.53351300000213, "implication_expansion": 0.15133090000017546, "prompt_composition": 6.299999949987978e-05, "group_display": 0.04701460000069346}, "total_s": 45.927563900004316, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 2 |
{"timestamp_utc": "2026-03-02T16:08:08Z", "stages_s": {"preprocess": 6.989999383222312e-05, "rewrite": 3.0064916999981506, "structural": 4.2000028770416975e-06, "probe": 3.01228209999681, "retrieval": 3.3860946000058902, "selection": 5.285027000005357, "implication_expansion": 0.147530000002007, "prompt_composition": 3.850000211969018e-05, "group_display": 0.10624819999793544}, "total_s": 14.949083599989535, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 3 |
{"timestamp_utc": "2026-03-02T16:08:37Z", "stages_s": {"preprocess": 7.179999374784529e-05, "rewrite": 4.608368299988797, "structural": 3.6999990697950125e-06, "probe": 1.5999976312741637e-06, "retrieval": 3.4574174999870593, "selection": 8.8562099999981, "implication_expansion": 0.14937499999359716, "prompt_composition": 3.650000144261867e-05, "group_display": 0.04632819999824278}, "total_s": 17.122792900001514, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{"timestamp_utc": "2026-03-02T12:44:26Z", "stages_s": {"preprocess": 7.90999984019436e-05, "rewrite": 1.9136111999978311, "structural": 1.0946640000038315, "probe": 0.5859509000001708, "retrieval": 4.595289600001706, "selection": 37.53351300000213, "implication_expansion": 0.15133090000017546, "prompt_composition": 6.299999949987978e-05, "group_display": 0.04701460000069346}, "total_s": 45.927563900004316, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 2 |
{"timestamp_utc": "2026-03-02T16:08:08Z", "stages_s": {"preprocess": 6.989999383222312e-05, "rewrite": 3.0064916999981506, "structural": 4.2000028770416975e-06, "probe": 3.01228209999681, "retrieval": 3.3860946000058902, "selection": 5.285027000005357, "implication_expansion": 0.147530000002007, "prompt_composition": 3.850000211969018e-05, "group_display": 0.10624819999793544}, "total_s": 14.949083599989535, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 3 |
{"timestamp_utc": "2026-03-02T16:08:37Z", "stages_s": {"preprocess": 7.179999374784529e-05, "rewrite": 4.608368299988797, "structural": 3.6999990697950125e-06, "probe": 1.5999976312741637e-06, "retrieval": 3.4574174999870593, "selection": 8.8562099999981, "implication_expansion": 0.14937499999359716, "prompt_composition": 3.650000144261867e-05, "group_display": 0.04632819999824278}, "total_s": 17.122792900001514, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 4 |
+
{"timestamp_utc": "2026-03-06T21:33:29Z", "stages_s": {"preprocess": 9.789998875930905e-05, "rewrite": 7.193461999995634, "structural": 3.3999676816165447e-06, "probe": 0.9721586999949068, "retrieval": 2.3267829000251368, "selection": 1.0979214000399224, "implication_expansion": 0.2668229000410065, "prompt_composition": 3.819999983534217e-05, "group_display": 0.08292249997612089}, "total_s": 11.945365399995353, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 5 |
+
{"timestamp_utc": "2026-03-06T21:38:28Z", "stages_s": {"preprocess": 1.9300030544400215e-05, "rewrite": 1.5391526999883354, "structural": 0.5504020000225864, "probe": 0.2567070999648422, "retrieval": 0.5546861999901012, "selection": 10.549223000009079, "implication_expansion": 3.8300000596791506e-05, "prompt_composition": 3.0399998649954796e-05, "group_display": 0.025673600030131638}, "total_s": 13.487254999985453, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 6 |
+
{"timestamp_utc": "2026-03-06T22:33:39Z", "stages_s": {"preprocess": 0.00016080000204965472, "rewrite": 1.9639222000259906, "structural": 0.7869719000300393, "probe": 0.503746600006707, "retrieval": 2.3870767999906093, "selection": 1.7242823000415228, "implication_expansion": 0.2691484999959357, "prompt_composition": 4.0899962186813354e-05, "group_display": 0.07882960001006722}, "total_s": 7.719753100012895, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 7 |
+
{"timestamp_utc": "2026-03-06T22:44:28Z", "stages_s": {"preprocess": 7.639999967068434e-05, "rewrite": 2.190264799981378, "structural": 0.5781105000060052, "probe": 0.23918199999025092, "retrieval": 2.4492038000025786, "selection": 0.4502618000260554, "implication_expansion": 0.1491194000118412, "prompt_composition": 3.100000321865082e-05, "group_display": 0.08213570003863424}, "total_s": 6.144094199989922, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 8 |
+
{"timestamp_utc": "2026-03-06T22:45:12Z", "stages_s": {"preprocess": 7.840001489967108e-05, "rewrite": 3.431444200046826, "structural": 3.400025889277458e-06, "probe": 1.3075993999955244, "retrieval": 2.425993000040762, "selection": 6.9358377999742515, "implication_expansion": 0.14671080000698566, "prompt_composition": 3.8300000596791506e-05, "group_display": 0.0784414000227116}, "total_s": 14.331572700000834, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 9 |
+
{"timestamp_utc": "2026-03-07T01:26:11Z", "stages_s": {"preprocess": 0.00019039999460801482, "rewrite": 1.972552299965173, "structural": 0.45487100002355874, "probe": 0.5801937999785878, "retrieval": 2.885647799994331, "selection": 2.3159614999894984, "implication_expansion": 0.27620089997071773, "prompt_composition": 3.2800016924738884e-05, "group_display": 0.07873340003425255}, "total_s": 8.597678899997845, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 10 |
+
{"timestamp_utc": "2026-03-07T01:37:49Z", "stages_s": {"preprocess": 0.00017070001922547817, "rewrite": 2.3397521000006236, "structural": 0.2748573999851942, "probe": 0.9656308999983594, "retrieval": 2.379494299995713, "selection": 1.9972827999736182, "implication_expansion": 0.26782700000330806, "prompt_composition": 2.889998722821474e-05, "group_display": 0.0790697000338696}, "total_s": 8.337543100002222, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 11 |
+
{"timestamp_utc": "2026-03-07T02:36:05Z", "stages_s": {"preprocess": 0.00019479996990412474, "rewrite": 2.3221199000254273, "structural": 0.8951652999967337, "probe": 0.9059996000141837, "retrieval": 7.6194937999825925, "selection": 10.099894999992102, "implication_expansion": 0.27516779996221885, "prompt_composition": 3.519997699186206e-05, "group_display": 0.08530839998275042}, "total_s": 22.24463780003134, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 12 |
+
{"timestamp_utc": "2026-03-07T02:37:16Z", "stages_s": {"preprocess": 2.0799983758479357e-05, "rewrite": 4.862703899969347, "structural": 3.8000289350748062e-06, "probe": 2.00001522898674e-06, "retrieval": 0.49216449999948964, "selection": 7.8598584000137635, "implication_expansion": 2.9799994081258774e-05, "prompt_composition": 2.4500011932104826e-05, "group_display": 0.03247090004151687}, "total_s": 13.258490999985952, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 13 |
+
{"timestamp_utc": "2026-03-07T02:59:49Z", "stages_s": {"preprocess": 8.230004459619522e-05, "rewrite": 1.6606152999447659, "structural": 0.6319172999938019, "probe": 0.32008590002078563, "retrieval": 2.676332700008061, "selection": 2.007969399972353, "implication_expansion": 0.26375650003319606, "prompt_composition": 3.090000245720148e-05, "group_display": 0.08008919999701902}, "total_s": 7.673184300016146, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 14 |
+
{"timestamp_utc": "2026-03-07T03:01:39Z", "stages_s": {"preprocess": 1.6900012269616127e-05, "rewrite": 1.713694000034593, "structural": 5.799985956400633e-06, "probe": 0.049874700023792684, "retrieval": 0.35970670002279803, "selection": 0.9267913000076078, "implication_expansion": 3.909994848072529e-05, "prompt_composition": 3.7299992982298136e-05, "group_display": 0.026757099956739694}, "total_s": 3.089661000005435, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 15 |
+
{"timestamp_utc": "2026-03-07T03:09:53Z", "stages_s": {"preprocess": 0.00012510002125054598, "rewrite": 2.249713899975177, "structural": 0.5107482000021264, "probe": 3.300025127828121e-06, "retrieval": 2.3757353999535553, "selection": 2.9089593999669887, "implication_expansion": 0.2682994999922812, "prompt_composition": 3.070000093430281e-05, "group_display": 0.07982710003852844}, "total_s": 8.42714020004496, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
| 16 |
+
{"timestamp_utc": "2026-03-07T03:37:54Z", "stages_s": {"preprocess": 0.00011760002234950662, "rewrite": 1.968222199997399, "structural": 1.1845426999498159, "probe": 2.214354399999138, "retrieval": 2.452574900002219, "selection": 0.8585481999907643, "implication_expansion": 0.27041040000040084, "prompt_composition": 3.319996176287532e-05, "group_display": 0.07736879994627088}, "total_s": 9.059251800004859, "config": {"timeout_rewrite_s": 45.0, "timeout_struct_s": 45.0, "timeout_probe_s": 45.0, "timeout_select_s": 45.0}}
|
scripts/analyze_hybrid_category_assignment.py
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Analyze hybrid category assignment for uncategorized tags.
|
| 2 |
+
|
| 3 |
+
Implements an offline analysis pipeline (no registry mutation):
|
| 4 |
+
1) TF-IDF centroid scoring over current active categories.
|
| 5 |
+
2) Wiki-link graph scoring from raw wiki pages.
|
| 6 |
+
3) Weighted fusion of TF-IDF and wiki signals.
|
| 7 |
+
4) Optional multi-category auto-assignment when top-2 fused probabilities are strong.
|
| 8 |
+
|
| 9 |
+
Outputs:
|
| 10 |
+
- data/analysis/hybrid_category_assignment_preview.json (default; overwritten)
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import argparse
|
| 15 |
+
import csv
|
| 16 |
+
import json
|
| 17 |
+
import random
|
| 18 |
+
import re
|
| 19 |
+
import sys
|
| 20 |
+
from collections import Counter, defaultdict
|
| 21 |
+
from dataclasses import dataclass
|
| 22 |
+
from pathlib import Path
|
| 23 |
+
from typing import Dict, List, Sequence, Set, Tuple
|
| 24 |
+
|
| 25 |
+
import numpy as np
|
| 26 |
+
|
| 27 |
+
REPO = Path(__file__).resolve().parents[1]
|
| 28 |
+
if str(REPO) not in sys.path:
|
| 29 |
+
sys.path.insert(0, str(REPO))
|
| 30 |
+
|
| 31 |
+
from psq_rag.retrieval.state import get_tfidf_tag_vectors
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
REGISTRY_CSV = REPO / "data" / "analysis" / "category_registry.csv"
|
| 35 |
+
WIKI_PAGES_CSV = REPO / "wiki_pages-2023-08-08.csv"
|
| 36 |
+
OUT_JSON = REPO / "data" / "analysis" / "hybrid_category_assignment_preview.json"
|
| 37 |
+
|
| 38 |
+
SKIP_STATUSES = {"excluded", "review_bucket", "special_exclusion"}
|
| 39 |
+
|
| 40 |
+
LINK_PIPE_RE = re.compile(r"\[\[([^\]|]+)\|[^\]]+\]\]")
|
| 41 |
+
LINK_PLAIN_RE = re.compile(r"\[\[([^\]|]+)\]\]")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@dataclass
class TagScoreRow:
    """Scoring outcome for a single uncategorized tag, as built by ``score_tags``."""

    # Normalized tag name being scored.
    tag: str
    # Usage count taken from the registry (0 when the registry has none).
    count: int
    # Which evidence was available: "both", "tfidf_only", "wiki_only" or "none".
    signal: str
    # Decision for the tag: "single", "multi" or "hold".
    assignment: str
    # Categories proposed by the decision (empty for "hold").
    assigned_categories: List[str]
    # Highest-ranked (category, score) pairs of the fused distribution.
    top_fused: List[Tuple[str, float]]
    # Highest-ranked (category, score) pairs of the TF-IDF distribution.
    top_tfidf: List[Tuple[str, float]]
    # Highest-ranked (category, score) pairs of the wiki-vote distribution.
    top_wiki: List[Tuple[str, float]]
    # Total number of category votes gathered from the tag's wiki links.
    wiki_vote_count: int
    # Number of distinct links extracted from the tag's wiki page.
    wiki_link_count: int
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _normalize_tag(tok: str) -> str:
|
| 59 |
+
return (tok or "").strip().lower().replace(" ", "_")
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _is_enabled(v: str) -> bool:
|
| 63 |
+
return str(v).strip().lower() in {"1", "true", "yes"}
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _softmax(values: np.ndarray, temperature: float) -> np.ndarray:
|
| 67 |
+
if values.size == 0:
|
| 68 |
+
return values
|
| 69 |
+
t = max(1e-6, float(temperature))
|
| 70 |
+
shifted = (values - float(np.max(values))) / t
|
| 71 |
+
expv = np.exp(shifted)
|
| 72 |
+
denom = float(np.sum(expv))
|
| 73 |
+
if denom <= 0.0:
|
| 74 |
+
return np.zeros_like(values, dtype=np.float32)
|
| 75 |
+
return (expv / denom).astype(np.float32)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _topk_with_names(names: Sequence[str], arr: np.ndarray, k: int) -> List[Tuple[str, float]]:
|
| 79 |
+
if arr.size == 0:
|
| 80 |
+
return []
|
| 81 |
+
order = np.argsort(arr)[::-1][: max(1, int(k))]
|
| 82 |
+
return [(names[int(i)], float(arr[int(i)])) for i in order]
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def load_registry(
    path: Path,
) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]], Dict[str, int], Set[str]]:
    """Load the category registry CSV.

    Reads the ``tag``, ``category_name``, ``category_status``,
    ``category_enabled`` and ``tag_fluffyrock_count`` columns.

    Args:
        path: Location of the registry CSV.

    Returns:
        ``(active_category_to_tags, tag_to_active_categories, tag_counts,
        uncategorized_tags)``. A category membership is "active" when its
        status is not in ``SKIP_STATUSES`` and it is enabled. Rows in the
        ``uncategorized_review`` category only feed ``uncategorized_tags``.
        ``tag_counts`` keeps the largest count seen for each tag.
    """
    active_category_tags: Dict[str, Set[str]] = defaultdict(set)
    tag_to_active_categories: Dict[str, Set[str]] = defaultdict(set)
    tag_counts: Dict[str, int] = {}
    uncategorized: Set[str] = set()

    # "utf-8-sig" reads plain UTF-8 unchanged but strips a leading BOM, which
    # would otherwise become part of the first header name and make every
    # lookup of that column return None.
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            tag = _normalize_tag(row.get("tag") or "")
            category = (row.get("category_name") or "").strip()
            status = (row.get("category_status") or "").strip().lower()
            enabled = _is_enabled(row.get("category_enabled") or "")
            if not tag or not category:
                continue

            # Counts may be written as "12" or "12.0"; anything that cannot
            # be parsed (including "nan"/"inf") counts as zero.
            try:
                cnt = int(float(row.get("tag_fluffyrock_count") or "0"))
            except (TypeError, ValueError, OverflowError):
                cnt = 0
            if tag not in tag_counts or cnt > tag_counts[tag]:
                tag_counts[tag] = cnt

            # The review bucket is collected regardless of status/enabled.
            if category == "uncategorized_review":
                uncategorized.add(tag)
                continue

            if status in SKIP_STATUSES or not enabled:
                continue

            active_category_tags[category].add(tag)
            tag_to_active_categories[tag].add(category)

    return active_category_tags, tag_to_active_categories, tag_counts, uncategorized
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def build_centroids(
    active_category_tags: Dict[str, Set[str]],
    tag_to_row: Dict[str, int],
    vectors_norm: np.ndarray,
) -> Tuple[List[str], np.ndarray, Dict[str, int]]:
    """Compute one unit-length centroid per category from its seed tags.

    Categories are visited in sorted name order. A category is dropped when
    fewer than two of its tags have a row in *vectors_norm*, or when the
    mean of those rows has (near) zero norm.

    Returns:
        ``(category_names, centroid_matrix, seed_sizes)`` where row ``i`` of
        the float32 matrix belongs to ``category_names[i]`` and
        ``seed_sizes`` maps each kept category to the number of rows
        averaged. With no usable category the result is
        ``([], zeros((0, 0)), {})``.
    """
    kept_names: List[str] = []
    unit_rows: List[np.ndarray] = []
    seed_sizes: Dict[str, int] = {}

    for name in sorted(active_category_tags):
        row_ids = [tag_to_row[t] for t in active_category_tags[name] if t in tag_to_row]
        if len(row_ids) < 2:
            continue
        mean_vec = vectors_norm[row_ids].mean(axis=0)
        length = float(np.linalg.norm(mean_vec))
        if length <= 1e-12:
            continue
        kept_names.append(name)
        unit_rows.append((mean_vec / length).astype(np.float32))
        seed_sizes[name] = len(row_ids)

    if kept_names:
        return kept_names, np.vstack(unit_rows).astype(np.float32), seed_sizes
    return [], np.zeros((0, 0), dtype=np.float32), {}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def _extract_links_from_body(body: str) -> List[str]:
    """Collect the distinct tag links found in a wiki page body.

    Piped links (``[[target|label]]``) are gathered first, then plain links
    (``[[target]]``). Each target is normalized; empty targets and targets
    starting with a URL scheme, a ``help:``/``e621:``/``tag_group:``
    prefix or ``#`` are dropped. First-occurrence order is kept.
    """
    skipped_prefixes = ("http://", "https://", "help:", "e621:", "tag_group:", "#")
    # A dict is used as an insertion-ordered set.
    ordered: Dict[str, None] = {}
    for pattern in (LINK_PIPE_RE, LINK_PLAIN_RE):
        for target in pattern.findall(body):
            tag = _normalize_tag(target)
            if tag and not tag.startswith(skipped_prefixes):
                ordered.setdefault(tag, None)
    return list(ordered)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def build_wiki_votes(
    wiki_csv: Path,
    uncategorized_tags: Set[str],
    tag_to_active_categories: Dict[str, Set[str]],
) -> Tuple[Dict[str, Counter], Set[str], Dict[str, int]]:
    """Turn wiki-page links into per-category votes for uncategorized tags.

    Only rows whose normalized ``title`` is in *uncategorized_tags* are
    used. Every link on such a page that points at a tag with active
    categories casts one vote for each of those categories.

    Returns:
        ``(wiki_votes_by_tag, tags_with_wiki_page, wiki_link_count_by_tag)``.
        Tags whose page produced no votes are absent from the first mapping.
    """
    votes_by_tag: Dict[str, Counter] = {}
    pages_seen: Set[str] = set()
    links_per_tag: Dict[str, int] = {}

    with wiki_csv.open("r", encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle):
            title = _normalize_tag(record.get("title") or "")
            if title not in uncategorized_tags:
                continue
            pages_seen.add(title)
            outgoing = _extract_links_from_body(record.get("body") or "")
            links_per_tag[title] = len(outgoing)
            # One vote per (linked tag, active category) pair.
            tally = Counter(
                category
                for linked in outgoing
                for category in (tag_to_active_categories.get(linked) or ())
            )
            if tally:
                votes_by_tag[title] = tally
    return votes_by_tag, pages_seen, links_per_tag
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def score_tags(
    *,
    uncategorized_tags: Set[str],
    tag_counts: Dict[str, int],
    categories: List[str],
    centroid_matrix: np.ndarray,
    tag_to_row: Dict[str, int],
    vectors_norm: np.ndarray,
    wiki_votes: Dict[str, Counter],
    wiki_link_counts: Dict[str, int],
    tfidf_weight: float,
    wiki_weight: float,
    tfidf_temp: float,
    single_top1_min: float,
    single_margin_min: float,
    single_top2_max: float,
    multi_top1_min: float,
    multi_top2_min: float,
    multi_pair_min: float,
) -> List[TagScoreRow]:
    """Score every uncategorized tag against the categories and decide.

    For each tag (in sorted order) two optional distributions over
    *categories* are built:

    * TF-IDF: softmax (temperature *tfidf_temp*) of the dot products
      between the tag's row in *vectors_norm* and each centroid.
    * Wiki: the tag's category votes normalized to sum to 1.

    When both exist they are mixed with the non-negative weights
    *tfidf_weight* / *wiki_weight* and renormalized; otherwise whichever one
    exists is used as-is.

    Decision on the two best fused scores ``p1 >= p2``:

    * "multi"  when ``p1 >= multi_top1_min``, ``p2 >= multi_top2_min`` and
      ``p1 + p2 >= multi_pair_min``;
    * "single" when ``p1 >= single_top1_min`` and either
      ``p1 - p2 >= single_margin_min`` or ``p2 <= single_top2_max``;
    * "hold"   otherwise.

    A tag with no signal at all gets an empty ``top_fused`` and is always
    held, so zero-scored categories are never reported or assigned.

    Returns:
        One ``TagScoreRow`` per tag, in sorted tag order.
    """
    out: List[TagScoreRow] = []
    cat_to_idx = {c: i for i, c in enumerate(categories)}
    tfidf_w = max(0.0, float(tfidf_weight))
    wiki_w = max(0.0, float(wiki_weight))

    for tag in sorted(uncategorized_tags):
        count = int(tag_counts.get(tag, 0))
        tfidf_arr = None
        wiki_arr = None
        wiki_vote_count = 0

        # TF-IDF signal: similarity of the tag vector to each centroid.
        row_idx = tag_to_row.get(tag)
        if row_idx is not None and centroid_matrix.size > 0:
            sims = centroid_matrix @ vectors_norm[int(row_idx)]
            tfidf_arr = _softmax(sims.astype(np.float32), temperature=tfidf_temp)

        # Wiki signal: normalized vote shares. Votes for categories that are
        # not in `categories` are ignored.
        votes = wiki_votes.get(tag)
        if votes:
            wiki_vote_count = int(sum(votes.values()))
            wiki_arr = np.zeros(len(categories), dtype=np.float32)
            for c, n in votes.items():
                idx = cat_to_idx.get(c)
                if idx is not None:
                    wiki_arr[idx] += float(n)
            s = float(np.sum(wiki_arr))
            if s > 0.0:
                wiki_arr /= s
            else:
                wiki_arr = None

        if tfidf_arr is not None and wiki_arr is not None:
            fused = tfidf_w * tfidf_arr + wiki_w * wiki_arr
            denom = float(np.sum(fused))
            if denom > 0.0:
                fused /= denom
            signal = "both"
        elif tfidf_arr is not None:
            fused = tfidf_arr
            signal = "tfidf_only"
        elif wiki_arr is not None:
            fused = wiki_arr
            signal = "wiki_only"
        else:
            fused = np.zeros(len(categories), dtype=np.float32)
            signal = "none"

        # With no signal every score is 0.0, so a top-k would only reflect
        # the sort's tie order; report no ranking instead.
        top_fused = _topk_with_names(categories, fused, 3) if signal != "none" else []
        top_tfidf = _topk_with_names(categories, tfidf_arr, 3) if tfidf_arr is not None else []
        top_wiki = _topk_with_names(categories, wiki_arr, 3) if wiki_arr is not None else []

        if len(top_fused) == 0:
            assignment = "hold"
            assigned: List[str] = []
        else:
            c1, p1 = top_fused[0]
            c2, p2 = top_fused[1] if len(top_fused) > 1 else ("", 0.0)

            # The multi rule is checked first, so two strong categories win
            # over a single assignment.
            if p1 >= multi_top1_min and p2 >= multi_top2_min and (p1 + p2) >= multi_pair_min:
                assignment = "multi"
                assigned = [c1, c2]
            elif p1 >= single_top1_min and ((p1 - p2) >= single_margin_min or p2 <= single_top2_max):
                assignment = "single"
                assigned = [c1]
            else:
                assignment = "hold"
                assigned = []

        out.append(
            TagScoreRow(
                tag=tag,
                count=count,
                signal=signal,
                assignment=assignment,
                assigned_categories=assigned,
                top_fused=top_fused,
                top_tfidf=top_tfidf,
                top_wiki=top_wiki,
                wiki_vote_count=wiki_vote_count,
                wiki_link_count=int(wiki_link_counts.get(tag, 0)),
            )
        )
    return out
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def summarize_rows(
    rows: List[TagScoreRow],
    *,
    n_uncat_total: int,
    n_has_tfidf: int,
    n_wiki_page: int,
    n_wiki_votes: int,
    sample_size: int,
    seed: int,
) -> Dict[str, object]:
    """Aggregate scored rows into a JSON-friendly summary.

    The result holds headline counts, the 20 most common single-assignment
    categories, the 20 most common multi-assignment category pairs, and up
    to *sample_size* randomly drawn example rows per assignment kind
    ("single", "multi", "hold"). Sampling uses ``random.Random(seed)``;
    each sample is listed by descending count, then tag.
    """
    assignment_tally: Counter = Counter()
    signal_tally: Counter = Counter()
    single_tally: Counter = Counter()
    pair_tally: Counter = Counter()
    multi_additions = 0

    for row in rows:
        assignment_tally[row.assignment] += 1
        signal_tally[row.signal] += 1
        cats = row.assigned_categories
        if row.assignment == "single":
            if cats:
                single_tally[cats[0]] += 1
        elif row.assignment == "multi":
            multi_additions += len(cats)
            if len(cats) >= 2:
                # Sorted so that (a, b) and (b, a) count as the same pair.
                pair_tally[tuple(sorted(cats[:2]))] += 1

    rng = random.Random(int(seed))

    def render(r: TagScoreRow) -> Dict[str, object]:
        return {
            "tag": r.tag,
            "count": r.count,
            "signal": r.signal,
            "assigned_categories": r.assigned_categories,
            "top_fused": [(c, round(p, 4)) for c, p in r.top_fused],
            "top_tfidf": [(c, round(p, 4)) for c, p in r.top_tfidf],
            "top_wiki": [(c, round(p, 4)) for c, p in r.top_wiki],
            "wiki_vote_count": r.wiki_vote_count,
            "wiki_link_count": r.wiki_link_count,
        }

    def sample_assignment(kind: str) -> List[Dict[str, object]]:
        candidates = [r for r in rows if r.assignment == kind]
        if not candidates:
            return []
        chosen = rng.sample(candidates, min(int(sample_size), len(candidates)))
        chosen.sort(key=lambda r: (-r.count, r.tag))
        return [render(r) for r in chosen]

    counts: Dict[str, object] = {
        "uncategorized_total": int(n_uncat_total),
        "scored_rows": int(len(rows)),
        "has_tfidf_vector": int(n_has_tfidf),
        "has_wiki_page": int(n_wiki_page),
        "has_wiki_category_votes": int(n_wiki_votes),
        "signals": dict(signal_tally),
        "assignments": dict(assignment_tally),
        "newly_categorized": int(assignment_tally.get("single", 0) + assignment_tally.get("multi", 0)),
        "remaining_uncategorized": int(assignment_tally.get("hold", 0)),
        "multi_category_additions": int(multi_additions),
    }
    top_pairs = [
        {"categories": list(pair), "count": int(cnt)} for pair, cnt in pair_tally.most_common(20)
    ]
    # The shared RNG is consumed in this fixed order: single, multi, hold.
    samples = {kind: sample_assignment(kind) for kind in ("single", "multi", "hold")}

    return {
        "counts": counts,
        "top_single_categories": single_tally.most_common(20),
        "top_multi_category_pairs": top_pairs,
        "samples": samples,
    }
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def parse_args() -> argparse.Namespace:
    """Build the command-line parser for the analysis script and parse argv.

    Returns:
        The parsed namespace with fusion weights, softmax temperature,
        single/multi assignment thresholds, sampling options and the output
        JSON path.
    """
    parser = argparse.ArgumentParser(
        description="Analyze hybrid TF-IDF + wiki category assignment for uncategorized tags."
    )

    # (flag, default, help) for every float-valued option, listed in the
    # order they should appear in --help.
    float_options = (
        ("--tfidf-weight", 0.6, "Weight for TF-IDF centroid probabilities."),
        ("--wiki-weight", 0.4, "Weight for wiki-link graph probabilities."),
        ("--tfidf-temp", 0.08, "Softmax temperature for TF-IDF similarities."),
        ("--single-top1-min", 0.55, "Single-assign threshold: top1 probability min."),
        ("--single-margin-min", 0.18, "Single-assign threshold: top1-top2 margin min."),
        ("--single-top2-max", 0.35, "Single-assign threshold: top2 probability max."),
        ("--multi-top1-min", 0.42, "Multi-assign threshold: top1 probability min."),
        ("--multi-top2-min", 0.30, "Multi-assign threshold: top2 probability min."),
        ("--multi-pair-min", 0.78, "Multi-assign threshold: (top1+top2) min."),
    )
    for flag, default_value, help_text in float_options:
        parser.add_argument(flag, type=float, default=default_value, help=help_text)

    parser.add_argument("--sample-size", type=int, default=20, help="Random examples per assignment bucket.")
    parser.add_argument("--seed", type=int, default=42, help="Random seed for examples.")
    parser.add_argument(
        "--out-json",
        type=Path,
        default=OUT_JSON,
        help="Output JSON report (overwritten each run).",
    )
    return parser.parse_args()
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
def main() -> None:
    """Run the hybrid category-assignment analysis end to end.

    Loads the registry and TF-IDF tag vectors, builds category centroids and
    wiki votes, scores the uncategorized tags, writes the JSON report to
    ``args.out_json`` (creating parent directories as needed) and prints a
    short console summary.

    Raises:
        RuntimeError: If ``build_centroids`` returns no categories.
    """
    args = parse_args()

    active_category_tags, tag_to_active_categories, tag_counts, uncategorized = load_registry(REGISTRY_CSV)

    vectors = get_tfidf_tag_vectors()
    vectors_norm = vectors["reduced_matrix_norm"]
    tag_to_row = vectors["tag_to_row_index"]

    categories, centroid_matrix, seed_sizes = build_centroids(active_category_tags, tag_to_row, vectors_norm)
    if not categories:
        raise RuntimeError("No centroids available from active categories. Check category registry content.")

    wiki_votes, has_wiki_page, wiki_link_counts = build_wiki_votes(
        WIKI_PAGES_CSV,
        uncategorized_tags=uncategorized,
        tag_to_active_categories=tag_to_active_categories,
    )

    # The same scoring knobs are passed to score_tags and echoed in the
    # report's "config" section, so collect them once in a fixed order.
    scoring_param_names = (
        "tfidf_weight",
        "wiki_weight",
        "tfidf_temp",
        "single_top1_min",
        "single_margin_min",
        "single_top2_max",
        "multi_top1_min",
        "multi_top2_min",
        "multi_pair_min",
    )
    scoring_params = {name: getattr(args, name) for name in scoring_param_names}

    rows = score_tags(
        uncategorized_tags=uncategorized,
        tag_counts=tag_counts,
        categories=categories,
        centroid_matrix=centroid_matrix,
        tag_to_row=tag_to_row,
        vectors_norm=vectors_norm,
        wiki_votes=wiki_votes,
        wiki_link_counts=wiki_link_counts,
        **scoring_params,
    )

    # Number of uncategorized tags that have a row in the TF-IDF matrix.
    tfidf_covered = sum(tag in tag_to_row for tag in uncategorized)
    summary = summarize_rows(
        rows,
        n_uncat_total=len(uncategorized),
        n_has_tfidf=tfidf_covered,
        n_wiki_page=len(has_wiki_page),
        n_wiki_votes=len(wiki_votes),
        sample_size=args.sample_size,
        seed=args.seed,
    )

    config_section = dict(scoring_params)
    config_section["sample_size"] = args.sample_size
    config_section["seed"] = args.seed

    inputs_section = {
        "registry_csv": str(REGISTRY_CSV),
        "wiki_pages_csv": str(WIKI_PAGES_CSV),
        "uncategorized_tags": len(uncategorized),
        "active_categories_for_centroids": len(categories),
        "centroid_seed_sizes": seed_sizes,
    }
    report = {"config": config_section, "inputs": inputs_section, "summary": summary}

    out_path = args.out_json
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")

    counts = summary["counts"]
    print("Hybrid category assignment analysis complete")
    print(f"Active categories (centroids): {len(categories)}")
    print(
        f"Signals: tfidf={counts['has_tfidf_vector']} "
        f"wiki_page={counts['has_wiki_page']} "
        f"wiki_votes={counts['has_wiki_category_votes']}"
    )
    print(f"Assignments: {counts['assignments']}")
    print(f"Remaining uncategorized: {counts['remaining_uncategorized']}")
    print(f"Wrote: {out_path}")
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|