"""Image tab — Florence2 visual description via capability bus.""" from __future__ import annotations import asyncio import base64 import concurrent.futures from typing import Any def _run(coro): try: loop = asyncio.get_running_loop() except RuntimeError: loop = None if loop and loop.is_running(): with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: return pool.submit(asyncio.run, coro).result() return asyncio.run(coro) def build_image_tab(bus: Any | None = None) -> None: import gradio as gr gr.HTML("""

🖼 Image — Visual AI

Florence2 vision model · describe scenes · read text · detect objects · 100% local

""") with gr.Row(): with gr.Column(scale=2): img_input = gr.Image(type="filepath", label="Upload image") task_select = gr.Radio( choices=["Describe", "Detailed Caption", "OCR (read text in image)"], value="Describe", label="Task", ) describe_btn = gr.Button("🔍 Analyse Image", variant="primary", size="lg") with gr.Column(scale=3): description_out = gr.Textbox(label="Result", lines=8, interactive=False) status_out = gr.Textbox(label="Status", lines=1, interactive=False) def _describe(path: str | None, task: str) -> tuple[str, str]: if not path: return "", "⚠ Upload an image first" if bus is None: return "", "⚠ No bus — run inside a HearthNet node" try: with open(path, "rb") as f: b64 = base64.b64encode(f.read()).decode() except Exception as exc: return "", f"⚠ Could not read file: {exc}" task_map = { "Describe": "caption", "Detailed Caption": "detailed_caption", "OCR (read text in image)": "ocr", } task_key = task_map.get(task, "caption") async def _call(): return await bus.call( "img.describe", (1, 0), {"params": {"task": task_key}, "input": {"image_b64": b64}}, ) try: result = _run(_call()) except Exception as exc: return "", f"⚠ Bus error: {exc}" if "error" in result: if result["error"] == "backend_unavailable": return "", "⚠ No vision backend — Florence2 model not loaded" return "", f"⚠ {result.get('message', result['error'])}" out = result.get("output", result) caption = out.get("caption", out.get("text", str(out))) return caption, "✓ Done" describe_btn.click(_describe, inputs=[img_input, task_select], outputs=[description_out, status_out]) gr.HTML("""

ℹ Setup help

Florence2 loads automatically if transformers and timm are installed.
For GPU acceleration: pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
Model: microsoft/Florence-2-base (~900MB download on first use)

""")