"""ShotCraft — model runtime client (Modal backend). ARCHITECTURE (2026-06-11, per team decision + hackathon rules check): all model inference runs on Modal (modal_backend/shotcraft_inference.py): Stage 1 MiniCPM-V-2_6 (8B) — A10G Stage 2 FLUX.1-schnell (12B) — L40S The Gradio Space is the interface (hackathon REQ-02); Modal is the runtime (explicitly allowed per field-guide FAQ, qualifies for "Best Use of Modal"). No ZeroGPU, no @spaces.GPU, no mock fallbacks — the app runs end to end against the real backend or fails loudly with an actionable error. Space configuration (Settings -> Variables and secrets): SHOTCRAFT_API_URL Explicit Modal endpoint, e.g. https://--shotcraft-inference-api.modal.run SHOTCRAFT_MODAL_WORKSPACE Modal workspace slug; used when SHOTCRAFT_API_URL is unset. SHOTCRAFT_MODAL_APP Modal app name; defaults to shotcraft-inference. SHOTCRAFT_MODAL_FUNCTION Modal ASGI function name; defaults to api. """ from __future__ import annotations import base64 import io import os import httpx DEFAULT_MODAL_WORKSPACE = "rafalbogusdxc" DEFAULT_MODAL_APP = "shotcraft-inference" DEFAULT_MODAL_FUNCTION = "api" def _modal_api_url() -> str: explicit_url = os.environ.get("SHOTCRAFT_API_URL", "").strip() if explicit_url: return explicit_url.rstrip("/") workspace = os.environ.get("SHOTCRAFT_MODAL_WORKSPACE", DEFAULT_MODAL_WORKSPACE).strip() app_name = os.environ.get("SHOTCRAFT_MODAL_APP", DEFAULT_MODAL_APP).strip() function_name = os.environ.get("SHOTCRAFT_MODAL_FUNCTION", DEFAULT_MODAL_FUNCTION).strip() return f"https://{workspace}--{app_name}-{function_name}.modal.run" API_URL = _modal_api_url() MINICPM_ID = "openbmb/MiniCPM-V-2_6" FLUX_ID = "black-forest-labs/FLUX.1-schnell" # Cold start can pull weights onto the GPU container; keep timeouts generous. 
STAGE1_TIMEOUT_S = 900  # seconds, passed as the httpx timeout for /minicpm
STAGE2_TIMEOUT_S = 900  # seconds, passed as the httpx timeout for /flux


class BackendError(RuntimeError):
    """Inference backend unreachable or returned an error."""


def _pil_to_b64(img) -> str:
    """Serialize *img* as PNG and return the bytes as a base64 text string.

    *img* only needs a ``save(fileobj, format=...)`` method; callers pass the
    image that is sent to the backend as ``image_b64``.
    """
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode()


def _b64_to_pil(data: str):
    """Decode a base64-encoded image and return it as an RGB ``PIL.Image``."""
    # Imported lazily so that importing this module does not require Pillow.
    from PIL import Image

    return Image.open(io.BytesIO(base64.b64decode(data))).convert("RGB")


def _response_field(data, key: str, path: str):
    """Return ``data[key]`` from a decoded backend response.

    Raises:
        BackendError: if *data* is not a JSON object or lacks *key*, so that a
            malformed reply surfaces as a backend failure instead of a bare
            ``KeyError``/``TypeError`` in the caller.
    """
    if not isinstance(data, dict):
        raise BackendError(
            f"Backend response from {path} is not a JSON object "
            f"(got {type(data).__name__}); expected a {key!r} field"
        )
    if key not in data:
        raise BackendError(
            f"Backend response from {path} is missing the {key!r} field "
            f"(keys: {sorted(data)})"
        )
    return data[key]


def _post(path: str, payload: dict, timeout: float) -> dict:
    """POST *payload* as JSON to ``API_URL + path`` and return the decoded reply.

    Args:
        path: Endpoint path beginning with ``/`` (e.g. ``"/flux"``).
        payload: JSON-serializable request body.
        timeout: httpx timeout in seconds.

    Returns:
        The decoded JSON body of the response.

    Raises:
        BackendError: on any httpx transport failure, a non-2xx status, or a
            response body that is not valid JSON.
    """
    url = f"{API_URL}{path}"
    try:
        # follow_redirects: Modal answers long-running calls with a 303
        # redirect to a poll URL (?__modal_function_call_id=...) when the
        # request exceeds ~150 s (e.g. cold start pulling model weights).
        resp = httpx.post(url, json=payload, timeout=timeout, follow_redirects=True)
        resp.raise_for_status()
    except httpx.ConnectError as e:
        raise BackendError(
            f"Cannot reach inference backend at {API_URL} — is the Modal app "
            f"deployed? ({e})"
        ) from e
    except httpx.ReadTimeout as e:
        raise BackendError(
            "Inference backend timed out — likely a cold start pulling model "
            "weights. Try again in ~1 minute."
        ) from e
    except httpx.HTTPStatusError as e:
        raise BackendError(
            f"Backend error {e.response.status_code}: {e.response.text[:300]}"
        ) from e
    except httpx.HTTPError as e:
        # Everything else httpx can raise for a request (connect/write/pool
        # timeouts, dropped connections, protocol errors, redirect loops).
        # Must stay after the specific handlers above, which are subclasses.
        raise BackendError(
            f"Request to inference backend at {API_URL} failed: "
            f"{type(e).__name__}: {e}"
        ) from e

    try:
        return resp.json()
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        raise BackendError(
            f"Backend returned a non-JSON response from {path}: {resp.text[:300]}"
        ) from e


def health() -> dict:
    """GET /health — used by the app banner at startup.

    Never raises: any failure is reported as a dict with
    ``status == "unreachable"``, the error text and the URL that was tried.
    """
    try:
        resp = httpx.get(f"{API_URL}/health", timeout=10, follow_redirects=True)
        resp.raise_for_status()
        return resp.json()
    except Exception as e:  # noqa: BLE001 — banner only, never crash the UI
        return {"status": "unreachable", "error": str(e), "url": API_URL}


def minicpm_chat(image, system: str, user: str, temperature: float = 0.6) -> str:
    """Stage 1: vision analysis + concept generation on Modal (MiniCPM-V-2_6).

    Args:
        image: Image object with a PIL-style ``save`` method; sent as base64 PNG.
        system: System prompt.
        user: User prompt.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The ``text`` field of the backend response.

    Raises:
        BackendError: if the request fails or the response has no ``text``.
    """
    data = _post(
        "/minicpm",
        {
            "image_b64": _pil_to_b64(image),
            "system": system,
            "user": user,
            "temperature": temperature,
        },
        STAGE1_TIMEOUT_S,
    )
    return _response_field(data, "text", "/minicpm")


def flux_generate_batch(prompts: list, width: int, height: int, seeds: list) -> list:
    """Stage 2: render N frames in one backend call (N=5 reel, N=1 regen).

    Args:
        prompts: One prompt per frame.
        width: Frame width, coerced to ``int``.
        height: Frame height, coerced to ``int``.
        seeds: Seeds sent alongside the prompts, each coerced to ``int``
            (seeded per FR-2.3).

    Returns:
        PIL.Images decoded from the response's ``images_b64`` list, in the
        order the backend returned them (input order).

    Raises:
        BackendError: if the request fails or the response has no
            ``images_b64``.
    """
    data = _post(
        "/flux",
        {
            "prompts": list(prompts),
            "width": int(width),
            "height": int(height),
            "seeds": [int(s) for s in seeds],
        },
        STAGE2_TIMEOUT_S,
    )
    return [_b64_to_pil(b) for b in _response_field(data, "images_b64", "/flux")]


def flux_generate(prompt: str, width: int, height: int, steps: int, seed: int):
    """Back-compat single-frame API; steps is fixed at 4 server-side.

    *steps* is accepted for signature compatibility only and is not sent to
    the backend. Returns the single rendered PIL.Image.
    """
    return flux_generate_batch([prompt], width, height, [seed])[0]