| """ShotCraft Stage 1 — Shot Director (MiniCPM-V-2_6, 8B). Owner: Pawel. |
| |
| Inference runs on the Modal backend (model_runtime.minicpm_chat). This |
| module owns the prompt, JSON validation, and the one auto-retry (FR-1.4). |
| """ |
| from __future__ import annotations |
| import json |
| import re |
| from model_runtime import minicpm_chat |
| from schemas import ConceptPackage, validate_package, STYLE_SUFFIXES |
|
|
# Identifier of the Stage 1 model named in the module docstring.
# NOTE(review): not referenced by the code visible in this module; presumably
# consumed by the runtime or by other stages - confirm before removing.
MODEL_ID = "openbmb/MiniCPM-V-2_6"
# Sampling temperature that generate_concepts() passes to minicpm_chat().
TEMPERATURE = 0.6
|
|
# System prompt sent as `system=` on every minicpm_chat() call made by
# generate_concepts(). It asks the model for a JSON object with a
# "product_analysis" section and exactly 5 "shots"; the reply is later passed
# through _strip_fences() and validate_package().
# The text is part of the program's behavior: edit it only deliberately.
SYSTEM_PROMPT = """You are ShotCraft, an expert e-commerce art director.
Analyze the uploaded PRODUCT PHOTO carefully: identify the product type,
materials, exact colors, and distinguishing features you can SEE.
Then design exactly 5 distinct marketing shot concepts grounded in those
real attributes.

CRITICAL - PRODUCT CONSISTENCY:
The SAME physical product must appear, unchanged, in all 5 shots. Only the
scene around it changes. To guarantee this:
1. Write "canonical_description": ONE sentence (max 40 words) describing the
product EXACTLY as photographed: product type, shape, every visible color
and which part it is on (e.g. body, sole, laces, lid, label), materials,
and any logo or branding placement. Use plain English color names
("off-white", "charcoal black", "gum brown") - NEVER hex codes here.
2. Each "image_prompt" must describe ONLY the scene: setting, surfaces,
props, camera angle, lighting, mood. Refer to the product simply as
"the product". NEVER re-describe, recolor, restyle or redesign the
product itself - no color adjectives for the product, no "colorway",
no "variant", no "matching the palette". The pipeline automatically
prefixes your canonical_description to every render prompt.
3. "color_palette" is for the BACKDROP and props only, never the product.
Even in bold/colorful styles, the colors go into the background and
props while the product keeps its original colors.

Return ONLY valid JSON, no markdown fences, matching this schema:
{
"product_analysis": {
"product_type": str, "materials": str,
"colors": [str hex], "distinguishing_features": str,
"canonical_description": str // the locked product identity, rule 1
},
"shots": [ // exactly 5
{
"id": int, "concept_name": str, "scene": str, "camera_angle": str,
"lighting": str, "color_palette": [str hex], "props": str,
"marketing_angle": str,
"image_prompt": str // English, FLUX-optimized, scene only (rule 2)
}
]
}"""
|
|
def build_user_prompt(brand_desc: str, category: str, style_preset: str) -> str:
    """Assemble the per-request user message for the shot director.

    Args:
        brand_desc: Free-text brand description, inserted verbatim.
        category: Product category, inserted verbatim.
        style_preset: Name of the style preset; also used as the key into
            ``STYLE_SUFFIXES`` to look up the preset's style keywords.

    Returns:
        A four-line prompt string. A preset missing from ``STYLE_SUFFIXES``
        yields an empty keyword list rather than an error.
    """
    style_keywords = STYLE_SUFFIXES.get(style_preset, '')
    prompt_lines = [
        f"Brand description: {brand_desc}",
        f"Product category: {category}",
        f"Style preset: {style_preset} (style keywords: {style_keywords})",
        "Design 5 shot concepts for this product.",
    ]
    return "\n".join(prompt_lines)
|
|
| def _strip_fences(text: str) -> str: |
| t = text.strip() |
| if t.startswith("```"): |
| t = t.split("\n", 1)[1] if "\n" in t else t |
| t = t.rsplit("```", 1)[0] |
| |
| |
| t = re.sub(r"'(#[0-9A-Fa-f]{3,8})'", r'"\1"', t) |
| return t.strip() |
|
|
def generate_concepts(image, brand_desc: str, category: str, style_preset: str) -> ConceptPackage:
    """Run MiniCPM-V-2_6 on the product photo and return validated concepts.

    The model reply is fence-stripped and validated. If validation fails, the
    model is asked once more with the validation error appended to the user
    prompt (one auto-retry, FR-1.4).

    Args:
        image: Product photo, forwarded unchanged to ``minicpm_chat``.
        brand_desc: Brand description used to build the user prompt.
        category: Product category used to build the user prompt.
        style_preset: Style preset name used to build the user prompt.

    Returns:
        Whatever ``validate_package`` returns for the first valid reply.

    Raises:
        RuntimeError: If both attempts produce an invalid reply. The last
            validation error is chained as ``__cause__``.
    """
    max_attempts = 2  # the initial call plus the single auto-retry
    base_prompt = build_user_prompt(brand_desc, category, style_preset)
    user_prompt = base_prompt
    last_err: Exception | None = None
    for _ in range(max_attempts):
        raw = minicpm_chat(image=image, system=SYSTEM_PROMPT, user=user_prompt,
                           temperature=TEMPERATURE)
        try:
            return validate_package(_strip_fences(raw))
        except (ValueError, json.JSONDecodeError) as e:
            last_err = e
            # Tell the model what was wrong so the retry can correct it.
            user_prompt = (base_prompt
                           + "\n\nYour previous reply was invalid "
                           + f"({e}). Return ONLY the corrected JSON.")
    # Chain the underlying validation error so the traceback keeps its cause.
    raise RuntimeError(f"Stage 1 failed after retry: {last_err}") from last_err
|
|