import sys
print(f"[BOOT] Python {sys.version}", flush=True)

import base64
import os
import re
from typing import Generator, Optional

try:
    import gradio as gr
    print(f"[BOOT] gradio {gr.__version__}", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e}", flush=True)
    sys.exit(1)

try:
    from huggingface_hub import InferenceClient
    import httpx
    import uvicorn
    from fastapi import FastAPI, Request
    from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
    print("[BOOT] All imports OK", flush=True)
except ImportError as e:
    print(f"[BOOT] FATAL: {e} — add to requirements.txt", flush=True)
    sys.exit(1)

# ══════════════════════════════════════════════════════════════════════════════
# 1.  MODEL CAPABILITY MATRIX
# ══════════════════════════════════════════════════════════════════════════════
# Per-model capability table, keyed by Hugging Face model id.
# Keys that code in this file actually reads (all in generate_reply):
#   "thinking"   – whether the /think directive may be used for this model
#   "vision"     – whether an image may be attached to the user message
#   "max_tokens" – upper clamp applied to the requested max_new_tokens
#   "temp_max"   – upper clamp applied to the requested temperature
# The remaining keys ("arch", "active", "ctx", "top_p", "color", "badge",
# "desc") are not read anywhere in this file — presumably display metadata
# for the front end; verify against index.html before removing any of them.
MODEL_CAPS: dict[str, dict] = {
    "Qwen/Qwen3.5-122B-A10B": {
        "arch": "MoE", "active": "10B / 122B total",
        "ctx": "262K → 1M", "thinking": True, "vision": True,
        "max_tokens": 8192, "temp_max": 2.0, "top_p": True,
        "color": "#7c3aed",
        "badge": "🏆 Best Overall · BFCL 72.2 · GPQA 86.6 · SWE 72.0",
        "desc": "Top reasoning & agents · Complex math · Long context",
    },
    "Qwen/Qwen3.5-27B": {
        "arch": "Dense", "active": "27B (all active)",
        "ctx": "262K → 1M", "thinking": True, "vision": True,
        "max_tokens": 8192, "temp_max": 2.0, "top_p": True,
        "color": "#0d9488",
        "badge": "🎯 Dense #1 · IFEval 95.0 · SWE 72.4 · PolyMATH 71.2",
        "desc": "Instruction king · Creative writing · 201 languages",
    },
    "Qwen/Qwen3.5-35B-A3B": {
        "arch": "MoE", "active": "3B / 35B total",
        "ctx": "262K → 1M", "thinking": True, "vision": True,
        "max_tokens": 4096, "temp_max": 1.5, "top_p": True,
        "color": "#d97706",
        "badge": "⚡ Flash Speed · TAU2 81.2 · MMLU-Pro 85.3",
        "desc": "Fastest · 3B active params · ~6× faster than 27B",
    }
}
# Model ids in declaration order; the first declared model is the default.
MODEL_IDS     = list(MODEL_CAPS.keys())
DEFAULT_MODEL = MODEL_IDS[0]

# ══════════════════════════════════════════════════════════════════════════════
# 2.  SYSTEM PROMPT PRESETS
# ══════════════════════════════════════════════════════════════════════════════

# Named system-prompt presets (preset key → prompt text).
# Only "general" is referenced in this file, as the default value of the
# hidden system_prompt textbox; the other keys are presumably selected by the
# front end — verify against index.html.
PRESETS: dict[str, str] = {
    "general":   "You are a helpful, harmless, and honest AI assistant.",
    "code":      "You are an expert software engineer. Write clean, efficient, well-commented code. Explain your approach before writing. Use modern best practices.",
    "math":      "You are a world-class mathematician. Break problems step-by-step. Show full working. Use LaTeX where helpful.",
    "creative":  "You are a brilliant creative writer. Be imaginative, vivid, and engaging. Adapt tone and style to the request.",
    "translate": "You are a professional translator fluent in 201 languages. Provide accurate, natural-sounding translations with cultural context.",
    "research":  "You are a rigorous research analyst. Provide structured, well-reasoned analysis. Identify assumptions and acknowledge uncertainty.",
}

# ══════════════════════════════════════════════════════════════════════════════
# 3.  THINKING MODE HELPERS
# ══════════════════════════════════════════════════════════════════════════════
def build_user_message(text: str, thinking: bool) -> str:
    """Return *text* preceded by a one-line thinking directive.

    The first line of the result is ``/think`` when *thinking* is true and
    ``/no_think`` otherwise; *text* follows unchanged on the next line.
    """
    if thinking:
        directive = "/think\n"
    else:
        directive = "/no_think\n"
    return directive + text

def parse_think_blocks(text: str) -> tuple[str, str]:
    """Split model output into ``(reasoning, answer)``.

    * A complete ``<think>…</think>`` block yields its stripped contents as
      the reasoning and the stripped text after the block as the answer.
    * Output that *starts* with ``<think>`` but has no closing tag yet (a
      partially streamed reply, or one cut off by the token limit) is treated
      as reasoning still in progress: everything after the tag is returned as
      reasoning and the answer is empty, so the raw tag never leaks into the
      displayed answer.
    * Anything else is returned untouched as ``("", text)``.
    """
    closed = re.search(r"<think>(.*?)</think>\s*", text, re.DOTALL)
    if closed:
        return closed.group(1).strip(), text[closed.end():].strip()

    # Only a *leading* open tag counts as in-progress reasoning; a "<think>"
    # mentioned in the middle of an ordinary answer must be left alone.
    head = text.lstrip()
    if head.startswith("<think>"):
        return head[len("<think>"):].strip(), ""

    return "", text

def format_response(raw: str) -> str:
    """Render raw model output as Markdown for the chat window.

    When the output carries non-empty reasoning, the reasoning is shown as a
    block-quote inside a collapsed ``<details>`` element, followed by the
    answer. When there is no reasoning, only the answer is returned.

    Returning the parsed answer (rather than *raw*) matters when the think
    block is present but empty: the literal ``<think></think>`` tags would
    otherwise be shown to the user. With no think block at all the parsed
    answer is *raw* itself, so plain replies are unchanged.
    """
    chain, answer = parse_think_blocks(raw)
    if not chain:
        return answer

    quoted = "\n".join(f"> {line}" for line in chain.split("\n"))
    return (
        "<details>\n"
        "<summary>🧠 Reasoning Chain — click to expand</summary>\n\n"
        f"{quoted}\n\n"
        "</details>\n\n"
        f"{answer}"
    )

# ══════════════════════════════════════════════════════════════════════════════
# 4.  STREAMING BACKEND
# ══════════════════════════════════════════════════════════════════════════════
# Matches the collapsed reasoning block that format_response() puts in front
# of an answer. The closing tag is anchored to the start of a line because
# every reasoning line inside the block is prefixed with "> ".
_REASONING_BLOCK_RE = re.compile(
    r"\A\s*<details>\s*<summary>🧠 Reasoning Chain[^\n]*</summary>.*?\n</details>\s*",
    re.DOTALL,
)


def _content_text(value) -> Optional[str]:
    """Flatten a chat ``content`` value to plain text.

    ``None`` stays ``None``; a list is treated as multimodal parts and the
    ``text`` of every ``{"type": "text"}`` part is joined with spaces;
    anything else goes through ``str()``.
    """
    if value is None:
        return None
    if isinstance(value, list):
        return " ".join(
            part.get("text", "")
            for part in value
            if isinstance(part, dict) and part.get("type") == "text"
        )
    return str(value)


def _strip_reasoning(text: str) -> str:
    """Drop model reasoning from an assistant turn before it is replayed.

    Removes raw ``<think>`` blocks and also the rendered ``<details>`` block,
    since the chat history holds what format_response() produced.
    """
    _, answer = parse_think_blocks(text)
    return _REASONING_BLOCK_RE.sub("", answer, count=1)


def _history_to_messages(history: list) -> list[dict]:
    """Convert chat history (role dicts or ``[user, assistant]`` pairs) to API messages."""
    messages: list[dict] = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role", "")
            text = _content_text(turn.get("content") or "")
            if role == "user":
                messages.append({"role": "user", "content": text})
            elif role == "assistant":
                messages.append({"role": "assistant", "content": _strip_reasoning(text)})
            continue

        # Pair format: either side may be missing or empty.
        try:
            user_part = turn[0] or None
            bot_part = turn[1] if len(turn) > 1 else None
        except (IndexError, TypeError):
            continue
        if user_text := _content_text(user_part):
            messages.append({"role": "user", "content": user_text})
        if bot_text := _content_text(bot_part):
            messages.append({"role": "assistant", "content": _strip_reasoning(bot_text)})
    return messages


def _has_image(image_input) -> bool:
    """Tell whether an image was supplied, without truth-testing array inputs."""
    if image_input is None:
        return False
    if isinstance(image_input, str):
        return bool(image_input.strip())
    return True


def _image_data_url(image_input) -> str:
    """Return *image_input* as a ``data:`` URL.

    A data-URL string is passed through unchanged so that its declared MIME
    type keeps matching its payload. A PIL image, or an array accepted by
    ``PIL.Image.fromarray``, is encoded as JPEG.

    Raises:
        ValueError: for a string that is not a data URL.
    """
    if isinstance(image_input, str):
        candidate = image_input.strip()
        if candidate.startswith("data:") and "," in candidate:
            return candidate
        raise ValueError("image must be a base64 data URL, a PIL image or an array")

    # Imported lazily: Pillow is only needed on this path.
    import io
    from PIL import Image as PILImage

    image = image_input
    if not isinstance(image, PILImage.Image):
        image = PILImage.fromarray(image)
    if image.mode != "RGB":
        # JPEG cannot store alpha or palette modes (RGBA, LA, P, …).
        image = image.convert("RGB")
    buf = io.BytesIO()
    image.save(buf, format="JPEG")
    return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()


def generate_reply(
    message:        str,
    history:        list,
    model_id:       str,
    thinking_mode:  str,
    image_input,
    system_prompt:  str,
    max_new_tokens: int,
    temperature:    float,
    top_p:          float,
) -> Generator[str, None, None]:
    """Stream a chat completion for *message*, yielding the formatted reply so far.

    Args:
        message: The new user message.
        history: Earlier turns, as role/content dicts or ``[user, assistant]``
            pairs; reasoning is stripped from assistant turns before replay.
        model_id: A key of ``MODEL_CAPS``.
        thinking_mode: Mode label; thinking is enabled when it contains
            ``"Thinking"`` and the model supports it.
        image_input: Optional image as a data-URL string, PIL image or array;
            ignored for models without vision support.
        system_prompt: Optional system prompt; blank means none.
        max_new_tokens: Requested token budget, clamped to the model's cap.
        temperature: Requested temperature, clamped to the model's cap.
        top_p: Nucleus-sampling value passed through to the API.

    Yields:
        The whole reply accumulated so far, rendered by ``format_response``.
        Any failure (unknown model, bad image, API error) is yielded as a
        single Markdown error message instead of being raised.
    """
    try:
        if model_id not in MODEL_CAPS:
            raise ValueError(f"unknown model id: {model_id!r}")
        cap = MODEL_CAPS[model_id]
        use_think = "Thinking" in (thinking_mode or "") and cap["thinking"]

        max_new_tokens = min(int(max_new_tokens), cap["max_tokens"])
        temperature = min(float(temperature), cap["temp_max"])

        messages: list[dict] = []
        system_text = (system_prompt or "").strip()
        if system_text:
            messages.append({"role": "system", "content": system_text})
        messages.extend(_history_to_messages(history))

        user_text = build_user_message(message, use_think)
        if cap["vision"] and _has_image(image_input):
            content = [
                {"type": "image_url", "image_url": {"url": _image_data_url(image_input)}},
                {"type": "text", "text": user_text},
            ]
        else:
            content = user_text
        messages.append({"role": "user", "content": content})

        client = InferenceClient(token=os.getenv("HF_TOKEN"), timeout=120)
        stream = client.chat_completion(
            model=model_id, messages=messages,
            max_tokens=max_new_tokens, temperature=temperature,
            top_p=float(top_p), stream=True,
        )
        raw = ""
        for chunk in stream:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            if not delta or not delta.content:
                continue
            raw += delta.content
            yield format_response(raw)
    except Exception as exc:
        # UI boundary: surface the failure in the chat window rather than
        # letting the generator die with a traceback.
        yield (f"**Error:** `{model_id}`\n\n```\n{exc}\n```\n\n"
               "_Check HF\\_TOKEN or try another model._")


# ══════════════════════════════════════════════════════════════════════════════
# 5.  GRADIO BLOCKS  (hidden – only serves /gradio/gradio_api/call/chat API)
# ══════════════════════════════════════════════════════════════════════════════
# Every control below is created with visible=False: they exist only so that
# their values can be supplied as extra arguments to generate_reply.
with gr.Blocks(title="Qwen3.5 MultiChat API") as gradio_demo:
    model_dd = gr.Dropdown(choices=MODEL_IDS, value=DEFAULT_MODEL, visible=False)
    # generate_reply enables thinking when the selected label contains the
    # substring "Thinking", so the second label must keep that word.
    thinking_toggle = gr.Radio(
        choices=["⚡ Fast Mode  (direct answer)",
                 "🧠 Thinking Mode  (chain-of-thought reasoning)"],
        value="⚡ Fast Mode  (direct answer)",
        visible=False,
    )
    image_input    = gr.Textbox(value="", visible=False)  # receives base64 data URL from JS
    system_prompt  = gr.Textbox(value=PRESETS["general"], visible=False)
    # Slider bounds are the outer limits; generate_reply additionally clamps
    # max_new_tokens and temperature to the selected model's caps.
    max_new_tokens = gr.Slider(minimum=64, maximum=8192, value=1024, visible=False)
    temperature    = gr.Slider(minimum=0.0, maximum=2.0, value=0.7,  visible=False)
    top_p          = gr.Slider(minimum=0.1, maximum=1.0, value=0.9,  visible=False)

    # The order of additional_inputs mirrors generate_reply's parameters after
    # (message, history): model_id, thinking_mode, image_input, system_prompt,
    # max_new_tokens, temperature, top_p. Keep the two in sync.
    gr.ChatInterface(
        fn=generate_reply,
        api_name="chat",
        additional_inputs=[
            model_dd, thinking_toggle, image_input,
            system_prompt, max_new_tokens, temperature, top_p,
        ],
    )

# ══════════════════════════════════════════════════════════════════════════════
# 6.  FASTAPI  –  index.html + HF OAuth + Gradio API
# ══════════════════════════════════════════════════════════════════════════════
import pathlib, secrets

fapp    = FastAPI()
# In-process session store. Entries are added by the OAuth callback and, in
# this file, removed only by /oauth/logout — nothing here expires them, and
# they do not survive a restart.
SESSIONS: dict[str, dict] = {}          # session_id → user info
# Front-end page served at "/", expected next to this file.
HTML    = pathlib.Path(__file__).parent / "index.html"

# ── HF OAuth config (auto-injected by HF Spaces when OAuth is enabled) ────────
CLIENT_ID     = os.getenv("OAUTH_CLIENT_ID", "")
CLIENT_SECRET = os.getenv("OAUTH_CLIENT_SECRET", "")
SPACE_HOST    = os.getenv("SPACE_HOST", "localhost:7860")
# Always built with the https scheme, including for the localhost fallback.
REDIRECT_URI  = f"https://{SPACE_HOST}/login/callback"

# Startup OAuth status log (only whether the credentials are set is printed,
# never their values)
print(f"[OAuth] CLIENT_ID set: {bool(CLIENT_ID)}")
print(f"[OAuth] CLIENT_SECRET set: {bool(CLIENT_SECRET)}")
print(f"[OAuth] SPACE_HOST: {SPACE_HOST}")
print(f"[OAuth] REDIRECT_URI: {REDIRECT_URI}")
# Hugging Face OAuth endpoints used by the login and callback routes.
HF_AUTH_URL   = "https://huggingface.co/oauth/authorize"
HF_TOKEN_URL  = "https://huggingface.co/oauth/token"
HF_USER_URL   = "https://huggingface.co/oauth/userinfo"
# Space-separated scopes requested at login; overridable via OAUTH_SCOPES.
SCOPES        = os.getenv("OAUTH_SCOPES", "openid profile")

from urllib.parse import urlencode

def _sid(req: Request) -> Optional[str]:
    """Return the value of the ``mc_session`` cookie, or ``None`` if it is absent."""
    cookies = req.cookies
    return cookies.get("mc_session")

def _user(req: Request) -> Optional[dict]:
    """Look up the session record for the request's session cookie.

    Returns ``None`` when the request carries no session id or the id is not
    present in ``SESSIONS``.
    """
    session_id = _sid(req)
    if not session_id:
        return None
    return SESSIONS.get(session_id)

# ── Routes ────────────────────────────────────────────────────────────────────
@fapp.get("/")
async def root(request: Request):
    """Serve index.html, or a short placeholder page when the file is missing."""
    if HTML.exists():
        page = HTML.read_text(encoding="utf-8")
    else:
        page = "<h2>index.html missing</h2>"
    return HTMLResponse(page)

@fapp.get("/oauth/user")
async def oauth_user(request: Request):
    """Return the logged-in user's session record as JSON.

    Responds with 401 and ``{"logged_in": false}`` when the request has no
    known session.
    """
    profile = _user(request)
    if not profile:
        return JSONResponse({"logged_in": False}, status_code=401)
    return JSONResponse(profile)

# Short-lived cookie that carries the OAuth ``state`` value from /oauth/login
# to /login/callback so the callback can confirm it belongs to a login that
# this browser started.
_STATE_COOKIE = "mc_oauth_state"


def _auth_failed() -> RedirectResponse:
    """Redirect home with the auth-error flag and discard the one-time state cookie."""
    resp = RedirectResponse("/?auth_error=1")
    resp.delete_cookie(_STATE_COOKIE)
    return resp


@fapp.get("/oauth/login")
async def oauth_login(request: Request):
    """Start the Hugging Face OAuth authorization-code flow.

    Redirects to ``/?oauth_error=not_configured`` when no client id is set.
    Otherwise generates a random ``state``, stores it in a cookie for the
    callback to verify, and redirects the browser to the authorize endpoint.
    """
    print(f"[OAuth] /oauth/login called. CLIENT_ID={bool(CLIENT_ID)}")
    if not CLIENT_ID:
        print("[OAuth] ERROR: OAUTH_CLIENT_ID not set — add hf_oauth: true to README.md")
        return RedirectResponse("/?oauth_error=not_configured")
    state = secrets.token_urlsafe(16)
    params = {
        "response_type": "code",
        "client_id": CLIENT_ID,
        "redirect_uri": REDIRECT_URI,
        "scope": SCOPES,
        "state": state,
    }
    url = f"{HF_AUTH_URL}?{urlencode(params)}"
    print(f"[OAuth] Redirecting → {url[:120]}")
    resp = RedirectResponse(url, status_code=302)
    # Same attributes as the session cookie; ten minutes is ample for a login.
    resp.set_cookie(_STATE_COOKIE, state, httponly=True, samesite="lax", secure=True, max_age=600)
    return resp


@fapp.get("/login/callback")
async def oauth_callback(request: Request, code: str = "", error: str = "", state: str = ""):
    """Finish the OAuth flow: verify state, exchange the code, create a session.

    Args:
        request: Incoming request, used to read the state cookie.
        code: Authorization code returned by the provider.
        error: Error string returned by the provider, if any.
        state: The ``state`` value echoed back by the provider.

    Returns:
        A redirect to ``/`` with the ``mc_session`` cookie set on success, or
        a redirect to ``/?auth_error=1`` on any failure (provider error,
        state mismatch, failed token exchange, failed userinfo request).
    """
    if error or not code:
        print(f"[OAuth] Callback error: {error}")
        return _auth_failed()

    # Reject callbacks that were not initiated from this browser: without
    # this check the state parameter provides no CSRF protection at all.
    expected = request.cookies.get(_STATE_COOKIE, "")
    if not (state and expected
            and secrets.compare_digest(state.encode(), expected.encode())):
        print("[OAuth] State mismatch — rejecting callback")
        return _auth_failed()

    # Basic auth as recommended by HF docs
    basic = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
    try:
        async with httpx.AsyncClient() as client:
            # Exchange code for token — use Authorization: Basic header
            tok = await client.post(HF_TOKEN_URL, data={
                "grant_type": "authorization_code",
                "code": code,
                "redirect_uri": REDIRECT_URI,
            }, headers={
                "Accept": "application/json",
                "Authorization": f"Basic {basic}",
            })
            if tok.status_code != 200:
                print(f"[OAuth] Token exchange FAILED: {tok.status_code} {tok.text[:300]}")
                return _auth_failed()
            access_token = tok.json().get("access_token", "")
            if not access_token:
                print(f"[OAuth] No access_token: {tok.text[:300]}")
                return _auth_failed()
            # Get user info
            uinfo = await client.get(HF_USER_URL, headers={"Authorization": f"Bearer {access_token}"})
            if uinfo.status_code != 200:
                print(f"[OAuth] Userinfo FAILED: {uinfo.status_code}")
                return _auth_failed()
            user = uinfo.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Network failure or a non-JSON body: fail the login, not the server.
        print(f"[OAuth] Callback request failed: {exc}")
        return _auth_failed()
    print(f"[OAuth] Login OK: {user.get('preferred_username', '?')}")

    sid = secrets.token_urlsafe(32)
    SESSIONS[sid] = {
        "logged_in": True,
        "username": user.get("preferred_username", user.get("name", "User")),
        "name":     user.get("name", ""),
        "avatar":   user.get("picture", ""),
        "profile":  f"https://huggingface.co/{user.get('preferred_username', '')}",
    }
    resp = RedirectResponse("/")
    resp.set_cookie("mc_session", sid, httponly=True, samesite="lax", secure=True, max_age=60*60*24*7)
    resp.delete_cookie(_STATE_COOKIE)  # state is single-use
    return resp

@fapp.get("/oauth/logout")
async def oauth_logout(request: Request):
    """Forget the caller's session, clear the session cookie and redirect to ``/``."""
    session_id = _sid(request)
    if session_id:
        # pop() with a default is a no-op for ids that are not stored.
        SESSIONS.pop(session_id, None)
    response = RedirectResponse("/")
    response.delete_cookie("mc_session")
    return response

@fapp.get("/health")
async def health():
    """Liveness probe: always answers with a status of ``ok``."""
    return dict(status="ok")

# Mount Gradio at /gradio  →  API at /gradio/gradio_api/call/chat
# `app` is the object handed to uvicorn below: the FastAPI routes defined
# above plus the Gradio app mounted under /gradio.
app = gr.mount_gradio_app(fapp, gradio_demo, path="/gradio")

# ── Launch ────────────────────────────────────────────────────────────────────
# Runs only when the file is executed as a script; listens on all interfaces
# on port 7860.
if __name__ == "__main__":
    print("[BOOT] All components initialized. Starting uvicorn on :7860", flush=True)
    uvicorn.run(app, host="0.0.0.0", port=7860)