from __future__ import annotations

import os
from typing import Any, Generator

import gradio as gr
import requests
from huggingface_hub import InferenceClient
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, FileResponse
from fastapi.staticfiles import StaticFiles

# ── ZeroGPU (enable when using a local model) ───────────────────────────────
# `spaces` is an optional dependency: record whether it could be imported so
# the rest of the module can check HAS_SPACES instead of re-trying the import.
try:
    import spaces
except ImportError:
    HAS_SPACES = False
else:
    HAS_SPACES = True

# ── Qwen3.5 Collection API ───────────────────────────────────────────────────
# Hub endpoint queried for the members of the Qwen3.5 collection.
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"

# Static catalogue used when the collection cannot be fetched.  Every entry
# gets its own empty provider list, mirroring the shape of a fetched entry.
FALLBACK_MODELS = [
    {"id": f"Qwen/Qwen3.5-{size}", "live_providers": []}
    for size in ("235B-A22B", "32B", "14B", "7B", "3B", "1.5B", "0.6B")
]


def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Return the models of the Qwen3.5 collection with their live providers.

    Requests ``COLLECTION_API`` and keeps every collection item whose
    ``type`` is ``"model"`` and that has an ``id``.  For each model, the
    providers whose ``providerStatus`` and ``modelStatus`` are both ``"live"``
    are collected, de-duplicated and sorted.

    Returns:
        A list of ``{"id": ..., "live_providers": [...]}`` dicts.
        ``FALLBACK_MODELS`` is returned instead when the request fails, when
        the response does not have the expected shape, or when it contains
        no usable model.
    """
    try:
        resp = requests.get(COLLECTION_API, timeout=20)
        resp.raise_for_status()
        payload = resp.json()
    except Exception:
        # Deliberately broad: this function is called while the module is
        # being imported, so any fetch/decode failure must degrade to the
        # static catalogue rather than abort start-up.
        return FALLBACK_MODELS

    # Valid JSON is not necessarily the expected object; validate the shape
    # before calling dict methods on it.
    items = payload.get("items") if isinstance(payload, dict) else None
    if not isinstance(items, list):
        return FALLBACK_MODELS

    models: list[dict[str, Any]] = []
    for item in items:
        if not isinstance(item, dict) or item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue
        raw_providers = item.get("availableInferenceProviders")
        if not isinstance(raw_providers, list):
            raw_providers = []
        live = {
            str(p.get("provider"))
            for p in raw_providers
            if isinstance(p, dict)
            and p.get("providerStatus") == "live"
            and p.get("modelStatus") == "live"
        }
        models.append({"id": model_id, "live_providers": sorted(live)})
    return models or FALLBACK_MODELS


# Model catalogue, resolved once when the module is imported.
MODEL_INFO = fetch_qwen35_models()
# Model ids in catalogue order (used as the dropdown choices).
MODEL_IDS = [entry["id"] for entry in MODEL_INFO]
# Model id -> names of its live inference providers.
PROVIDER_MAP = {entry["id"]: entry.get("live_providers", []) for entry in MODEL_INFO}
# First catalogue entry, or a fixed id when the catalogue is empty.
DEFAULT_MODEL = next(iter(MODEL_IDS), "Qwen/Qwen3.5-32B")


def provider_note(model_id: str) -> str:
    """Build the Markdown status line shown for ``model_id``.

    Lists the model's live providers from ``PROVIDER_MAP``; when the model is
    unknown or has none, returns a warning asking to pick another model.
    """
    live = PROVIDER_MAP.get(model_id, [])
    if not live:
        return "⚠️ 현재 라이브 프로바이더 없음 — 다른 모델을 선택하세요."
    joined = ", ".join(live)
    return f"🟢 Live providers: **{joined}**"


# ── Streaming Generator (ZeroGPU-compatible) ────────────────────────────────
def _build_messages(history, system_prompt: str, message: str) -> list[dict]:
    msgs = []
    if system_prompt.strip():
        msgs.append({"role": "system", "content": system_prompt.strip()})
    for user_msg, assistant_msg in history:
        if user_msg:
            msgs.append({"role": "user",      "content": user_msg})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": assistant_msg})
    msgs.append({"role": "user", "content": message})
    return msgs


def generate_stream(
    message:        str,
    history:        list[tuple[str, str]],
    model_id:       str,
    system_prompt:  str,
    max_new_tokens: int,
    temperature:    float,
    top_p:          float,
) -> Generator[str, None, None]:
    """Stream a chat completion for ``message`` as growing partial replies.

    Args:
        message: The new user message; a blank message yields ``""`` once
            and makes no request.
        history: Previous turns, forwarded to ``_build_messages``.
        model_id: Model passed to the inference client.
        system_prompt: Optional system instruction.
        max_new_tokens: Generation limit, sent as ``max_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        The reply accumulated so far after every non-empty content delta.
        If the request fails, a formatted error note is yielded; text that
        was already streamed is kept in front of it.
    """
    if not message.strip():
        yield ""
        return

    client   = InferenceClient(token=os.getenv("HF_TOKEN"), timeout=180)
    messages = _build_messages(history, system_prompt, message)
    partial  = ""

    try:
        stream = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
        )
        for chunk in stream:
            # A chunk may carry no choices (e.g. a usage-only chunk in
            # OpenAI-style streams).  Indexing [0] would raise IndexError and
            # the handler below would turn a finished reply into an error.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                partial += delta
                yield partial
    except Exception as exc:
        # Broad on purpose: any provider/transport failure is reported in the
        # chat window instead of surfacing as an unhandled error.
        error_note = (
            f"❌ **오류** (`{model_id}`)\n\n"
            f"```\n{exc}\n```\n\n"
            "다른 모델을 선택하거나 잠시 후 다시 시도하세요."
        )
        # Keep whatever was streamed before the failure.
        yield f"{partial}\n\n{error_note}" if partial else error_note


# ── Gradio Blocks UI ─────────────────────────────────────────────────────────
# Custom stylesheet handed to gr.Blocks(css=...) below.  The selectors target
# the elem_id values assigned in the UI definition (#qwen-header, #model-row,
# #provider-note) as well as generic element/class selectors.
css = """
/* ── Global ── */
body, .gradio-container { background: #0f0a1e !important; }
.gradio-container { max-width: 900px !important; margin: 0 auto; }

/* ── Header band ── */
#qwen-header {
  background: linear-gradient(135deg, #1a0a30 0%, #2d1045 50%, #1a0a30 100%);
  border-bottom: 1px solid #4a2a6a;
  padding: 18px 24px;
  border-radius: 16px 16px 0 0;
  margin-bottom: 0;
}
#qwen-header h1 {
  font-size: 26px; font-weight: 900; margin: 0;
  background: linear-gradient(135deg, #ff6b9d, #c44dff, #70a1ff);
  -webkit-background-clip: text; -webkit-text-fill-color: transparent;
}
#qwen-header p { color: #a080b0; font-size: 13px; margin: 4px 0 0; }

/* ── Model dropdown ── */
#model-row { padding: 14px 0 0; }
label { color: #c490e0 !important; font-weight: 600 !important; }

/* ── Provider note ── */
#provider-note {
  background: rgba(196,77,255,.07);
  border: 1px solid rgba(196,77,255,.2);
  border-radius: 10px; padding: 8px 14px;
  font-size: 13px; color: #d0a0f0;
}

/* ── Chat bubbles ── */
.message.user   { background: linear-gradient(135deg,#2d1045,#3a1a5a) !important; border-color: #6a3a8a !important; }
.message.bot    { background: linear-gradient(135deg,#0f1a30,#1a2a48) !important; border-color: #3a5a7a !important; }
.message        { border-radius: 14px !important; border: 1px solid !important; }
.chatbot        { background: #0d0820 !important; border-color: #3a2060 !important; border-radius: 0 !important; }

/* ── Input area ── */
.input-area     { background: #150d28 !important; border-top: 1px solid #3a2060 !important; }
textarea        { background: #1a1035 !important; border-color: #4a2a6a !important; color: #ffe8f0 !important; }
textarea:focus  { border-color: #ff6b9d !important; }

/* ── Send button ── */
button[aria-label="Submit"] {
  background: linear-gradient(135deg,#ff6b9d,#c44dff) !important;
  border: none !important; border-radius: 10px !important;
}

/* ── Accordion ── */
.accordion { background: #1a0a30 !important; border-color: #4a2a6a !important; border-radius: 12px !important; }
.accordion-header { color: #c490e0 !important; }

/* ── Sliders ── */
input[type=range] { accent-color: #c44dff; }
"""

# Declarative UI definition.  Components are created in display order:
# header, model dropdown, provider status line, generation settings, chat.
with gr.Blocks(
    title="Qwen3.5 스트리밍 챗",
    css=css,
    theme=gr.themes.Base(
        primary_hue="purple",
        secondary_hue="pink",
        font=gr.themes.GoogleFont("Noto Sans KR"),
    ),
) as gradio_app:

    # Header band; styled through the #qwen-header rules in `css`.
    with gr.Column(elem_id="qwen-header"):
        gr.HTML("""
        <h1>⚡ Qwen3.5 스트리밍 챗</h1>
        <p>Qwen3.5 컬렉션 · HF Inference API · 실시간 스트리밍</p>
        """)

    # Model picker, limited to the ids in MODEL_IDS (no free-text values).
    with gr.Row(elem_id="model-row"):
        model_dd = gr.Dropdown(
            choices=MODEL_IDS,
            value=DEFAULT_MODEL,
            label="🤖 Qwen3.5 모델 선택",
            allow_custom_value=False,
            scale=3,
        )

    # Status line for the selected model, initialised for the default model.
    provider_md = gr.Markdown(
        provider_note(DEFAULT_MODEL),
        elem_id="provider-note",
    )
    # Recompute the status line whenever the dropdown value changes.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    # Generation settings, collapsed by default.
    with gr.Accordion("⚙️ 생성 설정", open=False):
        system_prompt = gr.Textbox(
            label="시스템 프롬프트",
            value="당신은 유능하고 친절한 AI 어시스턴트입니다. 한국어로 답변하세요.",
            lines=3,
        )
        with gr.Row():
            max_new_tokens = gr.Slider(64, 8192, step=64, value=1024, label="Max Tokens")
            temperature    = gr.Slider(0.0, 2.0, step=0.05, value=0.7, label="Temperature")
            top_p          = gr.Slider(0.1, 1.0, step=0.05, value=0.9, label="Top-p")

    # Chat panel driven by generate_stream.  The additional_inputs order
    # matches generate_stream's parameters after (message, history).
    # NOTE(review): retry_btn / undo_btn / clear_btn appear to be Gradio 4.x
    # ChatInterface arguments that later major versions dropped — confirm
    # against the pinned gradio version.
    gr.ChatInterface(
        fn=generate_stream,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
        submit_btn="📨 전송",
        stop_btn="⏹ 중지",
        retry_btn="🔄 재시도",
        undo_btn="↩ 되돌리기",
        clear_btn="🗑 초기화",
        chatbot=gr.Chatbot(
            height=520,
            placeholder="<div style='text-align:center;padding:60px;color:#6040a0'>모델을 선택하고 대화를 시작하세요 💬</div>",
            render_markdown=True,
        ),
    )


# ── FastAPI wrapper (serves index.html) ──────────────────────────────────────
# FastAPI application that hosts the Gradio UI and the routes defined below.
app = FastAPI(title="Qwen3.5 Chat")

# Mount the Gradio app under the /gradio path.
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")

# Static files (index.html, etc.): mounted at /static only when a "static"
# directory exists relative to the current working directory.
if os.path.isdir("static"):
    app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve ``index.html`` when it exists, otherwise forward to ``/gradio``."""
    if not os.path.exists("index.html"):
        # No index.html: answer 200 with a meta-refresh page that sends the
        # browser to /gradio (a client-side hop, not an HTTP redirect).
        return HTMLResponse('<meta http-equiv="refresh" content="0;url=/gradio">', status_code=200)
    return FileResponse("index.html")


@app.get("/models")
async def get_models():
    """Return the model catalogue together with the default model id."""
    return dict(models=MODEL_INFO, default=DEFAULT_MODEL)


# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # uvicorn is only needed when this file is executed directly, so it is
    # imported here rather than at module level.
    import uvicorn

    # Listen on all interfaces, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)