"""Qwen3.5 streaming chat: a Gradio Blocks UI mounted on a FastAPI app.

At import time the module fetches the Qwen3.5 collection from the Hugging Face
Hub (falling back to a static list), builds the Gradio UI, and mounts it under
``/gradio`` on a FastAPI application that also serves ``index.html`` and a
``/models`` JSON endpoint.
"""
from __future__ import annotations

import logging
import os
from typing import Any, Generator

import gradio as gr
import requests
from fastapi import FastAPI
from fastapi.responses import FileResponse, HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

# ── ZeroGPU (enable when using a local model) ────────────────────────────────
try:
    import spaces  # noqa: F401  (imported only to detect availability)

    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False

# ── Qwen3.5 Collection API ───────────────────────────────────────────────────
COLLECTION_API = "https://huggingface.co/api/collections/Qwen/qwen35"

# Used whenever the collection endpoint is unreachable or returns no models.
FALLBACK_MODELS = [
    {"id": "Qwen/Qwen3.5-235B-A22B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-32B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-14B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-7B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-3B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-1.5B", "live_providers": []},
    {"id": "Qwen/Qwen3.5-0.6B", "live_providers": []},
]


def fetch_qwen35_models() -> list[dict[str, Any]]:
    """Return the models listed in the Qwen3.5 collection.

    Each entry is ``{"id": <model id>, "live_providers": <sorted provider
    names>}``, where a provider is included only when both its
    ``providerStatus`` and ``modelStatus`` are ``"live"``.

    Returns:
        The parsed model list, or ``FALLBACK_MODELS`` when the request fails,
        the response is not the expected JSON object, or no model is found.
    """
    try:
        resp = requests.get(COLLECTION_API, timeout=20)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # This runs at import time, so a network hiccup must not kill the app.
        logger.warning("Could not fetch %s, using fallback models: %s", COLLECTION_API, exc)
        return FALLBACK_MODELS

    # Validate the shape before walking it: a non-object payload or a null
    # "items" field would otherwise raise outside the try block above.
    items = payload.get("items") if isinstance(payload, dict) else None
    if not isinstance(items, list):
        logger.warning("Unexpected payload from %s, using fallback models", COLLECTION_API)
        return FALLBACK_MODELS

    models: list[dict[str, Any]] = []
    for item in items:
        if not isinstance(item, dict) or item.get("type") != "model":
            continue
        model_id = item.get("id")
        if not model_id:
            continue
        providers = {
            str(p.get("provider"))
            for p in (item.get("availableInferenceProviders") or [])
            if isinstance(p, dict)
            and p.get("providerStatus") == "live"
            and p.get("modelStatus") == "live"
        }
        models.append({"id": model_id, "live_providers": sorted(providers)})
    return models or FALLBACK_MODELS


MODEL_INFO = fetch_qwen35_models()
MODEL_IDS = [x["id"] for x in MODEL_INFO]
DEFAULT_MODEL = MODEL_IDS[0] if MODEL_IDS else "Qwen/Qwen3.5-32B"
PROVIDER_MAP = {x["id"]: x.get("live_providers", []) for x in MODEL_INFO}


def provider_note(model_id: str) -> str:
    """Return the Markdown status line describing live providers of *model_id*."""
    providers = PROVIDER_MAP.get(model_id, [])
    if providers:
        return f"🟢 Live providers: **{', '.join(providers)}**"
    return "⚠️ 현재 라이브 프로바이더 없음 — 다른 모델을 선택하세요."


# ── Streaming generator (ZeroGPU compatible) ─────────────────────────────────
def _build_messages(history, system_prompt: str, message: str) -> list[dict]:
    """Convert chat history into an OpenAI-style message list.

    Args:
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts. ``None`` means no history.
        system_prompt: Optional system prompt; blank or ``None`` is omitted.
        message: The new user message, always appended last.

    Returns:
        A list of ``{"role", "content"}`` dicts; empty turns are skipped.
    """
    msgs: list[dict] = []
    system_text = (system_prompt or "").strip()
    if system_text:
        msgs.append({"role": "system", "content": system_text})

    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style history: pass through user/assistant turns only.
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and content:
                msgs.append({"role": role, "content": content})
            continue
        user_msg, assistant_msg = turn
        if user_msg:
            msgs.append({"role": "user", "content": user_msg})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": assistant_msg})

    msgs.append({"role": "user", "content": message})
    return msgs


def generate_stream(
    message: str,
    history: list[tuple[str, str]],
    model_id: str,
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
) -> Generator[str, None, None]:
    """Stream a chat completion for use as a Gradio ``ChatInterface`` callback.

    Yields the accumulated response text after every received delta. A blank
    *message* yields a single empty string. Any exception from the inference
    call is reported as a final Markdown error message (preceded by whatever
    text had already been streamed) instead of being raised.
    """
    if not (message or "").strip():
        yield ""
        return

    client = InferenceClient(token=os.getenv("HF_TOKEN"), timeout=180)
    messages = _build_messages(history, system_prompt, message)
    partial = ""
    try:
        stream = client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=int(max_new_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
        )
        for chunk in stream:
            # A chunk may arrive without choices; indexing [0] on it would
            # raise and replace the whole answer with an error.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                partial += delta
                yield partial
    except Exception as exc:  # UI boundary: surface every failure to the user
        error = (
            f"❌ **오류** (`{model_id}`)\n\n"
            f"```\n{exc}\n```\n\n"
            "다른 모델을 선택하거나 잠시 후 다시 시도하세요."
        )
        # Keep the text streamed so far rather than overwriting it.
        yield f"{partial}\n\n{error}" if partial else error


# ── Gradio Blocks UI ─────────────────────────────────────────────────────────
css = """
/* ── Global ── */
body, .gradio-container { background: #0f0a1e !important; }
.gradio-container { max-width: 900px !important; margin: 0 auto; }

/* ── Header band ── */
#qwen-header {
    background: linear-gradient(135deg, #1a0a30 0%, #2d1045 50%, #1a0a30 100%);
    border-bottom: 1px solid #4a2a6a;
    padding: 18px 24px;
    border-radius: 16px 16px 0 0;
    margin-bottom: 0;
}
#qwen-header h1 {
    font-size: 26px;
    font-weight: 900;
    margin: 0;
    background: linear-gradient(135deg, #ff6b9d, #c44dff, #70a1ff);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}
#qwen-header p { color: #a080b0; font-size: 13px; margin: 4px 0 0; }

/* ── Model dropdown ── */
#model-row { padding: 14px 0 0; }
label { color: #c490e0 !important; font-weight: 600 !important; }

/* ── Provider note ── */
#provider-note {
    background: rgba(196,77,255,.07);
    border: 1px solid rgba(196,77,255,.2);
    border-radius: 10px;
    padding: 8px 14px;
    font-size: 13px;
    color: #d0a0f0;
}

/* ── Chat bubbles ── */
.message.user {
    background: linear-gradient(135deg,#2d1045,#3a1a5a) !important;
    border-color: #6a3a8a !important;
}
.message.bot {
    background: linear-gradient(135deg,#0f1a30,#1a2a48) !important;
    border-color: #3a5a7a !important;
}
.message { border-radius: 14px !important; border: 1px solid !important; }
.chatbot {
    background: #0d0820 !important;
    border-color: #3a2060 !important;
    border-radius: 0 !important;
}

/* ── Input area ── */
.input-area {
    background: #150d28 !important;
    border-top: 1px solid #3a2060 !important;
}
textarea {
    background: #1a1035 !important;
    border-color: #4a2a6a !important;
    color: #ffe8f0 !important;
}
textarea:focus { border-color: #ff6b9d !important; }

/* ── Send button ── */
button[aria-label="Submit"] {
    background: linear-gradient(135deg,#ff6b9d,#c44dff) !important;
    border: none !important;
    border-radius: 10px !important;
}

/* ── Accordion ── */
.accordion {
    background: #1a0a30 !important;
    border-color: #4a2a6a !important;
    border-radius: 12px !important;
}
.accordion-header { color: #c490e0 !important; }

/* ── Sliders ── */
input[type=range] { accent-color: #c44dff; }
"""

# NOTE(review): the original indentation was lost, so the nesting of the
# components below is reconstructed — confirm the layout against the app.
with gr.Blocks(
    title="Qwen3.5 스트리밍 챗",
    css=css,
    theme=gr.themes.Base(
        primary_hue="purple",
        secondary_hue="pink",
        font=gr.themes.GoogleFont("Noto Sans KR"),
    ),
) as gradio_app:
    with gr.Column(elem_id="qwen-header"):
        # The stylesheet targets `#qwen-header h1` and `#qwen-header p`, so
        # the title and subtitle must be wrapped in those elements.
        gr.HTML(
            "<h1>⚡ Qwen3.5 스트리밍 챗</h1>\n"
            "<p>Qwen3.5 컬렉션 · HF Inference API · 실시간 스트리밍</p>"
        )

    with gr.Row(elem_id="model-row"):
        model_dd = gr.Dropdown(
            choices=MODEL_IDS,
            value=DEFAULT_MODEL,
            label="🤖 Qwen3.5 모델 선택",
            allow_custom_value=False,
            scale=3,
        )

    provider_md = gr.Markdown(
        provider_note(DEFAULT_MODEL),
        elem_id="provider-note",
    )
    # Refresh the provider status line whenever another model is picked.
    model_dd.change(fn=provider_note, inputs=model_dd, outputs=provider_md)

    with gr.Accordion("⚙️ 생성 설정", open=False):
        system_prompt = gr.Textbox(
            label="시스템 프롬프트",
            value="당신은 유능하고 친절한 AI 어시스턴트입니다. 한국어로 답변하세요.",
            lines=3,
        )
        with gr.Row():
            max_new_tokens = gr.Slider(64, 8192, step=64, value=1024, label="Max Tokens")
            temperature = gr.Slider(0.0, 2.0, step=0.05, value=0.7, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, step=0.05, value=0.9, label="Top-p")

    gr.ChatInterface(
        fn=generate_stream,
        additional_inputs=[model_dd, system_prompt, max_new_tokens, temperature, top_p],
        submit_btn="📨 전송",
        stop_btn="⏹ 중지",
        retry_btn="🔄 재시도",
        undo_btn="↩ 되돌리기",
        clear_btn="🗑 초기화",
        chatbot=gr.Chatbot(
            height=520,
            placeholder="모델을 선택하고 대화를 시작하세요 💬",
            render_markdown=True,
        ),
    )

# ── FastAPI wrapper (serves index.html) ──────────────────────────────────────
app = FastAPI(title="Qwen3.5 Chat")

# Mount the Gradio app under the /gradio path.
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")

# Static files (assets referenced by index.html, etc.)
if os.path.isdir("static"):
    app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve ``index.html`` when present, otherwise redirect to the Gradio UI."""
    if os.path.exists("index.html"):
        return FileResponse("index.html")
    # No landing page: send the browser to the mounted Gradio app instead of
    # answering with an empty document.
    return RedirectResponse(url="/gradio")


@app.get("/models")
async def get_models():
    """Return the discovered model list and the default model id as JSON."""
    return {"models": MODEL_INFO, "default": DEFAULT_MODEL}


# ── Entry point ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)