"""Minimal chat backend: a streaming chat API plus a static HTML homepage."""

import os

from openai import OpenAI
from gradio import Server
from fastapi.responses import HTMLResponse

app = Server()

# OpenAI-compatible client pointed at the Hugging Face router. The token is
# read from the HF_TOKEN environment variable; an empty string is used when it
# is unset so that constructing the client does not depend on the variable.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ.get("HF_TOKEN", ""),
    default_headers={
        # NOTE(review): presumably selects the account billed for requests;
        # confirm against the Hugging Face router documentation.
        "X-HF-Bill-To": "huggingface"
    },
)


@app.api()
def chat(message: str, history_json: list) -> str:
    """Stream the model's reply to ``message`` given the prior conversation.

    This is a generator: after every received chunk that carries text it
    yields the *cumulative* reply so far, not just the newest fragment.

    Args:
        message: The new user message to append to the conversation.
        history_json: Prior turns as a list of dictionaries such as
            ``[{"role": "user", "content": "..."},
            {"role": "assistant", "content": "..."}]``. ``None`` or an empty
            list is treated as "no prior conversation".

    Yields:
        The reply text accumulated so far.
    """
    # NOTE(review): the ``-> str`` annotation is kept as-is even though this
    # function yields; the decorator may use it to describe the API output.
    # Confirm before changing it to an iterator type.

    # Copy the history instead of concatenating onto it directly: this
    # tolerates a missing/None history and never touches the caller's list.
    prior_turns = list(history_json) if history_json else []
    messages = [*prior_turns, {"role": "user", "content": message}]

    completion = client.chat.completions.create(
        model="deepseek-ai/DeepSeek-V4-Pro:together",
        messages=messages,
        stream=True,
    )

    full_response = ""
    for chunk in completion:
        # Some chunks have no choices or no text in their delta; skip them.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece:
            full_response += piece
            yield full_response


@app.get("/")
async def homepage():
    """Serve ``index.html`` located next to this file as the root page."""
    # Resolve relative to this file so the lookup does not depend on the
    # process's current working directory.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    html_path = os.path.join(base_dir, "index.html")
    with open(html_path, "r", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())


if __name__ == "__main__":
    app.launch(show_error=True)