ilyass yani committed on
Commit ·
60b4410
1
Parent(s): 691aaaa
integration HF Inference API (Qwen 7B) (CHATBOT)
Browse files- app/api/chat.py +57 -0
app/api/chat.py
CHANGED
|
@@ -5,8 +5,10 @@ from __future__ import annotations
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import re
|
|
|
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
from urllib import request
|
|
|
|
| 10 |
|
| 11 |
from fastapi import APIRouter, Depends
|
| 12 |
from pydantic import BaseModel, Field
|
|
@@ -354,6 +356,56 @@ def _call_local_llm(prompt: str) -> Optional[str]:
|
|
| 354 |
return None
|
| 355 |
|
| 356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
def _explain_score(context: Dict[str, Any]) -> str:
|
| 358 |
top_candidates = context.get("top_candidates") or []
|
| 359 |
if not top_candidates:
|
|
@@ -736,6 +788,9 @@ def chat(request_payload: ChatRequest, db: Session = Depends(get_db)):
|
|
| 736 |
prompt = _build_prompt(request_payload.message, local_context, intent)
|
| 737 |
llm_response = _call_anthropic(prompt)
|
| 738 |
|
|
|
|
|
|
|
|
|
|
| 739 |
if not llm_response:
|
| 740 |
llm_response = _call_local_llm(prompt)
|
| 741 |
|
|
@@ -775,6 +830,8 @@ def ideal_profile(request_payload: IdealProfileRequest, db: Session = Depends(ge
|
|
| 775 |
"Be concise and realistic.",
|
| 776 |
])
|
| 777 |
llm_response = _call_anthropic(llm_prompt)
|
|
|
|
|
|
|
| 778 |
if not llm_response:
|
| 779 |
llm_response = _call_local_llm(llm_prompt)
|
| 780 |
if llm_response:
|
|
|
|
| 5 |
import json
|
| 6 |
import os
|
| 7 |
import re
|
| 8 |
+
import time
|
| 9 |
from typing import Any, Dict, List, Optional
|
| 10 |
from urllib import request
|
| 11 |
+
from urllib.error import HTTPError
|
| 12 |
|
| 13 |
from fastapi import APIRouter, Depends
|
| 14 |
from pydantic import BaseModel, Field
|
|
|
|
| 356 |
return None
|
| 357 |
|
| 358 |
|
| 359 |
+
# Chat-completions endpoint of the Hugging Face inference router; this is the
# URL that `_call_hf_inference` POSTs its JSON payload to.
_HF_INFERENCE_URL = "https://router.huggingface.co/v1/chat/completions"
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
def _call_hf_inference(
    prompt: str,
    *,
    timeout: float = 30.0,
    retry_delay: float = 15.0,
) -> Optional[str]:
    """Ask the Hugging Face chat-completions endpoint to answer *prompt*.

    This is a best-effort fallback: it never raises for network, HTTP or
    parsing problems and returns ``None`` instead, so the caller can move on
    to the next LLM backend.

    Configuration is read from the environment:

    * ``HF_TOKEN_CHATBOT`` -- bearer token; when unset or empty the function
      returns ``None`` without making any request.
    * ``CHATBOT_MODEL`` -- model identifier; defaults to
      ``Qwen/Qwen2.5-7B-Instruct`` when unset or empty.

    Args:
        prompt: Text sent as the single ``user`` message.
        timeout: Socket timeout, in seconds, for each HTTP attempt.
        retry_delay: Seconds to wait before the single retry that follows an
            HTTP 503 response. Negative values are treated as ``0``.

    Returns:
        The stripped completion text, or ``None`` when the token is missing,
        the request fails, or the response holds no non-blank text content.
    """
    # Local import keeps this block self-contained; the logger is only used
    # to make otherwise silent fallbacks diagnosable.
    import logging

    log = logging.getLogger(__name__)

    token = os.getenv("HF_TOKEN_CHATBOT")
    if not token:
        return None

    # `or` (rather than a getenv default) also covers CHATBOT_MODEL="".
    model = os.getenv("CHATBOT_MODEL") or "Qwen/Qwen2.5-7B-Instruct"

    def _do_request() -> Optional[str]:
        """Perform one POST and extract the first choice's message text."""
        payload = json.dumps({
            "model": model,
            "max_tokens": 700,
            "temperature": 0.2,
            "messages": [{"role": "user", "content": prompt}],
        }).encode("utf-8")
        req = request.Request(
            _HF_INFERENCE_URL,
            data=payload,
            headers={
                "content-type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            method="POST",
        )
        with request.urlopen(req, timeout=timeout) as response:
            data = json.loads(response.read().decode("utf-8"))

        # Validate every level explicitly instead of relying on a blanket
        # exception handler to absorb malformed payloads.
        choices = data.get("choices") if isinstance(data, dict) else None
        if not isinstance(choices, list) or not choices:
            return None
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, str):
            return None
        # A blank completion is as useless as no completion: report None so
        # that callers fall through to the next backend.
        return content.strip() or None

    # At most two attempts: the initial call plus one retry after a 503.
    for attempt in range(2):
        try:
            return _do_request()
        except HTTPError as exc:
            status = exc.code
            # HTTPError wraps the open response; release it explicitly.
            try:
                exc.close()
            except Exception:  # closing is best-effort cleanup only
                pass
            if status == 503 and attempt == 0:
                # Treated as a model cold start -- retry once after a wait.
                log.info(
                    "HF inference returned 503; retrying in %.1fs",
                    max(0.0, retry_delay),
                )
                time.sleep(max(0.0, retry_delay))
                continue
            # 429 quota or other HTTP errors -> let the caller fall back.
            log.warning("HF inference request failed with HTTP %s", status)
            return None
        except Exception:
            # Deliberately broad: this backend is optional, so any failure
            # (network, timeout, bad JSON, ...) must degrade to a fallback.
            log.warning("HF inference request failed", exc_info=True)
            return None
    return None
|
| 407 |
+
|
| 408 |
+
|
| 409 |
def _explain_score(context: Dict[str, Any]) -> str:
|
| 410 |
top_candidates = context.get("top_candidates") or []
|
| 411 |
if not top_candidates:
|
|
|
|
| 788 |
prompt = _build_prompt(request_payload.message, local_context, intent)
|
| 789 |
llm_response = _call_anthropic(prompt)
|
| 790 |
|
| 791 |
+
if not llm_response:
|
| 792 |
+
llm_response = _call_hf_inference(prompt)
|
| 793 |
+
|
| 794 |
if not llm_response:
|
| 795 |
llm_response = _call_local_llm(prompt)
|
| 796 |
|
|
|
|
| 830 |
"Be concise and realistic.",
|
| 831 |
])
|
| 832 |
llm_response = _call_anthropic(llm_prompt)
|
| 833 |
+
if not llm_response:
|
| 834 |
+
llm_response = _call_hf_inference(llm_prompt)
|
| 835 |
if not llm_response:
|
| 836 |
llm_response = _call_local_llm(llm_prompt)
|
| 837 |
if llm_response:
|