ilyass yani committed on
Commit
60b4410
·
1 Parent(s): 691aaaa

integration HF Inference API (Qwen 7B) (CHATBOT)

Browse files
Files changed (1) hide show
  1. app/api/chat.py +57 -0
app/api/chat.py CHANGED
@@ -5,8 +5,10 @@ from __future__ import annotations
5
  import json
6
  import os
7
  import re
 
8
  from typing import Any, Dict, List, Optional
9
  from urllib import request
 
10
 
11
  from fastapi import APIRouter, Depends
12
  from pydantic import BaseModel, Field
@@ -354,6 +356,56 @@ def _call_local_llm(prompt: str) -> Optional[str]:
354
  return None
355
 
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  def _explain_score(context: Dict[str, Any]) -> str:
358
  top_candidates = context.get("top_candidates") or []
359
  if not top_candidates:
@@ -736,6 +788,9 @@ def chat(request_payload: ChatRequest, db: Session = Depends(get_db)):
736
  prompt = _build_prompt(request_payload.message, local_context, intent)
737
  llm_response = _call_anthropic(prompt)
738
 
 
 
 
739
  if not llm_response:
740
  llm_response = _call_local_llm(prompt)
741
 
@@ -775,6 +830,8 @@ def ideal_profile(request_payload: IdealProfileRequest, db: Session = Depends(ge
775
  "Be concise and realistic.",
776
  ])
777
  llm_response = _call_anthropic(llm_prompt)
 
 
778
  if not llm_response:
779
  llm_response = _call_local_llm(llm_prompt)
780
  if llm_response:
 
5
  import json
6
  import os
7
  import re
8
+ import time
9
  from typing import Any, Dict, List, Optional
10
  from urllib import request
11
+ from urllib.error import HTTPError
12
 
13
  from fastapi import APIRouter, Depends
14
  from pydantic import BaseModel, Field
 
356
  return None
357
 
358
 
359
_HF_INFERENCE_URL = "https://router.huggingface.co/v1/chat/completions"


def _extract_chat_content(data: Any) -> Optional[str]:
    """Return the stripped text of the first choice in a chat-completions payload.

    Returns ``None`` when the payload does not have the expected
    ``{"choices": [{"message": {"content": <str>}}]}`` shape, or when the
    content is empty or whitespace-only.
    """
    if not isinstance(data, dict):
        return None
    choices = data.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first_choice = choices[0]
    message = first_choice.get("message") if isinstance(first_choice, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    if not isinstance(content, str):
        return None
    # An empty completion is reported as None so the result is always
    # either usable text or None, never "".
    return content.strip() or None


def _call_hf_inference(
    prompt: str,
    *,
    timeout: float = 30,
    retry_delay: float = 15,
) -> Optional[str]:
    """Send ``prompt`` to the Hugging Face chat-completions endpoint.

    The bearer token is read from the ``HF_TOKEN_CHATBOT`` environment
    variable and the model name from ``CHATBOT_MODEL`` (default
    ``"Qwen/Qwen2.5-7B-Instruct"``).

    Args:
        prompt: Text sent as the single user message.
        timeout: Socket timeout in seconds for each HTTP attempt.
        retry_delay: Seconds to wait before the single retry that follows
            an HTTP 503 response. Negative values are treated as 0.

    Returns:
        The stripped completion text, or ``None`` when no token is
        configured, the request fails, or the response carries no usable
        text. This function never raises for request/parse failures so the
        caller can fall through to its next backend.
    """
    token = os.getenv("HF_TOKEN_CHATBOT")
    if not token:
        return None

    model = os.getenv("CHATBOT_MODEL", "Qwen/Qwen2.5-7B-Instruct")

    def _post_once() -> Optional[str]:
        # Built inside the helper so serialisation/encoding errors are
        # handled by the same best-effort guard as network errors.
        body = json.dumps({
            "model": model,
            "max_tokens": 700,
            "temperature": 0.2,
            "messages": [{"role": "user", "content": prompt}],
        }).encode("utf-8")
        req = request.Request(
            _HF_INFERENCE_URL,
            data=body,
            headers={
                "content-type": "application/json",
                "Authorization": f"Bearer {token}",
            },
            method="POST",
        )
        with request.urlopen(req, timeout=timeout) as response:
            data = json.loads(response.read().decode("utf-8"))
        return _extract_chat_content(data)

    # At most two attempts: the initial call plus one retry after a 503.
    for is_last_attempt in (False, True):
        try:
            return _post_once()
        except HTTPError as exc:
            # 503 is treated as a cold start and retried once; 429 (quota)
            # and every other HTTP status fall back immediately.
            if exc.code != 503 or is_last_attempt:
                return None
        except Exception:
            # Deliberate best-effort: any network, decoding or parsing
            # failure means "no answer from this backend".
            return None
        time.sleep(max(retry_delay, 0))
    return None
407
+
408
+
409
  def _explain_score(context: Dict[str, Any]) -> str:
410
  top_candidates = context.get("top_candidates") or []
411
  if not top_candidates:
 
788
  prompt = _build_prompt(request_payload.message, local_context, intent)
789
  llm_response = _call_anthropic(prompt)
790
 
791
+ if not llm_response:
792
+ llm_response = _call_hf_inference(prompt)
793
+
794
  if not llm_response:
795
  llm_response = _call_local_llm(prompt)
796
 
 
830
  "Be concise and realistic.",
831
  ])
832
  llm_response = _call_anthropic(llm_prompt)
833
+ if not llm_response:
834
+ llm_response = _call_hf_inference(llm_prompt)
835
  if not llm_response:
836
  llm_response = _call_local_llm(llm_prompt)
837
  if llm_response: