ai-talent-finder-backend / app /core /capabilities.py
ilyass yani
chat_llm dispo si HF ou LLM local
c53f53f
Raw
History Blame Contribute Delete
7.64 kB
from __future__ import annotations
import importlib.util
import logging
import os
import shutil
from datetime import datetime
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
_DEFAULT_REQUIRED_FEATURES = [
"cv_text_extraction",
"semantic_matching",
]
_CAPABILITIES_CACHE: Optional[Dict[str, object]] = None
def _has_module(module_name: str) -> bool:
return importlib.util.find_spec(module_name) is not None
def _env_set(name: str) -> bool:
return bool(os.getenv(name))
def _env_bool(name: str, default: bool = False) -> bool:
raw = os.getenv(name)
if raw is None:
return default
return str(raw).strip().lower() in {"1", "true", "yes", "on"}
def _resolve_tesseract_path() -> Optional[str]:
cmd = os.getenv("TESSERACT_CMD", "").strip()
if cmd:
found = shutil.which(cmd)
return found
return shutil.which("tesseract")
def _parse_required_features() -> List[str]:
raw = os.getenv("AI_FEATURES_REQUIRED", "")
if not raw:
return []
return [item.strip() for item in raw.split(",") if item.strip()]
def _feature_status(
required: Dict[str, bool],
optional: Optional[Dict[str, bool]] = None,
notes: str | None = None,
) -> Dict[str, object]:
optional = optional or {}
missing_required = [name for name, ok in required.items() if not ok]
missing_optional = [name for name, ok in optional.items() if not ok]
available = not missing_required
status = "ok" if available else "missing"
if available and missing_optional:
status = "degraded"
return {
"available": available,
"status": status,
"required_missing": missing_required,
"optional_missing": missing_optional,
"notes": notes or "",
}
def detect_capabilities() -> Dict[str, object]:
    """Probe the runtime and report which AI features can be used.

    Nothing is imported or executed: Python packages are looked up through
    ``_has_module``, the Tesseract binary through ``_resolve_tesseract_path``
    and configuration through environment variables.

    Returns:
        A dict with the following keys:

        * ``timestamp``: UTC time of the probe, ISO-8601 with a ``Z`` suffix.
        * ``strict``: boolean value of ``AI_FEATURES_STRICT``, parsed with
          ``_env_bool`` (so ``0``/``false``/``no``/``off`` disable it).
        * ``required_features``: names listed in ``AI_FEATURES_REQUIRED``.
        * ``dependencies``: dependency name -> found (bool).
        * ``api_keys``: variable name -> configured (bool); the values
          themselves are never included.
        * ``flags``: resolved boolean feature flags.
        * ``features``: feature name -> status dict built by
          ``_feature_status``.
        * ``tesseract_path``: resolved binary path or None.
        * ``tesseract_cmd``: trimmed ``TESSERACT_CMD`` value or None.
    """
    from datetime import timezone  # local import: header only imports ``datetime``

    use_ai_profile = _env_bool("USE_AI_PROFILE_GENERATOR", default=False)
    local_llm_enabled = bool(os.getenv("LOCAL_LLM_BASE_URL", "").strip())

    # Report key -> importable module name (they differ for Pillow).
    module_probes = (
        ("fitz", "fitz"),
        ("pdfplumber", "pdfplumber"),
        ("pytesseract", "pytesseract"),
        ("pillow", "PIL"),
        ("transformers", "transformers"),
        ("torch", "torch"),
        ("sentence_transformers", "sentence_transformers"),
        ("faiss", "faiss"),
        ("numpy", "numpy"),
        ("openpyxl", "openpyxl"),
        ("reportlab", "reportlab"),
        ("anthropic", "anthropic"),
    )
    deps = {label: _has_module(module) for label, module in module_probes}

    tesseract_path = _resolve_tesseract_path()
    deps["tesseract_binary"] = bool(tesseract_path)

    api_keys = {
        "ANTHROPIC_API_KEY": _env_set("ANTHROPIC_API_KEY"),
        "OPENAI_API_KEY": _env_set("OPENAI_API_KEY"),
        "HUGGINGFACE_API_KEY": _env_set("HUGGINGFACE_API_KEY"),
        "HF_TOKEN_CHATBOT": _env_set("HF_TOKEN_CHATBOT"),
        # Same stripped check as the LOCAL_LLM_ENABLED flag, so a
        # whitespace-only URL is not reported as a configured provider.
        "LOCAL_LLM_BASE_URL": local_llm_enabled,
    }

    # When the AI profile generator is switched off it has no requirements,
    # which makes the feature report as available (rule-based fallback).
    profile_requirements = (
        {"transformers": deps["transformers"], "torch": deps["torch"]}
        if use_ai_profile
        else {}
    )

    features = {
        "cv_text_extraction": _feature_status(
            required={"fitz": deps["fitz"]},
            optional={"pdfplumber": deps["pdfplumber"]},
            notes="PyMuPDF is required for PDF text extraction.",
        ),
        "cv_ocr": _feature_status(
            required={
                "fitz": deps["fitz"],
                "pytesseract": deps["pytesseract"],
                "pillow": deps["pillow"],
                "tesseract_binary": deps["tesseract_binary"],
            },
            notes="OCR requires the Tesseract binary and PIL.",
        ),
        "ner_hf": _feature_status(
            required={"transformers": deps["transformers"], "torch": deps["torch"]},
            notes="If missing, regex-based NER is still available.",
        ),
        "semantic_matching": _feature_status(
            required={
                "sentence_transformers": deps["sentence_transformers"],
                "numpy": deps["numpy"],
                "torch": deps["torch"],
            },
            optional={"faiss": deps["faiss"]},
            notes="If missing, matching falls back to heuristic scoring.",
        ),
        "export": _feature_status(
            required={"openpyxl": deps["openpyxl"], "reportlab": deps["reportlab"]},
            notes="If missing, export endpoints are disabled.",
        ),
        "chat_llm": _feature_status(
            # Any single configured provider is enough.
            required={
                "llm_provider": (
                    api_keys["ANTHROPIC_API_KEY"]
                    or api_keys["HF_TOKEN_CHATBOT"]
                    or api_keys["LOCAL_LLM_BASE_URL"]
                )
            },
            notes="Disponible si un provider LLM est configure (Anthropic, HF Inference, ou LLM local). Sinon, reponses deterministes.",
        ),
        "profile_generator": _feature_status(
            required=profile_requirements,
            notes="If disabled or missing deps, rule-based profile generation is used.",
        ),
    }

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the rendered string keeps the exact
    # "<naive ISO-8601>Z" format.
    probed_at = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "timestamp": probed_at.isoformat() + "Z",
        # Parsed as a boolean flag: a value such as "0" or "false" must not
        # switch strict mode on.
        "strict": _env_bool("AI_FEATURES_STRICT", default=False),
        "required_features": _parse_required_features(),
        "dependencies": deps,
        "api_keys": api_keys,
        "flags": {
            "USE_AI_PROFILE_GENERATOR": use_ai_profile,
            "LOCAL_LLM_ENABLED": local_llm_enabled,
        },
        "features": features,
        "tesseract_path": tesseract_path,
        "tesseract_cmd": os.getenv("TESSERACT_CMD", "").strip() or None,
    }
def get_capabilities(force_refresh: bool = False) -> Dict[str, object]:
    """Return the capability report, probing only when necessary.

    Args:
        force_refresh: When True, discard any memoized report and run
            ``detect_capabilities`` again.

    Returns:
        The report stored in the module-level cache. The same dict object is
        handed to every caller until the next refresh.
    """
    global _CAPABILITIES_CACHE
    cached = _CAPABILITIES_CACHE
    if cached is not None and not force_refresh:
        return cached
    fresh = detect_capabilities()
    _CAPABILITIES_CACHE = fresh
    return fresh
def log_capabilities_summary(capabilities: Optional[Dict[str, object]] = None) -> Dict[str, object]:
    """Log an overview of feature availability.

    One INFO line carries the number of features per status; every feature
    whose status is not ``"ok"`` then gets its own WARNING line, in
    alphabetical order of feature name.

    Args:
        capabilities: Report to summarize. When omitted (or empty) the report
            from ``get_capabilities`` is used.

    Returns:
        The report that was summarized.
    """
    cap = capabilities if capabilities else get_capabilities()
    features = cap.get("features", {})

    # Entries without a "status" key are counted as missing.
    tally = {"ok": 0, "degraded": 0, "missing": 0}
    for entry in features.values():
        key = str(entry.get("status", "missing"))
        tally[key] = tally.get(key, 0) + 1

    logger.info(
        "AI capabilities: ok=%s degraded=%s missing=%s",
        tally["ok"],
        tally["degraded"],
        tally["missing"],
    )

    for feature_name in sorted(features):
        entry = features[feature_name]
        state = entry.get("status")
        if state != "ok":
            logger.warning(
                "AI capability %s: %s (required_missing=%s optional_missing=%s)",
                feature_name,
                state,
                entry.get("required_missing"),
                entry.get("optional_missing"),
            )
    return cap
def assert_required_features(capabilities: Optional[Dict[str, object]] = None) -> None:
    """Fail fast when strict mode is on and a required feature is unavailable.

    Does nothing unless the report's ``strict`` entry is truthy. The features
    checked are the report's ``required_features``; when that list is empty
    the module defaults are used and a warning is logged.

    Args:
        capabilities: Report to validate. When omitted (or empty) the report
            from ``get_capabilities`` is used.

    Raises:
        RuntimeError: If at least one required feature is absent from the
            report or not marked available.
    """
    cap = capabilities or get_capabilities()
    if not cap.get("strict"):
        return

    wanted = list(cap.get("required_features") or [])
    if not wanted:
        wanted = list(_DEFAULT_REQUIRED_FEATURES)
        logger.warning(
            "AI_FEATURES_STRICT is enabled without AI_FEATURES_REQUIRED; using defaults: %s",
            ", ".join(wanted),
        )

    features = cap.get("features", {})
    unavailable = []
    for feature_name in wanted:
        # A name that is not in the report at all counts as unavailable.
        if not features.get(feature_name, {}).get("available"):
            unavailable.append(feature_name)

    if unavailable:
        joined = ", ".join(unavailable)
        logger.error("Missing required AI features: %s", joined)
        raise RuntimeError(f"Missing required AI features: {joined}")