| from __future__ import annotations |
|
|
| import importlib.util |
| import logging |
| import os |
| import shutil |
| from datetime import datetime |
| from typing import Dict, List, Optional |
|
|
# Module-scoped logger used for the capability summary and strict-mode errors.
logger = logging.getLogger(__name__)

# Features enforced in strict mode when AI_FEATURES_REQUIRED is unset or empty.
_DEFAULT_REQUIRED_FEATURES: List[str] = ["cv_text_extraction", "semantic_matching"]

# Memoized capability report; stays None until get_capabilities() fills it.
_CAPABILITIES_CACHE: Optional[Dict[str, object]] = None
|
|
|
|
def _has_module(module_name: str) -> bool:
    """Return True when an import spec can be found for *module_name*.

    ``importlib.util.find_spec`` does not always answer with ``None`` for an
    unavailable module: it raises ``ModuleNotFoundError`` when the parent
    package of a dotted name is absent, and ``ValueError`` when an
    already-loaded module has ``__spec__`` set to ``None``. A capability
    probe must never crash on those, so they are reported as "not available".

    Args:
        module_name: Absolute module name, e.g. ``"torch"`` or ``"PIL"``.

    Returns:
        True if a spec was found, False if the module is missing or the
        lookup itself failed.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        # ModuleNotFoundError is a subclass of ImportError.
        return False
|
|
|
|
def _env_set(name: str) -> bool:
    """Return True when environment variable *name* holds a non-blank value.

    Surrounding whitespace is ignored, so a variable that is unset, empty or
    whitespace-only is reported as not set. This matches how this module
    reads ``LOCAL_LLM_BASE_URL`` and ``TESSERACT_CMD``, which are stripped
    before being tested.

    Args:
        name: Name of the environment variable to inspect.

    Returns:
        True if the variable contains at least one non-whitespace character.
    """
    return bool(os.getenv(name, "").strip())
|
|
|
|
def _env_bool(name: str, default: bool = False) -> bool:
    """Interpret environment variable *name* as a boolean switch.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is not defined at all.

    Returns:
        ``default`` if the variable is undefined; otherwise True only when
        the trimmed, lower-cased value is one of ``1``, ``true``, ``yes``
        or ``on``.
    """
    value = os.getenv(name)
    if value is not None:
        normalized = str(value).strip().lower()
        return normalized in {"1", "true", "yes", "on"}
    return default
|
|
|
|
def _resolve_tesseract_path() -> Optional[str]:
    """Locate the Tesseract executable.

    A non-blank ``TESSERACT_CMD`` is looked up on its own; there is no
    fallback to the default name when that lookup fails. Only when the
    variable is unset or blank is ``tesseract`` searched for on ``PATH``.

    Returns:
        The path reported by ``shutil.which``, or None if nothing was found.
    """
    configured = os.getenv("TESSERACT_CMD", "").strip()
    candidate = configured if configured else "tesseract"
    return shutil.which(candidate)
|
|
|
|
def _parse_required_features() -> List[str]:
    """Read the comma-separated ``AI_FEATURES_REQUIRED`` environment variable.

    Returns:
        The feature names in their original order, each trimmed of
        surrounding whitespace, with blank entries dropped. An unset or
        empty variable yields an empty list.
    """
    pieces = os.getenv("AI_FEATURES_REQUIRED", "").split(",")
    trimmed = (piece.strip() for piece in pieces)
    return [feature for feature in trimmed if feature]
|
|
|
|
def _feature_status(
    required: Dict[str, bool],
    optional: Optional[Dict[str, bool]] = None,
    notes: Optional[str] = None,
) -> Dict[str, object]:
    """Summarize one feature from the presence flags of its prerequisites.

    Args:
        required: Prerequisite name -> whether it is present. Any falsy entry
            makes the feature unavailable.
        optional: Nice-to-have name -> whether it is present. Falsy entries
            only downgrade an otherwise available feature.
        notes: Free-form explanation copied into the result.

    Returns:
        A dict with ``available`` (bool), ``status`` (``"ok"``,
        ``"degraded"`` or ``"missing"``), ``required_missing``,
        ``optional_missing`` (lists of names) and ``notes`` (``""`` when no
        notes were given).
    """

    def _absent(checks: Dict[str, bool]) -> List[str]:
        return [label for label, present in checks.items() if not present]

    lacking_required = _absent(required)
    lacking_optional = _absent(optional or {})

    # A missing requirement always wins; missing extras only matter when
    # every requirement is satisfied.
    if lacking_required:
        state = "missing"
    elif lacking_optional:
        state = "degraded"
    else:
        state = "ok"

    return {
        "available": not lacking_required,
        "status": state,
        "required_missing": lacking_required,
        "optional_missing": lacking_optional,
        "notes": notes if notes else "",
    }
|
|
|
|
def detect_capabilities() -> Dict[str, object]:
    """Probe the runtime for optional AI dependencies, credentials and flags.

    Nothing is cached here; every call re-reads the environment and repeats
    the module lookups.

    Returns:
        A dict with the following keys:

        * ``timestamp``: current UTC time, ISO-8601 with a ``Z`` suffix.
        * ``strict``: whether ``AI_FEATURES_STRICT`` enables strict mode.
        * ``required_features``: names parsed from ``AI_FEATURES_REQUIRED``.
        * ``dependencies``: module / binary name -> bool (found or not).
        * ``api_keys``: env var name -> bool (presence only, never the value).
        * ``flags``: resolved boolean feature switches.
        * ``features``: feature name -> status dict from ``_feature_status``.
        * ``tesseract_path``: resolved Tesseract executable, or None.
        * ``tesseract_cmd``: trimmed ``TESSERACT_CMD`` value, or None.
    """
    # Imported locally because the module header only imports ``datetime``.
    from datetime import timezone

    use_ai_profile = _env_bool("USE_AI_PROFILE_GENERATOR", default=False)
    local_llm_enabled = bool(os.getenv("LOCAL_LLM_BASE_URL", "").strip())

    # A bare presence check would turn strict mode ON for values such as
    # "0" or "false". Only explicit "off" spellings (and blank) disable it;
    # every other non-blank value keeps enabling it, as before.
    strict_raw = os.getenv("AI_FEATURES_STRICT", "").strip().lower()
    strict = strict_raw not in {"", "0", "false", "no", "off"}

    deps = {
        "fitz": _has_module("fitz"),
        "pdfplumber": _has_module("pdfplumber"),
        "pytesseract": _has_module("pytesseract"),
        "pillow": _has_module("PIL"),
        "transformers": _has_module("transformers"),
        "torch": _has_module("torch"),
        "sentence_transformers": _has_module("sentence_transformers"),
        "faiss": _has_module("faiss"),
        "numpy": _has_module("numpy"),
        "openpyxl": _has_module("openpyxl"),
        "reportlab": _has_module("reportlab"),
        "anthropic": _has_module("anthropic"),
    }

    # The OCR binary is tracked next to the Python modules so that features
    # can list it as an ordinary requirement.
    tesseract_path = _resolve_tesseract_path()
    deps["tesseract_binary"] = bool(tesseract_path)

    api_keys = {
        "ANTHROPIC_API_KEY": _env_set("ANTHROPIC_API_KEY"),
        "OPENAI_API_KEY": _env_set("OPENAI_API_KEY"),
        "HUGGINGFACE_API_KEY": _env_set("HUGGINGFACE_API_KEY"),
        "HF_TOKEN_CHATBOT": _env_set("HF_TOKEN_CHATBOT"),
        "LOCAL_LLM_BASE_URL": _env_set("LOCAL_LLM_BASE_URL"),
    }

    features = {
        "cv_text_extraction": _feature_status(
            required={"fitz": deps["fitz"]},
            optional={"pdfplumber": deps["pdfplumber"]},
            notes="PyMuPDF is required for PDF text extraction.",
        ),
        "cv_ocr": _feature_status(
            required={
                "fitz": deps["fitz"],
                "pytesseract": deps["pytesseract"],
                "pillow": deps["pillow"],
                "tesseract_binary": deps["tesseract_binary"],
            },
            notes="OCR requires the Tesseract binary and PIL.",
        ),
        "ner_hf": _feature_status(
            required={"transformers": deps["transformers"], "torch": deps["torch"]},
            notes="If missing, regex-based NER is still available.",
        ),
        "semantic_matching": _feature_status(
            required={
                "sentence_transformers": deps["sentence_transformers"],
                "numpy": deps["numpy"],
                "torch": deps["torch"],
            },
            optional={"faiss": deps["faiss"]},
            notes="If missing, matching falls back to heuristic scoring.",
        ),
        "export": _feature_status(
            required={"openpyxl": deps["openpyxl"], "reportlab": deps["reportlab"]},
            notes="If missing, export endpoints are disabled.",
        ),
        # Any one of the three providers is enough for chat. OPENAI_API_KEY
        # and HUGGINGFACE_API_KEY are reported in ``api_keys`` but do not
        # influence this feature.
        "chat_llm": _feature_status(
            required={
                "llm_provider": (
                    api_keys["ANTHROPIC_API_KEY"]
                    or api_keys["HF_TOKEN_CHATBOT"]
                    or api_keys["LOCAL_LLM_BASE_URL"]
                )
            },
            notes="Disponible si un provider LLM est configure (Anthropic, HF Inference, ou LLM local). Sinon, reponses deterministes.",
        ),
        # With the generator switched off there are no requirements, so the
        # feature is reported as "ok".
        "profile_generator": _feature_status(
            required={"transformers": deps["transformers"], "torch": deps["torch"]}
            if use_ai_profile
            else {},
            notes="If disabled or missing deps, rule-based profile generation is used.",
        ),
    }

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the rendered text keeps its previous shape
    # (no "+00:00" offset, explicit "Z" suffix).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    return {
        "timestamp": now_utc.isoformat() + "Z",
        "strict": strict,
        "required_features": _parse_required_features(),
        "dependencies": deps,
        "api_keys": api_keys,
        "flags": {
            "USE_AI_PROFILE_GENERATOR": use_ai_profile,
            "LOCAL_LLM_ENABLED": local_llm_enabled,
        },
        "features": features,
        "tesseract_path": tesseract_path,
        "tesseract_cmd": os.getenv("TESSERACT_CMD", "").strip() or None,
    }
|
|
|
|
def get_capabilities(force_refresh: bool = False) -> Dict[str, object]:
    """Return the capability report, running detection only when needed.

    Args:
        force_refresh: When True, discard the cached report and detect again.

    Returns:
        The module-level cached dict itself (not a copy), as produced by
        ``detect_capabilities``.
    """
    global _CAPABILITIES_CACHE
    cache_is_empty = _CAPABILITIES_CACHE is None
    if cache_is_empty or force_refresh:
        _CAPABILITIES_CACHE = detect_capabilities()
    return _CAPABILITIES_CACHE
|
|
|
|
def log_capabilities_summary(capabilities: Optional[Dict[str, object]] = None) -> Dict[str, object]:
    """Log an overview of feature statuses and return the report used.

    One INFO line carries the ok / degraded / missing counts; every feature
    whose status is not ``"ok"`` then gets its own WARNING line, in
    alphabetical order of feature name.

    Args:
        capabilities: Report to summarize. A falsy value (None or an empty
            dict) makes the function use ``get_capabilities()`` instead.

    Returns:
        The capability report that was summarized.
    """
    cap = capabilities or get_capabilities()
    feature_map = cap.get("features", {})

    # Entries without a status are counted as "missing"; unexpected status
    # labels get their own bucket but are not part of the INFO line.
    tally = {"ok": 0, "degraded": 0, "missing": 0}
    for info in feature_map.values():
        label = str(info.get("status", "missing"))
        tally.setdefault(label, 0)
        tally[label] += 1

    logger.info(
        "AI capabilities: ok=%s degraded=%s missing=%s",
        tally["ok"],
        tally["degraded"],
        tally["missing"],
    )

    for feature_name in sorted(feature_map):
        info = feature_map[feature_name]
        state = info.get("status")
        if state != "ok":
            logger.warning(
                "AI capability %s: %s (required_missing=%s optional_missing=%s)",
                feature_name,
                state,
                info.get("required_missing"),
                info.get("optional_missing"),
            )

    return cap
|
|
|
|
def assert_required_features(capabilities: Optional[Dict[str, object]] = None) -> None:
    """Fail fast when strict mode is on and a required feature is unavailable.

    Does nothing unless the report's ``strict`` entry is truthy. In strict
    mode the names under ``required_features`` are checked; when that list
    is empty, ``_DEFAULT_REQUIRED_FEATURES`` is used and a warning is logged.

    Args:
        capabilities: Report to check. A falsy value (None or an empty dict)
            makes the function use ``get_capabilities()`` instead.

    Raises:
        RuntimeError: If at least one required feature is absent from the
            report or not marked ``available``.
    """
    cap = capabilities or get_capabilities()
    if not cap.get("strict"):
        return

    required = list(cap.get("required_features") or [])
    if not required:
        required = list(_DEFAULT_REQUIRED_FEATURES)
        logger.warning(
            "AI_FEATURES_STRICT is enabled without AI_FEATURES_REQUIRED; using defaults: %s",
            ", ".join(required),
        )

    feature_map = cap.get("features", {})
    # A feature name that does not appear in the report counts as missing.
    unavailable = [
        feature_name
        for feature_name in required
        if not feature_map.get(feature_name, {}).get("available")
    ]
    if unavailable:
        joined = ", ".join(unavailable)
        logger.error("Missing required AI features: %s", joined)
        raise RuntimeError(f"Missing required AI features: {joined}")