ilyass yani
Deploiement backend dans HF Spaces
9df97a2
Raw
History Blame
9.11 kB
"""Hybrid matcher combining semantic, BERT classifier, skill cosine, and business signals.
Weight breakdown (defaults from training config):
semantic 0.35 — sentence-transformer cosine similarity on full texts
cross_encoder 0.20 — deeper semantic re-ranking (falls back to semantic when unavailable)
bert_classifier 0.25 — fine-tuned camembert compatibility classifier
skill_cosine 0.12 — binary skill-vector cosine (CosineScorer)
business 0.08 — structured rules: experience, location, availability
All weights must sum to 1.0; HybridConfig normalizes automatically if they don't.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional
import numpy as np
logger = logging.getLogger(__name__)
@dataclass
class HybridConfig:
    """Relative weights of the five components blended by the hybrid matcher.

    Every weight must be a finite, non-negative number. When the weights do
    not already sum to 1.0 (within 1e-6) they are rescaled in place during
    construction so that they do.

    Raises
    ------
    ValueError
        If any weight is negative or not finite, or if all weights are zero.
    """

    # Sentence-transformer cosine similarity on the full texts.
    weight_semantic: float = 0.35
    # Cross-encoder re-ranking.
    weight_cross_encoder: float = 0.20
    # Fine-tuned compatibility classifier.
    weight_bert_classifier: float = 0.25
    # Binary skill-vector cosine.
    weight_skill_cosine: float = 0.12
    # Structured rules: experience, location, availability.
    weight_business: float = 0.08

    def __post_init__(self) -> None:
        """Validate the weights and normalize them so they sum to 1.0."""
        import math

        names = (
            "weight_semantic",
            "weight_cross_encoder",
            "weight_bert_classifier",
            "weight_skill_cosine",
            "weight_business",
        )
        values = [getattr(self, name) for name in names]

        # A negative weight can hide behind a positive total, and a NaN weight
        # defeats both comparisons below, so each weight is checked on its own.
        for name, value in zip(names, values):
            if not math.isfinite(value) or value < 0:
                raise ValueError(
                    f"HybridConfig: {name} must be a finite, non-negative "
                    f"number (got {value!r})."
                )

        total = sum(values)
        if total <= 0:
            raise ValueError("HybridConfig: all weights are zero.")
        if abs(total - 1.0) > 1e-6:
            logger.debug("HybridConfig: weights sum to %.4f — normalizing.", total)
            for name, value in zip(names, values):
                setattr(self, name, value / total)
class HybridMatcher:
    """Combine multiple matchers into a single weighted score (0–100).

    Each component scorer yields a value that is multiplied by its configured
    weight; the weighted sum is scaled to 0–100 and clipped to that range.
    The semantic, cross-encoder and skill-cosine scorers degrade to a fallback
    value instead of raising when their backend cannot be used.

    Parameters
    ----------
    config:
        Weight configuration. A default ``HybridConfig`` is used when omitted.
    bert_classifier:
        Pre-loaded BertClassifierAdapter. If None, the adapter is lazy-loaded
        through ``get_default_adapter()`` the first time it is needed.
    """

    def __init__(
        self,
        config: "Optional[HybridConfig]" = None,
        bert_classifier=None,
    ) -> None:
        self.config = config if config is not None else HybridConfig()
        self._bert = bert_classifier  # may be None; resolved lazily
        self._cross_encoder_model = None  # created on first cross-encoder call

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def score(
        self,
        candidate_text: str,
        job_text: str,
        candidate_skills: Optional[List[str]] = None,
        criteria_skills: Optional[Dict[str, float]] = None,
        business_signals: Optional[Dict[str, object]] = None,
    ) -> Dict[str, object]:
        """Return a hybrid score dict.

        Parameters
        ----------
        candidate_text:
            Free-text CV / candidate profile.
        job_text:
            Free-text job description / offer.
        candidate_skills:
            List of skill names the candidate has.
        criteria_skills:
            Dict {skill_name: weight_0_to_100} from recruiter criteria.
        business_signals:
            Optional structured signals, e.g.::

                {
                    "years_experience": 5,
                    "required_experience": 3,
                    "location_match": True,
                    "available": True,
                }

        Returns
        -------
        dict with keys: score (0–100), component_scores (rounded to 4
        decimals), weights_used
        """
        cfg = self.config

        # Components are evaluated in a fixed order: semantic, cross-encoder
        # (which may itself fall back to semantic), BERT classifier, skill
        # cosine, business rules.
        components: Dict[str, float] = {
            "semantic": self._semantic_score(candidate_text, job_text),
            "cross_encoder": self._cross_encoder_score(candidate_text, job_text),
            "bert_classifier": self._bert_score(candidate_text, job_text),
            "skill_cosine": self._skill_cosine_score(
                candidate_skills or [], criteria_skills or {}
            ),
            "business": self._business_score(business_signals or {}),
        }
        weights: Dict[str, float] = {
            "semantic": cfg.weight_semantic,
            "cross_encoder": cfg.weight_cross_encoder,
            "bert_classifier": cfg.weight_bert_classifier,
            "skill_cosine": cfg.weight_skill_cosine,
            "business": cfg.weight_business,
        }

        raw = sum(weights[name] * components[name] for name in weights)
        final_score = float(np.clip(raw * 100, 0.0, 100.0))
        return {
            "score": final_score,
            # Cast to plain floats so scalar types returned by a backend do
            # not leak into the result dict.
            "component_scores": {
                name: round(float(value), 4) for name, value in components.items()
            },
            "weights_used": weights,
        }

    # ------------------------------------------------------------------
    # Component scorers
    # ------------------------------------------------------------------
    def _semantic_score(self, candidate_text: str, job_text: str) -> float:
        """Return the semantic similarity of the two texts, or 0.0 on failure."""
        try:
            from ai_module.matching.semantic_matcher import SemanticSkillMatcher

            return SemanticSkillMatcher.semantic_similarity(candidate_text, job_text)
        except Exception as exc:
            logger.debug("Semantic scorer unavailable: %s", exc)
            return 0.0

    def _cross_encoder_score(self, candidate_text: str, job_text: str) -> float:
        """Attempt a cross-encoder pass; fall back to semantic similarity.

        The model is created on first use and cached on the instance. Its
        output is treated as a logit and squashed into [0, 1] with a sigmoid.
        """
        try:
            model = getattr(self, "_cross_encoder_model", None)
            if model is None:
                # Import only when a model still has to be built.
                from sentence_transformers import CrossEncoder

                model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
                self._cross_encoder_model = model
            logit = float(model.predict([[candidate_text, job_text]])[0])
        except Exception as exc:
            # Graceful fallback to standard semantic similarity
            logger.debug("Cross-encoder unavailable, using semantic score: %s", exc)
            return self._semantic_score(candidate_text, job_text)
        return self._sigmoid(logit)

    @staticmethod
    def _sigmoid(logit: float) -> float:
        """Return the logistic sigmoid of ``logit`` without overflowing."""
        import math

        # Only ever exponentiate a non-positive number: exp() of a large
        # positive argument raises OverflowError, which previously turned a
        # strongly negative logit into the semantic fallback instead of ~0.
        if logit >= 0:
            return 1.0 / (1.0 + math.exp(-logit))
        exp_logit = math.exp(logit)
        return exp_logit / (1.0 + exp_logit)

    def _bert_score(self, candidate_text: str, job_text: str) -> float:
        """Return the classifier's score, or 0.0 when no adapter is available."""
        bert = self._get_bert()
        if bert is None:
            return 0.0
        return bert.predict_score(candidate_text, job_text)

    def _skill_cosine_score(
        self,
        candidate_skills: List[str],
        criteria_skills: Dict[str, float],
    ) -> float:
        """Return the CosineScorer result divided by 100; 0.0 if unusable.

        Returns 0.0 without calling the scorer when either input is empty,
        and 0.0 (with a debug log) when the scorer cannot be imported or fails.
        """
        if not candidate_skills or not criteria_skills:
            return 0.0
        try:
            from ai_module.matching.scorer import CosineScorer

            all_skills = list(criteria_skills)
            result = CosineScorer.calculate_match_score(
                candidate_skills, criteria_skills, all_skills
            )
            return float(result["score"]) / 100.0
        except Exception as exc:
            logger.debug("Skill cosine scorer failed: %s", exc)
            return 0.0

    def _business_score(self, signals: Dict[str, object]) -> float:
        """Simple rules-based business score in [0, 1].

        Averages up to three sub-scores, each included only when its signal
        is present (not None):

        * experience: ``years_experience / required_experience`` capped at 1;
          1.0 when nothing is required; 0.5 when the values are unparseable.
        * location: 1.0 on a match, 0.2 otherwise.
        * availability: 1.0 when available, 0.0 otherwise.

        Returns the neutral 0.5 when no usable signal is present.
        """
        import math

        parts: List[float] = []

        # Experience
        years_exp = signals.get("years_experience")
        required_exp = signals.get("required_experience")
        if years_exp is not None and required_exp is not None:
            try:
                years = float(years_exp)
                required = float(required_exp)
            except (TypeError, ValueError):
                parts.append(0.5)
            else:
                if math.isnan(years) or math.isnan(required):
                    # NaN carries no information; treat it like an
                    # unparseable value rather than letting it propagate.
                    parts.append(0.5)
                elif required <= 0:
                    # No experience required, so the requirement is met.
                    parts.append(1.0)
                else:
                    parts.append(float(np.clip(years / required, 0.0, 1.0)))

        # Location match
        location_match = signals.get("location_match")
        if location_match is not None:
            parts.append(1.0 if location_match else 0.2)

        # Availability
        available = signals.get("available")
        if available is not None:
            parts.append(1.0 if available else 0.0)

        if not parts:
            # Neutral when no usable signal was provided, whether the dict is
            # empty or only holds None / unrecognized entries.
            return 0.5
        return float(np.clip(sum(parts) / len(parts), 0.0, 1.0))

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _get_bert(self):
        """Return the BERT adapter, loading the default one when none is set.

        Returns None when loading fails; because nothing is cached in that
        case, the load is attempted again on the next call.
        """
        if self._bert is not None:
            return self._bert
        try:
            from ai_module.matching.bert_classifier_adapter import get_default_adapter

            self._bert = get_default_adapter()
        except Exception as exc:
            logger.warning("Could not load BertClassifierAdapter: %s", exc)
            self._bert = None
        return self._bert