| """Hybrid matcher combining semantic, BERT classifier, skill cosine, and business signals. |
| |
| Weight breakdown (defaults from training config): |
| semantic 0.35 — sentence-transformer cosine similarity on full texts |
| cross_encoder 0.20 — deeper semantic re-ranking (falls back to semantic when unavailable) |
| bert_classifier 0.25 — fine-tuned camembert compatibility classifier |
| skill_cosine 0.12 — binary skill-vector cosine (CosineScorer) |
| business 0.08 — structured rules: experience, location, availability |
| |
| All weights must sum to 1.0; HybridConfig normalizes automatically if they don't. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import logging |
| from dataclasses import dataclass, field |
| from typing import Dict, List, Optional |
|
|
| import numpy as np |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
@dataclass
class HybridConfig:
    """Relative weights of the five component scores blended by the hybrid matcher.

    After construction the five weights always sum to 1.0: when the supplied
    values do not (within a 1e-6 tolerance), ``__post_init__`` rescales them
    in place, preserving their proportions.

    Attributes
    ----------
    weight_semantic:
        Weight of the semantic-similarity component.
    weight_cross_encoder:
        Weight of the cross-encoder component.
    weight_bert_classifier:
        Weight of the BERT classifier component.
    weight_skill_cosine:
        Weight of the skill-vector cosine component.
    weight_business:
        Weight of the rules-based business component.

    Raises
    ------
    ValueError
        If any weight is negative, NaN or infinite, or if all weights are zero.
    """

    weight_semantic: float = 0.35
    weight_cross_encoder: float = 0.20
    weight_bert_classifier: float = 0.25
    weight_skill_cosine: float = 0.12
    weight_business: float = 0.08

    def __post_init__(self) -> None:
        """Validate the weights, then rescale them so they sum to 1.0."""
        # Function-scope import, matching the file's habit of importing
        # helpers at the point of use.
        import math

        names = (
            "weight_semantic",
            "weight_cross_encoder",
            "weight_bert_classifier",
            "weight_skill_cosine",
            "weight_business",
        )

        # Reject values that would corrupt the blend: a NaN weight slips past
        # both comparisons below, an infinite one normalizes to NaN, and a
        # negative one can hide behind a positive total.
        for name in names:
            value = getattr(self, name)
            if not math.isfinite(value) or value < 0:
                raise ValueError(
                    f"HybridConfig: {name} must be a finite, non-negative "
                    f"number (got {value!r})."
                )

        total = sum(getattr(self, name) for name in names)
        # Every weight is >= 0 at this point, so a non-positive total really
        # does mean that all of them are zero.
        if total <= 0:
            raise ValueError("HybridConfig: all weights are zero.")

        if abs(total - 1.0) > 1e-6:
            logger.debug("HybridConfig: weights sum to %.4f — normalizing.", total)
            for name in names:
                setattr(self, name, getattr(self, name) / total)
|
|
|
|
class HybridMatcher:
    """Combine multiple matchers into a single weighted score (0–100).

    Five component scores (semantic, cross-encoder, BERT classifier, skill
    cosine, business rules) are blended with the weights held in ``config``.
    Every component is best-effort: when its backend cannot be imported,
    loaded or evaluated, the component contributes a fallback value instead
    of aborting the whole computation.

    Parameters
    ----------
    config:
        Weight configuration. A default ``HybridConfig()`` is used when
        omitted.
    bert_classifier:
        Pre-loaded BertClassifierAdapter. If None, the adapter is lazy-loaded
        through ``get_default_adapter()`` on first use (the original
        documentation names backend/models/bert_matching/ as the default
        model directory; that location is not visible from this class).
    """

    # Name of the cross-encoder checkpoint loaded on first use.
    _CROSS_ENCODER_NAME = "cross-encoder/ms-marco-MiniLM-L-6-v2"

    # Lazy-load state. Declared on the class so the attributes exist even on
    # instances whose __init__ was bypassed or overridden.
    _cross_encoder_model = None
    _cross_encoder_load_failed = False
    _bert_load_failed = False

    def __init__(
        self,
        config: Optional[HybridConfig] = None,
        bert_classifier=None,
    ) -> None:
        self.config = config or HybridConfig()
        self._bert = bert_classifier

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def score(
        self,
        candidate_text: str,
        job_text: str,
        candidate_skills: Optional[List[str]] = None,
        criteria_skills: Optional[Dict[str, float]] = None,
        business_signals: Optional[Dict[str, object]] = None,
    ) -> Dict[str, object]:
        """Return a hybrid score dict.

        Parameters
        ----------
        candidate_text:
            Free-text CV / candidate profile.
        job_text:
            Free-text job description / offer.
        candidate_skills:
            List of skill names the candidate has.
        criteria_skills:
            Dict {skill_name: weight_0_to_100} from recruiter criteria.
        business_signals:
            Optional structured signals, e.g.::

                {
                    "years_experience": 5,
                    "required_experience": 3,
                    "location_match": True,
                    "available": True,
                }

        Returns
        -------
        dict with keys: score (0–100), component_scores, weights_used
        """
        cfg = self.config

        raw_components = {
            "semantic": self._semantic_score(candidate_text, job_text),
            "cross_encoder": self._cross_encoder_score(candidate_text, job_text),
            "bert_classifier": self._bert_score(candidate_text, job_text),
            "skill_cosine": self._skill_cosine_score(
                candidate_skills or [], criteria_skills or {}
            ),
            "business": self._business_score(business_signals or {}),
        }
        # Force every component into [0, 1] before blending: a NaN or
        # out-of-range value from one backend would otherwise poison the
        # final score (np.clip leaves NaN untouched).
        components: Dict[str, float] = {
            name: self._unit_interval(value)
            for name, value in raw_components.items()
        }

        weights = {
            "semantic": cfg.weight_semantic,
            "cross_encoder": cfg.weight_cross_encoder,
            "bert_classifier": cfg.weight_bert_classifier,
            "skill_cosine": cfg.weight_skill_cosine,
            "business": cfg.weight_business,
        }

        blended = sum(weights[name] * components[name] for name in components)
        final_score = float(np.clip(blended * 100, 0.0, 100.0))

        return {
            "score": final_score,
            "component_scores": {k: round(v, 4) for k, v in components.items()},
            "weights_used": weights,
        }

    # ------------------------------------------------------------------
    # Component scorers
    # ------------------------------------------------------------------

    def _semantic_score(self, candidate_text: str, job_text: str) -> float:
        """Return the semantic similarity of the two texts, or 0.0 on failure.

        Delegates to ``SemanticSkillMatcher.semantic_similarity``; the value
        is presumably in [0, 1] — confirm against that module.
        """
        try:
            from ai_module.matching.semantic_matcher import SemanticSkillMatcher

            return SemanticSkillMatcher.semantic_similarity(candidate_text, job_text)
        except Exception as exc:
            logger.debug("Semantic scorer unavailable: %s", exc)
            return 0.0

    def _cross_encoder_score(self, candidate_text: str, job_text: str) -> float:
        """Attempt a cross-encoder pass; fall back to semantic similarity.

        The model output is squashed through a sigmoid so the result lies in
        [0, 1]. If the model cannot be loaded or the prediction fails, the
        semantic score is returned instead.
        """
        model = self._get_cross_encoder()
        if model is not None:
            try:
                logit = float(model.predict([[candidate_text, job_text]])[0])
                return self._sigmoid(logit)
            except Exception as exc:
                # A failure on one input does not disable the model for
                # later calls; only this call falls back.
                logger.debug("Cross-encoder prediction failed: %s", exc)
        return self._semantic_score(candidate_text, job_text)

    def _bert_score(self, candidate_text: str, job_text: str) -> float:
        """Return the BERT classifier score, or 0.0 when it is unavailable.

        Prediction errors are logged and mapped to 0.0 so that this component
        is best-effort like the others instead of aborting ``score``.
        """
        bert = self._get_bert()
        if bert is None:
            return 0.0
        try:
            return float(bert.predict_score(candidate_text, job_text))
        except Exception as exc:
            logger.warning("BERT classifier scoring failed: %s", exc)
            return 0.0

    def _skill_cosine_score(
        self,
        candidate_skills: List[str],
        criteria_skills: Dict[str, float],
    ) -> float:
        """Return the CosineScorer match score rescaled by 1/100, or 0.0.

        0.0 is returned when either input is empty or the scorer fails.
        """
        if not candidate_skills or not criteria_skills:
            return 0.0
        try:
            from ai_module.matching.scorer import CosineScorer

            all_skills = list(criteria_skills)
            result = CosineScorer.calculate_match_score(
                candidate_skills, criteria_skills, all_skills
            )
            # NOTE(review): assumes result["score"] is on a 0–100 scale — confirm.
            return float(result["score"]) / 100.0
        except Exception as exc:
            logger.debug("Skill cosine scorer failed: %s", exc)
            return 0.0

    def _business_score(self, signals: Dict[str, object]) -> float:
        """Simple rules-based business score in [0, 1].

        Averages the sub-scores of the signals that are present:

        * experience — ``years_experience / required_experience`` capped at 1
          (needs both keys),
        * ``location_match`` — 1.0 when truthy, else 0.2,
        * ``available`` — 1.0 when truthy, else 0.0.

        Returns the neutral value 0.5 when no usable signal is supplied,
        whether the dict is empty or merely holds unknown / ``None`` entries.
        """
        if not signals:
            return 0.5

        parts: List[float] = []

        years_exp = signals.get("years_experience")
        required_exp = signals.get("required_experience")
        if years_exp is not None and required_exp is not None:
            parts.append(self._experience_score(years_exp, required_exp))

        location_match = signals.get("location_match")
        if location_match is not None:
            parts.append(1.0 if location_match else 0.2)

        available = signals.get("available")
        if available is not None:
            parts.append(1.0 if available else 0.0)

        if not parts:
            # Nothing could be evaluated: stay neutral, exactly as for an
            # empty dict, rather than penalizing the candidate with 0.0.
            return 0.5
        return float(np.clip(sum(parts) / len(parts), 0.0, 1.0))

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _experience_score(years_exp: object, required_exp: object) -> float:
        """Return how well ``years_exp`` covers ``required_exp``, in [0, 1].

        Unparsable or non-finite inputs yield the neutral 0.5. A requirement
        of zero (or less) is always satisfied; otherwise the score is the
        ratio have / need capped at 1.
        """
        try:
            have = float(years_exp)
            need = float(required_exp)
        except (TypeError, ValueError):
            return 0.5
        if not (np.isfinite(have) and np.isfinite(need)):
            return 0.5
        if need <= 0:
            return 1.0
        return float(np.clip(have / need, 0.0, 1.0))

    @staticmethod
    def _sigmoid(x: float) -> float:
        """Numerically stable logistic function.

        Only ever exponentiates a non-positive number, so large-magnitude
        inputs saturate to 0.0 / 1.0 instead of raising OverflowError.
        """
        import math

        if x >= 0:
            return 1.0 / (1.0 + math.exp(-x))
        e = math.exp(x)
        return e / (1.0 + e)

    @staticmethod
    def _unit_interval(value: object) -> float:
        """Coerce a component score to a float in [0, 1]; unusable values become 0.0."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        if np.isnan(number):
            return 0.0
        return min(max(number, 0.0), 1.0)

    def _get_cross_encoder(self):
        """Return the lazily loaded cross-encoder, or None when unavailable.

        A failed import or load is remembered so later calls skip the retry.
        """
        if self._cross_encoder_model is not None or self._cross_encoder_load_failed:
            return self._cross_encoder_model
        try:
            from sentence_transformers import CrossEncoder

            self._cross_encoder_model = CrossEncoder(self._CROSS_ENCODER_NAME)
        except Exception as exc:
            logger.debug("Cross-encoder unavailable: %s", exc)
            self._cross_encoder_load_failed = True
        return self._cross_encoder_model

    def _get_bert(self):
        """Return the BERT adapter, lazy-loading the default one on first use.

        Returns None when no adapter was supplied and the default cannot be
        loaded. The failure is remembered so the load (and its warning) is
        not repeated on every scoring call.
        """
        if self._bert is not None or self._bert_load_failed:
            return self._bert
        try:
            from ai_module.matching.bert_classifier_adapter import get_default_adapter

            self._bert = get_default_adapter()
        except Exception as exc:
            logger.warning("Could not load BertClassifierAdapter: %s", exc)
            self._bert = None
        # Remember a failed (or empty) load so later calls skip the retry.
        self._bert_load_failed = self._bert is None
        return self._bert
|
|