Spaces:

RHmaster
/

ai-talent-finder-backend

Running

App Files Files Community

ilyass yani committed 6 days ago

Commit

9614c9c

1 Parent(s): 9e6bc9f

Batch-2: fix CosineScorer, delete/put candidats, BOM extraction, ranking, _can_access_profile

Browse files

Files changed (5) hide show

ai_module/matching/__init__.py +2 -0
ai_module/nlp/resume_ner_extractor.py +57 -0
app/api/candidates.py +10 -3
app/api/matching.py +63 -0
app/services/cv_extractor.py +9 -2

ai_module/matching/__init__.py CHANGED Viewed

@@ -9,11 +9,13 @@
 #   from ai_module.matching import BertClassifierAdapter
 from ai_module.matching.hybrid_matcher import HybridConfig, HybridMatcher
 from ai_module.matching.bert_classifier_adapter import BertClassifierAdapter, get_default_adapter
 __all__ = [
     "HybridConfig",
     "HybridMatcher",
     "BertClassifierAdapter",
     "get_default_adapter",
 ]

 #   from ai_module.matching import BertClassifierAdapter
 from ai_module.matching.hybrid_matcher import HybridConfig, HybridMatcher
 from ai_module.matching.bert_classifier_adapter import BertClassifierAdapter, get_default_adapter
+from ai_module.matching.scorer import CosineScorer
 __all__ = [
     "HybridConfig",
     "HybridMatcher",
     "BertClassifierAdapter",
     "get_default_adapter",
+    "CosineScorer",
 ]

ai_module/nlp/resume_ner_extractor.py CHANGED Viewed

@@ -61,6 +61,63 @@ class ResumeNERExtractor:
         # Other Tools
         "jira", "confluence", "slack", "discord", "figma", "sketch",
         "vim", "vscode", "intellij",
     }
     # Job title keywords

         # Other Tools
         "jira", "confluence", "slack", "discord", "figma", "sketch",
         "vim", "vscode", "intellij",
+        # Sante / Health
+        "infirmier", "infirmiere", "pharmacien", "pharmacienne", "chirurgie",
+        "soins infirmiers", "soins intensifs", "bloc operatoire", "urgences",
+        "radiologie", "anesthesie", "pediatrie", "geriatrie", "cardiologie",
+        "neurologie", "oncologie", "kinesitherapie", "orthophonie",
+        "aide soignant", "auxiliaire de vie", "sage femme", "medecin",
+        "urgentiste", "generaliste", "specialiste", "imagerie medicale",
+        "pharmacologie", "biologie medicale", "dossier patient", "hip",
+        # Commerce / Vente
+        "negociation", "prospection", "crm", "salesforce", "hubspot",
+        "vente", "commerce", "relation client", "fidélisation", "fidélisation client",
+        "fidelisation", "pipeline commercial", "force de vente", "b2b", "b2c",
+        "cold calling", "account management", "key account", "grands comptes",
+        "administration des ventes", "devis", "facturation", "customer success",
+        "closing", "lead generation",
+        # Finance / Comptabilite
+        "comptabilite", "comptabilité", "audit", "controle de gestion",
+        "excel financier", "ifrs", "normes ifrs", "bilan", "liasses fiscales",
+        "gestion budgetaire", "tresorerie", "fiscalite", "sage comptabilité",
+        "sage compta", "sage", "cegid", "erp finance", "consolidation",
+        "reporting financier", "analyse financiere", "due diligence",
+        "commissariat aux comptes", "expert comptable", "bilan comptable",
+        "grand livre", "journaux comptables", "pcg", "tva",
+        # Marketing
+        "seo", "sea", "sem", "content marketing", "branding", "brand management",
+        "marketing digital", "emailing", "e-mailing", "google analytics",
+        "google ads", "facebook ads", "meta ads", "reseaux sociaux",
+        "community management", "inbound marketing", "marketing automation",
+        "mailchimp", "hubspot marketing", "copywriting", "ux writing",
+        "webmarketing", "e-commerce", "shopify", "growth hacking",
+        # BTP / Construction
+        "conduite de chantier", "autocad", "maconnerie", "gros oeuvre",
+        "second oeuvre", "menuiserie", "plomberie", "electricite batiment",
+        "genie civil", "architecture", "bim", "revit", "suivi de chantier",
+        "planification chantier", "metres", "cubature", "beton arme",
+        "coffreur", "platrerie", "carrelage", "peinture batiment",
+        "chef de chantier", "conducteur de travaux",
+        # Droit / Juridique
+        "droit du travail", "contrats", "contentieux", "droit commercial",
+        "droit civil", "droit penal", "jurisprudence", "redaction juridique",
+        "negociation contractuelle", "veille juridique", "rgpd", "droit des affaires",
+        "droit de la propriete intellectuelle", "droit des societes",
+        "recouvrement de creances", "arbitrage", "mediation",
+        # Ressources Humaines / RH
+        "recrutement", "gpec", "gestion des talents", "paie",
+        "administration du personnel", "droit social", "formation professionnelle",
+        "onboarding", "marque employeur", "entretien professionnel",
+        "gestion des competences", "bilan social", "dsn", "sirh",
+        "workday", "peoplesoft", "adp", "talent management",
+        "sourcing", "assessment center", "mobilite interne",
     }
     # Job title keywords

app/api/candidates.py CHANGED Viewed

@@ -30,7 +30,6 @@ def _can_access_profile(profile: Candidate, requesting_user: User) -> bool:
     Rules:
     - Owner always has access.
-    - A recruiter can read candidates they uploaded themselves.
     - An authenticated recruiter can read a candidate-deposited profile that
       has is_visible = True.
     - Everything else is denied (return False → caller raises 404).
@@ -665,7 +664,11 @@ def update_candidate(
     if not db_candidate:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
-    if db_candidate.user_id != current_user.id and current_user.role != UserRole.admin:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
     for key, value in candidate.dict(exclude_unset=True).items():
@@ -691,7 +694,11 @@ def delete_candidate(
     if not db_candidate:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
-    if db_candidate.user_id != current_user.id and current_user.role != UserRole.admin:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
     db.delete(db_candidate)

     Rules:
     - Owner always has access.
     - An authenticated recruiter can read a candidate-deposited profile that
       has is_visible = True.
     - Everything else is denied (return False → caller raises 404).
     if not db_candidate:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
+    is_owner = (
+        (db_candidate.user_id is not None and db_candidate.user_id == current_user.id)
+        or (db_candidate.recruiter_id is not None and db_candidate.recruiter_id == current_user.id)
+    )
+    if not is_owner and current_user.role != UserRole.admin:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
     for key, value in candidate.dict(exclude_unset=True).items():
     if not db_candidate:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
+    is_owner = (
+        (db_candidate.user_id is not None and db_candidate.user_id == current_user.id)
+        or (db_candidate.recruiter_id is not None and db_candidate.recruiter_id == current_user.id)
+    )
+    if not is_owner and current_user.role != UserRole.admin:
         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Candidate not found")
     db.delete(db_candidate)

app/api/matching.py CHANGED Viewed

@@ -1053,6 +1053,69 @@ async def generate_and_match(
     }
 @router.post("/{criteria_id:int}/results", response_model=List[CriteriaMatchResultResponse])
 async def launch_matching_for_criteria(
     criteria_id: int,

     }
class RankAllResult(BaseModel):
    """Response item of the rank-all endpoint: one candidate with its ranking data."""

    # 1-based position of the candidate in the ranked list.
    rank: int

    # Identity of the ranked candidate.
    candidate_id: int
    full_name: str
    email: str

    # Match score and skill coverage for the criteria being ranked against.
    score: float
    coverage: float

    # Presumably skill names the candidate has / lacks relative to the
    # criteria -- confirm against the scoring helper that fills them.
    matched_skills: List[str]
    missing_skills: List[str]

    class Config:
        # Allow the model to be populated from attribute-bearing objects
        # (e.g. ORM rows) and not only from dicts.
        from_attributes = True
@router.get("/{criteria_id:int}/rank-all", response_model=List[RankAllResult])
def rank_all_candidates(
    criteria_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Score the current user's candidates against a criteria and rank them.

    Only candidates whose ``recruiter_id`` or ``user_id`` equals the current
    user's id are included. Scores are computed on the fly through
    ``score_candidate_against_criteria``, so candidates without stored match
    results are ranked as well.

    Args:
        criteria_id: Primary key of the ``JobCriteria`` to rank against.
        current_user: Authenticated user resolved by ``get_current_user``.
        db: Database session resolved by ``get_db``.

    Returns:
        A list of ``RankAllResult`` ordered best score first, with ``rank``
        starting at 1. Candidates with equal scores are ordered by ascending
        ``candidate_id`` so the ranking is stable between calls.

    Raises:
        HTTPException: 404 when no criteria with ``criteria_id`` exists.
    """
    # Function-scope import, aliased to avoid clashing with any module-level
    # ``or_`` name.
    from sqlalchemy import or_ as sa_or

    criteria = db.query(JobCriteria).filter(JobCriteria.id == criteria_id).first()
    if not criteria:
        raise HTTPException(status_code=404, detail="Criteria not found")

    # Loaded once up front: both are the same for every candidate.
    criteria_skills = _load_criteria_skills(criteria_id, db)
    skill_universe = build_skill_universe(db)

    candidates = (
        db.query(Candidate)
        .filter(
            sa_or(
                Candidate.recruiter_id == current_user.id,
                Candidate.user_id == current_user.id,
            )
        )
        .all()
    )

    ranked: List[Dict] = []
    for cand in candidates:
        score, details = score_candidate_against_criteria(cand, criteria_skills, skill_universe)
        ranked.append({
            "candidate_id": cast(int, cand.id),
            "full_name": cast(str, cand.full_name or ""),
            "email": cast(str, cand.email or ""),
            "score": score,
            "coverage": float(details.get("coverage", 0)),
            "matched_skills": list(details.get("matched_skills", [])),
            "missing_skills": list(details.get("missing_skills", [])),
        })

    # Best score first; the query has no ORDER BY, so break ties on the
    # candidate id to keep ranks deterministic across requests.
    ranked.sort(key=lambda item: (-item["score"], item["candidate_id"]))
    return [RankAllResult(rank=idx + 1, **entry) for idx, entry in enumerate(ranked)]
 @router.post("/{criteria_id:int}/results", response_model=List[CriteriaMatchResultResponse])
 async def launch_matching_for_criteria(
     criteria_id: int,

app/services/cv_extractor.py CHANGED Viewed

@@ -356,13 +356,17 @@ class CVExtractionService:
     def _clean_name(self, name: Any) -> Optional[str]:
         value = str(name or "").strip()
         if not value:
             return None
         if "@" in value or "http" in value.lower():
             return None
         if any(ch.isdigit() for ch in value):
             return None
-        words = [w for w in re.split(r"\s+", value) if w]
         if len(words) < 2 or len(words) > 4:
             return None
         return " ".join(word.capitalize() for word in words)
@@ -460,7 +464,10 @@ class CVExtractionService:
     def _normalize_text_for_extraction(self, text: str) -> str:
         """Normalize noisy PDF extraction output to improve entity detection."""
-        normalized = text.replace("\r", "\n")
         normalized = re.sub(r"[ \t]+", " ", normalized)
         normalized = re.sub(r"\n{3,}", "\n\n", normalized)
         return normalized.strip()

     def _clean_name(self, name: Any) -> Optional[str]:
         value = str(name or "").strip()
+        # Strip UTF-8 BOM and zero-width chars that cause capitalize() to lowercase
+        # the real first letter (BOM becomes the first char, everything else lowercased).
+        value = value.lstrip("‌‍⁠").strip()
         if not value:
             return None
         if "@" in value or "http" in value.lower():
             return None
         if any(ch.isdigit() for ch in value):
             return None
+        words = [w.lstrip("‌‍⁠") for w in re.split(r"\s+", value) if w]
+        words = [w for w in words if w]
         if len(words) < 2 or len(words) > 4:
             return None
         return " ".join(word.capitalize() for word in words)
     def _normalize_text_for_extraction(self, text: str) -> str:
         """Normalize noisy PDF extraction output to improve entity detection."""
+        # Remove UTF-8 BOM () and similar zero-width chars that corrupt the first
+        # word of extracted text and cause name capitalize() bugs.
+        normalized = text.lstrip("‌‍")
+        normalized = normalized.replace("\r", "\n")
         normalized = re.sub(r"[ \t]+", " ", normalized)
         normalized = re.sub(r"\n{3,}", "\n\n", normalized)
         return normalized.strip()