ai-talent-finder-backend / test_cv_benchmark.py
ilyass yani
Deploiement backend dans HF Spaces
9df97a2
Raw
History Blame
12.3 kB
#!/usr/bin/env python3
"""
Multi-CV extraction benchmark.
Runs the CV extraction pipeline against several resume layouts and reports
coverage scores per section so we can track robustness across formats.
"""
from __future__ import annotations
import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
from app.services.cv_extractor import CVExtractionService
@dataclass
class BenchmarkSample:
    """One synthetic resume together with the minimum counts we expect from it."""

    # Human-readable label printed in the benchmark report.
    name: str
    # Raw CV text handed to the extraction service.
    text: str
    # Expected hit count per section, keyed by section name
    # (e.g. "identity", "experience", "education", "skills", "enrichment").
    expected: Dict[str, int]
def _load_reference_text() -> str:
    """Best-effort loader for the existing test CV fixture.

    Reads ``test_cv.txt`` located next to this module and tries a short list
    of encodings until one yields plausible text.

    UTF-8 is tried first. UTF-16 is only attempted when the raw bytes carry a
    UTF-16 byte-order mark or contain NUL bytes: decoding arbitrary 8-bit text
    as UTF-16 frequently "succeeds" and produces CJK-looking characters that
    would otherwise pass the plausibility check below.

    Returns:
        The decoded text (not stripped), or ``""`` when the fixture is missing
        or no candidate encoding produces plausible text.
    """
    fixture_path = Path(__file__).with_name("test_cv.txt")
    if not fixture_path.exists():
        return ""
    raw_bytes = fixture_path.read_bytes()

    # b"\xff\xfe" / b"\xfe\xff" are the little/big-endian UTF-16 BOMs.
    has_utf16_bom = raw_bytes.startswith((b"\xff\xfe", b"\xfe\xff"))
    encodings = ["utf-8-sig"]  # "-sig" also drops a leading UTF-8 BOM
    if has_utf16_bom or b"\x00" in raw_bytes:
        encodings.extend(("utf-16", "utf-16-le"))
    encodings.append("latin-1")  # never fails to decode; last resort

    for encoding in encodings:
        try:
            text = raw_bytes.decode(encoding)
        except UnicodeDecodeError:
            continue
        # Plausibility check: enough characters, enough letters, and no NULs
        # (NULs indicate UTF-16 content decoded with an 8-bit codec).
        cleaned = text.strip()
        alpha_count = sum(1 for char in cleaned if char.isalpha())
        if len(cleaned) >= 20 and alpha_count >= 10 and "\x00" not in cleaned:
            return text
    return ""
def _build_samples() -> List[BenchmarkSample]:
    """Return the built-in resume samples used by the benchmark.

    Each catalogue entry is ``(label, cv_lines, expected_counts)``; the CV
    lines are joined with newlines to form the text given to the extractor.
    """
    catalogue = (
        (
            "Structured English CV",
            (
                "JOHN SMITH",
                "john.smith@example.com | +33 6 12 34 56 78 | linkedin.com/in/johnsmith",
                "PROFESSIONAL SUMMARY",
                "Senior Full Stack Developer with 8 years of experience in web development.",
                "EXPERIENCE",
                "Senior Developer - Tech Company Inc (2020-2024)",
                "- Led team of 5 developers",
                "- Built microservices using Python and FastAPI",
                "- Managed PostgreSQL databases",
                "Junior Developer - Startup LLC (2016-2020)",
                "- Developed React frontend applications",
                "- Worked with Node.js backend",
                "EDUCATION",
                "Bachelor of Science in Computer Science",
                "University of Technology (2016)",
                "SKILLS",
                "Python, JavaScript, TypeScript, SQL, HTML/CSS, FastAPI, React, Docker",
            ),
            {"identity": 3, "experience": 2, "education": 2, "skills": 6, "enrichment": 1},
        ),
        (
            "French CV with links",
            (
                "MARIE DUPONT",
                "marie.dupont@gmail.com | 06 12 34 56 78 | Paris",
                "linkedin.com/in/mariedupont | github.com/mdupont | mariedupont.dev",
                "PROFIL",
                "Chef de projet digital orientée produit et expérience client.",
                "EXPÉRIENCES PROFESSIONNELLES",
                "Responsable Marketing Digital - Entreprise X (2021 - Présent)",
                "- Pilotage de campagnes multi-canaux",
                "- Analyse des performances et reporting",
                "Chef de projet CRM - Société Y (2018 - 2021)",
                "- Mise en place d'automatisations marketing",
                "- Coordination avec les équipes produit et design",
                "FORMATION",
                "Master Marketing Digital - Université de Lyon (2018)",
                "COMPÉTENCES",
                "Communication, Organisation, Gestion de projet, Leadership, Sens du contact",
                "CERTIFICATIONS",
                "Google Analytics Individual Qualification",
                "HubSpot Inbound Marketing",
                "PROJETS",
                "Refonte du parcours d'onboarding client",
            ),
            {"identity": 3, "experience": 2, "education": 1, "skills": 4, "enrichment": 4},
        ),
        (
            "OCR noisy CV",
            (
                "ALEXANDRE MARTIN",
                "alex.martin@example.com",
                "EXPERIENCE PROFESSIONNELLE",
                "2022 - PRESENT | DATA ENGINEER | BLUE ANALYTICS",
                "Built ETL pipelines on Airflow and Spark",
                "Implemented data quality checks and dashboards",
                "2020 - 2022 - BI ANALYST - RETAIL GROUP",
                "Automated SQL reporting and Power BI models",
                "FORMATION",
                "2019 - Master Data Science - Paris School of AI",
                "LANGUES",
                "French English",
                "COMPETENCES",
                "Python, SQL, Airflow, Spark, Power BI, Communication",
                "PROJECTS",
                "Customer churn prediction using Python and scikit-learn",
            ),
            {"identity": 2, "experience": 2, "education": 1, "skills": 4, "enrichment": 2},
        ),
        (
            "Minimal fallback CV",
            (
                "NOAH LEROY",
                "noah.leroy@outlook.com",
                "Some short CV text with little structure.",
                "Python, SQL, Docker.",
            ),
            {"identity": 2, "experience": 0, "education": 0, "skills": 2, "enrichment": 0},
        ),
        (
            "Ultra short CV",
            (
                "NADIA BENALI",
                "nadia.benali@example.com",
                "+33 6 98 76 54 32",
                "Paris, France",
                "Python | SQL | Data analysis",
            ),
            {"identity": 3, "experience": 0, "education": 0, "skills": 2, "enrichment": 1},
        ),
    )
    return [
        BenchmarkSample(name=label, text="\n".join(cv_lines), expected=targets)
        for label, cv_lines, targets in catalogue
    ]
def _safe_len(value: Any) -> int:
    """Count the items represented by ``value``.

    A list contributes its length; any other value contributes 1 when truthy
    and 0 when falsy (``None``, ``""``, ``0``, empty containers, ...).
    """
    if not isinstance(value, list):
        return int(bool(value))
    return len(value)
def _score_section(found: int, expected: int) -> float:
    """Convert a hit count into a 0-100 coverage score for one section.

    With a positive ``expected`` the score is the found/expected ratio capped
    at 100. With ``expected <= 0`` nothing found scores 100, otherwise each
    hit is worth 25 points, capped at 100.
    """
    if expected > 0:
        coverage = found / expected
        return min(coverage, 1.0) * 100.0

    # NOTE(review): in this branch the score is 100 for no hits, 25 for one
    # hit and climbs back to 100 at four or more hits - confirm that this
    # non-monotonic shape is intended.
    if found == 0:
        return 100.0
    return min(found * 25.0, 100.0)
def _build_section_scores(structured: Dict[str, Any]) -> Dict[str, float]:
    """Count what was extracted for each benchmark section.

    Despite the name, the returned values are raw hit counts (as floats), not
    percentages. Identity counts the truthy fields among name, email and
    phone; the other sections use ``_safe_len`` on the matching entries.
    """
    identity_keys = ("full_name", "email", "phone")
    enrichment_keys = (
        "linkedin_urls",
        "github_urls",
        "portfolio_urls",
        "certifications",
        "projects",
        "languages",
        "soft_skills",
        "interests",
    )

    identity_hits = sum(1 for key in identity_keys if structured.get(key))
    enrichment_hits = sum(_safe_len(structured.get(key)) for key in enrichment_keys)

    counts = {
        "identity": identity_hits,
        "experience": _safe_len(structured.get("experiences")),
        "education": _safe_len(structured.get("education")),
        "skills": _safe_len(structured.get("skills")),
        "enrichment": enrichment_hits,
    }
    return {section: float(hits) for section, hits in counts.items()}
def _overall_score(section_hits: Dict[str, float], expected: Dict[str, int]) -> float:
    """Combine per-section coverage into one weighted score out of 100.

    Sections missing from ``expected`` are treated as expecting 0 items.
    The result is rounded to one decimal place.
    """
    weighted_sections = (
        ("identity", 0.25),
        ("experience", 0.30),
        ("education", 0.15),
        ("skills", 0.15),
        ("enrichment", 0.15),
    )
    accumulated = 0.0
    for section, weight in weighted_sections:
        hits = int(section_hits[section])
        target = expected.get(section, 0)
        accumulated += weight * _score_section(hits, target)
    return round(accumulated, 1)
def _diagnose_missing_fields(sample: BenchmarkSample, structured: Dict[str, Any]) -> List[str]:
    """Return human-readable reasons for missing fields in a given sample.

    For every field that is empty in ``structured`` the raw sample text is
    inspected with simple heuristics to guess why extraction failed.

    Keyword checks run on a lower-cased copy of the text with combining marks
    removed, so accented headings such as "EXPÉRIENCE" or "ÉDUCATION" match
    the unaccented keywords used below.

    Args:
        sample: the benchmark sample; only its ``text`` attribute is read.
        structured: the extractor output, accessed with ``.get``.

    Returns:
        A list of diagnostic messages (possibly empty).
    """
    import unicodedata  # local import: only this diagnostic helper needs it

    reasons: List[str] = []
    lines = [line.strip() for line in sample.text.splitlines() if line.strip()]

    # Decompose accented characters and drop the combining marks before
    # lower-casing, so keyword matching is accent-insensitive.
    decomposed = unicodedata.normalize("NFD", sample.text)
    normalized_text = "".join(
        char for char in decomposed if not unicodedata.combining(char)
    ).lower()

    if not structured.get("full_name"):
        # A name-like line has 2-4 word tokens and contains no "@" or digit.
        top_lines = lines[:5]
        has_name_like_line = any(
            2 <= len(re.findall(r"[A-Za-zÀ-ÿ'-]+", line)) <= 4 and not re.search(r"[@\d]", line)
            for line in top_lines
        )
        if has_name_like_line:
            reasons.append("Nom probable présent en haut du CV mais rejeté par les filtres de nom.")
        elif structured.get("email"):
            reasons.append("Nom absent mais un email est disponible: vérifier l'inférence depuis l'email.")
        else:
            reasons.append("Aucune ligne de nom claire détectée dans les premières lignes.")

    if not structured.get("phone"):
        has_phone_like_text = bool(
            re.search(r"\+?\d[\d\s().-]{7,}\d", sample.text)
        )
        if has_phone_like_text:
            reasons.append("Un numéro semble présent mais n'a pas passé la normalisation téléphone.")
        else:
            reasons.append("Aucun motif téléphone suffisamment clair détecté.")

    if not structured.get("experiences"):
        # "experience" also covers "experiences" and "work experience".
        if any(keyword in normalized_text for keyword in ("experience", "professionnelle", "stage")):
            reasons.append("Section expérience détectée mais aucun bloc stable titre/entreprise/période n'a pu être construit.")
        else:
            reasons.append("Aucune section expérience ou ancre de période détectée.")

    if not structured.get("education") and any(
        keyword in normalized_text
        for keyword in ("formation", "education", "study", "universit", "school")
    ):
        reasons.append("Section formation présente mais les lignes ne ressemblaient pas assez à de l'éducation.")

    if not structured.get("skills"):
        if any(token in normalized_text for token in ("python", "sql", "java", "react", "docker", "airflow", "spark")):
            reasons.append("Des mots-clés techniques existent mais la normalisation a raté l'extraction de compétences.")
        elif any(keyword in normalized_text for keyword in ("communication", "organisation", "leadership", "rigueur", "autonomie", "gestion de projet", "sens du contact")):
            reasons.append("Le CV contient surtout des compétences génériques/soft skills; vérifier si elles doivent être reportées dans skills ou seulement dans soft_skills.")
        else:
            reasons.append("Aucune compétence technique évidente détectée.")

    # Only links, projects and certifications are considered here, matching
    # the wording of the message.
    if not (
        structured.get("linkedin_urls")
        or structured.get("github_urls")
        or structured.get("portfolio_urls")
        or structured.get("projects")
        or structured.get("certifications")
    ):
        reasons.append("Aucun signal d'enrichissement (liens/projets/certifications) détecté.")

    return reasons
def run_benchmark(diagnostic: bool = False) -> int:
    """Run the extraction service on every built-in sample and print a report.

    For each sample this prints the weighted overall score, the per-section
    scores and a summary of what was extracted, followed by the average
    overall score across all samples.

    Args:
        diagnostic: when true, also print the reasons returned by
            ``_diagnose_missing_fields`` for each sample.

    Returns:
        ``0`` when the average overall score is at least 70.0, otherwise
        ``1`` (suitable as a process exit status).
    """
    service = CVExtractionService()
    samples = _build_samples()
    banner = "=" * 78
    section_names = ("identity", "experience", "education", "skills", "enrichment")

    print(banner)
    print("Multi-CV Extraction Benchmark")
    print(banner)

    aggregate: List[float] = []
    for index, sample in enumerate(samples, start=1):
        result = service.extract_from_text(sample.text)
        # NOTE(review): assumes result.structured is dict-like (supports .get).
        structured = result.structured
        section_hits = _build_section_scores(structured)
        overall = _overall_score(section_hits, sample.expected)
        aggregate.append(overall)

        # Same lookup rule as _overall_score: a section missing from the
        # expectations counts as "0 expected" instead of raising KeyError.
        section_scores = {
            section: _score_section(int(section_hits[section]), sample.expected.get(section, 0))
            for section in section_names
        }

        # Count with _safe_len, like the scoring code, so a key holding None
        # (or a non-list value) does not crash the report with len(None).
        link_count = sum(
            _safe_len(structured.get(key))
            for key in ("linkedin_urls", "github_urls", "portfolio_urls")
        )

        print(f"\n[{index}] {sample.name}")
        print(f" Overall: {overall:.1f}/100")
        print(
            " Sections: "
            + ", ".join(f"{section}={score:.1f}" for section, score in section_scores.items())
        )
        print(
            " Extracted: "
            f"name={bool(structured.get('full_name'))}, "
            f"email={bool(structured.get('email'))}, "
            f"phone={bool(structured.get('phone'))}, "
            f"experiences={_safe_len(structured.get('experiences'))}, "
            f"education={_safe_len(structured.get('education'))}, "
            f"skills={_safe_len(structured.get('skills'))}, "
            f"links={link_count}, "
            f"projects={_safe_len(structured.get('projects'))}, "
            f"certifications={_safe_len(structured.get('certifications'))}"
        )

        if diagnostic:
            reasons = _diagnose_missing_fields(sample, structured)
            if reasons:
                print(" Diagnostics:")
                for reason in reasons:
                    print(f" - {reason}")

    average_score = round(sum(aggregate) / len(aggregate), 1) if aggregate else 0.0
    print("\n" + banner)
    print(f"Average overall score: {average_score:.1f}/100")
    print(banner)
    return 0 if average_score >= 70.0 else 1
if __name__ == "__main__":
    # Script entry point: parse the command line, run the benchmark and use
    # its return value as the process exit status.
    cli = argparse.ArgumentParser(description="Run the CV extraction benchmark")
    cli.add_argument("--diagnostic", action="store_true", help="Print human-readable reasons for missing fields")
    options = cli.parse_args()
    exit_status = run_benchmark(diagnostic=options.diagnostic)
    raise SystemExit(exit_status)