Spaces:

RHmaster
/

ai-talent-finder-backend

Sleeping

ai-talent-finder-backend / test_cv_benchmark.py

ilyass yani

Deploiement backend dans HF Spaces

9df97a2 11 days ago

12.3 kB

	#!/usr/bin/env python3
	"""
	Multi-CV extraction benchmark.

	Runs the CV extraction pipeline against several resume layouts and reports
	coverage scores per section so we can track robustness across formats.
	"""

	from __future__ import annotations

	import argparse
	import json
	import re
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Any, Dict, List, Optional

	from app.services.cv_extractor import CVExtractionService


	@dataclass
	class BenchmarkSample:
	    """A benchmark CV together with the per-section hit counts expected from it."""

	    # Label printed in the benchmark report.
	    name: str
	    # Raw CV text passed to the extraction service.
	    text: str
	    # Expected hit count keyed by section name; consumed by the scoring helpers.
	    expected: Dict[str, int]


	def _load_reference_text() -> str:
	    """Best-effort loader for the ``test_cv.txt`` fixture located next to this file.

	    Returns:
	        The decoded fixture text, or ``""`` when the fixture is missing,
	        unreadable, or no candidate encoding yields plausible text.
	    """
	    fixture_path = Path(__file__).with_name("test_cv.txt")
	    try:
	        raw_bytes = fixture_path.read_bytes()
	    except OSError:
	        # Missing or unreadable fixture: this loader is best-effort only.
	        return ""

	    # Strict UTF-8 is tried first. Any even-length UTF-8/ASCII payload also
	    # decodes "successfully" as UTF-16 into CJK-looking characters that pass
	    # the plausibility check below, so trying UTF-16 first returns garbage.
	    # A BOM-less UTF-16 file decoded as UTF-8 contains NUL characters and is
	    # rejected by that same check, so it still falls through to UTF-16.
	    for encoding in ("utf-8-sig", "utf-16", "utf-16-le", "latin-1"):
	        try:
	            text = raw_bytes.decode(encoding)
	        except UnicodeError:
	            continue

	        cleaned = text.strip()
	        alpha_count = sum(1 for char in cleaned if char.isalpha())
	        # Plausibility check: enough content, enough letters, no NUL bytes.
	        if len(cleaned) >= 20 and alpha_count >= 10 and "\x00" not in cleaned:
	            return text

	    return ""


	def _build_samples() -> List[BenchmarkSample]:
	    """Return the built-in benchmark CVs with their expected per-section hit counts.

	    Field separators inside the CV texts are plain ``|`` characters; writing
	    them as ``\\|`` in a normal string literal is an invalid escape sequence
	    and would inject literal backslashes into the text under test.
	    """
	    return [
	        BenchmarkSample(
	            name="Structured English CV",
	            text="""
	JOHN SMITH
	john.smith@example.com | +33 6 12 34 56 78 | linkedin.com/in/johnsmith

	PROFESSIONAL SUMMARY
	Senior Full Stack Developer with 8 years of experience in web development.

	EXPERIENCE
	Senior Developer - Tech Company Inc (2020-2024)
	- Led team of 5 developers
	- Built microservices using Python and FastAPI
	- Managed PostgreSQL databases

	Junior Developer - Startup LLC (2016-2020)
	- Developed React frontend applications
	- Worked with Node.js backend

	EDUCATION
	Bachelor of Science in Computer Science
	University of Technology (2016)

	SKILLS
	Python, JavaScript, TypeScript, SQL, HTML/CSS, FastAPI, React, Docker
	""".strip(),
	            expected={"identity": 3, "experience": 2, "education": 2, "skills": 6, "enrichment": 1},
	        ),
	        BenchmarkSample(
	            name="French CV with links",
	            text="""
	MARIE DUPONT
	marie.dupont@gmail.com | 06 12 34 56 78 | Paris
	linkedin.com/in/mariedupont | github.com/mdupont | mariedupont.dev

	PROFIL
	Chef de projet digital orientée produit et expérience client.

	EXPÉRIENCES PROFESSIONNELLES
	Responsable Marketing Digital - Entreprise X (2021 - Présent)
	- Pilotage de campagnes multi-canaux
	- Analyse des performances et reporting

	Chef de projet CRM - Société Y (2018 - 2021)
	- Mise en place d'automatisations marketing
	- Coordination avec les équipes produit et design

	FORMATION
	Master Marketing Digital - Université de Lyon (2018)

	COMPÉTENCES
	Communication, Organisation, Gestion de projet, Leadership, Sens du contact

	CERTIFICATIONS
	Google Analytics Individual Qualification
	HubSpot Inbound Marketing

	PROJETS
	Refonte du parcours d'onboarding client
	""".strip(),
	            expected={"identity": 3, "experience": 2, "education": 1, "skills": 4, "enrichment": 4},
	        ),
	        BenchmarkSample(
	            name="OCR noisy CV",
	            text="""
	ALEXANDRE MARTIN
	alex.martin@example.com

	EXPERIENCE PROFESSIONNELLE
	2022 - PRESENT | DATA ENGINEER | BLUE ANALYTICS
	Built ETL pipelines on Airflow and Spark
	Implemented data quality checks and dashboards

	2020 - 2022 - BI ANALYST - RETAIL GROUP
	Automated SQL reporting and Power BI models

	FORMATION
	2019 - Master Data Science - Paris School of AI

	LANGUES
	French English

	COMPETENCES
	Python, SQL, Airflow, Spark, Power BI, Communication

	PROJECTS
	Customer churn prediction using Python and scikit-learn
	""".strip(),
	            expected={"identity": 2, "experience": 2, "education": 1, "skills": 4, "enrichment": 2},
	        ),
	        BenchmarkSample(
	            name="Minimal fallback CV",
	            text="""
	NOAH LEROY
	noah.leroy@outlook.com

	Some short CV text with little structure.
	Python, SQL, Docker.
	""".strip(),
	            expected={"identity": 2, "experience": 0, "education": 0, "skills": 2, "enrichment": 0},
	        ),
	        BenchmarkSample(
	            name="Ultra short CV",
	            text="""
	NADIA BENALI
	nadia.benali@example.com
	+33 6 98 76 54 32
	Paris, France

	Python | SQL | Data analysis
	""".strip(),
	            expected={"identity": 3, "experience": 0, "education": 0, "skills": 2, "enrichment": 1},
	        ),
	    ]


	def _safe_len(value: Any) -> int:
	    """Count hits in *value*: list length for lists, else 1 if truthy and 0 if falsy."""
	    return len(value) if isinstance(value, list) else int(bool(value))


	def _score_section(found: int, expected: int) -> float:
	    """Convert a hit count into a section score.

	    With a positive *expected*, the score is the found/expected ratio capped
	    at 1.0 and scaled to 100. Otherwise, zero hits score 100.0 and any other
	    count scores 25 points per hit, capped at 100.0.
	    """
	    if expected > 0:
	        coverage = min(found / expected, 1.0)
	        return coverage * 100.0

	    # NOTE(review): when nothing is expected, one stray hit scores 25.0 while
	    # four score 100.0 -- confirm this rising scale is the intended behaviour.
	    if found == 0:
	        return 100.0
	    return min(found * 25.0, 100.0)


	def _build_section_scores(structured: Dict[str, Any]) -> Dict[str, float]:
	    """Count extracted items per benchmark section.

	    Args:
	        structured: Extraction output, read with ``.get`` by field name.

	    Returns:
	        Raw hit counts (as floats) keyed by ``identity``, ``experience``,
	        ``education``, ``skills`` and ``enrichment``.
	    """
	    identity_fields = ("full_name", "email", "phone")
	    enrichment_fields = (
	        "linkedin_urls",
	        "github_urls",
	        "portfolio_urls",
	        "certifications",
	        "projects",
	        "languages",
	        "soft_skills",
	        "interests",
	    )

	    # Identity fields count once each when present; everything else is
	    # measured through _safe_len.
	    identity_hits = sum(1 for field in identity_fields if structured.get(field))
	    enrichment_hits = sum(_safe_len(structured.get(field)) for field in enrichment_fields)

	    return {
	        "identity": float(identity_hits),
	        "experience": float(_safe_len(structured.get("experiences"))),
	        "education": float(_safe_len(structured.get("education"))),
	        "skills": float(_safe_len(structured.get("skills"))),
	        "enrichment": float(enrichment_hits),
	    }


	def _overall_score(section_hits: Dict[str, float], expected: Dict[str, int]) -> float:
	    """Combine the per-section scores into one weighted score rounded to 0.1.

	    Args:
	        section_hits: Hit count per section; every weighted section must be present.
	        expected: Expected hit count per section; a missing section counts as 0.

	    Returns:
	        The weighted sum of the section scores, rounded to one decimal.
	    """
	    weighted_sections = (
	        ("identity", 0.25),
	        ("experience", 0.30),
	        ("education", 0.15),
	        ("skills", 0.15),
	        ("enrichment", 0.15),
	    )

	    running = 0.0
	    for section_name, section_weight in weighted_sections:
	        hits = int(section_hits[section_name])
	        target = expected.get(section_name, 0)
	        running += section_weight * _score_section(hits, target)

	    return round(running, 1)


	def _diagnose_missing_fields(sample: BenchmarkSample, structured: Dict[str, Any]) -> List[str]:
	    """Return human-readable reasons for missing fields in a given sample.

	    Args:
	        sample: Benchmark sample; only its ``text`` attribute is inspected.
	        structured: Extraction output, read with ``.get`` by field name.

	    Returns:
	        Diagnostic messages (in French) for the fields that are missing from
	        ``structured``; possibly empty.
	    """
	    # Local import keeps this block self-contained without touching the
	    # file-level import section.
	    import unicodedata

	    reasons: List[str] = []
	    lines = [line.strip() for line in sample.text.splitlines() if line.strip()]

	    # Fold accents before keyword matching so the ASCII keywords below also
	    # match accented headings such as "EXPÉRIENCE" or "Éducation".
	    decomposed = unicodedata.normalize("NFKD", sample.text.lower())
	    normalized_text = "".join(char for char in decomposed if not unicodedata.combining(char))

	    if not structured.get("full_name"):
	        top_lines = lines[:5]
	        # A name-like line has 2 to 4 word tokens and contains no "@" or digit.
	        has_name_like_line = any(
	            2 <= len(re.findall(r"[A-Za-zÀ-ÿ'-]+", line)) <= 4 and not re.search(r"[@\d]", line)
	            for line in top_lines
	        )
	        if has_name_like_line:
	            reasons.append("Nom probable présent en haut du CV mais rejeté par les filtres de nom.")
	        elif structured.get("email"):
	            reasons.append("Nom absent mais un email est disponible: vérifier l'inférence depuis l'email.")
	        else:
	            reasons.append("Aucune ligne de nom claire détectée dans les premières lignes.")

	    if not structured.get("phone"):
	        has_phone_like_text = bool(
	            re.search(r"\+?\d[\d\s().-]{7,}\d", sample.text)
	        )
	        if has_phone_like_text:
	            reasons.append("Un numéro semble présent mais n'a pas passé la normalisation téléphone.")
	        else:
	            reasons.append("Aucun motif téléphone suffisamment clair détecté.")

	    if not structured.get("experiences"):
	        experience_keywords = ("experience", "experiences", "professionnelle", "work experience", "stage")
	        if any(keyword in normalized_text for keyword in experience_keywords):
	            reasons.append("Section expérience détectée mais aucun bloc stable titre/entreprise/période n'a pu être construit.")
	        else:
	            reasons.append("Aucune section expérience ou ancre de période détectée.")

	    education_keywords = ("formation", "education", "study", "universit", "school")
	    # Unlike the other fields, missing education is only reported when the
	    # text actually mentions an education keyword.
	    if not structured.get("education") and any(keyword in normalized_text for keyword in education_keywords):
	        reasons.append("Section formation présente mais les lignes ne ressemblaient pas assez à de l'éducation.")

	    if not structured.get("skills"):
	        technical_tokens = ("python", "sql", "java", "react", "docker", "airflow", "spark")
	        soft_keywords = ("communication", "organisation", "leadership", "rigueur", "autonomie", "gestion de projet", "sens du contact")
	        if any(token in normalized_text for token in technical_tokens):
	            reasons.append("Des mots-clés techniques existent mais la normalisation a raté l'extraction de compétences.")
	        elif any(keyword in normalized_text for keyword in soft_keywords):
	            reasons.append("Le CV contient surtout des compétences génériques/soft skills; vérifier si elles doivent être reportées dans skills ou seulement dans soft_skills.")
	        else:
	            reasons.append("Aucune compétence technique évidente détectée.")

	    if not (
	        structured.get("linkedin_urls")
	        or structured.get("github_urls")
	        or structured.get("portfolio_urls")
	        or structured.get("projects")
	        or structured.get("certifications")
	    ):
	        reasons.append("Aucun signal d'enrichissement (liens/projets/certifications) détecté.")

	    return reasons


	def run_benchmark(diagnostic: bool = False) -> int:
	    """Run every built-in sample through the extractor and print a report.

	    Args:
	        diagnostic: When true, also print the reasons returned by
	            ``_diagnose_missing_fields`` for each sample.

	    Returns:
	        ``0`` when the average overall score is at least 70.0, otherwise ``1``.
	    """
	    service = CVExtractionService()
	    samples = _build_samples()
	    section_order = ("identity", "experience", "education", "skills", "enrichment")
	    link_fields = ("linkedin_urls", "github_urls", "portfolio_urls")

	    print("=" * 78)
	    print("Multi-CV Extraction Benchmark")
	    print("=" * 78)

	    aggregate: List[float] = []

	    for index, sample in enumerate(samples, start=1):
	        result = service.extract_from_text(sample.text)
	        structured = result.structured
	        section_hits = _build_section_scores(structured)
	        overall = _overall_score(section_hits, sample.expected)
	        aggregate.append(overall)

	        # Same lookup as _overall_score: a section missing from ``expected``
	        # counts as 0 instead of raising KeyError after scoring succeeded.
	        section_report = ", ".join(
	            f"{section}={_score_section(int(section_hits[section]), sample.expected.get(section, 0)):.1f}"
	            for section in section_order
	        )
	        # Counts go through _safe_len, as in _build_section_scores, so a field
	        # that is present but None cannot crash the report with len(None).
	        link_count = sum(_safe_len(structured.get(field)) for field in link_fields)

	        print(f"\n[{index}] {sample.name}")
	        print(f" Overall: {overall:.1f}/100")
	        print(f" Sections: {section_report}")
	        print(
	            " Extracted: "
	            f"name={bool(structured.get('full_name'))}, "
	            f"email={bool(structured.get('email'))}, "
	            f"phone={bool(structured.get('phone'))}, "
	            f"experiences={_safe_len(structured.get('experiences'))}, "
	            f"education={_safe_len(structured.get('education'))}, "
	            f"skills={_safe_len(structured.get('skills'))}, "
	            f"links={link_count}, "
	            f"projects={_safe_len(structured.get('projects'))}, "
	            f"certifications={_safe_len(structured.get('certifications'))}"
	        )

	        if diagnostic:
	            reasons = _diagnose_missing_fields(sample, structured)
	            if reasons:
	                print(" Diagnostics:")
	                for reason in reasons:
	                    print(f" - {reason}")

	    average_score = round(sum(aggregate) / len(aggregate), 1) if aggregate else 0.0
	    print("\n" + "=" * 78)
	    print(f"Average overall score: {average_score:.1f}/100")
	    print("=" * 78)

	    # The return value doubles as a process exit code: non-zero means the
	    # average fell below the pass mark.
	    return 0 if average_score >= 70.0 else 1


	if __name__ == "__main__":
	    # Command-line entry point: parse the optional --diagnostic flag and exit
	    # with the status code returned by run_benchmark.
	    cli = argparse.ArgumentParser(description="Run the CV extraction benchmark")
	    cli.add_argument("--diagnostic", action="store_true", help="Print human-readable reasons for missing fields")
	    options = cli.parse_args()
	    exit_code = run_benchmark(diagnostic=options.diagnostic)
	    raise SystemExit(exit_code)