Spaces:

RHmaster
/

ai-talent-finder-backend

Running

ai-talent-finder-backend / tests /test_data_scoring_services.py

ilyass yani

Deploiement backend dans HF Spaces

9df97a2 10 days ago

5.8 kB

	"""Unit tests for data normalization, scoring and deduplication."""

	import pytest
	from app.services.data_normalization import (
	normalize_skill,
	normalize_skills_list,
	parse_experience_years,
	clean_candidate,
	)
	from app.services.scoring import (
	compute_match_score,
	decide_match,
	MatchDecision,
	apply_business_rules,
	)
	from app.services.deduplication import (
	compute_fingerprint,
	deduplicate_candidates,
	)
	from app.services.synthetic_data import (
	generate_synthetic_candidate,
	generate_synthetic_job,
	generate_synthetic_dataset,
	)


	class TestDataNormalization:
	    """Checks for the skill, experience and candidate normalization helpers."""

	    def test_normalize_skill_basic(self):
	        """Both casings of the same skill normalize to "Python"."""
	        for raw in ("Python", "python"):
	            assert normalize_skill(raw) == "Python"

	    def test_normalize_skill_mapping(self):
	        """Short aliases resolve to their full canonical skill names."""
	        alias_table = [
	            ("ml", "Machine Learning"),
	            ("ML", "Machine Learning"),
	            ("js", "JavaScript"),
	            ("nodejs", "Node.js"),
	        ]
	        for alias, canonical in alias_table:
	            assert normalize_skill(alias) == canonical

	    def test_normalize_skill_extra_mappings(self):
	        """Additional alias spellings resolve to the expected canonical names."""
	        alias_table = [
	            ("fast api", "FastAPI"),
	            ("postgres", "PostgreSQL"),
	            ("csharp", "C#"),
	        ]
	        for alias, canonical in alias_table:
	            assert normalize_skill(alias) == canonical

	    def test_normalize_skills_list(self):
	        """Every raw skill shows up in the result under its normalized name."""
	        result = normalize_skills_list(["React.js", "python", "ml", "AWS"])
	        for expected in ("React", "Python", "Machine Learning", "AWS"):
	            assert expected in result

	    def test_parse_experience_years(self):
	        """Free-text experience yields the year count; empty text yields 0."""
	        samples = [
	            ("5+ years", 5),
	            ("5+ years of experience", 5),
	            ("10 ans", 10),
	            ("", 0),
	        ]
	        for text, years in samples:
	            assert parse_experience_years(text) == years

	    def test_clean_candidate(self):
	        """Cleaning adds the normalized skills and the parsed experience years."""
	        raw_candidate = dict(
	            skills="React, python, ml, AWS",
	            experience="5+ years in web dev",
	            education="Bachelor",
	        )
	        result = clean_candidate(raw_candidate)
	        assert "normalized_skills" in result
	        assert len(result["normalized_skills"]) == 4
	        assert result["experience_years"] == 5


	class TestScoring:
	    """Checks for match scoring, decision thresholds and business rules."""

	    def test_compute_match_score_perfect_match(self):
	        """Identical skill sets plus surplus experience must score at least 0.80."""
	        required = ["React", "Python", "AWS"]
	        result = compute_match_score(
	            cv_skills=list(required),  # separate list object, same content
	            job_skills=required,
	            cv_years=5,
	            job_years=3,
	        )
	        # A perfect skill overlap with enough experience should rank high.
	        assert result >= 0.80

	    def test_compute_match_score_no_skills(self):
	        """A CV without any skill must score below 0.5 against a one-skill job."""
	        result = compute_match_score(cv_skills=[], job_skills=["React"], cv_years=5)
	        assert result < 0.5

	    def test_decide_match_accepted(self):
	        """A score of 0.85 is classified as ACCEPTED."""
	        assert decide_match(0.85) == MatchDecision.ACCEPTED

	    def test_decide_match_review(self):
	        """A score of 0.65 is classified as REVIEW."""
	        assert decide_match(0.65) == MatchDecision.REVIEW

	    def test_decide_match_rejected(self):
	        """A score of 0.40 is classified as REJECTED."""
	        assert decide_match(0.40) == MatchDecision.REJECTED

	    def test_apply_business_rules(self):
	        """Business rules report the decision, the score and the missing skills."""
	        payload = {
	            "score": 0.75,
	            "cv_skills": ["React", "Python"],
	            "job_skills": ["React", "Node"],
	            "cv_years": 5,
	            "job_years": 3,
	        }
	        outcome = apply_business_rules(payload)
	        assert outcome["decision"] == "to_review"
	        assert outcome["score"] == 0.75
	        # Compare case-insensitively: missing skills may come back lowercased.
	        lowered = (str(item).lower() for item in outcome["missing_skills"])
	        assert any("node" in name for name in lowered)


	class TestDeduplication:
	    """Checks for candidate fingerprinting and duplicate removal."""

	    @staticmethod
	    def _candidate(email, phone, full_name, skills):
	        """Build a candidate record; each call gets its own skills list."""
	        return {
	            "email": email,
	            "phone": phone,
	            "full_name": full_name,
	            "normalized_skills": list(skills),
	        }

	    def test_compute_fingerprint(self):
	        """The fingerprint is a 32-character string."""
	        profile = self._candidate(
	            "test@example.com", "123456", "John Doe", ["Python", "React"]
	        )
	        digest = compute_fingerprint(profile)
	        assert isinstance(digest, str)
	        assert len(digest) == 32  # MD5 hash

	    def test_deduplicate_candidates(self):
	        """Two identical records collapse into one; the distinct one is kept."""
	        first = self._candidate("test@example.com", "123456", "John", ["Python"])
	        twin = self._candidate("test@example.com", "123456", "John", ["Python"])
	        other = self._candidate("other@example.com", "999999", "Jane", ["React"])
	        unique = deduplicate_candidates([first, twin, other])
	        assert len(unique) == 2  # twin is a duplicate of first


	class TestSyntheticData:
	    """Checks for the synthetic candidate, job and dataset generators."""

	    def test_generate_synthetic_candidate(self):
	        """A generated candidate carries the requested id and non-empty fields."""
	        requested_id = 100
	        candidate = generate_synthetic_candidate(user_id=requested_id)
	        assert candidate["id"] == requested_id
	        assert candidate["full_name"] == "Candidate 100"
	        assert len(candidate["normalized_skills"]) > 0
	        assert candidate["experience_years"] >= 0
	        assert len(candidate["languages"]) > 0

	    def test_generate_synthetic_job(self):
	        """A generated job carries the requested id and at least one skill."""
	        requested_id = 50
	        posting = generate_synthetic_job(job_id=requested_id)
	        assert posting["id"] == requested_id
	        assert len(posting["required_skills"]) > 0
	        assert posting["required_years"] >= 0

	    def test_generate_synthetic_dataset(self):
	        """The dataset holds exactly the requested number of candidates and jobs."""
	        expected_sizes = {"candidates": 3, "jobs": 2}
	        bundle = generate_synthetic_dataset(
	            n_candidates=expected_sizes["candidates"],
	            n_jobs=expected_sizes["jobs"],
	            seed=42,
	        )
	        for section, count in expected_sizes.items():
	            assert len(bundle[section]) == count


	if __name__ == "__main__":
	    # pytest.main() returns the run's exit code instead of exiting itself.
	    # Propagate it so that running this file directly (e.g. from CI or a
	    # shell script) fails with a non-zero status when a test fails.
	    raise SystemExit(pytest.main([__file__, "-v"]))