| """Unit tests for data normalization, scoring, deduplication and synthetic data generation.""" |
|
|
| import pytest |
| from app.services.data_normalization import ( |
| normalize_skill, |
| normalize_skills_list, |
| parse_experience_years, |
| clean_candidate, |
| ) |
| from app.services.scoring import ( |
| compute_match_score, |
| decide_match, |
| MatchDecision, |
| apply_business_rules, |
| ) |
| from app.services.deduplication import ( |
| compute_fingerprint, |
| deduplicate_candidates, |
| ) |
| from app.services.synthetic_data import ( |
| generate_synthetic_candidate, |
| generate_synthetic_job, |
| generate_synthetic_dataset, |
| ) |
|
|
|
|
class TestDataNormalization:
    """Checks for the skill, experience and candidate normalization helpers."""

    def test_normalize_skill_basic(self):
        """Capitalized and lowercase spellings both come back as Python."""
        for raw in ("Python", "python"):
            assert normalize_skill(raw) == "Python"

    def test_normalize_skill_mapping(self):
        """Short aliases are expanded to the expected skill names."""
        expected_by_alias = {
            "ml": "Machine Learning",
            "ML": "Machine Learning",
            "js": "JavaScript",
            "nodejs": "Node.js",
        }
        for alias, canonical in expected_by_alias.items():
            assert normalize_skill(alias) == canonical

    def test_normalize_skill_extra_mappings(self):
        """Additional aliases (spaced, abbreviated, spelled-out) are mapped."""
        cases = [
            ("fast api", "FastAPI"),
            ("postgres", "PostgreSQL"),
            ("csharp", "C#"),
        ]
        for alias, canonical in cases:
            assert normalize_skill(alias) == canonical

    def test_normalize_skills_list(self):
        """Every entry of a mixed list shows up in its normalized form."""
        normalized = normalize_skills_list(["React.js", "python", "ml", "AWS"])
        for expected in ("React", "Python", "Machine Learning", "AWS"):
            assert expected in normalized

    def test_parse_experience_years(self):
        """The leading number is extracted; an empty string yields 0."""
        cases = [
            ("5+ years", 5),
            ("5+ years of experience", 5),
            ("10 ans", 10),
            ("", 0),
        ]
        for text, years in cases:
            assert parse_experience_years(text) == years

    def test_clean_candidate(self):
        """A raw candidate gains normalized skills and parsed experience."""
        raw_candidate = {
            "skills": "React, python, ml, AWS",
            "experience": "5+ years in web dev",
            "education": "Bachelor",
        }
        result = clean_candidate(raw_candidate)
        assert "normalized_skills" in result
        # Four comma-separated skills go in, four normalized skills come out.
        assert len(result["normalized_skills"]) == 4
        assert result["experience_years"] == 5
|
|
|
|
class TestScoring:
    """Checks for match scoring, decision thresholds and business rules."""

    def test_compute_match_score_perfect_match(self):
        """Identical skill lists with enough experience score at least 0.80."""
        call_args = {
            "cv_skills": ["React", "Python", "AWS"],
            "job_skills": ["React", "Python", "AWS"],
            "cv_years": 5,
            "job_years": 3,
        }
        assert compute_match_score(**call_args) >= 0.80

    def test_compute_match_score_no_skills(self):
        """A CV without skills scores below 0.5 against a job requiring one."""
        result = compute_match_score(cv_skills=[], job_skills=["React"], cv_years=5)
        assert result < 0.5

    def test_decide_match_accepted(self):
        """A score of 0.85 is accepted."""
        assert decide_match(0.85) == MatchDecision.ACCEPTED

    def test_decide_match_review(self):
        """A score of 0.65 is sent to review."""
        assert decide_match(0.65) == MatchDecision.REVIEW

    def test_decide_match_rejected(self):
        """A score of 0.40 is rejected."""
        assert decide_match(0.40) == MatchDecision.REJECTED

    def test_apply_business_rules(self):
        """The rules keep the score, flag for review and report the missing skill."""
        payload = {
            "score": 0.75,
            "cv_skills": ["React", "Python"],
            "job_skills": ["React", "Node"],
            "cv_years": 5,
            "job_years": 3,
        }
        outcome = apply_business_rules(payload)
        assert outcome["decision"] == "to_review"
        assert outcome["score"] == 0.75

        # "Node" is required by the job but absent from the CV; match it
        # case-insensitively and stop at the first hit.
        node_reported = False
        for entry in outcome["missing_skills"]:
            if "node" in str(entry).lower():
                node_reported = True
                break
        assert node_reported
|
|
|
|
class TestDeduplication:
    """Checks for candidate fingerprinting and duplicate removal."""

    def test_compute_fingerprint(self):
        """The fingerprint is a string of exactly 32 characters."""
        fingerprint = compute_fingerprint(
            {
                "email": "test@example.com",
                "phone": "123456",
                "full_name": "John Doe",
                "normalized_skills": ["Python", "React"],
            }
        )
        assert isinstance(fingerprint, str)
        assert len(fingerprint) == 32

    def test_deduplicate_candidates(self):
        """Two identical candidates plus one distinct candidate leave two entries."""
        # The first two entries are separate dict objects with equal content.
        pool = [
            {
                "email": "test@example.com",
                "phone": "123456",
                "full_name": "John",
                "normalized_skills": ["Python"],
            },
            {
                "email": "test@example.com",
                "phone": "123456",
                "full_name": "John",
                "normalized_skills": ["Python"],
            },
            {
                "email": "other@example.com",
                "phone": "999999",
                "full_name": "Jane",
                "normalized_skills": ["React"],
            },
        ]
        unique = deduplicate_candidates(pool)
        assert len(unique) == 2
|
|
|
|
class TestSyntheticData:
    """Checks for the synthetic candidate, job and dataset generators."""

    def test_generate_synthetic_candidate(self):
        """A generated candidate carries the requested id and populated fields."""
        candidate = generate_synthetic_candidate(user_id=100)
        assert candidate["id"] == 100
        assert candidate["full_name"] == "Candidate 100"
        assert len(candidate["normalized_skills"]) > 0
        assert candidate["experience_years"] >= 0
        assert len(candidate["languages"]) > 0

    def test_generate_synthetic_job(self):
        """A generated job carries the requested id and non-empty requirements."""
        posting = generate_synthetic_job(job_id=50)
        assert posting["id"] == 50
        assert len(posting["required_skills"]) > 0
        assert posting["required_years"] >= 0

    def test_generate_synthetic_dataset(self):
        """The dataset holds the requested numbers of candidates and jobs."""
        generated = generate_synthetic_dataset(n_candidates=3, n_jobs=2, seed=42)
        assert len(generated["candidates"]) == 3
        assert len(generated["jobs"]) == 2
|
|
|
|
if __name__ == "__main__":
    # Allow running this file directly. Propagate pytest's exit status so a
    # failing run is reported to the shell/CI instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
|