File size: 4,752 Bytes
"""
Matching Module - Similarity scoring and ranking
Step 7 - Matching engine
"""
import numpy as np
from typing import List, Dict, Tuple
class CosineScorer:
    """
    Score a candidate's skills against weighted job criteria using cosine similarity.

    Step 7 - Customizable scoring algorithm.

    Skill names are compared case-insensitively. Both the candidate and the
    criteria are projected onto the ``all_skills`` vocabulary before the
    similarity is computed, so a skill that is absent from ``all_skills``
    contributes nothing to the similarity.
    """

    @staticmethod
    def vectorize_skills(skills: List[str], all_skills: List[str]) -> np.ndarray:
        """
        Convert a list of skills into a binary vector over ``all_skills``.

        Args:
            skills: Skill names the candidate has.
            all_skills: Complete list of all possible skills (the vocabulary).
                Position ``i`` of the result corresponds to ``all_skills[i]``.

        Returns:
            Float numpy array of length ``len(all_skills)`` holding 1.0 where the
            candidate has the skill (case-insensitive match) and 0.0 elsewhere.
        """
        owned = {skill.lower() for skill in skills}
        return np.array(
            [1.0 if skill.lower() in owned else 0.0 for skill in all_skills],
            dtype=float,
        )

    @staticmethod
    def vectorize_criteria(
        criteria_skills: Dict[str, float],
        all_skills: List[str],
    ) -> np.ndarray:
        """
        Convert weighted criteria into a unit-length vector over ``all_skills``.

        Args:
            criteria_skills: Dict of {skill_name: weight (0-100)}.
            all_skills: Complete list of all possible skills (the vocabulary).

        Returns:
            Float numpy array of length ``len(all_skills)``. Each weight is first
            divided by 100, then the whole vector is scaled to unit L2 norm.
            If no criterion matches the vocabulary (or all matching weights are
            zero) the all-zero vector is returned unchanged.
        """
        # One lowercase lookup table instead of scanning every criterion for
        # every vocabulary entry. ``setdefault`` keeps the first entry when two
        # criteria names differ only by case.
        weight_by_name: Dict[str, float] = {}
        for name, weight in criteria_skills.items():
            weight_by_name.setdefault(name.lower(), weight)

        vector = np.array(
            [weight_by_name.get(skill.lower(), 0.0) / 100.0 for skill in all_skills],
            dtype=float,
        )

        # Scale to unit length; skip when the vector is all zeros to avoid 0/0.
        norm = np.linalg.norm(vector)
        if norm > 0:
            vector = vector / norm
        return vector

    @staticmethod
    def cosine_similarity(vec1: np.ndarray, vec2: np.ndarray) -> float:
        """
        Calculate the cosine similarity between two vectors.

        Args:
            vec1: Candidate skills vector.
            vec2: Job criteria vector.

        Returns:
            The cosine of the angle between the vectors as a built-in ``float``.
            It lies in 0-1 when both vectors are non-negative, and is 0.0 when
            either vector has zero norm.
        """
        dot_product = np.dot(vec1, vec2)
        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)
        if norm1 == 0 or norm2 == 0:
            return 0.0
        # Rounding can push the ratio a hair outside [-1, 1]; clip it back.
        # np.clip (unlike min/max) propagates NaN instead of hiding it.
        return float(np.clip(dot_product / (norm1 * norm2), -1.0, 1.0))

    @staticmethod
    def calculate_match_score(
        candidate_skills: List[str],
        criteria_skills: Dict[str, float],
        all_skills: List[str],
    ) -> Dict:
        """
        Calculate the complete match score with a per-skill breakdown.

        The similarity is computed over the ``all_skills`` vocabulary, whereas
        the matched/missing lists and the breakdown compare ``criteria_skills``
        directly against ``candidate_skills`` (case-insensitively).

        Args:
            candidate_skills: List of the candidate's skills.
            criteria_skills: Dict of {skill: weight (0-100)}.
            all_skills: Dictionary (vocabulary) of all skills.

        Returns:
            {
                "score": similarity * 100, clamped to 0-100,
                "similarity": cosine similarity,
                "matched_skills": criteria skills the candidate has,
                "missing_skills": criteria skills the candidate lacks,
                "skill_breakdown": {skill: weight if matched else 0, ...},
                "matching_percentage": share of criteria matched (0-100),
                    0 when there are no criteria
            }
        """
        candidate_vec = CosineScorer.vectorize_skills(candidate_skills, all_skills)
        criteria_vec = CosineScorer.vectorize_criteria(criteria_skills, all_skills)

        similarity = CosineScorer.cosine_similarity(candidate_vec, criteria_vec)
        score = similarity * 100  # Convert to 0-100

        # Single pass over the criteria fills the matched/missing lists and the
        # breakdown together.
        owned = {skill.lower() for skill in candidate_skills}
        matched_skills = []
        missing_skills = []
        skill_breakdown = {}
        for skill, weight in criteria_skills.items():
            if skill.lower() in owned:
                matched_skills.append(skill)
                skill_breakdown[skill] = weight  # Full weight if skill is present
            else:
                missing_skills.append(skill)
                skill_breakdown[skill] = 0  # 0 if missing

        # Guard against division by zero when no criteria were supplied.
        if criteria_skills:
            matching_percentage = len(matched_skills) / len(criteria_skills) * 100
        else:
            matching_percentage = 0

        return {
            "score": min(100.0, max(0.0, score)),  # Clamp to 0-100
            "similarity": similarity,
            "matched_skills": matched_skills,
            "missing_skills": missing_skills,
            "skill_breakdown": skill_breakdown,
            "matching_percentage": matching_percentage,
        }
|