ai-talent-finder-backend / app /services /normalization.py
ilyass yani
Deploiement backend dans HF Spaces
9df97a2
Raw
History Blame Contribute Delete
1.85 kB
"""Text and skill normalization helpers used across extraction, matching and training."""
from __future__ import annotations
import re
# Matches any run of one or more whitespace characters; the helpers below use
# it to collapse such runs into a single space.
_WHITESPACE_RE = re.compile(r"\s+")
# NOTE: despite its name, this pattern matches runs of tab, carriage-return
# and newline characters only -- it does not match punctuation.
_PUNCT_RE = re.compile(r"[\t\r\n]+")
# Lookup table from a lowercase skill spelling to its canonical display name.
# Entries are grouped by canonical name; each group lists every accepted
# lowercase spelling for that name.
_SKILL_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        ("Machine Learning", ("ml", "machine learning")),
        ("Artificial Intelligence", ("ai", "artificial intelligence")),
        ("Natural Language Processing", ("nlp", "natural language processing")),
        ("JavaScript", ("js", "javascript")),
        ("TypeScript", ("ts", "typescript")),
        ("SQL", ("sql",)),
        ("Python", ("python",)),
        ("FastAPI", ("fastapi",)),
        ("Docker", ("docker",)),
        ("AWS", ("aws",)),
        ("DevOps", ("devops",)),
        ("React", ("react",)),
        ("Node.js", ("node js", "nodejs")),
        ("Pandas", ("pandas",)),
        ("Scikit-Learn", ("scikit learn",)),
    )
    for alias in aliases
}
def normalize_text(value: str | None) -> str:
    """Return *value* with whitespace collapsed to single spaces and trimmed.

    Non-breaking spaces are turned into regular spaces first, then runs of
    tab/CR/LF and runs of any whitespace are each replaced by one space.
    A falsy *value* (``None`` or an empty string) yields ``""``; any other
    value is coerced with ``str()`` before cleaning.
    """
    if value:
        cleaned = str(value).replace("\u00a0", " ")
        # Apply the line-break pattern first, then the general whitespace one.
        for pattern in (_PUNCT_RE, _WHITESPACE_RE):
            cleaned = pattern.sub(" ", cleaned)
        return cleaned.strip()
    return ""
def normalize_skill_name(value: str | None) -> str:
    """Return the canonical display name for a skill.

    The input is cleaned with ``normalize_text``, ``/`` is treated as a word
    separator, and the lowercase result is looked up in ``_SKILL_ALIASES``.
    If the exact spelling is unknown, a second lookup is tried with ``.``,
    ``-`` and ``_`` treated as spaces, so that spellings such as ``"Node.js"``
    or ``"scikit-learn"`` resolve to their canonical names instead of being
    mangled by title-casing. Unknown skills fall back to ``str.title()``.

    Returns ``""`` when nothing is left after cleaning (``None``, empty
    input, or input made only of whitespace and slashes).
    """
    text = normalize_text(value)
    if not text:
        return ""

    # Replacing "/" can introduce leading, trailing or doubled spaces
    # ("/foo", "a / b"); collapse and strip so the fallback result is clean.
    text = _WHITESPACE_RE.sub(" ", text.replace("/", " ")).strip()
    if not text:
        return ""

    key = text.lower()
    canonical = _SKILL_ALIASES.get(key)
    if canonical is None:
        # Retry with common in-word separators treated as spaces so that the
        # canonical names themselves ("Node.js") are fixed points.
        relaxed = key.replace(".", " ").replace("-", " ").replace("_", " ")
        canonical = _SKILL_ALIASES.get(" ".join(relaxed.split()))

    return canonical if canonical is not None else text.title()
def normalize_company_name(value: str | None) -> str:
    """Return a cleaned company name.

    The value is passed through ``normalize_text`` and then any leading or
    trailing spaces, commas, periods and hyphens are removed.
    """
    edge_chars = " ,.-"
    cleaned = normalize_text(value)
    return cleaned.strip(edge_chars)
def normalize_job_title(value: str | None, *, max_length: int = 120) -> str:
    """Return a cleaned job title limited to *max_length* characters.

    Args:
        value: Raw job title; falsy input yields ``""``.
        max_length: Maximum number of characters to keep (default 120).

    Returns:
        The title cleaned with ``normalize_text`` and truncated, with no
        trailing whitespace.

    Raises:
        ValueError: If *max_length* is negative.
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")
    # Truncation can cut right after a space; strip it so the result stays
    # trimmed like every other normalized value.
    return normalize_text(value)[:max_length].rstrip()
def compact_join(values: list[str] | tuple[str, ...] | None) -> str:
    """Join the non-empty, cleaned items of *values* with single spaces.

    Each item is cleaned with ``normalize_text``; items that clean to an
    empty string are dropped. A falsy *values* (``None`` or an empty
    sequence) yields ``""``.
    """
    if not values:
        return ""
    # Clean every item once, then discard the ones that came out empty.
    kept = filter(None, map(normalize_text, values))
    return normalize_text(" ".join(kept))