Spaces:

RHmaster
/

ai-talent-finder-backend

Running

ai-talent-finder-backend / app /services /normalization.py

ilyass yani

Deploiement backend dans HF Spaces

9df97a2 9 days ago

1.85 kB

	"""Text and skill normalization helpers used across extraction, matching and training."""

	from __future__ import annotations

	import re


	# Matches any run of whitespace (Unicode-aware ``\s``).
	_WHITESPACE_RE = re.compile(r"\s+")
	# NOTE: despite the name, this matches runs of tab / CR / LF, not punctuation.
	_PUNCT_RE = re.compile(r"[\t\r\n]+")
	# Lower-cased spelling -> canonical display name. ``normalize_skill_name``
	# looks skills up here and falls back to ``str.title()`` for anything missing,
	# so names whose canonical casing ``title()`` would mangle (e.g. "Node.js" ->
	# "Node.Js") need an explicit entry for every spelling that should match.
	_SKILL_ALIASES = {
	    "ml": "Machine Learning",
	    "machine learning": "Machine Learning",
	    "ai": "Artificial Intelligence",
	    "artificial intelligence": "Artificial Intelligence",
	    "nlp": "Natural Language Processing",
	    "natural language processing": "Natural Language Processing",
	    "js": "JavaScript",
	    "javascript": "JavaScript",
	    "ts": "TypeScript",
	    "typescript": "TypeScript",
	    "sql": "SQL",
	    "python": "Python",
	    "fastapi": "FastAPI",
	    "docker": "Docker",
	    "aws": "AWS",
	    "devops": "DevOps",
	    "react": "React",
	    "node js": "Node.js",
	    "node.js": "Node.js",
	    "nodejs": "Node.js",
	    "pandas": "Pandas",
	    "scikit learn": "Scikit-Learn",
	    "scikit-learn": "Scikit-Learn",
	}


	def normalize_text(value: str | None) -> str:
	    """Return *value* with whitespace collapsed to single spaces and trimmed.

	    Args:
	        value: Raw text. ``None`` and any other falsy input yield ``""``;
	            truthy non-string input is converted with ``str()`` first.

	    Returns:
	        The text with every run of whitespace (spaces, tabs, CR/LF,
	        non-breaking spaces, ...) replaced by one space and no leading or
	        trailing whitespace.
	    """
	    if not value:
	        return ""
	    # ``str.split()`` with no separator splits on runs of Unicode whitespace
	    # and discards leading/trailing whitespace, so a single split/join pass
	    # performs the NBSP replacement, tab/newline replacement, collapse and
	    # trim in one go.
	    return " ".join(str(value).split())


	def normalize_skill_name(value: str | None) -> str:
	    """Return the canonical display name for a raw skill string.

	    The input is cleaned with ``normalize_text``, ``/`` is treated as a word
	    separator, and the lower-cased result is looked up in ``_SKILL_ALIASES``.
	    Skills without an alias fall back to ``str.title()`` casing.

	    Args:
	        value: Raw skill text, or ``None``.

	    Returns:
	        The canonical skill name, or ``""`` when nothing but whitespace and
	        slashes was supplied.
	    """
	    text = normalize_text(value)
	    if not text:
	        return ""

	    # Replacing "/" can leave doubled, leading or trailing spaces ("/go",
	    # "ci / cd"), so collapse and trim before BOTH the alias lookup and the
	    # title-case fallback; otherwise the fallback leaks stray spaces.
	    text = _WHITESPACE_RE.sub(" ", text.replace("/", " ")).strip()
	    if not text:
	        # The input consisted solely of slashes.
	        return ""
	    return _SKILL_ALIASES.get(text.lower(), text.title())


	def normalize_company_name(value: str | None) -> str:
	    """Return a cleaned company name.

	    Whitespace is normalized with ``normalize_text``; then any leading or
	    trailing spaces, commas, periods and hyphens are removed. Characters
	    inside the name are left untouched.

	    Args:
	        value: Raw company name, or ``None``.

	    Returns:
	        The cleaned name, or ``""`` for empty input.
	    """
	    text = normalize_text(value)
	    # ``strip`` takes a set of characters, not a prefix/suffix string.
	    return text.strip(" ,.-")


	def normalize_job_title(value: str | None) -> str:
	    """Return a whitespace-normalized job title capped at 120 characters.

	    Args:
	        value: Raw job title, or ``None``.

	    Returns:
	        The normalized title, truncated to at most 120 characters, or ``""``
	        for empty input.
	    """
	    text = normalize_text(value)
	    # The cut can land right after a word boundary; drop the space it would
	    # otherwise leave at the end so the result stays normalized.
	    return text[:120].rstrip()


	def compact_join(values: list[str] | tuple[str, ...] | None) -> str:
	    """Join the non-empty, normalized items of *values* with single spaces.

	    Args:
	        values: Strings to join, or ``None``.

	    Returns:
	        Each item passed through ``normalize_text``, with items that
	        normalize to ``""`` dropped, joined by one space. Returns ``""`` when
	        *values* is ``None`` or empty.
	    """
	    if not values:
	        return ""
	    # Normalize each item exactly once. Every surviving part is already
	    # trimmed and single-spaced, so the joined string needs no further pass.
	    parts = [normalize_text(item) for item in values]
	    return " ".join(part for part in parts if part)