ilyass yani committed on
Commit ·
bf6b5ed
1
Parent(s): c53f53f
Batch ameliorations: fix candidats, NER nom, recruiter_id + migration
Browse files- .dockerignore +15 -0
- ai_module/nlp/resume_ner_extractor.py +19 -11
- alembic/versions/20260620_add_recruiter_id.py +43 -0
- app/api/candidates.py +28 -13
- app/main.py +7 -15
- app/models/models.py +1 -0
- app/schemas/candidate.py +1 -0
.dockerignore
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
*.so
|
| 4 |
+
.pytest_cache/
|
| 5 |
+
.mypy_cache/
|
| 6 |
+
.ruff_cache/
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
.env
|
| 10 |
+
.git
|
| 11 |
+
.gitignore
|
| 12 |
+
.vscode/
|
| 13 |
+
uploads/
|
| 14 |
+
tests/
|
| 15 |
+
*.db
|
ai_module/nlp/resume_ner_extractor.py
CHANGED
|
@@ -243,35 +243,43 @@ class ResumeNERExtractor:
|
|
| 243 |
email_fallback = self._infer_name_from_email(emails[0])
|
| 244 |
|
| 245 |
for index, raw_line in enumerate(lines[:80]):
|
| 246 |
-
line = raw_line.strip().strip(
|
| 247 |
normalized = self._normalize_for_matching(line)
|
| 248 |
|
| 249 |
-
if not line or
|
| 250 |
continue
|
| 251 |
-
if any(token in normalized for token in (
|
| 252 |
continue
|
| 253 |
if any(char.isdigit() for char in line):
|
| 254 |
continue
|
| 255 |
|
| 256 |
-
words = [word for word in re.split(r
|
| 257 |
-
|
|
|
|
| 258 |
continue
|
| 259 |
|
| 260 |
-
alpha_words = sum(1 for word in words if re.search(r
|
| 261 |
if alpha_words != len(words):
|
| 262 |
continue
|
| 263 |
|
| 264 |
score = 0
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
| 266 |
score += 3
|
| 267 |
-
|
|
|
|
|
|
|
| 268 |
score += 5
|
| 269 |
-
|
|
|
|
| 270 |
score += 4
|
| 271 |
-
|
|
|
|
| 272 |
score += 2
|
| 273 |
|
| 274 |
-
if any(keyword in normalized for keyword in (
|
| 275 |
score -= 4
|
| 276 |
|
| 277 |
candidates.append((score, line.title()))
|
|
|
|
| 243 |
email_fallback = self._infer_name_from_email(emails[0])
|
| 244 |
|
| 245 |
for index, raw_line in enumerate(lines[:80]):
|
| 246 |
+
line = raw_line.strip().strip('•*-').strip()
|
| 247 |
normalized = self._normalize_for_matching(line)
|
| 248 |
|
| 249 |
+
if not line or '@' in line or 'http' in normalized or self._is_section_header(normalized):
|
| 250 |
continue
|
| 251 |
+
if any(token in normalized for token in ('linkedin', 'github', 'contact', 'profil', 'profile')):
|
| 252 |
continue
|
| 253 |
if any(char.isdigit() for char in line):
|
| 254 |
continue
|
| 255 |
|
| 256 |
+
words = [word for word in re.split(r'\s+', line) if word]
|
| 257 |
+
# Allow 1–4 words: single-name aliases, compound names, and particle names (de, van…)
|
| 258 |
+
if not 1 <= len(words) <= 4:
|
| 259 |
continue
|
| 260 |
|
| 261 |
+
alpha_words = sum(1 for word in words if re.search(r'[A-Za-zÀ-ÿ]', word))
|
| 262 |
if alpha_words != len(words):
|
| 263 |
continue
|
| 264 |
|
| 265 |
score = 0
|
| 266 |
+
# Strong position bonus for the very first lines of the CV
|
| 267 |
+
if index < 5:
|
| 268 |
+
score += 5
|
| 269 |
+
elif index < 15:
|
| 270 |
score += 3
|
| 271 |
+
|
| 272 |
+
# ALL_CAPS name (common in French CVs): highest match
|
| 273 |
+
if re.fullmatch(r"[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+){0,3}", line):
|
| 274 |
score += 5
|
| 275 |
+
# Title-case properly capitalized name
|
| 276 |
+
elif re.fullmatch(r"[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+){0,3}", line):
|
| 277 |
score += 4
|
| 278 |
+
# Mixed-case but all letters
|
| 279 |
+
elif all(re.search(r'[A-Za-zÀ-ÿ]', w) for w in words):
|
| 280 |
score += 2
|
| 281 |
|
| 282 |
+
if any(keyword in normalized for keyword in ('experience', 'formation', 'education', 'profil', 'contact')):
|
| 283 |
score -= 4
|
| 284 |
|
| 285 |
candidates.append((score, line.title()))
|
alembic/versions/20260620_add_recruiter_id.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Add recruiter_id to candidates to track who uploaded each CV
|
| 2 |
+
|
| 3 |
+
Revision ID: 20260620_add_recruiter_id
|
| 4 |
+
Revises: 20260617_add_profile_visibility
|
| 5 |
+
Create Date: 2026-06-20 00:00:00.000000
|
| 6 |
+
"""
|
| 7 |
+
from typing import Sequence, Union
|
| 8 |
+
|
| 9 |
+
from alembic import op
|
| 10 |
+
import sqlalchemy as sa
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
revision: str = "20260620_add_recruiter_id"
|
| 14 |
+
down_revision: Union[str, None] = "20260617_add_profile_visibility"
|
| 15 |
+
branch_labels: Union[str, Sequence[str], None] = None
|
| 16 |
+
depends_on: Union[str, Sequence[str], None] = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def upgrade() -> None:
|
| 20 |
+
op.add_column(
|
| 21 |
+
"candidates",
|
| 22 |
+
sa.Column(
|
| 23 |
+
"recruiter_id",
|
| 24 |
+
sa.Integer(),
|
| 25 |
+
sa.ForeignKey("users.id"),
|
| 26 |
+
nullable=True,
|
| 27 |
+
),
|
| 28 |
+
)
|
| 29 |
+
op.create_index("ix_candidates_recruiter_id", "candidates", ["recruiter_id"])
|
| 30 |
+
# Backfill: existing recruiter-deposited profiles have owner_role='recruiter'
|
| 31 |
+
# and their user_id was incorrectly set to the recruiter's id.
|
| 32 |
+
# Move that value to recruiter_id and clear user_id so the unique constraint
|
| 33 |
+
# no longer blocks future uploads by the same recruiter.
|
| 34 |
+
op.execute("""
|
| 35 |
+
UPDATE candidates
|
| 36 |
+
SET recruiter_id = user_id, user_id = NULL
|
| 37 |
+
WHERE owner_role = 'recruiter' AND user_id IS NOT NULL
|
| 38 |
+
""")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def downgrade() -> None:
|
| 42 |
+
op.drop_index("ix_candidates_recruiter_id", table_name="candidates")
|
| 43 |
+
op.drop_column("candidates", "recruiter_id")
|
app/api/candidates.py
CHANGED
|
@@ -84,7 +84,7 @@ def get_candidates(
|
|
| 84 |
elif current_user.role == UserRole.recruiter:
|
| 85 |
query = db.query(Candidate).filter(
|
| 86 |
or_(
|
| 87 |
-
Candidate.
|
| 88 |
and_(
|
| 89 |
or_(
|
| 90 |
Candidate.owner_role == "candidate",
|
|
@@ -380,29 +380,39 @@ async def upload_candidate_cv(
|
|
| 380 |
|
| 381 |
# Visibility metadata
|
| 382 |
depositor_role = cast(UserRole, current_user.role)
|
| 383 |
-
candidate_dict["user_id"] = current_user.id
|
| 384 |
candidate_dict["owner_role"] = depositor_role.value # "candidate" or "recruiter"
|
| 385 |
# Recruiter deposits are always private; candidate profiles start hidden
|
| 386 |
candidate_dict["is_visible"] = False
|
| 387 |
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
if not existing_candidate and candidate_dict.get("email"):
|
| 394 |
existing_candidate = db.query(Candidate).filter(
|
| 395 |
-
Candidate.
|
| 396 |
).first()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
if existing_candidate:
|
| 399 |
for key, value in candidate_dict.items():
|
| 400 |
setattr(existing_candidate, key, value)
|
| 401 |
-
existing_candidate.user_id = current_user.id
|
| 402 |
db_candidate = existing_candidate
|
| 403 |
else:
|
| 404 |
db_candidate = Candidate(**candidate_dict)
|
| 405 |
-
db_candidate.user_id = current_user.id
|
| 406 |
db.add(db_candidate)
|
| 407 |
|
| 408 |
db.flush()
|
|
@@ -528,10 +538,14 @@ async def upload_cv_with_ner(
|
|
| 528 |
candidate_email = profile.get("email") or current_user.email
|
| 529 |
depositor_role = cast(UserRole, current_user.role)
|
| 530 |
|
|
|
|
| 531 |
candidate = db.query(Candidate).filter(Candidate.email == candidate_email).first()
|
| 532 |
|
| 533 |
if candidate:
|
| 534 |
-
|
|
|
|
|
|
|
|
|
|
| 535 |
candidate.full_name = profile.get("full_name") or current_user.full_name
|
| 536 |
candidate.phone = profile.get("phone")
|
| 537 |
candidate.raw_text = text[:5000]
|
|
@@ -547,7 +561,8 @@ async def upload_cv_with_ner(
|
|
| 547 |
candidate.cv_path = None # not persisted
|
| 548 |
else:
|
| 549 |
candidate = Candidate(
|
| 550 |
-
user_id=current_user.id,
|
|
|
|
| 551 |
full_name=profile.get("full_name") or current_user.full_name,
|
| 552 |
email=candidate_email,
|
| 553 |
phone=profile.get("phone"),
|
|
|
|
| 84 |
elif current_user.role == UserRole.recruiter:
|
| 85 |
query = db.query(Candidate).filter(
|
| 86 |
or_(
|
| 87 |
+
Candidate.recruiter_id == current_user.id,
|
| 88 |
and_(
|
| 89 |
or_(
|
| 90 |
Candidate.owner_role == "candidate",
|
|
|
|
| 380 |
|
| 381 |
# Visibility metadata
|
| 382 |
depositor_role = cast(UserRole, current_user.role)
|
|
|
|
| 383 |
candidate_dict["owner_role"] = depositor_role.value # "candidate" or "recruiter"
|
| 384 |
# Recruiter deposits are always private; candidate profiles start hidden
|
| 385 |
candidate_dict["is_visible"] = False
|
| 386 |
|
| 387 |
+
if depositor_role == UserRole.candidate:
|
| 388 |
+
# Candidate owns their profile: link via user_id (unique per user)
|
| 389 |
+
candidate_dict["user_id"] = current_user.id
|
| 390 |
+
candidate_dict["recruiter_id"] = None
|
|
|
|
|
|
|
| 391 |
existing_candidate = db.query(Candidate).filter(
|
| 392 |
+
Candidate.user_id == current_user.id
|
| 393 |
).first()
|
| 394 |
+
if not existing_candidate and candidate_dict.get("email"):
|
| 395 |
+
existing_candidate = db.query(Candidate).filter(
|
| 396 |
+
Candidate.email == candidate_dict["email"]
|
| 397 |
+
).first()
|
| 398 |
+
else:
|
| 399 |
+
# Recruiter deposits: track recruiter via recruiter_id, NOT user_id.
|
| 400 |
+
# This allows a recruiter to upload many CVs without constraint conflicts.
|
| 401 |
+
candidate_dict["user_id"] = None
|
| 402 |
+
candidate_dict["recruiter_id"] = current_user.id
|
| 403 |
+
# Upsert only by email to avoid overwriting a different person's record
|
| 404 |
+
existing_candidate = None
|
| 405 |
+
if candidate_dict.get("email"):
|
| 406 |
+
existing_candidate = db.query(Candidate).filter(
|
| 407 |
+
Candidate.email == candidate_dict["email"]
|
| 408 |
+
).first()
|
| 409 |
|
| 410 |
if existing_candidate:
|
| 411 |
for key, value in candidate_dict.items():
|
| 412 |
setattr(existing_candidate, key, value)
|
|
|
|
| 413 |
db_candidate = existing_candidate
|
| 414 |
else:
|
| 415 |
db_candidate = Candidate(**candidate_dict)
|
|
|
|
| 416 |
db.add(db_candidate)
|
| 417 |
|
| 418 |
db.flush()
|
|
|
|
| 538 |
candidate_email = profile.get("email") or current_user.email
|
| 539 |
depositor_role = cast(UserRole, current_user.role)
|
| 540 |
|
| 541 |
+
is_recruiter_upload = depositor_role == UserRole.recruiter
|
| 542 |
candidate = db.query(Candidate).filter(Candidate.email == candidate_email).first()
|
| 543 |
|
| 544 |
if candidate:
|
| 545 |
+
if is_recruiter_upload:
|
| 546 |
+
candidate.recruiter_id = current_user.id
|
| 547 |
+
else:
|
| 548 |
+
candidate.user_id = current_user.id
|
| 549 |
candidate.full_name = profile.get("full_name") or current_user.full_name
|
| 550 |
candidate.phone = profile.get("phone")
|
| 551 |
candidate.raw_text = text[:5000]
|
|
|
|
| 561 |
candidate.cv_path = None # not persisted
|
| 562 |
else:
|
| 563 |
candidate = Candidate(
|
| 564 |
+
user_id=None if is_recruiter_upload else current_user.id,
|
| 565 |
+
recruiter_id=current_user.id if is_recruiter_upload else None,
|
| 566 |
full_name=profile.get("full_name") or current_user.full_name,
|
| 567 |
email=candidate_email,
|
| 568 |
phone=profile.get("phone"),
|
app/main.py
CHANGED
|
@@ -19,25 +19,20 @@ import logging
|
|
| 19 |
class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
|
| 20 |
"""
|
| 21 |
Middleware to ensure redirects use HTTPS in production.
|
| 22 |
-
When deployed behind a reverse proxy
|
| 23 |
but should redirect to HTTPS. Starlette's redirect_slashes uses the request scheme,
|
| 24 |
so we wrap the scope to force HTTPS redirects in production.
|
|
|
|
| 25 |
"""
|
| 26 |
-
|
| 27 |
# In production, ensure the scheme seen by Starlette is HTTPS
|
| 28 |
# by checking X-Forwarded-Proto header (set by reverse proxies)
|
| 29 |
-
if (os.getenv("NODE_ENV") == "production" or
|
| 30 |
-
|
| 31 |
forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
|
| 32 |
if forwarded_proto == "https":
|
| 33 |
-
# Force the scope to use https so redirects are generated correctly
|
| 34 |
request.scope["scheme"] = "https"
|
| 35 |
|
| 36 |
-
return await call_next(request) """
|
| 37 |
-
async def dispatch(self, request: Request, call_next):
|
| 38 |
-
forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
|
| 39 |
-
if forwarded_proto == "https":
|
| 40 |
-
request.scope["scheme"] = "https"
|
| 41 |
return await call_next(request)
|
| 42 |
|
| 43 |
|
|
@@ -52,12 +47,9 @@ app = FastAPI(
|
|
| 52 |
redirect_slashes=True,
|
| 53 |
)
|
| 54 |
|
| 55 |
-
|
| 56 |
# Add HTTPS redirect middleware BEFORE CORS to catch all requests
|
| 57 |
-
|
| 58 |
-
app.add_middleware(HTTPSRedirectMiddleware)
|
| 59 |
-
app.add_middleware(HTTPSRedirectMiddleware)
|
| 60 |
-
|
| 61 |
|
| 62 |
# Configure CORS
|
| 63 |
allowed_origins = [
|
|
|
|
| 19 |
class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
|
| 20 |
"""
|
| 21 |
Middleware to ensure redirects use HTTPS in production.
|
| 22 |
+
When deployed behind a reverse proxy the request arrives as HTTP
|
| 23 |
but should redirect to HTTPS. Starlette's redirect_slashes uses the request scheme,
|
| 24 |
so we wrap the scope to force HTTPS redirects in production.
|
| 25 |
+
The scheme rewrite only runs when DEPLOY_ENV=production or NODE_ENV=production; the middleware itself is only registered when ENABLE_HTTPS_REDIRECT=true.
|
| 26 |
"""
|
| 27 |
+
async def dispatch(self, request: Request, call_next):
|
| 28 |
# In production, ensure the scheme seen by Starlette is HTTPS
|
| 29 |
# by checking X-Forwarded-Proto header (set by reverse proxies)
|
| 30 |
+
if (os.getenv("NODE_ENV") == "production" or
|
| 31 |
+
os.getenv("DEPLOY_ENV") == "production"):
|
| 32 |
forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
|
| 33 |
if forwarded_proto == "https":
|
|
|
|
| 34 |
request.scope["scheme"] = "https"
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return await call_next(request)
|
| 37 |
|
| 38 |
|
|
|
|
| 47 |
redirect_slashes=True,
|
| 48 |
)
|
| 49 |
|
|
|
|
| 50 |
# Add HTTPS redirect middleware BEFORE CORS to catch all requests
|
| 51 |
+
if os.getenv("ENABLE_HTTPS_REDIRECT", "false").lower() == "true":
|
| 52 |
+
app.add_middleware(HTTPSRedirectMiddleware)
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# Configure CORS
|
| 55 |
allowed_origins = [
|
app/models/models.py
CHANGED
|
@@ -52,6 +52,7 @@ class Candidate(Base):
|
|
| 52 |
|
| 53 |
id = Column(Integer, primary_key=True, index=True)
|
| 54 |
user_id = Column(Integer, ForeignKey("users.id"), nullable=True, unique=True)
|
|
|
|
| 55 |
full_name = Column(String, nullable=False)
|
| 56 |
email = Column(String, unique=True, index=True, nullable=False)
|
| 57 |
phone = Column(String, nullable=True)
|
|
|
|
| 52 |
|
| 53 |
id = Column(Integer, primary_key=True, index=True)
|
| 54 |
user_id = Column(Integer, ForeignKey("users.id"), nullable=True, unique=True)
|
| 55 |
+
recruiter_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
|
| 56 |
full_name = Column(String, nullable=False)
|
| 57 |
email = Column(String, unique=True, index=True, nullable=False)
|
| 58 |
phone = Column(String, nullable=True)
|
app/schemas/candidate.py
CHANGED
|
@@ -28,6 +28,7 @@ class CandidateResponse(CandidateBase):
|
|
| 28 |
raw_text: Optional[str]
|
| 29 |
owner_role: Optional[str] = None
|
| 30 |
is_visible: bool = False
|
|
|
|
| 31 |
created_at: datetime
|
| 32 |
updated_at: Optional[datetime] = None
|
| 33 |
|
|
|
|
| 28 |
raw_text: Optional[str]
|
| 29 |
owner_role: Optional[str] = None
|
| 30 |
is_visible: bool = False
|
| 31 |
+
recruiter_id: Optional[int] = None
|
| 32 |
created_at: datetime
|
| 33 |
updated_at: Optional[datetime] = None
|
| 34 |
|