ilyass yani committed on
Commit
bf6b5ed
·
1 Parent(s): c53f53f

Batch ameliorations: fix candidats, NER nom, recruiter_id + migration

Browse files
.dockerignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.so
4
+ .pytest_cache/
5
+ .mypy_cache/
6
+ .ruff_cache/
7
+ .venv/
8
+ venv/
9
+ .env
10
+ .git
11
+ .gitignore
12
+ .vscode/
13
+ uploads/
14
+ tests/
15
+ *.db
ai_module/nlp/resume_ner_extractor.py CHANGED
@@ -243,35 +243,43 @@ class ResumeNERExtractor:
243
  email_fallback = self._infer_name_from_email(emails[0])
244
 
245
  for index, raw_line in enumerate(lines[:80]):
246
- line = raw_line.strip().strip('•*-').strip()
247
  normalized = self._normalize_for_matching(line)
248
 
249
- if not line or '@' in line or 'http' in normalized or self._is_section_header(normalized):
250
  continue
251
- if any(token in normalized for token in ('linkedin', 'github', 'contact', 'profil', 'profile')):
252
  continue
253
  if any(char.isdigit() for char in line):
254
  continue
255
 
256
- words = [word for word in re.split(r'\s+', line) if word]
257
- if not 2 <= len(words) <= 3:
 
258
  continue
259
 
260
- alpha_words = sum(1 for word in words if re.search(r'[A-Za-zÀ-ÿ]', word))
261
  if alpha_words != len(words):
262
  continue
263
 
264
  score = 0
265
- if index < 15:
 
 
 
266
  score += 3
267
- if re.fullmatch(r"[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+){1,2}", line):
 
 
268
  score += 5
269
- elif re.fullmatch(r"[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+){1,2}", line):
 
270
  score += 4
271
- else:
 
272
  score += 2
273
 
274
- if any(keyword in normalized for keyword in ('experience', 'formation', 'education', 'profil', 'contact')):
275
  score -= 4
276
 
277
  candidates.append((score, line.title()))
 
243
  email_fallback = self._infer_name_from_email(emails[0])
244
 
245
  for index, raw_line in enumerate(lines[:80]):
246
+ line = raw_line.strip().strip('•*-').strip()
247
  normalized = self._normalize_for_matching(line)
248
 
249
+ if not line or '@' in line or 'http' in normalized or self._is_section_header(normalized):
250
  continue
251
+ if any(token in normalized for token in ('linkedin', 'github', 'contact', 'profil', 'profile')):
252
  continue
253
  if any(char.isdigit() for char in line):
254
  continue
255
 
256
+ words = [word for word in re.split(r'\s+', line) if word]
257
+ # Allow 1–4 words: single-name aliases, compound names, and particle names (de, van…)
258
+ if not 1 <= len(words) <= 4:
259
  continue
260
 
261
+ alpha_words = sum(1 for word in words if re.search(r'[A-Za-zÀ-ÿ]', word))
262
  if alpha_words != len(words):
263
  continue
264
 
265
  score = 0
266
+ # Strong position bonus for the very first lines of the CV
267
+ if index < 5:
268
+ score += 5
269
+ elif index < 15:
270
  score += 3
271
+
272
+ # ALL_CAPS name (common in French CVs): highest match
273
+ if re.fullmatch(r"[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-ZÀ-Ÿ'’\-]+){0,3}", line):
274
  score += 5
275
+ # Title-case properly capitalized name
276
+ elif re.fullmatch(r"[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+(?:\s+[A-ZÀ-Ÿ][A-Za-zÀ-ÿ'’\-]+){0,3}", line):
277
  score += 4
278
+ # Mixed-case but all letters
279
+ elif all(re.search(r'[A-Za-zÀ-ÿ]', w) for w in words):
280
  score += 2
281
 
282
+ if any(keyword in normalized for keyword in ('experience', 'formation', 'education', 'profil', 'contact')):
283
  score -= 4
284
 
285
  candidates.append((score, line.title()))
alembic/versions/20260620_add_recruiter_id.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Add recruiter_id to candidates to track who uploaded each CV
2
+
3
+ Revision ID: 20260620_add_recruiter_id
4
+ Revises: 20260617_add_profile_visibility
5
+ Create Date: 2026-06-20 00:00:00.000000
6
+ """
7
+ from typing import Sequence, Union
8
+
9
+ from alembic import op
10
+ import sqlalchemy as sa
11
+
12
+
13
+ revision: str = "20260620_add_recruiter_id"
14
+ down_revision: Union[str, None] = "20260617_add_profile_visibility"
15
+ branch_labels: Union[str, Sequence[str], None] = None
16
+ depends_on: Union[str, Sequence[str], None] = None
17
+
18
+
19
+ def upgrade() -> None:
20
+ op.add_column(
21
+ "candidates",
22
+ sa.Column(
23
+ "recruiter_id",
24
+ sa.Integer(),
25
+ sa.ForeignKey("users.id"),
26
+ nullable=True,
27
+ ),
28
+ )
29
+ op.create_index("ix_candidates_recruiter_id", "candidates", ["recruiter_id"])
30
+ # Backfill: existing recruiter-deposited profiles have owner_role='recruiter'
31
+ # and their user_id was incorrectly set to the recruiter's id.
32
+ # Move that value to recruiter_id and clear user_id so the unique constraint
33
+ # no longer blocks future uploads by the same recruiter.
34
+ op.execute("""
35
+ UPDATE candidates
36
+ SET recruiter_id = user_id, user_id = NULL
37
+ WHERE owner_role = 'recruiter' AND user_id IS NOT NULL
38
+ """)
39
+
40
+
41
+ def downgrade() -> None:
42
+ op.drop_index("ix_candidates_recruiter_id", table_name="candidates")
43
+ op.drop_column("candidates", "recruiter_id")
app/api/candidates.py CHANGED
@@ -84,7 +84,7 @@ def get_candidates(
84
  elif current_user.role == UserRole.recruiter:
85
  query = db.query(Candidate).filter(
86
  or_(
87
- Candidate.user_id == current_user.id,
88
  and_(
89
  or_(
90
  Candidate.owner_role == "candidate",
@@ -380,29 +380,39 @@ async def upload_candidate_cv(
380
 
381
  # Visibility metadata
382
  depositor_role = cast(UserRole, current_user.role)
383
- candidate_dict["user_id"] = current_user.id
384
  candidate_dict["owner_role"] = depositor_role.value # "candidate" or "recruiter"
385
  # Recruiter deposits are always private; candidate profiles start hidden
386
  candidate_dict["is_visible"] = False
387
 
388
- # Upsert: prefer user_id match, fall back to email
389
- existing_candidate = db.query(Candidate).filter(
390
- Candidate.user_id == current_user.id
391
- ).first()
392
-
393
- if not existing_candidate and candidate_dict.get("email"):
394
  existing_candidate = db.query(Candidate).filter(
395
- Candidate.email == candidate_dict["email"]
396
  ).first()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
  if existing_candidate:
399
  for key, value in candidate_dict.items():
400
  setattr(existing_candidate, key, value)
401
- existing_candidate.user_id = current_user.id
402
  db_candidate = existing_candidate
403
  else:
404
  db_candidate = Candidate(**candidate_dict)
405
- db_candidate.user_id = current_user.id
406
  db.add(db_candidate)
407
 
408
  db.flush()
@@ -528,10 +538,14 @@ async def upload_cv_with_ner(
528
  candidate_email = profile.get("email") or current_user.email
529
  depositor_role = cast(UserRole, current_user.role)
530
 
 
531
  candidate = db.query(Candidate).filter(Candidate.email == candidate_email).first()
532
 
533
  if candidate:
534
- candidate.user_id = current_user.id
 
 
 
535
  candidate.full_name = profile.get("full_name") or current_user.full_name
536
  candidate.phone = profile.get("phone")
537
  candidate.raw_text = text[:5000]
@@ -547,7 +561,8 @@ async def upload_cv_with_ner(
547
  candidate.cv_path = None # not persisted
548
  else:
549
  candidate = Candidate(
550
- user_id=current_user.id,
 
551
  full_name=profile.get("full_name") or current_user.full_name,
552
  email=candidate_email,
553
  phone=profile.get("phone"),
 
84
  elif current_user.role == UserRole.recruiter:
85
  query = db.query(Candidate).filter(
86
  or_(
87
+ Candidate.recruiter_id == current_user.id,
88
  and_(
89
  or_(
90
  Candidate.owner_role == "candidate",
 
380
 
381
  # Visibility metadata
382
  depositor_role = cast(UserRole, current_user.role)
 
383
  candidate_dict["owner_role"] = depositor_role.value # "candidate" or "recruiter"
384
  # Recruiter deposits are always private; candidate profiles start hidden
385
  candidate_dict["is_visible"] = False
386
 
387
+ if depositor_role == UserRole.candidate:
388
+ # Candidate owns their profile: link via user_id (unique per user)
389
+ candidate_dict["user_id"] = current_user.id
390
+ candidate_dict["recruiter_id"] = None
 
 
391
  existing_candidate = db.query(Candidate).filter(
392
+ Candidate.user_id == current_user.id
393
  ).first()
394
+ if not existing_candidate and candidate_dict.get("email"):
395
+ existing_candidate = db.query(Candidate).filter(
396
+ Candidate.email == candidate_dict["email"]
397
+ ).first()
398
+ else:
399
+ # Recruiter deposits: track recruiter via recruiter_id, NOT user_id.
400
+ # This allows a recruiter to upload many CVs without constraint conflicts.
401
+ candidate_dict["user_id"] = None
402
+ candidate_dict["recruiter_id"] = current_user.id
403
+ # Upsert only by email to avoid overwriting a different person's record
404
+ existing_candidate = None
405
+ if candidate_dict.get("email"):
406
+ existing_candidate = db.query(Candidate).filter(
407
+ Candidate.email == candidate_dict["email"]
408
+ ).first()
409
 
410
  if existing_candidate:
411
  for key, value in candidate_dict.items():
412
  setattr(existing_candidate, key, value)
 
413
  db_candidate = existing_candidate
414
  else:
415
  db_candidate = Candidate(**candidate_dict)
 
416
  db.add(db_candidate)
417
 
418
  db.flush()
 
538
  candidate_email = profile.get("email") or current_user.email
539
  depositor_role = cast(UserRole, current_user.role)
540
 
541
+ is_recruiter_upload = depositor_role == UserRole.recruiter
542
  candidate = db.query(Candidate).filter(Candidate.email == candidate_email).first()
543
 
544
  if candidate:
545
+ if is_recruiter_upload:
546
+ candidate.recruiter_id = current_user.id
547
+ else:
548
+ candidate.user_id = current_user.id
549
  candidate.full_name = profile.get("full_name") or current_user.full_name
550
  candidate.phone = profile.get("phone")
551
  candidate.raw_text = text[:5000]
 
561
  candidate.cv_path = None # not persisted
562
  else:
563
  candidate = Candidate(
564
+ user_id=None if is_recruiter_upload else current_user.id,
565
+ recruiter_id=current_user.id if is_recruiter_upload else None,
566
  full_name=profile.get("full_name") or current_user.full_name,
567
  email=candidate_email,
568
  phone=profile.get("phone"),
app/main.py CHANGED
@@ -19,25 +19,20 @@ import logging
19
  class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
20
  """
21
  Middleware to ensure redirects use HTTPS in production.
22
- When deployed behind a reverse proxy (e.g., Railway), the request arrives as HTTP
23
  but should redirect to HTTPS. Starlette's redirect_slashes uses the request scheme,
24
  so we wrap the scope to force HTTPS redirects in production.
 
25
  """
26
- """ async def dispatch(self, request: Request, call_next):
27
  # In production, ensure the scheme seen by Starlette is HTTPS
28
  # by checking X-Forwarded-Proto header (set by reverse proxies)
29
- if (os.getenv("NODE_ENV") == "production" or
30
- os.getenv("RAILWAY_ENVIRONMENT_NAME") == "production"):
31
  forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
32
  if forwarded_proto == "https":
33
- # Force the scope to use https so redirects are generated correctly
34
  request.scope["scheme"] = "https"
35
 
36
- return await call_next(request) """
37
- async def dispatch(self, request: Request, call_next):
38
- forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
39
- if forwarded_proto == "https":
40
- request.scope["scheme"] = "https"
41
  return await call_next(request)
42
 
43
 
@@ -52,12 +47,9 @@ app = FastAPI(
52
  redirect_slashes=True,
53
  )
54
 
55
-
56
  # Add HTTPS redirect middleware BEFORE CORS to catch all requests
57
- """ if os.getenv("ENABLE_HTTPS_REDIRECT", "false").lower() == "true":
58
- app.add_middleware(HTTPSRedirectMiddleware) """
59
- app.add_middleware(HTTPSRedirectMiddleware)
60
-
61
 
62
  # Configure CORS
63
  allowed_origins = [
 
19
  class HTTPSRedirectMiddleware(BaseHTTPMiddleware):
20
  """
21
  Middleware to ensure redirects use HTTPS in production.
22
+ When deployed behind a reverse proxy the request arrives as HTTP
23
  but should redirect to HTTPS. Starlette's redirect_slashes uses the request scheme,
24
  so we wrap the scope to force HTTPS redirects in production.
25
+ The scheme rewrite applies only when DEPLOY_ENV=production or NODE_ENV=production; the middleware itself is registered only when ENABLE_HTTPS_REDIRECT=true.
26
  """
27
+ async def dispatch(self, request: Request, call_next):
28
  # In production, ensure the scheme seen by Starlette is HTTPS
29
  # by checking X-Forwarded-Proto header (set by reverse proxies)
30
+ if (os.getenv("NODE_ENV") == "production" or
31
+ os.getenv("DEPLOY_ENV") == "production"):
32
  forwarded_proto = request.headers.get("x-forwarded-proto", "").lower()
33
  if forwarded_proto == "https":
 
34
  request.scope["scheme"] = "https"
35
 
 
 
 
 
 
36
  return await call_next(request)
37
 
38
 
 
47
  redirect_slashes=True,
48
  )
49
 
 
50
  # Add HTTPS redirect middleware BEFORE CORS to catch all requests
51
+ if os.getenv("ENABLE_HTTPS_REDIRECT", "false").lower() == "true":
52
+ app.add_middleware(HTTPSRedirectMiddleware)
 
 
53
 
54
  # Configure CORS
55
  allowed_origins = [
app/models/models.py CHANGED
@@ -52,6 +52,7 @@ class Candidate(Base):
52
 
53
  id = Column(Integer, primary_key=True, index=True)
54
  user_id = Column(Integer, ForeignKey("users.id"), nullable=True, unique=True)
 
55
  full_name = Column(String, nullable=False)
56
  email = Column(String, unique=True, index=True, nullable=False)
57
  phone = Column(String, nullable=True)
 
52
 
53
  id = Column(Integer, primary_key=True, index=True)
54
  user_id = Column(Integer, ForeignKey("users.id"), nullable=True, unique=True)
55
+ recruiter_id = Column(Integer, ForeignKey("users.id"), nullable=True, index=True)
56
  full_name = Column(String, nullable=False)
57
  email = Column(String, unique=True, index=True, nullable=False)
58
  phone = Column(String, nullable=True)
app/schemas/candidate.py CHANGED
@@ -28,6 +28,7 @@ class CandidateResponse(CandidateBase):
28
  raw_text: Optional[str]
29
  owner_role: Optional[str] = None
30
  is_visible: bool = False
 
31
  created_at: datetime
32
  updated_at: Optional[datetime] = None
33
 
 
28
  raw_text: Optional[str]
29
  owner_role: Optional[str] = None
30
  is_visible: bool = False
31
+ recruiter_id: Optional[int] = None
32
  created_at: datetime
33
  updated_at: Optional[datetime] = None
34