| |
| """Safe cleanup for test/demo candidate profiles. |
| |
| Creates a backup JSON of affected candidates and removes their DB rows and uploaded CV files. |
| Run from project root with the same python env used for the backend. |
| |
| Example: |
| DATABASE_URL=sqlite:///./ai_talent_finder.db .venv/bin/python backend/scripts/cleanup_test_profiles.py |
| """ |
| import os |
| import json |
| import sqlite3 |
| from pathlib import Path |
| import shutil |
|
|
| DB_PATH = os.environ.get("DATABASE_URL", "sqlite:///./ai_talent_finder.db") |
|
|
| def sqlite_path_from_url(url: str) -> Path: |
| |
| if url.startswith("sqlite:///"): |
| path = url.replace("sqlite:///", "") |
| return Path(path).resolve() |
| raise ValueError("Unsupported DB URL: " + url) |
|
|
|
|
| def main(): |
| db_path = sqlite_path_from_url(DB_PATH) |
| if not db_path.exists(): |
| print(f"DB not found: {db_path}") |
| return |
|
|
| out_dir = Path("archive") / "cleanups" |
| out_dir.mkdir(parents=True, exist_ok=True) |
| backup_file = out_dir / f"candidates_backup_{os.getpid()}.json" |
|
|
| conn = sqlite3.connect(str(db_path)) |
| conn.row_factory = sqlite3.Row |
| cur = conn.cursor() |
|
|
| |
| query = """ |
| SELECT id, full_name, email, cv_path, extraction_quality_score, is_fully_extracted |
| FROM candidates |
| WHERE (extraction_quality_score IS NULL OR extraction_quality_score < 80) |
| OR lower(full_name) LIKE '%unknown%' |
| OR lower(full_name) LIKE '%test%' |
| OR lower(full_name) LIKE '%demo%' |
| OR lower(email) LIKE 'candidate-%' |
| """ |
|
|
| rows = cur.execute(query).fetchall() |
| rows_data = [dict(r) for r in rows] |
| if not rows_data: |
| print("No candidate rows matched the cleanup criteria.") |
| return |
|
|
| |
| with backup_file.open("w", encoding="utf-8") as f: |
| json.dump(rows_data, f, ensure_ascii=False, indent=2) |
| print(f"Backed up {len(rows_data)} candidates to {backup_file}") |
|
|
| |
| uploads_root = Path(__file__).resolve().parents[2] / "uploads" |
| for r in rows_data: |
| cv = r.get("cv_path") |
| if cv: |
| file_path = Path(__file__).resolve().parents[2] / cv |
| try: |
| if file_path.exists(): |
| file_path.unlink() |
| print(f"Removed upload: {file_path}") |
| except Exception as e: |
| print(f"Failed to remove {file_path}: {e}") |
|
|
| |
| ids = [str(r["id"]) for r in rows_data] |
| id_list = ",".join(ids) |
| try: |
| cur.execute(f"PRAGMA foreign_keys = ON;") |
| cur.execute(f"DELETE FROM candidate_skills WHERE candidate_id IN ({id_list})") |
| cur.execute(f"DELETE FROM favorites WHERE candidate_id IN ({id_list})") |
| cur.execute(f"DELETE FROM match_results WHERE candidate_id IN ({id_list})") |
| cur.execute(f"DELETE FROM candidates WHERE id IN ({id_list})") |
| conn.commit() |
| print(f"Deleted {len(ids)} candidate rows from DB.") |
| except Exception as e: |
| conn.rollback() |
| print(f"DB deletion failed: {e}") |
| finally: |
| conn.close() |
|
|
|
|
| if __name__ == '__main__': |
| main() |
|
|