#!/usr/bin/env python3
"""
Multi-CV extraction benchmark.

Runs the CV extraction pipeline against several resume layouts and reports
coverage scores per section so we can track robustness across formats.
"""

from __future__ import annotations

import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional

from app.services.cv_extractor import CVExtractionService


@dataclass
class BenchmarkSample:
    name: str
    text: str
    expected: Dict[str, int]


def _load_reference_text() -> str:
    """Best-effort loader for the existing test CV fixture."""
    fixture_path = Path(__file__).with_name("test_cv.txt")
    if not fixture_path.exists():
        return ""

    raw_bytes = fixture_path.read_bytes()
    for encoding in ("utf-16", "utf-16-le", "utf-8", "latin-1"):
        try:
            text = raw_bytes.decode(encoding)
            cleaned = text.strip()
            alpha_count = sum(1 for char in cleaned if char.isalpha())
            if len(cleaned) >= 20 and alpha_count >= 10 and "\x00" not in cleaned:
                return text
        except Exception:
            continue

    return ""


def _build_samples() -> List[BenchmarkSample]:
    return [
        BenchmarkSample(
            name="Structured English CV",
            text="""
JOHN SMITH
john.smith@example.com | +33 6 12 34 56 78 | linkedin.com/in/johnsmith

PROFESSIONAL SUMMARY
Senior Full Stack Developer with 8 years of experience in web development.

EXPERIENCE
Senior Developer - Tech Company Inc (2020-2024)
- Led team of 5 developers
- Built microservices using Python and FastAPI
- Managed PostgreSQL databases

Junior Developer - Startup LLC (2016-2020)
- Developed React frontend applications
- Worked with Node.js backend

EDUCATION
Bachelor of Science in Computer Science
University of Technology (2016)

SKILLS
Python, JavaScript, TypeScript, SQL, HTML/CSS, FastAPI, React, Docker
""".strip(),
            expected={"identity": 3, "experience": 2, "education": 2, "skills": 6, "enrichment": 1},
        ),
        BenchmarkSample(
            name="French CV with links",
            text="""
MARIE DUPONT
marie.dupont@gmail.com | 06 12 34 56 78 | Paris
linkedin.com/in/mariedupont | github.com/mdupont | mariedupont.dev

PROFIL
Chef de projet digital orientée produit et expérience client.

EXPÉRIENCES PROFESSIONNELLES
Responsable Marketing Digital - Entreprise X (2021 - Présent)
- Pilotage de campagnes multi-canaux
- Analyse des performances et reporting

Chef de projet CRM - Société Y (2018 - 2021)
- Mise en place d'automatisations marketing
- Coordination avec les équipes produit et design

FORMATION
Master Marketing Digital - Université de Lyon (2018)

COMPÉTENCES
Communication, Organisation, Gestion de projet, Leadership, Sens du contact

CERTIFICATIONS
Google Analytics Individual Qualification
HubSpot Inbound Marketing

PROJETS
Refonte du parcours d'onboarding client
""".strip(),
            expected={"identity": 3, "experience": 2, "education": 1, "skills": 4, "enrichment": 4},
        ),
        BenchmarkSample(
            name="OCR noisy CV",
            text="""
ALEXANDRE MARTIN
alex.martin@example.com

EXPERIENCE PROFESSIONNELLE
2022 - PRESENT | DATA ENGINEER | BLUE ANALYTICS
Built ETL pipelines on Airflow and Spark
Implemented data quality checks and dashboards

2020 - 2022 - BI ANALYST - RETAIL GROUP
Automated SQL reporting and Power BI models

FORMATION
2019 - Master Data Science - Paris School of AI

LANGUES
French English

COMPETENCES
Python, SQL, Airflow, Spark, Power BI, Communication

PROJECTS
Customer churn prediction using Python and scikit-learn
""".strip(),
            expected={"identity": 2, "experience": 2, "education": 1, "skills": 4, "enrichment": 2},
        ),
        BenchmarkSample(
            name="Minimal fallback CV",
            text="""
NOAH LEROY
noah.leroy@outlook.com

Some short CV text with little structure.
Python, SQL, Docker.
""".strip(),
            expected={"identity": 2, "experience": 0, "education": 0, "skills": 2, "enrichment": 0},
        ),
        BenchmarkSample(
            name="Ultra short CV",
            text="""
NADIA BENALI
nadia.benali@example.com
+33 6 98 76 54 32
Paris, France

Python | SQL | Data analysis
""".strip(),
            expected={"identity": 3, "experience": 0, "education": 0, "skills": 2, "enrichment": 1},
        ),
    ]


def _safe_len(value: Any) -> int:
    if isinstance(value, list):
        return len(value)
    if value:
        return 1
    return 0


def _score_section(found: int, expected: int) -> float:
    if expected <= 0:
        return 100.0 if found == 0 else min(found * 25.0, 100.0)
    return min(found / expected, 1.0) * 100.0


def _build_section_scores(structured: Dict[str, Any]) -> Dict[str, float]:
    identity_found = sum(
        [
            1 if structured.get("full_name") else 0,
            1 if structured.get("email") else 0,
            1 if structured.get("phone") else 0,
        ]
    )
    experience_found = _safe_len(structured.get("experiences"))
    education_found = _safe_len(structured.get("education"))
    skills_found = _safe_len(structured.get("skills"))
    enrichment_found = sum(
        [
            _safe_len(structured.get("linkedin_urls")),
            _safe_len(structured.get("github_urls")),
            _safe_len(structured.get("portfolio_urls")),
            _safe_len(structured.get("certifications")),
            _safe_len(structured.get("projects")),
            _safe_len(structured.get("languages")),
            _safe_len(structured.get("soft_skills")),
            _safe_len(structured.get("interests")),
        ]
    )

    return {
        "identity": float(identity_found),
        "experience": float(experience_found),
        "education": float(education_found),
        "skills": float(skills_found),
        "enrichment": float(enrichment_found),
    }


def _overall_score(section_hits: Dict[str, float], expected: Dict[str, int]) -> float:
    weights = {
        "identity": 0.25,
        "experience": 0.30,
        "education": 0.15,
        "skills": 0.15,
        "enrichment": 0.15,
    }

    total = 0.0
    for section, weight in weights.items():
        total += weight * _score_section(int(section_hits[section]), expected.get(section, 0))

    return round(total, 1)


def _diagnose_missing_fields(sample: BenchmarkSample, structured: Dict[str, Any]) -> List[str]:
    """Return human-readable reasons for missing fields in a given sample."""
    reasons: List[str] = []
    lines = [line.strip() for line in sample.text.splitlines() if line.strip()]
    normalized_text = sample.text.lower()

    if not structured.get("full_name"):
        top_lines = lines[:5]
        has_name_like_line = any(
            2 <= len(re.findall(r"[A-Za-zÀ-ÿ'-]+", line)) <= 4 and not re.search(r"[@\d]", line)
            for line in top_lines
        )
        if has_name_like_line:
            reasons.append("Nom probable présent en haut du CV mais rejeté par les filtres de nom.")
        elif structured.get("email"):
            reasons.append("Nom absent mais un email est disponible: vérifier l'inférence depuis l'email.")
        else:
            reasons.append("Aucune ligne de nom claire détectée dans les premières lignes.")

    if not structured.get("phone"):
        has_phone_like_text = bool(
            re.search(r"\+?\d[\d\s().-]{7,}\d", sample.text)
        )
        if has_phone_like_text:
            reasons.append("Un numéro semble présent mais n'a pas passé la normalisation téléphone.")
        else:
            reasons.append("Aucun motif téléphone suffisamment clair détecté.")

    if not structured.get("experiences"):
        if any(keyword in normalized_text for keyword in ("experience", "experiences", "professionnelle", "work experience", "stage")):
            reasons.append("Section expérience détectée mais aucun bloc stable titre/entreprise/période n'a pu être construit.")
        else:
            reasons.append("Aucune section expérience ou ancre de période détectée.")

    if not structured.get("education") and any(keyword in normalized_text for keyword in ("formation", "education", "study", "universit", "school")):
        reasons.append("Section formation présente mais les lignes ne ressemblaient pas assez à de l'éducation.")

    if not structured.get("skills"):
        if any(token in normalized_text for token in ("python", "sql", "java", "react", "docker", "airflow", "spark")):
            reasons.append("Des mots-clés techniques existent mais la normalisation a raté l'extraction de compétences.")
        elif any(keyword in normalized_text for keyword in ("communication", "organisation", "leadership", "rigueur", "autonomie", "gestion de projet", "sens du contact")):
            reasons.append("Le CV contient surtout des compétences génériques/soft skills; vérifier si elles doivent être reportées dans skills ou seulement dans soft_skills.")
        else:
            reasons.append("Aucune compétence technique évidente détectée.")

    if not (
        structured.get("linkedin_urls")
        or structured.get("github_urls")
        or structured.get("portfolio_urls")
        or structured.get("projects")
        or structured.get("certifications")
    ):
        reasons.append("Aucun signal d'enrichissement (liens/projets/certifications) détecté.")

    return reasons


def run_benchmark(diagnostic: bool = False) -> int:
    service = CVExtractionService()
    samples = _build_samples()

    print("=" * 78)
    print("Multi-CV Extraction Benchmark")
    print("=" * 78)

    aggregate: List[float] = []

    for index, sample in enumerate(samples, start=1):
        result = service.extract_from_text(sample.text)
        structured = result.structured
        section_hits = _build_section_scores(structured)
        overall = _overall_score(section_hits, sample.expected)
        aggregate.append(overall)

        print(f"\n[{index}] {sample.name}")
        print(f"  Overall: {overall:.1f}/100")
        print(
            "  Sections: "
            f"identity={_score_section(int(section_hits['identity']), sample.expected['identity']):.1f}, "
            f"experience={_score_section(int(section_hits['experience']), sample.expected['experience']):.1f}, "
            f"education={_score_section(int(section_hits['education']), sample.expected['education']):.1f}, "
            f"skills={_score_section(int(section_hits['skills']), sample.expected['skills']):.1f}, "
            f"enrichment={_score_section(int(section_hits['enrichment']), sample.expected['enrichment']):.1f}"
        )
        print(
            "  Extracted: "
            f"name={bool(structured.get('full_name'))}, "
            f"email={bool(structured.get('email'))}, "
            f"phone={bool(structured.get('phone'))}, "
            f"experiences={len(structured.get('experiences', []))}, "
            f"education={len(structured.get('education', []))}, "
            f"skills={len(structured.get('skills', []))}, "
            f"links={len(structured.get('linkedin_urls', [])) + len(structured.get('github_urls', [])) + len(structured.get('portfolio_urls', []))}, "
            f"projects={len(structured.get('projects', []))}, "
            f"certifications={len(structured.get('certifications', []))}"
        )

        if diagnostic:
            reasons = _diagnose_missing_fields(sample, structured)
            if reasons:
                print("  Diagnostics:")
                for reason in reasons:
                    print(f"    - {reason}")

    average_score = round(sum(aggregate) / len(aggregate), 1) if aggregate else 0.0
    print("\n" + "=" * 78)
    print(f"Average overall score: {average_score:.1f}/100")
    print("=" * 78)

    return 0 if average_score >= 70.0 else 1


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run the CV extraction benchmark")
    parser.add_argument(
        "--diagnostic",
        action="store_true",
        help="Print human-readable reasons for missing fields",
    )
    args = parser.parse_args()
    raise SystemExit(run_benchmark(diagnostic=args.diagnostic))