from typing import Dict, Any, List
from schema.models import State, Reward, GradeResult


class SecurityGrader:
    """
    Deterministic grader that condenses a finished episode into a GradeResult.

    The overall score is a weighted sum of four metrics. Every metric is
    clamped to [0.0, 1.0] before weighting, so no single metric can
    contribute more than its weight; the final score is also in [0.0, 1.0].

    Scoring weights:
        - Security Efficiency  : 0.40  (blocked / total threats)
        - User Retention       : 0.30  (final trust / 100)
        - Precision            : 0.20  (1 - false_positive_rate)
        - Reasoning Quality    : 0.10  (avg reasoning component)
    """

    WEIGHTS = {
        "security": 0.40,
        "trust": 0.30,
        "precision": 0.20,
        "quality": 0.10,
    }

    # Pass threshold used when the task lookup raises ValueError.
    DEFAULT_THRESHOLD = 0.7

    def grade_episode(
        self,
        final_state: State,
        rewards: List[Reward],
        task_id: str = "basic_security",
    ) -> GradeResult:
        """Grade one episode from its final state and per-step rewards.

        Args:
            final_state: State at the end of the episode. The grader reads
                ``threat_count``, ``blocked_threats``, ``false_positives``,
                ``user_trust``, ``system_fatigue``, ``step_count`` and
                ``total_reward``.
            rewards: Rewards collected during the episode; only
                ``components.reasoning_quality`` is read from each. May be
                empty, in which case reasoning quality counts as 0.0.
            task_id: Identifier used to look up the task's success threshold.

        Returns:
            A GradeResult with the weighted score (rounded to 4 decimals),
            the pass flag (``score >= threshold``), a letter grade, the
            rounded individual metrics and the raw episode details.
        """
        metrics = self._compute_metrics(final_state, rewards)
        score = self._weighted_score(metrics)

        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from tasks.registry import TaskRegistry
        try:
            threshold = TaskRegistry().get_task(task_id).success_threshold
        except ValueError:
            # NOTE(review): presumably raised for an unregistered task id;
            # verify against TaskRegistry.get_task.
            threshold = self.DEFAULT_THRESHOLD

        return GradeResult(
            score=score,
            passed=score >= threshold,
            grade=self._letter_grade(score),
            metrics={name: round(value, 4) for name, value in metrics.items()},
            details={
                "total_steps": final_state.step_count,
                "total_reward": final_state.total_reward,
                "threats_blocked": final_state.blocked_threats,
                "threat_count": final_state.threat_count,
                "false_positives": final_state.false_positives,
                "final_trust": round(final_state.user_trust, 2),
                "final_fatigue": round(final_state.system_fatigue, 2),
                "task_id": task_id,
                "threshold": threshold,
            },
        )

    @staticmethod
    def _clamp01(value: float) -> float:
        """Clamp *value* to the closed interval [0.0, 1.0]."""
        return min(max(value, 0.0), 1.0)

    @staticmethod
    def _compute_metrics(final_state: Any, rewards: List[Any]) -> Dict[str, float]:
        """Compute the unrounded individual metrics, each clamped to [0, 1].

        Args:
            final_state: Object exposing ``threat_count``, ``blocked_threats``,
                ``false_positives`` and ``user_trust``.
            rewards: Objects exposing ``components.reasoning_quality``.

        Returns:
            Dict with the keys ``security_efficiency``, ``user_retention``,
            ``false_positive_rate``, ``precision`` and ``reasoning_quality``.
        """
        clamp = SecurityGrader._clamp01

        # An episode without threats earns no security credit.
        if final_state.threat_count > 0:
            security_eff = clamp(
                final_state.blocked_threats / final_state.threat_count
            )
        else:
            security_eff = 0.0

        # Trust is treated as a 0-100 scale; values outside it are clamped
        # so that trust can never contribute more than its weight.
        user_retention = clamp(final_state.user_trust / 100.0)

        # False-positive rate is measured against threats + false positives.
        total_decisions = final_state.threat_count + final_state.false_positives
        if total_decisions > 0:
            fp_rate = clamp(final_state.false_positives / total_decisions)
        else:
            fp_rate = 0.0
        precision = clamp(1.0 - fp_rate)

        if rewards:
            avg_reasoning = clamp(
                sum(r.components.reasoning_quality for r in rewards) / len(rewards)
            )
        else:
            avg_reasoning = 0.0

        return {
            "security_efficiency": security_eff,
            "user_retention": user_retention,
            "false_positive_rate": fp_rate,
            "precision": precision,
            "reasoning_quality": avg_reasoning,
        }

    def _weighted_score(self, metrics: Dict[str, float]) -> float:
        """Combine *metrics* using WEIGHTS into a score in [0, 1], rounded to 4 places."""
        raw = (
            self.WEIGHTS["security"] * metrics["security_efficiency"]
            + self.WEIGHTS["trust"] * metrics["user_retention"]
            + self.WEIGHTS["precision"] * metrics["precision"]
            + self.WEIGHTS["quality"] * metrics["reasoning_quality"]
        )
        return round(self._clamp01(raw), 4)

    @staticmethod
    def _letter_grade(score: float) -> str:
        """Map a score to a letter grade.

        Bands (lower bound inclusive): A+ >= 0.90, A >= 0.80, B >= 0.70,
        C >= 0.60, D >= 0.50, otherwise F.
        """
        if score >= 0.90:
            return "A+"
        if score >= 0.80:
            return "A"
        if score >= 0.70:
            return "B"
        if score >= 0.60:
            return "C"
        if score >= 0.50:
            return "D"
        return "F"