"""
Commercial VLM/LLM API inference for manuscript transcription.

Supports:
- OpenAI GPT-4 Vision / GPT-4o
- Google Gemini Pro Vision / Gemini Flash
- Anthropic Claude 3 (Opus, Sonnet, Haiku)

Usage:
    # OpenAI
    api = OpenAIInference(api_key="sk-...")
    text = api.transcribe(image)

    # Gemini
    api = GeminiInference(api_key="...")
    text = api.transcribe(image)

    # Claude
    api = ClaudeInference(api_key="sk-ant-...")
    text = api.transcribe(image)
"""

import base64
import io
import time
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Dict, Any
from PIL import Image

# API clients (install with: pip install openai google-genai anthropic;
# google-generativeai is only used as a legacy fallback when google-genai is missing)
try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False

try:
    from google import genai as _google_genai_new
    from google.genai import types as _google_genai_types
    GEMINI_AVAILABLE = True
    GEMINI_NEW_SDK = True
except ImportError:
    GEMINI_NEW_SDK = False
    try:
        import google.generativeai as genai  # legacy fallback
        GEMINI_AVAILABLE = True
    except ImportError:
        GEMINI_AVAILABLE = False

try:
    from anthropic import Anthropic
    CLAUDE_AVAILABLE = True
except ImportError:
    CLAUDE_AVAILABLE = False


class BaseAPIInference(ABC):
    """Base class for commercial API inference.

    Stores the API key and the prompt used when a call does not supply one,
    and provides the image helpers (base64 encoding, bounded resizing) that
    the provider-specific subclasses share.
    """

    def __init__(self, api_key: str, default_prompt: Optional[str] = None):
        """
        Initialize API client.

        Args:
            api_key: API key for the service
            default_prompt: Default transcription prompt; when None or empty,
                the subclass's ``_get_default_prompt()`` is used instead
        """
        self.api_key = api_key
        self.default_prompt = default_prompt or self._get_default_prompt()

    @abstractmethod
    def _get_default_prompt(self) -> str:
        """Get default transcription prompt."""
        pass

    @abstractmethod
    def transcribe(
        self,
        image: Image.Image,
        prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """
        Transcribe a manuscript line image.

        Args:
            image: PIL Image
            prompt: Custom prompt (uses default if None)
            **kwargs: Provider-specific parameters

        Returns:
            Transcribed text
        """
        pass

    @staticmethod
    def encode_image_base64(image: Image.Image, format: str = "PNG") -> str:
        """
        Encode PIL Image to base64 string.

        Args:
            image: PIL Image
            format: Image format (PNG, JPEG, etc.) passed to ``image.save``

        Returns:
            Base64-encoded image string
        """
        buffered = io.BytesIO()
        image.save(buffered, format=format)
        return base64.b64encode(buffered.getvalue()).decode("utf-8")

    @staticmethod
    def _scaled_size(width: int, height: int, max_dimension: int) -> tuple:
        """Return (width, height) scaled so the longer side equals max_dimension.

        The shorter side is truncated to an integer but never drops below one
        pixel, so extreme aspect ratios still give a size that can be resized to.
        """
        # Multiply before dividing so exact ratios are not lost to float rounding.
        if width > height:
            return max_dimension, max(1, int(height * max_dimension / width))
        return max(1, int(width * max_dimension / height)), max_dimension

    @staticmethod
    def resize_image_if_needed(
        image: Image.Image,
        max_dimension: int = 2048
    ) -> Image.Image:
        """
        Resize image if larger than max dimension while preserving aspect ratio.

        Args:
            image: PIL Image
            max_dimension: Maximum width or height

        Returns:
            Resized image (or the original object if already small enough)
        """
        width, height = image.size

        if width <= max_dimension and height <= max_dimension:
            return image

        new_size = BaseAPIInference._scaled_size(width, height, max_dimension)
        return image.resize(new_size, Image.Resampling.LANCZOS)


class OpenAIInference(BaseAPIInference):
    """OpenAI GPT-4 Vision / GPT-4o inference."""

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-4o",  # gpt-4o, gpt-4-vision-preview, gpt-4-turbo
        default_prompt: Optional[str] = None
    ):
        """
        Initialize OpenAI inference.

        Args:
            api_key: OpenAI API key
            model: Model name
            default_prompt: Default transcription prompt

        Raises:
            ImportError: If the ``openai`` package could not be imported
        """
        if not OPENAI_AVAILABLE:
            raise ImportError("OpenAI library not installed. Install with: pip install openai")

        super().__init__(api_key, default_prompt)
        self.model = model
        self.client = OpenAI(api_key=api_key)

    def _get_default_prompt(self) -> str:
        """Return the prompt used when the caller supplies none."""
        return (
            "Transcribe all handwritten text in this manuscript image. "
            "Preserve the original language (Cyrillic, Latin, etc.) and layout. "
            "Output only the transcribed text without any additional commentary."
        )

    def transcribe(
        self,
        image: Image.Image,
        prompt: Optional[str] = None,
        max_tokens: int = 500,
        temperature: float = 1.0,
        **kwargs
    ) -> str:
        """
        Transcribe with OpenAI GPT-4 Vision.

        Args:
            image: PIL Image
            prompt: Custom prompt
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature (web default ~1.0). Lower (0-0.3) = deterministic; higher = more variation.
            **kwargs: Additional OpenAI parameters, forwarded to
                ``chat.completions.create``

        Returns:
            Transcribed text, or an empty string when the response carries no
            text content
        """
        prompt = prompt or self.default_prompt

        # Resize if needed (GPT-4V supports up to 2048x2048)
        image = self.resize_image_if_needed(image, max_dimension=2048)

        # Encode image as PNG and embed it as a data URL
        base64_image = self.encode_image_base64(image, format="PNG")

        # NOTE(review): some newer model families may reject `max_tokens` or a
        # non-default `temperature` — confirm against the models actually used.
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            **kwargs
        )

        # `content` is optional in the response; guard so a text-less reply
        # yields "" instead of an AttributeError on None.strip().
        message_text = response.choices[0].message.content
        return (message_text or "").strip()


class GeminiInference(BaseAPIInference):
    """Google Gemini inference via google-genai SDK (with legacy google-generativeai fallback).

    After each ``transcribe()`` call, ``last_usage`` holds the token counts (and
    any returned thought text) of the initial generate call; it is empty on the
    legacy SDK path.
    """

    # thinking_mode string -> thinking_budget token count (max tokens for internal reasoning)
    # "low":  8000  — explicit cap sent as ThinkingConfig.thinking_budget
    # "high": None  — no thinking_budget is sent, so no cap is requested
    #                 (ThinkingConfig(include_thoughts=True) is still sent)
    _THINKING_BUDGETS = {"low": 8000, "high": None}

    def __init__(
        self,
        api_key: str,
        model: str = "gemini-2.0-flash",
        default_prompt: Optional[str] = None,
    ):
        """
        Initialize Gemini inference.

        Args:
            api_key: Google AI API key
            model: Model name
            default_prompt: Default transcription prompt

        Raises:
            ImportError: If neither Gemini SDK could be imported
        """
        if not GEMINI_AVAILABLE:
            raise ImportError(
                "Google AI library not installed. Install with: pip install google-genai"
            )
        super().__init__(api_key, default_prompt)
        self.model_name = model
        # Populated after each transcribe() call — for UI token display
        self.last_usage: Dict[str, Any] = {}
        # Usage of the most recent _generate() call (including continuations)
        self._last_call_usage: Dict[str, Any] = {}

        if GEMINI_NEW_SDK:
            self._client = _google_genai_new.Client(api_key=api_key)
        else:
            # Legacy fallback
            genai.configure(api_key=api_key)
            self._legacy_model = genai.GenerativeModel(model)

    def _get_default_prompt(self) -> str:
        """Return the prompt used when the caller supplies none."""
        return (
            "Transcribe all handwritten text in this manuscript image. "
            "Preserve the original language (Cyrillic, Latin, etc.) and layout. "
            "Output only the transcribed text without any additional commentary."
        )

    def _build_config(self, temperature, max_output_tokens, thinking_budget, safety_settings,
                      request_thoughts: bool = True):
        """Build GenerateContentConfig for google-genai SDK.

        request_thoughts=True (default): always sets include_thoughts=True so thought parts
        appear in candidates[].content.parts[] and can be exported.  Pass False when retrying
        against a model that rejects ThinkingConfig entirely.

        max_output_tokens and safety_settings are only included when truthy.
        """
        kw: Dict[str, Any] = {"temperature": temperature}
        if max_output_tokens:
            kw["max_output_tokens"] = max_output_tokens
        if safety_settings:
            kw["safety_settings"] = safety_settings
        if request_thoughts:
            # Always request thought text back; only cap thinking_budget when explicitly set
            tc_kw: Dict[str, Any] = {"include_thoughts": True}
            if thinking_budget is not None:
                tc_kw["thinking_budget"] = thinking_budget
            kw["thinking_config"] = _google_genai_types.ThinkingConfig(**tc_kw)
        return _google_genai_types.GenerateContentConfig(**kw)

    @staticmethod
    def _extract_text(resp) -> str:
        """Return the stripped response text, raising ValueError when there is none."""
        text = resp.text
        if text is None:
            # Fail with a readable message instead of AttributeError on None.strip()
            raise ValueError("Gemini returned no text (empty or blocked response)")
        return text.strip()

    def _generate(self, prompt, image, temperature, thinking_budget, safety_settings, verbose,
                  max_output_tokens=None):
        """Single generate call. Handles thinking-not-supported gracefully.

        Stores token usage (and any thought text) of this call in
        ``self._last_call_usage``; on the legacy SDK path that dict is left empty.

        Args:
            prompt: Prompt text sent alongside the image
            image: Image passed to the SDK as the second content item
            temperature: Sampling temperature; None/0 is sent as 0.0
            thinking_budget: Thinking token cap, or None for no cap
            safety_settings: Safety settings forwarded to the SDK (may be None)
            verbose: Print a note when retrying without ThinkingConfig
            max_output_tokens: Output token cap (None/0 = not sent)

        Returns:
            Stripped response text

        Raises:
            ValueError: If the response contains no text
        """
        if not GEMINI_NEW_SDK:
            # Legacy google-generativeai path
            gen_kw: Dict[str, Any] = {"temperature": temperature or 0.0}
            if max_output_tokens:
                gen_kw["max_output_tokens"] = max_output_tokens
            gen_cfg = genai.GenerationConfig(**gen_kw)
            resp = self._legacy_model.generate_content(
                [prompt, image], generation_config=gen_cfg, safety_settings=safety_settings
            )
            self._last_call_usage = {}
            return self._extract_text(resp)

        config = self._build_config(temperature or 0.0, max_output_tokens, thinking_budget,
                                    safety_settings, request_thoughts=True)
        try:
            resp = self._client.models.generate_content(
                model=self.model_name, contents=[prompt, image], config=config
            )
        except Exception as e:
            err = str(e)
            # Non-thinking models reject ThinkingConfig with a 400/invalid error — retry without it
            if "thinking" in err.lower() or ("400" in err and "invalid" in err.lower()):
                if verbose:
                    print("Model does not support ThinkingConfig, retrying without.")
                config = self._build_config(temperature or 0.0, max_output_tokens,
                                            thinking_budget, safety_settings,
                                            request_thoughts=False)
                resp = self._client.models.generate_content(
                    model=self.model_name, contents=[prompt, image], config=config
                )
            else:
                raise

        usage = getattr(resp, "usage_metadata", None)
        self._last_call_usage = {
            "prompt_tokens": getattr(usage, "prompt_token_count", None) if usage else None,
            "output_tokens": getattr(usage, "candidates_token_count", None) if usage else None,
            "thinking_tokens": getattr(usage, "thoughts_token_count", None) if usage else None,
            "total_tokens": getattr(usage, "total_token_count", None) if usage else None,
        }
        # Extract thinking text from thought parts (present when include_thoughts=True was sent)
        thinking_parts = []
        try:
            for cand in (getattr(resp, "candidates", None) or []):
                for part in (getattr(getattr(cand, "content", None), "parts", None) or []):
                    if getattr(part, "thought", False) and getattr(part, "text", None):
                        thinking_parts.append(part.text)
        except Exception:
            # Thought text is optional extra data; never fail the call over it
            pass
        self._last_call_usage["thinking_text"] = "\n\n".join(thinking_parts) if thinking_parts else None
        return self._extract_text(resp)

    def _maybe_continue(
        self,
        current_text: str,
        prompt: str,
        image,
        thinking_budget,
        safety_settings,
        auto_continue: bool,
        max_auto_continuations: int,
        continuation_min_new_chars: int,
        verbose_block_logging: bool,
        max_output_tokens: Optional[int] = None,
    ) -> str:
        """Optionally ask the model to continue a transcription.

        When ``auto_continue`` is False the text is returned unchanged. Otherwise
        up to ``max_auto_continuations`` follow-up calls are made, each sending the
        text gathered so far and asking only for the remainder. The loop stops at
        the first failed call, empty reply, or reply shorter than
        ``continuation_min_new_chars``.

        Returns:
            The original text plus any accepted continuations, newline-separated
        """
        if not auto_continue:
            return current_text
        accumulated = current_text
        for pass_idx in range(1, max_auto_continuations + 1):
            continuation_prompt = (
                f"{prompt}\n\nPartial transcription so far (DO NOT repeat it):\n"
                f"{accumulated}\n\nContinue transcribing remaining, previously UNTRANSCRIBED text. "
                "Output ONLY the new continuation without repeating prior characters."
            )
            try:
                # Continuations pass no temperature, so _generate sends 0.0
                new_chunk = self._generate(
                    continuation_prompt, image, None, thinking_budget,
                    safety_settings, verbose_block_logging, max_output_tokens
                )
            except Exception as e:
                if verbose_block_logging:
                    print(f"Continuation {pass_idx} failed: {e}")
                break
            if not new_chunk:
                if verbose_block_logging:
                    print(f"Continuation {pass_idx}: no new text, stopping.")
                break
            # Guard against repetition: if the reply restarts from the beginning,
            # drop everything up to and including the tail of what we already have.
            if accumulated and new_chunk.startswith(accumulated[:200]):
                tail = accumulated[-50:]
                overlap_pos = new_chunk.find(tail)
                # Index 0 is a valid match (short accumulated text), so test for
                # "found" rather than "> 0".
                if overlap_pos >= 0:
                    new_chunk = new_chunk[overlap_pos + len(tail):]
            delta = len(new_chunk)
            if delta < continuation_min_new_chars:
                if verbose_block_logging:
                    print(f"Continuation {pass_idx}: only {delta} chars, stopping.")
                break
            accumulated += ("\n" if not accumulated.endswith("\n") else "") + new_chunk
            if verbose_block_logging:
                print(f"Continuation {pass_idx}: +{delta} chars (total {len(accumulated)})")
        return accumulated

    def transcribe(
        self,
        image,
        prompt: Optional[str] = None,
        temperature: float = 0.0,
        max_output_tokens: Optional[int] = None,
        auto_retry_on_block: bool = True,
        safety_relax: bool = True,
        verbose_block_logging: bool = True,
        thinking_mode: Optional[str] = None,
        fast_direct: bool = False,
        fast_direct_early_exit: bool = True,
        auto_continue: bool = False,
        max_auto_continuations: int = 2,
        continuation_min_new_chars: int = 50,
        reasoning_fallback_threshold: float = 1.0,
        record_stats_csv: Optional[str] = None,
        apply_restriction_prompt: bool = False,
        fallback_max_output_tokens: int = 8192,
        **kwargs,
    ) -> str:
        """Transcribe a manuscript image with Google Gemini.

        Args:
            image: PIL Image or numpy array
            prompt: Transcription prompt (uses default if None)
            temperature: Sampling temperature (0.0 = deterministic)
            max_output_tokens: Output token cap (None = model default)
            safety_relax: With the google-genai SDK, send BLOCK_NONE thresholds
                for the four harm categories listed below
            verbose_block_logging: Print token usage and diagnostic messages
            thinking_mode: None | "low" | "high" -- maps to thinking_budget;
                only "low" sets a cap, anything else leaves it unset
            auto_continue: Make follow-up continuation calls after the first reply
            max_auto_continuations: Maximum number of continuation calls
            continuation_min_new_chars: Stop continuing when a reply adds fewer
                characters than this
            record_stats_csv: Path to append usage CSV row (None to skip)
            auto_retry_on_block, fast_direct, fast_direct_early_exit,
            reasoning_fallback_threshold, apply_restriction_prompt,
            fallback_max_output_tokens, **kwargs: accepted but currently unused

        Returns:
            Transcribed text (including accepted continuations)

        Raises:
            ValueError: If the initial generate call fails or returns no text
        """
        # numpy is only needed to accept ndarray input; do not require it otherwise
        try:
            import numpy as np
        except ImportError:
            np = None
        if np is not None and isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        image = self.resize_image_if_needed(image, max_dimension=3072)
        prompt = prompt or self.default_prompt

        # Map thinking_mode to thinking_budget
        thinking_budget = self._THINKING_BUDGETS.get(thinking_mode)  # None if mode is None/unknown

        # Safety settings (only built for the google-genai SDK)
        safety_settings = None
        if safety_relax and GEMINI_NEW_SDK:
            safety_settings = [
                _google_genai_types.SafetySetting(category=cat, threshold="BLOCK_NONE")
                for cat in (
                    "HARM_CATEGORY_HARASSMENT",
                    "HARM_CATEGORY_HATE_SPEECH",
                    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                    "HARM_CATEGORY_DANGEROUS_CONTENT",
                )
            ]

        self._last_call_usage = {}

        try:
            result_text = self._generate(
                prompt, image, temperature, thinking_budget, safety_settings,
                verbose_block_logging, max_output_tokens
            )
        except Exception as e:
            raise ValueError(f"Gemini transcription failed: {e}") from e

        # Persist usage for callers (e.g. statistics panel, CSV logging).
        # This snapshot covers the initial call only, not later continuations.
        self.last_usage = dict(self._last_call_usage)
        u = self.last_usage
        if verbose_block_logging and u.get("total_tokens"):
            print(
                f"[tokens] prompt={u.get('prompt_tokens')} "
                f"output={u.get('output_tokens')} "
                f"thinking={u.get('thinking_tokens')} "
                f"total={u.get('total_tokens')}"
            )

        if record_stats_csv:
            # Best-effort logging: a failure here must not lose the transcription
            try:
                from datetime import datetime, timezone
                # Naive UTC timestamp, same format datetime.utcnow() produced
                timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
                with open(record_stats_csv, "a", encoding="utf-8") as f:
                    f.write(
                        f"{timestamp},"
                        f"{self.model_name},"
                        f"{thinking_mode or 'default'},"
                        f"final_success,"
                        f"{u.get('prompt_tokens')},"
                        f"{u.get('output_tokens')},"
                        f"{u.get('thinking_tokens')},"
                        f"{u.get('total_tokens')},"
                        f"{len(result_text)}\n"
                    )
            except Exception as csv_e:
                if verbose_block_logging:
                    print(f"Stats logging failed: {csv_e}")

        return self._maybe_continue(
            result_text, prompt, image, thinking_budget, safety_settings,
            auto_continue, max_auto_continuations, continuation_min_new_chars,
            verbose_block_logging, max_output_tokens,
        )

class ClaudeInference(BaseAPIInference):
    """Anthropic Claude inference via the Messages API."""

    def __init__(
        self,
        api_key: str,
        model: str = "claude-sonnet-4-6",
        default_prompt: Optional[str] = None
    ):
        """
        Initialize Claude inference.

        Args:
            api_key: Anthropic API key
            model: Model name
            default_prompt: Default transcription prompt

        Raises:
            ImportError: If the ``anthropic`` package could not be imported
        """
        if not CLAUDE_AVAILABLE:
            raise ImportError("Anthropic library not installed. Install with: pip install anthropic")

        super().__init__(api_key, default_prompt)
        self.model = model
        self.client = Anthropic(api_key=api_key)

    def _get_default_prompt(self) -> str:
        """Return the prompt used when the caller supplies none."""
        return (
            "Transcribe all handwritten text in this manuscript image. "
            "Preserve the original language (Cyrillic, Latin, etc.) and layout. "
            "Output only the transcribed text without any additional commentary."
        )

    def transcribe(
        self,
        image: Image.Image,
        prompt: Optional[str] = None,
        max_tokens: int = 500,
        temperature: float = 0.0,
        **kwargs
    ) -> str:
        """
        Transcribe with Anthropic Claude.

        Args:
            image: PIL Image
            prompt: Custom prompt
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature (0.0 = deterministic)
            **kwargs: Additional Claude parameters, forwarded to
                ``messages.create``

        Returns:
            Transcribed text: all text blocks of the reply concatenated, or an
            empty string when the reply has no text block
        """
        prompt = prompt or self.default_prompt

        # Resize if needed (Claude supports up to 1568px on longest side)
        image = self.resize_image_if_needed(image, max_dimension=1568)

        # Encode image as PNG (must match the media_type sent below)
        base64_image = self.encode_image_base64(image, format="PNG")

        # API call: image first, then the instruction text
        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": base64_image
                            }
                        },
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ],
            **kwargs
        )

        # The reply is a list of content blocks; it can be empty or begin with a
        # non-text block, so collect text blocks instead of indexing content[0].
        text_chunks = [
            block.text
            for block in response.content
            if getattr(block, "type", None) == "text"
        ]
        return "".join(text_chunks).strip()


# Model availability checks
def check_api_availability() -> Dict[str, bool]:
    """Report, per provider, whether its client library was importable.

    Returns:
        Mapping with keys "openai", "gemini" and "claude" to the matching
        module-level ``*_AVAILABLE`` flag.
    """
    availability: Dict[str, bool] = dict(
        openai=OPENAI_AVAILABLE,
        gemini=GEMINI_AVAILABLE,
        claude=CLAUDE_AVAILABLE,
    )
    return availability


# Fallback API model lists (used only if dynamic fetching fails)
#
# Each fetch_*_models() function below returns its list when the provider's
# library is not installed, no API key is given, the API call raises, or the
# API returns no models. The lists are also copied into the module-level
# OPENAI_MODELS / GEMINI_MODELS / CLAUDE_MODELS at import time.

# OpenAI model IDs offered when the live list is unavailable.
OPENAI_MODELS_FALLBACK = [
    "gpt-4o",
    "gpt-4o-mini",
    "gpt-4.1",
    "gpt-4.1-mini",
    "gpt-4.1-nano",
    "o4-mini",
    "o3",
    "o1",
    "chatgpt-4o-latest",
]

# Gemini model IDs (without the "models/" prefix) offered when the live list
# is unavailable.
GEMINI_MODELS_FALLBACK = [
    "gemini-2.5-pro-preview-06-05",
    "gemini-2.5-flash-preview-05-20",
    "gemini-2.0-flash",
    "gemini-2.0-flash-lite",
    "gemini-1.5-pro",
    "gemini-1.5-flash",
]

# Claude model IDs offered when the live list is unavailable.
CLAUDE_MODELS_FALLBACK = [
    "claude-opus-4-7",
    "claude-opus-4-6",
    "claude-sonnet-4-6",
    "claude-haiku-4-5-20251001",
    "claude-3-5-sonnet-20241022",
    "claude-3-5-haiku-20241022",
]


def fetch_openai_models(api_key: Optional[str] = None) -> list:
    """
    Dynamically fetch available OpenAI models from API.

    Args:
        api_key: OpenAI API key. When missing or empty no request is made and
            the fallback list is returned.

    Returns:
        All model IDs the account can list, in reverse lexicographic order (no
        capability filtering is applied), or a copy of
        ``OPENAI_MODELS_FALLBACK`` if the library is unavailable, no key is
        given, the fetch fails, or the API returns no models.
    """
    # Always hand out a copy so callers cannot mutate the module constant.
    if not OPENAI_AVAILABLE or not api_key:
        return list(OPENAI_MODELS_FALLBACK)

    try:
        client = OpenAI(api_key=api_key)
        models = client.models.list()
        model_ids = sorted((m.id for m in models.data), reverse=True)
    except Exception as e:
        # Listing models is best-effort: report and fall back rather than fail
        print(f"[OpenAI] Could not fetch models dynamically: {e}")
        print("[OpenAI] Using fallback model list")
        return list(OPENAI_MODELS_FALLBACK)

    return model_ids if model_ids else list(OPENAI_MODELS_FALLBACK)


def fetch_gemini_models(api_key: Optional[str] = None) -> list:
    """Dynamically fetch available Gemini models; returns fallback list on failure.

    Args:
        api_key: Google AI API key. When missing or empty no request is made
            and the fallback list is returned.

    Returns:
        Names (without the "models/" prefix) of models that support
        ``generateContent`` and whose name starts with "gemini", in reverse
        lexicographic order; or a copy of ``GEMINI_MODELS_FALLBACK`` if the
        library is unavailable, no key is given, the fetch fails, or nothing
        matches.
    """
    # Always hand out a copy so callers cannot mutate the module constant.
    if not GEMINI_AVAILABLE or not api_key:
        return list(GEMINI_MODELS_FALLBACK)
    try:
        if GEMINI_NEW_SDK:
            client = _google_genai_new.Client(api_key=api_key)
            models = [
                m.name.replace("models/", "")
                for m in client.models.list()
                if "generateContent" in (getattr(m, "supported_actions", None) or [])
            ]
        else:
            # Legacy google-generativeai SDK exposes the same data under other names
            genai.configure(api_key=api_key)
            models = [
                m.name.replace("models/", "")
                for m in genai.list_models()
                if "generateContent" in m.supported_generation_methods
            ]
        models = sorted((m for m in models if m.startswith("gemini")), reverse=True)
    except Exception as e:
        # Listing models is best-effort: report and fall back rather than fail
        print(f"[Gemini] Could not fetch models: {e}")
        return list(GEMINI_MODELS_FALLBACK)
    return models if models else list(GEMINI_MODELS_FALLBACK)

def fetch_claude_models(api_key: Optional[str] = None) -> list:
    """
    Dynamically fetch available Claude models via Anthropic API.

    Args:
        api_key: Anthropic API key. When missing or empty no request is made
            and the fallback list is returned.

    Returns:
        List of Claude model IDs in reverse lexicographic order (an
        approximation of newest-first), or a copy of ``CLAUDE_MODELS_FALLBACK``
        if the library is unavailable, no key is given, the fetch fails, or the
        API returns no models.
    """
    # Always hand out a copy so callers cannot mutate the module constant.
    if not CLAUDE_AVAILABLE or not api_key:
        return list(CLAUDE_MODELS_FALLBACK)

    try:
        client = Anthropic(api_key=api_key)
        # Iterate the page object rather than reading `.data`: iteration walks
        # every page, whereas `.data` holds only the first page of results.
        model_ids = sorted((m.id for m in client.models.list()), reverse=True)
    except Exception as e:
        # Listing models is best-effort: report and fall back rather than fail
        print(f"[Claude] Could not fetch models dynamically: {e}")
        return list(CLAUDE_MODELS_FALLBACK)

    return model_ids if model_ids else list(CLAUDE_MODELS_FALLBACK)


# Working model lists, seeded at import time with independent copies of the
# fallback lists so that changing one never alters the fallback constants.
# NOTE(review): presumably refreshed by callers via fetch_*_models() once API
# keys are known — nothing in this file reassigns them.
OPENAI_MODELS = list(OPENAI_MODELS_FALLBACK)
GEMINI_MODELS = list(GEMINI_MODELS_FALLBACK)
CLAUDE_MODELS = list(CLAUDE_MODELS_FALLBACK)


if __name__ == "__main__":
    # Example usage: transcribe one image file with the chosen provider and
    # print the result.
    import sys

    if len(sys.argv) < 4:
        print("Usage: python inference_commercial_api.py <provider> <api_key> <image_path>")
        print("Providers: openai, gemini, claude")
        sys.exit(1)

    provider = sys.argv[1].lower()
    api_key = sys.argv[2]
    image_path = sys.argv[3]

    # Resolve the provider first so a typo is reported before any file I/O
    inference_classes = {
        "openai": OpenAIInference,
        "gemini": GeminiInference,
        "claude": ClaudeInference,
    }
    inference_cls = inference_classes.get(provider)
    if inference_cls is None:
        print(f"Unknown provider: {provider}")
        sys.exit(1)

    # Load image; convert() returns a new in-memory image, so the file handle
    # can be closed as soon as the conversion is done
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # Initialize appropriate inference client
    api = inference_cls(api_key)

    # Transcribe
    print(f"Transcribing with {provider}...")
    text = api.transcribe(image)
    print(f"\nResult: {text}")