import time
import logging
import requests
from threading import Lock
from collections import deque
from config import OPENROUTER_API_KEY, OPENROUTER_BASE_URL, MAX_REQUESTS_PER_MINUTE, LLM_TIMEOUT, LLM_MAX_RETRIES

logger = logging.getLogger(__name__)


class RateLimiter:
    """Sliding-window rate limiter: at most ``max_per_minute`` acquisitions per 60s.

    Each successful :meth:`acquire` records a timestamp; a caller is admitted
    only while fewer than ``max_per_minute`` timestamps fall inside the last
    60 seconds. Otherwise it sleeps until the oldest recorded timestamp has
    left the window and then tries again.

    Timestamps come from ``time.monotonic()`` so that wall-clock adjustments
    (NTP steps, manual changes) can neither stall nor bypass the limiter. They
    are therefore only meaningful relative to each other, not as epoch times.
    """

    def __init__(self, max_per_minute: int = MAX_REQUESTS_PER_MINUTE):
        """Create a limiter.

        Args:
            max_per_minute: Maximum number of acquisitions allowed in any
                60-second window. Must be at least 1.

        Raises:
            ValueError: If ``max_per_minute`` is smaller than 1. Such a limit
                can never admit a request and would otherwise fail later with
                an ``IndexError`` on the empty timestamp queue.
        """
        if max_per_minute < 1:
            raise ValueError(f"max_per_minute must be >= 1, got {max_per_minute}")
        self.max_per_minute = max_per_minute
        # Monotonic timestamps of admitted requests, oldest first.
        self.timestamps: deque = deque()
        self.lock = Lock()

    def _purge(self, now: float) -> None:
        """Drop timestamps older than 60s relative to ``now`` (caller holds the lock)."""
        while self.timestamps and now - self.timestamps[0] > 60:
            self.timestamps.popleft()

    def acquire(self):
        """Block until a request slot is available, then claim it."""
        while True:
            with self.lock:
                now = time.monotonic()
                self._purge(now)
                if len(self.timestamps) < self.max_per_minute:
                    self.timestamps.append(now)
                    return
                # Window is full: wait until the oldest entry expires. The
                # extra 0.1s clears the strict ``> 60`` comparison in _purge.
                sleep_for = 60 - (now - self.timestamps[0]) + 0.1
            # Sleep outside the lock so other threads are not blocked on it,
            # then loop to re-check the window (another thread may have taken
            # the freed slot in the meantime).
            logger.info(f"Rate limit reached, sleeping {sleep_for:.1f}s")
            time.sleep(sleep_for)


# Module-level limiter built with the default limit; OpenRouterClient.call
# acquires from it before every request attempt.
_rate_limiter = RateLimiter()


class OpenRouterClient:
    """Thin client for the OpenRouter ``/chat/completions`` endpoint.

    Every request attempt first acquires a slot from the module-level
    ``_rate_limiter``.
    """

    def __init__(self, model: str, api_key: str = OPENROUTER_API_KEY):
        """Store the target model and credentials.

        Args:
            model: Model identifier sent in the request payload.
            api_key: Bearer token placed in the ``Authorization`` header.
        """
        self.model = model
        self.api_key = api_key
        self.base_url = OPENROUTER_BASE_URL

    def call(self, system_prompt: str, user_prompt: str) -> str:
        """Call OpenRouter with retry + exponential backoff. Returns raw text.

        Up to ``LLM_MAX_RETRIES`` attempts are made. Timeouts and HTTP
        429/502/503 responses are retried with exponential backoff; any other
        error aborts immediately. This method does not raise for request
        failures.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.

        Returns:
            The ``content`` of the first choice's message on success. When no
            attempt succeeds, a fixed JSON string describing a ``HOLD`` action
            with reason ``"LLM unavailable"``.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://cryptoagentbench.github.io",
            "X-Title": "CryptoAgentBench",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.1,
            "max_tokens": 512,
        }

        last_attempt = LLM_MAX_RETRIES - 1
        for attempt in range(LLM_MAX_RETRIES):
            _rate_limiter.acquire()
            try:
                resp = requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=LLM_TIMEOUT,
                )
                resp.raise_for_status()
                data = resp.json()
                return data["choices"][0]["message"]["content"]
            except requests.exceptions.Timeout:
                if attempt == last_attempt:
                    # Nothing left to retry: skip the pointless backoff sleep.
                    logger.warning(f"Timeout on attempt {attempt+1}, no retries left")
                    break
                wait = 2 ** attempt
                logger.warning(f"Timeout on attempt {attempt+1}, retrying in {wait}s")
                time.sleep(wait)
            except requests.exceptions.HTTPError as e:
                # Compare against None explicitly: a requests.Response is
                # falsy whenever its status is 4xx/5xx, so a plain truthiness
                # test would always discard the status of an error response.
                status = e.response.status_code if e.response is not None else None
                if status not in (429, 503, 502):
                    logger.error(f"HTTP error {status}: {e}")
                    break
                if attempt == last_attempt:
                    logger.warning(f"HTTP {status} on attempt {attempt+1}, no retries left")
                    break
                wait = 2 ** (attempt + 1)
                logger.warning(f"HTTP {status} on attempt {attempt+1}, retrying in {wait}s")
                time.sleep(wait)
            except Exception as e:
                # Connection errors, malformed JSON, unexpected response shape,
                # etc. are treated as non-retryable.
                logger.error(f"LLM call failed: {e}")
                break

        logger.error(f"All retries failed for model {self.model}, returning HOLD")
        return '{"action": "HOLD", "size": 0.0, "confidence": 0.0, "reason": "LLM unavailable"}'


def ping_model(model: str, api_key: str = OPENROUTER_API_KEY) -> bool:
    """Quick check if a model is responding.

    Args:
        model: Model identifier to probe.
        api_key: API key passed through to ``OpenRouterClient``.

    Returns:
        True only when the model produced a non-empty reply of its own;
        False when the call raised, returned nothing, or returned the
        client's "LLM unavailable" fallback.
    """
    # OpenRouterClient.call does not raise on request failures; it returns
    # this fixed HOLD string instead. It is non-empty, so a bare truthiness
    # check would report every unreachable model as alive. Keep this literal
    # byte-identical to the fallback returned by OpenRouterClient.call.
    fallback = '{"action": "HOLD", "size": 0.0, "confidence": 0.0, "reason": "LLM unavailable"}'
    client = OpenRouterClient(model=model, api_key=api_key)
    try:
        result = client.call("You are a test.", "Reply with OK")
    except Exception:
        return False
    return bool(result) and result != fallback