import logging
import time
from collections import deque
from threading import Lock

import requests

from config import (
    OPENROUTER_API_KEY,
    OPENROUTER_BASE_URL,
    MAX_REQUESTS_PER_MINUTE,
    LLM_TIMEOUT,
    LLM_MAX_RETRIES,
)

logger = logging.getLogger(__name__)

# HTTP statuses treated as transient and therefore retried with backoff.
_RETRYABLE_STATUSES = (429, 503, 502)

# Returned by OpenRouterClient.call() when no usable answer was obtained.
# Kept as a module constant so ping_model() can tell "fallback" from a real reply.
_FALLBACK_RESPONSE = '{"action": "HOLD", "size": 0.0, "confidence": 0.0, "reason": "LLM unavailable"}'


class RateLimiter:
    """Sliding-window rate limiter: at most N requests per 60s window.

    The timestamps of recent acquisitions are kept in a deque; an acquisition
    is granted once fewer than ``max_per_minute`` of them fall within the
    last 60 seconds.
    """

    def __init__(self, max_per_minute: int = MAX_REQUESTS_PER_MINUTE):
        self.max_per_minute = max_per_minute
        self.timestamps: deque = deque()
        self.lock = Lock()

    def _prune(self, now: float) -> None:
        """Drop recorded timestamps that are more than 60s older than *now*."""
        while self.timestamps and now - self.timestamps[0] > 60:
            self.timestamps.popleft()

    def acquire(self) -> None:
        """Block until a request slot is available, then record the request.

        The sleep happens while the lock is held, so concurrent callers are
        serialized behind the waiting one.
        """
        with self.lock:
            now = time.time()
            self._prune(now)
            if len(self.timestamps) >= self.max_per_minute:
                # Wait until the oldest request leaves the window (+0.1s margin).
                sleep_for = 60 - (now - self.timestamps[0]) + 0.1
                logger.info(f"Rate limit reached, sleeping {sleep_for:.1f}s")
                time.sleep(sleep_for)
                self._prune(time.time())
            self.timestamps.append(time.time())


_rate_limiter = RateLimiter()


def _backoff(attempt: int, wait: int, cause: str) -> None:
    """Log a transient failure and sleep *wait* seconds if a retry will follow.

    On the final attempt there is nothing left to retry, so the sleep is
    skipped instead of delaying the fallback response.
    """
    if attempt + 1 >= LLM_MAX_RETRIES:
        logger.warning(f"{cause} on attempt {attempt+1}, no retries left")
        return
    logger.warning(f"{cause} on attempt {attempt+1}, retrying in {wait}s")
    time.sleep(wait)


class OpenRouterClient:
    """Minimal chat-completions client for a single OpenRouter model."""

    def __init__(self, model: str, api_key: str = OPENROUTER_API_KEY):
        self.model = model
        self.api_key = api_key
        self.base_url = OPENROUTER_BASE_URL

    def call(self, system_prompt: str, user_prompt: str) -> str:
        """Call OpenRouter with retry + exponential backoff. Returns raw text.

        Timeouts and HTTP 429/502/503 responses are retried up to
        ``LLM_MAX_RETRIES`` attempts; any other error aborts immediately.
        Every attempt first passes through the module-level rate limiter.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.

        Returns:
            The ``content`` of the first choice's message, or a fixed HOLD
            JSON string when no attempt succeeded. Request failures are
            logged rather than raised.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://cryptoagentbench.github.io",
            "X-Title": "CryptoAgentBench",
        }
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.1,
            "max_tokens": 512,
        }

        for attempt in range(LLM_MAX_RETRIES):
            _rate_limiter.acquire()
            try:
                resp = requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=headers,
                    json=payload,
                    timeout=LLM_TIMEOUT,
                )
                resp.raise_for_status()
                data = resp.json()
                return data["choices"][0]["message"]["content"]
            except requests.exceptions.Timeout:
                _backoff(attempt, 2 ** attempt, "Timeout")
            except requests.exceptions.HTTPError as e:
                # A Response is falsy for any 4xx/5xx status, so it must be
                # compared against None explicitly; a plain truthiness test
                # would discard the status code of every error response.
                status = e.response.status_code if e.response is not None else None
                if status in _RETRYABLE_STATUSES:
                    _backoff(attempt, 2 ** (attempt + 1), f"HTTP {status}")
                else:
                    logger.error(f"HTTP error {status}: {e}")
                    break
            except Exception as e:
                # Anything else (connection failure, malformed body, missing
                # keys) is treated as non-retryable.
                logger.error(f"LLM call failed: {e}")
                break

        logger.error(f"All retries failed for model {self.model}, returning HOLD")
        return _FALLBACK_RESPONSE


def ping_model(model: str, api_key: str = OPENROUTER_API_KEY) -> bool:
    """Quick check if a model is responding.

    Returns True only when the model produced a non-empty reply of its own.
    ``OpenRouterClient.call`` reports failure by returning the HOLD fallback
    string rather than raising, so that string is treated as "not responding".
    """
    client = OpenRouterClient(model=model, api_key=api_key)
    try:
        result = client.call("You are a test.", "Reply with OK")
    except Exception:
        return False
    return bool(result) and result != _FALLBACK_RESPONSE