"""Thin OpenRouter chat-completions client that never raises.

Configuration is read from the environment once, at import time:
``OPENROUTER_API_KEY`` and ``OPENROUTER_MODEL``.
"""

import json
import os
from typing import Any, Dict, List, Optional, Tuple

import httpx

OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
OPENROUTER_MODEL = os.environ.get("OPENROUTER_MODEL", "meta-llama/llama-3.1-8b-instruct")

_OPENROUTER_CHAT_URL = "https://openrouter.ai/api/v1/chat/completions"

# Lower-cased prefixes that mark a reply as "refusal-like" even when the
# provider did not report finish_reason == "content_filter".
_REFUSAL_PREFIXES = ("i can't", "i can’t", "i cannot", "can't", "cannot")


def _find_object_end(text: str, start: int) -> int:
    """Return the index of the ``}`` that closes the ``{`` at *start*, or -1.

    Braces inside double-quoted strings are ignored (backslash escapes are
    honoured), so a value such as ``"}"`` does not end the object early.
    """
    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return i
    return -1


def _extract_json_object(text: str) -> Optional[dict]:
    """Best-effort: return the first top-level JSON object found in *text*.

    Works when the model wraps the JSON in prose or code fences, because the
    scan simply starts at the first ``{``. A balanced ``{...}`` span that is
    not valid JSON (e.g. a ``{placeholder}`` in prose) is skipped and the
    search resumes after it.

    Returns ``None`` for empty input, when no object parses, or when a span
    is never closed (typically truncated output). Nested objects are
    deliberately not returned in that case, so a truncated reply is not
    mistaken for a complete one.
    """
    if not text:
        return None

    start = text.find("{")
    while start != -1:
        end = _find_object_end(text, start)
        if end == -1:
            # Unbalanced from here on; nothing later can be top-level.
            return None
        try:
            # A successful parse of a span starting with "{" is always a dict.
            return json.loads(text[start:end + 1])
        except (ValueError, RecursionError):
            # Not JSON (or pathologically nested): try the next top-level span.
            start = text.find("{", end + 1)
    return None


def _describe_api_error(status: int, err: Any) -> str:
    """Format the ``error`` member of a response body for logging.

    Assumes the OpenRouter shape ``{"code": ..., "message": ...}`` when *err*
    is a dict (confirm against the API docs); any other value is rendered
    with ``repr`` so no information is lost.
    """
    if not isinstance(err, dict):
        return f"OpenRouter error (http={status}): {err!r}"

    parts = [f"http={status}"]
    if err.get("code") is not None:
        parts.append(f"code={err['code']}")
    message = err.get("message") or "no message provided"
    return f"OpenRouter error ({'; '.join(parts)}): {message}"


def openrouter_chat(
    messages: List[Dict[str, str]],
    response_format: Optional[Dict[str, Any]] = None,
    temperature: float = 0.2,
    max_tokens: int = 512,
    timeout_s: float = 30.0,
) -> Tuple[Optional[str], Optional[dict], Optional[str]]:
    """Send one chat-completion request to OpenRouter.

    Returns ``(raw_text, parsed_json, error_str)`` and never raises.

    * Success: ``(content, parsed_or_None, None)``.
    * Filtered or refusal-like reply: ``(content, parsed_or_None, error_str)``;
      the text is still returned so the caller can inspect it.
    * Transport, HTTP or API failure: ``(None, None, error_str)``.

    Instrumented to help diagnose moderation/routing variance: every
    ``error_str`` built from a response includes the HTTP status, and the
    OpenRouter error code/message when the body carries an ``error`` member.

    Args:
        messages: Chat messages, sent as the ``messages`` payload field.
        response_format: Optional ``response_format`` payload field; omitted
            from the request when ``None``.
        temperature: Sampling temperature.
        max_tokens: Completion token limit.
        timeout_s: Timeout passed to ``httpx.Client``.
    """
    if not OPENROUTER_API_KEY:
        return None, None, "OPENROUTER_API_KEY missing"

    payload: Dict[str, Any] = {
        "model": OPENROUTER_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if response_format is not None:
        payload["response_format"] = response_format

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://huggingface.co/spaces",
        "X-Title": "Prompt_Squirrel_RAG",
    }

    # Deliberately broad: this function is the "never raises" boundary.
    try:
        with httpx.Client(timeout=timeout_s) as client:
            r = client.post(_OPENROUTER_CHAT_URL, headers=headers, json=payload)
            status = r.status_code
            try:
                data = r.json()
            except ValueError:
                # Gateways/proxies can answer with HTML or plain text.
                return None, None, f"HTTP {status}: non-JSON response body: {r.text[:200]!r}"

        if not isinstance(data, dict):
            return None, None, f"HTTP {status}: unexpected JSON payload of type {type(data).__name__}"

        # Surface API-level failures instead of an opaque KeyError on "choices".
        api_error = data.get("error")
        if api_error:
            return None, None, _describe_api_error(status, api_error)
        if status >= 400:
            return None, None, f"HTTP {status}: request failed without an error payload"

        choices = data.get("choices")
        if not isinstance(choices, list) or not choices:
            return None, None, f"HTTP {status}: response contains no choices"

        choice0 = choices[0]
        message = choice0.get("message")
        if not isinstance(message, dict):
            return None, None, f"HTTP {status}: first choice has no message"

        content = (message.get("content") or "").strip()
        finish_reason = choice0.get("finish_reason")
        native_finish_reason = choice0.get("native_finish_reason")

        # Diagnostics appended to error_str for logging.
        meta = [f"http={status}"]
        if data.get("model"):
            meta.append(f"model={data['model']}")
        if finish_reason:
            meta.append(f"finish={finish_reason}")
        if native_finish_reason:
            meta.append(f"native_finish={native_finish_reason}")
        usage = data.get("usage")
        if isinstance(usage, dict) and "prompt_tokens" in usage and "completion_tokens" in usage:
            meta.append(f"tokens={usage['prompt_tokens']}+{usage['completion_tokens']}")
        meta_str = "; ".join(meta)

        parsed = _extract_json_object(content)

        # If it looks filtered, flag it.
        if finish_reason == "content_filter":
            return content, parsed, f"Filtered (content_filter; {meta_str})"

        # If it looks refusal-like but not content_filter, still flag it.
        if content.lower().startswith(_REFUSAL_PREFIXES):
            return content, parsed, f"Refusal-like ({meta_str})"

        return content, parsed, None
    except Exception as e:
        return None, None, f"{type(e).__name__}: {e}"


if __name__ == "__main__":
    print("openrouter_client.py imports ok")