Spaces:
Running
Running
| import os, json | |
| from typing import Any, Dict, List, Optional, Tuple | |
| import httpx | |
# Credentials and model selection are read once, at import time, from the
# process environment. An empty key is tolerated here; callers get an error
# string from openrouter_chat() instead of an exception.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# Model slug sent in the "model" field of every request; overridable via env.
OPENROUTER_MODEL = os.getenv(
    "OPENROUTER_MODEL",
    "meta-llama/llama-3.1-8b-instruct",
)
| def _extract_json_object(text: str) -> Optional[dict]: | |
| """ | |
| Best-effort: find the first top-level JSON object in a response. | |
| Works even if the model wraps JSON with prose or code fences. | |
| """ | |
| if not text: | |
| return None | |
| # Strip common fences | |
| t = text.strip() | |
| t = t.removeprefix("```json").removeprefix("```").removesuffix("```").strip() | |
| # Find first {...} span | |
| start = t.find("{") | |
| if start == -1: | |
| return None | |
| depth = 0 | |
| for i in range(start, len(t)): | |
| if t[i] == "{": | |
| depth += 1 | |
| elif t[i] == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| chunk = t[start:i+1] | |
| try: | |
| return json.loads(chunk) | |
| except Exception: | |
| return None | |
| return None | |
def openrouter_chat(
    messages: List[Dict[str, str]],
    response_format: Optional[Dict[str, Any]] = None,
    temperature: float = 0.2,
    max_tokens: int = 512,
    timeout_s: float = 30.0,
) -> Tuple[Optional[str], Optional[dict], Optional[str]]:
    """
    Send one chat-completion request to OpenRouter.

    Returns (raw_text, parsed_json, error_str).
    Never raises.

    Args:
        messages: Chat messages, sent unchanged as the "messages" field.
        response_format: Optional "response_format" payload field; omitted
            from the request when None.
        temperature: Sampling temperature sent in the payload.
        max_tokens: Completion token cap sent in the payload.
        timeout_s: Timeout handed to ``httpx.Client``.

    Returns:
        - On success: (content, parsed, None), where ``parsed`` is whatever
          ``_extract_json_object(content)`` yields (possibly None).
        - When the first choice has finish_reason "content_filter" or the
          text starts with a refusal phrase: (content, parsed, error_str)
          with model / finish reasons / token counts in ``error_str``.
        - On any failure: (None, None, error_str). For responses that were
          received, ``error_str`` starts with the HTTP status and includes
          the OpenRouter error code/message if the body provides one;
          transport or unexpected failures are reported as
          "ExceptionType: message".
    """
    if not OPENROUTER_API_KEY:
        return None, None, "OPENROUTER_API_KEY missing"

    payload: Dict[str, Any] = {
        "model": OPENROUTER_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    if response_format is not None:
        payload["response_format"] = response_format

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://huggingface.co/spaces",
        "X-Title": "Prompt_Squirrel_RAG",
    }

    try:
        with httpx.Client(timeout=timeout_s) as client:
            r = client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers=headers,
                json=payload,
            )
            status = r.status_code
            try:
                data = r.json()
            except ValueError:
                # Gateways/proxies may answer with HTML or an empty body;
                # keep the status so the failure is still diagnosable.
                return None, None, f"HTTP {status}: non-JSON response body"

        if not isinstance(data, dict):
            return None, None, f"HTTP {status}: unexpected response shape"

        # Surface the provider's own error instead of failing later with an
        # opaque KeyError on the missing "choices" field.
        err = data.get("error")
        if err:
            if isinstance(err, dict):
                parts = []
                if err.get("code") is not None:
                    parts.append(f"code={err['code']}")
                if err.get("message"):
                    parts.append(str(err["message"]))
                detail = "; ".join(parts) or str(err)
            else:
                detail = str(err)
            return None, None, f"HTTP {status}: OpenRouter error ({detail})"

        if status >= 400:
            return None, None, f"HTTP {status}: request failed"

        choices = data.get("choices")
        if not choices:
            return None, None, f"HTTP {status}: response has no choices"

        choice0 = choices[0]
        message = choice0.get("message") or {}
        content = (message.get("content") or "").strip()
        finish_reason = choice0.get("finish_reason")
        native_finish_reason = choice0.get("native_finish_reason")

        # Diagnostics appended to error_str for the flagged cases below.
        meta = []
        if data.get("model"):
            meta.append(f"model={data['model']}")
        if finish_reason:
            meta.append(f"finish={finish_reason}")
        if native_finish_reason:
            meta.append(f"native_finish={native_finish_reason}")
        if isinstance(data.get("usage"), dict):
            u = data["usage"]
            if "prompt_tokens" in u and "completion_tokens" in u:
                meta.append(f"tokens={u['prompt_tokens']}+{u['completion_tokens']}")

        parsed = _extract_json_object(content)

        # If it looks filtered, flag it
        if finish_reason == "content_filter":
            return content, parsed, f"Filtered (content_filter; {'; '.join(meta)})"

        # If it looks refusal-like but not content_filter, still flag it
        if content.lower().startswith(("i can't", "i can’t", "i cannot", "can't", "cannot")):
            return content, parsed, f"Refusal-like ({'; '.join(meta)})"

        return content, parsed, None
    except Exception as e:
        # Deliberate catch-all: this function's contract is to never raise.
        return None, None, f"{type(e).__name__}: {e}"
# Import smoke test: running the file directly only confirms that the module
# (and its dependencies) load; no request is sent.
if __name__ == "__main__":
    print("openrouter_client.py imports ok")