Cyril Dupland
fix impact_service: update trace_llm_impact function to return value directly when min and max are not available, improving error handling and output consistency.
e4e6861 | """Impact service wrapping ecologits calculations for LLM emissions. | |
| This module provides a small helper to compute estimated greenhouse gas | |
| emissions for an LLM request based on provider, model, request latency, and | |
| token usage. It is intentionally defensive: if the ecologits library is not | |
| available or raises, the function returns None so that API calls never fail | |
| because of emissions tracing. | |
| """ | |
| from typing import Optional, Dict, Any | |
| from ecologits.tracers.utils import llm_impacts | |
def _as_float(candidate: Any) -> Optional[float]:
    """Return ``candidate`` converted to ``float``, or None if it is not numeric."""
    if candidate is None:
        return None
    try:
        return float(candidate)
    except (TypeError, ValueError, OverflowError):
        return None


def trace_llm_impact(
    provider: str,
    model: str,
    usage: Optional[Dict[str, Any]],
    latency: float,
) -> Optional[float]:
    """Return the estimated kgCO2eq for a single LLM request.

    The GWP (Global Warming Potential) value reported by ecologits is read
    from ``impact.gwp.value``. When that value exposes numeric ``min`` and
    ``max`` attributes, their midpoint is returned; otherwise the value
    itself is returned if it can be converted to ``float``.

    This function never raises: any failure while calling ecologits or while
    reading its result yields None, so emissions tracing cannot break the
    calling request.

    Args:
        provider: LLM provider identifier (e.g., "openai", "mistralai").
        model: Model name (e.g., "mistral-large-latest").
        usage: Token usage metadata. The output token count is looked up
            under "output_tokens", "completion_tokens", then
            "generated_tokens"; the first integer found is used.
        latency: End-to-end request latency in seconds.

    Returns:
        The estimate as a float, or None if it is unavailable or not numeric.
    """
    # Try common keys for the output token count to stay compatible with
    # different LLM backends. Anything that is not a dict is ignored.
    output_token_count = None
    if isinstance(usage, dict):
        for key in ("output_tokens", "completion_tokens", "generated_tokens"):
            candidate = usage.get(key)
            # bool is a subclass of int; True/False is not a token count.
            if isinstance(candidate, int) and not isinstance(candidate, bool):
                output_token_count = candidate
                break

    # Deliberate best-effort boundary: tracing must never fail the caller.
    try:
        impact = llm_impacts(
            provider=provider,
            model_name=model,
            output_token_count=output_token_count,
            request_latency=latency,
        )
    except Exception:
        return None

    try:
        gwp_value = getattr(getattr(impact, "gwp", None), "value", None)
        # Non-numeric ``min``/``max`` attributes (e.g. methods on a scalar
        # type) are treated as absent rather than as interval bounds.
        low = _as_float(getattr(gwp_value, "min", None))
        high = _as_float(getattr(gwp_value, "max", None))
        if low is not None and high is not None:
            return (low + high) / 2.0
        # No usable interval: fall back to the value itself, but only if it
        # is numeric, so the declared Optional[float] return type holds.
        return _as_float(gwp_value)
    except Exception:
        return None