routeur_ia_api / services /impact_service.py
Cyril Dupland
fix impact_service: update trace_llm_impact function to return value directly when min and max are not available, improving error handling and output consistency.
e4e6861
raw
history blame
2.4 kB
"""Impact service wrapping ecologits calculations for LLM emissions.
This module provides a small helper to compute estimated greenhouse gas
emissions for an LLM request based on provider, model, request latency, and
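token usage. It is intentionally defensive: if the ecologits call raises or
yields no usable data, the function returns None so that API calls never fail
because of emissions tracing. Note that ecologits itself is imported when this
module is loaded, so a missing installation surfaces as an ImportError at
import time rather than as a None result.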
"""
import logging
from typing import Optional, Dict, Any

from ecologits.tracers.utils import llm_impacts
def trace_llm_impact(
    provider: str,
    model: str,
    usage: Optional[Dict[str, Any]],
    latency: float,
) -> Optional[float]:
    """Return the estimated kgCO2eq for a single LLM request.

    The GWP (Global Warming Potential) value reported by ecologits is either
    a (min, max) interval or a single value. For an interval the midpoint is
    returned; for a single value that value is returned, coerced to float.
    This function never raises: any failure while calling ecologits or while
    reading its result is logged at DEBUG level and reported as None.

    Args:
        provider: LLM provider identifier (e.g., "openai", "mistralai").
        model: Model name (e.g., "mistral-large-latest").
        usage: Token usage metadata. The output token count is read from the
            first of "output_tokens", "completion_tokens" or
            "generated_tokens" that holds an integer. Anything that is not a
            dict, or has none of these keys, yields no token count.
        latency: End-to-end request latency in seconds.

    Returns:
        Estimated kgCO2eq as a float, or None if no numeric estimate could be
        obtained.
    """
    # Try common keys for output token count to maximize compatibility
    # across LLM backends.
    output_token_count = None
    if isinstance(usage, dict):
        for key in ("output_tokens", "completion_tokens", "generated_tokens"):
            candidate = usage.get(key)
            # bool is a subclass of int; a True/False flag is not a count.
            if isinstance(candidate, int) and not isinstance(candidate, bool):
                output_token_count = candidate
                break

    logger = logging.getLogger(__name__)

    # Deliberately broad: emissions tracing is best-effort and must never
    # break the API call that triggered it.
    try:
        impact = llm_impacts(
            provider=provider,
            model_name=model,
            output_token_count=output_token_count,
            request_latency=latency,
        )
    except Exception:
        logger.debug(
            "Impact computation failed for provider=%s model=%s",
            provider,
            model,
            exc_info=True,
        )
        return None

    try:
        gwp_value = getattr(getattr(impact, "gwp", None), "value", None)
        if gwp_value is None:
            return None
        lower = getattr(gwp_value, "min", None)
        upper = getattr(gwp_value, "max", None)
        if lower is not None and upper is not None:
            return (float(lower) + float(upper)) / 2.0
        # Single-valued estimate: coerce so callers always get a float, and
        # let non-numeric payloads fall through to the handler below.
        return float(gwp_value)
    except Exception:
        logger.debug(
            "Could not read GWP value for provider=%s model=%s",
            provider,
            model,
            exc_info=True,
        )
        return None