Spaces:

ChambreAgriculturePaysLoire
/

routeur_ia_api

Running

routeur_ia_api / services /impact_service.py

Cyril Dupland

fix impact_service: update trace_llm_impact function to return value directly when min and max are not available, improving error handling and output consistency.

e4e6861 3 months ago

raw

history blame

2.4 kB

	"""Impact service wrapping ecologits calculations for LLM emissions.

	This module provides a small helper to compute estimated greenhouse gas
	emissions for an LLM request based on provider, model, request latency, and
	token usage. It is intentionally defensive: if the ecologits call raises or
	yields no usable data, the helper returns None so that API calls never fail
	because of emissions tracing. Note that ecologits itself is imported when
	this module is loaded, so the package must be installed for the import of
	this module to succeed.
	"""
	from typing import Optional, Dict, Any
	from ecologits.tracers.utils import llm_impacts


	def trace_llm_impact(
	    provider: str,
	    model: str,
	    usage: Optional[Dict[str, Any]],
	    latency: float,
	) -> Optional[float]:
	    """Return the estimated kgCO2eq for a single LLM request.

	    The estimate is read from the GWP (Global Warming Potential) entry of
	    the object returned by ``llm_impacts``. When that value exposes both a
	    ``min`` and a ``max`` attribute, their midpoint is returned; otherwise
	    the value itself is returned, converted to ``float``.

	    The function is best-effort and never raises: a missing output token
	    count, any exception raised while calling ``llm_impacts``, or a GWP
	    value that is absent or not numeric all result in ``None``.

	    Args:
	        provider: LLM provider identifier (e.g., "openai", "mistralai").
	        model: Model name (e.g., "mistral-large-latest").
	        usage: Token usage metadata as a dict. The output token count is
	            looked up under "output_tokens", "completion_tokens", then
	            "generated_tokens"; the first integer value found is used.
	        latency: End-to-end request latency in seconds.

	    Returns:
	        The estimated kgCO2eq as a float, or None if it cannot be computed.
	    """
	    # Look for the output token count under the keys used by common LLM
	    # backends. ``bool`` is a subclass of ``int`` in Python, so it is
	    # rejected explicitly: ``True`` is not a token count.
	    output_token_count = None
	    if isinstance(usage, dict):
	        for key in ("output_tokens", "completion_tokens", "generated_tokens"):
	            candidate = usage.get(key)
	            if isinstance(candidate, int) and not isinstance(candidate, bool):
	                output_token_count = candidate
	                break

	    # Without a token count there is nothing to estimate; return early
	    # instead of handing ``None`` to the library and relying on it to fail.
	    if output_token_count is None:
	        return None

	    # Emissions tracing must never break the calling request, so every
	    # failure from here on (library error, unexpected result shape,
	    # non-numeric value) is deliberately mapped to ``None``.
	    try:
	        impact = llm_impacts(
	            provider=provider,
	            model_name=model,
	            output_token_count=output_token_count,
	            request_latency=latency,
	        )
	        gwp_value = getattr(getattr(impact, "gwp", None), "value", None)
	        if gwp_value is None:
	            return None

	        lower = getattr(gwp_value, "min", None)
	        upper = getattr(gwp_value, "max", None)
	        if lower is not None and upper is not None:
	            return (lower + upper) / 2.0

	        # No interval available: the value is expected to be a plain
	        # number. ``float()`` enforces the declared return type and raises
	        # for anything else, which is caught below.
	        return float(gwp_value)
	    except Exception:
	        return None