# routeur_ia_api/services/llm_service.py
# Commit 883546f (Cyril Dupland): "Remove OpenAI models & add Magistral model"
"""LLM service - Factory for creating LLM instances."""
from typing import Optional
from langchain_openai import ChatOpenAI
from langchain_mistralai import ChatMistralAI
from langchain_core.language_models.chat_models import BaseChatModel
from domain.enums import ModelName, ModelProvider
from config import settings
class LLMService:
    """Service for managing LLM instances across different providers.

    The service reads the provider API keys once, at construction time, and
    exposes a factory (`get_llm`) that returns a chat-model client for the
    requested model, plus static helpers describing the advertised models.
    """

    def __init__(self):
        """Initialize the service with the API keys found in `settings`."""
        self._openai_api_key = settings.openai_api_key
        self._mistralai_api_key = settings.mistralai_api_key

    def get_llm(
        self,
        model_name: ModelName,
        temperature: float = 0.7,
        streaming: bool = False,
        max_tokens: Optional[int] = None
    ) -> BaseChatModel:
        """
        Factory method to create an LLM instance based on model name.

        The provider is taken from `model_name.provider` and the request is
        delegated to the matching private builder.

        Args:
            model_name: Model enum value
            temperature: Sampling temperature (0.0 to 2.0)
            streaming: Enable streaming mode
            max_tokens: Maximum tokens to generate

        Returns:
            LLM instance (ChatOpenAI or ChatMistralAI)

        Raises:
            ValueError: If model provider is unknown
        """
        provider = model_name.provider

        if provider == ModelProvider.OPENAI:
            return self._create_openai_llm(
                model_name=model_name.value,
                temperature=temperature,
                streaming=streaming,
                max_tokens=max_tokens
            )
        if provider == ModelProvider.MISTRALAI:
            return self._create_mistralai_llm(
                model_name=model_name.value,
                temperature=temperature,
                streaming=streaming,
                max_tokens=max_tokens
            )
        raise ValueError(f"Unknown provider: {provider}")

    def _create_openai_llm(
        self,
        model_name: str,
        temperature: float,
        streaming: bool,
        max_tokens: Optional[int]
    ) -> ChatOpenAI:
        """Create OpenAI LLM instance.

        Some OpenAI models (e.g., `gpt-5`) have specific parameter requirements:
        - Only support default temperature (1.0)
        - Use 'max_completion_tokens' instead of 'max_tokens'

        Args:
            model_name: Provider-side model identifier.
            temperature: Requested sampling temperature; ignored (replaced by
                1.0) for models whose name starts with "gpt-5".
            streaming: Enable streaming mode.
            max_tokens: Maximum tokens to generate, or None for no explicit cap.

        Returns:
            Configured ChatOpenAI instance.
        """
        if model_name.startswith("gpt-5"):
            # gpt-5 family: the caller's temperature is deliberately dropped
            # and the token cap is passed under its alternative keyword.
            return ChatOpenAI(
                model=model_name,
                temperature=1.0,
                streaming=streaming,
                max_completion_tokens=max_tokens,
                api_key=self._openai_api_key
            )

        return ChatOpenAI(
            model=model_name,
            temperature=temperature,
            streaming=streaming,
            max_tokens=max_tokens,
            api_key=self._openai_api_key
        )

    def _create_mistralai_llm(
        self,
        model_name: str,
        temperature: float,
        streaming: bool,
        max_tokens: Optional[int]
    ) -> ChatMistralAI:
        """Create Mistral AI LLM instance.

        Args:
            model_name: Provider-side model identifier.
            temperature: Sampling temperature, forwarded unchanged.
            streaming: Enable streaming mode.
            max_tokens: Maximum tokens to generate, or None for no explicit cap.

        Returns:
            Configured ChatMistralAI instance.
        """
        return ChatMistralAI(
            model=model_name,
            temperature=temperature,
            streaming=streaming,
            max_tokens=max_tokens,
            mistral_api_key=self._mistralai_api_key
        )

    @staticmethod
    def supports_streaming(model_name: ModelName) -> bool:
        """
        Check if a model supports streaming.

        The answer comes from the catalogue returned by
        `list_available_models`; a model that is not listed there is reported
        as not supporting streaming.

        Args:
            model_name: Model enum value

        Returns:
            True if model supports streaming, False otherwise
        """
        for model in LLMService.list_available_models():
            if model["name"] == model_name.value:
                return model.get("supports_streaming", False)
        return False

    @staticmethod
    def list_available_models() -> list[dict]:
        """
        List all available models with their metadata.

        Each entry carries the keys "name", "provider", "description" and
        "supports_streaming". A new list of new dictionaries is built on
        every call.

        Returns:
            List of model information dictionaries
        """
        # NOTE: no OpenAI model is advertised at the moment, so the catalogue
        # only contains Mistral AI entries. To expose an OpenAI model again,
        # add an entry with provider "openai" following the same shape.
        return [
            {
                "name": ModelName.MISTRAL_LARGE.value,
                "provider": "mistralai",
                "description": "Mistral Large",
                "supports_streaming": True,
            },
            {
                "name": ModelName.MAGISTRAL_MEDIUM.value,
                "provider": "mistralai",
                "description": "Magistral Medium (reasoning)",
                "supports_streaming": True,
            },
        ]
# Shared module-level instance. It is constructed when this module is first
# imported, so the API keys are read from `settings` at import time.
llm_service = LLMService()