"""LLM service - Factory for creating LLM instances.""" from typing import Optional from langchain_openai import ChatOpenAI from langchain_mistralai import ChatMistralAI from langchain_core.language_models.chat_models import BaseChatModel from domain.enums import ModelName, ModelProvider from config import settings class LLMService: """Service for managing LLM instances across different providers.""" def __init__(self): """Initialize LLM service.""" self._openai_api_key = settings.openai_api_key self._mistralai_api_key = settings.mistralai_api_key def get_llm( self, model_name: ModelName, temperature: float = 0.7, streaming: bool = False, max_tokens: Optional[int] = None ) -> BaseChatModel: """ Factory method to create an LLM instance based on model name. Args: model_name: Model enum value temperature: Sampling temperature (0.0 to 2.0) streaming: Enable streaming mode max_tokens: Maximum tokens to generate Returns: LLM instance (ChatOpenAI or ChatMistralAI) Raises: ValueError: If model provider is unknown """ provider = model_name.provider if provider == ModelProvider.OPENAI: return self._create_openai_llm( model_name=model_name.value, temperature=temperature, streaming=streaming, max_tokens=max_tokens ) elif provider == ModelProvider.MISTRALAI: return self._create_mistralai_llm( model_name=model_name.value, temperature=temperature, streaming=streaming, max_tokens=max_tokens ) else: raise ValueError(f"Unknown provider: {provider}") def _create_openai_llm( self, model_name: str, temperature: float, streaming: bool, max_tokens: Optional[int] ) -> ChatOpenAI: """Create OpenAI LLM instance. Some OpenAI models (e.g., `gpt-5`) have specific parameter requirements: - Only support default temperature (1.0) - Use 'max_completion_tokens' instead of 'max_tokens' """ effective_temperature = temperature # Coerce to default temperature for models that don't allow custom values if model_name.startswith("gpt-5"): effective_temperature = 1.0 # For gpt-5 models, use max_completion_tokens instead of max_tokens if model_name.startswith("gpt-5"): return ChatOpenAI( model=model_name, temperature=effective_temperature, streaming=streaming, max_completion_tokens=max_tokens, api_key=self._openai_api_key ) else: return ChatOpenAI( model=model_name, temperature=effective_temperature, streaming=streaming, max_tokens=max_tokens, api_key=self._openai_api_key ) def _create_mistralai_llm( self, model_name: str, temperature: float, streaming: bool, max_tokens: Optional[int] ) -> ChatMistralAI: """Create Mistral AI LLM instance.""" return ChatMistralAI( model=model_name, temperature=temperature, streaming=streaming, max_tokens=max_tokens, mistral_api_key=self._mistralai_api_key ) @staticmethod def supports_streaming(model_name: ModelName) -> bool: """ Check if a model supports streaming. Args: model_name: Model enum value Returns: True if model supports streaming, False otherwise """ models = LLMService.list_available_models() for model in models: if model["name"] == model_name.value: return model.get("supports_streaming", False) return False @staticmethod def list_available_models() -> list[dict]: """ List all available models with their metadata. Returns: List of model information dictionaries """ models = [] # OpenAI models openai_models = [ # { # "name": ModelName.GPT_5.value, # "provider": "openai", # "description": "GPT-5", # "supports_streaming": False, # # "context_window": 128000 # }, # { # "name": ModelName.GPT_5_CHAT.value, # "provider": "openai", # "description": "GPT-5 Chat", # "supports_streaming": True, # # "context_window": 128000 # }, # { # "name": ModelName.GPT_4.value, # "provider": "openai", # "description": "GPT-4", # "supports_streaming": True, # # "context_window": 128000 # }, # { # "name": ModelName.GPT_4_TURBO.value, # "provider": "openai", # "description": "GPT-4 Turbo - Fast and powerful", # "supports_streaming": True, # "context_window": 128000 # }, # { # "name": ModelName.GPT_4.value, # "provider": "openai", # "description": "GPT-4 - High quality", # "supports_streaming": True, # "context_window": 8192 # }, # { # "name": ModelName.GPT_35_TURBO.value, # "provider": "openai", # "description": "GPT-3.5 Turbo - Fast and efficient", # "supports_streaming": True, # "context_window": 16385 # } ] # Mistral AI models mistral_models = [ { "name": ModelName.MISTRAL_LARGE.value, "provider": "mistralai", "description": "Mistral Large", "supports_streaming": True, # "context_window": 32000 }, { "name": ModelName.MAGISTRAL_MEDIUM.value, "provider": "mistralai", "description": "Magistral Medium (reasonning)", "supports_streaming": True, # "context_window": 32000 } ] models.extend(openai_models) models.extend(mistral_models) return models # Singleton instance llm_service = LLMService()