Cyril Dupland
feat voice: integrate WebRTC ICE server configuration, enhance voice pipeline with new modes, and improve VAD settings for better audio processing
8b0c3c9 | """Voice conversation routes using Pipecat WebRTC.""" | |
| import logging | |
| import sys | |
| import uuid | |
| import aiohttp | |
| from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request | |
| from pipecat.audio.vad.silero import SileroVADAnalyzer | |
| from pipecat.transports.base_transport import TransportParams | |
| from pipecat.transports.smallwebrtc.request_handler import ( | |
| SmallWebRTCPatchRequest, | |
| SmallWebRTCRequest, | |
| ) | |
| from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport | |
| from config import settings | |
| from core.security import get_current_user | |
| from domain.enums import ModelName | |
| from domain.models import TranscriptListResponse | |
| from services.voice.bot import run_voice_bot | |
| from services.voice.transcript_store import TranscriptStore | |
| from services.voice.vad_config_service import resolve_from_request_data | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router for the voice endpoints; paths are prefixed with /voice.
router = APIRouter(prefix="/voice", tags=["Voice"])

# Pipeline modes a client may request through the `mode` query parameter.
VOICE_MODES = frozenset(("classic", "trigger_on_push"))
def _get_handler(request: Request):
    """Return the ``voice_handler`` object stored on the application state."""
    app_state = request.app.state
    return app_state.voice_handler
async def capabilities(_=Depends(get_current_user)):
    """Report which optional voice transports this deployment offers.

    Returns:
        A dict with a single ``daily_available`` flag, true only when
        ``settings.daily_api_key`` is set to a truthy value.
    """
    daily_configured = bool(settings.daily_api_key)
    return {"daily_available": daily_configured}
async def ice_servers(request: Request, _=Depends(get_current_user)):
    """Return the ICE (STUN/TURN) server list for WebRTC client setup.

    The list is read from ``voice_ice_servers`` on the application state;
    an empty list is returned when that attribute has not been set.
    """
    app_state = request.app.state
    return getattr(app_state, "voice_ice_servers", [])
async def offer(
    request: Request,
    body: SmallWebRTCRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Initiate a WebRTC voice session.

    Performs the SDP offer/answer exchange and starts the selected Pipecat
    voice pipeline as a background task.

    Args:
        request: Incoming request; used to reach the shared voice handler.
        body: WebRTC offer payload forwarded to the handler.
        background_tasks: Task queue on which the voice bot is scheduled.
        current_user: Authenticated user claims (only ``sub`` is logged).
        model: Optional model name; falls back to
            ``settings.voice_default_model`` when empty.
        project_id: Optional project identifier passed through to the bot.
        mode: Pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        The value produced by the handler's ``handle_web_request`` with a
        ``conversation_id`` entry added.

    Raises:
        HTTPException: 400 when ``mode`` or ``model`` is not recognised.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )

    if model:
        # A client-supplied model that is not a valid ModelName is a client
        # error; without this guard the ValueError surfaces as a 500.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'",
            ) from exc
    else:
        # A bad default is a server misconfiguration, so let it propagate.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())
    logger.info(
        "Voice offer user=%s conversation=%s model=%s project=%s mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # request_data is optional on the payload; None is passed on when absent.
    request_data = getattr(body, "request_data", None)
    handler = _get_handler(request)

    async def webrtc_connection_callback(connection):
        """Build the transport for a new connection and schedule the bot."""
        vad_params = resolve_from_request_data(request_data)
        vad_analyzer = SileroVADAnalyzer(params=vad_params)
        transport = SmallWebRTCTransport(
            webrtc_connection=connection,
            params=TransportParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                vad_analyzer=vad_analyzer,
            ),
        )
        background_tasks.add_task(
            run_voice_bot,
            transport,
            conversation_id,
            model_name,
            project_id,
            request_data,
            enable_recording=True,
            mode=mode,
        )

    answer = await handler.handle_web_request(
        request=body,
        webrtc_connection_callback=webrtc_connection_callback,
    )
    answer["conversation_id"] = conversation_id
    return answer
async def ice_candidate(request: Request, body: SmallWebRTCPatchRequest):
    """Forward ICE candidates for an in-progress WebRTC negotiation.

    The patch payload is handed to the shared voice handler and a fixed
    success marker is returned once the handler call completes.
    """
    # NOTE(review): unlike the other handlers here, the visible signature has
    # no get_current_user dependency - confirm this is intentional.
    await _get_handler(request).handle_patch_request(body)
    return {"status": "success"}
async def get_transcript(
    conversation_id: str,
    current_user: dict = Depends(get_current_user),
):
    """Return the stored transcript of one voice conversation.

    Args:
        conversation_id: Identifier of the conversation to look up.
        current_user: Authenticated user claims; not otherwise read here.

    Returns:
        A ``TranscriptListResponse`` carrying the conversation id and the
        messages returned by ``TranscriptStore.get``.
    """
    # NOTE(review): no check ties conversation_id to current_user in this
    # function - confirm whether ownership is enforced elsewhere.
    return TranscriptListResponse(
        conversation_id=conversation_id,
        messages=TranscriptStore.get(conversation_id),
    )
async def daily_start(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Start a Daily.co voice session.

    Creates a Daily room and token, starts the Pipecat bot, and returns
    room_url and token for the client to join via Daily JS SDK.
    Works on Hugging Face Spaces where direct WebRTC (SmallWebRTC) may fail.

    Args:
        request: Incoming request; its optional JSON body may carry a
            ``request_data`` entry.
        background_tasks: Task queue on which the voice bot is scheduled.
        current_user: Authenticated user claims (only ``sub`` is logged).
        model: Optional model name; falls back to
            ``settings.voice_default_model`` when empty.
        project_id: Optional project identifier passed through to the bot.
        mode: Pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        A dict with ``room_url``, ``token`` and ``conversation_id``.

    Raises:
        HTTPException: 400 when ``mode`` or ``model`` is not recognised;
            503 when the Daily API key is missing or the Daily transport
            cannot be imported.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )
    if not settings.daily_api_key:
        raise HTTPException(
            status_code=503,
            detail="Daily transport not configured: set DAILY_API_KEY in environment",
        )

    # Imported lazily so the module still loads where the Daily extra is absent.
    try:
        from pipecat.runner.daily import configure
        from pipecat.transports.daily.transport import DailyParams, DailyTransport
    except ImportError as e:
        hint = (
            "Daily transport is Linux-only (daily-python requires glibc)."
            if sys.platform == "win32"
            else "pip install daily-python (or pipecat-ai[daily])"
        )
        raise HTTPException(
            status_code=503,
            detail=f"Daily transport not available: {hint}. {e}",
        ) from e

    if model:
        # A client-supplied model that is not a valid ModelName is a client
        # error; without this guard the ValueError surfaces as a 500.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'",
            ) from exc
    else:
        # A bad default is a server misconfiguration, so let it propagate.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())
    logger.info(
        "Daily voice start user=%s conversation=%s model=%s project=%s mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # The JSON body is optional: parsing stays best-effort, but a failure is
    # now logged at debug level instead of being dropped silently.
    request_data = None
    try:
        payload = await request.json()
    except Exception:
        logger.debug(
            "Daily voice start: no usable JSON body, request_data left unset",
            exc_info=True,
        )
    else:
        if isinstance(payload, dict):
            request_data = payload.get("request_data")

    # The HTTP session is only needed while the room and token are created.
    async with aiohttp.ClientSession() as session:
        room_config = await configure(session)
    room_url = room_config.room_url
    token = room_config.token

    vad_params = resolve_from_request_data(request_data)
    vad_analyzer = SileroVADAnalyzer(params=vad_params)
    transport = DailyTransport(
        room_url,
        token,
        "Routeur CAPL",
        DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_out_sample_rate=24000,
            vad_analyzer=vad_analyzer,
        ),
    )
    background_tasks.add_task(
        run_voice_bot,
        transport,
        conversation_id,
        model_name,
        project_id,
        request_data,
        enable_recording=False,
        mode=mode,
    )
    return {
        "room_url": room_url,
        "token": token,
        "conversation_id": conversation_id,
    }