# routeur_ia_api/api/routes/voice.py
# Cyril Dupland
# feat voice: integrate WebRTC ICE server configuration, enhance voice pipeline with new modes, and improve VAD settings for better audio processing
# 8b0c3c9
"""Voice conversation routes using Pipecat WebRTC."""
import logging
import sys
import uuid
import aiohttp
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.smallwebrtc.request_handler import (
SmallWebRTCPatchRequest,
SmallWebRTCRequest,
)
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
from config import settings
from core.security import get_current_user
from domain.enums import ModelName
from domain.models import TranscriptListResponse
from services.voice.bot import run_voice_bot
from services.voice.transcript_store import TranscriptStore
from services.voice.vad_config_service import resolve_from_request_data
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router collecting the voice endpoints; every route below is mounted under /voice.
router = APIRouter(prefix="/voice", tags=["Voice"])
# Pipeline modes accepted by the `mode` query parameter of /offer and
# /daily-start; any other value is rejected with HTTP 400.
VOICE_MODES = frozenset({"classic", "trigger_on_push"})
def _get_handler(request: Request):
    """Fetch the voice request handler stored on the application state.

    Reads ``request.app.state.voice_handler``. If nothing was stored under
    that attribute, the resulting ``AttributeError`` propagates to the caller.
    """
    app_state = request.app.state
    return app_state.voice_handler
@router.get("/capabilities")
async def capabilities(_=Depends(get_current_user)):
    """Report which optional voice transports this deployment can offer.

    Requires an authenticated user (the resolved user itself is not used).

    Returns:
        A dict with a single ``daily_available`` flag, true when
        ``settings.daily_api_key`` holds a truthy value.
    """
    has_daily_key = bool(settings.daily_api_key)
    return {"daily_available": has_daily_key}
@router.get("/ice-servers")
async def ice_servers(request: Request, _=Depends(get_current_user)):
    """Expose the ICE server list (STUN/TURN) used to configure the WebRTC client.

    Requires an authenticated user (the resolved user itself is not used).

    Returns:
        The value of ``request.app.state.voice_ice_servers`` when that
        attribute exists, otherwise an empty list.
    """
    app_state = request.app.state
    configured_servers = getattr(app_state, "voice_ice_servers", [])
    return configured_servers
@router.post("/offer")
async def offer(
    request: Request,
    body: SmallWebRTCRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Initiate a WebRTC voice session.

    Performs the SDP offer/answer exchange and starts the selected Pipecat
    voice pipeline as a background task.

    Args:
        request: Incoming request; used to reach the voice handler stored on
            the application state.
        body: WebRTC offer payload. Its optional ``request_data`` attribute is
            forwarded to VAD parameter resolution and to the bot.
        background_tasks: FastAPI background task queue that runs the bot.
        current_user: Authenticated user; only its ``sub`` entry is logged.
        model: Optional model name override. When empty or omitted,
            ``settings.voice_default_model`` is used.
        project_id: Optional project identifier passed through to the bot.
        mode: Voice pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        The answer produced by the voice handler, with a ``conversation_id``
        entry added.

    Raises:
        HTTPException: 400 when ``mode`` is not a known voice mode or when
            ``model`` is not a valid ``ModelName`` value.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )

    if model:
        # A client-supplied model that is not a ModelName value is a client
        # error: answer 400 instead of letting ValueError become a 500.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            available = ", ".join(str(m.value) for m in ModelName)
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'. Available models: {available}",
            ) from exc
    else:
        # A bad configured default is a server-side problem, so it is
        # deliberately left to propagate.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())

    logger.info(
        "Voice offer user=%s conversation=%s model=%s project=%s mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # request_data is optional on the offer payload.
    request_data = getattr(body, "request_data", None)
    handler = _get_handler(request)

    async def webrtc_connection_callback(connection):
        """Build the transport for ``connection`` and schedule the voice bot."""
        vad_params = resolve_from_request_data(request_data)
        vad_analyzer = SileroVADAnalyzer(params=vad_params)
        transport = SmallWebRTCTransport(
            webrtc_connection=connection,
            params=TransportParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                vad_analyzer=vad_analyzer,
            ),
        )
        background_tasks.add_task(
            run_voice_bot,
            transport,
            conversation_id,
            model_name,
            project_id,
            request_data,
            enable_recording=True,
            mode=mode,
        )

    answer = await handler.handle_web_request(
        request=body,
        webrtc_connection_callback=webrtc_connection_callback,
    )
    # Return the generated id alongside the handler's answer.
    answer["conversation_id"] = conversation_id
    return answer
@router.patch("/offer")
async def ice_candidate(
    request: Request,
    body: SmallWebRTCPatchRequest,
    _=Depends(get_current_user),
):
    """Exchange ICE candidates for an in-progress WebRTC negotiation.

    Requires an authenticated user, like the other routes of this router
    (the resolved user itself is not used).

    Args:
        request: Incoming request; used to reach the voice handler stored on
            the application state.
        body: Patch payload forwarded unchanged to the voice handler.

    Returns:
        ``{"status": "success"}`` once the handler has processed the patch.
    """
    handler = _get_handler(request)
    await handler.handle_patch_request(body)
    return {"status": "success"}
@router.get("/transcript/{conversation_id}", response_model=TranscriptListResponse)
async def get_transcript(
    conversation_id: str,
    current_user: dict = Depends(get_current_user),
):
    """Fetch the stored transcript of one voice conversation.

    Args:
        conversation_id: Identifier of the conversation to look up.
        current_user: Authenticated user; required by the dependency but not
            otherwise used here.

    Returns:
        A ``TranscriptListResponse`` carrying the conversation id and whatever
        ``TranscriptStore.get`` returns for it.
    """
    # NOTE(review): no per-user ownership check is visible in this route —
    # confirm whether TranscriptStore scopes transcripts by user.
    return TranscriptListResponse(
        conversation_id=conversation_id,
        messages=TranscriptStore.get(conversation_id),
    )
@router.post("/daily-start")
async def daily_start(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Start a Daily.co voice session.

    Creates a Daily room and token, starts the Pipecat bot, and returns
    room_url and token for the client to join via Daily JS SDK.
    Works on Hugging Face Spaces where direct WebRTC (SmallWebRTC) may fail.

    Args:
        request: Incoming request; its JSON body may carry an optional
            ``request_data`` entry.
        background_tasks: FastAPI background task queue that runs the bot.
        current_user: Authenticated user; only its ``sub`` entry is logged.
        model: Optional model name override. When empty or omitted,
            ``settings.voice_default_model`` is used.
        project_id: Optional project identifier passed through to the bot.
        mode: Voice pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        A dict with ``room_url``, ``token`` and ``conversation_id``.

    Raises:
        HTTPException: 400 when ``mode`` or ``model`` is not recognised;
            503 when no Daily API key is configured or the Daily transport
            modules cannot be imported.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )

    if not settings.daily_api_key:
        raise HTTPException(
            status_code=503,
            detail="Daily transport not configured: set DAILY_API_KEY in environment",
        )

    # Imported lazily so the module still loads where the Daily extras are
    # not installed.
    try:
        from pipecat.runner.daily import configure
        from pipecat.transports.daily.transport import DailyParams, DailyTransport
    except ImportError as e:
        hint = (
            "Daily transport is Linux-only (daily-python requires glibc)."
            if sys.platform == "win32"
            else "pip install daily-python (or pipecat-ai[daily])"
        )
        raise HTTPException(
            status_code=503,
            detail=f"Daily transport not available: {hint}. {e}",
        ) from e

    if model:
        # A client-supplied model that is not a ModelName value is a client
        # error: answer 400 instead of letting ValueError become a 500.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            available = ", ".join(str(m.value) for m in ModelName)
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'. Available models: {available}",
            ) from exc
    else:
        # A bad configured default is a server-side problem, so it is
        # deliberately left to propagate.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())

    logger.info(
        "Daily voice start user=%s conversation=%s model=%s project=%s mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # The JSON body is optional: parsing stays best-effort, but the failure
    # is logged instead of being silently discarded.
    request_data = None
    try:
        payload = await request.json()
    except Exception:
        logger.debug(
            "Daily voice start conversation=%s: no usable JSON body, continuing without request_data",
            conversation_id,
            exc_info=True,
        )
    else:
        if isinstance(payload, dict):
            request_data = payload.get("request_data")

    # The HTTP session is only needed while the room and token are obtained.
    async with aiohttp.ClientSession() as session:
        room_config = await configure(session)
        room_url = room_config.room_url
        token = room_config.token

    vad_params = resolve_from_request_data(request_data)
    vad_analyzer = SileroVADAnalyzer(params=vad_params)
    transport = DailyTransport(
        room_url,
        token,
        "Routeur CAPL",
        DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_out_sample_rate=24000,
            vad_analyzer=vad_analyzer,
        ),
    )

    background_tasks.add_task(
        run_voice_bot,
        transport,
        conversation_id,
        model_name,
        project_id,
        request_data,
        enable_recording=False,
        mode=mode,
    )

    return {
        "room_url": room_url,
        "token": token,
        "conversation_id": conversation_id,
    }