File size: 7,518 Bytes
48d6c63
 
8824891
0d9dbeb
48d6c63
a7400dd
 
8824891
 
48d6c63
 
 
 
8824891
48d6c63
 
 
 
f11d5d5
8824891
f11d5d5
8824891
48d6c63
 
 
 
 
8b0c3c9
 
 
8f36e50
 
 
 
 
8824891
 
 
 
48d6c63
f11d5d5
8f36e50
 
 
 
f11d5d5
48d6c63
 
 
8f36e50
 
48d6c63
 
 
 
8b0c3c9
48d6c63
 
 
f11d5d5
 
48d6c63
8b0c3c9
f11d5d5
 
8b0c3c9
f11d5d5
 
48d6c63
 
 
 
 
f11d5d5
48d6c63
 
 
 
f11d5d5
48d6c63
 
8f36e50
 
bd76267
48d6c63
8824891
 
 
 
 
 
 
 
 
 
48d6c63
8824891
 
bd76267
 
 
 
8824891
8b0c3c9
48d6c63
 
8f36e50
 
48d6c63
 
f11d5d5
48d6c63
 
 
 
8f36e50
48d6c63
8f36e50
 
48d6c63
f11d5d5
 
 
 
 
 
 
 
 
 
8b0c3c9
 
a7400dd
 
 
 
 
 
 
8b0c3c9
a7400dd
 
 
 
 
 
 
8b0c3c9
 
 
 
 
 
a7400dd
 
 
 
 
 
8824891
 
 
 
 
 
 
 
 
 
 
 
 
 
a7400dd
 
 
 
 
 
8b0c3c9
a7400dd
 
 
 
8b0c3c9
a7400dd
 
 
 
 
 
 
 
 
 
 
 
 
 
8824891
 
 
a7400dd
 
8824891
 
 
 
 
 
 
 
 
 
 
a7400dd
 
 
 
8824891
8b0c3c9
a7400dd
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
"""Voice conversation routes using Pipecat WebRTC."""
import logging
import sys
import uuid

import aiohttp
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.transports.base_transport import TransportParams
from pipecat.transports.smallwebrtc.request_handler import (
    SmallWebRTCPatchRequest,
    SmallWebRTCRequest,
)
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport

from config import settings
from core.security import get_current_user
from domain.enums import ModelName
from domain.models import TranscriptListResponse
from services.voice.bot import run_voice_bot
from services.voice.transcript_store import TranscriptStore
from services.voice.vad_config_service import resolve_from_request_data

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router collecting every endpoint in this module under the "/voice" prefix.
router = APIRouter(prefix="/voice", tags=["Voice"])

# Voice pipeline modes accepted by the `mode` query parameter of the
# session-start endpoints; any other value is rejected with HTTP 400.
VOICE_MODES = frozenset({"classic", "trigger_on_push"})


def _get_handler(request: Request):
    """Fetch the voice request handler stored on the application state."""
    app_state = request.app.state
    return app_state.voice_handler


@router.get("/capabilities")
async def capabilities(_=Depends(get_current_user)):
    """Report which optional voice transports this deployment can offer.

    The Daily transport is advertised as available only when a Daily API key
    is present in the settings.
    """
    has_daily_key = bool(settings.daily_api_key)
    return {"daily_available": has_daily_key}


@router.get("/ice-servers")
async def ice_servers(request: Request, _=Depends(get_current_user)):
    """Return the ICE servers (STUN/TURN) the WebRTC client should use.

    The list is read from ``voice_ice_servers`` on the application state; an
    empty list is returned when that attribute has not been set.
    """
    app_state = request.app.state
    try:
        return app_state.voice_ice_servers
    except AttributeError:
        return []


@router.post("/offer")
async def offer(
    request: Request,
    body: SmallWebRTCRequest,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Initiate a WebRTC voice session.

    Performs the SDP offer/answer exchange through the application's voice
    handler and schedules the selected Pipecat voice pipeline as a background
    task.

    Args:
        request: Incoming request; used to reach the voice handler stored on
            the application state.
        body: WebRTC offer payload sent by the client.
        background_tasks: Task queue on which the voice bot is scheduled.
        current_user: Authenticated user claims; only ``sub`` is read, for
            logging.
        model: Optional LLM model override. When empty, the configured
            ``settings.voice_default_model`` is used.
        project_id: Optional project ID forwarded to the voice bot.
        mode: Voice pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        The handler's answer with an added ``conversation_id`` key.

    Raises:
        HTTPException: 400 if ``mode`` is unknown or ``model`` is not a valid
            ``ModelName``; 500 if the handler returns no answer.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )

    if model:
        # The override comes straight from the query string: an unknown value
        # is a client error, not an internal one.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'",
            ) from exc
    else:
        # A bad default is a server misconfiguration, so it is left to
        # propagate instead of being reported as a client error.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())
    logger.info(
        "Voice offer  user=%s  conversation=%s  model=%s  project=%s  mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # Optional client-supplied data; absent on payloads that do not carry it.
    request_data = getattr(body, "request_data", None)
    handler = _get_handler(request)

    async def webrtc_connection_callback(connection):
        """Build the transport for ``connection`` and schedule the voice bot."""
        vad_params = resolve_from_request_data(request_data)
        vad_analyzer = SileroVADAnalyzer(params=vad_params)
        transport = SmallWebRTCTransport(
            webrtc_connection=connection,
            params=TransportParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                vad_analyzer=vad_analyzer,
            ),
        )
        background_tasks.add_task(
            run_voice_bot,
            transport,
            conversation_id,
            model_name,
            project_id,
            request_data,
            enable_recording=True,
            mode=mode,
        )

    answer = await handler.handle_web_request(
        request=body,
        webrtc_connection_callback=webrtc_connection_callback,
    )
    if answer is None:
        # NOTE(review): the handler's return type appears to be optional —
        # confirm against pipecat. Fail with a clear error rather than a
        # TypeError on the item assignment below.
        raise HTTPException(
            status_code=500,
            detail="WebRTC negotiation did not produce an answer",
        )
    answer["conversation_id"] = conversation_id
    return answer


@router.patch("/offer")
async def ice_candidate(request: Request, body: SmallWebRTCPatchRequest):
    """Forward an ICE candidate patch for an in-progress WebRTC negotiation.

    The patch is handed to the application's voice handler; a fixed success
    payload is returned once the handler has processed it.

    NOTE(review): unlike the other routes in this router, this endpoint does
    not depend on ``get_current_user`` — confirm that this is intentional.
    """
    voice_handler = _get_handler(request)
    await voice_handler.handle_patch_request(body)
    return {"status": "success"}


@router.get("/transcript/{conversation_id}", response_model=TranscriptListResponse)
async def get_transcript(
    conversation_id: str,
    current_user: dict = Depends(get_current_user),
):
    """Look up the stored transcript of a voice conversation.

    Args:
        conversation_id: Identifier of the conversation to fetch.
        current_user: Authenticated user claims. Not read in the body; the
            dependency only enforces authentication.

    Returns:
        A ``TranscriptListResponse`` holding the conversation ID and whatever
        ``TranscriptStore.get`` returns for it.

    NOTE(review): no ownership check is visible here — any authenticated user
    can request any conversation ID. Confirm this is acceptable.
    """
    return TranscriptListResponse(
        conversation_id=conversation_id,
        messages=TranscriptStore.get(conversation_id),
    )


@router.post("/daily-start")
async def daily_start(
    request: Request,
    background_tasks: BackgroundTasks,
    current_user: dict = Depends(get_current_user),
    model: str = Query(default=None, description="LLM model name override"),
    project_id: str = Query(default=None, description="Project ID for scoped retrieval"),
    mode: str = Query(default="classic", description="Voice pipeline mode (classic or trigger_on_push)"),
):
    """Start a Daily.co voice session.

    Creates a Daily room and token, schedules the Pipecat bot, and returns
    room_url and token for the client to join via Daily JS SDK.
    Works on Hugging Face Spaces where direct WebRTC (SmallWebRTC) may fail.

    Args:
        request: Incoming request. Its JSON body is optional; when it is a
            JSON object, its ``request_data`` entry is forwarded to the bot.
        background_tasks: Task queue on which the voice bot is scheduled.
        current_user: Authenticated user claims; only ``sub`` is read, for
            logging.
        model: Optional LLM model override. When empty, the configured
            ``settings.voice_default_model`` is used.
        project_id: Optional project ID forwarded to the voice bot.
        mode: Voice pipeline mode; must be one of ``VOICE_MODES``.

    Returns:
        A dict with ``room_url``, ``token`` and ``conversation_id``.

    Raises:
        HTTPException: 400 if ``mode`` is unknown or ``model`` is not a valid
            ``ModelName``; 503 if no Daily API key is configured or the Daily
            transport cannot be imported.
    """
    if mode not in VOICE_MODES:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown voice mode '{mode}'. Available modes: {', '.join(sorted(VOICE_MODES))}",
        )

    if not settings.daily_api_key:
        raise HTTPException(
            status_code=503,
            detail="Daily transport not configured: set DAILY_API_KEY in environment",
        )

    # Imported lazily so the module still loads where the Daily extras are
    # not installed.
    try:
        from pipecat.runner.daily import configure
        from pipecat.transports.daily.transport import DailyParams, DailyTransport
    except ImportError as e:
        # No trailing period on either hint: the detail template adds one.
        hint = (
            "Daily transport is Linux-only (daily-python requires glibc)"
            if sys.platform == "win32"
            else "pip install daily-python (or pipecat-ai[daily])"
        )
        raise HTTPException(
            status_code=503,
            detail=f"Daily transport not available: {hint}. {e}",
        ) from e

    if model:
        # The override comes straight from the query string: an unknown value
        # is a client error, not an internal one.
        try:
            model_name = ModelName(model)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown model '{model}'",
            ) from exc
    else:
        # A bad default is a server misconfiguration, so it is left to
        # propagate instead of being reported as a client error.
        model_name = ModelName(settings.voice_default_model)

    conversation_id = str(uuid.uuid4())

    logger.info(
        "Daily voice start  user=%s  conversation=%s  model=%s  project=%s  mode=%s",
        current_user.get("sub", "?"),
        conversation_id,
        model_name.value,
        project_id,
        mode,
    )

    # The body is optional: a missing or malformed payload must not fail the
    # request, but the reason is logged instead of being dropped silently.
    request_data = None
    try:
        body = await request.json()
    except Exception:
        logger.debug(
            "Daily voice start: no usable JSON body; continuing without request_data",
            exc_info=True,
        )
    else:
        if isinstance(body, dict):
            request_data = body.get("request_data")

    # The HTTP session is only needed to obtain the room configuration.
    async with aiohttp.ClientSession() as session:
        room_config = await configure(session)
        room_url = room_config.room_url
        token = room_config.token

    vad_params = resolve_from_request_data(request_data)
    vad_analyzer = SileroVADAnalyzer(params=vad_params)
    transport = DailyTransport(
        room_url,
        token,
        "Routeur CAPL",
        DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            audio_out_sample_rate=24000,
            vad_analyzer=vad_analyzer,
        ),
    )
    background_tasks.add_task(
        run_voice_bot,
        transport,
        conversation_id,
        model_name,
        project_id,
        request_data,
        enable_recording=False,
        mode=mode,
    )

    return {
        "room_url": room_url,
        "token": token,
        "conversation_id": conversation_id,
    }