File size: 7,612 Bytes
d28f1ed
 
f11d5d5
0ef1224
c392583
d28f1ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86fd22c
c392583
 
7c43c4c
c392583
d28f1ed
 
 
 
 
 
 
53e5530
 
686df05
 
 
 
e26bba7
 
 
 
 
d28f1ed
 
 
 
 
 
c392583
d28f1ed
 
e26bba7
 
 
 
d28f1ed
 
 
 
 
 
e26bba7
 
 
 
d28f1ed
 
 
 
 
 
1f2c8d1
 
 
 
d28f1ed
 
e081eb3
d28f1ed
1f2c8d1
d28f1ed
 
f11d5d5
 
 
 
 
 
 
 
 
 
 
 
 
d28f1ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c392583
d28f1ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef1224
d28f1ed
 
 
 
 
 
 
 
0ef1224
d28f1ed
595f77d
bd76267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595f77d
 
 
 
 
 
 
e081eb3
 
595f77d
 
 
 
 
 
83d7f8a
 
595f77d
 
 
 
 
 
 
 
 
 
83d7f8a
 
 
 
 
1f2c8d1
595f77d
 
 
 
 
 
8a00f1c
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
"""Pydantic models for request/response schemas."""
from pydantic import BaseModel, Field
from typing import Optional, List, Dict, Any, Literal
from datetime import datetime, timezone
from .enums import ModelName


# ============ Auth Models ============

class TokenRequest(BaseModel):
    """Request body for obtaining a JWT token.

    The model declares no fields yet; it can be extended with
    username/password later.
    """
    # For now, a token could simply be returned without any input.
    # Later on, username/password fields can be added here.


class TokenResponse(BaseModel):
    """JWT token response.

    Carries the issued token together with its type and an expiry value.
    """
    # The token string returned to the caller.
    access_token: str
    # Token scheme label; defaults to "bearer" when not set explicitly.
    token_type: str = "bearer"
    # NOTE(review): presumably the token lifetime in seconds — confirm against the issuing code.
    expires_in: int


# ============ Completion Models ============

class CompletionRequest(BaseModel):
    """Input payload for a text-completion call.

    Only ``message`` is required; every other field carries a default.
    """
    # --- Prompt, model and agent selection ---
    message: str = Field(
        ...,
        description="User message to complete",
    )
    model: ModelName = Field(
        ModelName.MISTRAL_LARGE,
        description="LLM model to use",
    )
    agent: Optional[str] = Field(
        None,
        description="Agent identifier to use (ex: 'V1', 'V2', or 'AGENT'). If omitted, defaults to 'AGENT'.",
    )

    # --- Generation options ---
    stream: bool = Field(
        False,
        description="Enable streaming response",
    )
    # Temperature is validated to lie within [0.0, 2.0].
    temperature: float = Field(
        0.7,
        description="Sampling temperature",
        ge=0.0,
        le=2.0,
    )
    max_tokens: Optional[int] = Field(
        None,
        description="Maximum tokens to generate",
    )
    conversation_history: Optional[List[Dict[str, str]]] = Field(
        None,
        description="Optional conversation history",
    )

    # --- Project-scoped retrieval ---
    project_id: Optional[str] = Field(
        None,
        description="Optional project id to scope retrieval",
    )
    sources: Optional[List[str]] = Field(
        None,
        description="Optional list of document UUIDs to restrict project retrieval. Empty list or null means no restriction.",
    )

    # --- Server-side memory (LangGraph thread_id) ---
    # When set, conversation history is managed by the server and
    # conversation_history is ignored.
    conversation_id: Optional[str] = Field(
        None,
        description="Conversation id for server-side memory (thread_id). When set, conversation_history is ignored.",
    )


class CompletionResponse(BaseModel):
    """Result of a non-streaming text completion.

    ``response`` and ``model`` are required; the remaining fields are
    optional and default to ``None``.
    """
    # Generated text and the name of the model reported for it.
    response: str
    model: str
    # Optional agent identifier associated with the response.
    agent: Optional[str] = None
    # Free-form dictionaries; their keys are not constrained by this model.
    usage: Optional[Dict[str, Any]] = None
    metadata: Optional[Dict[str, Any]] = None
    conversation_id: Optional[str] = Field(
        None,
        description="Conversation id used for this request. Reuse it in subsequent requests to maintain server-side memory.",
    )


class StreamChunk(BaseModel):
    """One piece of a streamed completion response."""
    # Text carried by this chunk.
    content: str
    # Completion flag; False unless set explicitly.
    done: bool = False
    conversation_id: Optional[str] = Field(
        None,
        description="Conversation id present in every chunk. Reuse it in subsequent requests to maintain server-side memory.",
    )
    # Free-form dictionary; its keys are not constrained by this model.
    metadata: Optional[Dict[str, Any]] = None


# ============ Transcription Models ============

class TranscriptionResponse(BaseModel):
    """Response for audio transcription.

    Only ``text`` is required. Usage, emissions, latency, etc. are reported
    under metadata.usage and metadata.emissions_*.
    """
    # Transcribed text.
    text: str
    # Optional language value for the transcript; format is not constrained here.
    language: Optional[str] = None
    # NOTE(review): the `_s` suffix suggests audio duration in seconds — confirm with the producer.
    duration_s: Optional[float] = None
    # Model name reported for the transcription; defaults to "whisper-1".
    model: str = "whisper-1"
    # Free-form dictionary (see the class docstring for the keys mentioned there).
    metadata: Optional[Dict[str, Any]] = None


class TranscriptMessage(BaseModel):
    """Single message in a voice conversation transcript.

    All three fields are required.
    """
    # Speaker of the message; only "user" or "assistant" are accepted.
    role: Literal["user", "assistant"]
    # Text content of the message.
    text: str
    # Timestamp attached to the message.
    timestamp: datetime


class TranscriptListResponse(BaseModel):
    """Transcript for a given conversation.

    Pairs a conversation id with its list of transcript messages.
    """
    # Identifier of the conversation the messages belong to.
    conversation_id: str
    # Transcript entries, each a TranscriptMessage.
    messages: List[TranscriptMessage]


# ============ Model Info Models ============

class ModelInfo(BaseModel):
    """Information about an available model.

    ``name`` and ``provider`` are required; the other fields have defaults.
    """
    # Model name and the provider it is attributed to.
    name: str
    provider: str
    # Optional free-text description.
    description: Optional[str] = None
    # Streaming capability flag; True unless set explicitly.
    supports_streaming: bool = True
    # NOTE(review): presumably the context size in tokens — confirm with the model registry.
    context_window: Optional[int] = None


class ModelsListResponse(BaseModel):
    """List of available models."""
    # Model entries, each a ModelInfo.
    models: List[ModelInfo]
    # NOTE(review): presumably the number of entries in `models`; this model does not enforce it.
    total: int


class AgentInfo(BaseModel):
    """Information about an available agent.

    ``type``, ``name`` and ``description`` are required.
    """
    # Agent type value (a plain string; allowed values are not constrained here).
    type: str
    # Agent name and free-text description.
    name: str
    description: str
    # Availability flag; True unless set explicitly.
    available: bool = True


class AgentsListResponse(BaseModel):
    """List of available agents."""
    # Agent entries, each an AgentInfo.
    agents: List[AgentInfo]
    # NOTE(review): presumably the number of entries in `agents`; this model does not enforce it.
    total: int


# ============ Error Models ============

class ErrorResponse(BaseModel):
    """Payload describing an error.

    ``error`` is required, ``detail`` is optional, and ``timestamp`` is
    filled in automatically when not supplied.
    """
    error: str
    detail: Optional[str] = None
    # Defaults to the current time as a timezone-aware UTC datetime,
    # evaluated each time an instance is created without a timestamp.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )


# ============ Health Check ============

class HealthResponse(BaseModel):
    """Payload returned by the health check.

    ``status`` and ``version`` are required; ``timestamp`` is filled in
    automatically when not supplied.
    """
    status: str
    version: str
    # Defaults to the current time as a timezone-aware UTC datetime,
    # evaluated each time an instance is created without a timestamp.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )


# ============ Voice / VAD Models ============

class VadConfigDTO(BaseModel):
    """Validated Voice Activity Detection settings from client or settings.

    This is a domain DTO: it only declares the values and their allowed
    ranges. Mapping to Pipecat VADParams is done in
    services/voice/vad_config_service to keep domain independent of Pipecat.
    """
    # Accepted range: 0.2 to 2.0 (inclusive); the default sits at the lower bound.
    vad_stop_secs: float = Field(
        0.2,
        description="Seconds of silence before confirming speech has stopped",
        ge=0.2,
        le=2.0,
    )
    # Accepted range: 0.1 to 0.5 (inclusive).
    vad_start_secs: float = Field(
        0.2,
        description="Seconds of speech before confirming voice start",
        ge=0.1,
        le=0.5,
    )
    # Accepted range: 0.5 to 0.95 (inclusive).
    vad_confidence: float = Field(
        0.7,
        description="Minimum confidence threshold for voice detection (0-1)",
        ge=0.5,
        le=0.95,
    )
    # Accepted range: 0.3 to 0.9 (inclusive).
    vad_min_volume: float = Field(
        0.6,
        description="Minimum audio volume threshold for speech detection (0-1)",
        ge=0.3,
        le=0.9,
    )


# ============ Upload / Ingestion Jobs ============

class UploadJobResponse(BaseModel):
    """Acknowledgement returned when a background upload/ingestion job is created.

    Only ``job_id`` is required.
    """
    job_id: str
    # Status label; "queued" unless set explicitly.
    status: str = "queued"
    # Defaults to the current time as a timezone-aware UTC datetime,
    # evaluated each time an instance is created without a value.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc),
    )
    # Optional: server-measured audio duration for transcription jobs (seconds).
    duration_s: Optional[float] = None


class JobStatusResponse(BaseModel):
    """Snapshot of ingestion job status for polling.

    ``job_id`` and ``status`` are required; every other field is optional
    and defaults to ``None``, so a snapshot may carry only the fields
    relevant to its job type.
    """
    # Job identifier and its current status label (plain strings; values not constrained here).
    job_id: str
    status: str
    # Logical job type: document_ingestion | transcription_audio | transcription_meeting | ...
    job_type: Optional[str] = None
    # Progress and stage.
    # NOTE(review): `progress` looks like a fractional value and `progress_percent`
    # an integer percentage; neither range is validated here — confirm with the producer.
    progress: Optional[float] = None
    progress_percent: Optional[int] = None
    stage: Optional[str] = None  # upload | ocr | chunk | embed | index
    # Counters (total vs. completed pages and chunks, plus an inserted-items count).
    pages_total: Optional[int] = None
    pages_done: Optional[int] = None
    chunks_total: Optional[int] = None
    chunks_done: Optional[int] = None
    inserted_count: Optional[int] = None
    # Transcription-specific fields (for audio transcription jobs)
    transcript_text: Optional[str] = None
    transcript_language: Optional[str] = None
    # NOTE(review): unit of transcript_duration is not stated here — presumably seconds; confirm.
    transcript_duration: Optional[float] = None
    transcript_model: Optional[str] = None
    transcript_metadata: Optional[Dict[str, Any]] = None  # usage, emissions_kgCO2eq, latency_s, etc.
    # Error message (if any) and lifecycle timestamps.
    error: Optional[str] = None
    created_at: Optional[datetime] = None
    started_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None


class DeleteDocumentResponse(BaseModel):
    """Response returned when deleting a document's vector chunks.

    All three fields are required.
    """
    # Project and document the deletion applies to.
    project_id: str
    document_id: str
    # NOTE(review): presumably the number of vector chunks removed — confirm with the deleting service.
    deleted_count: int