File size: 1,651 Bytes
4cd8837
4aaae80
4cd8837
 
 
 
 
 
 
4aaae80
4cd8837
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4aaae80
4cd8837
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f78ea8
4cd8837
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""STT and TTS backend protocol and result types."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Protocol, runtime_checkable

# ── STT ───────────────────────────────────────────────────────────────────────


@dataclass(frozen=True)
class SttSegment:
    """One segment of a speech-to-text transcript.

    The dataclass is frozen, so fields cannot be reassigned once an
    instance has been constructed.
    """

    # Segment boundaries. The names indicate seconds; presumably offsets from
    # the start of the audio -- TODO confirm against the backends.
    start_seconds: float
    end_seconds: float
    # Text of this segment.
    text: str
    # Optional per-segment language; defaults to None. The value format
    # (e.g. ISO code) is not defined here -- verify against the backends.
    language: str | None = None
    # Optional confidence value; defaults to None. Scale and range are not
    # defined here -- verify against the backends.
    confidence: float | None = None


@dataclass(frozen=True)
class SttResult:
    """Result object returned by ``SttBackend.transcribe``.

    The dataclass is frozen, so fields cannot be reassigned. Note that
    ``segments`` is a plain ``list``: freezing does not stop callers from
    mutating the list contents in place.
    """

    # Individual transcript segments.
    segments: list[SttSegment]
    # Complete transcript text. NOTE(review): presumably the concatenation of
    # the segment texts -- nothing here enforces that; confirm with backends.
    full_text: str
    # Language reported for the audio; value format is not defined here.
    detected_language: str
    # NOTE(review): presumably the ``name`` of the backend that produced this
    # result -- confirm against the backend implementations.
    backend: str
    # NOTE(review): presumably elapsed processing time in milliseconds --
    # confirm against the backend implementations.
    ms: int


@runtime_checkable
class SttBackend(Protocol):
    """Structural interface for speech-to-text backends.

    Any object exposing a ``name`` attribute plus ``transcribe`` and
    ``health`` methods satisfies this protocol. Because the class is
    ``runtime_checkable``, ``isinstance(obj, SttBackend)`` works, but it only
    checks that the members exist, not their signatures.
    """

    # Identifier of the backend implementation.
    name: str

    async def transcribe(
        self,
        audio_bytes: bytes,
        language: str | None = None,
        translate_to_en: bool = False,
    ) -> SttResult:
        """Transcribe audio and return an ``SttResult``.

        Args:
            audio_bytes: Raw audio payload. The accepted container/codec is
                not specified by this protocol.
            language: Optional language value; ``None`` by default.
                NOTE(review): presumably ``None`` lets the backend detect the
                language itself -- confirm against implementations.
            translate_to_en: Flag, ``False`` by default. NOTE(review):
                presumably requests an English translation instead of a
                same-language transcript -- confirm against implementations.
        """

    def health(self) -> dict:
        """Return a dict describing the backend's health.

        The keys and values of the dict are not specified by this protocol.
        """


# ── TTS ───────────────────────────────────────────────────────────────────────


@dataclass(frozen=True)
class TtsResult:
    """Result object returned by ``TtsBackend.synthesize``.

    The dataclass is frozen, so fields cannot be reassigned once an
    instance has been constructed.
    """

    # Synthesized audio payload.
    audio_bytes: bytes
    # Format label for ``audio_bytes``. NOTE(review): presumably mirrors the
    # ``audio_format`` argument of ``synthesize`` (default "ogg_vorbis") --
    # confirm against the backend implementations.
    audio_format: str
    # Length of the audio; the name indicates seconds.
    duration_seconds: float
    # NOTE(review): presumably the ``name`` of the backend that produced this
    # result -- confirm against the backend implementations.
    backend: str
    # NOTE(review): presumably elapsed processing time in milliseconds --
    # confirm against the backend implementations.
    ms: int


@runtime_checkable
class TtsBackend(Protocol):
    """Structural interface for text-to-speech backends.

    Any object exposing a ``name`` attribute plus ``synthesize`` and
    ``health`` methods satisfies this protocol. Because the class is
    ``runtime_checkable``, ``isinstance(obj, TtsBackend)`` works, but it only
    checks that the members exist, not their signatures.
    """

    # Identifier of the backend implementation.
    name: str

    async def synthesize(
        self,
        text: str,
        voice: str | None = None,
        language: str = "de",
        audio_format: str = "ogg_vorbis",
    ) -> TtsResult:
        """Synthesize speech for ``text`` and return a ``TtsResult``.

        Args:
            text: Text to be spoken.
            voice: Optional voice selector; ``None`` by default.
                NOTE(review): presumably ``None`` means the backend's default
                voice -- confirm against implementations.
            language: Language value, ``"de"`` by default.
            audio_format: Requested output format label, ``"ogg_vorbis"`` by
                default. The set of supported values is not specified by
                this protocol.
        """

    def health(self) -> dict:
        """Return a dict describing the backend's health.

        The keys and values of the dict are not specified by this protocol.
        """