| """Transcription service using OpenAI Whisper API.""" |
| from typing import Optional |
| import tempfile |
| import os |
| from fastapi import UploadFile |
| from openai import AsyncOpenAI |
| from config import settings |
|
|
|
|
class TranscriptionService:
    """Service for audio transcription using OpenAI Whisper.

    Wraps an ``AsyncOpenAI`` client and exposes a coroutine that sends an
    uploaded audio file to the transcription endpoint, plus small helpers
    for inspecting the upload's file extension.
    """

    # Extensions accepted by ``is_supported_format`` (lowercase, leading dot).
    _SUPPORTED_FORMATS = frozenset(
        {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"}
    )

    def __init__(self):
        """Initialize transcription service with OpenAI client."""
        self.client = AsyncOpenAI(api_key=settings.openai_api_key)
        self.model = "whisper-1"

    async def transcribe(
        self,
        audio_file: UploadFile,
        language: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> dict:
        """
        Transcribe audio file to text using Whisper API.

        The upload is read fully into memory, written to a temporary file
        whose suffix mirrors the upload's extension, and that file is then
        sent to the API. The temporary file is always removed afterwards.

        Args:
            audio_file: Uploaded audio file
            language: Optional ISO-639-1 language code (e.g., 'en', 'fr')
            prompt: Optional text to guide the model's style

        Returns:
            Dictionary with keys ``text``, ``language``, ``duration`` and
            ``model``. ``language`` and ``duration`` are ``None`` when the
            response object does not carry those attributes.

        Raises:
            Exception: If reading the upload, writing the temporary file,
                or the API call fails; the error propagates unchanged.
        """
        suffix = self._get_file_extension(audio_file.filename)
        content = await audio_file.read()

        # Only forward options the caller actually set, so the request does
        # not depend on how the client library serializes ``None``.
        request_options = {
            "model": self.model,
            "response_format": "verbose_json",
        }
        if language is not None:
            request_options["language"] = language
        if prompt is not None:
            request_options["prompt"] = prompt

        tmp_path = None
        try:
            # delete=False: the file must outlive this handle so it can be
            # reopened by name below; it is removed in the ``finally``.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_path = tmp_file.name
                tmp_file.write(content)

            # The writer handle is closed (and flushed) before reopening;
            # reopening a still-open temporary file is not portable.
            with open(tmp_path, "rb") as audio:
                transcript = await self.client.audio.transcriptions.create(
                    file=audio,
                    **request_options
                )
        finally:
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except FileNotFoundError:
                    # Already gone; nothing left to clean up.
                    pass

        return {
            "text": transcript.text,
            "language": getattr(transcript, "language", None),
            "duration": getattr(transcript, "duration", None),
            "model": self.model
        }

    @staticmethod
    def _get_file_extension(filename: Optional[str]) -> str:
        """
        Extract file extension from filename.

        Directory components (``/`` or ``\\`` separated) are discarded
        before looking for the extension, so the result never contains a
        path separator and is safe to use as a temporary-file suffix.

        Args:
            filename: Name of the file; may be ``None`` or empty

        Returns:
            File extension with dot (e.g., '.mp3'), with its case
            preserved. Falls back to '.mp3' when there is no filename or
            the final path component contains no dot.
        """
        if not filename:
            return ".mp3"

        # The name comes from the client, so keep only the last path
        # component before splitting on the final dot.
        base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
        _, dot, extension = base_name.rpartition(".")
        if not dot:
            return ".mp3"
        return "." + extension

    def is_supported_format(self, filename: str) -> bool:
        """
        Check if audio format is supported by Whisper.

        Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm

        The comparison is case-insensitive. A filename without an
        extension is treated as '.mp3' (the default extension) and is
        therefore reported as supported.

        Args:
            filename: Name of the file

        Returns:
            True if format is supported
        """
        extension = self._get_file_extension(filename).lower()
        return extension in self._SUPPORTED_FORMATS
|
|
|
|
| |
# Shared module-level service instance, constructed once at import time.
transcription_service: TranscriptionService = TranscriptionService()
|
|
|
|