"""Transcription service using OpenAI Whisper API.""" from typing import Optional import tempfile import os from fastapi import UploadFile from openai import AsyncOpenAI from config import settings class TranscriptionService: """Service for audio transcription using OpenAI Whisper.""" def __init__(self): """Initialize transcription service with OpenAI client.""" self.client = AsyncOpenAI(api_key=settings.openai_api_key) self.model = "whisper-1" async def transcribe( self, audio_file: UploadFile, language: Optional[str] = None, prompt: Optional[str] = None ) -> dict: """ Transcribe audio file to text using Whisper API. Args: audio_file: Uploaded audio file language: Optional ISO-639-1 language code (e.g., 'en', 'fr') prompt: Optional text to guide the model's style Returns: Dictionary with transcription text and metadata Raises: Exception: If transcription fails """ # Create a temporary file to save the upload # Whisper API requires a file path, not file content with tempfile.NamedTemporaryFile(delete=False, suffix=self._get_file_extension(audio_file.filename)) as tmp_file: try: # Write uploaded content to temp file content = await audio_file.read() tmp_file.write(content) tmp_file.flush() # Call Whisper API with open(tmp_file.name, "rb") as audio: transcript = await self.client.audio.transcriptions.create( model=self.model, file=audio, language=language, prompt=prompt, response_format="verbose_json" # Get more metadata ) # Extract information result = { "text": transcript.text, "language": getattr(transcript, "language", None), "duration": getattr(transcript, "duration", None), "model": self.model } return result finally: # Clean up temp file if os.path.exists(tmp_file.name): os.unlink(tmp_file.name) @staticmethod def _get_file_extension(filename: Optional[str]) -> str: """ Extract file extension from filename. Args: filename: Name of the file Returns: File extension with dot (e.g., '.mp3') """ if filename and "." in filename: return "." + filename.rsplit(".", 1)[1] return ".mp3" # Default extension def is_supported_format(self, filename: str) -> bool: """ Check if audio format is supported by Whisper. Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm Args: filename: Name of the file Returns: True if format is supported """ supported_formats = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"} extension = self._get_file_extension(filename).lower() return extension in supported_formats # Singleton instance transcription_service = TranscriptionService()