"""Transcription routes for audio to text conversion.""" from fastapi import APIRouter, UploadFile, File, HTTPException, status, Depends, Query from typing import Optional from core.security import get_current_user from domain.models import TranscriptionResponse, ErrorResponse from services.transcription_service import transcription_service router = APIRouter(prefix="/transcription", tags=["Transcription"]) @router.post( "", response_model=TranscriptionResponse, responses={ 400: {"model": ErrorResponse, "description": "Invalid file format"}, 500: {"model": ErrorResponse, "description": "Transcription failed"} } ) async def transcribe_audio( current_user: dict = Depends(get_current_user), file: UploadFile = File(..., description="Audio file to transcribe"), language: Optional[str] = Query(None, description="ISO-639-1 language code (e.g., 'en', 'fr')"), prompt: Optional[str] = Query(None, description="Optional text to guide the model's style") ) -> TranscriptionResponse: """ Transcribe an audio file to text using OpenAI Whisper. **Supported formats:** mp3, mp4, mpeg, mpga, m4a, wav, webm **Max file size:** 25 MB (OpenAI Whisper limit) Args: file: Audio file upload language: Optional language code to improve accuracy prompt: Optional prompt to guide transcription style current_user: Authenticated user (JWT required) Returns: Transcription with text, detected language, and duration Raises: HTTPException: If file format is unsupported or transcription fails """ # Validate file format if not transcription_service.is_supported_format(file.filename): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Unsupported file format. Supported: mp3, mp4, mpeg, mpga, m4a, wav, webm" ) # Check file size (25 MB limit for Whisper API) file.file.seek(0, 2) # Seek to end file_size = file.file.tell() # Get position (file size) file.file.seek(0) # Reset to beginning max_size = 25 * 1024 * 1024 # 25 MB if file_size > max_size: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"File too large. Maximum size is 25 MB, got {file_size / (1024 * 1024):.2f} MB" ) try: # Transcribe audio result = await transcription_service.transcribe( audio_file=file, language=language, prompt=prompt ) return TranscriptionResponse(**result) except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Transcription failed: {str(e)}" ) @router.get("/supported-formats") async def get_supported_formats( current_user: dict = Depends(get_current_user) ) -> dict: """ Get list of supported audio formats. Returns: Dictionary with supported formats and info """ return { "supported_formats": ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"], "max_file_size_mb": 25, "model": "whisper-1", "languages": "Auto-detection or specify ISO-639-1 code" }