File size: 3,574 Bytes
"""Transcription service using OpenAI Whisper API."""
from typing import Optional
import tempfile
import os
from fastapi import UploadFile
from openai import AsyncOpenAI
from config import settings
class TranscriptionService:
    """Service for audio transcription using OpenAI Whisper.

    Wraps an ``AsyncOpenAI`` client and exposes one coroutine,
    :meth:`transcribe`, plus a filename-based format check.
    """

    # Extensions (lower-case, with leading dot) accepted by
    # is_supported_format.
    # NOTE(review): OpenAI's current docs may list further formats
    # (e.g. flac, ogg) — confirm before extending this set.
    SUPPORTED_FORMATS = frozenset(
        {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"}
    )

    # Extension assumed when an upload's filename carries no usable one.
    DEFAULT_EXTENSION = ".mp3"

    def __init__(self):
        """Initialize transcription service with OpenAI client."""
        self.client = AsyncOpenAI(api_key=settings.openai_api_key)
        self.model = "whisper-1"

    async def transcribe(
        self,
        audio_file: UploadFile,
        language: Optional[str] = None,
        prompt: Optional[str] = None,
    ) -> dict:
        """
        Transcribe audio file to text using Whisper API.

        Args:
            audio_file: Uploaded audio file
            language: Optional ISO-639-1 language code (e.g., 'en', 'fr');
                omitted from the request when None
            prompt: Optional text to guide the model's style; omitted from
                the request when None

        Returns:
            Dictionary with keys ``text``, ``language``, ``duration`` and
            ``model``. ``language`` and ``duration`` are None when the
            response object does not carry them.

        Raises:
            Exception: Nothing is caught here, so any error from reading the
                upload, writing the temp file, or the API call propagates
                unchanged. The temp file is removed either way.
        """
        content = await audio_file.read()

        # Only forward the optional parameters that were actually supplied,
        # so the client applies its own defaults for the rest.
        request_options = {
            "model": self.model,
            "response_format": "verbose_json",  # Get more metadata
        }
        if language is not None:
            request_options["language"] = language
        if prompt is not None:
            request_options["prompt"] = prompt

        # Stage the upload in a named temp file whose suffix carries the
        # original extension.
        # NOTE(review): presumably the API uses the file name to detect the
        # audio format — confirm against the OpenAI client docs.
        tmp_file = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=self._get_file_extension(audio_file.filename),
        )
        tmp_path = tmp_file.name
        try:
            # Close the writer before reopening by name: a second open (and
            # the later unlink) of a still-open temp file fails on Windows.
            with tmp_file:
                tmp_file.write(content)

            with open(tmp_path, "rb") as audio:
                transcript = await self.client.audio.transcriptions.create(
                    file=audio,
                    **request_options,
                )
        finally:
            # Clean up temp file; tolerate it having vanished already.
            try:
                os.unlink(tmp_path)
            except FileNotFoundError:
                pass

        return {
            "text": transcript.text,
            "language": getattr(transcript, "language", None),
            "duration": getattr(transcript, "duration", None),
            "model": self.model,
        }

    @staticmethod
    def _get_file_extension(filename: Optional[str]) -> str:
        """
        Extract file extension from filename.

        Directory components (``/`` or ``\\``) are discarded first, so the
        result never contains a path separator and is safe to use as a
        temp-file suffix even though the name is client-supplied.

        Args:
            filename: Name of the file (may be None or empty)

        Returns:
            File extension with dot (e.g., '.mp3'), case preserved. Falls
            back to '.mp3' when the name is missing, has no dot, or ends
            with a dot.
        """
        if filename:
            base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
            _, dot, extension = base_name.rpartition(".")
            if dot and extension:
                return "." + extension
        return TranscriptionService.DEFAULT_EXTENSION

    def is_supported_format(self, filename: str) -> bool:
        """
        Check if audio format is supported by Whisper.

        Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm

        The check is by extension only and is case-insensitive. A name
        without an extension is treated as '.mp3' (the default extension)
        and is therefore reported as supported.

        Args:
            filename: Name of the file

        Returns:
            True if format is supported
        """
        extension = self._get_file_extension(filename).lower()
        return extension in self.SUPPORTED_FORMATS
# Shared module-level instance, created once when this module is imported.
# Constructing it runs TranscriptionService.__init__, which builds the
# AsyncOpenAI client from settings.openai_api_key.
transcription_service = TranscriptionService()
|