routeur_ia_api / services /transcription_service.py
Cyril Dupland
FIrst Commit
d28f1ed
raw
history blame
3.57 kB
"""Transcription service using OpenAI Whisper API."""
from typing import Optional
import tempfile
import os
from fastapi import UploadFile
from openai import AsyncOpenAI
from config import settings
class TranscriptionService:
    """Service for audio transcription using the OpenAI Whisper API.

    The uploaded audio is read into memory and sent to the API directly as a
    ``(filename, bytes)`` pair; no temporary file is written to disk.
    """

    # Lower-case extensions (with leading dot) accepted by `is_supported_format`.
    # Built once at class-creation time instead of on every call.
    SUPPORTED_FORMATS = frozenset(
        {
            ".flac",
            ".m4a",
            ".mp3",
            ".mp4",
            ".mpeg",
            ".mpga",
            ".ogg",
            ".wav",
            ".webm",
        }
    )

    def __init__(self):
        """Initialize transcription service with OpenAI client."""
        self.client = AsyncOpenAI(api_key=settings.openai_api_key)
        self.model = "whisper-1"

    async def transcribe(
        self,
        audio_file: "UploadFile",
        language: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> dict:
        """
        Transcribe audio file to text using Whisper API.

        Args:
            audio_file: Uploaded audio file
            language: Optional ISO-639-1 language code (e.g., 'en', 'fr')
            prompt: Optional text to guide the model's style

        Returns:
            Dictionary with the keys ``text``, ``language``, ``duration`` and
            ``model``. ``language`` and ``duration`` are ``None`` when the
            response object does not expose them.

        Raises:
            Exception: If transcription fails
        """
        content = await audio_file.read()

        # Only the extension of the client-supplied name is reused; the upload
        # is sent under a neutral base name, never the raw client filename.
        extension = self._get_file_extension(audio_file.filename)

        request = {
            "model": self.model,
            # The SDK accepts a (filename, bytes) tuple for file uploads, so
            # no temp file (and no blocking disk I/O in this coroutine) is
            # needed.
            "file": (f"audio{extension}", content),
            "response_format": "verbose_json",  # Get more metadata
        }
        # Forward optional parameters only when the caller provided them.
        if language is not None:
            request["language"] = language
        if prompt is not None:
            request["prompt"] = prompt

        transcript = await self.client.audio.transcriptions.create(**request)

        return {
            "text": transcript.text,
            "language": getattr(transcript, "language", None),
            "duration": getattr(transcript, "duration", None),
            "model": self.model,
        }

    @staticmethod
    def _get_file_extension(filename: Optional[str]) -> str:
        """
        Extract file extension from filename.

        Directory components (``/`` or ``\\`` separated) are discarded first,
        so a dot inside a folder name is never mistaken for an extension and
        the result never contains a path separator. The case of the extension
        is preserved.

        Args:
            filename: Name of the file (may be None or empty)

        Returns:
            File extension with dot (e.g., '.mp3'). Falls back to '.mp3' when
            the name is missing, has no dot, or ends with a dot.
        """
        if filename:
            # Client-supplied names may carry either kind of separator.
            base_name = filename.replace("\\", "/").rsplit("/", 1)[-1]
            _, dot, extension = base_name.rpartition(".")
            if dot and extension:
                return "." + extension
        return ".mp3"  # Default extension

    def is_supported_format(self, filename: str) -> bool:
        """
        Check if audio format is supported by Whisper.

        Supported formats: flac, m4a, mp3, mp4, mpeg, mpga, ogg, wav, webm

        The comparison is case-insensitive. A name without an extension is
        treated as '.mp3' (the default of `_get_file_extension`) and is
        therefore reported as supported.

        Args:
            filename: Name of the file

        Returns:
            True if format is supported
        """
        extension = self._get_file_extension(filename).lower()
        return extension in self.SUPPORTED_FORMATS
# Shared module-level instance, created when this module is imported.
# Constructing it runs TranscriptionService.__init__, which builds the
# AsyncOpenAI client from settings.openai_api_key.
transcription_service = TranscriptionService()