File size: 3,574 Bytes
d28f1ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""Transcription service using OpenAI Whisper API."""
from typing import Optional
import tempfile
import os
from fastapi import UploadFile
from openai import AsyncOpenAI
from config import settings


class TranscriptionService:
    """Service for audio transcription using OpenAI Whisper.

    Holds an ``AsyncOpenAI`` client and the Whisper model name, and exposes
    the :meth:`transcribe` coroutine plus small filename helpers.
    """

    # Extension assumed when a filename carries no usable extension.
    _DEFAULT_EXTENSION = ".mp3"
    # Extensions accepted by is_supported_format (lower-case, leading dot).
    _SUPPORTED_FORMATS = frozenset(
        {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm"}
    )
    # Longest extension (dot included) accepted as a temp-file suffix.
    _MAX_SUFFIX_LENGTH = 16

    def __init__(self):
        """Initialize transcription service with OpenAI client."""
        self.client = AsyncOpenAI(api_key=settings.openai_api_key)
        self.model = "whisper-1"

    async def transcribe(
        self,
        audio_file: UploadFile,
        language: Optional[str] = None,
        prompt: Optional[str] = None
    ) -> dict:
        """
        Transcribe audio file to text using Whisper API.

        The upload is read fully into memory, staged in a named temporary
        file, and that file is handed to the OpenAI client. The temporary
        file is always removed before this coroutine returns or raises.

        Args:
            audio_file: Uploaded audio file
            language: Optional ISO-639-1 language code (e.g., 'en', 'fr');
                omitted from the request when None
            prompt: Optional text to guide the model's style; omitted from
                the request when None

        Returns:
            Dictionary with keys ``text``, ``language``, ``duration`` and
            ``model``. ``language`` and ``duration`` are None when the
            response object does not carry them.

        Raises:
            Exception: Any error raised while reading the upload, writing
                the temporary file, or by the OpenAI client propagates
                unchanged.
        """
        # The filename is client-supplied, so only a validated extension is
        # allowed to reach the filesystem as the temp-file suffix.
        suffix = self._safe_temp_suffix(audio_file.filename)
        content = await audio_file.read()

        # Only send optional fields the caller actually provided.
        request_options = {
            "model": self.model,
            "response_format": "verbose_json",  # Get more metadata
        }
        if language is not None:
            request_options["language"] = language
        if prompt is not None:
            request_options["prompt"] = prompt

        tmp_path = None
        try:
            # Write and CLOSE the staging file before reopening it by name:
            # this flushes the data and avoids holding two handles at once.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                tmp_path = tmp_file.name
                tmp_file.write(content)

            # NOTE(review): the client presumably uses the file object's
            # name (and thus its extension) to label the upload - confirm
            # against the SDK documentation.
            with open(tmp_path, "rb") as audio:
                transcript = await self.client.audio.transcriptions.create(
                    file=audio,
                    **request_options,
                )

            return {
                "text": transcript.text,
                "language": getattr(transcript, "language", None),
                "duration": getattr(transcript, "duration", None),
                "model": self.model,
            }
        finally:
            # Runs after every handle is closed, so removal also works on
            # platforms that refuse to delete open files.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except FileNotFoundError:
                    # Already gone; nothing left to clean up.
                    pass

    @staticmethod
    def _get_file_extension(filename: Optional[str]) -> str:
        """
        Extract file extension from filename.

        The extension is everything after the last dot, returned with its
        original casing. No validation is performed on the result.

        Args:
            filename: Name of the file (may be None or empty)

        Returns:
            File extension with dot (e.g., '.mp3'); '.mp3' when the
            filename is missing or contains no dot
        """
        if filename and "." in filename:
            return "." + filename.rsplit(".", 1)[1]
        return TranscriptionService._DEFAULT_EXTENSION

    @classmethod
    def _safe_temp_suffix(cls, filename: Optional[str]) -> str:
        """
        Return a lower-cased extension that is safe to use as a temp-file suffix.

        Falls back to the default extension when the extracted one is empty,
        too long, or contains anything other than ASCII letters and digits
        (for example a path separator).

        Args:
            filename: Client-supplied name of the upload (may be None)

        Returns:
            Extension with leading dot, e.g. '.wav'
        """
        extension = cls._get_file_extension(filename).lower()
        stem = extension[1:]
        if (
            stem.isascii()
            and stem.isalnum()
            and len(extension) <= cls._MAX_SUFFIX_LENGTH
        ):
            return extension
        return cls._DEFAULT_EXTENSION

    def is_supported_format(self, filename: str) -> bool:
        """
        Check if audio format is supported by Whisper.

        Supported formats: mp3, mp4, mpeg, mpga, m4a, wav, webm

        The comparison is case-insensitive. A filename without any dot is
        treated as '.mp3' and therefore reported as supported.

        Args:
            filename: Name of the file

        Returns:
            True if format is supported
        """
        extension = self._get_file_extension(filename).lower()
        return extension in self._SUPPORTED_FORMATS


# Shared module-level instance, constructed once when this module is imported
# (which also builds the AsyncOpenAI client from settings.openai_api_key).
transcription_service: TranscriptionService = TranscriptionService()