Spaces:

ChambreAgriculturePaysLoire
/

routeur_ia_api

Running

App Files Files Community

Cyril Dupland committed on Mar 9

Commit

663d026

1 Parent(s): 4855f78

feat(transcription): make the audio upload size limit configurable via settings (MAX_UPLOAD_MB_AUDIO, default 500 MB) instead of a hard-coded 25 MB, giving the transcription service more flexibility. Update documentation to reflect the new maximum file size and the automatic chunking of audio files larger than 25 MB.

Browse files

Files changed (3) hide show

.env.example +3 -1
api/routes/transcription.py +11 -10
config/settings.py +2 -0

.env.example CHANGED Viewed

@@ -21,4 +21,6 @@ LANGCHAIN_PROJECT=routeur-ia
 # DAILY_API_KEY=61efebb10fa3956006a11e194980876c4453311ef25737fb2e434bc040326deb
 # PDF - logo en haut du document (section dédiée)
-PDF_LOGO_PATH=assets/logo.png

 # DAILY_API_KEY=61efebb10fa3956006a11e194980876c4453311ef25737fb2e434bc040326deb
 # PDF - logo en haut du document (section dédiée)
+PDF_LOGO_PATH=assets/logo.png
+MAX_UPLOAD_MB_AUDIO=500

api/routes/transcription.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Transcription routes for audio to text conversion."""
 from fastapi import APIRouter, UploadFile, File, HTTPException, status, Depends, Query
 from typing import Optional
 from core.security import get_current_user
 from domain.models import TranscriptionResponse, ErrorResponse
 from services.transcription_service import transcription_service, meeting_transcription_service
@@ -31,7 +32,7 @@ async def transcribe_meeting_audio(
     **Supported formats:** mp3, mp4, mpeg, mpga, m4a, wav, webm
-    **Max file size:** 25 MB (same limit as standard transcription)
     Args:
         file: Meeting audio file upload
@@ -52,16 +53,16 @@ async def transcribe_meeting_audio(
             detail="Unsupported file format. Supported: mp3, mp4, mpeg, mpga, m4a, wav, webm",
         )
-    # Check file size (25 MB limit)
     file.file.seek(0, 2)  # Seek to end
     file_size = file.file.tell()  # Get position (file size)
     file.file.seek(0)  # Reset to beginning
-    max_size = 25 * 1024 * 1024  # 25 MB
     if file_size > max_size:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"File too large. Maximum size is 25 MB, got {file_size / (1024 * 1024):.2f} MB",
         )
     try:
@@ -93,7 +94,7 @@ async def get_supported_formats(
     """
     return {
         "supported_formats": ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"],
-        "max_file_size_mb": 25,
         "model": "whisper-1",
         "languages": "Auto-detection or specify ISO-639-1 code"
     }
@@ -119,7 +120,7 @@ async def transcribe_audio(
     **Supported formats:** mp3, mp4, mpeg, mpga, m4a, wav, webm
-    **Max file size:** 25 MB (OpenAI Whisper limit)
     Args:
         file: Audio file upload
@@ -140,16 +141,16 @@ async def transcribe_audio(
             detail=f"Unsupported file format. Supported: mp3, mp4, mpeg, mpga, m4a, wav, webm"
         )
-    # Check file size (25 MB limit for Whisper API)
     file.file.seek(0, 2)  # Seek to end
     file_size = file.file.tell()  # Get position (file size)
     file.file.seek(0)  # Reset to beginning
-    max_size = 25 * 1024 * 1024  # 25 MB
     if file_size > max_size:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
-            detail=f"File too large. Maximum size is 25 MB, got {file_size / (1024 * 1024):.2f} MB"
         )
     try:

 """Transcription routes for audio to text conversion."""
 from fastapi import APIRouter, UploadFile, File, HTTPException, status, Depends, Query
 from typing import Optional
+from config import settings
 from core.security import get_current_user
 from domain.models import TranscriptionResponse, ErrorResponse
 from services.transcription_service import transcription_service, meeting_transcription_service
     **Supported formats:** mp3, mp4, mpeg, mpga, m4a, wav, webm
+    **Max file size:** configurable (default 500 MB; files are chunked automatically if > 25 MB)
     Args:
         file: Meeting audio file upload
             detail="Unsupported file format. Supported: mp3, mp4, mpeg, mpga, m4a, wav, webm",
         )
+    # Check file size (configurable limit; service chunks files > 25 MB)
     file.file.seek(0, 2)  # Seek to end
     file_size = file.file.tell()  # Get position (file size)
     file.file.seek(0)  # Reset to beginning
+    max_size = settings.max_upload_mb_audio * 1024 * 1024
     if file_size > max_size:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"File too large. Maximum size is {settings.max_upload_mb_audio} MB, got {file_size / (1024 * 1024):.2f} MB",
         )
     try:
     """
     return {
         "supported_formats": ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"],
+        "max_file_size_mb": settings.max_upload_mb_audio,
         "model": "whisper-1",
         "languages": "Auto-detection or specify ISO-639-1 code"
     }
     **Supported formats:** mp3, mp4, mpeg, mpga, m4a, wav, webm
+    **Max file size:** configurable (default 500 MB; files are chunked automatically if > 25 MB)
     Args:
         file: Audio file upload
             detail=f"Unsupported file format. Supported: mp3, mp4, mpeg, mpga, m4a, wav, webm"
         )
+    # Check file size (configurable limit; service chunks files > 25 MB)
     file.file.seek(0, 2)  # Seek to end
     file_size = file.file.tell()  # Get position (file size)
     file.file.seek(0)  # Reset to beginning
+    max_size = settings.max_upload_mb_audio * 1024 * 1024
     if file_size > max_size:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"File too large. Maximum size is {settings.max_upload_mb_audio} MB, got {file_size / (1024 * 1024):.2f} MB"
         )
     try:

config/settings.py CHANGED Viewed

@@ -43,6 +43,8 @@ class Settings(BaseSettings):
     # Upload & chunking (PDF ingestion)
     max_upload_mb_pdf: int = 50
     chunk_size: int = 1000
     chunk_overlap: int = 100
     doc_default_type: str = "project_doc"

     # Upload & chunking (PDF ingestion)
     max_upload_mb_pdf: int = 50
+    # Max audio upload size for transcription (service chunks internally; OpenAI limit is 25 MB per chunk)
+    max_upload_mb_audio: int = 500
     chunk_size: int = 1000
     chunk_overlap: int = 100
     doc_default_type: str = "project_doc"