Spaces:

AmirMohseni
/

Whisper-Farsi

Running on Zero

App Files Community

AmirMohseni committed on Oct 3, 2025

Commit

659131a

verified ·

1 Parent(s): bff6732

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -7

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ def load_model(model_name):
         pipe = pipeline(
             "automatic-speech-recognition",
             model=model_id,
-            torch_dtype="auto",
             device=device,
         )
         model_pipelines[model_id] = pipe
@@ -31,7 +31,7 @@ def load_model(model_name):
 # --- 3. Main Transcription Function ---
 @spaces.GPU(duration=90)
 def transcribe(audio, model_name):
-    # 'audio' is a tuple like (sample_rate, numpy_array)
     if audio is None:
         gr.Warning("No audio recorded. Please record your voice first.")
         return ""
@@ -39,13 +39,13 @@ def transcribe(audio, model_name):
     selected_pipe = load_model(model_name)
     print(f"Transcribing with '{model_name}'...")
-    # Pass only the numpy array (the second element) to the pipeline
-    result = selected_pipe(audio[1], generate_kwargs={"language": "persian", "task": "transcribe"})
     transcription = result["text"]
     print(f"Transcription result: {transcription}")
     return transcription
 # --- 4. Pre-load the Default Model ---
 print("Pre-loading the default model ('Whisper Large v3')...")
 load_model("Whisper Large v3")
@@ -55,8 +55,8 @@ print("Default model pre-loaded. The interface is ready.")
 iface = gr.Interface(
     fn=transcribe,
     inputs=[
-        # Change type from "filepath" to "numpy"
-        gr.Audio(sources=["microphone"], type="numpy", label="Record Audio 🎤"), # <-- CHANGED
         gr.Radio(
             choices=list(MODELS.keys()),
             value="Whisper Large v3",

         pipe = pipeline(
             "automatic-speech-recognition",
             model=model_id,
+            torch_dtype="auto",
             device=device,
         )
         model_pipelines[model_id] = pipe
 # --- 3. Main Transcription Function ---
 @spaces.GPU(duration=90)
 def transcribe(audio, model_name):
+    # 'audio' is now a filepath string again
     if audio is None:
         gr.Warning("No audio recorded. Please record your voice first.")
         return ""
     selected_pipe = load_model(model_name)
     print(f"Transcribing with '{model_name}'...")
+    # The pipeline now receives the filepath directly
+    result = selected_pipe(audio, generate_kwargs={"language": "persian", "task": "transcribe"})
     transcription = result["text"]
     print(f"Transcription result: {transcription}")
     return transcription
 # --- 4. Pre-load the Default Model ---
 print("Pre-loading the default model ('Whisper Large v3')...")
 load_model("Whisper Large v3")
 iface = gr.Interface(
     fn=transcribe,
     inputs=[
+        # Reverted the type back to "filepath"
+        gr.Audio(sources=["microphone"], type="filepath", label="Record Audio 🎤"), # <-- REVERTED
         gr.Radio(
             choices=list(MODELS.keys()),
             value="Whisper Large v3",