Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ def load_model(model_name):
|
|
| 21 |
pipe = pipeline(
|
| 22 |
"automatic-speech-recognition",
|
| 23 |
model=model_id,
|
| 24 |
-
torch_dtype="auto",
|
| 25 |
device=device,
|
| 26 |
)
|
| 27 |
model_pipelines[model_id] = pipe
|
|
@@ -31,7 +31,7 @@ def load_model(model_name):
|
|
| 31 |
# --- 3. Main Transcription Function ---
|
| 32 |
@spaces.GPU(duration=90)
|
| 33 |
def transcribe(audio, model_name):
|
| 34 |
-
# 'audio' is a
|
| 35 |
if audio is None:
|
| 36 |
gr.Warning("No audio recorded. Please record your voice first.")
|
| 37 |
return ""
|
|
@@ -39,13 +39,13 @@ def transcribe(audio, model_name):
|
|
| 39 |
selected_pipe = load_model(model_name)
|
| 40 |
print(f"Transcribing with '{model_name}'...")
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
result = selected_pipe(audio
|
| 44 |
|
| 45 |
transcription = result["text"]
|
| 46 |
print(f"Transcription result: {transcription}")
|
| 47 |
return transcription
|
| 48 |
-
|
| 49 |
# --- 4. Pre-load the Default Model ---
|
| 50 |
print("Pre-loading the default model ('Whisper Large v3')...")
|
| 51 |
load_model("Whisper Large v3")
|
|
@@ -55,8 +55,8 @@ print("Default model pre-loaded. The interface is ready.")
|
|
| 55 |
iface = gr.Interface(
|
| 56 |
fn=transcribe,
|
| 57 |
inputs=[
|
| 58 |
-
#
|
| 59 |
-
gr.Audio(sources=["microphone"], type="
|
| 60 |
gr.Radio(
|
| 61 |
choices=list(MODELS.keys()),
|
| 62 |
value="Whisper Large v3",
|
|
|
|
| 21 |
pipe = pipeline(
|
| 22 |
"automatic-speech-recognition",
|
| 23 |
model=model_id,
|
| 24 |
+
torch_dtype="auto",
|
| 25 |
device=device,
|
| 26 |
)
|
| 27 |
model_pipelines[model_id] = pipe
|
|
|
|
| 31 |
# --- 3. Main Transcription Function ---
|
| 32 |
@spaces.GPU(duration=90)
|
| 33 |
def transcribe(audio, model_name):
|
| 34 |
+
# 'audio' is now a filepath string again
|
| 35 |
if audio is None:
|
| 36 |
gr.Warning("No audio recorded. Please record your voice first.")
|
| 37 |
return ""
|
|
|
|
| 39 |
selected_pipe = load_model(model_name)
|
| 40 |
print(f"Transcribing with '{model_name}'...")
|
| 41 |
|
| 42 |
+
# The pipeline now receives the filepath directly
|
| 43 |
+
result = selected_pipe(audio, generate_kwargs={"language": "persian", "task": "transcribe"})
|
| 44 |
|
| 45 |
transcription = result["text"]
|
| 46 |
print(f"Transcription result: {transcription}")
|
| 47 |
return transcription
|
| 48 |
+
|
| 49 |
# --- 4. Pre-load the Default Model ---
|
| 50 |
print("Pre-loading the default model ('Whisper Large v3')...")
|
| 51 |
load_model("Whisper Large v3")
|
|
|
|
| 55 |
iface = gr.Interface(
|
| 56 |
fn=transcribe,
|
| 57 |
inputs=[
|
| 58 |
+
# Reverted the type back to "filepath"
|
| 59 |
+
gr.Audio(sources=["microphone"], type="filepath", label="Record Audio 🎤"), # <-- REVERTED
|
| 60 |
gr.Radio(
|
| 61 |
choices=list(MODELS.keys()),
|
| 62 |
value="Whisper Large v3",
|