AmirMohseni committed on
Commit
659131a
·
verified ·
1 Parent(s): bff6732

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -21,7 +21,7 @@ def load_model(model_name):
21
  pipe = pipeline(
22
  "automatic-speech-recognition",
23
  model=model_id,
24
- torch_dtype="auto",
25
  device=device,
26
  )
27
  model_pipelines[model_id] = pipe
@@ -31,7 +31,7 @@ def load_model(model_name):
31
  # --- 3. Main Transcription Function ---
32
  @spaces.GPU(duration=90)
33
  def transcribe(audio, model_name):
34
- # 'audio' is a tuple like (sample_rate, numpy_array)
35
  if audio is None:
36
  gr.Warning("No audio recorded. Please record your voice first.")
37
  return ""
@@ -39,13 +39,13 @@ def transcribe(audio, model_name):
39
  selected_pipe = load_model(model_name)
40
  print(f"Transcribing with '{model_name}'...")
41
 
42
- # Pass only the numpy array (the second element) to the pipeline
43
- result = selected_pipe(audio[1], generate_kwargs={"language": "persian", "task": "transcribe"})
44
 
45
  transcription = result["text"]
46
  print(f"Transcription result: {transcription}")
47
  return transcription
48
-
49
  # --- 4. Pre-load the Default Model ---
50
  print("Pre-loading the default model ('Whisper Large v3')...")
51
  load_model("Whisper Large v3")
@@ -55,8 +55,8 @@ print("Default model pre-loaded. The interface is ready.")
55
  iface = gr.Interface(
56
  fn=transcribe,
57
  inputs=[
58
- # Change type from "filepath" to "numpy"
59
- gr.Audio(sources=["microphone"], type="numpy", label="Record Audio 🎤"), # <-- CHANGED
60
  gr.Radio(
61
  choices=list(MODELS.keys()),
62
  value="Whisper Large v3",
 
21
  pipe = pipeline(
22
  "automatic-speech-recognition",
23
  model=model_id,
24
+ torch_dtype="auto",
25
  device=device,
26
  )
27
  model_pipelines[model_id] = pipe
 
31
  # --- 3. Main Transcription Function ---
32
  @spaces.GPU(duration=90)
33
  def transcribe(audio, model_name):
34
+ # 'audio' is now a filepath string again
35
  if audio is None:
36
  gr.Warning("No audio recorded. Please record your voice first.")
37
  return ""
 
39
  selected_pipe = load_model(model_name)
40
  print(f"Transcribing with '{model_name}'...")
41
 
42
+ # The pipeline now receives the filepath directly
43
+ result = selected_pipe(audio, generate_kwargs={"language": "persian", "task": "transcribe"})
44
 
45
  transcription = result["text"]
46
  print(f"Transcription result: {transcription}")
47
  return transcription
48
+
49
  # --- 4. Pre-load the Default Model ---
50
  print("Pre-loading the default model ('Whisper Large v3')...")
51
  load_model("Whisper Large v3")
 
55
  iface = gr.Interface(
56
  fn=transcribe,
57
  inputs=[
58
+ # Reverted the type back to "filepath"
59
+ gr.Audio(sources=["microphone"], type="filepath", label="Record Audio 🎤"), # <-- REVERTED
60
  gr.Radio(
61
  choices=list(MODELS.keys()),
62
  value="Whisper Large v3",