import gradio as gr
from transformers import pipeline

# --- 1. Define Model Choices ---
# Maps the label shown in the UI to the Hugging Face model ID that is loaded.
MODELS = {
    "Whisper Small": "AmirMohseni/whisper-small-persian-bf16",
    "Whisper Large v3": "AmirMohseni/whisper-large-v3-persian-bf16",
}

# Single source of truth for the default label: used for the pre-load, as the
# initial Radio value, and as the fallback when an unknown choice is received.
DEFAULT_MODEL_CHOICE = "Whisper Large v3"

# Cache to hold loaded models, keyed by model ID (the values of MODELS).
model_pipelines = {}


def load_model(model_id):
    """Build the speech-recognition pipeline for *model_id* and cache it.

    The pipeline is created on device 0 with generation arguments fixed to
    Persian transcription, then stored in ``model_pipelines`` under
    ``model_id`` (replacing any existing entry for that ID).

    Args:
        model_id: Model identifier passed to ``transformers.pipeline`` as
            ``model``.

    Returns:
        None. The loaded pipeline is available as ``model_pipelines[model_id]``.
    """
    print(f"Loading model on GPU: {model_id}...")
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        generate_kwargs={"language": "persian", "task": "transcribe"},
        device=0,  # Use GPU 0
    )
    model_pipelines[model_id] = pipe
    print("Model loaded.")


# Pre-load the default model before the interface starts, so the first request
# for the default choice does not have to wait for a model load.
default_model_id = MODELS[DEFAULT_MODEL_CHOICE]
load_model(default_model_id)


# --- 2. Function for transcription ---
# No decorator is needed here anymore.
def transcribe(audio, model_choice):
    """Transcribe a recording with the selected model.

    Args:
        audio: Value supplied by the ``gr.Audio`` input (configured below with
            ``type="filepath"``), or ``None`` when nothing was recorded.
        model_choice: A key of ``MODELS``. ``None`` or any unknown label falls
            back to ``DEFAULT_MODEL_CHOICE``.

    Returns:
        The ``"text"`` field of the pipeline result, or the message
        ``"No audio provided."`` when ``audio`` is ``None``.
    """
    if audio is None:
        return "No audio provided."

    # dict.get covers both None and unknown labels in one lookup.
    model_id = MODELS.get(model_choice, MODELS[DEFAULT_MODEL_CHOICE])

    # Load on first use; later calls reuse the cached pipeline.
    if model_id not in model_pipelines:
        load_model(model_id)

    result = model_pipelines[model_id](audio)
    return result["text"]


# --- 3. Gradio Interface ---
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath", label="Record Audio 🎤"),
        gr.Radio(
            choices=list(MODELS.keys()),
            value=DEFAULT_MODEL_CHOICE,
            label="Choose Model",
            info="Select the model to use for transcription",
        ),
    ],
    outputs="text",
    title="Whisper Farsi 🎙️",
    description="Realtime demo for Persian speech recognition. Choose a model, press the record button, and speak.",
)

# --- 4. Launch ---
iface.launch()