import gradio as gr
from transformers import pipeline

# --- 1. Define Model Choices ---
# (UI label, model identifier) pairs. The order here is the order in which
# the labels are offered to the user.
_MODEL_CHOICES = (
    ("Whisper Small", "AmirMohseni/whisper-small-persian-bf16"),
    ("Whisper Large v3", "AmirMohseni/whisper-large-v3-persian-bf16"),
)
MODELS = dict(_MODEL_CHOICES)

# Already-built pipelines, keyed by model identifier.
model_pipelines = {}

def load_model(model_id, device=0):
    """Build the speech-recognition pipeline for a model and cache it.

    A new pipeline is created on every call and stored in
    ``model_pipelines`` under ``model_id``, replacing any existing entry.

    Args:
        model_id: Model identifier passed to ``pipeline`` as ``model``.
        device: Device passed through to ``pipeline``. Defaults to ``0``
            (GPU 0), which is the value that used to be hard-coded; pass
            another value (for example ``"cpu"``) to run elsewhere.

    Returns:
        The newly created pipeline, which is also stored in the cache.
    """
    print(f"Loading model on device {device}: {model_id}...")
    # NOTE(review): no chunking option is configured here; confirm how
    # long recordings are expected to be handled.
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        # Every call through this pipeline transcribes as Persian.
        generate_kwargs={"language": "persian", "task": "transcribe"},
        device=device,
    )
    model_pipelines[model_id] = pipe
    print("Model loaded.")
    return pipe

# Warm the cache with the default model at import time, before the UI is
# built, so a request for that model finds its pipeline already loaded.
default_model_id = MODELS["Whisper Large v3"]
load_model(model_id=default_model_id)

# --- 2. Function for transcription ---
# NO DECORATOR NEEDED HERE ANYMORE
def transcribe(audio, model_choice):
    """Transcribe a recording with the selected model and return its text.

    Args:
        audio: Value handed unchanged to the pipeline; ``None`` means no
            recording was supplied.
        model_choice: A key of ``MODELS``. ``None`` or an unknown key
            falls back to "Whisper Large v3".

    Returns:
        The ``"text"`` entry of the pipeline result, or the message
        "No audio provided." when ``audio`` is ``None``.
    """
    if audio is None:
        return "No audio provided."

    is_known_choice = model_choice is not None and model_choice in MODELS
    label = model_choice if is_known_choice else "Whisper Large v3"
    hub_id = MODELS[label]

    # Build the pipeline on first use; later calls reuse the cached one.
    if hub_id not in model_pipelines:
        load_model(hub_id)

    return model_pipelines[hub_id](audio)["text"]

# --- 3. Gradio Interface ---
# Input widgets, listed in the order transcribe() receives their values.
audio_input = gr.Audio(
    sources=["microphone"],
    type="filepath",
    label="Record Audio 🎤",
)
model_selector = gr.Radio(
    choices=list(MODELS),
    value="Whisper Large v3",
    label="Choose Model",
    info="Select the model to use for transcription",
)

iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, model_selector],
    outputs="text",
    title="Whisper Farsi 🎙️",
    description="Realtime demo for Persian speech recognition. Choose a model, press the record button, and speak.",
)

# --- 4. Launch ---
iface.launch()