import os
import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from torongoxetu import TorongoModel


# Hugging Face Hub repository and file name of the ASR checkpoint to fetch.
HF_MODEL_REPO = "ananddey/torongoXetu-asr"
MODEL_FILENAME = "torongoXetu-asr.nemo"
# Directory passed to hf_hub_download as its cache location.
CACHE_DIR = "/app/model_cache"

# Start-up state read by the request handler: `model` stays None when
# initialization fails, and `init_error` then holds a printable reason.
model = None
init_error = None


# Every step that can fail during start-up runs inside this single try so the
# failure is recorded in `init_error` (and reported to the user on the first
# request) instead of aborting the import of this module.
try:
    # Creating the cache directory can fail too (missing or read-only parent),
    # so it is guarded together with the download and the model load.
    os.makedirs(CACHE_DIR, exist_ok=True)

    print("⬇️ Downloading model from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=MODEL_FILENAME,
        cache_dir=CACHE_DIR,
    )
    print(f"✅ Model downloaded to: {model_path}")
    model = TorongoModel(model_path)
    print("✅ TorongoXetu model loaded successfully")
except Exception as e:
    # Prefix the exception type: some exceptions carry an empty message, which
    # would otherwise leave the reported reason blank.
    init_error = f"{type(e).__name__}: {e}"
    print(f"❌ Model initialization failed: {init_error}")


def transcribe(audio):
    """Return the transcription of *audio*, or a human-readable status message.

    Never raises: every failure path is turned into a string so the caller
    always has something to display.

    Args:
        audio: The value to pass to ``model.transcribe``; ``None`` means no
            audio was supplied.

    Returns:
        The transcription when one is produced, otherwise a message explaining
        why none is available (model not loaded, no audio given, empty result,
        or an error raised while transcribing).
    """
    # Start-up failed: report the recorded reason instead of touching the model.
    if model is None:
        return f"Model not loaded: {init_error}"

    if audio is None:
        return "Please upload or record an audio file."

    try:
        result = model.transcribe(audio)
        # A falsy result (e.g. an empty string) is replaced by a placeholder.
        return result or "No transcription generated."
    except Exception as exc:
        return f"Error during transcription: {exc}"


# Example rows for the UI: one single-element row per bundled clip, each
# holding a path relative to the working directory.
EXAMPLES = [
    [os.path.join("test-audio", filename)]
    for filename in (
        "test.wav",
        "test-2.wav",
        "test-3.wav",
        "test-4.wav",
        "test-5.wav",
    )
]


# Input component; type="filepath" asks Gradio to pass the handler a file path.
_audio_input = gr.Audio(type="filepath", label="Upload Assamese Audio")
# Output component: a four-line text box for the transcription.
_transcript_output = gr.Textbox(label="Transcription (Assamese)", lines=4)

# Text shown under the title.
_DESCRIPTION = (
    "Automatic Speech Recognition for Assamese using the "
    "TorongoXetu model built with NVIDIA NeMo.\n\n"
    "Upload a WAV file or record audio to get instant transcription."
)

# Single-function interface wiring the audio input to `transcribe`.
# Flagging is turned off and no named API endpoint is requested.
demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_transcript_output,
    title="🎙️ TorongoXetu – Assamese ASR",
    description=_DESCRIPTION,
    examples=EXAMPLES,
    allow_flagging="never",
    api_name=False,
)

if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    # "0.0.0.0" listens on all network interfaces; the port is fixed at 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)