# torongoXetu-assamese-asr / app-spaces.py
# Uploaded by ananddey; commit e0a23e0 ("init", verified).
import os
import gradio as gr
import torch
from huggingface_hub import hf_hub_download
from torongoxetu import TorongoModel
# Hugging Face Hub coordinates of the model file to download.
HF_MODEL_REPO = "ananddey/torongoXetu-asr"
MODEL_FILENAME = "torongoXetu-asr.nemo"
# Directory passed to hf_hub_download as its cache location.
CACHE_DIR = "/app/model_cache"

# Module-level state read by `transcribe`: `model` stays None when start-up
# fails, and `init_error` then carries the reason shown to the user.
model = None
init_error = None

try:
    # The cache directory is created inside the guarded section so that an
    # unwritable path is reported through `init_error` rather than aborting
    # the import before the UI can be built.
    os.makedirs(CACHE_DIR, exist_ok=True)

    print("⬇️ Downloading model from Hugging Face...")
    model_path = hf_hub_download(
        repo_id=HF_MODEL_REPO,
        filename=MODEL_FILENAME,
        cache_dir=CACHE_DIR,
    )
    print(f"✅ Model downloaded to: {model_path}")

    model = TorongoModel(model_path)
    print("✅ TorongoXetu model loaded successfully")
except Exception as e:
    # Deliberately broad: any start-up failure (filesystem, download, model
    # construction) is recorded and surfaced per request instead of
    # crashing the app. Fall back to the exception class name when the
    # exception carries no message, so the user never sees an empty reason.
    init_error = str(e) or type(e).__name__
    print(f"❌ Model initialization failed: {init_error}")
def transcribe(audio):
    """Transcribe one audio input and return the text to display.

    Args:
        audio: Value delivered by the audio input component; it is passed
            unchanged to ``model.transcribe``. ``None`` means no audio was
            provided.

    Returns:
        The transcription, or a human-readable message when the model was
        not loaded, no audio was given, the result was empty, or the
        transcription call raised.
    """
    # A failed start-up takes precedence over input validation.
    if model is None:
        return f"Model not loaded: {init_error}"

    if audio is None:
        return "Please upload or record an audio file."

    try:
        transcript = model.transcribe(audio)
        # Any falsy result is replaced by the placeholder message.
        return transcript or "No transcription generated."
    except Exception as exc:
        # Report the failure as text so the UI shows it in the output box.
        return f"Error during transcription: {exc}"
# Sample clips offered in the UI: one single-element row per audio file,
# each a relative path inside the "test-audio" directory.
EXAMPLES = [
    [os.path.join("test-audio", clip_name)]
    for clip_name in (
        "test.wav",
        "test-2.wav",
        "test-3.wav",
        "test-4.wav",
        "test-5.wav",
    )
]
# Gradio UI: one audio input wired to `transcribe`, one text box for the
# result, and the bundled example clips.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Assamese Audio"),
    outputs=gr.Textbox(label="Transcription (Assamese)", lines=4),
    # The leading emoji was mis-encoded (mojibake) and is restored here.
    title="🎙️ TorongoXetu – Assamese ASR",
    description=(
        "Automatic Speech Recognition for Assamese using the "
        "TorongoXetu model built with NVIDIA NeMo.\n\n"
        "Upload a WAV file or record audio to get instant transcription."
    ),
    examples=EXAMPLES,
    # NOTE(review): newer Gradio releases rename this option to
    # `flagging_mode` — confirm against the pinned Gradio version.
    allow_flagging="never",
    api_name=False,
)
if __name__ == "__main__":
    # Start the web server only when this file is run as a script.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)