# Harisri's picture
# Update app.py
# 668dd69 verified
# Raw
# History Blame Contribute Delete
# 2.46 kB
import gradio as gr
from ai4bharat.transliteration import XlitEngine
import torch
import re
from transformers import MarianMTModel, MarianTokenizer
# Transliteration engine, constructed with the "te" language code.
engine = XlitEngine("te")

# Translation model and tokenizer, both loaded from the same checkpoint id.
model_folder_path = "HackHedron/N_Te-N_En-Translator"  # Your model on Hugging Face

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = MarianTokenizer.from_pretrained(model_folder_path)
model = MarianMTModel.from_pretrained(model_folder_path)
model = model.to(device)
# Text Cleaning Function
def clean_text(text):
    """Normalize an input sentence and extract the numbers it contains.

    The text is lower-cased, every run of digits is replaced by the ``<>``
    placeholder, sentence punctuation (``.``, ``!``, ``?``) and straight or
    curly quotes are removed, and whitespace is collapsed to single spaces.

    Args:
        text: Raw input string.

    Returns:
        A ``(cleaned, numbers)`` tuple: the normalized string, and the digit
        runs (as strings) in the order they appeared in ``text``.
    """
    text = text.lower().strip()
    numbers = re.findall(r'\d+', text)
    text = re.sub(r'\d+', '<>', text)
    text = re.sub(r'[.!?]', '', text)
    # Curly quotes are written as \u escapes so the pattern cannot be
    # corrupted by a wrong file encoding (the previous literal characters had
    # turned into mojibake and no longer matched any quote).
    text = re.sub(r'["\'\u201c\u201d\u2018\u2019]', '', text)
    # Strip again: removing trailing punctuation can expose a trailing space.
    text = re.sub(r'\s+', ' ', text).strip()
    return text, numbers
# Transliterate to Telugu Script
def transliterate_to_native_script(text):
    """Transliterate each whitespace-separated word of ``text`` via ``engine``.

    Every word is passed to ``engine.translit_word`` with ``topk=1`` and the
    first candidate under the ``"te"`` key is kept. The results are joined
    with single spaces.
    """
    native_words = []
    for token in text.split():
        candidates = engine.translit_word(token, topk=1)
        native_words.append(candidates["te"][0])
    return " ".join(native_words)
# Translate to English
# Matches the "<>" number marker plus doubled/partial forms of it
# (presumably to tolerate the model garbling the marker -- confirm).
_PLACEHOLDER_PATTERN = re.compile(r'(<<>>|<>|<<|>>|<|>)')


def _restore_numbers(text, numbers):
    """Replace placeholders in ``text`` with ``numbers``, left to right."""
    for num in numbers:
        match = _PLACEHOLDER_PATTERN.search(text)
        if match is None:
            # No placeholder left; the remaining numbers cannot be placed.
            break
        start, end = match.span()
        text = text[:start] + num + text[end:]
    return text


def translate_input(user_input):
    """Clean, transliterate and translate one sentence.

    Args:
        user_input: Sentence typed by the user.

    Returns:
        A 4-tuple ``(user_input, cleaned, telugu_script, final_output)``:
        the input as given, the output of ``clean_text``, the output of
        ``transliterate_to_native_script``, and the decoded model output with
        the extracted numbers substituted back for the placeholders.
        For empty or whitespace-only input the last three items are ``""``
        and the model is not called.
    """
    # Nothing to translate: skip the model, which would otherwise generate
    # text from an empty sequence.
    if user_input is None or not user_input.strip():
        return user_input or "", "", "", ""

    cleaned, numbers = clean_text(user_input)
    telugu_script = transliterate_to_native_script(cleaned)

    inputs = tokenizer(
        telugu_script, return_tensors="pt", max_length=128, truncation=True
    ).to(device)
    # Forward the whole encoding so the attention mask reaches generate()
    # along with the input ids.
    output_ids = model.generate(
        **inputs, max_length=128, num_beams=5, early_stopping=True
    )
    translated = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    final_output = _restore_numbers(translated, numbers)
    return user_input, cleaned, telugu_script, final_output
# Gradio Interface
# One input textbox feeds translate_input; its four return values fill the
# four output textboxes in the order listed below.
interface = gr.Interface(
    fn=translate_input,
    inputs=gr.Textbox(label="Enter Romanized Telugu Sentence"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Original Input",
            "Cleaned Text",
            "Telugu Script",
            "Translated English",
        )
    ],
    title="Romanized Telugu to English Translator",
    description="Transliterates Romanized Telugu to Telugu script and translates it to English.",
)

# Start the app.
interface.launch(share=True)