"""Gradio demo: Romanized Telugu -> Telugu script -> English translation.

Pipeline for each request:
    1. ``clean_text`` normalizes the input and swaps digit runs for ``<>``.
    2. ``transliterate_to_native_script`` converts each word to Telugu script.
    3. The MarianMT model translates the Telugu text to English.
    4. The digit runs removed in step 1 are put back in place of the
       placeholders found in the model output.
"""

import re
from typing import List, Tuple

import gradio as gr
from ai4bharat.transliteration import XlitEngine
import torch
from transformers import MarianMTModel, MarianTokenizer

# Initialize Transliteration Engine
engine = XlitEngine("te")

# Load Model & Tokenizer
model_folder_path = "HackHedron/N_Te-N_En-Translator"  # Your model on Hugging Face
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = MarianTokenizer.from_pretrained(model_folder_path)
model = MarianMTModel.from_pretrained(model_folder_path).to(device)

# Patterns are compiled once at import time instead of on every request.
_DIGIT_RUN_RE = re.compile(r'\d+')
_SENTENCE_PUNCT_RE = re.compile(r'[.!?]')
_QUOTE_RE = re.compile(r'["\'“”‘’]')
_WHITESPACE_RE = re.compile(r'\s+')
# Matches the number placeholder as well as the partial/duplicated forms
# ("<<>>", "<<", ">>", "<", ">") that may show up in the decoded output.
_PLACEHOLDER_RE = re.compile(r'(<<>>|<>|<<|>>|<|>)')


# Text Cleaning Function
def clean_text(text: str) -> Tuple[str, List[str]]:
    """Normalize *text* and pull its digit runs out into a separate list.

    The text is lower-cased, every run of digits is replaced by the ``<>``
    placeholder, sentence punctuation (``.``, ``!``, ``?``) and quote
    characters are removed, and whitespace is collapsed to single spaces.

    Args:
        text: Raw user input.

    Returns:
        A ``(cleaned_text, numbers)`` tuple where ``numbers`` holds the digit
        runs in the order they appeared in the input.
    """
    text = text.lower()
    numbers = _DIGIT_RUN_RE.findall(text)
    text = _DIGIT_RUN_RE.sub('<>', text)
    text = _SENTENCE_PUNCT_RE.sub('', text)
    text = _QUOTE_RE.sub('', text)
    text = _WHITESPACE_RE.sub(' ', text)
    # Strip last: removing punctuation can expose new leading/trailing
    # whitespace (e.g. "hi ." -> "hi "), which an early strip would miss.
    return text.strip(), numbers


# Transliterate to Telugu Script
def transliterate_to_native_script(text: str) -> str:
    """Transliterate each whitespace-separated word of *text* to Telugu script.

    Only the top-ranked candidate returned by the engine is kept for each word.

    Args:
        text: Cleaned, Romanized text.

    Returns:
        The transliterated words joined by single spaces ("" for empty input).
    """
    # NOTE(review): the "<>" number placeholder is passed through the engine
    # like any other word; confirm it comes back unchanged.
    return " ".join(
        engine.translit_word(word, topk=1)["te"][0] for word in text.split()
    )


def _restore_numbers(text: str, numbers: List[str]) -> str:
    """Replace placeholders in *text*, left to right, with *numbers* in order.

    Placeholders beyond ``len(numbers)`` are left untouched; numbers beyond
    the available placeholders are not inserted.
    """
    for num in numbers:
        match = _PLACEHOLDER_RE.search(text)
        if match is None:
            # No placeholders left, so further searching cannot succeed.
            break
        start, end = match.span()
        text = text[:start] + num + text[end:]
    return text


# Translate to English
def translate_input(user_input: str) -> Tuple[str, str, str, str]:
    """Run the full clean -> transliterate -> translate pipeline.

    Args:
        user_input: Romanized Telugu sentence typed by the user.

    Returns:
        A ``(original_input, cleaned_text, telugu_script, english)`` tuple,
        matching the four output textboxes of the interface.
    """
    user_input = user_input or ""
    cleaned, numbers = clean_text(user_input)
    if not cleaned:
        # Nothing to translate; avoid running generation on an empty
        # sequence, which would only produce unrelated text.
        return user_input, cleaned, "", ""

    telugu_script = transliterate_to_native_script(cleaned)
    inputs = tokenizer(
        telugu_script,
        return_tensors="pt",
        max_length=128,
        truncation=True,
    ).to(device)
    # Unpack the whole encoding so the attention mask is forwarded together
    # with the input ids.
    output_ids = model.generate(
        **inputs,
        max_length=128,
        num_beams=5,
        early_stopping=True,
    )
    translated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    final_output = _restore_numbers(translated, numbers)
    return user_input, cleaned, telugu_script, final_output


# Gradio Interface
interface = gr.Interface(
    fn=translate_input,
    inputs=gr.Textbox(label="Enter Romanized Telugu Sentence"),
    outputs=[
        gr.Textbox(label="Original Input"),
        gr.Textbox(label="Cleaned Text"),
        gr.Textbox(label="Telugu Script"),
        gr.Textbox(label="Translated English"),
    ],
    title="Romanized Telugu to English Translator",
    description="Transliterates Romanized Telugu to Telugu script and translates it to English.",
)

interface.launch(share=True)