|
|
| import gradio as gr |
| from ai4bharat.transliteration import XlitEngine |
| import torch |
| import re |
| from transformers import MarianMTModel, MarianTokenizer |
|
|
| |
# Transliteration engine configured for the "te" language code; it is used
# below to turn Romanized words into native script before translation.
engine = XlitEngine("te")

# Identifier of the Marian checkpoint; the tokenizer and the model are both
# loaded from it so that their vocabularies agree.
model_folder_path = "HackHedron/N_Te-N_En-Translator"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = MarianTokenizer.from_pretrained(model_folder_path)
model = MarianMTModel.from_pretrained(model_folder_path)
model = model.to(device)
|
|
| |
def clean_text(text):
    """Normalize raw user text and extract its digit runs.

    The text is lower-cased, every run of digits is replaced by the
    placeholder ``<>``, sentence punctuation (``.``, ``!``, ``?``) and
    straight or curly quotation marks are removed, and whitespace is
    collapsed to single spaces with no leading or trailing space.

    Args:
        text: The raw input string.

    Returns:
        A ``(cleaned, numbers)`` tuple: the normalized text and the list of
        digit runs (as strings) in their order of appearance.
    """
    text = text.lower().strip()
    numbers = re.findall(r'\d+', text)
    text = re.sub(r'\d+', '<>', text)
    text = re.sub(r'[.!?]', '', text)
    # Curly quotes are spelled as escapes so the pattern cannot be corrupted
    # by an encoding round-trip of this source file.
    text = re.sub('["\'\u201c\u201d\u2018\u2019]', '', text)
    # Strip again: removing punctuation can leave whitespace at either end.
    text = re.sub(r'\s+', ' ', text).strip()
    return text, numbers
|
|
| |
def transliterate_to_native_script(text):
    """Transliterate *text* word by word using the module-level engine.

    The text is split on whitespace; each word is passed to
    ``engine.translit_word`` with ``topk=1`` and the first entry under the
    ``"te"`` key of the result is kept.

    Args:
        text: Whitespace-separated words to transliterate.

    Returns:
        The transliterated words joined by single spaces (an empty string
        when *text* contains no words).
    """
    native_words = []
    for token in text.split():
        candidates = engine.translit_word(token, topk=1)
        native_words.append(candidates["te"][0])
    return " ".join(native_words)
|
|
| |
# Matches the "<>" digit placeholder as well as doubled or partial
# angle-bracket forms (presumably to tolerate the model altering the
# placeholder). Longer alternatives come first so they win over shorter ones.
_NUMBER_PLACEHOLDER = re.compile(r'(<<>>|<>|<<|>>|<|>)')


def _restore_numbers(text, numbers):
    """Substitute *numbers* for the placeholders in *text*, left to right."""
    remaining = iter(numbers)
    # Once the numbers run out, any further placeholders are left untouched;
    # numbers without a matching placeholder are dropped.
    return _NUMBER_PLACEHOLDER.sub(
        lambda match: next(remaining, match.group(0)), text
    )


def translate_input(user_input):
    """Translate a Romanized sentence and report each pipeline stage.

    The input is cleaned with ``clean_text``, transliterated with
    ``transliterate_to_native_script``, translated with the module-level
    Marian model (beam search, 5 beams, at most 128 tokens), and the digit
    runs masked during cleaning are put back into the translation.

    Args:
        user_input: The sentence typed by the user. ``None``, empty, or
            whitespace-only input short-circuits without running the model.

    Returns:
        A 4-tuple ``(original input, cleaned text, transliterated text,
        final translation)``. The last three are empty strings when there
        is nothing to translate.
    """
    # Nothing to translate: avoid feeding an empty sequence to the model.
    if not user_input or not user_input.strip():
        return user_input or "", "", "", ""

    cleaned, numbers = clean_text(user_input)
    telugu_script = transliterate_to_native_script(cleaned)
    # Input made only of stripped characters (e.g. "!!!") cleans to nothing.
    if not telugu_script:
        return user_input, cleaned, telugu_script, ""

    inputs = tokenizer(
        telugu_script, return_tensors="pt", max_length=128, truncation=True
    ).to(device)
    # Pass the attention mask explicitly rather than letting generate()
    # infer it from the input ids.
    output_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=128,
        num_beams=5,
        early_stopping=True,
    )
    translated = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    final_output = _restore_numbers(translated, numbers)
    return user_input, cleaned, telugu_script, final_output
|
|
| |
# Gradio UI: one input textbox feeding translate_input, and four output
# textboxes, one per element of the tuple that translate_input returns.
interface = gr.Interface(
    fn=translate_input,
    inputs=gr.Textbox(label="Enter Romanized Telugu Sentence"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Original Input",
            "Cleaned Text",
            "Telugu Script",
            "Translated English",
        )
    ],
    title="Romanized Telugu to English Translator",
    description="Transliterates Romanized Telugu to Telugu script and translates it to English.",
)

# Start the app; share=True is passed so Gradio also creates a shareable link.
interface.launch(share=True)
|
|