"""Translate or transliterate text with a fine-tuned FLAN-T5 checkpoint.

The ``alakxender/flan-t5-base-dhivehi-en-latin`` model and its tokenizer are
loaded once at import time. ``translate`` prepends a task prefix to the input
text and decodes the best beam-search hypothesis.
"""
from transformers import T5ForConditionalGeneration, AutoTokenizer

model = T5ForConditionalGeneration.from_pretrained("alakxender/flan-t5-base-dhivehi-en-latin")
tokenizer = AutoTokenizer.from_pretrained("alakxender/flan-t5-base-dhivehi-en-latin")

# Task prefixes accepted by ``translate``. Judging by the examples at the
# bottom of this file: en2dv / dv2en translate between English and Dhivehi,
# dv2latin / latin2dv convert between Thaana script and Latin transliteration.
supported_languages = ["en2dv", "dv2en", "dv2latin", "latin2dv"]


def translate(source_text: str, target_language: str) -> str:
    """Run one translation/transliteration task and return the decoded text.

    Args:
        source_text: Text to convert. Surrounding whitespace is ignored.
        target_language: Task prefix; must be one of ``supported_languages``
            (surrounding whitespace is ignored).

    Returns:
        The decoded model output with special tokens removed, or an empty
        string when ``source_text`` is empty or whitespace-only.

    Raises:
        ValueError: If ``target_language`` is not a supported task prefix.
    """
    task = target_language.strip()
    if task not in supported_languages:
        # An unknown prefix would otherwise be passed to the model unchecked.
        raise ValueError(
            f"Unsupported target_language {target_language!r}; "
            f"expected one of {supported_languages}"
        )

    text = source_text.strip()
    if not text:
        # Nothing to translate; skip generation, which is forced to emit at
        # least ``min_length`` tokens even for an empty prompt.
        return ""

    # The model is prompted as "<task prefix> <text>".
    prompt = f"{task} {text}"
    # Inputs longer than 128 tokens are truncated by the tokenizer.
    inputs = tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)

    output_ids = model.generate(
        **inputs,
        max_length=128,
        min_length=10,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3,
        repetition_penalty=1.2,
        do_sample=False,  # deterministic beam search, no sampling
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Only the first returned sequence is decoded.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


if __name__ == "__main__":
    # Example usage: (task prefix, input text), with the expected output
    # recorded above each entry.
    examples = [
        # Output: ރައްޔިތުންގެ މަޖިލީހުގައި ޖީއެސްޓީގެ އަގު ބޮޑުވުމާ ގުޅިގެން ކަންބޮޑުވުން ފާޅުކޮށްފި
        ("en2dv", "Concerns over prepayment of GST raised in parliament"),
        # Output: 1.4 million people gathered in Mecca from different parts of the world
        ("dv2en", "ދުނިޔޭގެ އެކި ކަންކޮޅުތަކުން 1.4 މިލިއަން މީހުން މައްކާއަށް ޖަމާވެފައި"),
        # Output: Vaibaruvumun kulhudhuhfushee eaapoatah nujehsigen moaldiviange boateh enburi maaleah
        ("dv2latin", "ވައިބާރުވުމުން ކުޅުދުއްފުށީ އެއާޕޯޓަށް ނުޖެއްސިގެން މޯލްޑިވިއަންގެ ބޯޓެއް އެނބުރި މާލެއަށް"),
        # Output: ޕާކިސްތާނުގެ ސްކޫލު ބަހަކަށް ދިން ހަމަލާއެއްގައި ތިން ކުއްޖަކާއި ބޮޑު ދެ މީހަކު މަރުވެއްޖެ
        ("latin2dv", "Paakisthaanuge skoolu bahakah dhin hamalaaehgai thin kuhjakaai bodu dhe meehaku maruvehje"),
    ]
    for target_language, source_text in examples:
        translated_text = translate(source_text, target_language)
        print(translated_text)