| import gradio as gr |
| import os |
| import tempfile |
| import whisper |
| import re |
| from groq import Groq |
| from gtts import gTTS |
| from openai import OpenAI |
|
|
| |
# Speech-to-text model, loaded once when the module is imported.
whisper_model = whisper.load_model("small")

# Groq client; the API key comes from the GROQ_API_KEY environment variable.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# SEA-LION settings; the service is reached through the OpenAI client below
# by pointing it at SEALION_BASE_URL.
SEALION_BASE_URL = "https://api.sea-lion.ai/v1"
SEALION_MODEL = "aisingapore/Gemma-SEA-LION-v4-27B-IT"
SEALION_API_KEY = os.environ.get("SEALION_API_KEY")

sealion_client = OpenAI(base_url=SEALION_BASE_URL, api_key=SEALION_API_KEY)
|
|
| |
# Language display name -> language code (looked up when synthesizing speech).
LANGUAGE_CODES = {
    "English": "en",
    "Chinese": "zh",
    "Thai": "th",
    "Malaysian Malay": "ms",
    "Indonesian Malay": "id",
    "Korean": "ko",
    "Japanese": "ja",
    "Spanish": "es",
    "German": "de",
    "Hindi": "hi",
    "Urdu": "ur",
    "French": "fr",
    "Russian": "ru",
    "Tagalog": "tl",
    "Arabic": "ar",
    "Myanmar": "my",
    "Vietnamese": "vi",
    "Khmer": "km",
}

# Language names offered in the UI, in the same order as the table above.
SUPPORTED_LANGUAGES = list(LANGUAGE_CODES)

# Model label shown in the dropdown -> model identifier sent with the request.
# NOTE(review): "llama-guard" looks like a safety-classification model rather
# than a general chat model -- confirm it is meant to be a translation option.
AVAILABLE_MODELS = {
    "SEA-LION v4 27B": "aisingapore/Gemma-SEA-LION-v4-27B-IT",
    "Qwen3 32B": "qwen/qwen3-32b",
    "kimi-k2": "moonshotai/kimi-k2-instruct-0905",
    "Llama-3.3 70B": "llama-3.3-70b-versatile",
    "Llama-3.1 instant 8B": "llama-3.1-8b-instant",
    "Llama-4 guard 12B": "meta-llama/llama-guard-4-12b",
}
|
|
def transcribe_audio_locally(audio):
    """Run the module-level Whisper model on ``audio`` and return its text.

    Args:
        audio: Value handed unchanged to ``whisper_model.transcribe``;
            ``None`` means nothing was recorded or uploaded.

    Returns:
        The ``"text"`` entry of the transcription result, ``""`` when
        ``audio`` is ``None``, or a message starting with
        ``"Error transcribing audio: "`` if anything raised.
    """
    if audio is None:
        return ""

    try:
        return whisper_model.transcribe(audio)["text"]
    except Exception as exc:
        # Failures are reported as text instead of propagating to the caller.
        print(f"Error transcribing audio locally: {exc}")
        return f"Error transcribing audio: {exc}"
|
|
def translate_text(input_text, input_lang, output_langs, model_name):
    """Translate text into the requested languages with the selected chat model.

    The request is sent through ``sealion_client`` when ``model_name`` is
    "SEA-LION v4 27B" and through ``groq_client`` for every other choice.

    Args:
        input_text: Text to translate.
        input_lang: Name of the source language, interpolated into the prompt.
        output_langs: Names of the target languages.
        model_name: Key of ``AVAILABLE_MODELS``; an unknown name falls back
            to the "qwen/qwen3-32b" model id.

    Returns:
        The model reply with ``<think>`` blocks, ``**`` markers and common
        reasoning lead-in lines removed and surrounding whitespace stripped;
        ``""`` when there is no text or no target language; or
        ``"Error: <message>"`` if the request raised.
    """
    if not input_text or not output_langs:
        return ""

    try:
        model_id = AVAILABLE_MODELS.get(model_name, "qwen/qwen3-32b")
        client = sealion_client if model_name == "SEA-LION v4 27B" else groq_client

        # NOTE(review): this prompt forbids prefixes while the user prompt
        # below asks for a language-name prefix on each line -- confirm the
        # intended output format with whoever owns the prompts.
        system_prompt = "\n".join([
            "You are a translation assistant that provides direct, accurate translations.",
            "Do NOT include any thinking, reasoning, or explanations in your response.",
            "Do NOT use phrases like 'In [language]:', 'Translation:' or similar prefixes.",
            "Do NOT use any special formatting like asterisks (**) or other markdown.",
            "Always respond with ONLY the exact translation text itself.",
        ])

        user_prompt = (
            f"Translate this {input_lang} text: '{input_text}' into the following "
            f"languages: {', '.join(output_langs)}. Provide each translation on a "
            "separate line with the language name as a prefix. Do not use any "
            "special formatting or markdown."
        )

        response = client.chat.completions.create(
            model=model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        # The message content may be None; treat that as an empty reply
        # rather than failing on None.strip().
        translation_text = response.choices[0].message.content or ""

        translation_text = re.sub(r'<think>.*?</think>', '', translation_text, flags=re.DOTALL)
        translation_text = translation_text.replace('**', '')

        # Whole lines that look like the model reasoning out loud.
        thinking_patterns = [
            r'^\s*Let me think.*$',
            r'^\s*I need to.*$',
            r'^\s*First,.*$',
            r'^\s*Okay, so.*$',
            r'^\s*Hmm,.*$',
            r'^\s*Let\'s break this down.*$',
        ]
        for pattern in thinking_patterns:
            translation_text = re.sub(pattern, '', translation_text, flags=re.MULTILINE)

        # Strip last so blank lines left by the removals above do not survive.
        return translation_text.strip()
    except Exception as e:
        print(f"Error translating text: {e}")
        return f"Error: {str(e)}"
|
|
def synthesize_speech(text, lang):
    """Render ``text`` as speech with gTTS and return the path of the MP3 file.

    Args:
        text: Text to speak.
        lang: Language display name; looked up in ``LANGUAGE_CODES`` and
            defaulting to ``"en"`` when it is not listed.

    Returns:
        Path of a newly created ``.mp3`` temporary file, or ``None`` when
        ``text`` is empty or synthesis failed. The file is not deleted here
        on success; removing it is left to the caller.
    """
    if not text:
        return None

    mp3_path = None
    try:
        lang_code = LANGUAGE_CODES.get(lang, "en")
        tts = gTTS(text=text, lang=lang_code)

        # Reserve a unique file name, then close the handle before gTTS
        # writes to it: opening a NamedTemporaryFile by name a second time
        # while it is still open does not work on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
            mp3_path = fp.name
        tts.save(mp3_path)
        return mp3_path
    except Exception as e:
        print(f"Error synthesizing speech: {e}")
        # delete=False means nothing else removes a file left by a failed save.
        if mp3_path is not None:
            try:
                os.remove(mp3_path)
            except OSError:
                pass
        return None
|
|
def clear_all():
    """Build the reset payload for the output fields.

    Returns:
        A list of four empty strings followed by three ``None`` values.
    """
    blank_texts = ["" for _ in range(4)]
    blank_audio = [None for _ in range(3)]
    return blank_texts + blank_audio
|
|
def process_speech_to_text(audio):
    """Transcribe ``audio`` when one was supplied; otherwise return ``""``."""
    return transcribe_audio_locally(audio) if audio else ""
|
|
def clean_translation_output(text):
    """Filter model chatter out of a translation reply, line by line.

    ``<think>`` blocks and every ``*`` character are removed first. A line is
    then dropped when it starts with one of the reasoning lead-ins below.
    Otherwise it is kept when it contains ``:`` together with a supported
    language name, or when it is non-blank and free of the words
    thinking / translating / understand / process (case-insensitive).

    Args:
        text: Raw reply text; falsy input yields ``""``.

    Returns:
        The surviving lines joined with newlines.
    """
    if not text:
        return ""

    without_think = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
    # Removing every '*' also removes the '**' bold markers.
    plain = without_think.replace('*', '')

    def _keep(line):
        """Decide whether one line of the reply survives the filter."""
        if re.search(r'(^I need to|^Let me|^First|^Okay|^Hmm|^I will|^I am thinking|^I should)', line, re.IGNORECASE):
            return False
        lowered = line.lower()
        if ':' in line and any(name.lower() in lowered for name in SUPPORTED_LANGUAGES):
            return True
        if not line.strip():
            return False
        return re.search(r'(thinking|translating|understand|process)', line, re.IGNORECASE) is None

    return '\n'.join(line for line in plain.split('\n') if _keep(line))
|
|
def extract_translations(translations_text, output_langs):
    """Pull the per-language translations out of a model reply.

    Args:
        translations_text: Raw reply from the translation model.
        output_langs: Requested target language names, in display order.

    Returns:
        A list of exactly three strings. When at least one
        ``<language>: text`` label is found, entry ``i`` holds the text for
        ``output_langs[i]`` (``""`` if that language is missing from the
        reply), so each result stays paired with its own language. When no
        label is found, the non-blank lines of the cleaned reply are used in
        order (text before the first ``:`` dropped), or the whole cleaned
        reply when it is a single line.
    """
    if not translations_text or not output_langs:
        return [""] * 3

    clean_text = clean_translation_output(translations_text)

    translation_results = []
    matched_any = False
    for lang in output_langs:
        # Capture lazily up to the next line that starts with a letter, or to
        # the end of the text. IGNORECASE makes [A-Z] match lowercase too.
        pattern = rf'{re.escape(lang)}[\s]*:[\s]*(.*?)(?=\n\s*[A-Z]|$)'
        match = re.search(pattern, clean_text, re.IGNORECASE | re.DOTALL)
        if match:
            matched_any = True
            translation_results.append(match.group(1).strip())
        else:
            # Keep the slot: skipping it would shift later translations onto
            # the wrong language for display and speech synthesis.
            translation_results.append("")

    if not matched_any:
        # No recognisable labels at all: fall back to positional lines.
        translation_results = []
        if '\n' in clean_text:
            for line in clean_text.split('\n'):
                line = line.strip()
                if not line:
                    continue
                if ':' in line:
                    translation_results.append(line.partition(':')[2].strip())
                else:
                    translation_results.append(line)
        else:
            translation_results.append(clean_text)

    # Always hand back exactly three entries.
    translation_results.extend([""] * (3 - len(translation_results)))
    return translation_results[:3]
|
|
def perform_translation(audio, typed_text, input_lang, output_langs, model_name):
    """Run the full pipeline: pick the input, translate it, synthesize speech.

    Typed text takes priority; the audio is transcribed only when no text was
    typed. At most the first three target languages are used.

    Args:
        audio: Audio value passed to ``transcribe_audio_locally`` when needed.
        typed_text: Text entered by the user.
        input_lang: Source language name.
        output_langs: Selected target language names.
        model_name: Label of the translation model.

    Returns:
        A seven-item list: the input text, three translation strings, and
        three audio file paths (``None`` where there is no speech).
    """
    if not output_langs:
        return [typed_text, "", "", "", None, None, None]

    targets = output_langs[:3]

    source_text = typed_text
    if audio and not source_text:
        source_text = transcribe_audio_locally(audio)

    if not source_text:
        return ["", "", "", "", None, None, None]

    raw_reply = translate_text(source_text, input_lang, targets, model_name)
    texts = extract_translations(raw_reply, targets)

    # One speech clip per (translation, language) pair that has content.
    speech = [
        synthesize_speech(text, lang) if text and lang else None
        for text, lang in zip(texts, targets)
    ]
    # Fewer than three languages selected: pad the remaining audio slots.
    speech.extend([None] * (3 - len(speech)))

    return [source_text, *texts, *speech]
|
|
| |
# Gradio UI: language/model selection, audio or typed input, and three
# translation slots that each have a text box and a speech player.
with gr.Blocks(title="Multilingual Translator") as demo:
    gr.Markdown("## 🌍 Multilingual Translator with Speech Support")

    with gr.Row():
        with gr.Column():
            input_lang = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                value="English",
                label="Input Language"
            )

            output_langs = gr.CheckboxGroup(
                choices=SUPPORTED_LANGUAGES,
                label="Output Languages (select up to 3)"
            )
            model_selector = gr.Dropdown(
                choices=list(AVAILABLE_MODELS.keys()),
                value="SEA-LION v4 27B",
                label="Translation Model"
            )

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Speak Your Input (upload or record)"
            )
            text_input = gr.Textbox(
                label="Or Type Text",
                placeholder="Enter text to translate here..."
            )

    transcribed_text = gr.Textbox(
        label="Transcribed Text (from audio)",
        interactive=False
    )

    translated_outputs = []
    audio_outputs = []

    with gr.Row():
        for i in range(3):
            with gr.Column():
                translated_output = gr.Textbox(
                    label=f"Translation {i+1}",
                    interactive=False,
                    visible=True
                )
                translated_outputs.append(translated_output)

                audio_output = gr.Audio(
                    label=f"Speech Output {i+1}",
                    visible=True
                )
                audio_outputs.append(audio_output)

    def validate_output_langs(output_langs):
        """Cap the selection at three languages, warning the user if trimmed."""
        if output_langs and len(output_langs) > 3:
            gr.Warning("Please select only up to 3 languages. Using first 3 selected.")
            return output_langs[:3]
        return output_langs

    with gr.Row():
        translate_btn = gr.Button("Translate", variant="primary")
        clear_btn = gr.Button("Clear All")

    def handle_audio_transcription(audio):
        """Transcribe new audio into the text box ("" when audio is cleared)."""
        return process_speech_to_text(audio)

    audio_input.change(
        handle_audio_transcription,
        inputs=[audio_input],
        outputs=[text_input]
    )

    def handle_translation(audio, text, input_lang, output_langs, model):
        """Validate the language selection, then run the translation pipeline."""
        validated_langs = validate_output_langs(output_langs)
        return perform_translation(audio, text, input_lang, validated_langs, model)

    # The button and the Enter key in the text box share the same wiring.
    translation_inputs = [audio_input, text_input, input_lang, output_langs, model_selector]
    result_components = [transcribed_text] + translated_outputs + audio_outputs

    translate_btn.click(
        handle_translation,
        inputs=translation_inputs,
        outputs=result_components
    )

    text_input.submit(
        handle_translation,
        inputs=translation_inputs,
        outputs=result_components
    )

    def handle_clear():
        """Reset the recorded audio and typed text as well as all results."""
        # [audio_input, text_input] followed by the seven result fields.
        return [None, ""] + clear_all()

    clear_btn.click(
        handle_clear,
        inputs=[],
        outputs=[audio_input, text_input] + result_components
    )


if __name__ == "__main__":
    demo.launch(share=True)