Spaces:
Running
Running
| from __future__ import annotations | |
| import base64 | |
| import os | |
| import pathlib | |
| import sys | |
| from typing import Optional | |
| from mistralai.client import Mistral | |
def synthesize_and_save_audio(
    input_text: str = "Hello!",
    voice_id: str = "",
    model: str = "voxtral-mini-tts-2603",
    api_key: str = "MISTRAL_API_KEY",
    output_path: str = "/tmp/voxtral.wav",
    response_format: str = "wav",
) -> int:
    """Moderate ``input_text``, synthesize it to speech, and save the audio.

    The text is first sent to the moderation classifier; if any of the
    blocked categories is flagged, nothing is synthesized. Otherwise the
    text is passed to the speech endpoint and the response's ``audio_data``
    is base64-decoded and written to ``output_path``.

    Args:
        input_text: Text to moderate and then synthesize.
        voice_id: Either a voice identifier, or, when the value contains a
            ``"."``, a path to a reference audio file whose base64-encoded
            bytes are sent as ``ref_audio``.
        model: Speech model name passed to the speech endpoint.
        api_key: API key for the client. When left at the placeholder
            default ``"MISTRAL_API_KEY"``, the value of the environment
            variable of the same name is used if it is set.
        output_path: Destination file for the decoded audio (``~`` is
            expanded). Missing parent directories are created.
        response_format: Audio format requested from the speech endpoint.

    Returns:
        ``0`` on success, ``2`` when the reference audio file does not
        exist, ``3`` when the moderation check flags the input text.
    """
    # NOTE(review): the default looks like a placeholder naming the
    # environment variable rather than a real key, so resolve it from the
    # environment. Explicitly supplied keys are used unchanged, and the
    # original value is kept when the variable is unset.
    if api_key == "MISTRAL_API_KEY":
        api_key = os.environ.get("MISTRAL_API_KEY", api_key)
    client = Mistral(api_key=api_key)

    moderation_response = client.classifiers.moderate(
        model="mistral-moderation-2603",
        inputs=[input_text],
    )
    categories = moderation_response.results[0].categories
    blocked_categories = (
        "sexual",
        "hate_and_discrimination",
        "violence_and_threats",
        "selfharm",
        "jailbreaking",
    )
    # Strict indexing is deliberate: a category missing from the response
    # raises instead of silently letting the text through. The generator
    # stops at the first flagged category.
    if any(categories[name] for name in blocked_categories):
        print("Input text blocked by moderation layer.", file=sys.stderr)
        return 3

    if "." in voice_id:
        # A dot is treated as the marker of a file path (e.g. "voice.wav"),
        # which selects cloning from a reference recording.
        print("Cloning voice from reference audio...")
        reference_path = pathlib.Path(voice_id).expanduser().resolve()
        if not reference_path.is_file():
            print(f"Reference audio not found: {reference_path}", file=sys.stderr)
            return 2
        reference_b64 = base64.b64encode(reference_path.read_bytes()).decode("ascii")
        response = client.audio.speech.complete(
            model=model,
            input=input_text,
            ref_audio=reference_b64,
            response_format=response_format,
        )
    else:
        response = client.audio.speech.complete(
            model=model,
            input=input_text,
            voice_id=voice_id,
            response_format=response_format,
        )

    audio_bytes = base64.b64decode(response.audio_data)
    output_path_obj = pathlib.Path(output_path).expanduser()
    # Create the destination directory so a fresh output location does not
    # fail with FileNotFoundError after the synthesis call has already run.
    output_path_obj.parent.mkdir(parents=True, exist_ok=True)
    output_path_obj.write_bytes(audio_bytes)
    print(f"Wrote {len(audio_bytes)} bytes to {output_path_obj}")
    return 0