"""Voice tab β€” STT transcription and TTS synthesis via the capability bus.""" from __future__ import annotations import asyncio import base64 import concurrent.futures import tempfile from typing import Any def _run(coro): """Run a coroutine safely regardless of whether an event loop is running.""" try: loop = asyncio.get_running_loop() except RuntimeError: loop = None if loop and loop.is_running(): with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: return pool.submit(asyncio.run, coro).result() return asyncio.run(coro) def build_voice_tab(bus: Any | None = None) -> None: import gradio as gr gr.HTML("""

πŸŽ™ Voice β€” STT & TTS

Whisper (speech→text) · Edge-TTS 300+ voices (text→speech) · 100% local

""") # ── STT ─────────────────────────────────────────────────────────────────── gr.Markdown("### 🎀 Speech β†’ Text") with gr.Row(): with gr.Column(scale=2): stt_audio = gr.Audio( label="Upload or record audio", type="filepath", sources=["upload", "microphone"], ) stt_language = gr.Textbox( label="Language hint (optional)", placeholder="en de fr auto …", value="", ) with gr.Column(scale=3): stt_btn = gr.Button("🎀 Transcribe", variant="primary", size="lg") stt_out = gr.Textbox(label="Transcript", lines=6, interactive=False) stt_status = gr.Textbox(label="Status", lines=1, interactive=False) def _transcribe(audio_path: str, language: str) -> tuple[str, str]: if not audio_path: return "", "⚠ Upload or record audio first" if bus is None: return "", "⚠ No bus β€” run inside a HearthNet node" try: with open(audio_path, "rb") as f: audio_b64 = base64.b64encode(f.read()).decode() except Exception as exc: return "", f"⚠ Could not read file: {exc}" async def _call(): return await bus.call( "stt.transcribe", (1, 0), {"params": {"language": language.strip() or None}, "input": {"audio_b64": audio_b64}}, ) try: result = _run(_call()) except Exception as exc: return "", f"⚠ Bus error: {exc}" if "error" in result: if result["error"] == "backend_unavailable": return "", "⚠ No STT backend β€” install: pip install faster-whisper" return "", f"⚠ {result.get('message', result['error'])}" text = result.get("output", {}).get("text", result.get("text", "")) lang = result.get("output", {}).get("language", "") return text, f"βœ“ Transcribed{f' [{lang}]' if lang else ''}" stt_btn.click(_transcribe, inputs=[stt_audio, stt_language], outputs=[stt_out, stt_status]) gr.HTML("
") # ── TTS ─────────────────────────────────────────────────────────────────── gr.Markdown("### πŸ”Š Text β†’ Speech") with gr.Row(): with gr.Column(scale=2): tts_text = gr.Textbox( label="Text to speak", placeholder="Type anything…", lines=5, ) tts_voice = gr.Textbox( label="Voice (optional)", placeholder="en-US-JennyNeural de-DE-KatjaNeural fr-FR-DeniseNeural …", value="", ) with gr.Column(scale=3): tts_btn = gr.Button("πŸ”Š Synthesize", variant="primary", size="lg") tts_audio_out = gr.Audio(label="Generated speech", type="filepath") tts_status = gr.Textbox(label="Status", lines=1, interactive=False) def _synthesize(text: str, voice: str) -> tuple[str | None, str]: if not text.strip(): return None, "⚠ Enter text to synthesize" if bus is None: return None, "⚠ No bus β€” run inside a HearthNet node" async def _call(): return await bus.call( "tts.synthesize", (1, 0), {"params": {"voice": voice.strip() or None}, "input": {"text": text}}, ) try: result = _run(_call()) except Exception as exc: return None, f"⚠ Bus error: {exc}" if "error" in result: if result["error"] == "backend_unavailable": return None, "⚠ No TTS backend β€” install: pip install edge-tts" return None, f"⚠ {result.get('message', result['error'])}" audio_b64 = result.get("output", {}).get("audio_b64", result.get("audio_b64", "")) if not audio_b64: return None, "⚠ No audio in response" tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) tmp.write(base64.b64decode(audio_b64)) tmp.close() return tmp.name, "βœ“ Synthesized" tts_btn.click(_synthesize, inputs=[tts_text, tts_voice], outputs=[tts_audio_out, tts_status]) gr.HTML("""
β„Ή Voice setup help
STT: pip install faster-whisper (CPU/GPU) or pip install openai-whisper
TTS: pip install edge-tts (free, 300+ voices, needs internet for synthesis)
Example voices: en-US-JennyNeural, en-GB-SoniaNeural, de-DE-KatjaNeural, fr-FR-DeniseNeural, es-ES-ElviraNeural, ja-JP-NanamiNeural
""")