Spaces: djhui5710/reachy_mini_home_assistant (status: Running)

Desmond-Dong committed on Jan 6

Commit cd59e9e (1 parent: d94735f)

v0.2.16: Fix TTS playback - pause recording during playback

- Restore tts_playing flag in models.py
- Set tts_playing=True in play_tts(), clear in _tts_finished()
- Pause audio recording loop when tts_playing is True
- Use push_audio_sample() with proper synchronization
- This prevents GStreamer pipeline conflicts between recording and playback

Files changed (6)

pyproject.toml +1 -1
reachy_mini_ha_voice/__init__.py +1 -1
reachy_mini_ha_voice/audio_player.py +49 -48
reachy_mini_ha_voice/models.py +4 -0
reachy_mini_ha_voice/satellite.py +7 -0
reachy_mini_ha_voice/voice_assistant.py +6 -0

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "reachy_mini_ha_voice"
-version = "0.2.15"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

 [project]
 name = "reachy_mini_ha_voice"
+version = "0.2.16"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

reachy_mini_ha_voice/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Key features:
 - Reachy Mini motion control integration
 """
-__version__ = "0.2.15"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

 - Reachy Mini motion control integration
 """
+__version__ = "0.2.16"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

reachy_mini_ha_voice/audio_player.py CHANGED Viewed

@@ -9,12 +9,17 @@ from typing import List, Optional, Union
 import numpy as np
 import soundfile as sf
 _LOGGER = logging.getLogger(__name__)
 class AudioPlayer:
-    """Audio player using Reachy Mini's media system."""
     def __init__(self, reachy_mini=None) -> None:
         self.reachy_mini = reachy_mini
@@ -63,31 +68,30 @@ class AudioPlayer:
         thread.start()
     def _play_file(self, file_path: str) -> None:
-        """Play an audio file using play_sound() - independent GStreamer playbin."""
         try:
             # Handle URLs - download first
             if file_path.startswith(("http://", "https://")):
                 import urllib.request
                 import tempfile
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                     urllib.request.urlretrieve(file_path, tmp.name)
                     file_path = tmp.name
             if self._stop_flag.is_set():
                 return
-            # Use Reachy Mini's play_sound() for TTS playback
-            # This creates an independent GStreamer playbin that doesn't conflict
-            # with the recording pipeline (unlike push_audio_sample which shares appsrc)
             if self.reachy_mini is not None:
                 try:
-                    self._play_via_play_sound(file_path)
                 except Exception as e:
-                    _LOGGER.warning("play_sound failed, trying sounddevice: %s", e)
-                    self._play_file_fallback(file_path)
             else:
-                self._play_file_fallback(file_path)
         except Exception as e:
             _LOGGER.error("Error playing audio: %s", e)
@@ -99,56 +103,48 @@ class AudioPlayer:
             else:
                 self._on_playback_finished()
-    def _play_via_play_sound(self, file_path: str) -> None:
-        """Play audio using Reachy Mini's play_sound() method.
-        This creates an independent GStreamer playbin pipeline that doesn't
-        conflict with the recording pipeline. This is the key difference from
-        push_audio_sample() which writes to the shared appsrc pipeline.
         """
-        # play_sound() is non-blocking, so we need to estimate duration and wait
-        data, samplerate = sf.read(file_path, dtype='float32')
-        duration = len(data) / samplerate
-        _LOGGER.debug("Playing via play_sound: %s (duration=%.2fs)", file_path, duration)
-        # Call play_sound - creates independent playbin
-        self.reachy_mini.media.play_sound(file_path)
-        # Wait for playback to complete (play_sound is non-blocking)
-        # Check stop flag periodically
-        elapsed = 0.0
-        while elapsed < duration and not self._stop_flag.is_set():
-            time.sleep(0.1)
-            elapsed += 0.1
-    def _play_file_fallback(self, file_path: str) -> None:
-        """Play audio using sounddevice (fallback when Reachy Mini not available)."""
-        import sounddevice as sd
-        import scipy.signal
-        data, samplerate = sf.read(file_path, dtype='float32')
         # Convert to mono if stereo
         if data.ndim == 2:
             data = data.mean(axis=1)
         # Apply volume
         data = data * self._current_volume
-        # Resample to 48000Hz (standard rate supported by most devices)
-        target_samplerate = 48000
-        if samplerate != target_samplerate:
-            num_samples = int(len(data) * target_samplerate / samplerate)
             data = scipy.signal.resample(data, num_samples)
-            samplerate = target_samplerate
-        if not self._stop_flag.is_set():
-            try:
-                sd.play(data.astype(np.float32), samplerate)
-                sd.wait()
-            except Exception as e:
-                _LOGGER.warning("sounddevice playback failed: %s", e)
     def _on_playback_finished(self) -> None:
         """Called when playback is finished."""
@@ -175,6 +171,11 @@ class AudioPlayer:
     def stop(self) -> None:
         self._stop_flag.set()
         self._playlist.clear()
         self.is_playing = False

 import numpy as np
 import soundfile as sf
+import scipy.signal
 _LOGGER = logging.getLogger(__name__)
 class AudioPlayer:
+    """Audio player using Reachy Mini's media system.
+    Uses push_audio_sample() to write audio to the GStreamer pipeline.
+    The caller must pause audio recording during playback to avoid conflicts.
+    """
     def __init__(self, reachy_mini=None) -> None:
         self.reachy_mini = reachy_mini
         thread.start()
     def _play_file(self, file_path: str) -> None:
+        """Play an audio file using push_audio_sample()."""
         try:
             # Handle URLs - download first
             if file_path.startswith(("http://", "https://")):
                 import urllib.request
                 import tempfile
+                _LOGGER.debug("Downloading TTS audio from %s", file_path)
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                     urllib.request.urlretrieve(file_path, tmp.name)
                     file_path = tmp.name
+                _LOGGER.debug("Downloaded to %s", file_path)
             if self._stop_flag.is_set():
                 return
+            # Use push_audio_sample for playback
             if self.reachy_mini is not None:
                 try:
+                    self._play_via_push_audio(file_path)
                 except Exception as e:
+                    _LOGGER.warning("push_audio_sample failed: %s", e)
             else:
+                _LOGGER.warning("No reachy_mini instance, cannot play audio")
         except Exception as e:
             _LOGGER.error("Error playing audio: %s", e)
             else:
                 self._on_playback_finished()
+    def _play_via_push_audio(self, file_path: str) -> None:
+        """Play audio by pushing samples to Reachy Mini's GStreamer pipeline.
+        This writes audio directly to the existing playback pipeline.
+        The caller should pause audio recording during this operation.
         """
+        # Read audio file
+        data, input_samplerate = sf.read(file_path, dtype='float32')
+        _LOGGER.debug("Audio file: %s, samplerate=%d, shape=%s", file_path, input_samplerate, data.shape)
+        # Get output sample rate from Reachy Mini
+        output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
+        _LOGGER.debug("Output samplerate: %d", output_samplerate)
         # Convert to mono if stereo
         if data.ndim == 2:
             data = data.mean(axis=1)
         # Apply volume
         data = data * self._current_volume
+        # Resample if needed
+        if input_samplerate != output_samplerate:
+            num_samples = int(len(data) * output_samplerate / input_samplerate)
             data = scipy.signal.resample(data, num_samples)
+            _LOGGER.debug("Resampled to %d samples", num_samples)
+        # Push audio in chunks (like conversation_app)
+        # Use smaller chunks for smoother playback
+        chunk_duration = 0.05  # 50ms chunks
+        chunk_size = int(output_samplerate * chunk_duration)
+        for i in range(0, len(data), chunk_size):
+            if self._stop_flag.is_set():
+                _LOGGER.debug("Playback stopped by flag")
+                break
+            chunk = data[i:i + chunk_size].astype(np.float32)
+            self.reachy_mini.media.push_audio_sample(chunk)
+            # Sleep to match chunk duration (prevents buffer overflow)
+            time.sleep(chunk_duration * 0.8)  # Slightly less to keep buffer fed
+        _LOGGER.debug("Audio playback complete")
     def _on_playback_finished(self) -> None:
         """Called when playback is finished."""
     def stop(self) -> None:
         self._stop_flag.set()
+        if self.reachy_mini is not None:
+            try:
+                self.reachy_mini.media.clear_output_buffer()
+            except Exception:
+                pass
         self._playlist.clear()
         self.is_playing = False

reachy_mini_ha_voice/models.py CHANGED Viewed

@@ -73,6 +73,10 @@ class ServerState:
     reachy_mini: Optional[object] = None
     motion_enabled: bool = True
     motion: Optional[object] = None  # ReachyMiniMotion instance
     media_player_entity: "Optional[MediaPlayerEntity]" = None
     satellite: "Optional[VoiceSatelliteProtocol]" = None

     reachy_mini: Optional[object] = None
     motion_enabled: bool = True
     motion: Optional[object] = None  # ReachyMiniMotion instance
+    # TTS playback state - when True, audio recording should pause
+    # to avoid GStreamer pipeline conflicts (recording and playback share resources)
+    tts_playing: bool = False
     media_player_entity: "Optional[MediaPlayerEntity]" = None
     satellite: "Optional[VoiceSatelliteProtocol]" = None

reachy_mini_ha_voice/satellite.py CHANGED Viewed

@@ -352,6 +352,10 @@ class VoiceSatelliteProtocol(APIServer):
         self._tts_played = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
         self.state.active_wake_words.add(self.state.stop_word.id)
         self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
@@ -364,6 +368,9 @@ class VoiceSatelliteProtocol(APIServer):
         self.state.music_player.unduck()
     def _tts_finished(self) -> None:
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])

         self._tts_played = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
+        # Set flag to pause audio recording during TTS playback
+        # This prevents GStreamer pipeline conflicts (recording and playback share resources)
+        self.state.tts_playing = True
         self.state.active_wake_words.add(self.state.stop_word.id)
         self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
         self.state.music_player.unduck()
     def _tts_finished(self) -> None:
+        # Clear TTS playing flag to resume audio recording
+        self.state.tts_playing = False
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])

reachy_mini_ha_voice/voice_assistant.py CHANGED Viewed

@@ -424,6 +424,12 @@ class VoiceAssistantService:
                 if not self._wait_for_satellite():
                     continue
                 self._update_wake_words_list(ctx)
                 # Get audio from Reachy Mini

                 if not self._wait_for_satellite():
                     continue
+                # Pause audio recording during TTS playback to avoid GStreamer conflicts
+                # Recording and playback pipelines share the same audio device
+                if self._state is not None and self._state.tts_playing:
+                    time.sleep(0.05)
+                    continue
                 self._update_wake_words_list(ctx)
                 # Get audio from Reachy Mini