Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

Desmond-Dong commited on Jan 6

Commit

0832ea3

1 Parent(s): a88f884

v0.2.18: Fix audio playback - restore wakeup sound, use push_audio_sample

- Rewrite audio_player.py based on linux-voice-assistant pattern
- Use push_audio_sample() with proper pacing (like conversation_app)
- Restore wakeup sound playback in satellite.py wakeup()
- Remove tts_playing flag - recording and playback can run simultaneously
- GStreamer has separate pipelines for recording and playback

Files changed (6) hide show

pyproject.toml +1 -1
reachy_mini_ha_voice/__init__.py +1 -1
reachy_mini_ha_voice/audio_player.py +102 -38
reachy_mini_ha_voice/models.py +0 -4
reachy_mini_ha_voice/satellite.py +2 -11
reachy_mini_ha_voice/voice_assistant.py +0 -6

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "reachy_mini_ha_voice"
-version = "0.2.17"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

 [project]
 name = "reachy_mini_ha_voice"
+version = "0.2.18"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

reachy_mini_ha_voice/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Key features:
 - Reachy Mini motion control integration
 """
-__version__ = "0.2.17"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

 - Reachy Mini motion control integration
 """
+__version__ = "0.2.18"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

reachy_mini_ha_voice/audio_player.py CHANGED Viewed

@@ -1,10 +1,21 @@
-"""Audio player using Reachy Mini's media system."""
 import logging
 import threading
 import time
 from collections.abc import Callable
-from pathlib import Path
 from typing import List, Optional, Union
 import numpy as np
@@ -17,8 +28,7 @@ _LOGGER = logging.getLogger(__name__)
 class AudioPlayer:
     """Audio player using Reachy Mini's media system.
-    Uses push_audio_sample() to write audio to the GStreamer pipeline.
-    The caller must pause audio recording during playback to avoid conflicts.
     """
     def __init__(self, reachy_mini=None) -> None:
@@ -31,6 +41,7 @@ class AudioPlayer:
         self._unduck_volume: float = 1.0
         self._current_volume: float = 1.0
         self._stop_flag = threading.Event()
     def set_reachy_mini(self, reachy_mini) -> None:
         """Set the Reachy Mini instance."""
@@ -42,6 +53,13 @@ class AudioPlayer:
         done_callback: Optional[Callable[[], None]] = None,
         stop_first: bool = True,
     ) -> None:
         if stop_first:
             self.stop()
@@ -55,6 +73,7 @@ class AudioPlayer:
         self._play_next()
     def _play_next(self) -> None:
         if not self._playlist or self._stop_flag.is_set():
             self._on_playback_finished()
             return
@@ -64,39 +83,55 @@ class AudioPlayer:
         self.is_playing = True
         # Start playback in a thread
-        thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
-        thread.start()
     def _play_file(self, file_path: str) -> None:
-        """Play an audio file using push_audio_sample()."""
         try:
             # Handle URLs - download first
             if file_path.startswith(("http://", "https://")):
-                import urllib.request
-                import tempfile
-                _LOGGER.debug("Downloading TTS audio from %s", file_path)
-                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-                    urllib.request.urlretrieve(file_path, tmp.name)
-                    file_path = tmp.name
                 _LOGGER.debug("Downloaded to %s", file_path)
             if self._stop_flag.is_set():
                 return
-            # Use push_audio_sample for playback
             if self.reachy_mini is not None:
-                try:
-                    self._play_via_push_audio(file_path)
-                except Exception as e:
-                    _LOGGER.warning("push_audio_sample failed: %s", e)
             else:
                 _LOGGER.warning("No reachy_mini instance, cannot play audio")
         except Exception as e:
             _LOGGER.error("Error playing audio: %s", e)
         finally:
             self.is_playing = False
             # Play next in playlist or finish
             if self._playlist and not self._stop_flag.is_set():
                 self._play_next()
@@ -104,18 +139,17 @@ class AudioPlayer:
                 self._on_playback_finished()
     def _play_via_push_audio(self, file_path: str) -> None:
-        """Play audio by pushing samples to Reachy Mini's GStreamer pipeline.
-        This writes audio directly to the existing playback pipeline.
-        The caller should pause audio recording during this operation.
         """
         # Read audio file
         data, input_samplerate = sf.read(file_path, dtype='float32')
-        _LOGGER.debug("Audio file: %s, samplerate=%d, shape=%s", file_path, input_samplerate, data.shape)
-        # Get output sample rate from Reachy Mini
         output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
-        _LOGGER.debug("Output samplerate: %d", output_samplerate)
         # Convert to mono if stereo
         if data.ndim == 2:
@@ -128,29 +162,47 @@ class AudioPlayer:
         if input_samplerate != output_samplerate:
             num_samples = int(len(data) * output_samplerate / input_samplerate)
             data = scipy.signal.resample(data, num_samples)
-            _LOGGER.debug("Resampled to %d samples", num_samples)
-        # Push audio in chunks (like conversation_app)
-        # Use smaller chunks for smoother playback
-        chunk_duration = 0.05  # 50ms chunks
         chunk_size = int(output_samplerate * chunk_duration)
         for i in range(0, len(data), chunk_size):
             if self._stop_flag.is_set():
-                _LOGGER.debug("Playback stopped by flag")
-                break
             chunk = data[i:i + chunk_size].astype(np.float32)
             self.reachy_mini.media.push_audio_sample(chunk)
-            # Sleep to match chunk duration (prevents buffer overflow)
-            time.sleep(chunk_duration * 0.8)  # Slightly less to keep buffer fed
         _LOGGER.debug("Audio playback complete")
     def _on_playback_finished(self) -> None:
-        """Called when playback is finished."""
         self.is_playing = False
         todo_callback: Optional[Callable[[], None]] = None
         with self._done_callback_lock:
             if self._done_callback:
                 todo_callback = self._done_callback
@@ -163,29 +215,41 @@ class AudioPlayer:
                 _LOGGER.exception("Unexpected error running done callback")
     def pause(self) -> None:
         self.is_playing = False
     def resume(self) -> None:
         if self._playlist:
             self._play_next()
     def stop(self) -> None:
         self._stop_flag.set()
         if self.reachy_mini is not None:
             try:
-                self.reachy_mini.media.clear_output_buffer()
-            except Exception:
-                pass
         self._playlist.clear()
         self.is_playing = False
     def duck(self) -> None:
         self._current_volume = self._duck_volume
     def unduck(self) -> None:
         self._current_volume = self._unduck_volume
     def set_volume(self, volume: int) -> None:
         volume = max(0, min(100, volume))
         self._unduck_volume = volume / 100.0
         self._duck_volume = self._unduck_volume / 2

+"""Audio player using Reachy Mini's media system.
+This module provides audio playback functionality similar to linux-voice-assistant's
+MpvMediaPlayer, but using Reachy Mini's GStreamer-based audio system.
+For local files: Uses play_sound() which creates an independent playbin pipeline.
+For URLs (TTS): Downloads to temp file, then uses play_sound().
+This approach avoids conflicts with the recording pipeline.
+"""
 import logging
+import os
+import tempfile
 import threading
 import time
+import urllib.request
 from collections.abc import Callable
 from typing import List, Optional, Union
 import numpy as np
 class AudioPlayer:
     """Audio player using Reachy Mini's media system.
+    Similar to linux-voice-assistant's MpvMediaPlayer but using GStreamer.
     """
     def __init__(self, reachy_mini=None) -> None:
         self._unduck_volume: float = 1.0
         self._current_volume: float = 1.0
         self._stop_flag = threading.Event()
+        self._playback_thread: Optional[threading.Thread] = None
     def set_reachy_mini(self, reachy_mini) -> None:
         """Set the Reachy Mini instance."""
         done_callback: Optional[Callable[[], None]] = None,
         stop_first: bool = True,
     ) -> None:
+        """Play audio file(s) or URL(s).
+        Args:
+            url: Single URL/path or list of URLs/paths to play
+            done_callback: Called when all playback is finished
+            stop_first: Stop current playback before starting new
+        """
         if stop_first:
             self.stop()
         self._play_next()
     def _play_next(self) -> None:
+        """Play the next item in the playlist."""
         if not self._playlist or self._stop_flag.is_set():
             self._on_playback_finished()
             return
         self.is_playing = True
         # Start playback in a thread
+        self._playback_thread = threading.Thread(
+            target=self._play_file,
+            args=(next_url,),
+            daemon=True
+        )
+        self._playback_thread.start()
     def _play_file(self, file_path: str) -> None:
+        """Play an audio file.
+        For URLs: Download to temp file first.
+        Then use push_audio_sample() to play through the GStreamer pipeline.
+        """
+        temp_file = None
         try:
             # Handle URLs - download first
             if file_path.startswith(("http://", "https://")):
+                _LOGGER.debug("Downloading audio from %s", file_path)
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+                temp_file.close()
+                urllib.request.urlretrieve(file_path, temp_file.name)
+                file_path = temp_file.name
                 _LOGGER.debug("Downloaded to %s", file_path)
             if self._stop_flag.is_set():
                 return
+            if not os.path.exists(file_path):
+                _LOGGER.error("Audio file not found: %s", file_path)
+                return
+            # Play using Reachy Mini's audio system
             if self.reachy_mini is not None:
+                self._play_via_push_audio(file_path)
             else:
                 _LOGGER.warning("No reachy_mini instance, cannot play audio")
         except Exception as e:
             _LOGGER.error("Error playing audio: %s", e)
         finally:
+            # Clean up temp file
+            if temp_file is not None:
+                try:
+                    os.unlink(temp_file.name)
+                except Exception:
+                    pass
             self.is_playing = False
             # Play next in playlist or finish
             if self._playlist and not self._stop_flag.is_set():
                 self._play_next()
                 self._on_playback_finished()
     def _play_via_push_audio(self, file_path: str) -> None:
+        """Play audio using push_audio_sample().
+        This pushes audio data to the GStreamer playback pipeline.
+        Recording and playback pipelines are separate in GStreamer,
+        so they can run simultaneously (like in conversation_app).
         """
         # Read audio file
         data, input_samplerate = sf.read(file_path, dtype='float32')
+        # Get output sample rate
         output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
         # Convert to mono if stereo
         if data.ndim == 2:
         if input_samplerate != output_samplerate:
             num_samples = int(len(data) * output_samplerate / input_samplerate)
             data = scipy.signal.resample(data, num_samples)
+        total_duration = len(data) / output_samplerate
+        _LOGGER.debug("Playing %.2fs audio at %dHz", total_duration, output_samplerate)
+        # Push audio in chunks (like conversation_app's play_loop)
+        chunk_duration = 0.02  # 20ms chunks
         chunk_size = int(output_samplerate * chunk_duration)
+        start_time = time.monotonic()
+        samples_pushed = 0
         for i in range(0, len(data), chunk_size):
             if self._stop_flag.is_set():
+                _LOGGER.debug("Playback stopped")
+                return
             chunk = data[i:i + chunk_size].astype(np.float32)
             self.reachy_mini.media.push_audio_sample(chunk)
+            samples_pushed += len(chunk)
+            # Pace the pushing to avoid buffer overflow
+            # Calculate how much time should have elapsed
+            expected_time = samples_pushed / output_samplerate
+            actual_time = time.monotonic() - start_time
+            sleep_time = expected_time - actual_time - 0.01  # 10ms ahead
+            if sleep_time > 0:
+                time.sleep(sleep_time)
+        # Wait for playback to complete
+        remaining = total_duration - (time.monotonic() - start_time)
+        if remaining > 0:
+            time.sleep(remaining + 0.05)  # Small buffer
         _LOGGER.debug("Audio playback complete")
     def _on_playback_finished(self) -> None:
+        """Called when all playback is finished."""
         self.is_playing = False
         todo_callback: Optional[Callable[[], None]] = None
         with self._done_callback_lock:
             if self._done_callback:
                 todo_callback = self._done_callback
                 _LOGGER.exception("Unexpected error running done callback")
     def pause(self) -> None:
+        """Pause playback."""
         self.is_playing = False
     def resume(self) -> None:
+        """Resume playback."""
         if self._playlist:
             self._play_next()
     def stop(self) -> None:
+        """Stop playback and clear playlist."""
         self._stop_flag.set()
+        # Clear the playback buffer
         if self.reachy_mini is not None:
             try:
+                if hasattr(self.reachy_mini.media, 'audio'):
+                    audio = self.reachy_mini.media.audio
+                    if hasattr(audio, 'clear_player'):
+                        audio.clear_player()
+            except Exception as e:
+                _LOGGER.debug("Error clearing player: %s", e)
         self._playlist.clear()
         self.is_playing = False
     def duck(self) -> None:
+        """Lower volume for ducking."""
         self._current_volume = self._duck_volume
     def unduck(self) -> None:
+        """Restore volume after ducking."""
         self._current_volume = self._unduck_volume
     def set_volume(self, volume: int) -> None:
+        """Set volume (0-100)."""
         volume = max(0, min(100, volume))
         self._unduck_volume = volume / 100.0
         self._duck_volume = self._unduck_volume / 2

reachy_mini_ha_voice/models.py CHANGED Viewed

@@ -73,10 +73,6 @@ class ServerState:
     reachy_mini: Optional[object] = None
     motion_enabled: bool = True
     motion: Optional[object] = None  # ReachyMiniMotion instance
-    # TTS playback state - when True, audio recording should pause
-    # to avoid GStreamer pipeline conflicts (recording and playback share resources)
-    tts_playing: bool = False
     media_player_entity: "Optional[MediaPlayerEntity]" = None
     satellite: "Optional[VoiceSatelliteProtocol]" = None

     reachy_mini: Optional[object] = None
     motion_enabled: bool = True
     motion: Optional[object] = None  # ReachyMiniMotion instance
     media_player_entity: "Optional[MediaPlayerEntity]" = None
     satellite: "Optional[VoiceSatelliteProtocol]" = None

reachy_mini_ha_voice/satellite.py CHANGED Viewed

@@ -328,10 +328,8 @@ class VoiceSatelliteProtocol(APIServer):
         )
         self.duck()
         self._is_streaming_audio = True
-        # NOTE: Wakeup sound disabled - playing audio while recording causes daemon crash
-        # The GStreamer pipeline conflicts when push_audio_sample is called during get_audio_sample
-        # self.state.tts_player.play(self.state.wakeup_sound)
-        _LOGGER.debug("Wakeup sound skipped to prevent daemon crash")
     def stop(self) -> None:
         self.state.active_wake_words.discard(self.state.stop_word.id)
@@ -352,10 +350,6 @@ class VoiceSatelliteProtocol(APIServer):
         self._tts_played = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
-        # Set flag to pause audio recording during TTS playback
-        # This prevents GStreamer pipeline conflicts (recording and playback share resources)
-        self.state.tts_playing = True
         self.state.active_wake_words.add(self.state.stop_word.id)
         self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
@@ -368,9 +362,6 @@ class VoiceSatelliteProtocol(APIServer):
         self.state.music_player.unduck()
     def _tts_finished(self) -> None:
-        # Clear TTS playing flag to resume audio recording
-        self.state.tts_playing = False
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])

         )
         self.duck()
         self._is_streaming_audio = True
+        # Play wakeup sound (like linux-voice-assistant does)
+        self.state.tts_player.play(self.state.wakeup_sound)
     def stop(self) -> None:
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self._tts_played = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
         self.state.active_wake_words.add(self.state.stop_word.id)
         self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
         self.state.music_player.unduck()
     def _tts_finished(self) -> None:
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])

reachy_mini_ha_voice/voice_assistant.py CHANGED Viewed

@@ -424,12 +424,6 @@ class VoiceAssistantService:
                 if not self._wait_for_satellite():
                     continue
-                # Pause audio recording during TTS playback to avoid GStreamer conflicts
-                # Recording and playback pipelines share the same audio device
-                if self._state is not None and self._state.tts_playing:
-                    time.sleep(0.05)
-                    continue
                 self._update_wake_words_list(ctx)
                 # Get audio from Reachy Mini

                 if not self._wait_for_satellite():
                     continue
                 self._update_wake_words_list(ctx)
                 # Get audio from Reachy Mini