Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

Desmond-Dong committed on Jan 12

Commit

9d92632

1 Parent(s): 08a2d90

fix: revert to reference project pattern for wake word detection (v0.7.3)

- Use refractory_seconds (2s) instead of broken state flags
- Remove _in_pipeline and _tts_playing - they caused more issues
- Restore correct RUN_END handling from linux-voice-assistant
- Fix stop() to call _tts_finished() like reference project

Files changed (5) hide show

changelog.json +9 -0
pyproject.toml +1 -1
reachy_mini_ha_voice/__init__.py +1 -1
reachy_mini_ha_voice/satellite.py +7 -50
reachy_mini_ha_voice/voice_assistant.py +12 -18

changelog.json CHANGED Viewed

@@ -1,4 +1,13 @@
 [
   {
     "version": "0.7.2",
     "date": "2026-01-12",

 [
+  {
+    "version": "0.7.3",
+    "date": "2026-01-12",
+    "changes": [
+      "Fix: Revert to reference project pattern - use refractory period instead of state flags",
+      "Fix: Remove broken _in_pipeline and _tts_playing state management",
+      "Fix: Restore correct RUN_END event handling from linux-voice-assistant"
+    ]
+  },
   {
     "version": "0.7.2",
     "date": "2026-01-12",

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "reachy_mini_ha_voice"
-version = "0.7.2"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

 [project]
 name = "reachy_mini_ha_voice"
+version = "0.7.3"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

reachy_mini_ha_voice/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Key features:
 - Reachy Mini motion control integration
 """
-__version__ = "0.7.2"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

 - Reachy Mini motion control integration
 """
+__version__ = "0.7.3"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

reachy_mini_ha_voice/satellite.py CHANGED Viewed

@@ -69,20 +69,13 @@ class VoiceSatelliteProtocol(APIServer):
         self.camera_server = camera_server
         # Initialize streaming state early (before entity setup)
-        # This is needed because audio processing thread checks this attribute
         self._is_streaming_audio = False
-        self._in_pipeline = False  # True when voice pipeline is active (listening/processing/speaking)
-        self._tts_playing = False  # True when TTS audio is actively playing
         self._tts_url: Optional[str] = None
         self._tts_played = False
         self._continue_conversation = False
         self._timer_finished = False
         self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
-        # Tap-to-talk continuous conversation mode (REMOVED - too many false triggers)
-        # Continuous conversation is now controlled via Home Assistant switch
-        # self._tap_conversation_mode = False
         # Conversation tracking for continuous conversation
         self._conversation_id: Optional[str] = None
         self._conversation_timeout = 300.0  # 5 minutes, same as ESPHome default
@@ -184,8 +177,6 @@ class VoiceSatelliteProtocol(APIServer):
         elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
             # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
             _LOGGER.debug("TTS_START event received, triggering speaking animation")
-            # Mark TTS as playing - this prevents wake word detection during TTS
-            self._tts_playing = True
             self._reachy_on_speaking()
         elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
@@ -196,8 +187,11 @@ class VoiceSatelliteProtocol(APIServer):
             # Pipeline run ended
             self._is_streaming_audio = False
-            # Check if should continue conversation
-            self._handle_run_end()
     def handle_timer_event(
         self,
@@ -234,9 +228,6 @@ class VoiceSatelliteProtocol(APIServer):
             self.state.active_wake_words.add(self.state.stop_word.id)
             self._continue_conversation = msg.start_conversation
-            # Mark as playing to prevent wake word detection during announcement
-            self._tts_playing = True
-            self._in_pipeline = True
             self.duck()
             yield from self.state.media_player_entity.play(
@@ -382,10 +373,7 @@ class VoiceSatelliteProtocol(APIServer):
         self._continue_conversation = False
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
-        """Handle wake word detection - start voice pipeline.
-        Only called when in idle state (checked by voice_assistant.py).
-        """
         if self._timer_finished:
             # Stop timer instead
             self._timer_finished = False
@@ -393,9 +381,6 @@ class VoiceSatelliteProtocol(APIServer):
             _LOGGER.debug("Stopping timer finished sound")
             return
-        # Mark pipeline as active
-        self._in_pipeline = True
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
@@ -436,26 +421,19 @@ class VoiceSatelliteProtocol(APIServer):
         """Stop current TTS playback (e.g., user said stop word)."""
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.state.tts_player.stop()
-        # Reset TTS playing flag
-        self._tts_playing = False
         if self._timer_finished:
             self._timer_finished = False
             _LOGGER.debug("Stopping timer finished sound")
         else:
             _LOGGER.debug("TTS response stopped manually")
-        # Send announce finished to HA
-        self.send_messages([VoiceAssistantAnnounceFinished()])
-        # Note: RUN_END event will handle the rest
     def play_tts(self) -> None:
         if (not self._tts_url) or self._tts_played:
             return
         self._tts_played = True
-        # Mark TTS as playing to prevent wake word detection
-        self._tts_playing = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
         self.state.active_wake_words.add(self.state.stop_word.id)
@@ -478,9 +456,6 @@ class VoiceSatelliteProtocol(APIServer):
         Following reference project pattern: handle continue conversation here.
         """
-        # Mark TTS as finished
-        self._tts_playing = False
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])
@@ -494,9 +469,6 @@ class VoiceSatelliteProtocol(APIServer):
             _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
                          continuous_mode, self._continue_conversation)
-            # Keep pipeline active during continuous conversation
-            self._in_pipeline = True
             # Play prompt sound to indicate ready for next input
             self.state.tts_player.play(self.state.wakeup_sound)
@@ -514,23 +486,10 @@ class VoiceSatelliteProtocol(APIServer):
             self._clear_conversation()
             self.unduck()
             _LOGGER.debug("Conversation finished")
-            # Mark pipeline as inactive - ready for new wake word
-            self._in_pipeline = False
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
-    def _handle_run_end(self) -> None:
-        """Handle pipeline RUN_END event.
-        Following reference project pattern: call _tts_finished if TTS wasn't played.
-        """
-        if not self._tts_played:
-            self._tts_finished()
-        self._tts_played = False
     def _play_timer_finished(self) -> None:
         if not self._timer_finished:
             self.unduck()
@@ -548,8 +507,6 @@ class VoiceSatelliteProtocol(APIServer):
         _LOGGER.info("Disconnected from Home Assistant")
         # Clear streaming state on disconnect
         self._is_streaming_audio = False
-        self._in_pipeline = False
-        self._tts_playing = False
         self._tts_url = None
         self._tts_played = False
         self._continue_conversation = False

         self.camera_server = camera_server
         # Initialize streaming state early (before entity setup)
         self._is_streaming_audio = False
         self._tts_url: Optional[str] = None
         self._tts_played = False
         self._continue_conversation = False
         self._timer_finished = False
         self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
         # Conversation tracking for continuous conversation
         self._conversation_id: Optional[str] = None
         self._conversation_timeout = 300.0  # 5 minutes, same as ESPHome default
         elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
             # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
             _LOGGER.debug("TTS_START event received, triggering speaking animation")
             self._reachy_on_speaking()
         elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
             # Pipeline run ended
             self._is_streaming_audio = False
+            # Following reference project pattern
+            if not self._tts_played:
+                self._tts_finished()
+            self._tts_played = False
     def handle_timer_event(
         self,
             self.state.active_wake_words.add(self.state.stop_word.id)
             self._continue_conversation = msg.start_conversation
             self.duck()
             yield from self.state.media_player_entity.play(
         self._continue_conversation = False
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
+        """Handle wake word detection - start voice pipeline."""
         if self._timer_finished:
             # Stop timer instead
             self._timer_finished = False
             _LOGGER.debug("Stopping timer finished sound")
             return
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
         """Stop current TTS playback (e.g., user said stop word)."""
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.state.tts_player.stop()
         if self._timer_finished:
             self._timer_finished = False
             _LOGGER.debug("Stopping timer finished sound")
         else:
             _LOGGER.debug("TTS response stopped manually")
+            self._tts_finished()
     def play_tts(self) -> None:
         if (not self._tts_url) or self._tts_played:
             return
         self._tts_played = True
         _LOGGER.debug("Playing TTS response: %s", self._tts_url)
         self.state.active_wake_words.add(self.state.stop_word.id)
         Following reference project pattern: handle continue conversation here.
         """
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])
             _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
                          continuous_mode, self._continue_conversation)
             # Play prompt sound to indicate ready for next input
             self.state.tts_player.play(self.state.wakeup_sound)
             self._clear_conversation()
             self.unduck()
             _LOGGER.debug("Conversation finished")
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
     def _play_timer_finished(self) -> None:
         if not self._timer_finished:
             self.unduck()
         _LOGGER.info("Disconnected from Home Assistant")
         # Clear streaming state on disconnect
         self._is_streaming_audio = False
         self._tts_url = None
         self._tts_played = False
         self._continue_conversation = False

reachy_mini_ha_voice/voice_assistant.py CHANGED Viewed

@@ -764,23 +764,12 @@ class VoiceAssistantService:
     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
         """Detect wake words in the processed audio features.
-        Only detect wake words when in idle state (not in pipeline or TTS playing).
-        This prevents duplicate triggers during continuous conversation and TTS playback.
         """
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
-        # Skip wake word detection if pipeline is active or TTS is playing
-        # Check both flags to handle all cases:
-        # - _in_pipeline: True during listening/processing/speaking phases
-        # - _tts_playing: True specifically when TTS audio is being played
-        satellite = self._state.satellite
-        if satellite is None:
-            return
-        if satellite._in_pipeline or satellite._tts_playing:
-            return
         for wake_word in ctx.wake_words:
             activated = False
@@ -795,11 +784,16 @@ class VoiceAssistantService:
                             activated = True
             if activated:
-                _LOGGER.info("Wake word detected: %s", wake_word.id)
-                self._state.satellite.wakeup(wake_word)
-                # Face tracking will handle looking at user automatically
-                self._motion.on_wakeup()
-                # No need for refractory period - state check handles it
     def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
         """Detect stop word in the processed audio features."""

     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
         """Detect wake words in the processed audio features.
+        Uses refractory period to prevent duplicate triggers.
+        Following reference project pattern.
         """
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
         for wake_word in ctx.wake_words:
             activated = False
                             activated = True
             if activated:
+                # Check refractory period to prevent duplicate triggers
+                now = time.monotonic()
+                if (ctx.last_active is None) or (
+                    (now - ctx.last_active) > self._state.refractory_seconds
+                ):
+                    _LOGGER.info("Wake word detected: %s", wake_word.id)
+                    self._state.satellite.wakeup(wake_word)
+                    # Face tracking will handle looking at user automatically
+                    self._motion.on_wakeup()
+                    ctx.last_active = now
     def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
         """Detect stop word in the processed audio features."""