Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

Desmond-Dong committed on Jan 11

Commit

afdb99d

1 Parent(s): eb57938

refactor: 简化唤醒词检测逻辑，按照参考项目模式

- 移除 _pipeline_active 状态和 is_pipeline_active() 方法
- 移除 wake_word_refractory_until，只用 refractory_seconds
- 简化 wakeup() 方法，不再检查 pipeline 状态
- 简化 _handle_run_end() 和 _tts_finished()，按照参考项目模式处理持续对话
- 简化 _process_audio_chunk()，始终处理唤醒词检测

v0.5.17

Files changed (5) hide show

pyproject.toml +1 -1
reachy_mini_ha_voice/__init__.py +1 -1
reachy_mini_ha_voice/models.py +0 -1
reachy_mini_ha_voice/satellite.py +19 -54
reachy_mini_ha_voice/voice_assistant.py +8 -10

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "reachy_mini_ha_voice"
-version = "0.5.16"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

 [project]
 name = "reachy_mini_ha_voice"
+version = "0.5.17"
 description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
 requires-python = ">=3.10"

reachy_mini_ha_voice/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Key features:
 - Reachy Mini motion control integration
 """
-__version__ = "0.5.16"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

 - Reachy Mini motion control integration
 """
+__version__ = "0.5.17"
 __author__ = "Desmond Dong"
 # Don't import main module here to avoid runpy warning

reachy_mini_ha_voice/models.py CHANGED Viewed

@@ -85,7 +85,6 @@ class ServerState:
     satellite: "Optional[VoiceSatelliteProtocol]" = None
     wake_words_changed: bool = False
     refractory_seconds: float = 2.0
-    wake_word_refractory_until: float = 0.0  # Timestamp until which wake word detection is suppressed
     def save_preferences(self) -> None:
         """Save preferences as JSON."""

     satellite: "Optional[VoiceSatelliteProtocol]" = None
     wake_words_changed: bool = False
     refractory_seconds: float = 2.0
     def save_preferences(self) -> None:
         """Save preferences as JSON."""

reachy_mini_ha_voice/satellite.py CHANGED Viewed

@@ -86,9 +86,6 @@ class VoiceSatelliteProtocol(APIServer):
         self._conversation_timeout = 300.0  # 5 minutes, same as ESPHome default
         self._last_conversation_time = 0.0
-        # Pipeline state tracking - prevent multiple concurrent pipelines
-        self._pipeline_active = False
         # Initialize Reachy controller
         self.reachy_controller = ReachyController(state.reachy_mini)
@@ -136,13 +133,6 @@ class VoiceSatelliteProtocol(APIServer):
         _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
         if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
-            # Check if pipeline is already active (shouldn't happen, but be safe)
-            if self._pipeline_active:
-                _LOGGER.warning("RUN_START received but pipeline already active, stopping previous")
-                self.state.tts_player.stop()
-            # Mark pipeline as active
-            self._pipeline_active = True
             self._tts_url = data.get("url")
             self._tts_played = False
             self._continue_conversation = False
@@ -179,8 +169,7 @@ class VoiceSatelliteProtocol(APIServer):
             self._tts_played = False
             self._is_streaming_audio = False
-            # Check if should continue conversation (after RUN_END is safe)
-            # Note: _pipeline_active is managed inside _handle_run_end
             self._handle_run_end()
     def handle_timer_event(
@@ -361,14 +350,13 @@ class VoiceSatelliteProtocol(APIServer):
         """Clear conversation state when exiting conversation mode."""
         self._conversation_id = None
         self._continue_conversation = False
-        self._pipeline_active = False
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
-        # Prevent starting new conversation if pipeline is already active
-        if self._pipeline_active:
-            _LOGGER.warning("Pipeline already active, ignoring wake word")
-            return
         if self._timer_finished:
             # Stop timer instead
             self._timer_finished = False
@@ -376,16 +364,10 @@ class VoiceSatelliteProtocol(APIServer):
             _LOGGER.debug("Stopping timer finished sound")
             return
-        # Mark pipeline as active IMMEDIATELY to prevent duplicate wakeups
-        # This is set before sending request to HA, as there's network delay
-        self._pipeline_active = True
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
         # Turn toward sound source using DOA (Direction of Arrival)
-        # Only read DOA once at wakeup to avoid daemon pressure
-        # Face tracking will take over after initial turn
         self._turn_to_sound_source()
         # Get or create conversation_id for context tracking
@@ -418,10 +400,6 @@ class VoiceSatelliteProtocol(APIServer):
         """
         return False
-    def is_pipeline_active(self) -> bool:
-        """Check if voice pipeline is currently active (listening/thinking/speaking)."""
-        return self._pipeline_active
     def stop(self) -> None:
         """Stop current TTS playback (e.g., user said stop word)."""
         self.state.active_wake_words.discard(self.state.stop_word.id)
@@ -462,24 +440,12 @@ class VoiceSatelliteProtocol(APIServer):
     def _tts_finished(self) -> None:
         """Called when TTS audio playback finishes.
-        Note: This is called from the audio player callback, NOT from HA events.
-        We should NOT start a new conversation here - wait for RUN_END event.
         """
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])
-        _LOGGER.debug("TTS playback finished, waiting for RUN_END event")
-    def _handle_run_end(self) -> None:
-        """Handle pipeline RUN_END event - safe point to continue conversation.
-        This is called after HA has fully completed the pipeline run.
-        """
-        # If pipeline wasn't active, this might be a duplicate RUN_END - ignore
-        if not self._pipeline_active:
-            _LOGGER.debug("RUN_END received but pipeline wasn't active, ignoring")
-            return
-        # Check if should continue conversation BEFORE clearing pipeline state
         # 1. Our switch is ON: Always continue (unconditional)
         # 2. Our switch is OFF: Follow HA's continue_conversation request
         continuous_mode = self.state.preferences.continuous_conversation
@@ -489,11 +455,7 @@ class VoiceSatelliteProtocol(APIServer):
             _LOGGER.info("Continuing conversation (our_switch=%s, ha_request=%s)",
                          continuous_mode, self._continue_conversation)
-            # Keep pipeline active - no gap for wake word detection
-            # _pipeline_active stays True
             # Play prompt sound to indicate ready for next input
-            # Use wakeup sound as the prompt (short beep)
             self.state.tts_player.play(self.state.wakeup_sound)
             # Use same conversation_id for context continuity
@@ -504,23 +466,26 @@ class VoiceSatelliteProtocol(APIServer):
             )])
             self._is_streaming_audio = True
-            # Stay in listening mode, don't go to idle
             self._reachy_on_listening()
         else:
-            # Conversation ended, clear state
-            self._pipeline_active = False
             self._clear_conversation()
             self.unduck()
-            _LOGGER.debug("Pipeline ended, conversation finished")
-            # Set wake word refractory period to prevent immediate re-trigger
-            # Wake word model may have accumulated state during conversation
-            self.state.wake_word_refractory_until = time.monotonic() + 1.5  # 1.5 second cooldown
-            _LOGGER.debug("Wake word refractory period set for 1.5 seconds")
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
     def _play_timer_finished(self) -> None:
         if not self._timer_finished:
             self.unduck()

         self._conversation_timeout = 300.0  # 5 minutes, same as ESPHome default
         self._last_conversation_time = 0.0
         # Initialize Reachy controller
         self.reachy_controller = ReachyController(state.reachy_mini)
         _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
         if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
             self._tts_url = data.get("url")
             self._tts_played = False
             self._continue_conversation = False
             self._tts_played = False
             self._is_streaming_audio = False
+            # Check if should continue conversation
             self._handle_run_end()
     def handle_timer_event(
         """Clear conversation state when exiting conversation mode."""
         self._conversation_id = None
         self._continue_conversation = False
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
+        """Handle wake word detection - start voice pipeline.
+        Following reference project pattern: no pipeline state check here.
+        Refractory period in audio processing prevents duplicate triggers.
+        """
         if self._timer_finished:
             # Stop timer instead
             self._timer_finished = False
             _LOGGER.debug("Stopping timer finished sound")
             return
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
         # Turn toward sound source using DOA (Direction of Arrival)
         self._turn_to_sound_source()
         # Get or create conversation_id for context tracking
         """
         return False
     def stop(self) -> None:
         """Stop current TTS playback (e.g., user said stop word)."""
         self.state.active_wake_words.discard(self.state.stop_word.id)
     def _tts_finished(self) -> None:
         """Called when TTS audio playback finishes.
+        Following reference project pattern: handle continue conversation here.
         """
         self.state.active_wake_words.discard(self.state.stop_word.id)
         self.send_messages([VoiceAssistantAnnounceFinished()])
+        # Check if should continue conversation
         # 1. Our switch is ON: Always continue (unconditional)
         # 2. Our switch is OFF: Follow HA's continue_conversation request
         continuous_mode = self.state.preferences.continuous_conversation
             _LOGGER.info("Continuing conversation (our_switch=%s, ha_request=%s)",
                          continuous_mode, self._continue_conversation)
             # Play prompt sound to indicate ready for next input
             self.state.tts_player.play(self.state.wakeup_sound)
             # Use same conversation_id for context continuity
             )])
             self._is_streaming_audio = True
+            # Stay in listening mode
             self._reachy_on_listening()
         else:
             self._clear_conversation()
             self.unduck()
+            _LOGGER.debug("Conversation finished")
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
+    def _handle_run_end(self) -> None:
+        """Handle pipeline RUN_END event.
+        Following reference project pattern: call _tts_finished if TTS wasn't played.
+        """
+        if not self._tts_played:
+            self._tts_finished()
+        self._tts_played = False
     def _play_timer_finished(self) -> None:
         if not self._timer_finished:
             self.unduck()

reachy_mini_ha_voice/voice_assistant.py CHANGED Viewed

@@ -718,6 +718,9 @@ class VoiceAssistantService:
     def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
         """Process an audio chunk for wake word detection.
         Args:
             ctx: Audio processing context
             audio_chunk: PCM audio bytes
@@ -725,12 +728,6 @@ class VoiceAssistantService:
         # Stream audio to Home Assistant
         self._state.satellite.handle_audio(audio_chunk)
-        # Skip wake word processing entirely if pipeline is active
-        # This prevents model state accumulation during conversation
-        pipeline_active = self._state.satellite.is_pipeline_active()
-        if pipeline_active:
-            return
         # Process wake word features
         self._process_features(ctx, audio_chunk)
@@ -750,14 +747,14 @@ class VoiceAssistantService:
             ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
-        """Detect wake words in the processed audio features."""
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
-        # Check global refractory period (set after conversation ends)
         now = time.monotonic()
-        if now < self._state.wake_word_refractory_until:
-            return
         for wake_word in ctx.wake_words:
             activated = False
@@ -773,6 +770,7 @@ class VoiceAssistantService:
                             activated = True
             if activated:
                 if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
                     _LOGGER.info("Wake word detected: %s", wake_word.id)
                     self._state.satellite.wakeup(wake_word)

     def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
         """Process an audio chunk for wake word detection.
+        Following reference project pattern: always process wake words.
+        Refractory period prevents duplicate triggers.
         Args:
             ctx: Audio processing context
             audio_chunk: PCM audio bytes
         # Stream audio to Home Assistant
         self._state.satellite.handle_audio(audio_chunk)
         # Process wake word features
         self._process_features(ctx, audio_chunk)
             ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
+        """Detect wake words in the processed audio features.
+        Following reference project pattern: only use refractory_seconds.
+        """
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
         now = time.monotonic()
         for wake_word in ctx.wake_words:
             activated = False
                             activated = True
             if activated:
+                # Check refractory period
                 if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
                     _LOGGER.info("Wake word detected: %s", wake_word.id)
                     self._state.satellite.wakeup(wake_word)