Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

App Files Community

Desmond-Dong committed on Jan 12

Commit

5ce6940

1 Parent(s): 5666e95

fix: 使用状态判断唤醒词检测, 添加手势模型信息日志

Browse files

- 添加 _in_pipeline 状态标识对话是否进行中
- 唤醒词检测只在 idle 状态下进行，避免连续对话时重复唤醒
- 手势检测器加载时输出模型输入输出信息用于调试

Files changed (3) hide show

reachy_mini_ha_voice/gesture_detector.py +14 -0
reachy_mini_ha_voice/satellite.py +9 -2
reachy_mini_ha_voice/voice_assistant.py +11 -9

reachy_mini_ha_voice/gesture_detector.py CHANGED Viewed

@@ -133,6 +133,20 @@ class GestureDetector:
             self._classifier = ort.InferenceSession(
                 str(self._classifier_path), providers=providers
             )
             self._available = True
             logger.info("Gesture detection ready (18 HaGRID classes)")
         except Exception as e:

             self._classifier = ort.InferenceSession(
                 str(self._classifier_path), providers=providers
             )
+            # Log model input/output info
+            det_inputs = self._detector.get_inputs()
+            det_outputs = self._detector.get_outputs()
+            logger.info("Hand detector - inputs: %s, outputs: %s",
+                       [(i.name, i.shape) for i in det_inputs],
+                       [(o.name, o.shape) for o in det_outputs])
+            cls_inputs = self._classifier.get_inputs()
+            cls_outputs = self._classifier.get_outputs()
+            logger.info("Classifier - inputs: %s, outputs: %s",
+                       [(i.name, i.shape) for i in cls_inputs],
+                       [(o.name, o.shape) for o in cls_outputs])
             self._available = True
             logger.info("Gesture detection ready (18 HaGRID classes)")
         except Exception as e:

reachy_mini_ha_voice/satellite.py CHANGED Viewed

@@ -71,6 +71,7 @@ class VoiceSatelliteProtocol(APIServer):
         # Initialize streaming state early (before entity setup)
         # This is needed because audio processing thread checks this attribute
         self._is_streaming_audio = False
         self._tts_url: Optional[str] = None
         self._tts_played = False
         self._continue_conversation = False
@@ -374,8 +375,7 @@ class VoiceSatelliteProtocol(APIServer):
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
         """Handle wake word detection - start voice pipeline.
-        Following reference project pattern: no pipeline state check here.
-        Refractory period in audio processing prevents duplicate triggers.
         """
         if self._timer_finished:
             # Stop timer instead
@@ -384,6 +384,9 @@ class VoiceSatelliteProtocol(APIServer):
             _LOGGER.debug("Stopping timer finished sound")
             return
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
@@ -492,6 +495,9 @@ class VoiceSatelliteProtocol(APIServer):
             self._clear_conversation()
             self.unduck()
             _LOGGER.debug("Conversation finished")
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
@@ -523,6 +529,7 @@ class VoiceSatelliteProtocol(APIServer):
         _LOGGER.info("Disconnected from Home Assistant")
         # Clear streaming state on disconnect
         self._is_streaming_audio = False
         self._tts_url = None
         self._tts_played = False
         self._continue_conversation = False

         # Initialize streaming state early (before entity setup)
         # This is needed because audio processing thread checks this attribute
         self._is_streaming_audio = False
+        self._in_pipeline = False  # True when voice pipeline is active (listening/processing/speaking)
         self._tts_url: Optional[str] = None
         self._tts_played = False
         self._continue_conversation = False
     def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
         """Handle wake word detection - start voice pipeline.
+        Only called when in idle state (checked by voice_assistant.py).
         """
         if self._timer_finished:
             # Stop timer instead
             _LOGGER.debug("Stopping timer finished sound")
             return
+        # Mark pipeline as active
+        self._in_pipeline = True
         wake_word_phrase = wake_word.wake_word
         _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
             self._clear_conversation()
             self.unduck()
             _LOGGER.debug("Conversation finished")
+            # Mark pipeline as inactive - ready for new wake word
+            self._in_pipeline = False
             # Reachy Mini: Return to idle
             self._reachy_on_idle()
         _LOGGER.info("Disconnected from Home Assistant")
         # Clear streaming state on disconnect
         self._is_streaming_audio = False
+        self._in_pipeline = False
         self._tts_url = None
         self._tts_played = False
         self._continue_conversation = False

reachy_mini_ha_voice/voice_assistant.py CHANGED Viewed

@@ -764,12 +764,16 @@ class VoiceAssistantService:
     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
         """Detect wake words in the processed audio features.
-        Following reference project pattern: only use refractory_seconds.
         """
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
-        now = time.monotonic()
         for wake_word in ctx.wake_words:
             activated = False
@@ -785,13 +789,11 @@ class VoiceAssistantService:
                             activated = True
             if activated:
-                # Check refractory period
-                if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
-                    _LOGGER.info("Wake word detected: %s", wake_word.id)
-                    self._state.satellite.wakeup(wake_word)
-                    # Face tracking will handle looking at user automatically
-                    self._motion.on_wakeup()
-                    ctx.last_active = now
     def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
         """Detect stop word in the processed audio features."""

     def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
         """Detect wake words in the processed audio features.
+        Only detect wake words when in idle state (not in pipeline).
+        This prevents duplicate triggers during continuous conversation.
         """
         from pymicro_wakeword import MicroWakeWord
         from pyopen_wakeword import OpenWakeWord
+        # Skip wake word detection if pipeline is active (listening/processing/speaking)
+        # This is the key fix: use state instead of refractory time
+        if self._state.satellite and self._state.satellite._in_pipeline:
+            return
         for wake_word in ctx.wake_words:
             activated = False
                             activated = True
             if activated:
+                _LOGGER.info("Wake word detected: %s", wake_word.id)
+                self._state.satellite.wakeup(wake_word)
+                # Face tracking will handle looking at user automatically
+                self._motion.on_wakeup()
+                # No need for refractory period - state check handles it
     def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
         """Detect stop word in the processed audio features."""