Commit ·
5ce6940
1
Parent(s): 5666e95
fix: 使用状态判断唤醒词检测, 添加手势模型信息日志
Browse files
- 添加 _in_pipeline 状态标识对话是否进行中
- 唤醒词检测只在 idle 状态下进行,避免连续对话时重复唤醒
- 手势检测器加载时输出模型输入输出信息用于调试
reachy_mini_ha_voice/gesture_detector.py
CHANGED
|
@@ -133,6 +133,20 @@ class GestureDetector:
|
|
| 133 |
self._classifier = ort.InferenceSession(
|
| 134 |
str(self._classifier_path), providers=providers
|
| 135 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
self._available = True
|
| 137 |
logger.info("Gesture detection ready (18 HaGRID classes)")
|
| 138 |
except Exception as e:
|
|
|
|
| 133 |
self._classifier = ort.InferenceSession(
|
| 134 |
str(self._classifier_path), providers=providers
|
| 135 |
)
|
| 136 |
+
|
| 137 |
+
# Log model input/output info
|
| 138 |
+
det_inputs = self._detector.get_inputs()
|
| 139 |
+
det_outputs = self._detector.get_outputs()
|
| 140 |
+
logger.info("Hand detector - inputs: %s, outputs: %s",
|
| 141 |
+
[(i.name, i.shape) for i in det_inputs],
|
| 142 |
+
[(o.name, o.shape) for o in det_outputs])
|
| 143 |
+
|
| 144 |
+
cls_inputs = self._classifier.get_inputs()
|
| 145 |
+
cls_outputs = self._classifier.get_outputs()
|
| 146 |
+
logger.info("Classifier - inputs: %s, outputs: %s",
|
| 147 |
+
[(i.name, i.shape) for i in cls_inputs],
|
| 148 |
+
[(o.name, o.shape) for o in cls_outputs])
|
| 149 |
+
|
| 150 |
self._available = True
|
| 151 |
logger.info("Gesture detection ready (18 HaGRID classes)")
|
| 152 |
except Exception as e:
|
reachy_mini_ha_voice/satellite.py
CHANGED
|
@@ -71,6 +71,7 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 71 |
# Initialize streaming state early (before entity setup)
|
| 72 |
# This is needed because audio processing thread checks this attribute
|
| 73 |
self._is_streaming_audio = False
|
|
|
|
| 74 |
self._tts_url: Optional[str] = None
|
| 75 |
self._tts_played = False
|
| 76 |
self._continue_conversation = False
|
|
@@ -374,8 +375,7 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 374 |
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
|
| 375 |
"""Handle wake word detection - start voice pipeline.
|
| 376 |
|
| 377 |
-
|
| 378 |
-
Refractory period in audio processing prevents duplicate triggers.
|
| 379 |
"""
|
| 380 |
if self._timer_finished:
|
| 381 |
# Stop timer instead
|
|
@@ -384,6 +384,9 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 384 |
_LOGGER.debug("Stopping timer finished sound")
|
| 385 |
return
|
| 386 |
|
|
|
|
|
|
|
|
|
|
| 387 |
wake_word_phrase = wake_word.wake_word
|
| 388 |
_LOGGER.debug("Detected wake word: %s", wake_word_phrase)
|
| 389 |
|
|
@@ -492,6 +495,9 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 492 |
self._clear_conversation()
|
| 493 |
self.unduck()
|
| 494 |
_LOGGER.debug("Conversation finished")
|
|
|
|
|
|
|
|
|
|
| 495 |
|
| 496 |
# Reachy Mini: Return to idle
|
| 497 |
self._reachy_on_idle()
|
|
@@ -523,6 +529,7 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 523 |
_LOGGER.info("Disconnected from Home Assistant")
|
| 524 |
# Clear streaming state on disconnect
|
| 525 |
self._is_streaming_audio = False
|
|
|
|
| 526 |
self._tts_url = None
|
| 527 |
self._tts_played = False
|
| 528 |
self._continue_conversation = False
|
|
|
|
| 71 |
# Initialize streaming state early (before entity setup)
|
| 72 |
# This is needed because audio processing thread checks this attribute
|
| 73 |
self._is_streaming_audio = False
|
| 74 |
+
self._in_pipeline = False # True when voice pipeline is active (listening/processing/speaking)
|
| 75 |
self._tts_url: Optional[str] = None
|
| 76 |
self._tts_played = False
|
| 77 |
self._continue_conversation = False
|
|
|
|
| 375 |
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
|
| 376 |
"""Handle wake word detection - start voice pipeline.
|
| 377 |
|
| 378 |
+
Only called when in idle state (checked by voice_assistant.py).
|
|
|
|
| 379 |
"""
|
| 380 |
if self._timer_finished:
|
| 381 |
# Stop timer instead
|
|
|
|
| 384 |
_LOGGER.debug("Stopping timer finished sound")
|
| 385 |
return
|
| 386 |
|
| 387 |
+
# Mark pipeline as active
|
| 388 |
+
self._in_pipeline = True
|
| 389 |
+
|
| 390 |
wake_word_phrase = wake_word.wake_word
|
| 391 |
_LOGGER.debug("Detected wake word: %s", wake_word_phrase)
|
| 392 |
|
|
|
|
| 495 |
self._clear_conversation()
|
| 496 |
self.unduck()
|
| 497 |
_LOGGER.debug("Conversation finished")
|
| 498 |
+
|
| 499 |
+
# Mark pipeline as inactive - ready for new wake word
|
| 500 |
+
self._in_pipeline = False
|
| 501 |
|
| 502 |
# Reachy Mini: Return to idle
|
| 503 |
self._reachy_on_idle()
|
|
|
|
| 529 |
_LOGGER.info("Disconnected from Home Assistant")
|
| 530 |
# Clear streaming state on disconnect
|
| 531 |
self._is_streaming_audio = False
|
| 532 |
+
self._in_pipeline = False
|
| 533 |
self._tts_url = None
|
| 534 |
self._tts_played = False
|
| 535 |
self._continue_conversation = False
|
reachy_mini_ha_voice/voice_assistant.py
CHANGED
|
@@ -764,12 +764,16 @@ class VoiceAssistantService:
|
|
| 764 |
def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
|
| 765 |
"""Detect wake words in the processed audio features.
|
| 766 |
|
| 767 |
-
|
|
|
|
| 768 |
"""
|
| 769 |
from pymicro_wakeword import MicroWakeWord
|
| 770 |
from pyopen_wakeword import OpenWakeWord
|
| 771 |
|
| 772 |
-
|
|
|
|
|
|
|
|
|
|
| 773 |
|
| 774 |
for wake_word in ctx.wake_words:
|
| 775 |
activated = False
|
|
@@ -785,13 +789,11 @@ class VoiceAssistantService:
|
|
| 785 |
activated = True
|
| 786 |
|
| 787 |
if activated:
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
self._motion.on_wakeup()
|
| 794 |
-
ctx.last_active = now
|
| 795 |
|
| 796 |
def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
|
| 797 |
"""Detect stop word in the processed audio features."""
|
|
|
|
| 764 |
def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
|
| 765 |
"""Detect wake words in the processed audio features.
|
| 766 |
|
| 767 |
+
Only detect wake words when in idle state (not in pipeline).
|
| 768 |
+
This prevents duplicate triggers during continuous conversation.
|
| 769 |
"""
|
| 770 |
from pymicro_wakeword import MicroWakeWord
|
| 771 |
from pyopen_wakeword import OpenWakeWord
|
| 772 |
|
| 773 |
+
# Skip wake word detection if pipeline is active (listening/processing/speaking)
|
| 774 |
+
# This is the key fix: use state instead of refractory time
|
| 775 |
+
if self._state.satellite and self._state.satellite._in_pipeline:
|
| 776 |
+
return
|
| 777 |
|
| 778 |
for wake_word in ctx.wake_words:
|
| 779 |
activated = False
|
|
|
|
| 789 |
activated = True
|
| 790 |
|
| 791 |
if activated:
|
| 792 |
+
_LOGGER.info("Wake word detected: %s", wake_word.id)
|
| 793 |
+
self._state.satellite.wakeup(wake_word)
|
| 794 |
+
# Face tracking will handle looking at user automatically
|
| 795 |
+
self._motion.on_wakeup()
|
| 796 |
+
# No need for refractory period - state check handles it
|
|
|
|
|
|
|
| 797 |
|
| 798 |
def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
|
| 799 |
"""Detect stop word in the processed audio features."""
|