Desmond-Dong committed on
Commit
5ce6940
·
1 Parent(s): 5666e95

fix: 使用状态判断唤醒词检测, 添加手势模型信息日志

Browse files

- 添加 _in_pipeline 状态标识对话是否进行中
- 唤醒词检测只在 idle 状态下进行,避免连续对话时重复唤醒
- 手势检测器加载时输出模型输入输出信息用于调试

reachy_mini_ha_voice/gesture_detector.py CHANGED
@@ -133,6 +133,20 @@ class GestureDetector:
133
  self._classifier = ort.InferenceSession(
134
  str(self._classifier_path), providers=providers
135
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  self._available = True
137
  logger.info("Gesture detection ready (18 HaGRID classes)")
138
  except Exception as e:
 
133
  self._classifier = ort.InferenceSession(
134
  str(self._classifier_path), providers=providers
135
  )
136
+
137
+ # Log model input/output info
138
+ det_inputs = self._detector.get_inputs()
139
+ det_outputs = self._detector.get_outputs()
140
+ logger.info("Hand detector - inputs: %s, outputs: %s",
141
+ [(i.name, i.shape) for i in det_inputs],
142
+ [(o.name, o.shape) for o in det_outputs])
143
+
144
+ cls_inputs = self._classifier.get_inputs()
145
+ cls_outputs = self._classifier.get_outputs()
146
+ logger.info("Classifier - inputs: %s, outputs: %s",
147
+ [(i.name, i.shape) for i in cls_inputs],
148
+ [(o.name, o.shape) for o in cls_outputs])
149
+
150
  self._available = True
151
  logger.info("Gesture detection ready (18 HaGRID classes)")
152
  except Exception as e:
reachy_mini_ha_voice/satellite.py CHANGED
@@ -71,6 +71,7 @@ class VoiceSatelliteProtocol(APIServer):
71
  # Initialize streaming state early (before entity setup)
72
  # This is needed because audio processing thread checks this attribute
73
  self._is_streaming_audio = False
 
74
  self._tts_url: Optional[str] = None
75
  self._tts_played = False
76
  self._continue_conversation = False
@@ -374,8 +375,7 @@ class VoiceSatelliteProtocol(APIServer):
374
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
375
  """Handle wake word detection - start voice pipeline.
376
 
377
- Following reference project pattern: no pipeline state check here.
378
- Refractory period in audio processing prevents duplicate triggers.
379
  """
380
  if self._timer_finished:
381
  # Stop timer instead
@@ -384,6 +384,9 @@ class VoiceSatelliteProtocol(APIServer):
384
  _LOGGER.debug("Stopping timer finished sound")
385
  return
386
 
 
 
 
387
  wake_word_phrase = wake_word.wake_word
388
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
389
 
@@ -492,6 +495,9 @@ class VoiceSatelliteProtocol(APIServer):
492
  self._clear_conversation()
493
  self.unduck()
494
  _LOGGER.debug("Conversation finished")
 
 
 
495
 
496
  # Reachy Mini: Return to idle
497
  self._reachy_on_idle()
@@ -523,6 +529,7 @@ class VoiceSatelliteProtocol(APIServer):
523
  _LOGGER.info("Disconnected from Home Assistant")
524
  # Clear streaming state on disconnect
525
  self._is_streaming_audio = False
 
526
  self._tts_url = None
527
  self._tts_played = False
528
  self._continue_conversation = False
 
71
  # Initialize streaming state early (before entity setup)
72
  # This is needed because audio processing thread checks this attribute
73
  self._is_streaming_audio = False
74
+ self._in_pipeline = False # True when voice pipeline is active (listening/processing/speaking)
75
  self._tts_url: Optional[str] = None
76
  self._tts_played = False
77
  self._continue_conversation = False
 
375
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
376
  """Handle wake word detection - start voice pipeline.
377
 
378
+ Only called when in idle state (checked by voice_assistant.py).
 
379
  """
380
  if self._timer_finished:
381
  # Stop timer instead
 
384
  _LOGGER.debug("Stopping timer finished sound")
385
  return
386
 
387
+ # Mark pipeline as active
388
+ self._in_pipeline = True
389
+
390
  wake_word_phrase = wake_word.wake_word
391
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
392
 
 
495
  self._clear_conversation()
496
  self.unduck()
497
  _LOGGER.debug("Conversation finished")
498
+
499
+ # Mark pipeline as inactive - ready for new wake word
500
+ self._in_pipeline = False
501
 
502
  # Reachy Mini: Return to idle
503
  self._reachy_on_idle()
 
529
  _LOGGER.info("Disconnected from Home Assistant")
530
  # Clear streaming state on disconnect
531
  self._is_streaming_audio = False
532
+ self._in_pipeline = False
533
  self._tts_url = None
534
  self._tts_played = False
535
  self._continue_conversation = False
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -764,12 +764,16 @@ class VoiceAssistantService:
764
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
765
  """Detect wake words in the processed audio features.
766
 
767
- Following reference project pattern: only use refractory_seconds.
 
768
  """
769
  from pymicro_wakeword import MicroWakeWord
770
  from pyopen_wakeword import OpenWakeWord
771
 
772
- now = time.monotonic()
 
 
 
773
 
774
  for wake_word in ctx.wake_words:
775
  activated = False
@@ -785,13 +789,11 @@ class VoiceAssistantService:
785
  activated = True
786
 
787
  if activated:
788
- # Check refractory period
789
- if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
790
- _LOGGER.info("Wake word detected: %s", wake_word.id)
791
- self._state.satellite.wakeup(wake_word)
792
- # Face tracking will handle looking at user automatically
793
- self._motion.on_wakeup()
794
- ctx.last_active = now
795
 
796
  def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
797
  """Detect stop word in the processed audio features."""
 
764
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
765
  """Detect wake words in the processed audio features.
766
 
767
+ Only detect wake words when in idle state (not in pipeline).
768
+ This prevents duplicate triggers during continuous conversation.
769
  """
770
  from pymicro_wakeword import MicroWakeWord
771
  from pyopen_wakeword import OpenWakeWord
772
 
773
+ # Skip wake word detection if pipeline is active (listening/processing/speaking)
774
+ # This is the key fix: use state instead of refractory time
775
+ if self._state.satellite and self._state.satellite._in_pipeline:
776
+ return
777
 
778
  for wake_word in ctx.wake_words:
779
  activated = False
 
789
  activated = True
790
 
791
  if activated:
792
+ _LOGGER.info("Wake word detected: %s", wake_word.id)
793
+ self._state.satellite.wakeup(wake_word)
794
+ # Face tracking will handle looking at user automatically
795
+ self._motion.on_wakeup()
796
+ # No need for refractory period - state check handles it
 
 
797
 
798
  def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
799
  """Detect stop word in the processed audio features."""