Desmond-Dong committed on
Commit
afdb99d
·
1 Parent(s): eb57938

refactor: 简化唤醒词检测逻辑，按照参考项目模式

Browse files

- 移除 _pipeline_active 状态和 is_pipeline_active() 方法
- 移除 wake_word_refractory_until，只用 refractory_seconds
- 简化 wakeup() 方法，不再检查 pipeline 状态
- 简化 _handle_run_end() 和 _tts_finished()，按照参考项目模式处理持续对话
- 简化 _process_audio_chunk()，始终处理唤醒词检测

v0.5.17

pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
- version = "0.5.16"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
 
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
+ version = "0.5.17"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
reachy_mini_ha_voice/__init__.py CHANGED
@@ -11,7 +11,7 @@ Key features:
11
  - Reachy Mini motion control integration
12
  """
13
 
14
- __version__ = "0.5.16"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
 
11
  - Reachy Mini motion control integration
12
  """
13
 
14
+ __version__ = "0.5.17"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
reachy_mini_ha_voice/models.py CHANGED
@@ -85,7 +85,6 @@ class ServerState:
85
  satellite: "Optional[VoiceSatelliteProtocol]" = None
86
  wake_words_changed: bool = False
87
  refractory_seconds: float = 2.0
88
- wake_word_refractory_until: float = 0.0 # Timestamp until which wake word detection is suppressed
89
 
90
  def save_preferences(self) -> None:
91
  """Save preferences as JSON."""
 
85
  satellite: "Optional[VoiceSatelliteProtocol]" = None
86
  wake_words_changed: bool = False
87
  refractory_seconds: float = 2.0
 
88
 
89
  def save_preferences(self) -> None:
90
  """Save preferences as JSON."""
reachy_mini_ha_voice/satellite.py CHANGED
@@ -86,9 +86,6 @@ class VoiceSatelliteProtocol(APIServer):
86
  self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
87
  self._last_conversation_time = 0.0
88
 
89
- # Pipeline state tracking - prevent multiple concurrent pipelines
90
- self._pipeline_active = False
91
-
92
  # Initialize Reachy controller
93
  self.reachy_controller = ReachyController(state.reachy_mini)
94
 
@@ -136,13 +133,6 @@ class VoiceSatelliteProtocol(APIServer):
136
  _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
137
 
138
  if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
139
- # Check if pipeline is already active (shouldn't happen, but be safe)
140
- if self._pipeline_active:
141
- _LOGGER.warning("RUN_START received but pipeline already active, stopping previous")
142
- self.state.tts_player.stop()
143
-
144
- # Mark pipeline as active
145
- self._pipeline_active = True
146
  self._tts_url = data.get("url")
147
  self._tts_played = False
148
  self._continue_conversation = False
@@ -179,8 +169,7 @@ class VoiceSatelliteProtocol(APIServer):
179
  self._tts_played = False
180
  self._is_streaming_audio = False
181
 
182
- # Check if should continue conversation (after RUN_END is safe)
183
- # Note: _pipeline_active is managed inside _handle_run_end
184
  self._handle_run_end()
185
 
186
  def handle_timer_event(
@@ -361,14 +350,13 @@ class VoiceSatelliteProtocol(APIServer):
361
  """Clear conversation state when exiting conversation mode."""
362
  self._conversation_id = None
363
  self._continue_conversation = False
364
- self._pipeline_active = False
365
 
366
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
367
- # Prevent starting new conversation if pipeline is already active
368
- if self._pipeline_active:
369
- _LOGGER.warning("Pipeline already active, ignoring wake word")
370
- return
371
 
 
 
 
372
  if self._timer_finished:
373
  # Stop timer instead
374
  self._timer_finished = False
@@ -376,16 +364,10 @@ class VoiceSatelliteProtocol(APIServer):
376
  _LOGGER.debug("Stopping timer finished sound")
377
  return
378
 
379
- # Mark pipeline as active IMMEDIATELY to prevent duplicate wakeups
380
- # This is set before sending request to HA, as there's network delay
381
- self._pipeline_active = True
382
-
383
  wake_word_phrase = wake_word.wake_word
384
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
385
 
386
  # Turn toward sound source using DOA (Direction of Arrival)
387
- # Only read DOA once at wakeup to avoid daemon pressure
388
- # Face tracking will take over after initial turn
389
  self._turn_to_sound_source()
390
 
391
  # Get or create conversation_id for context tracking
@@ -418,10 +400,6 @@ class VoiceSatelliteProtocol(APIServer):
418
  """
419
  return False
420
 
421
- def is_pipeline_active(self) -> bool:
422
- """Check if voice pipeline is currently active (listening/thinking/speaking)."""
423
- return self._pipeline_active
424
-
425
  def stop(self) -> None:
426
  """Stop current TTS playback (e.g., user said stop word)."""
427
  self.state.active_wake_words.discard(self.state.stop_word.id)
@@ -462,24 +440,12 @@ class VoiceSatelliteProtocol(APIServer):
462
  def _tts_finished(self) -> None:
463
  """Called when TTS audio playback finishes.
464
 
465
- Note: This is called from the audio player callback, NOT from HA events.
466
- We should NOT start a new conversation here - wait for RUN_END event.
467
  """
468
  self.state.active_wake_words.discard(self.state.stop_word.id)
469
  self.send_messages([VoiceAssistantAnnounceFinished()])
470
- _LOGGER.debug("TTS playback finished, waiting for RUN_END event")
471
-
472
- def _handle_run_end(self) -> None:
473
- """Handle pipeline RUN_END event - safe point to continue conversation.
474
 
475
- This is called after HA has fully completed the pipeline run.
476
- """
477
- # If pipeline wasn't active, this might be a duplicate RUN_END - ignore
478
- if not self._pipeline_active:
479
- _LOGGER.debug("RUN_END received but pipeline wasn't active, ignoring")
480
- return
481
-
482
- # Check if should continue conversation BEFORE clearing pipeline state
483
  # 1. Our switch is ON: Always continue (unconditional)
484
  # 2. Our switch is OFF: Follow HA's continue_conversation request
485
  continuous_mode = self.state.preferences.continuous_conversation
@@ -489,11 +455,7 @@ class VoiceSatelliteProtocol(APIServer):
489
  _LOGGER.info("Continuing conversation (our_switch=%s, ha_request=%s)",
490
  continuous_mode, self._continue_conversation)
491
 
492
- # Keep pipeline active - no gap for wake word detection
493
- # _pipeline_active stays True
494
-
495
  # Play prompt sound to indicate ready for next input
496
- # Use wakeup sound as the prompt (short beep)
497
  self.state.tts_player.play(self.state.wakeup_sound)
498
 
499
  # Use same conversation_id for context continuity
@@ -504,23 +466,26 @@ class VoiceSatelliteProtocol(APIServer):
504
  )])
505
  self._is_streaming_audio = True
506
 
507
- # Stay in listening mode, don't go to idle
508
  self._reachy_on_listening()
509
  else:
510
- # Conversation ended, clear state
511
- self._pipeline_active = False
512
  self._clear_conversation()
513
  self.unduck()
514
- _LOGGER.debug("Pipeline ended, conversation finished")
515
-
516
- # Set wake word refractory period to prevent immediate re-trigger
517
- # Wake word model may have accumulated state during conversation
518
- self.state.wake_word_refractory_until = time.monotonic() + 1.5 # 1.5 second cooldown
519
- _LOGGER.debug("Wake word refractory period set for 1.5 seconds")
520
 
521
  # Reachy Mini: Return to idle
522
  self._reachy_on_idle()
523
 
 
 
 
 
 
 
 
 
 
 
524
  def _play_timer_finished(self) -> None:
525
  if not self._timer_finished:
526
  self.unduck()
 
86
  self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
87
  self._last_conversation_time = 0.0
88
 
 
 
 
89
  # Initialize Reachy controller
90
  self.reachy_controller = ReachyController(state.reachy_mini)
91
 
 
133
  _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
134
 
135
  if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
 
 
 
 
 
 
 
136
  self._tts_url = data.get("url")
137
  self._tts_played = False
138
  self._continue_conversation = False
 
169
  self._tts_played = False
170
  self._is_streaming_audio = False
171
 
172
+ # Check if should continue conversation
 
173
  self._handle_run_end()
174
 
175
  def handle_timer_event(
 
350
  """Clear conversation state when exiting conversation mode."""
351
  self._conversation_id = None
352
  self._continue_conversation = False
 
353
 
354
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
355
+ """Handle wake word detection - start voice pipeline.
 
 
 
356
 
357
+ Following reference project pattern: no pipeline state check here.
358
+ Refractory period in audio processing prevents duplicate triggers.
359
+ """
360
  if self._timer_finished:
361
  # Stop timer instead
362
  self._timer_finished = False
 
364
  _LOGGER.debug("Stopping timer finished sound")
365
  return
366
 
 
 
 
 
367
  wake_word_phrase = wake_word.wake_word
368
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
369
 
370
  # Turn toward sound source using DOA (Direction of Arrival)
 
 
371
  self._turn_to_sound_source()
372
 
373
  # Get or create conversation_id for context tracking
 
400
  """
401
  return False
402
 
 
 
 
 
403
  def stop(self) -> None:
404
  """Stop current TTS playback (e.g., user said stop word)."""
405
  self.state.active_wake_words.discard(self.state.stop_word.id)
 
440
  def _tts_finished(self) -> None:
441
  """Called when TTS audio playback finishes.
442
 
443
+ Following reference project pattern: handle continue conversation here.
 
444
  """
445
  self.state.active_wake_words.discard(self.state.stop_word.id)
446
  self.send_messages([VoiceAssistantAnnounceFinished()])
 
 
 
 
447
 
448
+ # Check if should continue conversation
 
 
 
 
 
 
 
449
  # 1. Our switch is ON: Always continue (unconditional)
450
  # 2. Our switch is OFF: Follow HA's continue_conversation request
451
  continuous_mode = self.state.preferences.continuous_conversation
 
455
  _LOGGER.info("Continuing conversation (our_switch=%s, ha_request=%s)",
456
  continuous_mode, self._continue_conversation)
457
 
 
 
 
458
  # Play prompt sound to indicate ready for next input
 
459
  self.state.tts_player.play(self.state.wakeup_sound)
460
 
461
  # Use same conversation_id for context continuity
 
466
  )])
467
  self._is_streaming_audio = True
468
 
469
+ # Stay in listening mode
470
  self._reachy_on_listening()
471
  else:
 
 
472
  self._clear_conversation()
473
  self.unduck()
474
+ _LOGGER.debug("Conversation finished")
 
 
 
 
 
475
 
476
  # Reachy Mini: Return to idle
477
  self._reachy_on_idle()
478
 
479
+ def _handle_run_end(self) -> None:
480
+ """Handle pipeline RUN_END event.
481
+
482
+ Following reference project pattern: call _tts_finished if TTS wasn't played.
483
+ """
484
+ if not self._tts_played:
485
+ self._tts_finished()
486
+
487
+ self._tts_played = False
488
+
489
  def _play_timer_finished(self) -> None:
490
  if not self._timer_finished:
491
  self.unduck()
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -718,6 +718,9 @@ class VoiceAssistantService:
718
  def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
719
  """Process an audio chunk for wake word detection.
720
 
 
 
 
721
  Args:
722
  ctx: Audio processing context
723
  audio_chunk: PCM audio bytes
@@ -725,12 +728,6 @@ class VoiceAssistantService:
725
  # Stream audio to Home Assistant
726
  self._state.satellite.handle_audio(audio_chunk)
727
 
728
- # Skip wake word processing entirely if pipeline is active
729
- # This prevents model state accumulation during conversation
730
- pipeline_active = self._state.satellite.is_pipeline_active()
731
- if pipeline_active:
732
- return
733
-
734
  # Process wake word features
735
  self._process_features(ctx, audio_chunk)
736
 
@@ -750,14 +747,14 @@ class VoiceAssistantService:
750
  ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
751
 
752
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
753
- """Detect wake words in the processed audio features."""
 
 
 
754
  from pymicro_wakeword import MicroWakeWord
755
  from pyopen_wakeword import OpenWakeWord
756
 
757
- # Check global refractory period (set after conversation ends)
758
  now = time.monotonic()
759
- if now < self._state.wake_word_refractory_until:
760
- return
761
 
762
  for wake_word in ctx.wake_words:
763
  activated = False
@@ -773,6 +770,7 @@ class VoiceAssistantService:
773
  activated = True
774
 
775
  if activated:
 
776
  if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
777
  _LOGGER.info("Wake word detected: %s", wake_word.id)
778
  self._state.satellite.wakeup(wake_word)
 
718
  def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
719
  """Process an audio chunk for wake word detection.
720
 
721
+ Following reference project pattern: always process wake words.
722
+ Refractory period prevents duplicate triggers.
723
+
724
  Args:
725
  ctx: Audio processing context
726
  audio_chunk: PCM audio bytes
 
728
  # Stream audio to Home Assistant
729
  self._state.satellite.handle_audio(audio_chunk)
730
 
 
 
 
 
 
 
731
  # Process wake word features
732
  self._process_features(ctx, audio_chunk)
733
 
 
747
  ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
748
 
749
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
750
+ """Detect wake words in the processed audio features.
751
+
752
+ Following reference project pattern: only use refractory_seconds.
753
+ """
754
  from pymicro_wakeword import MicroWakeWord
755
  from pyopen_wakeword import OpenWakeWord
756
 
 
757
  now = time.monotonic()
 
 
758
 
759
  for wake_word in ctx.wake_words:
760
  activated = False
 
770
  activated = True
771
 
772
  if activated:
773
+ # Check refractory period
774
  if (ctx.last_active is None) or ((now - ctx.last_active) > self._state.refractory_seconds):
775
  _LOGGER.info("Wake word detected: %s", wake_word.id)
776
  self._state.satellite.wakeup(wake_word)