Desmond-Dong committed on
Commit
9d92632
·
1 Parent(s): 08a2d90

fix: revert to reference project pattern for wake word detection (v0.7.3)

Browse files

- Use refractory_seconds (2s) instead of broken state flags
- Remove _in_pipeline and _tts_playing - they caused more issues than they solved
- Restore correct RUN_END handling from linux-voice-assistant
- Fix stop() to call _tts_finished() like the reference project

changelog.json CHANGED
@@ -1,4 +1,13 @@
1
  [
 
 
 
 
 
 
 
 
 
2
  {
3
  "version": "0.7.2",
4
  "date": "2026-01-12",
 
1
  [
2
+ {
3
+ "version": "0.7.3",
4
+ "date": "2026-01-12",
5
+ "changes": [
6
+ "Fix: Revert to reference project pattern - use refractory period instead of state flags",
7
+ "Fix: Remove broken _in_pipeline and _tts_playing state management",
8
+ "Fix: Restore correct RUN_END event handling from linux-voice-assistant"
9
+ ]
10
+ },
11
  {
12
  "version": "0.7.2",
13
  "date": "2026-01-12",
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
- version = "0.7.2"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
 
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
+ version = "0.7.3"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
reachy_mini_ha_voice/__init__.py CHANGED
@@ -11,7 +11,7 @@ Key features:
11
  - Reachy Mini motion control integration
12
  """
13
 
14
- __version__ = "0.7.2"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
 
11
  - Reachy Mini motion control integration
12
  """
13
 
14
+ __version__ = "0.7.3"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
reachy_mini_ha_voice/satellite.py CHANGED
@@ -69,20 +69,13 @@ class VoiceSatelliteProtocol(APIServer):
69
  self.camera_server = camera_server
70
 
71
  # Initialize streaming state early (before entity setup)
72
- # This is needed because audio processing thread checks this attribute
73
  self._is_streaming_audio = False
74
- self._in_pipeline = False # True when voice pipeline is active (listening/processing/speaking)
75
- self._tts_playing = False # True when TTS audio is actively playing
76
  self._tts_url: Optional[str] = None
77
  self._tts_played = False
78
  self._continue_conversation = False
79
  self._timer_finished = False
80
  self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
81
 
82
- # Tap-to-talk continuous conversation mode (REMOVED - too many false triggers)
83
- # Continuous conversation is now controlled via Home Assistant switch
84
- # self._tap_conversation_mode = False
85
-
86
  # Conversation tracking for continuous conversation
87
  self._conversation_id: Optional[str] = None
88
  self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
@@ -184,8 +177,6 @@ class VoiceSatelliteProtocol(APIServer):
184
  elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
185
  # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
186
  _LOGGER.debug("TTS_START event received, triggering speaking animation")
187
- # Mark TTS as playing - this prevents wake word detection during TTS
188
- self._tts_playing = True
189
  self._reachy_on_speaking()
190
 
191
  elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
@@ -196,8 +187,11 @@ class VoiceSatelliteProtocol(APIServer):
196
  # Pipeline run ended
197
  self._is_streaming_audio = False
198
 
199
- # Check if should continue conversation
200
- self._handle_run_end()
 
 
 
201
 
202
  def handle_timer_event(
203
  self,
@@ -234,9 +228,6 @@ class VoiceSatelliteProtocol(APIServer):
234
 
235
  self.state.active_wake_words.add(self.state.stop_word.id)
236
  self._continue_conversation = msg.start_conversation
237
- # Mark as playing to prevent wake word detection during announcement
238
- self._tts_playing = True
239
- self._in_pipeline = True
240
  self.duck()
241
 
242
  yield from self.state.media_player_entity.play(
@@ -382,10 +373,7 @@ class VoiceSatelliteProtocol(APIServer):
382
  self._continue_conversation = False
383
 
384
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
385
- """Handle wake word detection - start voice pipeline.
386
-
387
- Only called when in idle state (checked by voice_assistant.py).
388
- """
389
  if self._timer_finished:
390
  # Stop timer instead
391
  self._timer_finished = False
@@ -393,9 +381,6 @@ class VoiceSatelliteProtocol(APIServer):
393
  _LOGGER.debug("Stopping timer finished sound")
394
  return
395
 
396
- # Mark pipeline as active
397
- self._in_pipeline = True
398
-
399
  wake_word_phrase = wake_word.wake_word
400
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
401
 
@@ -436,26 +421,19 @@ class VoiceSatelliteProtocol(APIServer):
436
  """Stop current TTS playback (e.g., user said stop word)."""
437
  self.state.active_wake_words.discard(self.state.stop_word.id)
438
  self.state.tts_player.stop()
439
- # Reset TTS playing flag
440
- self._tts_playing = False
441
 
442
  if self._timer_finished:
443
  self._timer_finished = False
444
  _LOGGER.debug("Stopping timer finished sound")
445
  else:
446
  _LOGGER.debug("TTS response stopped manually")
447
-
448
- # Send announce finished to HA
449
- self.send_messages([VoiceAssistantAnnounceFinished()])
450
- # Note: RUN_END event will handle the rest
451
 
452
  def play_tts(self) -> None:
453
  if (not self._tts_url) or self._tts_played:
454
  return
455
 
456
  self._tts_played = True
457
- # Mark TTS as playing to prevent wake word detection
458
- self._tts_playing = True
459
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
460
 
461
  self.state.active_wake_words.add(self.state.stop_word.id)
@@ -478,9 +456,6 @@ class VoiceSatelliteProtocol(APIServer):
478
 
479
  Following reference project pattern: handle continue conversation here.
480
  """
481
- # Mark TTS as finished
482
- self._tts_playing = False
483
-
484
  self.state.active_wake_words.discard(self.state.stop_word.id)
485
  self.send_messages([VoiceAssistantAnnounceFinished()])
486
 
@@ -494,9 +469,6 @@ class VoiceSatelliteProtocol(APIServer):
494
  _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
495
  continuous_mode, self._continue_conversation)
496
 
497
- # Keep pipeline active during continuous conversation
498
- self._in_pipeline = True
499
-
500
  # Play prompt sound to indicate ready for next input
501
  self.state.tts_player.play(self.state.wakeup_sound)
502
 
@@ -514,23 +486,10 @@ class VoiceSatelliteProtocol(APIServer):
514
  self._clear_conversation()
515
  self.unduck()
516
  _LOGGER.debug("Conversation finished")
517
-
518
- # Mark pipeline as inactive - ready for new wake word
519
- self._in_pipeline = False
520
 
521
  # Reachy Mini: Return to idle
522
  self._reachy_on_idle()
523
 
524
- def _handle_run_end(self) -> None:
525
- """Handle pipeline RUN_END event.
526
-
527
- Following reference project pattern: call _tts_finished if TTS wasn't played.
528
- """
529
- if not self._tts_played:
530
- self._tts_finished()
531
-
532
- self._tts_played = False
533
-
534
  def _play_timer_finished(self) -> None:
535
  if not self._timer_finished:
536
  self.unduck()
@@ -548,8 +507,6 @@ class VoiceSatelliteProtocol(APIServer):
548
  _LOGGER.info("Disconnected from Home Assistant")
549
  # Clear streaming state on disconnect
550
  self._is_streaming_audio = False
551
- self._in_pipeline = False
552
- self._tts_playing = False
553
  self._tts_url = None
554
  self._tts_played = False
555
  self._continue_conversation = False
 
69
  self.camera_server = camera_server
70
 
71
  # Initialize streaming state early (before entity setup)
 
72
  self._is_streaming_audio = False
 
 
73
  self._tts_url: Optional[str] = None
74
  self._tts_played = False
75
  self._continue_conversation = False
76
  self._timer_finished = False
77
  self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
78
 
 
 
 
 
79
  # Conversation tracking for continuous conversation
80
  self._conversation_id: Optional[str] = None
81
  self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
 
177
  elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
178
  # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
179
  _LOGGER.debug("TTS_START event received, triggering speaking animation")
 
 
180
  self._reachy_on_speaking()
181
 
182
  elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
 
187
  # Pipeline run ended
188
  self._is_streaming_audio = False
189
 
190
+ # Following reference project pattern
191
+ if not self._tts_played:
192
+ self._tts_finished()
193
+
194
+ self._tts_played = False
195
 
196
  def handle_timer_event(
197
  self,
 
228
 
229
  self.state.active_wake_words.add(self.state.stop_word.id)
230
  self._continue_conversation = msg.start_conversation
 
 
 
231
  self.duck()
232
 
233
  yield from self.state.media_player_entity.play(
 
373
  self._continue_conversation = False
374
 
375
  def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
376
+ """Handle wake word detection - start voice pipeline."""
 
 
 
377
  if self._timer_finished:
378
  # Stop timer instead
379
  self._timer_finished = False
 
381
  _LOGGER.debug("Stopping timer finished sound")
382
  return
383
 
 
 
 
384
  wake_word_phrase = wake_word.wake_word
385
  _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
386
 
 
421
  """Stop current TTS playback (e.g., user said stop word)."""
422
  self.state.active_wake_words.discard(self.state.stop_word.id)
423
  self.state.tts_player.stop()
 
 
424
 
425
  if self._timer_finished:
426
  self._timer_finished = False
427
  _LOGGER.debug("Stopping timer finished sound")
428
  else:
429
  _LOGGER.debug("TTS response stopped manually")
430
+ self._tts_finished()
 
 
 
431
 
432
  def play_tts(self) -> None:
433
  if (not self._tts_url) or self._tts_played:
434
  return
435
 
436
  self._tts_played = True
 
 
437
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
438
 
439
  self.state.active_wake_words.add(self.state.stop_word.id)
 
456
 
457
  Following reference project pattern: handle continue conversation here.
458
  """
 
 
 
459
  self.state.active_wake_words.discard(self.state.stop_word.id)
460
  self.send_messages([VoiceAssistantAnnounceFinished()])
461
 
 
469
  _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
470
  continuous_mode, self._continue_conversation)
471
 
 
 
 
472
  # Play prompt sound to indicate ready for next input
473
  self.state.tts_player.play(self.state.wakeup_sound)
474
 
 
486
  self._clear_conversation()
487
  self.unduck()
488
  _LOGGER.debug("Conversation finished")
 
 
 
489
 
490
  # Reachy Mini: Return to idle
491
  self._reachy_on_idle()
492
 
 
 
 
 
 
 
 
 
 
 
493
  def _play_timer_finished(self) -> None:
494
  if not self._timer_finished:
495
  self.unduck()
 
507
  _LOGGER.info("Disconnected from Home Assistant")
508
  # Clear streaming state on disconnect
509
  self._is_streaming_audio = False
 
 
510
  self._tts_url = None
511
  self._tts_played = False
512
  self._continue_conversation = False
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -764,23 +764,12 @@ class VoiceAssistantService:
764
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
765
  """Detect wake words in the processed audio features.
766
 
767
- Only detect wake words when in idle state (not in pipeline or TTS playing).
768
- This prevents duplicate triggers during continuous conversation and TTS playback.
769
  """
770
  from pymicro_wakeword import MicroWakeWord
771
  from pyopen_wakeword import OpenWakeWord
772
 
773
- # Skip wake word detection if pipeline is active or TTS is playing
774
- # Check both flags to handle all cases:
775
- # - _in_pipeline: True during listening/processing/speaking phases
776
- # - _tts_playing: True specifically when TTS audio is being played
777
- satellite = self._state.satellite
778
- if satellite is None:
779
- return
780
-
781
- if satellite._in_pipeline or satellite._tts_playing:
782
- return
783
-
784
  for wake_word in ctx.wake_words:
785
  activated = False
786
 
@@ -795,11 +784,16 @@ class VoiceAssistantService:
795
  activated = True
796
 
797
  if activated:
798
- _LOGGER.info("Wake word detected: %s", wake_word.id)
799
- self._state.satellite.wakeup(wake_word)
800
- # Face tracking will handle looking at user automatically
801
- self._motion.on_wakeup()
802
- # No need for refractory period - state check handles it
 
 
 
 
 
803
 
804
  def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
805
  """Detect stop word in the processed audio features."""
 
764
  def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
765
  """Detect wake words in the processed audio features.
766
 
767
+ Uses refractory period to prevent duplicate triggers.
768
+ Following reference project pattern.
769
  """
770
  from pymicro_wakeword import MicroWakeWord
771
  from pyopen_wakeword import OpenWakeWord
772
 
 
 
 
 
 
 
 
 
 
 
 
773
  for wake_word in ctx.wake_words:
774
  activated = False
775
 
 
784
  activated = True
785
 
786
  if activated:
787
+ # Check refractory period to prevent duplicate triggers
788
+ now = time.monotonic()
789
+ if (ctx.last_active is None) or (
790
+ (now - ctx.last_active) > self._state.refractory_seconds
791
+ ):
792
+ _LOGGER.info("Wake word detected: %s", wake_word.id)
793
+ self._state.satellite.wakeup(wake_word)
794
+ # Face tracking will handle looking at user automatically
795
+ self._motion.on_wakeup()
796
+ ctx.last_active = now
797
 
798
  def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
799
  """Detect stop word in the processed audio features."""