Commit ·
9d92632
1
Parent(s): 08a2d90
fix: revert to reference project pattern for wake word detection (v0.7.3)
Browse files
- Use refractory_seconds (2s) instead of broken state flags
- Remove _in_pipeline and _tts_playing - they caused more issues
- Restore correct RUN_END handling from linux-voice-assistant
- Fix stop() to call _tts_finished() like reference project
- changelog.json +9 -0
- pyproject.toml +1 -1
- reachy_mini_ha_voice/__init__.py +1 -1
- reachy_mini_ha_voice/satellite.py +7 -50
- reachy_mini_ha_voice/voice_assistant.py +12 -18
changelog.json
CHANGED
|
@@ -1,4 +1,13 @@
|
|
| 1 |
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
{
|
| 3 |
"version": "0.7.2",
|
| 4 |
"date": "2026-01-12",
|
|
|
|
| 1 |
[
|
| 2 |
+
{
|
| 3 |
+
"version": "0.7.3",
|
| 4 |
+
"date": "2026-01-12",
|
| 5 |
+
"changes": [
|
| 6 |
+
"Fix: Revert to reference project pattern - use refractory period instead of state flags",
|
| 7 |
+
"Fix: Remove broken _in_pipeline and _tts_playing state management",
|
| 8 |
+
"Fix: Restore correct RUN_END event handling from linux-voice-assistant"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
{
|
| 12 |
"version": "0.7.2",
|
| 13 |
"date": "2026-01-12",
|
pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "reachy_mini_ha_voice"
|
| 7 |
-
version = "0.7.2"
|
| 8 |
description = "Home Assistant Voice Assistant for Reachy Mini"
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.10"
|
|
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "reachy_mini_ha_voice"
|
| 7 |
+
version = "0.7.3"
|
| 8 |
description = "Home Assistant Voice Assistant for Reachy Mini"
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.10"
|
reachy_mini_ha_voice/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ Key features:
|
|
| 11 |
- Reachy Mini motion control integration
|
| 12 |
"""
|
| 13 |
|
| 14 |
-
__version__ = "0.7.2"
|
| 15 |
__author__ = "Desmond Dong"
|
| 16 |
|
| 17 |
# Don't import main module here to avoid runpy warning
|
|
|
|
| 11 |
- Reachy Mini motion control integration
|
| 12 |
"""
|
| 13 |
|
| 14 |
+
__version__ = "0.7.3"
|
| 15 |
__author__ = "Desmond Dong"
|
| 16 |
|
| 17 |
# Don't import main module here to avoid runpy warning
|
reachy_mini_ha_voice/satellite.py
CHANGED
|
@@ -69,20 +69,13 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 69 |
self.camera_server = camera_server
|
| 70 |
|
| 71 |
# Initialize streaming state early (before entity setup)
|
| 72 |
-
# This is needed because audio processing thread checks this attribute
|
| 73 |
self._is_streaming_audio = False
|
| 74 |
-
self._in_pipeline = False # True when voice pipeline is active (listening/processing/speaking)
|
| 75 |
-
self._tts_playing = False # True when TTS audio is actively playing
|
| 76 |
self._tts_url: Optional[str] = None
|
| 77 |
self._tts_played = False
|
| 78 |
self._continue_conversation = False
|
| 79 |
self._timer_finished = False
|
| 80 |
self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
|
| 81 |
|
| 82 |
-
# Tap-to-talk continuous conversation mode (REMOVED - too many false triggers)
|
| 83 |
-
# Continuous conversation is now controlled via Home Assistant switch
|
| 84 |
-
# self._tap_conversation_mode = False
|
| 85 |
-
|
| 86 |
# Conversation tracking for continuous conversation
|
| 87 |
self._conversation_id: Optional[str] = None
|
| 88 |
self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
|
|
@@ -184,8 +177,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 184 |
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
|
| 185 |
# Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
|
| 186 |
_LOGGER.debug("TTS_START event received, triggering speaking animation")
|
| 187 |
-
# Mark TTS as playing - this prevents wake word detection during TTS
|
| 188 |
-
self._tts_playing = True
|
| 189 |
self._reachy_on_speaking()
|
| 190 |
|
| 191 |
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
|
|
@@ -196,8 +187,11 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 196 |
# Pipeline run ended
|
| 197 |
self._is_streaming_audio = False
|
| 198 |
|
| 199 |
-
#
|
| 200 |
-
self._handle_run_end()
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
def handle_timer_event(
|
| 203 |
self,
|
|
@@ -234,9 +228,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 234 |
|
| 235 |
self.state.active_wake_words.add(self.state.stop_word.id)
|
| 236 |
self._continue_conversation = msg.start_conversation
|
| 237 |
-
# Mark as playing to prevent wake word detection during announcement
|
| 238 |
-
self._tts_playing = True
|
| 239 |
-
self._in_pipeline = True
|
| 240 |
self.duck()
|
| 241 |
|
| 242 |
yield from self.state.media_player_entity.play(
|
|
@@ -382,10 +373,7 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 382 |
self._continue_conversation = False
|
| 383 |
|
| 384 |
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
|
| 385 |
-
"""Handle wake word detection - start voice pipeline.
|
| 386 |
-
|
| 387 |
-
Only called when in idle state (checked by voice_assistant.py).
|
| 388 |
-
"""
|
| 389 |
if self._timer_finished:
|
| 390 |
# Stop timer instead
|
| 391 |
self._timer_finished = False
|
|
@@ -393,9 +381,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 393 |
_LOGGER.debug("Stopping timer finished sound")
|
| 394 |
return
|
| 395 |
|
| 396 |
-
# Mark pipeline as active
|
| 397 |
-
self._in_pipeline = True
|
| 398 |
-
|
| 399 |
wake_word_phrase = wake_word.wake_word
|
| 400 |
_LOGGER.debug("Detected wake word: %s", wake_word_phrase)
|
| 401 |
|
|
@@ -436,26 +421,19 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 436 |
"""Stop current TTS playback (e.g., user said stop word)."""
|
| 437 |
self.state.active_wake_words.discard(self.state.stop_word.id)
|
| 438 |
self.state.tts_player.stop()
|
| 439 |
-
# Reset TTS playing flag
|
| 440 |
-
self._tts_playing = False
|
| 441 |
|
| 442 |
if self._timer_finished:
|
| 443 |
self._timer_finished = False
|
| 444 |
_LOGGER.debug("Stopping timer finished sound")
|
| 445 |
else:
|
| 446 |
_LOGGER.debug("TTS response stopped manually")
|
| 447 |
-
|
| 448 |
-
# Send announce finished to HA
|
| 449 |
-
self.send_messages([VoiceAssistantAnnounceFinished()])
|
| 450 |
-
# Note: RUN_END event will handle the rest
|
| 451 |
|
| 452 |
def play_tts(self) -> None:
|
| 453 |
if (not self._tts_url) or self._tts_played:
|
| 454 |
return
|
| 455 |
|
| 456 |
self._tts_played = True
|
| 457 |
-
# Mark TTS as playing to prevent wake word detection
|
| 458 |
-
self._tts_playing = True
|
| 459 |
_LOGGER.debug("Playing TTS response: %s", self._tts_url)
|
| 460 |
|
| 461 |
self.state.active_wake_words.add(self.state.stop_word.id)
|
|
@@ -478,9 +456,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 478 |
|
| 479 |
Following reference project pattern: handle continue conversation here.
|
| 480 |
"""
|
| 481 |
-
# Mark TTS as finished
|
| 482 |
-
self._tts_playing = False
|
| 483 |
-
|
| 484 |
self.state.active_wake_words.discard(self.state.stop_word.id)
|
| 485 |
self.send_messages([VoiceAssistantAnnounceFinished()])
|
| 486 |
|
|
@@ -494,9 +469,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 494 |
_LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
|
| 495 |
continuous_mode, self._continue_conversation)
|
| 496 |
|
| 497 |
-
# Keep pipeline active during continuous conversation
|
| 498 |
-
self._in_pipeline = True
|
| 499 |
-
|
| 500 |
# Play prompt sound to indicate ready for next input
|
| 501 |
self.state.tts_player.play(self.state.wakeup_sound)
|
| 502 |
|
|
@@ -514,23 +486,10 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 514 |
self._clear_conversation()
|
| 515 |
self.unduck()
|
| 516 |
_LOGGER.debug("Conversation finished")
|
| 517 |
-
|
| 518 |
-
# Mark pipeline as inactive - ready for new wake word
|
| 519 |
-
self._in_pipeline = False
|
| 520 |
|
| 521 |
# Reachy Mini: Return to idle
|
| 522 |
self._reachy_on_idle()
|
| 523 |
|
| 524 |
-
def _handle_run_end(self) -> None:
|
| 525 |
-
"""Handle pipeline RUN_END event.
|
| 526 |
-
|
| 527 |
-
Following reference project pattern: call _tts_finished if TTS wasn't played.
|
| 528 |
-
"""
|
| 529 |
-
if not self._tts_played:
|
| 530 |
-
self._tts_finished()
|
| 531 |
-
|
| 532 |
-
self._tts_played = False
|
| 533 |
-
|
| 534 |
def _play_timer_finished(self) -> None:
|
| 535 |
if not self._timer_finished:
|
| 536 |
self.unduck()
|
|
@@ -548,8 +507,6 @@ class VoiceSatelliteProtocol(APIServer):
|
|
| 548 |
_LOGGER.info("Disconnected from Home Assistant")
|
| 549 |
# Clear streaming state on disconnect
|
| 550 |
self._is_streaming_audio = False
|
| 551 |
-
self._in_pipeline = False
|
| 552 |
-
self._tts_playing = False
|
| 553 |
self._tts_url = None
|
| 554 |
self._tts_played = False
|
| 555 |
self._continue_conversation = False
|
|
|
|
| 69 |
self.camera_server = camera_server
|
| 70 |
|
| 71 |
# Initialize streaming state early (before entity setup)
|
|
|
|
| 72 |
self._is_streaming_audio = False
|
|
|
|
|
|
|
| 73 |
self._tts_url: Optional[str] = None
|
| 74 |
self._tts_played = False
|
| 75 |
self._continue_conversation = False
|
| 76 |
self._timer_finished = False
|
| 77 |
self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Conversation tracking for continuous conversation
|
| 80 |
self._conversation_id: Optional[str] = None
|
| 81 |
self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
|
|
|
|
| 177 |
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
|
| 178 |
# Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
|
| 179 |
_LOGGER.debug("TTS_START event received, triggering speaking animation")
|
|
|
|
|
|
|
| 180 |
self._reachy_on_speaking()
|
| 181 |
|
| 182 |
elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
|
|
|
|
| 187 |
# Pipeline run ended
|
| 188 |
self._is_streaming_audio = False
|
| 189 |
|
| 190 |
+
# Following reference project pattern
|
| 191 |
+
if not self._tts_played:
|
| 192 |
+
self._tts_finished()
|
| 193 |
+
|
| 194 |
+
self._tts_played = False
|
| 195 |
|
| 196 |
def handle_timer_event(
|
| 197 |
self,
|
|
|
|
| 228 |
|
| 229 |
self.state.active_wake_words.add(self.state.stop_word.id)
|
| 230 |
self._continue_conversation = msg.start_conversation
|
|
|
|
|
|
|
|
|
|
| 231 |
self.duck()
|
| 232 |
|
| 233 |
yield from self.state.media_player_entity.play(
|
|
|
|
| 373 |
self._continue_conversation = False
|
| 374 |
|
| 375 |
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
|
| 376 |
+
"""Handle wake word detection - start voice pipeline."""
|
|
|
|
|
|
|
|
|
|
| 377 |
if self._timer_finished:
|
| 378 |
# Stop timer instead
|
| 379 |
self._timer_finished = False
|
|
|
|
| 381 |
_LOGGER.debug("Stopping timer finished sound")
|
| 382 |
return
|
| 383 |
|
|
|
|
|
|
|
|
|
|
| 384 |
wake_word_phrase = wake_word.wake_word
|
| 385 |
_LOGGER.debug("Detected wake word: %s", wake_word_phrase)
|
| 386 |
|
|
|
|
| 421 |
"""Stop current TTS playback (e.g., user said stop word)."""
|
| 422 |
self.state.active_wake_words.discard(self.state.stop_word.id)
|
| 423 |
self.state.tts_player.stop()
|
|
|
|
|
|
|
| 424 |
|
| 425 |
if self._timer_finished:
|
| 426 |
self._timer_finished = False
|
| 427 |
_LOGGER.debug("Stopping timer finished sound")
|
| 428 |
else:
|
| 429 |
_LOGGER.debug("TTS response stopped manually")
|
| 430 |
+
self._tts_finished()
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
def play_tts(self) -> None:
|
| 433 |
if (not self._tts_url) or self._tts_played:
|
| 434 |
return
|
| 435 |
|
| 436 |
self._tts_played = True
|
|
|
|
|
|
|
| 437 |
_LOGGER.debug("Playing TTS response: %s", self._tts_url)
|
| 438 |
|
| 439 |
self.state.active_wake_words.add(self.state.stop_word.id)
|
|
|
|
| 456 |
|
| 457 |
Following reference project pattern: handle continue conversation here.
|
| 458 |
"""
|
|
|
|
|
|
|
|
|
|
| 459 |
self.state.active_wake_words.discard(self.state.stop_word.id)
|
| 460 |
self.send_messages([VoiceAssistantAnnounceFinished()])
|
| 461 |
|
|
|
|
| 469 |
_LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
|
| 470 |
continuous_mode, self._continue_conversation)
|
| 471 |
|
|
|
|
|
|
|
|
|
|
| 472 |
# Play prompt sound to indicate ready for next input
|
| 473 |
self.state.tts_player.play(self.state.wakeup_sound)
|
| 474 |
|
|
|
|
| 486 |
self._clear_conversation()
|
| 487 |
self.unduck()
|
| 488 |
_LOGGER.debug("Conversation finished")
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
# Reachy Mini: Return to idle
|
| 491 |
self._reachy_on_idle()
|
| 492 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
def _play_timer_finished(self) -> None:
|
| 494 |
if not self._timer_finished:
|
| 495 |
self.unduck()
|
|
|
|
| 507 |
_LOGGER.info("Disconnected from Home Assistant")
|
| 508 |
# Clear streaming state on disconnect
|
| 509 |
self._is_streaming_audio = False
|
|
|
|
|
|
|
| 510 |
self._tts_url = None
|
| 511 |
self._tts_played = False
|
| 512 |
self._continue_conversation = False
|
reachy_mini_ha_voice/voice_assistant.py
CHANGED
|
@@ -764,23 +764,12 @@ class VoiceAssistantService:
|
|
| 764 |
def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
|
| 765 |
"""Detect wake words in the processed audio features.
|
| 766 |
|
| 767 |
-
|
| 768 |
-
|
| 769 |
"""
|
| 770 |
from pymicro_wakeword import MicroWakeWord
|
| 771 |
from pyopen_wakeword import OpenWakeWord
|
| 772 |
|
| 773 |
-
# Skip wake word detection if pipeline is active or TTS is playing
|
| 774 |
-
# Check both flags to handle all cases:
|
| 775 |
-
# - _in_pipeline: True during listening/processing/speaking phases
|
| 776 |
-
# - _tts_playing: True specifically when TTS audio is being played
|
| 777 |
-
satellite = self._state.satellite
|
| 778 |
-
if satellite is None:
|
| 779 |
-
return
|
| 780 |
-
|
| 781 |
-
if satellite._in_pipeline or satellite._tts_playing:
|
| 782 |
-
return
|
| 783 |
-
|
| 784 |
for wake_word in ctx.wake_words:
|
| 785 |
activated = False
|
| 786 |
|
|
@@ -795,11 +784,16 @@ class VoiceAssistantService:
|
|
| 795 |
activated = True
|
| 796 |
|
| 797 |
if activated:
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 803 |
|
| 804 |
def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
|
| 805 |
"""Detect stop word in the processed audio features."""
|
|
|
|
| 764 |
def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
|
| 765 |
"""Detect wake words in the processed audio features.
|
| 766 |
|
| 767 |
+
Uses refractory period to prevent duplicate triggers.
|
| 768 |
+
Following reference project pattern.
|
| 769 |
"""
|
| 770 |
from pymicro_wakeword import MicroWakeWord
|
| 771 |
from pyopen_wakeword import OpenWakeWord
|
| 772 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
for wake_word in ctx.wake_words:
|
| 774 |
activated = False
|
| 775 |
|
|
|
|
| 784 |
activated = True
|
| 785 |
|
| 786 |
if activated:
|
| 787 |
+
# Check refractory period to prevent duplicate triggers
|
| 788 |
+
now = time.monotonic()
|
| 789 |
+
if (ctx.last_active is None) or (
|
| 790 |
+
(now - ctx.last_active) > self._state.refractory_seconds
|
| 791 |
+
):
|
| 792 |
+
_LOGGER.info("Wake word detected: %s", wake_word.id)
|
| 793 |
+
self._state.satellite.wakeup(wake_word)
|
| 794 |
+
# Face tracking will handle looking at user automatically
|
| 795 |
+
self._motion.on_wakeup()
|
| 796 |
+
ctx.last_active = now
|
| 797 |
|
| 798 |
def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
|
| 799 |
"""Detect stop word in the processed audio features."""
|