Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

App Files Community

fix: use right name for store search

by Domotick - opened Jan 16

base: refs/heads/main

←

from: refs/pr/5

Discussion Files changed

+8118

-19930

This view is limited to 50 files because it contains too many changes. See the raw diff here.

Files changed (50) hide show

.claude/settings.local.json +9 -38
.gitattributes +6 -3
.github/dependabot.yml +0 -13
.github/workflows/sync_develop_to_hf_edge.yml +0 -86
.github/workflows/sync_to_hf.yml +0 -36
.gitignore +5 -12
.pre-commit-config.yaml +0 -20
CHANGELOG.md +0 -713
PROJECT_PLAN.md +1279 -0
Project_Summary.md +0 -1439
README.md +1 -0
changelog.json +1 -272
docs/USER_MANUAL_CN.md +0 -244
docs/USER_MANUAL_EN.md +0 -244
home_assistant_blueprints/reachy_mini_presence_companion.yaml +0 -246
index.html +36 -98
pyproject.toml +22 -141
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py +24 -29
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py +63 -52
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py +44 -123
reachy_mini_ha_voice/animations/conversation_animations.json +87 -0
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py +35 -65
reachy_mini_ha_voice/audio_player.py +578 -0
reachy_mini_ha_voice/camera_server.py +842 -0
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py +54 -44
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py +25 -33
reachy_mini_ha_voice/entity_registry.py +945 -0
reachy_mini_ha_voice/gesture_detector.py +183 -0
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py +48 -158
{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py +83 -31
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py +25 -108
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx +0 -0
reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py +17 -31
reachy_mini_ha_voice/movement_manager.py +861 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py +869 -735
reachy_mini_ha_voice/satellite.py +784 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac +0 -0
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py +27 -36
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css +0 -0
reachy_mini_ha_voice/util.py +45 -0
reachy_mini_ha_voice/voice_assistant.py +810 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep +0 -0
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md +0 -0

.claude/settings.local.json CHANGED Viewed

@@ -3,53 +3,24 @@
   "includeCoAuthoredBy": false,
   "permissions": {
     "allow": [
-      "Bash",
-      "BashOutput",
       "Edit",
-      "Glob",
-      "Grep",
-      "KillShell",
-      "NotebookEdit",
-      "Read",
-      "SlashCommand",
-      "Task",
-      "TodoWrite",
-      "WebFetch",
-      "WebSearch",
-      "Write",
-      "mcp__ide",
-      "mcp__exa",
-      "mcp__context7",
-      "mcp__mcp-deepwiki",
-      "mcp__Playwright",
-      "mcp__spec-workflow",
-      "mcp__open-websearch",
-      "mcp__serena",
-      "All",
-      "Bash(copy:*)",
-      "mcp__zread__search_doc",
-      "mcp__zread__read_file",
       "Bash(cd:*)",
-      "Bash(ls:*)",
-      "Bash(find:*)",
-      "mcp__acp__Bash",
-      "Skill(commit-commands:commit)",
-      "Skill(commit-commands:commit:*)"
     ],
     "deny": [],
     "ask": []
   },
-  "model": "opus",
   "hooks": {},
   "statusLine": {
     "type": "command",
     "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
     "padding": 0
   },
-  "enabledPlugins": {
-    "glm-plan-usage@zai-coding-plugins": true,
-    "glm-plan-bug@zai-coding-plugins": true
-  },
-  "outputStyle": "Explanatory",
-  "alwaysThinkingEnabled": true
-}

   "includeCoAuthoredBy": false,
   "permissions": {
     "allow": [
+      "SlashCommand(/zcf:git-commit)",
       "Edit",
       "Bash(cd:*)",
+      "SlashCommand(/zcf:git-commit --emoji)",
+      "SlashCommand(/zcf:git-commit:*)",
+      "Bash(git:*)",
+      "Bash(ls:*)"
     ],
     "deny": [],
     "ask": []
   },
   "hooks": {},
+  "alwaysThinkingEnabled": true,
+  "outputStyle": "default",
   "statusLine": {
     "type": "command",
     "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
     "padding": 0
   },
+  "model": "opus"
+}

.gitattributes CHANGED Viewed

@@ -1,5 +1,8 @@
-# LFS tracking for large binary files
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.flac filter=lfs diff=lfs merge=lfs -text

+reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
+reachy_mini_ha_voice/sounds/**/*.flac filter=lfs diff=lfs merge=lfs -text
+"reachy_mini_ha_voice/wakewords/**/*.tflite" filter=lfs diff=lfs merge=lfs -text
+"reachy_mini_ha_voice/sounds/**/*.flac" filter=lfs diff=lfs merge=lfs -text
+"ha/assets/meshes/*.stl" filter=lfs diff=lfs merge=lfs -text
+"ha/assets/*.urdf" filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text

.github/dependabot.yml DELETED Viewed

@@ -1,13 +0,0 @@
-version: 2
-updates:
-  # Enable version updates for pip
-  - package-ecosystem: "pip"
-    directory: "/"
-    schedule:
-      interval: "weekly"
-    # Ignore PyTorch updates - locked version required for compatibility
-    ignore:
-      - dependency-name: "torch"
-        versions: [">2.5.1"]
-      - dependency-name: "torchvision"
-        versions: [">0.20.1"]

.github/workflows/sync_develop_to_hf_edge.yml DELETED Viewed

@@ -1,86 +0,0 @@
-name: Sync Develop to Hugging Face Edge
-on:
-  push:
-    branches: [develop]
-  workflow_dispatch:
-jobs:
-  sync-edge:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout GitHub repo
-        uses: actions/checkout@v4
-        with:
-          lfs: true
-      - name: Transform project name for edge channel
-        run: |
-          python - <<'PY'
-          from pathlib import Path
-          # Keep runtime module path unchanged, only rewrite package/app naming metadata.
-          pyproject = Path('pyproject.toml')
-          text = pyproject.read_text(encoding='utf-8')
-          text = text.replace(
-              'name = "reachy_mini_home_assistant"',
-              'name = "reachy_mini_home_assistant_edge"',
-              1,
-          )
-          text = text.replace(
-              'reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
-              'reachy_mini_home_assistant_edge = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
-              1,
-          )
-          pyproject.write_text(text, encoding='utf-8')
-          init_file = Path('reachy_mini_home_assistant/__init__.py')
-          init_text = init_file.read_text(encoding='utf-8')
-          init_text = init_text.replace(
-              'version("reachy_mini_home_assistant")',
-              'version("reachy_mini_home_assistant_edge")',
-              1,
-          )
-          init_file.write_text(init_text, encoding='utf-8')
-          readme = Path('README.md')
-          if readme.exists():
-              readme_text = readme.read_text(encoding='utf-8')
-              readme_text = readme_text.replace(
-                  'title: Reachy Mini for Home Assistant',
-                  'title: Reachy Mini for Home Assistant (Edge)',
-                  1,
-              )
-              readme_text = readme_text.replace(
-                  'short_description: Deep integration of Reachy Mini robot with Home Assistant',
-                  'short_description: Edge channel for Reachy Mini Home Assistant integration',
-                  1,
-              )
-              readme_text = readme_text.replace(
-                  '  - reachy_mini_home_assistant',
-                  '  - reachy_mini_home_assistant_edge',
-                  1,
-              )
-              readme.write_text(readme_text, encoding='utf-8')
-          PY
-      - name: Create fresh commit and push to Hugging Face edge space
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: |
-          git config --global user.email "action@github.com"
-          git config --global user.name "GitHub Action"
-          # Create a new orphan branch with no history
-          git checkout --orphan hf-edge-sync
-          git add -A
-          git commit -m "Fresh edge sync: $(date +%Y-%m-%d_%H:%M:%S)"
-          # Add Hugging Face edge remote
-          git remote add hf-edge https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant_edge
-          # Push LFS objects first
-          git lfs push hf-edge hf-edge-sync --all
-          # Force push as main to HF edge space
-          git push hf-edge hf-edge-sync:main --force

.github/workflows/sync_to_hf.yml DELETED Viewed

@@ -1,36 +0,0 @@
-name: Sync to Hugging Face
-on:
-  push:
-    branches: [main]
-  workflow_dispatch:
-jobs:
-  sync:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout GitHub repo
-        uses: actions/checkout@v4
-        with:
-          lfs: true
-      - name: Create fresh commit and push to Hugging Face
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: |
-          git config --global user.email "action@github.com"
-          git config --global user.name "GitHub Action"
-          # Create a new orphan branch with no history
-          git checkout --orphan hf-sync
-          git add -A
-          git commit -m "Fresh sync: $(date +%Y-%m-%d\ %H:%M:%S)"
-          # Add Hugging Face remote
-          git remote add hf https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant
-          # Push LFS objects first
-          git lfs push hf hf-sync --all
-          # Force push as main to HF (overwrites all history)
-          git push hf hf-sync:main --force

.gitignore CHANGED Viewed

@@ -39,8 +39,6 @@ env/
 .spec-workflow/
 .playwright-mcp/
 *~
-CLAUDE.md
-commit_msg.txt
 # Configuration
 config.json
@@ -65,19 +63,14 @@ htmlcov/
 !reachy_mini_ha_voice/sounds/*.flac
 # Models (exclude package bundled files)
-# models/ - ignore external models directory
 models/
-# Package bundled models
-!reachy_mini_ha_voice/models/
-reachy_mini_ha_voice/models/*.tflite
-reachy_mini_ha_voice/models/*.onnx
-reachy_mini_ha_voice/models/*.pt
 # SDK Reference (local development only)
 reference/
-local/
 # ha/ - temporarily commented out for path fixes
 # ha/ will be moved to separate repository soon
-# Temporary check scripts
-temp_check_scripts/

 .spec-workflow/
 .playwright-mcp/
 *~
 # Configuration
 config.json
 !reachy_mini_ha_voice/sounds/*.flac
 # Models (exclude package bundled files)
 models/
+# *.tflite - bundled in package
+!reachy_mini_ha_voice/wakewords/*.tflite
+!reachy_mini_ha_voice/wakewords/**/*.tflite
+*.onnx
+!reachy_mini_ha_voice/models/*.onnx
 # SDK Reference (local development only)
 reference/
 # ha/ - temporarily commented out for path fixes
 # ha/ will be moved to separate repository soon

.pre-commit-config.yaml DELETED Viewed

@@ -1,20 +0,0 @@
-# Pre-commit hooks for code quality
-# Install: pip install pre-commit && pre-commit install
-# Run manually: pre-commit run --all-files
-repos:
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.6
-    hooks:
-      - id: ruff
-        args: [--fix]
-      - id: ruff-format
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.14.1
-    hooks:
-      - id: mypy
-        additional_dependencies: []
-        args: [--ignore-missing-imports]
-        # Only check changed files for speed
-        pass_filenames: true

CHANGELOG.md DELETED Viewed

@@ -1,713 +0,0 @@
-# Changelog
-All notable changes to the Reachy Mini HA Voice project will be documented in this file.
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
-and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [Unreleased]
-### Fixed
-- **NameError** - Add missing deque import in gesture smoother
-- **Syntax Error** - Add missing class indentation for volume methods in audio_player.py
-- **Audio Card Name Detection** - Use SDK's detection logic instead of hardcoded values
-- **SDK Port 8000 Blocking** - Use amixer directly for volume control to avoid SDK HTTP API blocking
-- **Memory Leak Root Cause** - Audio buffer array creation in loop causing unbounded memory growth
-- **Indentation Error** - Fix indentation in audio_player.py stop_sendspin method
-## [1.0.7] - 2026-05-05
-### Changed
-- Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers
-- Raise the Reachy Mini SDK baseline to `reachy-mini>=1.7.1`
-### Fixed
-- Keep wakeup/TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers
-- Synchronize `Idle Behavior` shutdown with ESPHome face/gesture switches and runtime state updates
-- Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior
-### Optimized
-- Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling
-## [1.0.6] - 2026-05-01
-### Changed
-- Align `pyproject.toml` with the current Reachy Mini SDK baseline by requiring `reachy-mini>=1.7.0`, `Python>=3.12`, `zeroconf>=0.131,<1`, `aiohttp`, `websockets>=12,<16`, and `gstreamer-bundle==1.28.1` on non-Linux platforms
-- Align Sendspin client dependency with the current upstream line via `aiosendspin>=5.1,<6.0`
-### Fixed
-- Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model
-### Optimized
-- Stop the camera server entirely when `Idle Behavior` is disabled instead of only unloading vision models, so idle-without-animation behaves more like a low-resource sleep state
-## [1.0.5] - 2026-04-12
-### Changed
-- Remove app-managed robot sleep/wake handling because the current Reachy Mini SDK no longer supports mini apps remaining active while the robot enters sleep
-- Keep resource suspend/resume limited to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery
-- Align `pyproject.toml` runtime constraints with the current Reachy Mini reference SDK package (`reachy-mini>=1.6.3`, `websockets>=12,<16`, Python baseline `>=3.10`, and uv gstreamer metadata)
-### Removed
-- Remove `SleepManager` integration and app-side sleep/wake callback flow from the voice assistant runtime
-- Remove Home Assistant sleep control entities and internal robot sleep state tracking from the mini app
-## [1.0.4] - 2026-03-19
-### Fixed
-- Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks
-- Replace direct SDK private `_respeaker` access with `audio_control_utils`-based ReSpeaker initialization
-- Tighten camera and pose composition to require current SDK media/utils APIs and valid `look_at_image` inputs
-### Improved
-- Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases
-- Replace separate wake/sleep buttons with a single sleep control entity
-- Update Sendspin integration for current `aiosendspin` lifecycle, stream handling, listener cleanup, and synchronized buffering
-- Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor
-## [1.0.3] - 2026-03-07
-### Added
-- Idle Random Actions switch in Home Assistant with preferences persistence and startup restore
-- Configurable `idle_random_actions` presets in `conversation_animations.json` for centralized idle motion tuning
-### Fixed
-- Remove duplicate `idle_random_actions` fields/methods and complete runtime control wiring in controller/entity registry/movement manager
-### Optimized
-- Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion
-- Remove `set_target` global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick
-- Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness
-## [1.0.2] - 2026-03-06
-### Fixed
-- Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness
-- Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise
-- Switch sleep/wake control to daemon API (`start` with `wake_up=true`, `stop` with `goto_sleep=true`) so `/api/daemon/status` reflects real sleep state on SDK 1.5
-- Normalize daemon status parsing for SDK 1.5 object-based status responses
-- Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts
-- Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome
-- Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection
-- Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)
-### Changed
-- Keep idle antenna behavior as animation-only control (no torque coupling)
-- Tighten preference loading to current schema (no legacy config fallback filtering)
-### Added
-- Home Assistant blueprint for Reachy presence companion automation
-- GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag
-### Improved
-- Blueprint supports device-first auto-binding and richer usage instructions
-- Refresh landing page (`index.html`) with current version, GitHub source link, and new Blueprint/Auto Release capability cards
-## [1.0.1] - 2026-03-05
-### Changed
-- Update runtime dependency baseline to `reachy-mini>=1.5.0`
-### Fixed
-- Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility
-- Remove legacy ZError string matching from connection error handling
-- Adapt daemon status handling to SDK v1.5 `DaemonStatus` object (prevents `AttributeError` on `status.get`)
-- Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating
-- Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)
-- Improve TTS streaming robustness and reduce cutoffs with retry-based audio push
-### Optimized
-- Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)
-- Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio
-## [1.0.0] - 2026-03-04
-### Changed
-- Require `reachy-mini[gstreamer]>=1.4.1`
-### Added
-- Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)
-- Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)
-- Face Confidence number entity (0.0-1.0, persistent)
-### Fixed
-- Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)
-- Auto-match ONNX gesture input size from model shape to prevent `INVALID_ARGUMENT` dimension errors
-- Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise
-- Enforce deterministic audio startup path and fail fast when microphone capture is not ready
-- Add on-demand `/snapshot` JPEG generation when no cached stream frame is available
-### Optimized
-- Unload/reload face and gesture models when toggled off/on to save resources
-- Update idle behavior to breathing + look-around alternation, idle antenna sway disabled
-- Adjust idle breathing to human-like cadence
-- Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)
-- Keep face/gesture AI processing active even when stream viewers are absent
-### Changed
-- Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps
-## [0.9.9] - 2026-01-28
-### Fixed
-- **SDK Buffer Overflow During Idle**
-  - Add SDK buffer flush on GStreamer lock timeout
-  - Prevents buffer overflow during long idle periods when lock contention prevents buffer drainage
-  - Audio thread flushes SDK audio buffer when lock acquisition times out
-  - Camera thread flushes SDK video buffer when lock acquisition times out
-  - Audio playback flushes SDK playback buffer when lock acquisition times out
-  - Resolves SDK crashes during extended wake-up idle periods without conversation
-  - Requires Reachy Mini hardware (not applicable to simulation mode)
-### Fixed
-- **Memory Leaks**
-  - Audio buffer memory leak - added size limit to prevent unbounded growth
-  - Temp file leak - downloaded audio files now cleaned up after playback
-  - Multiple memory leak and resource leak issues fixed
-  - Thread-safe draining flag using threading.Event
-  - Silent failures now logged for debugging
-### Optimized
-- **Gesture Recognition Sensitivity**
-  - Simplify GestureSmoother to frequency-based confirmation (1 frame)
-  - Remove all confidence filtering - return all detections to Home Assistant
-  - Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)
-  - Remove duplicate empty check in gesture detection
-  - Add GestureSmoother class with history tracking for stable output
-  - Reduce gesture detection interval from 3 frames to 1 frame for higher frequency
-  - Fix: Gesture detection now returns all detected hands instead of only the highest confidence one
-  - Matches reference implementation behavior for improved detection rate
-  - No conflicts with face tracking (shared frame, independent processing)
-### Code Quality
-- Fix Ruff linter issues (import ordering, missing newlines, __all__ sorting)
-- Format code with Ruff formatter (5 files reformatted)
-- Fix slice index error in gesture detection (convert coordinates to integers)
-- Fix Python 3.12 type annotation compatibility
-## [0.9.8] - 2026-01-27
-### New
-- Mute switch entity - suspends voice services only (not camera/motion)
-- Disable Camera switch entity - suspends camera and AI processing
-- Home Assistant connection-driven feature loading
-- Automatic suspend/resume on HA disconnect/reconnect
-### Fixed
-- Camera disable logic - corrected inverted conditions for proper operation
-- Prevent daemon crash when entering idle state
-- Camera preview in Home Assistant
-- SDK crash during idle - optimized audio processing to skip get_frame() when not streaming to Home Assistant, reducing GStreamer resource competition
-- Add GStreamer threading lock to prevent pipeline competition between audio, playback, and camera threads
-- Audio thread gets priority during conversations - bypasses lock when conversation is active
-- Remove GStreamer lock to fix wake word detection in idle state (lock was preventing wake word detection)
-### Optimized
-- Reduce log output by 30-40%
-- Bundle face tracking model with package - eliminated HuggingFace download dependency, removed huggingface_hub from requirements, models now load from local package directory for offline operation
-- Replace HTTP API polling with SDK Zenoh for daemon status monitoring to reduce uvicorn blocking and improve stability
-- Device ID now reads /etc/machine-id directly - removed uuid.getnode() and file persistence
-- Implement high-priority SDK improvements
-- Remove aiohttp dependency from daemon_monitor - fully migrated to SDK Zenoh
-### Removed
-- Temporarily disable emotion playback during TTS
-- Unused config items (connection_timeout)
-### Code Quality
-- Code quality improvements
-## [0.9.7] - 2026-01-20
-### Fixed
-- Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)
-- Animation file path corrected (was looking in wrong directory)
-- Remove hey_jarvis from required wake words (it's optional in openWakeWord/)
-## [0.9.6] - 2026-01-20
-### New
-- Add ruff linter/formatter and mypy type checker configuration
-- Add pre-commit hooks for automated code quality checks
-### Fixed
-- Remove duplicate resume() method in audio_player.py
-- Remove duplicate connection_lost() method in satellite.py
-- Store asyncio task reference in sleep_manager.py to prevent garbage collection
-### Optimized
-- Use dict.items() for efficient iteration in smoothing.py
-## [0.9.5] - 2026-01-19
-### Refactored
-- Modularize codebase - new core/motion/vision/audio/entities module structure
-- Remove legacy/compatibility code
-- Remove audio diagnostics debug code
-### New
-- Direct callbacks for HA sleep/wake buttons to suspend/resume services
-### Optimized
-- Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)
-- Audio loop delay reduced from 10ms to 1ms for faster VAD response
-- Stereo to mono conversion uses first channel instead of mean for cleaner signal
-### Improved
-- Camera resume_from_suspend now synchronous for reliable wake from sleep
-- Rotation clamping in face tracking to prevent IK collisions
-- Audio gain boosted for faster VAD detection
-- Audio NaN/Inf values causing STT issues fixed
-## [0.9.0] - 2026-01-18
-### New
-- Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect
-- System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors
-- Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)
-### Fixed
-- Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam
-### Improved
-- Graceful service lifecycle management with RobotStateMonitor callbacks
-## [0.8.7] - 2026-01-18
-### Fixed
-- Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback
-- Emotion moves and face tracking now respect SDK safety limits
-### Improved
-- Face tracking smoothness - removed EMA smoothing (matches reference project)
-- Face tracking timing updated to match reference (2s delay, 1s interpolation)
-## [0.8.6] - 2026-01-18
-### Fixed
-- Audio buffer memory leak - added size limit to prevent unbounded growth
-- Temp file leak - downloaded audio files now cleaned up after playback
-- Camera thread termination timeout increased for clean shutdown
-- Thread-safe draining flag using threading.Event
-- Silent failures now logged for debugging
-## [0.8.5] - 2026-01-18
-### Fixed
-- DOA turn-to-sound direction inverted - now turns correctly toward sound source
-- Graceful shutdown prevents daemon crash on app stop
-## [0.8.4] - 2026-01-18
-### Improved
-- Smooth idle animation with interpolation phase (matches reference BreathingMove)
-- Two-phase animation - interpolates to neutral before oscillation
-- Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway
-## [0.8.3] - 2026-01-18
-### Fixed
-- Body now properly follows head rotation during face tracking
-- body_yaw extracted from final head pose matrix and synced with head_yaw
-- Matches reference project sweep_look behavior for natural body movement
-## [0.8.2] - 2026-01-18
-### Fixed
-- Body follows head rotation during face tracking - body_yaw syncs with head_yaw
-- Matches reference project sweep_look behavior for natural body movement
-## [0.8.1] - 2026-01-18
-### Fixed
-- face_detected entity now pushes state updates to Home Assistant in real-time
-- Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention
-- Idle animation now starts immediately on app launch
-- Smooth antenna animation - removed pose change threshold for continuous motion
-## [0.8.0] - 2026-01-17
-### New
-- Comprehensive emotion keyword mapping with 280+ Chinese and English keywords
-- 35 emotion categories mapped to robot expressions
-- Auto-trigger expressions from conversation text patterns
-## [0.7.3] - 2026-01-12
-### Fixed
-- Revert to reference project pattern - use refractory period instead of state flags
-- Remove broken _in_pipeline and _tts_playing state management
-- Restore correct RUN_END event handling from linux-voice-assistant
-## [0.7.2] - 2026-01-12
-### Fixed
-- Remove premature _tts_played reset in RUN_END event
-- Ensure _in_pipeline stays True until TTS playback completes
-## [0.7.1] - 2026-01-12
-### Fixed
-- Prevent wake word detection during TTS playback
-- Add _tts_playing flag to track TTS audio state precisely
-## [0.7.0] - 2026-01-12
-### New
-- Gesture detection using HaGRID ONNX models (18 gesture classes)
-- gesture_detected and gesture_confidence entities in Home Assistant
-### Fixed
-- Gesture state now properly pushed to Home Assistant in real-time
-### Optimized
-- Aggressive power saving - 0.5fps idle mode after 30s without face
-- Gesture detection only runs when face detected (saves CPU)
-## [0.6.1] - 2026-01-12
-### Fixed
-- Prioritize MicroWakeWord over OpenWakeWord for same-name wake words
-- OpenWakeWord wake words now visible in Home Assistant selection
-- Stop word detection now works correctly
-- STT/LLM response time improved with fixed audio chunk size
-## [0.6.0] - 2026-01-11
-### New
-- Real-time audio-driven speech animation (SwayRollRT algorithm)
-- JSON-driven animation system - all animations configurable
-### Refactored
-- Remove hardcoded actions, use animation offsets only
-### Fixed
-- TTS audio analysis now works with local playback
-## [0.5.16] - 2026-01-11
-### Removed
-- Tap-to-wake feature (too many false triggers)
-### New
-- Continuous Conversation switch in Home Assistant
-### Refactored
-- Simplified satellite.py and voice_assistant.py
-## [0.5.15] - 2026-01-11
-### New
-- Audio settings persistence (AGC, Noise Suppression, Tap Sensitivity)
-### Refactored
-- Move Sendspin mDNS discovery to zeroconf.py
-### Fixed
-- Tap detection not re-enabled during emotion playback in conversation
-## [0.5.14] - 2026-01-11
-### Fixed
-- Skip ALL wake word processing when pipeline is active
-- Eliminate race condition in pipeline state during continuous conversation
-### Improved
-- Control loop increased to 100Hz (daemon updated)
-## [0.5.13] - 2026-01-10
-### New
-- JSON-driven animation system for conversation states
-- AnimationPlayer class inspired by SimpleDances project
-### Refactored
-- Replace SpeechSwayGenerator and BreathingAnimation with unified animation system
-## [0.5.12] - 2026-01-10
-### Removed
-- Deleted broken hey_reachy wake word model
-### Revert
-- Default wake word back to "Okay Nabu"
-## [0.5.11] - 2026-01-10
-### Fixed
-- Reset feature extractors when switching wake words
-- Add refractory period after wake word switch
-## [0.5.10] - 2026-01-10
-### Fixed
-- Wake word models now have 'id' attribute set correctly
-- Wake word switching from Home Assistant now works
-## [0.5.9] - 2026-01-10
-### New
-- Default wake word changed to hey_reachy
-### Fixed
-- Wake word switching bug
-## [0.5.8] - 2026-01-09
-### Fixed
-- Tap detection waits for emotion playback to complete
-- Poll daemon API for move completion
-## [0.5.7] - 2026-01-09
-### New
-- DOA turn-to-sound at wakeup
-### Fixed
-- Show raw DOA angle in Home Assistant (0-180)
-- Invert DOA yaw direction
-## [0.5.6] - 2026-01-08
-### Fixed
-- Better pipeline state tracking to prevent duplicate audio
-## [0.5.5] - 2026-01-08
-### New
-- Prevent concurrent pipelines
-- Add prompt sound for continuous conversation
-## [0.5.4] - 2026-01-08
-### Fixed
-- Wait for RUN_END before starting new conversation
-## [0.5.3] - 2026-01-08
-### Fixed
-- Improve continuous conversation with conversation_id tracking
-## [0.5.2] - 2026-01-08
-### Fixed
-- Enable HA control of robot pose
-- Continuous conversation improvements
-## [0.5.1] - 2026-01-08
-### Fixed
-- Sendspin connects to music_player instead of tts_player
-- Persist tap_sensitivity settings
-- Pause Sendspin during voice assistant wakeup
-- Sendspin prioritize 16kHz sample rate
-## [0.5.0] - 2026-01-07
-### New
-- Face tracking with adaptive frequency
-- Sendspin multi-room audio integration
-### Optimized
-- Shutdown mechanism improvements
-## [0.4.0] - 2026-01-07
-### Fixed
-- Daemon stability fixes
-### New
-- Face tracking enabled by default
-### Optimized
-- Microphone settings for better sensitivity
-## [0.3.0] - 2026-01-06
-### New
-- Tap sensitivity slider entity
-### Fixed
-- Music Assistant compatibility
-### Optimized
-- Face tracking and tap detection
-## [0.2.21] - 2026-01-06
-### Fixed
-- Daemon crash - reduce control loop to 2Hz
-- Pause control loop during audio playback
-## [0.2.20] - 2026-01-06
-### Revert
-- Audio/satellite/voice_assistant to v0.2.9 working state
-## [0.2.19] - 2026-01-06
-### Fixed
-- Force localhost connection mode to prevent WebRTC errors
-## [0.2.18] - 2026-01-06
-### Fixed
-- Audio playback - restore wakeup sound
-- Use push_audio_sample for TTS
-## [0.2.17] - 2026-01-06
-### Removed
-- head_joints/passive_joints entities
-- error_message to diagnostic category
-## [0.2.16] - 2026-01-06
-### Fixed
-- TTS playback - pause recording during playback
-## [0.2.15] - 2026-01-06
-### Fixed
-- Use play_sound() instead of push_audio_sample() for TTS
-## [0.2.14] - 2026-01-06
-### Fixed
-- Pause audio recording during TTS playback
-## [0.2.13] - 2026-01-06
-### Fixed
-- Don't manually start/stop media - let SDK/daemon manage it
-## [0.2.12] - 2026-01-05
-### Fixed
-- Disable breathing animation to prevent serial port overflow
-## [0.2.11] - 2026-01-05
-### Fixed
-- Disable wakeup sound to prevent daemon crash
-- Add debug logging for troubleshooting
-## [0.2.10] - 2026-01-05
-### Added
-- Debug logging for motion init
-### Fixed
-- Audio fallback samplerate
-## [0.2.9] - 2026-01-05
-### Removed
-- DOA/speech detection - replaced by face tracking
-## [0.2.8] - 2026-01-05
-### New
-- Replace DOA with YOLO face tracking
-## [0.2.7] - 2026-01-05
-### Fixed
-- Add DOA caching to prevent ReSpeaker query overload
-## [0.2.6] - 2026-01-05
-### New
-- Thread-safe ReSpeaker USB access to prevent daemon deadlock
-## [0.2.4] - 2026-01-05
-### Fixed
-- Microphone volume control via daemon HTTP API
-## [0.2.3] - 2026-01-05
-### Fixed
-- Daemon crash caused by conflicting pose commands
-- Disable: Pose setter methods in ReachyController
-## [0.2.2] - 2026-01-05
-### Fixed
-- Second conversation motion failure
-- Reduce: Control loop from 20Hz to 10Hz
-- Improve: Connection recovery (faster reconnect)
-## [0.2.1] - 2026-01-05
-### Fixed
-- Daemon crash issue
-- Optimize: Code structure
-## [0.2.0] - 2026-01-05
-### New
-- Automatic facial expressions during conversation
-- New: Emotion playback integration
-### Refactored
-- Integrate emotion playback into MovementManager
-## [0.1.5] - 2026-01-04
-### Optimized
-- Code splitting and organization
-### Fixed
-- Program crash issues
-## [0.1.0] - 2026-01-01
-### New
-- Initial release
-- ESPHome protocol server implementation
-- mDNS auto-discovery for Home Assistant
-- Local wake word detection (microWakeWord)
-- Voice assistant pipeline integration
-- Basic motion feedback (nod, shake)
----
-## Version History Summary
-| Version | Date | Major Changes |
-|---------|------|--------------|
-| 0.9.9 | 2026-01-28 | SDK buffer overflow fixes, memory leak fixes, gesture detection optimization |
-| 0.9.8 | 2026-01-27 | Mute/Disable entities, HA connection-driven features, log reduction |
-| 0.9.7 | 2026-01-20 | Device ID path fix, animation path fix |
-| 0.9.6 | 2026-01-20 | Code quality tools (ruff, mypy, pre-commit) |
-| 0.9.5 | 2026-01-19 | Modular architecture refactoring, audio latency optimization |
-| 0.9.0 | 2026-01-18 | Robot state monitor, system diagnostics entities |
-| 0.8.7 | 2026-01-18 | Body yaw clamping, face tracking smoothness |
-| 0.8.0 | 2026-01-17 | Emotion keyword mapping (280+ keywords, 35 categories) |
-| 0.7.0 | 2026-01-12 | Gesture detection with HaGRID ONNX models (18 gestures) |
-| 0.6.0 | 2026-01-11 | Real-time audio-driven speech animation, JSON animation system |
-| 0.5.0 | 2026-01-07 | Face tracking, Sendspin multi-room audio |
-| 0.4.0 | 2026-01-07 | Daemon stability, microphone optimization |
-| 0.3.0 | 2026-01-06 | Tap sensitivity slider |
-| 0.2.0 | 2026-01-05 | Emotion playback integration |
-| 0.1.0 | 2026-01-01 | Initial release |
-## Project Statistics
-- **Total Versions**: 29 (from 0.1.0 to 0.9.9)
-- **Development Period**: ~30 days (2026-01-01 to 2026-01-28)
-- **Average Release Rate**: ~1 version per day
-- **Lines of Code**: ~18,000 lines across 52 Python files
-- **ESPHome Entities**: 54 entities implemented
-- **Supported Features**:
-  - Voice assistant pipeline integration
-  - Local wake word detection (multiple models)
-  - Face tracking with YOLO
-  - Gesture detection (18 classes)
-  - Multi-room audio (Sendspin)
-  - Real-time speech animation
-  - Emotion keyword detection (280+ keywords)
-  - System diagnostics
-For detailed implementation notes, see [PROJECT_PLAN.md](./PROJECT_PLAN.md).

PROJECT_PLAN.md ADDED Viewed

	@@ -0,0 +1,1279 @@

+# Reachy Mini for Home Assistant - Project Plan
+## Project Overview
+Integrate Home Assistant voice assistant functionality into Reachy Mini Wi-Fi robot, communicating with Home Assistant via ESPHome protocol.
+## Local Reference Directories (DO NOT modify any files in reference directories)
+1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
+2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
+3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
+4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
+5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
+## Core Design Principles
+1. **Zero Configuration** - Users only need to install the app, no manual configuration required
+2. **Native Hardware** - Use robot's built-in microphone and speaker
+3. **Home Assistant Centralized Management** - All configuration done on Home Assistant side
+4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
+5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reference/reachy_mini) architecture design and constraints
+6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
+7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
+8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
+9. **Preserve Functionality** - Any code modifications should optimize while preserving completed features; do not remove features to solve problems. When issues occur, consult the reference examples first and fix the root cause, rather than adding more log output
+## Technical Architecture
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                              Reachy Mini (ARM64)                            │
+│                                                                             │
+│  ┌─────────────────────────────── AUDIO INPUT ───────────────────────────┐  │
+│  │  ReSpeaker XVF3800 (16kHz)                                            │  │
+│  │  ┌──────────────┐   ┌──────────────────────────────────────────────┐  │  │
+│  │  │ 4-Mic Array  │ → │ XVF3800 DSP                                  │  │  │
+│  │  └──────────────┘   │ • Echo Cancellation (AEC)                    │  │  │
+│  │                     │ • Noise Suppression (NS)                     │  │  │
+│  │                     │ • Auto Gain Control (AGC, max 30dB)          │  │  │
+│  │                     │ • Direction of Arrival (DOA)                 │  │  │
+│  │                     │ • Voice Activity Detection (VAD)             │  │  │
+│  │                     └──────────────────────────────────────────────┘  │  │
+│  │                                      │                                │  │
+│  │                                      ▼                                │  │
+│  │                     ┌──────────────────────────────────────────────┐  │  │
+│  │                     │ Wake Word Detection (microWakeWord)          │  │  │
+│  │                     │ • "Okay Nabu" / "Hey Jarvis"                 │  │  │
+│  │                     │ • Stop word detection                        │  │  │
+│  │                     └──────────────────────────────────────────────┘  │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+│                                                                             │
+│  ┌─────────────────────────────── AUDIO OUTPUT ──────────────────────────┐  │
+│  │  ┌──────────────────────────┐    ┌──────────────────────────────────┐ │  │
+│  │  │ TTS Player               │    │ Music Player (Sendspin)          │ │  │
+│  │  │ • Voice assistant speech │    │ • Multi-room audio streaming     │ │  │
+│  │  │ • Sound effects          │    │ • Auto-discovery via mDNS        │ │  │
+│  │  │ • Priority over music    │    │ • Auto-pause during conversation │ │  │
+│  │  └──────────────────────────┘    └──────────────────────────────────┘ │  │
+│  │                 │                              │                      │  │
+│  │                 └──────────────┬───────────────┘                      │  │
+│  │                                ▼                                      │  │
+│  │                 ┌──────────────────────────────────────────────────┐  │  │
+│  │                 │ ReSpeaker Speaker (16kHz)                        │  │  │
+│  │                 └──────────────────────────────────────────────────┘  │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+│                                                                             │
+│  ┌─────────────────────────── VISION & TRACKING ─────────────────────────┐  │
+│  │  ┌──────────────────────────┐    ┌──────────────────────────────────┐ │  │
+│  │  │ Camera (VPU accelerated) │ →  │ YOLO Face Detection              │ │  │
+│  │  │ • MJPEG stream server    │    │ • AdamCodd/YOLOv11n-face         │ │  │
+│  │  │ • ESPHome Camera entity  │    │ • Adaptive frame rate:           │ │  │
+│  │  └──────────────────────────┘    │   - 15fps: conversation/face     │ │  │
+│  │                                  │   - 2fps: idle (power saving)    │ │  │
+│  │                                  │ • look_at_image() pose calc      │ │  │
+│  │                                  │ • Smooth return after face lost  │ │  │
+│  │                                  └──────────────────────────────────┘ │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+│                                                                             │
+│  ┌─────────────────────────── MOTION CONTROL ────────────────────────────┐  │
+│  │  MovementManager (100Hz Control Loop)                                 │  │
+│  │  ┌────────────────────────────────────────────────────────────────┐   │  │
+│  │  │ Motion Layers (Priority: Move > Action > SpeechSway > Breath)  │   │  │
+│  │  │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌──────────────┐  │   │  │
+│  │  │ │ Move Queue │ │ Actions    │ │ SpeechSway │ │ Breathing    │  │   │  │
+│  │  │ │ (Emotions) │ │ (Nod/Shake)│ │ (Voice VAD)│ │ (Idle anim)  │  │   │  │
+│  │  │ └────────────┘ └────────────┘ └────────────┘ └──────────────┘  │   │  │
+│  │  └────────────────────────────────────────────────────────────────┘   │  │
+│  │                                                                       │  │
+│  │  ┌────────────────────────────────────────────────────────────────┐   │  │
+│  │  │ Face Tracking Offsets (Secondary Pose Overlay)                 │   │  │
+│  │  │ • Pitch offset: +9° (down compensation)                        │   │  │
+│  │  │ • Yaw offset: -7° (right compensation)                         │   │  │
+│  │  └────────────────────────────────────────────────────────────────┘   │  │
+│  │                                                                       │  │
+│  │   State Machine: on_wakeup → on_listening → on_speaking → on_idle     │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+│                                                                             │
+│  ┌─────────────────────────── TAP DETECTION ─────────────────────────────┐  │
+│  │  IMU Accelerometer (Wireless version only) - DISABLED                 │  │
+│  │  • Tap-to-wake: REMOVED (too many false triggers)                     │  │
+│  │  • Continuous conversation now controlled via Home Assistant switch   │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+│                                                                             │
+│  ┌─────────────────────────── ESPHOME SERVER ────────────────────────────┐  │
+│  │  Port 6053 (mDNS auto-discovery)                                      │  │
+│  │  • 43+ entities (sensors, controls, media player, camera)             │  │
+│  │  • Voice Assistant pipeline integration                               │  │
+│  │  • Real-time state synchronization                                    │  │
+│  └───────────────────────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────────────────────┘
+                                       │
+                                       │ ESPHome Protocol (protobuf)
+                                       ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                            Home Assistant                                   │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌────────────────────────────┐ │
+│  │ STT Engine       │  │ Intent Processing│  │ TTS Engine                 │ │
+│  │ (User configured)│  │ (Conversation)   │  │ (User configured)          │ │
+│  └──────────────────┘  └──────────────────┘  └────────────────────────────┘ │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+## Completed Features
+### Core Features
+- [x] ESPHome protocol server implementation
+- [x] mDNS service discovery (auto-discovered by Home Assistant)
+- [x] Local wake word detection (microWakeWord)
+- [x] Continuous conversation mode (controlled via Home Assistant switch)
+- [x] Audio stream transmission to Home Assistant
+- [x] TTS audio playback
+- [x] Stop word detection
+### Reachy Mini Integration
+- [x] Use Reachy Mini SDK microphone input
+- [x] Use Reachy Mini SDK speaker output
+- [x] Head motion control (nod, shake, gaze)
+- [x] Antenna animation control
+- [x] Voice state feedback actions
+- [x] YOLO face tracking (takes over from DOA turn-to-sound after wakeup)
+- [x] 100Hz unified motion control loop
+### Application Architecture
+- [x] Compliant with Reachy Mini App architecture
+## File List
+```
+reachy_mini_ha_voice/
+├── reachy_mini_ha_voice/
+│   ├── __init__.py             # Package initialization
+│   ├── __main__.py             # Command line entry
+│   ├── main.py                 # ReachyMiniApp entry
+│   ├── voice_assistant.py      # Voice assistant service
+│   ├── satellite.py            # ESPHome protocol handling
+│   ├── audio_player.py         # Audio player
+│   ├── camera_server.py        # MJPEG camera stream server + face tracking
+│   ├── head_tracker.py         # YOLO face detector
+│   ├── motion.py               # Motion control (high-level API)
+│   ├── movement_manager.py     # Unified movement manager (100Hz control loop)
+│   ├── animation_player.py     # JSON-driven animation system
+│   ├── models.py               # Data models
+│   ├── entity.py               # ESPHome base entity
+│   ├── entity_extensions.py    # Extended entity types
+│   ├── entity_registry.py      # Entity registry
+│   ├── reachy_controller.py    # Reachy Mini controller wrapper
+│   ├── gesture_detector.py     # Gesture detection
+│   ├── api_server.py           # API server
+│   ├── zeroconf.py             # mDNS discovery
+│   └── util.py                 # Utility functions
+├── animations/                 # Animation definitions
+│   └── conversation_animations.json  # Conversation state animations
+├── wakewords/                  # Wake word models (auto-download)
+│   ├── okay_nabu.json
+│   ├── okay_nabu.tflite
+│   ├── hey_jarvis.json
+│   ├── hey_jarvis.tflite
+│   ├── stop.json
+│   └── stop.tflite
+├── sounds/                     # Sound effect files (auto-download)
+│   ├── wake_word_triggered.flac
+│   └── timer_finished.flac
+├── pyproject.toml              # Project configuration
+├── README.md                   # Documentation
+└── PROJECT_PLAN.md             # Project plan
+```
+## Dependencies
+```toml
+dependencies = [
+    "reachy-mini",           # Reachy Mini SDK
+    "sounddevice>=0.4.6",    # Audio processing (backup)
+    "soundfile>=0.12.0",     # Audio file reading
+    "numpy>=1.24.0",         # Numerical computation
+    "pymicro-wakeword>=2.0.0,<3.0.0",  # Wake word detection
+    "pyopen-wakeword>=1.0.0,<2.0.0",   # Backup wake word
+    "aioesphomeapi>=42.0.0", # ESPHome protocol
+    "zeroconf>=0.100.0",     # mDNS discovery
+    "scipy>=1.10.0",         # Motion control
+    "pydantic>=2.0.0",       # Data validation
+]
+```
+## Usage Flow
+1. **Install App**
+   - Install `reachy_mini_ha_voice` from Reachy Mini App Store
+2. **Start App**
+   - App auto-starts ESPHome server (port 6053)
+   - Auto-downloads required models and sounds
+3. **Connect Home Assistant**
+   - Home Assistant auto-discovers device (mDNS)
+   - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
+4. **Use Voice Assistant**
+   - Say "Okay Nabu" to wake
+   - Speak command
+   - Reachy Mini provides motion feedback
+## ESPHome Entity Planning
+Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
+### Implemented Entities
+| Entity Type | Name | Description |
+|-------------|------|-------------|
+| Media Player | `media_player` | Audio playback control |
+| Voice Assistant | `voice_assistant` | Voice assistant pipeline |
+### Implemented Control Entities (Read/Write)
+#### Phase 1-3: Basic Controls and Pose
+| ESPHome Entity Type | Name | SDK API | Range/Options | Description |
+|---------------------|------|---------|---------------|-------------|
+| `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
+| `Select` | `motor_mode` | `set_motor_control_mode()` | enabled/disabled/gravity_compensation | Motor mode selection |
+| `Switch` | `motors_enabled` | `enable_motors()` / `disable_motors()` | on/off | Motor torque switch |
+| `Button` | `wake_up` | `mini.wake_up()` | - | Wake robot action |
+| `Button` | `go_to_sleep` | `mini.goto_sleep()` | - | Sleep robot action |
+| `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
+| `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
+| `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
+| `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
+| `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
+| `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
+| `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
+| `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
+| `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
+#### Phase 4: Gaze Control
+| ESPHome Entity Type | Name | SDK API | Range/Options | Description |
+|---------------------|------|---------|---------------|-------------|
+| `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
+| `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
+| `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
+### Implemented Sensor Entities (Read-only)
+#### Phase 1 & 5: Basic Status and Audio Sensors
+| ESPHome Entity Type | Name | SDK API | Description |
+|---------------------|------|---------|-------------|
+| `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
+| `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
+| `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
+| `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
+| `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
+#### Phase 6: Diagnostic Information
+| ESPHome Entity Type | Name | SDK API | Description |
+|---------------------|------|---------|-------------|
+| `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
+| `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
+| `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
+| `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
+| `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
+| `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
+#### Phase 7: IMU Sensors (Wireless version only)
+| ESPHome Entity Type | Name | SDK API | Description |
+|---------------------|------|---------|-------------|
+| `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
+| `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
+| `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
+| `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
+| `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
+| `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
+| `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
+#### Phase 8-12: Extended Features
+| ESPHome Entity Type | Name | Description |
+|---------------------|------|-------------|
+| `Select` | `emotion` | Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust) |
+| `Number` | `microphone_volume` | Microphone volume (0-100%) |
+| `Camera` | `camera` | ESPHome Camera entity (live preview) |
+| `Number` | `led_brightness` | LED brightness (0-100%) - disabled, see Phase 11 |
+| `Select` | `led_effect` | LED effect (off/solid/breathing/rainbow/doa) - disabled, see Phase 11 |
+| `Number` | `led_color_r` | LED red component (0-255) - disabled, see Phase 11 |
+| `Number` | `led_color_g` | LED green component (0-255) - disabled, see Phase 11 |
+| `Number` | `led_color_b` | LED blue component (0-255) - disabled, see Phase 11 |
+| `Switch` | `agc_enabled` | Auto gain control switch |
+| `Number` | `agc_max_gain` | AGC max gain (0-30 dB) |
+| `Number` | `noise_suppression` | Noise suppression level (0-100%) |
+| `Binary Sensor` | `echo_cancellation_converged` | Echo cancellation convergence status |
+> **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
+> using `Number` type for bidirectional control. Call `goto_target()` when setting new values, call `get_current_head_pose()` etc. when reading current values.
+### Implementation Priority
+1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
+   - [x] `daemon_state` - Daemon status sensor
+   - [x] `backend_ready` - Backend ready status
+   - [x] `error_message` - Error message
+   - [x] `speaker_volume` - Speaker volume control
+2. **Phase 2 - Motor Control** (High Priority) ✅ **Completed**
+   - [x] `motors_enabled` - Motor switch
+   - [x] `motor_mode` - Motor mode selection (enabled/disabled/gravity_compensation)
+   - [x] `wake_up` / `go_to_sleep` - Wake/sleep buttons
+3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
+   - [x] `head_x/y/z` - Head position control
+   - [x] `head_roll/pitch/yaw` - Head angle control
+   - [x] `body_yaw` - Body yaw angle control
+   - [x] `antenna_left/right` - Antenna angle control
+4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
+   - [x] `look_at_x/y/z` - Gaze point coordinate control
+5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
+   - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
+   - [x] `speech_detected` - Speech detection status
+   - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
+   - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
+   - Note: DOA only read once at wakeup to avoid daemon pressure; face tracking takes over after
+6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
+   - [x] `control_loop_frequency` - Control loop frequency
+   - [x] `sdk_version` - SDK version
+   - [x] `robot_name` - Robot name
+   - [x] `wireless_version` - Wireless version flag
+   - [x] `simulation_mode` - Simulation mode flag
+   - [x] `wlan_ip` - Wireless IP address
+7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
+   - [x] `imu_accel_x/y/z` - Accelerometer
+   - [x] `imu_gyro_x/y/z` - Gyroscope
+   - [x] `imu_temperature` - IMU temperature
+8. **Phase 8 - Emotion Control** ✅ **Completed**
+   - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
+9. **Phase 9 - Audio Control** ✅ **Completed**
+   - [x] `microphone_volume` - Microphone volume control (0-100%)
+10. **Phase 10 - Camera Integration** ✅ **Completed**
+    - [x] `camera` - ESPHome Camera entity (live preview)
+11. **Phase 11 - LED Control** ❌ **Disabled (LEDs hidden inside robot)**
+    - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
+    - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
+    - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
+12. **Phase 12 - Audio Processing Parameters** ✅ **Completed**
+    - [x] `agc_enabled` - Auto gain control switch
+    - [x] `agc_max_gain` - AGC max gain (0-30 dB)
+    - [x] `noise_suppression` - Noise suppression level (0-100%)
+    - [x] `echo_cancellation_converged` - Echo cancellation convergence status (read-only)
+13. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
+    - [x] `sendspin_enabled` - Sendspin switch (Switch)
+    - [x] `sendspin_url` - Sendspin server URL (Text Sensor)
+    - [x] `sendspin_connected` - Sendspin connection status (Binary Sensor)
+    - [x] AudioPlayer integrates aiosendspin library
+    - [x] TTS audio sent to both local speaker and Sendspin server
+14. **Phase 22 - Gesture Detection** ✅ **Completed**
+    - [x] `gesture_detected` - Detected gesture name (Text Sensor)
+    - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
+    - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
+    - [x] Real-time state push to Home Assistant
+    - [x] 18 supported gestures:
+      | Gesture | Emoji | Gesture | Emoji |
+      |---------|-------|---------|-------|
+      | call | 🤙 | like | 👍 |
+      | dislike | 👎 | mute | 🤫 |
+      | fist | ✊ | ok | 👌 |
+      | four | 🖐️ | one | ☝️ |
+      | palm | ✋ | peace | ✌️ |
+      | peace_inverted | 🔻✌️ | rock | 🤘 |
+      | stop | 🛑 | stop_inverted | 🔻🛑 |
+      | three | 3️⃣ | three2 | 🤟 |
+      | two_up | ✌️☝️ | two_up_inverted | 🔻✌️☝️ |
+---
+## 🎉 Phase 1-13 + Phase 22 Entities Completed!
+**Total Completed: 47 entities**
+- Phase 1: 4 entities (Basic status and volume)
+- Phase 2: 4 entities (Motor control)
+- Phase 3: 9 entities (Pose control)
+- Phase 4: 3 entities (Gaze control)
+- Phase 5: 2 entities (Audio sensors)
+- Phase 6: 6 entities (Diagnostic information)
+- Phase 7: 7 entities (IMU sensors)
+- Phase 8: 1 entity (Emotion control)
+- Phase 9: 1 entity (Microphone volume)
+- Phase 10: 1 entity (Camera)
+- Phase 11: 0 entities (LED control - Disabled)
+- Phase 12: 4 entities (Audio processing parameters)
+- Phase 13: 3 entities (Sendspin audio output)
+- Phase 22: 2 entities (Gesture detection)
+---
+## 🚀 Voice Assistant Enhancement Features Implementation Status
+### Phase 14 - Emotion Action Feedback System (Partial) 🟡
+**Implementation Status**: Basic infrastructure ready, supports manual trigger, uses voice-driven natural micro-movements during conversation
+**Implemented Features**:
+- ✅ Phase 8 Emotion Selector entity (`emotion`)
+- ✅ Basic emotion action playback API (`_play_emotion`)
+- ✅ Emotion mapping: Happy/Sad/Angry/Fear/Surprise/Disgust
+- ✅ Integration with HuggingFace action library (`pollen-robotics/reachy-mini-emotions-library`)
+- ✅ SpeechSway system for natural head micro-movements during conversation (non-blocking)
+- ✅ Tap detection disabled during emotion playback (polls daemon API for completion)
+**Design Decisions**:
+- 🎯 No auto-play of full emotion actions during conversation to avoid blocking
+- 🎯 Use voice-driven head sway (SpeechSway) for natural motion feedback
+- 🎯 Emotion actions retained as manual trigger feature via ESPHome entity
+- 🎯 Tap detection waits for actual move completion via `/api/move/running` polling
+**Not Implemented**:
+- ❌ Auto-trigger emotion actions based on voice assistant response (decided not to implement to avoid blocking)
+- ❌ Intent recognition and emotion matching
+- ❌ Dance action library integration
+- ❌ Context awareness (e.g., weather query - sunny plays happy, rainy plays sad)
+**Code Locations**:
+- `entity_registry.py:633-658` - Emotion Selector entity
+- `satellite.py:_play_emotion()` - Emotion playback with move UUID tracking
+- `satellite.py:_wait_for_move_completion()` - Polls daemon API for move completion
+- `motion.py:132-156` - Conversation start motion control (uses SpeechSway)
+- `movement_manager.py:541-595` - Move queue management (allows SpeechSway overlay)
+**Actual Behavior**:
+| Voice Assistant Event | Actual Action | Implementation Status |
+|----------------------|---------------|----------------------|
+| Wake word detected | Turn toward sound source + nod confirmation | ✅ Implemented |
+| Conversation start | Voice-driven head micro-movements (SpeechSway) | ✅ Implemented |
+| During conversation | Continuous voice-driven micro-movements + breathing animation | ✅ Implemented |
+| Conversation end | Return to neutral position + breathing animation | ✅ Implemented |
+| Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
+**Technical Details**:
+```python
+# motion.py - Use SpeechSway instead of full emotion actions during conversation
+def on_speaking_start(self):
+    self._is_speaking = True
+    self._movement_manager.set_state(RobotState.SPEAKING)
+    # SpeechSway automatically generates natural head micro-movements based on audio loudness
+    # No full emotion actions played to avoid blocking conversation experience
+# movement_manager.py - Motion layering system
+# 1. Move queue (emotion actions) - Sets base pose
+# 2. Action (nod/shake etc.) - Overlays on base pose
+# 3. SpeechSway - Voice-driven micro-movements, can coexist with Move
+# 4. Breathing - Idle breathing animation
+```
+**Original Plan** (Decided not to implement to avoid blocking conversation):
+| Voice Assistant Event | Original Planned Action | Reason Not Implemented |
+|----------------------|------------------------|------------------------|
+| Positive response received | Play "happy" action | Full action would block conversation fluency |
+| Negative response received | Play "sad" action | Full action would block conversation fluency |
+| Play music/entertainment | Play "dance" action | Full action would block conversation fluency |
+| Timer completed | Play "alert" action | Full action would block conversation fluency |
+| Error/cannot understand | Play "confused" action | Full action would block conversation fluency |
+**Manual Emotion Trigger Example**:
+```yaml
+# Home Assistant automation example - Manual emotion trigger
+automation:
+  - alias: "Reachy Good Morning Greeting"
+    trigger:
+      - platform: time
+        at: "07:00:00"
+    action:
+      - service: select.select_option
+        target:
+          entity_id: select.reachy_mini_emotion
+        data:
+          option: "Happy"
+```
+### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) ✅ **Completed**
+**Goal**: Implement natural face tracking so the robot looks at the speaker during conversation.
+**Design Decision**:
+- ✅ DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
+- ✅ YOLO face detection: Takes over after initial turn for continuous tracking
+- Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking
+**Wakeup Turn-to-Sound Flow**:
+1. Wake word detected → Read DOA angle once (avoid daemon pressure)
+2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
+3. Face tracking takes over for continuous tracking during conversation
+**Implemented Features**:
+| Feature | Description | Implementation Location | Status |
+|---------|-------------|------------------------|--------|
+| DOA turn-to-sound | Turn toward speaker at wakeup | `satellite.py:_turn_to_sound_source()` | ✅ Implemented |
+| YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `head_tracker.py` | ✅ Implemented |
+| Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
+| look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
+| Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
+| face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
+| DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
+| Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
+| Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
+**Resource Optimization (v0.5.1, updated v0.6.2)**:
+- During conversation (listening/thinking/speaking): High-frequency tracking 15fps
+- Idle with face detected: High-frequency tracking 15fps
+- Idle without face for 5s: Low-power mode 2fps
+- Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
+- Gesture detection only runs when face detected recently (within 5s)
+- Immediately restore high-frequency tracking when face detected
+**Code Locations**:
+- `satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
+- `head_tracker.py` - YOLO face detector (`HeadTracker` class)
+- `camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
+- `camera_server.py:set_conversation_mode()` - Conversation mode switch API
+- `satellite.py:_set_conversation_mode()` - Voice assistant state integration
+- `movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
+**Technical Details**:
+```python
+# camera_server.py - Adaptive frame rate face tracking
+class MJPEGCameraServer:
+    def __init__(self):
+        self._fps_high = 15  # During conversation/face detected
+        self._fps_low = 2    # Idle without face (5-30s)
+        self._fps_idle = 0.5 # Ultra-low power (>30s without face)
+        self._low_power_threshold = 5.0   # 5s without face switches to low power
+        self._idle_threshold = 30.0       # 30s without face switches to idle mode
+    def _should_run_ai_inference(self, current_time):
+        # Conversation mode: Always high-frequency tracking
+        if self._in_conversation:
+            return True
+        # High-frequency mode: Track every frame
+        if self._current_fps == self._fps_high:
+            return True
+        # Low/idle power mode: Periodic detection
+        return current_time - self._last_inference_time >= 1 / self._current_fps
+# satellite.py - Voice assistant state integration
+def _reachy_on_listening(self):
+    self._set_conversation_mode(True)  # Start conversation, high-frequency tracking
+def _reachy_on_idle(self):
+    self._set_conversation_mode(False)  # End conversation, adaptive tracking
+```
+### Phase 16 - Cartoon Style Motion Mode (Partial) 🟡
+**Goal**: Use SDK interpolation techniques for more expressive robot movements.
+**SDK Support**: `InterpolationTechnique` enum
+- `LINEAR` - Linear, mechanical feel
+- `MIN_JERK` - Minimum jerk, natural and smooth (default)
+- `EASE_IN_OUT` - Ease in-out, elegant
+- `CARTOON` - Cartoon style, with bounce effect, lively and cute
+**Implemented Features**:
+- ✅ 100Hz unified control loop (`movement_manager.py`) - Restored to 100Hz after daemon update
+- ✅ JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
+- ✅ Conversation state animations (idle/listening/thinking/speaking)
+- ✅ Pose change detection - Only send commands on significant changes (threshold 0.005)
+- ✅ State query caching - 2s TTL, reduces daemon load
+- ✅ Smooth interpolation (ease in-out curve)
+- ✅ Command queue mode - Thread-safe external API
+- ✅ Error throttling - Prevents log explosion
+- ✅ Connection health monitoring - Auto-detect and recover from connection loss
+**Animation System (v0.5.13)**:
+- `AnimationPlayer` class loads animations from `conversation_animations.json`
+- Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
+- Smooth transitions between animations (configurable duration)
+- State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
+**Not Implemented**:
+- ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
+- ❌ Exaggerated cartoon bounce effects
+**Code Locations**:
+- `animation_player.py` - AnimationPlayer class
+- `animations/conversation_animations.json` - Animation definitions
+- `movement_manager.py` - 100Hz control loop with animation integration
+**Scene Implementation Status**:
+| Scene | Recommended Interpolation | Effect | Status |
+|-------|--------------------------|--------|--------|
+| Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
+| Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
+| Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
+| Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
+| Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
+| Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
+### Phase 17 - Antenna Sync Animation During Speech (Completed) ✅
+**Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
+**Implemented Features**:
+- ✅ JSON-driven animation system with antenna movements
+- ✅ Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
+- ✅ State-specific antenna animations (listening/thinking/speaking)
+- ✅ Smooth transitions between animation states
+**Code Locations**:
+- `animation_player.py` - AnimationPlayer with antenna offset calculation
+- `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
+- `movement_manager.py` - Antenna offset composition in final pose
+### Phase 18 - Visual Gaze Interaction (Not Implemented) ❌
+**Goal**: Use camera to detect faces for eye contact.
+**SDK Support**:
+- `look_at_image(u, v)` - Look at point in image
+- `look_at_world(x, y, z)` - Look at world coordinate point
+- `media.get_frame()` - Get camera frame (✅ Already implemented in `camera_server.py:146`)
+**Not Implemented Features**:
+| Feature | Description | Status |
+|---------|-------------|--------|
+| Face detection | Use OpenCV/MediaPipe to detect faces | ❌ Not implemented |
+| Eye tracking | Look at speaker's face during conversation | ❌ Not implemented |
+| Multi-person switching | When multiple people detected, look at current speaker | ❌ Not implemented |
+| Idle scanning | Randomly look around when idle | ❌ Not implemented |
+### Phase 19 - Gravity Compensation Interactive Mode (Partial) 🟡
+**Goal**: Allow users to physically touch and guide robot head for "teaching" style interaction.
+**SDK Support**: `enable_gravity_compensation()` - Motors enter gravity compensation mode so the head can be moved by hand
+**Implemented Features**:
+- ✅ Gravity compensation mode switch (`motor_mode` Select entity, option "gravity_compensation")
+- ✅ `reachy_controller.py:236-237` - Gravity compensation API call
+**Not Implemented**:
+- ❌ Teaching mode - Record motion trajectory
+- ❌ Save/playback custom actions
+- ❌ Voice command triggered teaching flow
+**Application Scenarios**:
+- ❌ User says "Let me teach you a move" → Enter gravity compensation mode
+- ❌ User manually moves head → Record motion trajectory
+- ❌ User says "Remember this" → Save action
+- ❌ User says "Do that action again" → Playback recorded action
+### Phase 20 - Environment Awareness Response (Partial) 🟡
+**Goal**: Use IMU sensors to sense environment changes and respond.
+**SDK Support**:
+- ✅ `mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
+- ✅ `mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
+**Implemented Features**:
+| Detection Event | Response Action | Status |
+|-----------------|-----------------|--------|
+| Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
+**Tap-to-wake REMOVED** (v0.5.16):
+- Too many false triggers from robot movement and vibrations
+- Continuous conversation mode now controlled via "Continuous Conversation" switch in Home Assistant
+- Users can enable/disable continuous conversation from HA dashboard
+**Technical Implementation**:
+- `models.py` - `Preferences.continuous_conversation` field
+- `entity_registry.py` - `continuous_conversation` Switch entity (Phase 21)
+- `satellite.py` - `_handle_run_end()` checks `preferences.continuous_conversation`
+**Not Implemented**:
+| Detection Event | Response Action | Status |
+|-----------------|-----------------|--------|
+| Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
+| Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
+| Long idle | Enter sleep animation | ❌ Not implemented |
+### Phase 21 - Home Assistant Scene Integration (Not Implemented) ❌
+**Goal**: Trigger robot actions based on Home Assistant scenes/automations.
+**Implementation**: Via ESPHome service calls
+**Not Implemented Scenes**:
+| HA Scene | Robot Response | Status |
+|----------|----------------|--------|
+| Good morning scene | Play wake action + "Good morning!" | ❌ Not implemented |
+| Good night scene | Play sleep action + "Good night~" | ❌ Not implemented |
+| Someone home | Turn toward door + wave + "Welcome home!" | ❌ Not implemented |
+| Doorbell rings | Turn toward door + alert action | ❌ Not implemented |
+| Play music | Sway with music rhythm | ❌ Not implemented |
+---
+## 📊 Feature Implementation Summary
+### ✅ Completed Features
+#### Core Voice Assistant (Phase 1-12)
+- **45+ ESPHome entities** - All implemented
+- **Basic voice interaction** - Wake word detection, STT/TTS integration
+- **Motion feedback** - Nod, shake, gaze and other basic actions
+- **Audio processing** - AGC, noise suppression, echo cancellation
+- **Camera stream** - MJPEG live preview
+#### Partially Implemented Features (Phase 14-21)
+- **Phase 14** - Emotion action API infrastructure (manual trigger available)
+- **Phase 19** - Gravity compensation mode switch (teaching flow not implemented)
+### ❌ Not Implemented Features
+#### High Priority
+- ~~**Phase 13** - Sendspin audio playback support~~ ✅ **Completed**
+- **Phase 14** - Auto emotion action feedback (needs voice assistant event association)
+- **Phase 15** - Continuous sound source tracking (only turn toward at wakeup)
+#### Medium Priority
+- **Phase 16** - Cartoon style motion mode (needs dynamic interpolation switching)
+- **Phase 17** - Antenna sync animation
+- **Phase 18** - Face tracking and eye contact interaction
+#### Low Priority
+- **Phase 19** - Teaching mode record/playback functionality
+- **Phase 20** - IMU environment awareness response
+- **Phase 21** - Home Assistant scene integration
+---
+## Feature Priority Summary (Updated)
+### High Priority (Completed ✅)
+- ✅ **Phase 1-12**: Basic ESPHome entities (45+)
+- ✅ Core voice assistant functionality
+- ✅ Basic motion feedback (nod, shake, gaze)
+### High Priority (Partial 🟡)
+- 🟡 **Phase 13**: Emotion action feedback system
+  - ✅ Emotion Selector entity and API infrastructure
+  - ❌ Auto-trigger emotion actions based on voice assistant response
+  - ❌ Intent recognition and emotion matching
+  - ❌ Dance action library integration
+### High Priority (Not Implemented ❌)
+- ❌ **Phase 14**: Smart sound source tracking enhancement
+  - ✅ Turn toward sound source at wakeup
+  - ❌ Continuous sound source tracking
+  - ❌ Multi-person conversation switching
+  - ❌ Sound source visualization
+### Medium Priority (Completed ✅)
+- ✅ **Phase 15**: Cartoon style motion mode
+  - ✅ 100Hz unified control loop architecture (restored after daemon update)
+  - ✅ JSON-driven animation system (AnimationPlayer)
+  - ✅ Conversation state animations (idle/listening/thinking/speaking)
+  - ✅ Pose change detection + state query caching (reduces daemon load)
+  - ❌ Dynamic interpolation technique switching (CARTOON etc.)
+- ✅ **Phase 16**: Antenna sync during speech
+  - ✅ JSON-driven antenna animations with different patterns (both/wiggle)
+  - ✅ State-specific antenna movements
+### Medium Priority (Not Implemented ❌)
+- ❌ **Phase 17**: Visual gaze interaction - Eye contact
+### Low Priority (Partial 🟡)
+- 🟡 **Phase 18**: Gravity compensation interactive mode
+  - ✅ Gravity compensation mode switch
+  - ❌ Teaching style interaction (record/playback functionality)
+### Low Priority (Not Implemented ❌)
+- ❌ **Phase 19**: Environment awareness response - IMU triggered actions
+- ❌ **Phase 20**: Home Assistant scene integration - Smart home integration
+---
+## 📈 Completion Statistics
+| Phase | Status | Completion | Notes |
+|-------|--------|------------|-------|
+| Phase 1-12 | ✅ Complete | 100% | 40 ESPHome entities implemented (Phase 11 LED disabled) |
+| Phase 13 | 🟡 Partial | 30% | API infrastructure ready, missing auto-trigger |
+| Phase 14 | ❌ Not done | 20% | Only turn toward at wakeup implemented |
+| Phase 15 | 🟡 Partial | 80% | 100Hz control loop + JSON animation system + pose change detection + state cache implemented |
+| Phase 16 | ✅ Complete | 100% | JSON-driven animation with antenna movements |
+| Phase 17 | ❌ Not done | 10% | Camera implemented, missing face detection |
+| Phase 18 | 🟡 Partial | 40% | Mode switch implemented, missing teaching flow |
+| Phase 19 | ❌ Not done | 10% | IMU data exposed, missing trigger logic |
+| Phase 20 | ❌ Not done | 0% | Not implemented |
+**Overall Completion**: **Phase 1-12: 100%** | **Phase 13-20: ~35%**
+---
+## 🔧 Daemon Crash Fix (2026-01-05)
+### Problem Description
+During long-term operation, `reachy_mini daemon` would crash, causing the robot to become unresponsive.
+### Root Cause
+1. **100Hz control loop too frequent** - Calling `robot.set_target()` every 10ms, even when pose hasn't changed
+2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
+3. **Missing change detection** - Even when pose hasn't changed, continues sending same commands
+4. **Zenoh message queue blocking** - More than 150 messages per second accumulated, faster than the daemon could process them
+### Fix Solution
+#### 1. Reduce control loop frequency (movement_manager.py)
+```python
+# Reduced from 100Hz to 20Hz
+CONTROL_LOOP_FREQUENCY_HZ = 20  # 80% reduction in messages
+```
+#### 2. Add pose change detection (movement_manager.py)
+```python
+# Only send commands on significant pose changes
+if self._last_sent_pose is not None:
+    max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
+    if max_diff < 0.001:  # Threshold: 0.001 rad or 0.001 m
+        return  # Skip sending
+```
+#### 3. State query caching (reachy_controller.py)
+```python
+# Cache daemon status query results
+self._cache_ttl = 0.1  # 100ms TTL
+self._last_status_query = 0.0
+def _get_cached_status(self):
+    now = time.time()
+    if now - self._last_status_query < self._cache_ttl:
+        return self._state_cache.get('status')  # Use cache
+    # ... query and update cache
+```
+#### 4. Head pose query caching (reachy_controller.py)
+```python
+# Cache get_current_head_pose() and get_current_joint_positions() results
+def _get_cached_head_pose(self):
+    # Reuse cached results within 100ms
+```
+### Fix Results
+| Metric | Before Fix | After Fix | Improvement |
+|--------|------------|-----------|-------------|
+| Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
+| State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
+| Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
+| Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
+| Expected stability | Crash within hours | Stable for days | Major improvement |
+### Related Files
+- `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
+- `movement_manager.py` - Control loop optimization
+- `reachy_controller.py` - State query caching
+### Future Optimization Suggestions
+1. ⏳ Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
+2. ⏳ Batch state queries - Get all states at once
+3. ⏳ Performance monitoring and alerts - Real-time daemon health monitoring
+---
+## 🔧 Daemon Crash Deep Fix (2026-01-07)
+> **Update (2026-01-12)**: After daemon updates and further testing, control loop frequency has been restored to 100Hz (same as `reachy_mini_conversation_app`). The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
+### Problem Description
+During long-term operation, `reachy_mini daemon` still crashes; the previous fix was not thorough enough.
+### Root Cause Analysis
+Through deep analysis of SDK source code:
+1. **Each `set_target()` sends 3 Zenoh messages**
+   - `set_target_head_pose()` - 1 message
+   - `set_target_antenna_joint_positions()` - 1 message
+   - `set_target_body_yaw()` - 1 message
+2. **Daemon control loop is 50Hz**
+   - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
+   - If message send frequency exceeds 50Hz, daemon may not process in time
+3. **Previous 20Hz control loop still too high**
+   - 20Hz × 3 messages = 60 messages/second
+   - Already exceeds daemon's 50Hz processing capacity
+4. **Pose change threshold too small (0.002)**
+   - Breathing animation, speech sway, face tracking continuously produce tiny changes
+   - Almost every loop triggers `set_target()`
+### Fix Solution
+#### 1. Further reduce control loop frequency (movement_manager.py)
+```python
+# Reduced from 20Hz to 10Hz
+# 10Hz × 3 messages = 30 messages/second, safely below daemon's 50Hz capacity
+CONTROL_LOOP_FREQUENCY_HZ = 10
+```
+#### 2. Increase pose change threshold (movement_manager.py)
+```python
+# Increased from 0.002 to 0.005
+# 0.005 rad ≈ 0.29 degrees, still smooth enough
+self._pose_change_threshold = 0.005
+```
+#### 3. Reduce camera/face tracking frequency (camera_server.py)
+```python
+# Reduced from 15fps to 10fps
+fps: int = 10
+```
+#### 4. Increase state cache TTL (reachy_controller.py)
+```python
+# Increased from 1 second to 2 seconds
+self._cache_ttl = 2.0
+```
+### Fix Results
+| Metric | Before (20Hz) | After (10Hz) | Improvement |
+|--------|---------------|--------------|-------------|
+| Control loop frequency | 20 Hz | 10 Hz | ↓ 50% |
+| Max Zenoh messages | 60 msg/s | 30 msg/s | ↓ 50% |
+| Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ↓ 62% |
+| Face tracking frequency | 15 Hz | 10 Hz | ↓ 33% |
+| State cache TTL | 1 second | 2 seconds | ↑ 100% |
+| Expected stability | Crash within hours | Stable operation | Major improvement |
+### Key Finding
+The reference `reachy_mini_conversation_app` uses a 100Hz control loop, but it is an official app that may have special optimizations or run on more powerful hardware. Our app needs more conservative settings.
+### Related Files
+- `movement_manager.py` - Control loop frequency and pose threshold
+- `camera_server.py` - Face tracking frequency
+- `reachy_controller.py` - State cache TTL
+---
+## 🔧 Tap-to-Wake and Microphone Sensitivity Fix (2026-01-07)
+### Problem Description
+1. **Tap-to-wake blocking** - After waking the robot by tap, the conversation stalled instead of proceeding normally
+2. **Low microphone sensitivity** - Need to be very close for voice recognition
+### Root Cause
+1. **Audio playback blocking** - `_tap_continue_feedback()` plays sound in continuous conversation mode, blocking audio stream processing
+2. **AGC settings not optimized** - ReSpeaker XVF3800 default settings not suitable for distant voice recognition
+### Fix Solution
+#### 1. Remove audio playback in continuous conversation feedback (satellite.py)
+```python
+def _tap_continue_feedback(self) -> None:
+    """Provide feedback when continuing conversation in tap mode.
+    Triggers a nod to indicate ready for next input.
+    Sound is NOT played here to avoid blocking audio streaming.
+    """
+    # NOTE: Do NOT play sound here - it blocks audio streaming
+    if self.state.motion_enabled and self.state.motion:
+        self.state.motion.on_continue_listening()
+```
+#### 2. Add exception handling to tap callback (voice_assistant.py)
+```python
+def _on_tap_detected(self) -> None:
+    """Callback when tap is detected on the robot.
+    NOTE: This is called from the tap_detector background thread.
+    """
+    try:
+        self._state.satellite.wakeup_from_tap()
+        # ... motion feedback
+    except Exception as e:
+        _LOGGER.error("Error in tap detection callback: %s", e)
+```
+#### 3. Comprehensive microphone optimization (voice_assistant.py) - Updated 2026-01-07
+```python
+def _optimize_microphone_settings(self) -> None:
+    """Optimize ReSpeaker XVF3800 microphone settings for voice recognition."""
+    # ========== 1. AGC (Automatic Gain Control) Settings ==========
+    # Enable AGC for automatic volume normalization
+    respeaker.write("PP_AGCONOFF", [1])
+    # Increase AGC max gain for better distant speech pickup (default ~15dB -> 30dB)
+    respeaker.write("PP_AGCMAXGAIN", [30.0])
+    # Set AGC desired output level (default ~-25dB -> -18dB for stronger output)
+    respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
+    # Optimize AGC time constant for voice commands
+    respeaker.write("PP_AGCTIME", [0.5])
+    # ========== 2. Base Microphone Gain ==========
+    # Increase base microphone gain (default 1.0 -> 2.0)
+    respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
+    # ========== 3. Noise Suppression Settings ==========
+    # Reduce noise suppression to preserve quiet speech (default ~0.5 -> 0.15)
+    respeaker.write("PP_MIN_NS", [0.15])
+    respeaker.write("PP_MIN_NN", [0.15])
+    # ========== 4. Echo Cancellation & High-pass Filter ==========
+    respeaker.write("PP_ECHOONOFF", [1])
+    respeaker.write("AEC_HPFONOFF", [1])
+```
+### Fix Results
+| Parameter | Before | After | Notes |
+|-----------|--------|-------|-------|
+| Tap continuous conversation | Blocking | Working | Removed blocking audio playback |
+| Microphone sensitivity | ~30cm | ~2-3m | Comprehensive AGC and gain optimization |
+| AGC switch | Off | On | Auto volume normalization |
+| AGC max gain | ~15dB | 30dB | Better distant speech pickup |
+| AGC target level | -25dB | -18dB | Stronger output signal |
+| Microphone gain | 1.0x | 2.0x | Base gain doubled |
+| Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
+| Echo cancellation | On | On | Maintain clarity during TTS playback |
+| High-pass filter | Off | On | Remove low-frequency noise |
+### XVF3800 Parameter Reference
+| Parameter Name | Type | Range | Description |
+|----------------|------|-------|-------------|
+| `PP_AGCONOFF` | int32 | 0/1 | AGC switch |
+| `PP_AGCMAXGAIN` | float | 0-40 dB | AGC max gain |
+| `PP_AGCDESIREDLEVEL` | float | dB | AGC target output level |
+| `PP_AGCTIME` | float | seconds | AGC time constant |
+| `AUDIO_MGR_MIC_GAIN` | float | 0-4.0 | Microphone gain multiplier |
+| `PP_MIN_NS` | float | 0-1.0 | Minimum noise suppression (lower = less suppression) |
+| `PP_MIN_NN` | float | 0-1.0 | Minimum noise estimation |
+| `PP_ECHOONOFF` | int32 | 0/1 | Echo cancellation switch |
+| `AEC_HPFONOFF` | int32 | 0/1 | High-pass filter switch |
+### Related Files
+- `satellite.py` - Removed blocking audio playback
+- `voice_assistant.py` - Comprehensive microphone optimization
+- `reachy_controller.py` - AGC entity default value updates
+- `entity_registry.py` - AGC max gain range update (0-40dB)
+- `reachy_mini/src/reachy_mini/media/audio_control_utils.py` - SDK reference
+---
+## 🔧 v0.5.1 Bug Fixes (2026-01-08)
+### Issue 1: Music Not Resuming After Voice Conversation
+**Problem**: Music doesn't resume after voice conversation ends.
+**Root Cause**: Sendspin was incorrectly connected to `tts_player` instead of `music_player`.
+**Fix**:
+- `voice_assistant.py`: Sendspin discovery now connects to `music_player`
+- `satellite.py`: `duck()`/`unduck()` now call `music_player.pause_sendspin()`/`resume_sendspin()`
+### Issue 2: tap_sensitivity Not Persisted
+**Problem**: tap_sensitivity value set in ESPHome lost after restart.
+**Fix**:
+- `models.py`: Added `tap_sensitivity` field to `Preferences` dataclass
+- `entity_registry.py`: Entity setter now saves to `preferences.json`
+- Load saved value on startup
+### Issue 3: Audio Conflict During Voice Assistant Wakeup
+**Problem**: Audio streaming (Sendspin or ESPHome audio) conflicts when voice assistant wakes up.
+**Fix**:
+- `audio_player.py`: Added `pause_sendspin()` and `resume_sendspin()` methods
+- `satellite.py`: `duck()` now pauses Sendspin, `unduck()` resumes it
+- Improved `pause()` method to actually stop audio output
+### Issue 4: AttributeError for _camera_server
+**Problem**: `_set_conversation_mode()` referenced non-existent `_camera_server` attribute.
+**Fix**: Changed `self._camera_server` to `self.camera_server` (removed underscore prefix)
+### Issue 5: tap_sensitivity Default Value Wrong
+**Problem**: tap_sensitivity default was still 2.0g instead of expected 0.5g.
+**Fix**: Use `TAP_THRESHOLD_G_DEFAULT` constant as default value
+### Issue 6: Sendspin Sample Rate Optimization
+**Problem**: ReSpeaker hardware I/O is 16kHz (hardware limitation), but Sendspin might try higher sample rates.
+**Fix**: Prioritize 16kHz in Sendspin supported formats list to avoid unnecessary resampling
+---
+## 🔧 v0.5.15 Updates (2026-01-11)
+### Feature 1: Audio Settings Persistence
+**Problem**: The AGC Enabled, AGC Max Gain, and Noise Suppression settings were lost after restart.
+**Solution**:
+- `models.py`: Added `agc_enabled`, `agc_max_gain`, `noise_suppression` fields to `Preferences` dataclass (Optional, None = use default)
+- `entity_registry.py`: Entity setters now save to `preferences.json`
+- `voice_assistant.py`: `_optimize_microphone_settings()` now restores saved values from preferences on startup
+**Behavior**:
+- First startup: Use optimized defaults (AGC=ON, MaxGain=30dB, NoiseSuppression=15%)
+- After user changes via Home Assistant: Values persisted and restored on restart
+### Feature 2: Sendspin Discovery Refactoring
+**Problem**: Sendspin mDNS discovery code was in `audio_player.py`, mixing concerns.
+**Solution**:
+- `zeroconf.py`: Added `SendspinDiscovery` class for mDNS service discovery
+- `audio_player.py`: Simplified to use `SendspinDiscovery` via callback pattern
+- Better separation of concerns: zeroconf.py handles all mDNS, audio_player.py handles audio
+### Fix 1: Tap Detection During Emotion Playback
+**Problem**: Tap detection was re-enabled after emotion playback completes, even during active conversation.
+**Root Cause**: `_play_emotion()` and `_wait_for_move_completion()` always re-enabled tap detection without checking pipeline state.
+**Fix**:
+- `satellite.py`: Check `_pipeline_active` before re-enabling tap detection
+- Only re-enable tap detection if conversation has ended (pipeline not active)
+**Related Files**:
+- `models.py` - Preferences fields
+- `entity_registry.py` - Entity setters with persistence
+- `voice_assistant.py` - Settings restoration on startup
+- `zeroconf.py` - SendspinDiscovery class
+- `audio_player.py` - Simplified Sendspin integration
+- `satellite.py` - Tap detection fix
+---
+### SDK Data Structure Reference
+```python
+# Motor control mode
+class MotorControlMode(str, Enum):
+    Enabled = "enabled"              # Torque on, position control
+    Disabled = "disabled"            # Torque off
+    GravityCompensation = "gravity_compensation"  # Gravity compensation mode
+# Daemon state
+class DaemonState(Enum):
+    NOT_INITIALIZED = "not_initialized"
+    STARTING = "starting"
+    RUNNING = "running"
+    STOPPING = "stopping"
+    STOPPED = "stopped"
+    ERROR = "error"
+# Full state
+class FullState:
+    control_mode: MotorControlMode
+    head_pose: XYZRPYPose  # x, y, z (m), roll, pitch, yaw (rad)
+    head_joints: list[float]  # 7 joint angles
+    body_yaw: float
+    antennas_position: list[float]  # [right, left]
+    doa: DoAInfo  # angle (rad), speech_detected (bool)
+# IMU data (wireless version only)
+imu_data = {
+    "accelerometer": [x, y, z],  # m/s²
+    "gyroscope": [x, y, z],      # rad/s
+    "quaternion": [w, x, y, z],  # Attitude quaternion
+    "temperature": float         # °C
+}
+# Safety limits
+HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
+HEAD_YAW_LIMIT = [-180°, +180°]
+BODY_YAW_LIMIT = [-160°, +160°]
+YAW_DELTA_MAX = 65°  # Max difference between head and body yaw
+```
+### ESPHome Protocol Implementation Notes
+ESPHome protocol communicates with Home Assistant via protobuf messages. The following message types need to be implemented:
+```python
+from aioesphomeapi.api_pb2 import (
+    # Number entity (volume/angle control)
+    ListEntitiesNumberResponse,
+    NumberStateResponse,
+    NumberCommandRequest,
+    # Select entity (motor mode)
+    ListEntitiesSelectResponse,
+    SelectStateResponse,
+    SelectCommandRequest,
+    # Button entity (wake/sleep)
+    ListEntitiesButtonResponse,
+    ButtonCommandRequest,
+    # Switch entity (motor switch)
+    ListEntitiesSwitchResponse,
+    SwitchStateResponse,
+    SwitchCommandRequest,
+    # Sensor entity (numeric sensors)
+    ListEntitiesSensorResponse,
+    SensorStateResponse,
+    # Binary Sensor entity (boolean sensors)
+    ListEntitiesBinarySensorResponse,
+    BinarySensorStateResponse,
+    # Text Sensor entity (text sensors)
+    ListEntitiesTextSensorResponse,
+    TextSensorStateResponse,
+)
+```
+## Reference Projects
+- [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
+- [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
+- [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
+- [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
+- [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)

Project_Summary.md DELETED Viewed

@@ -1,1439 +0,0 @@
-# Reachy Mini for Home Assistant - Project Plan (Current snapshot: v1.0.6)
-## Project Overview
-Integrate Home Assistant voice assistant functionality into Reachy Mini Wi-Fi robot, communicating with Home Assistant via ESPHome protocol.
-## Local Reference Directories (DO NOT modify any files in reference directories)
-1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
-2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
-3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
-4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
-5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
-6. [aiosendspin](reference/aiosendspin/) - Sendspin protocol client library reference
-7. [dynamic_gestures](reference/dynamic_gestures/) - Dynamic gesture reference
-8. [SimpleDances](reference/SimpleDances/) - Local reference snapshot
-## Core Design Principles
-1. **Zero Configuration** - Users only need to install the app, no manual configuration required
-2. **Native Hardware** - Use robot's built-in microphone and speaker
-3. **Home Assistant Centralized Management** - STT/TTS/intent configuration stays on Home Assistant side
-4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
-5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reachy_mini) architecture design and constraints
-6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
-7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
-8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
-9. **Preserve Functionality** - Any code modifications should optimize while preserving completed features; do not remove features to solve problems. When issues occur, prioritize solving problems after referencing examples, not adding various log outputs
-10. **No App-Managed Sleep/Wake** - The app no longer manages robot sleep/wake transitions; current SDK behavior is treated as source of truth
-## Technical Architecture
-```
-鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
-鈹?                             Reachy Mini (ARM64)                            鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ AUDIO INPUT 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? ReSpeaker XVF3800 (16kHz)                                            鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹? 鈹?4-Mic Array  鈹?鈫?鈹?XVF3800 DSP                                  鈹? 鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹?鈥?Hardware DSP path available                鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?App currently relies on HA STT/TTS         鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?DOA/VAD used by the current runtime        鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?Direction of Arrival (DOA)                 鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?Voice Activity Detection (VAD)             鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹?                                     鈹?                               鈹? 鈹?
-鈹? 鈹?                                     鈻?                               鈹? 鈹?
-鈹? 鈹?                    鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?Wake Word Detection (microWakeWord)          鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?"Okay Nabu" / "Hey Jarvis"                 鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹?鈥?Stop word detection                        鈹? 鈹? 鈹?
-鈹? 鈹?                    鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ AUDIO OUTPUT 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?   鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
-鈹? 鈹? 鈹?TTS Player               鈹?   鈹?Music Player (Sendspin)          鈹?鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Voice assistant speech 鈹?   鈹?鈥?Multi-room audio streaming     鈹?鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Sound effects          鈹?   鈹?鈥?Auto-discovery via mDNS        鈹?鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Priority over music    鈹?   鈹?鈥?Auto-pause during conversation 鈹?鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?   鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
-鈹? 鈹?                鈹?                             鈹?                     鈹? 鈹?
-鈹? 鈹?                鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?                     鈹? 鈹?
-鈹? 鈹?                               鈻?                                     鈹? 鈹?
-鈹? 鈹?                鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹?                鈹?ReSpeaker Speaker (16kHz)                        鈹? 鈹? 鈹?
-鈹? 鈹?                鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ VISION & TRACKING 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?   鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
-鈹? 鈹? 鈹?Camera (VPU accelerated) 鈹?鈫? 鈹?YOLO Face Detection              鈹?鈹? 鈹?
-鈹? 鈹? 鈹?鈥?MJPEG stream server    鈹?   鈹?鈥?AdamCodd/YOLOv11n-face         鈹?鈹? 鈹?
-鈹? 鈹? 鈹?鈥?ESPHome Camera entity  鈹?   鈹?鈥?Adaptive frame rate:           鈹?鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?   鈹?  - 15fps: conversation/face     鈹?鈹? 鈹?
-鈹? 鈹?                                 鈹?  - 2fps: idle (power saving)    鈹?鈹? 鈹?
-鈹? 鈹?                                 鈹?鈥?look_at_image() pose calc      鈹?鈹? 鈹?
-鈹? 鈹?                                 鈹?鈥?Smooth return after face lost  鈹?鈹? 鈹?
-鈹? 鈹?                                 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ MOTION CONTROL 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? MovementManager (50Hz Control Loop)                                  鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?Motion Layers (Priority: Move > Action > SpeechSway > Breath)  鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈹?Move Queue 鈹?鈹?Actions    鈹?鈹?SpeechSway 鈹?鈹?Breathing    鈹? 鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈹?(Emotions) 鈹?鈹?(Nod/Shake)鈹?鈹?(Voice VAD)鈹?鈹?(Idle anim)  鈹? 鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?  鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹?                                                                      鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?Face Tracking Offsets (Secondary Pose Overlay)                 鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Pitch offset: +9掳 (down compensation)                        鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Yaw offset: -7掳 (right compensation)                         鈹?  鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹?                                                                      鈹? 鈹?
-鈹? 鈹?  State Machine: on_wakeup 鈫?on_listening 鈫?on_speaking 鈫?on_idle     鈹? 鈹?
-鈹? 鈹?                                                                      鈹? 鈹?
-鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?Body Following                                                鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Body yaw syncs with head yaw for natural tracking            鈹?  鈹? 鈹?
-鈹? 鈹? 鈹?鈥?Extracted from final head pose matrix                        鈹?  鈹? 鈹?
-鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?  鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ GESTURE DETECTION 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? HaGRID ONNX Models                                                鈹? 鈹?
-鈹? 鈹? 鈥?18 gesture classes (call, like, dislike, fist, ok, palm, etc.)    鈹? 鈹?
-鈹? 鈹? 鈥?Runtime result publishing only                                    鈹? 鈹?
-鈹? 鈹? 鈥?Batch detection: all hands (not just highest confidence)         鈹? 鈹?
-鈹? 鈹? 鈥?Detection cadence: adaptive scheduler + minimum processing FPS    鈹? 鈹?
-鈹? 鈹? 鈥?No confidence filtering - all detections passed to Home Assistant鈹? 鈹?
-鈹? 鈹? 鈥?Runtime switchable (default OFF, model unloaded when disabled)    鈹? 鈹?
-鈹? 鈹? 鈥?Real-time state push to Home Assistant                            鈹? 鈹?
-鈹? 鈹? 鈥?No conflicts with face tracking (shared frame, independent)       鈹? 鈹?
-鈹? 鈹? 鈥?SDK integration: MediaBackend detection, proper resource cleanup 鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹?                                                                            鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ ESPHOME SERVER 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹? 鈹? Port 6053 (mDNS auto-discovery)                                      鈹? 鈹?
-鈹? 鈹? 鈥?Entity count evolves by release (sensors, controls, media, camera) 鈹? 鈹?
-鈹? 鈹? 鈥?Voice Assistant pipeline integration                               鈹? 鈹?
-鈹? 鈹? 鈥?Real-time state synchronization                                    鈹? 鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹���鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
-鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
-                                       鈹?
-                                       鈹?ESPHome Protocol (protobuf)
-                                       鈻?
-鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
-鈹?                           Home Assistant                                   鈹?
-鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹?
-鈹? 鈹?STT Engine       鈹? 鈹?Intent Processing鈹? 鈹?TTS Engine                 鈹?鈹?
-鈹? 鈹?(User configured)鈹? 鈹?(Conversation)   鈹? 鈹?(User configured)          鈹?鈹?
-鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹?
-鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
-```
-### Software Module Architecture (v1.0.6)
-```
-reachy_mini_home_assistant/
-│
-├── main.py                    # ReachyMiniApp entry point
-├── __main__.py                # Standalone CLI entry point
-├── voice_assistant.py         # Voice assistant service orchestrator
-├── reachy_controller.py       # Reachy Mini SDK wrapper
-├── models.py                  # Data models / preferences / server state
-│
-├── core/                      # Core Infrastructure
-│   ├── config.py              # Centralized nested configuration
-│   ├── service_base.py        # Suspend/resume-aware service helpers
-│   ├── system_diagnostics.py  # System diagnostics
-│   ├── exceptions.py          # Custom exception classes
-│   └── util.py                # Utility functions
-│
-├── motion/                    # Motion Control
-│   ├── movement_manager.py    # 50Hz unified motion control loop
-│   ├── command_runtime.py     # Command queue handling / state transitions
-│   ├── control_runtime.py     # Control-loop runtime helpers
-│   ├── idle_runtime.py        # Idle behavior / idle rest handling
-│   ├── antenna.py             # Antenna control / freeze logic
-│   ├── pose_composer.py       # Pose composition from multiple sources
-│   ├── smoothing.py           # Motion smoothing algorithms
-│   ├── state_machine.py       # Robot state definitions / idle config parsing
-│   ├── animation_player.py    # Animation player
-│   ├── emotion_moves.py       # Emotion moves
-│   ├── speech_sway.py         # Speech-driven head micro-movements
-│   └── reachy_motion.py       # Reachy motion API
-│
-├── vision/                    # Vision Processing
-│   ├── camera_server.py       # MJPEG camera stream server facade
-│   ├── camera_runtime.py      # Camera lifecycle helpers
-│   ├── camera_processing.py   # Frame capture / AI processing helpers
-│   ├── camera_http.py         # HTTP handlers for stream/snapshot
-│   ├── head_tracker.py        # YOLO face detector
-│   ├── gesture_detector.py    # HaGRID gesture detection
-│   ├── face_tracking_interpolator.py  # Smooth face tracking
-│   └── frame_processor.py     # Adaptive frame rate management
-│
-├── audio/                     # Audio runtime support
-│   ├── audio_player.py                # AudioPlayer facade
-│   ├── audio_player_shared.py         # Shared audio/sendspin constants + helpers
-│   ├── audio_player_playback.py       # Playback orchestration / lifecycle
-│   ├── audio_player_local.py          # Local file + fallback playback
-│   ├── audio_player_stream_pcm.py     # PCM streaming playback
-│   ├── audio_player_stream_decoded.py # Decoded/GStreamer streaming playback
-│   ├── audio_player_sendspin.py       # Sendspin runtime integration
-│   ├── microphone.py                  # Hardware audio helper / legacy tuning code
-│   └── doa_tracker.py                 # Direction of Arrival tracking
-│
-├── entities/                  # Home Assistant Entities
-│   ├── entity.py              # ESPHome base entity
-│   ├── entity_registry.py     # ESPHome entity registry
-│   ├── entity_factory.py      # Entity creation factory
-│   ├── entity_keys.py         # Entity key constants
-│   ├── entity_extensions.py   # Extended entity types
-│   ├── runtime_entity_setup.py # Runtime/control entity wiring
-│   ├── sensor_entity_setup.py # Sensor/diagnostic entity wiring
-│   ├── event_emotion_mapper.py # HA event → Emotion mapping
-│   └── emotion_detector.py    # Disabled runtime path for text emotion detection
-│
-├── protocol/                  # Protocol Handling
-│   ├── satellite.py           # ESPHome protocol handler facade
-│   ├── api_server.py          # HTTP API server
-│   ├── zeroconf.py            # mDNS discovery
-│   ├── entity_bridge.py       # Protocol/entity bridge helpers
-│   ├── message_dispatch.py    # ESPHome message dispatch
-│   ├── motion_bridge.py       # Voice → motion bridge
-│   ├── session_flow.py        # Conversation lifecycle helpers
-│   ├── voice_pipeline.py      # Voice event handling / TTS / stop / ducking
-│   └── wakeword_assets.py     # Wake word asset helpers
-│
-├── animations/               # Animation definitions
-│   └── conversation_animations.json  # Unified built-in behavior resource file
-│
-└── wakewords/                # Wake word models
-    ├── okay_nabu.json/.tflite
-    ├── hey_jarvis.json/.tflite
-    ├── alexa.json/.tflite
-    ├── hey_luna.json/.tflite
-    └── stop.json/.tflite
-```
-### Current Runtime Defaults (v1.0.6)
-- `idle_behavior_enabled`: user-controlled
-- `sendspin_enabled`: OFF
-- `face_tracking_enabled`: OFF
-- `gesture_detection_enabled`: OFF
-- `face_confidence_threshold`: 0.5 (persistent)
-- `continuous_conversation`: user-controlled
-- `Idle Behavior = OFF` means a parked no-animation state aligned to configured idle rest pose
-- When `Idle Behavior = OFF`, camera server is stopped entirely to save resources
-- When `Idle Behavior = ON`, camera server can run and `/snapshot` supports on-demand frame capture when cache is empty
-- Idle antenna behavior: torque disabled in `IDLE`, re-enabled when leaving `IDLE`
-- Voice phases and HA-triggered emotions are routed through one built-in zero-config behavior layer
-When face/gesture switches are OFF, their models are unloaded to save resources.
-### Current Audio Startup Note (SDK 1.7.0)
-- The app now aligns to the current Reachy Mini SDK media model instead of carrying older compatibility paths.
-- Camera snapshots can be fetched on demand when the MJPEG cache is empty and the camera server is still running.
-- Audio block size is currently `512` samples to reduce CPU overhead versus the earlier `256`-sample path.
-### Latest Incremental Update (2026-03-04) - Viewer-Aware Camera Streaming
-- MJPEG encoding/push is now viewer-aware: when no `/stream` client is connected, continuous MJPEG encoding is skipped to reduce CPU usage.
-- Face tracking and gesture detection still run without active stream viewers, so AI behavior remains available.
-- `/snapshot` now supports on-demand frame encode when no cached stream frame exists.
-- Stream output no longer forces fixed 1080p/25fps; it follows camera backend defaults (resolution/FPS) and only falls back when backend FPS is unavailable.
-- Transition from "watching" to "not watching" returns to adaptive idle pacing for resource saving.
-## Completed Features
-### Core Features
-- [x] ESPHome protocol server implementation
-- [x] mDNS service discovery (auto-discovered by Home Assistant)
-- [x] Local wake word detection (microWakeWord)
-- [x] Continuous conversation mode (controlled via Home Assistant switch)
-- [x] Audio stream transmission to Home Assistant
-- [x] TTS audio playback
-- [x] Stop word detection
-### Reachy Mini Integration
-- [x] Use Reachy Mini SDK microphone input
-- [x] Use Reachy Mini SDK speaker output
-- [x] Head motion control (nod, shake, gaze)
-- [x] Antenna animation control
-- [x] Voice state feedback actions
-- [x] YOLO face tracking (complements DOA wakeup orientation)
-- [x] 50Hz unified motion control loop
-### Application Architecture
-- [x] Compliant with Reachy Mini App architecture
-## File List
-```
-reachy_mini_ha_voice/
-├── reachy_mini_ha_voice/
-│   ├── __init__.py             # Package initialization (v0.9.9)
-│   ├── __main__.py             # Command line entry
-│   ├── main.py                 # ReachyMiniApp entry
-│   ├── voice_assistant.py      # Voice assistant service (1270 lines)
-│   ├── protocol/               # ESPHome protocol handling
-│   │   ├── __init__.py         # Module exports (13 lines)
-│   │   ├── satellite.py        # ESPHome protocol handler facade
-│   │   ├── api_server.py       # HTTP API server
-│   │   ├── zeroconf.py         # mDNS discovery
-│   │   ├── entity_bridge.py    # Protocol/entity bridge helpers
-│   │   ├── message_dispatch.py # ESPHome message dispatch
-│   │   ├── motion_bridge.py    # Voice → motion bridge
-│   │   ├── session_flow.py     # Conversation lifecycle helpers
-│   │   ├── voice_pipeline.py   # Voice event handling / TTS / stop / ducking
-│   │   └── wakeword_assets.py  # Wake word asset helpers
-│   ├── models.py               # Data models
-│   ├── reachy_controller.py    # Reachy Mini controller wrapper (961 lines)
-│   │
-│   ├── core/                   # Core infrastructure modules
-│   │   ├── __init__.py         # Module exports
-│   │   ├── config.py           # Centralized configuration (368 lines)
-│   │   ├── service_base.py     # Suspend/resume-aware service helpers
-│   │   ├── system_diagnostics.py   # System diagnostics (250 lines)
-│   │   ├── exceptions.py       # Custom exception classes (68 lines)
-│   │   └── util.py             # Utility functions (28 lines)
-│   │
-│   ├── motion/                 # Motion control modules
-│   │   ├── __init__.py         # Module exports
-│   │   ├── antenna.py          # Antenna freeze/unfreeze control
-│   │   ├── pose_composer.py    # Pose composition utilities
-│   │   ├── command_runtime.py  # Command queue handling / state transitions
-│   │   ├── control_runtime.py  # Control-loop runtime helpers
-│   │   ├── idle_runtime.py     # Idle behavior / idle rest handling
-│   │   ├── smoothing.py        # Smoothing/transition algorithms
-│   │   ├── state_machine.py    # State machine definitions
-│   │   ├── animation_player.py # Animation player
-│   │   ├── emotion_moves.py    # Emotion moves
-│   │   ├── speech_sway.py      # Speech-driven head micro-movements (338 lines)
-│   │   └── reachy_motion.py    # Reachy motion API
-│   │
-│   ├── vision/                 # Vision processing modules
-│   │   ├── __init__.py         # Module exports (30 lines)
-│   │   ├── frame_processor.py  # Adaptive frame rate management (227 lines)
-│   │   ├── face_tracking_interpolator.py  # Face lost interpolation (253 lines)
-│   │   ├── gesture_detector.py  # HaGRID gesture detection
-│   │   ├── head_tracker.py     # YOLO face detector
-│   │   ├── camera_runtime.py   # Camera lifecycle helpers
-│   │   ├── camera_processing.py # Frame capture / AI processing helpers
-│   │   ├── camera_http.py      # HTTP handlers for stream/snapshot
-│   │   └── camera_server.py     # MJPEG camera stream server facade
-│   │
-│   ├── audio/                  # Audio runtime modules
-│   │   ├── __init__.py         # Module exports (21 lines)
-│   │   ├── microphone.py       # Hardware audio helper / legacy tuning code
-│   │   ├── doa_tracker.py      # Direction of Arrival tracking
-│   │   ├── audio_player.py     # AudioPlayer facade
-│   │   ├── audio_player_shared.py # Shared audio/sendspin constants + helpers
-│   │   ├── audio_player_playback.py # Playback orchestration / lifecycle
-│   │   ├── audio_player_local.py # Local file + fallback playback
-│   │   ├── audio_player_stream_pcm.py # PCM streaming playback
-│   │   ├── audio_player_stream_decoded.py # Decoded/GStreamer streaming playback
-│   │   └── audio_player_sendspin.py # Sendspin runtime integration
-│   │
-│   ├── entities/               # Home Assistant entity modules
-│   │   ├── __init__.py         # Module exports (38 lines)
-│   │   ├── entity.py           # ESPHome base entity (402 lines)
-│   │   ├── entity_factory.py   # Entity factory pattern (440 lines)
-│   │   ├── entity_keys.py      # Entity key constants (155 lines)
-│   │   ├── entity_extensions.py  # Extended entity types (258 lines)
-│   │   ├── entity_registry.py  # ESPHome entity registry
-│   │   ├── runtime_entity_setup.py # Runtime/control entity wiring
-│   │   ├── sensor_entity_setup.py # Sensor/diagnostic entity wiring
-│   │   ├── event_emotion_mapper.py  # HA event to emotion mapping
-│   │   └── emotion_detector.py # Disabled runtime path for text emotion detection
-│   │
-│   ├── animations/             # Animation definitions
-│   │   └── conversation_animations.json  # Unified animations / gestures / HA events / keyword resources
-│   │
-│   └── wakewords/              # Wake word models
-│       ├── okay_nabu.json/.tflite
-│       ├── hey_jarvis.json/.tflite (openWakeWord)
-│       ├── alexa.json/.tflite
-│       ├── hey_luna.json/.tflite
-│       └── stop.json/.tflite   # Stop word detection
-│
-├── sounds/                     # Sound effect files (auto-download)
-│   ├── wake_word_triggered.flac
-│   └── timer_finished.flac
-├── pyproject.toml              # Project configuration
-├── README.md                   # Documentation
-├── changelog.json              # Version changelog
-└── PROJECT_PLAN.md             # Project plan
-```
-## Dependencies
-```toml
-dependencies = [
-    "reachy-mini>=1.7.0",
-    "soundfile>=0.13.0",
-    "numpy>=2.2.5,<=2.2.5",
-    "opencv-python>=4.12.0.88",
-    "pymicro-wakeword>=2.0.0,<3.0.0",
-    "pyopen-wakeword>=1.0.0,<2.0.0",
-    "aioesphomeapi>=43.10.1",
-    "zeroconf>=0.131,<1",
-    "websockets>=12,<16",
-    "aiohttp",
-    "scipy>=1.15.3,<2.0.0",
-    "ultralytics",
-    "supervision",
-    "aiosendspin>=5.1,<6.0",
-    "onnxruntime>=1.18.0",
-    "torch==2.5.1",
-    "torchvision==0.20.1",
-    "pillow<12.0",
-    "pydantic<=2.12.5",
-    "requests>=2.33.0",
-    "gstreamer-bundle==1.28.1; sys_platform != 'linux'",
-]
-```
-## Usage Flow
-1. **Install App**
-   - Install `reachy_mini_ha_voice` from Reachy Mini App Store
-2. **Start App**
-   - App auto-starts ESPHome server (port 6053)
-   - Auto-downloads required models and sounds
-3. **Connect Home Assistant**
-   - Home Assistant auto-discovers device (mDNS)
-   - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
-4. **Use Voice Assistant**
-   - Say "Okay Nabu" to wake
-   - Speak command
-   - Reachy Mini provides motion feedback
-## ESPHome Entity Planning
-Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
-### Implemented Entities
-| Entity Type | Name | Description |
-|-------------|------|-------------|
-| Media Player | `media_player` | Audio playback control |
-| Voice Assistant | `voice_assistant` | Voice assistant pipeline |
-### Implemented Control Entities (Read/Write)
-#### Phase 1-3: Basic Controls and Pose
-| ESPHome Entity Type | Name | SDK API | Range/Options | Description |
-|---------------------|------|---------|---------------|-------------|
-| `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
-| `Switch` | `idle_behavior_enabled` | `set_idle_behavior_enabled()` | off=parked/on=idle runtime enabled | Unified idle behavior toggle |
-| `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
-| `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
-| `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
-| `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
-| `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
-| `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
-| `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
-| `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
-| `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
-#### Phase 4: Gaze Control
-| ESPHome Entity Type | Name | SDK API | Range/Options | Description |
-|---------------------|------|---------|---------------|-------------|
-| `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
-| `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
-| `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
-### Implemented Sensor Entities (Read-only)
-#### Phase 1 & 5: Basic Status and Audio Sensors
-| ESPHome Entity Type | Name | SDK API | Description |
-|---------------------|------|---------|-------------|
-| `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
-| `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
-| `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
-| `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
-| `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
-#### Phase 6: Diagnostic Information
-| ESPHome Entity Type | Name | SDK API | Description |
-|---------------------|------|---------|-------------|
-| `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
-| `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
-| `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
-| `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
-| `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
-| `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
-#### Phase 7: IMU Sensors (Wireless version only)
-| ESPHome Entity Type | Name | SDK API | Description |
-|---------------------|------|---------|-------------|
-| `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
-| `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
-| `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
-| `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
-| `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
-| `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
-| `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
-#### Current Runtime Control and Sensor Entities
-| Phase | ESPHome Entity Type | Name | Description |
-|------|---------------------|------|-------------|
-| 1 | `Switch` | `mute` | Suspend/resume the voice pipeline |
-| 1 | `Switch` | `camera_disabled` | Disable/enable camera runtime |
-| 1 | `Switch` | `idle_behavior_enabled` | Unified idle motion / antenna / micro-actions toggle |
-| 1 | `Switch` | `sendspin_enabled` | Enable/disable Sendspin playback integration |
-| 1 | `Switch` | `face_tracking_enabled` | Enable/disable face tracking models |
-| 1 | `Switch` | `gesture_detection_enabled` | Enable/disable gesture detection models |
-| 1 | `Number` | `face_confidence_threshold` | Face tracking confidence threshold (0-1) |
-| 2 | `Binary Sensor` | `services_suspended` | Runtime suspension state |
-| 8 | `Select` | `emotion` | Manual emotion trigger |
-| 10 | `Camera` | `camera` | ESPHome camera entity / live preview |
-| 21 | `Switch` | `continuous_conversation` | Multi-turn conversation mode |
-| 22 | `Text Sensor` | `gesture_detected` | Current detected gesture |
-| 22 | `Sensor` | `gesture_confidence` | Current gesture confidence |
-| 23 | `Binary Sensor` | `face_detected` | Face currently visible |
-> **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
-> using `Number` type for bidirectional control. Call `goto_target()` when setting new values, call `get_current_head_pose()` etc. when reading current values.
-### Implementation Priority
-1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
-   - [x] `daemon_state` - Daemon status sensor
-   - [x] `backend_ready` - Backend ready status
-   - [x] `error_message` - Error message
-   - [x] `speaker_volume` - Speaker volume control
-2. **Phase 2 - Runtime State** (High Priority) ✅ **Completed**
-   - [x] `services_suspended` - Service suspension state sensor
-   - [x] App-managed sleep/wake entities removed from the current runtime
-3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
-   - [x] `head_x/y/z` - Head position control
-   - [x] `head_roll/pitch/yaw` - Head angle control
-   - [x] `body_yaw` - Body yaw angle control
-   - [x] `antenna_left/right` - Antenna angle control
-4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
-   - [x] `look_at_x/y/z` - Gaze point coordinate control
-5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
-   - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
-   - [x] `speech_detected` - Speech detection status
-   - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
-   - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
-   - Note: DOA only read once at wakeup to avoid daemon pressure; face tracking takes over after
-6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
-   - [x] `control_loop_frequency` - Control loop frequency
-   - [x] `sdk_version` - SDK version
-   - [x] `robot_name` - Robot name
-   - [x] `wireless_version` - Wireless version flag
-   - [x] `simulation_mode` - Simulation mode flag
-   - [x] `wlan_ip` - Wireless IP address
-7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
-   - [x] `imu_accel_x/y/z` - Accelerometer
-   - [x] `imu_gyro_x/y/z` - Gyroscope
-   - [x] `imu_temperature` - IMU temperature
-8. **Phase 8 - Emotion Control** ✅ **Completed**
-    - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
-9. **Phase 10 - Camera Integration** ✅ **Completed**
-    - [x] `camera` - ESPHome Camera entity (live preview)
-10. **Phase 11 - LED Control** ❌ **Disabled (LEDs hidden inside robot)**
-    - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
-    - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
-    - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
-11. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
-    - [x] `sendspin_enabled` - Sendspin switch (Switch)
-    - [x] AudioPlayer integrates aiosendspin library
-    - [x] Local music/sendspin path coexists with voice playback and is auto-paused during conversation
-12. **Phase 21 - Continuous Conversation** ✅ **Completed**
-    - [x] `continuous_conversation` - Conversation continuation switch
-13. **Phase 22 - Gesture Detection** ✅ **Completed (current runtime behavior)**
-    - [x] `gesture_detected` - Detected gesture name (Text Sensor)
-    - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
-    - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
-    - [x] Real-time state push to Home Assistant
-    - [x] Runtime gesture result publishing only (no gesture-driven robot actions)
-    - [x] Runtime toggle supported (default OFF, model unload on disable)
-    - [x] Batch detection: returns all detected hands (not just highest confidence)
-    - [x] Minimum processing cadence preserved for responsiveness
-    - [x] No conflicts with face tracking (shared frame, independent processing)
-    - [x] SDK integration: MediaBackend detection, proper resource cleanup on shutdown
-    - [x] 18 supported gestures:
-      | Gesture | Emoji | Gesture | Emoji |
-      |---------|-------|---------|-------|
-      | call | 棣冾樉 | like | 棣冩啢 |
-      | dislike | 棣冩啣 | mute | 棣冦亱 |
-      | fist | 閴?| ok | 棣冩啠 |
-      | four | 棣冩瀾閿?| one | 閳芥繐绗?|
-      | palm | 閴?| peace | 閴佸矉绗?|
-      | peace_inverted | 棣冩暰閴佸矉绗?| rock | 棣冾樈 |
-      | stop | 棣冩磧 | stop_inverted | 棣冩暰棣冩磧 |
-      | three | 3閿斿繆鍎?| three2 | 棣冾檮 |
-      | two_up | 閴佸矉绗嶉埥婵撶瑣 | two_up_inverted | 棣冩暰閴佸矉绗嶉埥婵撶瑣 |
-14. **Phase 23 - Face Detection** ✅ **Completed**
-    - [x] `face_detected` - Face visibility sensor
-15. **Phase 24 - System Diagnostics** ✅ **Completed**
-    - [x] `sys_cpu_percent` - CPU usage percentage (Sensor, diagnostic)
-    - [x] `sys_cpu_temperature` - CPU temperature in Celsius (Sensor, diagnostic)
-    - [x] `sys_memory_percent` - Memory usage percentage (Sensor, diagnostic)
-    - [x] `sys_memory_used` - Used memory in GB (Sensor, diagnostic)
-    - [x] `sys_disk_percent` - Disk usage percentage (Sensor, diagnostic)
-    - [x] `sys_disk_free` - Free disk space in GB (Sensor, diagnostic)
-    - [x] `sys_uptime` - System uptime in hours (Sensor, diagnostic)
-    - [x] `sys_process_cpu` - This process CPU usage (Sensor, diagnostic)
-    - [x] `sys_process_memory` - This process memory in MB (Sensor, diagnostic)
----
-## 🎯 Current Runtime Entity Coverage
-**Total Completed: See runtime registry (count evolves with releases)**
-- Phase 1: 10 entities (status, zero-config runtime switches, volume)
-- Phase 2: runtime state entities only (`services_suspended`; sleep entities removed)
-- Phase 3: 9 entities (Pose control)
-- Phase 4: 3 entities (Gaze control)
-- Phase 5: 3 entities (DOA sensors and tracking switch)
-- Phase 6: 7 entities (Diagnostic information)
-- Phase 7: 7 entities (IMU sensors)
-- Phase 8: 1 entity (Emotion control)
-- Phase 10: 1 entity (Camera)
-- Phase 11: 0 entities (LED control - Disabled)
-- Phase 13: 1 entity (Sendspin toggle)
-- Phase 21: 1 entity (Continuous conversation)
-- Phase 22: 2 entities (Gesture detection)
-- Phase 23: 1 entity (Face detection)
-- Phase 24: 9 entities (System diagnostics)
----
-## 🚀 Voice Assistant Enhancement Features Implementation Status
-### Phase 14 - Emotion and Motion Feedback ✅
-**Current Status**: Manual emotion playback and non-blocking motion feedback are implemented. Automatic keyword-based emotion triggering is currently disabled in the runtime.
-**Implemented Features**:
-- ✅ Phase 8 Emotion Selector entity (`emotion`)
-- ✅ `_play_emotion()` queues emotion moves through `MovementManager`
-- ✅ Wake/listen/think/speak/idle motion transitions are non-blocking
-- ✅ Timer-finished motion feedback is implemented
-- ✅ Gesture detection publishes recognized gesture label and confidence to Home Assistant entities
-- ✅ Voice phases and HA state reactions share one built-in behavior dispatcher
-**Current Behavior**:
-| Voice Assistant Event | Actual Action | Implementation Status |
-|----------------------|---------------|----------------------|
-| Wake word detected | Turn toward sound source + listening pose | ✅ Implemented |
-| Listening | Attentive listening state | ✅ Implemented |
-| Thinking | Thinking state animation | ✅ Implemented |
-| Speaking | Speech-reactive motion | ✅ Implemented |
-| Timer completed | Alert shake motion | ✅ Implemented |
-| Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
-**Deliberately Not Active In Runtime**:
-- Automatic emotion keyword detection from assistant text
-- Blocking full-action choreography during conversation
-- Dance/personalization layers that require user configuration
-**Manual Emotion Trigger Example**:
-```yaml
-# Home Assistant automation example - Manual emotion trigger
-automation:
-  - alias: "Reachy Good Morning Greeting"
-    trigger:
-      - platform: time
-        at: "07:00:00"
-    action:
-      - service: select.select_option
-        target:
-          entity_id: select.reachy_mini_emotion
-        data:
-          option: "Happy"
-```
-### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) ✅ **Completed**
-**Goal**: Implement natural face tracking so robot looks at speaker during conversation.
-**Design Decision**:
-- ✅ DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
-- ✅ YOLO face detection: Takes over after initial turn for continuous tracking
-- ✅ Body follows head rotation: Body yaw automatically syncs with head yaw for natural tracking
-- Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking, body following enables natural whole-body tracking similar to human behavior
-**Wakeup Turn-to-Sound Flow**:
-1. Wake word detected → Read DOA angle once (avoid daemon pressure)
-2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
-3. Face tracking takes over for continuous tracking during conversation
-**Implemented Features**:
-| Feature | Description | Implementation Location | Status |
-|---------|-------------|------------------------|--------|
-| DOA turn-to-sound | Turn toward speaker at wakeup | `protocol/satellite.py:_turn_to_sound_source()` | ✅ Implemented |
-| YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `vision/head_tracker.py` | ✅ Implemented |
-| Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
-| look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
-| Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
-| face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
-| Body follows head rotation | Body yaw syncs with head yaw extracted from final pose matrix | `motion/movement_manager.py:_compose_final_pose()` | ✅ Implemented (v0.8.3) |
-| DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
-| face_detected entity | Binary sensor for face detection state | `entity_registry.py` | ✅ Implemented |
-| Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
-| Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
-**Resource Optimization (v0.5.1, updated v0.6.2)**:
-- During conversation (listening/thinking/speaking): High-frequency tracking 15fps
-- Idle with face detected: High-frequency tracking 15fps
-- Idle without face for 5s: Low-power mode 2fps
-- Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
-- Gesture detection is switch-controlled and can run independently of face tracking
-- Immediately restore high-frequency tracking when face detected
-**Code Locations**:
-- `protocol/satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
-- `vision/head_tracker.py` - YOLO face detector (`HeadTracker` class)
-- `vision/camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
-- `vision/camera_server.py:set_conversation_mode()` - Conversation mode switch API
-- `protocol/satellite.py:_set_conversation_mode()` - Voice assistant state integration
-- `motion/movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
-- `motion/movement_manager.py:_compose_final_pose()` - Body yaw follows head yaw (v0.8.3)
-**Technical Details**:
-```python
-# vision/camera_server.py - Adaptive frame rate face tracking
-class MJPEGCameraServer:
-    def __init__(self):
-        self._fps_high = 15  # During conversation/face detected
-        self._fps_low = 2    # Idle without face (5-30s)
-        self._fps_idle = 0.5 # Ultra-low power (>30s without face)
-        self._low_power_threshold = 5.0   # 5s without face switches to low power
-        self._idle_threshold = 30.0       # 30s without face switches to idle mode
-    def _should_run_ai_inference(self, current_time):
-        # Conversation mode: Always high-frequency tracking
-        if self._in_conversation:
-            return True
-        # High-frequency mode: Track every frame
-        if self._current_fps == self._fps_high:
-            return True
-        # Low/idle power mode: Periodic detection
-        return time.since_last_check >= 1/self._current_fps
-# protocol/satellite.py - Voice assistant state integration
-def _reachy_on_listening(self):
-    self._set_conversation_mode(True)  # Start conversation, high-frequency tracking
-def _reachy_on_idle(self):
-    self._set_conversation_mode(False)  # End conversation, adaptive tracking
-# motion/movement_manager.py - Body follows head rotation (v0.8.3)
-# This enables natural body rotation when tracking faces, similar to how
-# the reference project's sweep_look tool synchronizes body_yaw with head_yaw.
-def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
-    # ... compose head pose from all motion sources ...
-    # Extract yaw from final head pose rotation matrix
-    # The rotation matrix uses xyz euler convention
-    final_rotation = R.from_matrix(final_head[:3, :3])
-    _, _, final_head_yaw = final_rotation.as_euler('xyz')
-    # Body follows head yaw directly
-    # SDK's automatic_body_yaw (inverse_kinematics_safe) only handles collision
-    # prevention by clamping relative angle to max 65°, not active following
-    body_yaw = final_head_yaw
-    return final_head, (antenna_right, antenna_left), body_yaw
-```
-**Body Following Head Rotation (v0.8.3)**:
-- SDK's `automatic_body_yaw` is only **collision protection**, not active body following
-- The `inverse_kinematics_safe` function with `max_relative_yaw=65°` only prevents head-body collision
-- To enable natural body following, `body_yaw` must be explicitly set to match `head_yaw`
-- Body yaw is extracted from final head pose matrix using scipy's `R.from_matrix().as_euler('xyz')`
-- This matches the reference project's `sweep_look.py` behavior where `target_body_yaw = head_yaw`
-### Phase 16 - Cartoon Style Motion Mode (Partial) 🟡
-**Goal**: Use SDK interpolation techniques for more expressive robot movements.
-**SDK Support**: `InterpolationTechnique` enum
-- `LINEAR` - Linear, mechanical feel
-- `MIN_JERK` - Minimum jerk, natural and smooth (default)
-- `EASE_IN_OUT` - Ease in-out, elegant
-- `CARTOON` - Cartoon style, with bounce effect, lively and cute
-**Implemented Features**:
-- ✅ 50Hz unified control loop (`motion/movement_manager.py`) - Current stable frequency
-- ✅ JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
-- ✅ Conversation state animations (idle/listening/thinking/speaking)
-- ✅ Pose change detection - Only send commands on significant changes (threshold 0.005)
-- ✅ State query caching - 2s TTL, reduces daemon load
-- ✅ Smooth interpolation (ease in-out curve)
-- ✅ Command queue mode - Thread-safe external API
-- ✅ Error throttling - Prevents log explosion
-- ✅ Connection health monitoring - Auto-detect and recover from connection loss
-**Animation System (v0.5.13)**:
-- `AnimationPlayer` class loads animations from `conversation_animations.json`
-- Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
-- Smooth transitions between animations (configurable duration)
-- State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
-**Not Implemented**:
-- ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
-- ❌ Exaggerated cartoon bounce effects
-**Code Locations**:
-- `motion/animation_player.py` - AnimationPlayer class
-- `animations/conversation_animations.json` - Animation definitions
-- `motion/movement_manager.py` - 50Hz control loop with animation integration
-**Scene Implementation Status**:
-| Scene | Recommended Interpolation | Effect | Status |
-|-------|--------------------------|--------|--------|
-| Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
-| Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
-| Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
-| Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
-| Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
-| Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
-### Phase 17 - Antenna Sync Animation During Speech (Completed) ✅
-**Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
-**Implemented Features**:
-- ✅ JSON-driven animation system with antenna movements
-- ✅ Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
-- ✅ State-specific antenna animations (listening/thinking/speaking)
-- ✅ Smooth transitions between animation states
-- ✅ v1.0.0 idle refinement: idle antenna sway disabled while conversation-state antenna behaviors are retained
-- ✅ v1.0.0 hardware refinement: antenna torque disabled in `IDLE` to reduce idle chatter/noise
-**Code Locations**:
-- `motion/animation_player.py` - AnimationPlayer with antenna offset calculation
-- `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
-- `motion/movement_manager.py` - Antenna offset composition in final pose
-### Phase 18 - Visual Gaze Interaction (Single-face only) ✅
-**Goal**: Use camera to detect faces for eye contact.
-**SDK Support**:
-- `look_at_image(u, v)` - Look at point in image
-- `look_at_world(x, y, z)` - Look at world coordinate point
-- `media.get_frame()` - Get camera frame (✅ Already implemented in `vision/camera_server.py:146`)
-**Current Status**:
-| Feature | Description | Status |
-|---------|-------------|--------|
-| Face detection | YOLO-based face detection (`AdamCodd/YOLOv11n-face-detection`) | ✅ Implemented |
-| Eye tracking | Robot tracks detected face during conversation/active mode | ✅ Implemented |
-| Idle scanning | Random look-around in idle cycles (switch-controlled) | ✅ Implemented |
-> Scope note: Current implementation is intentionally single-face tracking for stability and device performance.
-### Phase 19 - Gravity Compensation Interactive Mode (Historical / Not Current Target)
-This was an exploration direction for manual teaching workflows.
-**Current Runtime Position**:
-- The zero-config runtime does not depend on a teaching flow
-- No user-facing teaching interaction is exposed as a core feature
-- If gravity-compensation support is revisited, it should remain optional and not become a required setup path
-### Phase 20 - Environment Awareness Response (Partial) 🟡
-**Goal**: Use IMU sensors to sense environment changes and respond.
-**SDK Support**:
-- ✅ `mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
-- ✅ `mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
-**Implemented Features**:
-| Feature | Description | Status |
-|---------|-------------|--------|
-| Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
-| IMU sensor entities | Accelerometer and gyroscope exposed to HA | ✅ Implemented |
-> **Note**: Tap-to-wake feature was removed in v0.5.16 due to false triggers from robot movement. Continuous conversation is now controlled via Home Assistant switch.
-**Not Implemented**:
-| Detection Event | Response Action | Status |
-|-----------------|-----------------|--------|
-| Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
-| Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
-| Long idle | Enter sleep animation | ❌ Not implemented |
-### Phase 21 - Home Assistant Orchestration Scope
-The current runtime already exposes the main zero-config controls needed by Home Assistant:
-- `services_suspended`
-- `idle_behavior_enabled`
-- `continuous_conversation`
-- `emotion`
-- gesture / face / diagnostic sensors
-More elaborate scene orchestration remains intentionally outside the core runtime scope unless it can be delivered without introducing user configuration burden.
----
-## 📊 Feature Implementation Summary
-### ✅ Completed Features
-#### Core Voice Assistant (Phase 1-12)
-- **ESPHome entities** - Core phases implemented (Phase 11 LED intentionally disabled); exact count evolves by release
-- **Basic voice interaction** - Wake word detection (microWakeWord/openWakeWord), STT/TTS integration
-- **Motion feedback** - Nod, shake, gaze and other basic actions
-- **Audio path** - local wake word / stop word detection plus HA-managed STT/TTS
-- **Camera stream** - MJPEG live preview with ESPHome Camera entity
-#### Extended Features (Phase 13-22)
-- **Phase 13** ✅ - Sendspin multi-room audio support
-- **Phase 14** ✅ - Manual emotion playback + non-blocking motion feedback
-- **Phase 15** ✅ - Face tracking with body following (DOA + YOLO + body_yaw sync)
-- **Phase 16** ✅ - JSON-driven animation system (50Hz control loop)
-- **Phase 17** ✅ - Antenna sync animation during speech
-- **Phase 22** ✅ - Gesture detection (HaGRID ONNX, 18 gestures)
-### 🟡 Partially Implemented Features
-- **Phase 20** - IMU sensor entities are exposed; higher-level trigger logic is intentionally minimal
-### ❌ Not Implemented Features
-- Zero-config scene orchestration beyond the provided runtime switches and blueprint defaults
----
-## Feature Priority Summary (Updated v1.0.6)
-### Completed ✅
-- ✅ **Phase 1-12**: Core ESPHome entities and voice assistant
-- ✅ **Phase 13**: Sendspin audio playback
-- ✅ **Phase 14**: Emotion playback and motion feedback
-- ✅ **Phase 15**: Face tracking with body following
-- ✅ **Phase 16**: JSON-driven animation system
-- ✅ **Phase 17**: Antenna sync animation + v1.0.0 idle antenna behavior refinements
-- ✅ **Phase 21**: Continuous conversation switch
-- ✅ **Phase 22**: Gesture detection
-- ✅ **Phase 23**: Face detection sensor
-- ✅ **Phase 24**: System diagnostics entities
-### Partial 🟡
-- 🟡 **Phase 20**: Environment awareness (IMU entities done, triggers pending)
-### Not Implemented ❌
-- ❌ Zero-config scene orchestration layer beyond current runtime behavior
----
-## 📈 Completion Statistics
-| Phase | Status | Completion | Notes |
-|-------|--------|------------|-------|
-| Phase 1-12 | ✅ Complete | 100% | Core ESPHome entities implemented (Phase 11 LED intentionally disabled) |
-| Phase 13 | ✅ Complete | 100% | Sendspin audio playback support |
-| Phase 14 | ✅ Complete | 100% | Manual emotion playback and non-blocking motion feedback |
-| Phase 15 | ✅ Complete | 100% | Face tracking with DOA, YOLO detection, body follows head |
-| Phase 16 | ✅ Complete | 100% | JSON-driven animation system (50Hz control loop) |
-| Phase 17 | ✅ Complete | 100% | Antenna sync animation during speech |
-| Phase 18 | ✅ Complete | 100% | Single-face visual gaze interaction with idle scanning |
-| Phase 19 | Not a current runtime target | - | Historical planning item, not part of the zero-config runtime model |
-| Phase 20 | 🟡 Partial | 30% | IMU sensors exposed, missing trigger logic |
-| Phase 21 | ✅ Complete | 100% | Continuous conversation switch implemented |
-| Phase 22 | ✅ Complete | 100% | Gesture detection with HaGRID ONNX models |
-| Phase 23 | ✅ Complete | 100% | Face detection sensor exposed |
-| Phase 24 | ✅ Complete | 100% | System diagnostics entities (9 sensors) |
-| **v0.9.5** | ✅ Complete | 100% | Modular architecture refactoring |
-| **v1.0.0** | ✅ Complete | 100% | Runtime toggles/persistence (Sendspin, face, gesture, confidence) + idle and gesture stability updates |
-**Overall Completion**: current zero-config runtime path is functionally complete; remaining gaps are optional orchestration ideas rather than missing core runtime features.
----
-## 🔧 Daemon Crash Fix (2025-01-05)
-### Problem Description
-During long-term operation, `reachy_mini daemon` would crash, causing robot to become unresponsive.
-### Root Cause
-1. **50Hz control loop** - Current stable frequency for motion control
-2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
-3. **Missing change detection** - Even when pose hasn't changed, continues sending same commands
-4. **Zenoh message queue blocking** - Accumulated 150+ messages per second, daemon cannot process in time
-### Fix Solution
-#### 1. Control loop frequency (motion/movement_manager.py)
-```python
-# Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
-# Current stable frequency for production use
-CONTROL_LOOP_FREQUENCY_HZ = 50  # Current stable frequency
-```
-#### 2. Add pose change detection (movement_manager.py)
-```python
-# Only send commands on significant pose changes
-if self._last_sent_pose is not None:
-    max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
-    if max_diff < 0.001:  # Threshold: 0.001 rad or 0.001 m
-        return  # Skip sending
-```
-#### 3. State query caching (reachy_controller.py)
-```python
-# Cache daemon status query results
-self._cache_ttl = 0.1  # 100ms TTL
-self._last_status_query = 0.0
-def _get_cached_status(self):
-    now = time.time()
-    if now - self._last_status_query < self._cache_ttl:
-        return self._state_cache.get('status')  # Use cache
-    # ... query and update cache
-```
-#### 4. Head pose query caching (reachy_controller.py)
-```python
-# Cache get_current_head_pose() and get_current_joint_positions() results
-def _get_cached_head_pose(self):
-    # Reuse cached results within 100ms
-```
-### Fix Results
-| Metric | Before Fix | After Fix | Improvement |
-|--------|------------|-----------|-------------|
-| Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
-| State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
-| Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
-| Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
-| Expected stability | Crash within hours | Stable for days | Major improvement |
-### Related Files
-- `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
-- `movement_manager.py` - Control loop optimization
-- `reachy_controller.py` - State query caching
-### Future Optimization Suggestions
-1. ⏳ Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
-2. ⏳ Batch state queries - Get all states at once
-3. ⏳ Further runtime efficiency tuning after real usage profiling
----
-## 🔧 Daemon Crash Deep Fix (2026-01-07)
-> **Update (2026-01-30)**: Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity. The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
-### Problem Description
-During long-term operation, `reachy_mini daemon` still crashed; the previous fix was not thorough enough.
-### Root Cause Analysis
-Through deep analysis of SDK source code:
-1. **Each `set_target()` sends 3 Zenoh messages**
-   - `set_target_head_pose()` - 1 message
-   - `set_target_antenna_joint_positions()` - 1 message
-   - `set_target_body_yaw()` - 1 message
-2. **Daemon control loop is 50Hz**
-   - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
-   - If message send frequency exceeds 50Hz, daemon may not process in time
-3. **Previous 20Hz control loop still too high**
-   - 20Hz × 3 messages = 60 messages/second
-   - Already exceeds daemon's 50Hz processing capacity
-4. **Pose change threshold too small (0.002)**
-   - Breathing animation, speech sway, face tracking continuously produce tiny changes
-   - Almost every loop triggers `set_target()`
-### Fix Solution
-#### 1. Control loop frequency history (motion/movement_manager.py)
-```python
-# Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
-# Current stable frequency for production use
-CONTROL_LOOP_FREQUENCY_HZ = 50  # Current (2026-01-30)
-```
-#### 2. Increase pose change threshold (movement_manager.py)
-```python
-# Increased from 0.002 to 0.005
-# 0.005 rad ≈ 0.29 degrees, still smooth enough
-self._pose_change_threshold = 0.005
-```
-#### 3. Reduce camera/face tracking frequency (camera_server.py)
-```python
-# Reduced from 15fps to 10fps
-fps: int = 10
-```
-#### 4. Increase state cache TTL (reachy_controller.py)
-```python
-# Increased from 1 second to 2 seconds
-self._cache_ttl = 2.0
-```
-### Fix Results
-> **Note**: Current implementation uses 50Hz control loop as of 2026-01-30. The table below shows historical evolution.
-| Metric | Before (20Hz) | After (10Hz) | Current (50Hz) |
-|--------|---------------|--------------|-----------------|
-| Control loop frequency | 20 Hz | 10 Hz | 50 Hz (current) |
-| Max Zenoh messages | 60 msg/s | 30 msg/s | ~50 msg/s (optimized) |
-| Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ~30 msg/s |
-| Face tracking frequency | 15 Hz | 10 Hz | Adaptive (2-15 Hz) |
-| State cache TTL | 1 second | 2 seconds | 2 seconds |
-| Expected stability | Crash within hours | Stable operation | Stable (daemon updated) |
-### Key Finding
-Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity.
-### Related Files
-- `motion/movement_manager.py` - Control loop frequency and pose threshold
-- `vision/camera_server.py` - Face tracking frequency
-- `reachy_controller.py` - State cache TTL
----
-## 🔧 Microphone Sensitivity Optimization (2026-01-07)
-> Historical background only. These notes describe earlier low-level microphone tuning experiments and should not be read as current Home Assistant entity capabilities.
-### Problem
-Low microphone sensitivity - Need to be very close for voice recognition.
-### Solution
-Comprehensive ReSpeaker XVF3800 microphone optimization:
-| Parameter | Default | Optimized | Notes |
-|-----------|---------|-----------|-------|
-| AGC | Off | On | Auto volume normalization |
-| AGC max gain | ~15dB | 30dB | Better distant speech pickup |
-| AGC target level | -25dB | -18dB | Stronger output signal |
-| Microphone gain | 1.0x | 2.0x | Base gain doubled |
-| Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
-### Result
-Microphone sensitivity improved from ~30cm to ~2-3m effective range.
----
-## 🔧 v0.5.1 Bug Fixes (2026-01-08)
-### Issue 1: Music Not Resuming After Voice Conversation
-**Fix**: Sendspin now connects to `music_player` instead of `tts_player`
-### Issue 2: Audio Conflict During Voice Assistant Wakeup
-**Fix**: Added `pause_sendspin()` and `resume_sendspin()` methods to `audio/audio_player.py`
-### Issue 3: Sendspin Sample Rate Optimization
-**Fix**: Prioritize 16kHz in Sendspin supported formats (hardware limitation)
----
-## 🔧 v0.5.15 Updates (2026-01-11)
-### Feature 1: Audio Settings Persistence
-Historical note: older audio processing preferences were once persisted here. The current app no longer exposes AGC or noise suppression entities.
-### Feature 2: Sendspin Discovery Refactoring
-Moved mDNS discovery to `zeroconf.py` for better separation of concerns.
----
-### SDK Data Structure Reference
-```python
-# Motor control mode
-class MotorControlMode(str, Enum):
-    Enabled = "enabled"              # Torque on, position control
-    Disabled = "disabled"            # Torque off
-    GravityCompensation = "gravity_compensation"  # Gravity compensation mode
-# Daemon state
-class DaemonState(Enum):
-    NOT_INITIALIZED = "not_initialized"
-    STARTING = "starting"
-    RUNNING = "running"
-    STOPPING = "stopping"
-    STOPPED = "stopped"
-    ERROR = "error"
-# Full state
-class FullState:
-    control_mode: MotorControlMode
-    head_pose: XYZRPYPose  # x, y, z (m), roll, pitch, yaw (rad)
-    head_joints: list[float]  # 7 joint angles
-    body_yaw: float
-    antennas_position: list[float]  # [right, left]
-    doa: DoAInfo  # angle (rad), speech_detected (bool)
-# IMU data (wireless version only)
-imu_data = {
-    "accelerometer": [x, y, z],  # m/s²
-    "gyroscope": [x, y, z],      # rad/s
-    "quaternion": [w, x, y, z],  # Attitude quaternion
-    "temperature": float         # °C
-}
-# Safety limits
-HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
-HEAD_YAW_LIMIT = [-180°, +180°]
-BODY_YAW_LIMIT = [-160°, +160°]
-YAW_DELTA_MAX = 65°  # Max difference between head and body yaw
-```
-### ESPHome Protocol Implementation Notes
-ESPHome protocol communicates with Home Assistant via protobuf messages. The runtime primarily uses switch/number/select/sensor/binary_sensor/text_sensor/camera entities; button-only wake/sleep flows are historical and no longer the main control model.
-```python
-from aioesphomeapi.api_pb2 import (
-    # Number entity (volume/angle/confidence control)
-    ListEntitiesNumberResponse,
-    NumberStateResponse,
-    NumberCommandRequest,
-    # Select entity (emotion)
-    ListEntitiesSelectResponse,
-    SelectStateResponse,
-    SelectCommandRequest,
-    # Switch entity (sleep/runtime toggles)
-    ListEntitiesSwitchResponse,
-    SwitchStateResponse,
-    SwitchCommandRequest,
-    # Sensor entity (numeric sensors)
-    ListEntitiesSensorResponse,
-    SensorStateResponse,
-    # Binary Sensor entity (boolean sensors)
-    ListEntitiesBinarySensorResponse,
-    BinarySensorStateResponse,
-    # Text Sensor entity (text sensors)
-    ListEntitiesTextSensorResponse,
-    TextSensorStateResponse,
-)
-```
-## Reference Projects
-- [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
-- [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
-- [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
-- [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
-- [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)
----
-## 🔧 Code Refactoring & Improvement Plan (v0.9.5)
-> Comprehensive improvement plan based on code analysis
-> Target Platform: Raspberry Pi CM4 (4GB RAM, 4-core CPU)
-### Code Size Statistics (Updated 2026-01-19)
-| File | Original | Current | Status |
-|------|----------|---------|--------|
-| `movement_manager.py` | 1205 | 1260 | ⚠️ Modularized but still large |
-| `voice_assistant.py` | 1097 | 1270 | ✅ Enhanced with new features |
-| `satellite.py` | 1003 | 1022 | ✅ Optimized (-2%) |
-| `camera_server.py` | 1070 | 1009 | ✅ Optimized (-6%) |
-| `reachy_controller.py` | 878 | 961 | ✅ Enhanced |
-| `entity_registry.py` | 1129 | 844 | ✅ Optimized (-25%) |
-| `audio_player.py` | 599 | 679 | ✅ Acceptable |
-| `core/service_base.py` | - | 552 | 🆕 New module |
-| `entities/entity_factory.py` | - | 440 | 🆕 New module |
-> **Optimization Notes**:
-> - `entity_registry.py`: Factory pattern refactoring reduced 285 lines
-> - `camera_server.py`: Using `FaceTrackingInterpolator` module reduced 61 lines
-> - `protocol/satellite.py`: Runtime paths are now centered on voice state handling and HA event reactions
-> - New modular architecture with 6 sub-packages: `core/`, `motion/`, `vision/`, `audio/`, `entities/`, `protocol/`
-### New Module List (Updated 2026-01-19)
-| Directory | Module | Lines | Description |
-|-----------|--------|-------|-------------|
-| `core/` | `config.py` | 454 | Centralized nested configuration |
-| `core/` | `service_base.py` | 552 | Suspend/resume service helpers + RobustOperationMixin |
-| `core/` | `system_diagnostics.py` | 250 | System diagnostics |
-| `core/` | `exceptions.py` | 68 | Custom exception classes |
-| `core/` | `util.py` | 28 | Utility functions |
-| `motion/` | `antenna.py` | - | Antenna freeze/unfreeze control |
-| `motion/` | `pose_composer.py` | - | Pose composition utilities |
-| `motion/` | `command_runtime.py` | - | Command queue handling / state transitions |
-| `motion/` | `control_runtime.py` | - | Control-loop runtime helpers |
-| `motion/` | `idle_runtime.py` | - | Idle behavior / idle rest handling |
-| `motion/` | `state_machine.py` | - | State machine definitions |
-| `motion/` | `smoothing.py` | - | Smoothing/transition algorithms |
-| `motion/` | `animation_player.py` | - | Animation player |
-| `motion/` | `emotion_moves.py` | - | Emotion moves |
-| `motion/` | `speech_sway.py` | 338 | Speech-driven head micro-movements |
-| `motion/` | `reachy_motion.py` | - | Reachy motion API |
-| `vision/` | `frame_processor.py` | 227 | Adaptive frame rate management |
-| `vision/` | `face_tracking_interpolator.py` | 253 | Face lost interpolation |
-| `vision/` | `gesture_smoother.py` | 80 | Historical gesture smoothing module; current runtime no longer depends on it |
-| `vision/` | `gesture_detector.py` | 285 | HaGRID gesture detection |
-| `vision/` | `head_tracker.py` | 367 | YOLO face detector |
-| `vision/` | `camera_server.py` | 1009 | MJPEG camera stream server facade |
-| `audio/` | `doa_tracker.py` | 206 | Direction of Arrival tracking |
-| `audio/` | `microphone.py` | 219 | Hardware audio helper / legacy tuning code |
-| `audio/` | `audio_player.py` | facade | AudioPlayer facade (split into playback/sendspin/local streaming modules) |
-| `entities/` | `entity.py` | 402 | ESPHome base entity |
-| `entities/` | `entity_factory.py` | 440 | Entity factory pattern |
-| `entities/` | `entity_keys.py` | 155 | Entity key constants |
-| `entities/` | `entity_extensions.py` | 258 | Extended entity types |
-| `entities/` | `event_emotion_mapper.py` | 351 | HA event to emotion mapping |
-| `protocol/` | `satellite.py` | 1022 | ESPHome protocol handler |
-| `protocol/` | `api_server.py` | 172 | HTTP API server |
-| `protocol/` | `zeroconf.py` | - | mDNS discovery |
-### Improvement Plan Status
-#### Phase 1: Runtime Suspend/Resume Foundation ✅ Complete
-- [x] Create `core/service_base.py` - runtime suspend/resume service helpers
-- [x] All required services implement `suspend()` / `resume()` methods where needed
-- [x] Historical app-managed sleep/wake flow was later removed to align with the current SDK
-#### Phase 2: Code Modularization ✅ Complete
-- [x] Create new directory structure (`core/`, `motion/`, `audio/`, `vision/`, `entities/`)
-- [x] Extract from `movement_manager.py` → `motion/antenna.py`, `motion/pose_composer.py`
-- [x] Extract from `camera_server.py` → `vision/frame_processor.py`, `vision/face_tracking_interpolator.py`
-- [x] Extract from `entity_registry.py` → `entities/entity_factory.py`, `entities/entity_keys.py`
-- [x] Create `core/config.py` for centralized configuration
-- [x] Ensure no circular dependencies
-#### Phase 3: Stability & Performance ✅ Complete
-- [x] Create `core/exceptions.py` - Custom exception classes
-- [x] Implement `RobustOperationMixin` - Unified error handling
-- [x] `CameraServer` implements Context Manager pattern
-- [x] Improve `CameraServer` resource cleanup
-- [x] Fix MJPEG client tracking (proper register/unregister)
-- [x] Historical health/memory monitor modules were added during earlier SDK instability periods
-- [x] Health/memory monitor modules were later removed after runtime simplification
-- [ ] Long-running stability test (24h+)
-#### Phase 4: Feature Enhancements ✅ Complete
-- [x] Historical gesture-action runtime path explored
-- [x] Gesture runtime later simplified to publish recognition results only
-- [x] Create `audio/doa_tracker.py` - DOATracker
-- [x] Implement sound source tracking with motion control integration
-- [x] Create `entities/event_emotion_mapper.py` - EventEmotionMapper
-- [x] Fold HA event behavior config into `animations/conversation_animations.json`
-- [x] Add DOA tracking toggle HA entity
-### SDK Compatibility Verification ✅ Passed
-| API Call | Status | Notes |
-|----------|--------|-------|
-| `set_target(head, antennas, body_yaw)` | ✅ | Correct usage |
-| `goto_target()` | ✅ | Correct usage |
-| `look_at_image(u: int, v: int)` | ✅ | Fixed float→int |
-| `create_head_pose(degrees=False)` | ✅ | Using radians |
-| `compose_world_offset()` | ✅ | SDK function correctly called |
-| `linear_pose_interpolation()` | ✅ | Has fallback implementation |
-| Body yaw range | ✅ | Clamped to ±160° |
----
-## 🔧 v0.9.5 Updates (2026-01-19)
-### Major Changes: Modular Architecture Refactoring
-The codebase has been restructured into a modular architecture with 5 sub-packages:
-| Package | Purpose | Key Modules |
-|---------|---------|-------------|
-| `core/` | Core infrastructure | `config.py`, `service_base.py`, `system_diagnostics.py` |
-| `motion/` | Motion control | `antenna.py`, `pose_composer.py`, `command_runtime.py`, `control_runtime.py`, `idle_runtime.py`, `smoothing.py` |
-| `vision/` | Vision processing | `frame_processor.py`, `face_tracking_interpolator.py` |
-| `audio/` | Audio processing | `microphone.py`, `doa_tracker.py` |
-| `entities/` | HA entity management | `entity_factory.py`, `entity_keys.py`, `event_emotion_mapper.py` |
-### New Features
-1. **Historical note**
-   - Earlier versions explored direct sleep/wake callbacks and polling-based state handling
-   - Current runtime no longer uses app-managed sleep/wake callbacks
-2. **Camera runtime evolution**
-   - Camera lifecycle was later split into dedicated runtime/processing/http helpers
-   - Current runtime can fully stop camera service when `Idle Behavior` is disabled
-### Audio Optimizations
-| Parameter | Before | After | Improvement |
-|-----------|--------|-------|-------------|
-| Audio chunk size | 1024 samples | 512 samples | 64ms → 32ms latency with lower CPU load |
-| Audio loop delay | 10ms | 1ms | Faster VAD response |
-| Stereo→Mono | Mean of channels | First channel | Cleaner signal |
-### Code Quality Improvements
-- Removed all legacy/compatibility code
-- Centralized configuration in nested dataclasses
-- NaN/Inf cleaning in audio pipeline
-- Rotation clamping in face tracking to prevent IK collisions

README.md CHANGED Viewed

@@ -9,6 +9,7 @@ short_description: Deep integration of Reachy Mini robot with Home Assistant
 tags:
   - reachy_mini
   - reachy_mini_python_app
   - reachy_mini_home_assistant
   - home_assistant
   - homeassistant

 tags:
   - reachy_mini
   - reachy_mini_python_app
+  - reachy_mini_ha_voice
   - reachy_mini_home_assistant
   - home_assistant
   - homeassistant

changelog.json CHANGED Viewed

@@ -1,274 +1,4 @@
-[  {
-    "version": "1.0.7",
-    "date": "2026-05-05",
-    "changes": [
-      "Build: Bump package version to 1.0.7",
-      "Change: Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers",
-      "Change: Raise the Reachy Mini SDK baseline to reachy-mini>=1.7.1",
-      "Fix: Keep wakeup and TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers",
-      "Fix: Synchronize Idle Behavior shutdown with ESPHome face and gesture switches plus runtime state updates",
-      "Fix: Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior",
-      "Optimize: Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling"
-    ]
-  },
-  {
-    "version": "1.0.6",
-    "date": "2026-05-01",
-    "changes": [
-      "Build: Bump package version to 1.0.6",
-      "Change: Align pyproject.toml with the current Reachy Mini SDK baseline (reachy-mini>=1.7.0, Python>=3.12, zeroconf>=0.131,<1, aiohttp, websockets>=12,<16, and gstreamer-bundle==1.28.1 on non-Linux)",
-      "Change: Align Sendspin dependency with the current upstream client line via aiosendspin>=5.1,<6.0",
-      "Fix: Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model",
-      "Optimize: Stop the camera server entirely when Idle Behavior is disabled instead of only unloading vision models"
-    ]
-  },
-  {
-    "version": "1.0.5",
-    "date": "2026-04-12",
-    "changes": [
-      "Build: Bump package version to 1.0.5",
-      "Change: Remove app-managed robot sleep/wake handling because the current Reachy Mini SDK no longer allows mini apps to stay active while the robot enters sleep",
-      "Change: Limit resource suspend/resume to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery",
-      "Change: Align pyproject.toml runtime constraints with the current Reachy Mini reference SDK package (reachy-mini>=1.6.3, websockets>=12,<16, Python baseline >=3.10, and uv gstreamer metadata)",
-      "Remove: Delete SleepManager integration and app-side sleep/wake callback flow from the voice assistant runtime",
-      "Remove: Delete Home Assistant sleep control entities and internal robot sleep state tracking from the mini app"
-    ]
-  },
-  {
-    "version": "1.0.4",
-    "date": "2026-03-19",
-    "changes": [
-      "Build: Bump package version to 1.0.4",
-      "Fix: Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks",
-      "Fix: Replace direct SDK private _respeaker access with audio_control_utils-based ReSpeaker initialization",
-      "Fix: Tighten camera and pose composition to require current SDK media/utils APIs and valid look_at_image inputs",
-      "Improve: Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases",
-      "Improve: Replace separate wake/sleep buttons with a single sleep control entity",
-      "Improve: Update Sendspin integration for current aiosendspin lifecycle, stream handling, listener cleanup, and synchronized buffering",
-      "Improve: Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor"
-    ]
-  },
-  {
-    "version": "1.0.3",
-    "date": "2026-03-07",
-    "changes": [
-      "Build: Bump package version to 1.0.3",
-      "New: Add Idle Random Actions switch in Home Assistant with preferences persistence and startup restore",
-      "New: Add configurable idle_random_actions action presets in conversation_animations.json for centralized idle motion tuning",
-      "Fix: Remove duplicate idle_random_actions fields/methods and complete runtime control wiring in controller/entity registry/movement manager",
-      "Improve: Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion",
-      "Optimize: Remove set_target global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick",
-      "Optimize: Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness"
-    ]
-  },
-  {
-    "version": "1.0.2",
-    "date": "2026-03-06",
-    "changes": [
-      "Build: Bump package version to 1.0.2",
-      "Fix: Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness",
-      "Fix: Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise",
-      "Fix: Switch sleep/wake control to daemon API (start/stop with wake_up/goto_sleep) so /api/daemon/status reflects real sleep state on SDK 1.5",
-      "Fix: Normalize daemon status parsing for SDK 1.5 object-based status responses",
-      "Fix: Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts",
-      "Change: Keep idle antenna behavior as animation-only control (no torque coupling)",
-      "Change: Tighten preference loading to current schema (no legacy config fallback filtering)",
-      "Fix: Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome",
-      "Fix: Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection",
-      "New: Add Home Assistant blueprint for Reachy presence companion automation",
-      "Improve: Blueprint supports device-first auto-binding and richer usage instructions",
-      "Docs: Refresh landing page (index.html) with current version, GitHub source link, and new Blueprint/Auto Release capability cards",
-      "New: Add GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag",
-      "Chore: Ignore local wiki workspace artifacts (local/) from repository tracking"
-    ]
-  },
-  {
-    "version": "1.0.1",
-    "date": "2026-03-05",
-    "changes": [
-      "Build: Bump package version to 1.0.1",
-      "Deps: Update runtime dependency baseline to reachy-mini>=1.5.0",
-      "Fix: Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility",
-      "Fix: Remove legacy ZError string matching from connection error handling",
-      "Fix: Adapt daemon status handling to SDK v1.5 DaemonStatus object (prevents AttributeError on status.get)",
-      "Fix: Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating",
-      "Fix: Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)",
-      "Fix: Improve TTS streaming robustness and reduce cutoffs with retry-based audio push",
-      "Optimize: Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)",
-      "Optimize: Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio"
-    ]
-  },
-  {
-    "version": "1.0.0",
-    "date": "2026-03-04",
-    "changes": [
-      "Build: Bump package version to 1.0.0",
-      "Deps: Require reachy-mini[gstreamer]>=1.4.1",
-      "Fix: Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)",
-      "Fix: Auto-match ONNX gesture input size from model shape to prevent INVALID_ARGUMENT dimension errors",
-      "New: Add Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)",
-      "New: Add Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)",
-      "New: Add Face Confidence number entity (0.0-1.0, persistent)",
-      "Optimize: Unload/reload face and gesture models when toggled off/on to save resources",
-      "Optimize: Idle behavior updated to breathing + look-around alternation, idle antenna sway disabled",
-      "Optimize: Adjust idle breathing to human-like cadence",
-      "Fix: Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise",
-      "Fix: Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)",
-      "Fix: Enforce deterministic audio startup path and fail fast when microphone capture is not ready",
-      "Optimize: Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)",
-      "Optimize: Keep face/gesture AI processing active even when stream viewers are absent",
-      "Fix: Add on-demand /snapshot JPEG generation when no cached stream frame is available",
-      "Change: Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps"
-    ]
-  },
-  {
-    "version": "0.9.9",
-    "date": "2026-01-28",
-    "changes": [
-      "Fix: Audio buffer overflow - require Reachy Mini hardware, use only Reachy microphone with 50ms sleep",
-      "Optimize: Gesture detection sensitivity - remove all confidence filtering, return all detections to Home Assistant",
-      "Optimize: Gesture detection now runs at 1 frame interval for maximum responsiveness",
-      "Refactor: Simplify GestureSmoother to frequency-based confirmation (1 frame)",
-      "Refactor: Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)",
-      "Fix: Remove duplicate empty check in gesture detection",
-      "Optimize: SDK integration - add MediaBackend detection and proper resource cleanup",
-      "Document: ReSpeaker private attribute access risk with TODO comments"
-    ]
-  },
-  {
-    "version": "0.9.8",
-    "date": "2026-01-27",
-    "changes": [
-      "New: Mute switch and Disable Camera entities for granular control",
-      "Fix: Camera disable logic and daemon crash prevention",
-      "New: Home Assistant connection-driven feature loading with auto suspend/resume",
-      "Optimize: Reduce log output by 30-40%",
-      "Fix: Code quality improvements",
-      "Fix: SDK crash during idle - optimize audio processing and add GStreamer threading lock",
-      "Optimize: Bundle face tracking model, use SDK Zenoh for daemon monitoring",
-      "Simplify: Device ID reads /etc/machine-id directly",
-      "Clean up: Remove unused config items"
-    ]
-  },
-  {
-    "version": "0.9.7",
-    "date": "2026-01-20",
-    "changes": [
-      "Fix: Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)",
-      "Fix: Animation file path corrected (was looking in wrong directory)",
-      "Fix: Remove hey_jarvis from required wake words (it's optional in openWakeWord/)"
-    ]
-  },
-  {
-    "version": "0.9.6",
-    "date": "2026-01-20",
-    "changes": [
-      "New: Add ruff linter/formatter and mypy type checker configuration",
-      "New: Add pre-commit hooks for automated code quality checks",
-      "Fix: Remove duplicate resume() method in audio_player.py",
-      "Fix: Remove duplicate connection_lost() method in satellite.py",
-      "Fix: Store asyncio task reference in sleep_manager.py to prevent garbage collection",
-      "Optimize: Use dict.items() for efficient iteration in smoothing.py"
-    ]
-  },
-  {
-    "version": "0.9.5",
-    "date": "2026-01-19",
-    "changes": [
-      "Refactor: Modularize codebase - new core/motion/vision/audio/entities module structure",
-      "New: Direct callbacks for HA sleep/wake buttons to suspend/resume services",
-      "Optimize: Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)",
-      "Optimize: Audio loop delay reduced from 10ms to 1ms for faster VAD response",
-      "Optimize: Stereo to mono conversion uses first channel instead of mean for cleaner signal",
-      "Improve: Camera resume_from_suspend now synchronous for reliable wake from sleep",
-      "Improve: Rotation clamping in face tracking to prevent IK collisions"
-    ]
-  },
-  {
-    "version": "0.9.0",
-    "date": "2026-01-18",
-    "changes": [
-      "New: Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect",
-      "New: System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors",
-      "New: Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)",
-      "Fix: Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam",
-      "Improve: Graceful service lifecycle management with RobotStateMonitor callbacks"
-    ]
-  },
-  {
-    "version": "0.8.7",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback",
-      "Fix: Emotion moves and face tracking now respect SDK safety limits",
-      "Improve: Face tracking smoothness - removed EMA smoothing (matches reference project)",
-      "Improve: Face tracking timing updated to match reference (2s delay, 1s interpolation)"
-    ]
-  },
-  {
-    "version": "0.8.6",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: Audio buffer memory leak - added size limit to prevent unbounded growth",
-      "Fix: Temp file leak - downloaded audio files now cleaned up after playback",
-      "Fix: Camera thread termination timeout increased for clean shutdown",
-      "Fix: Thread-safe draining flag using threading.Event",
-      "Fix: Silent failures now logged for debugging"
-    ]
-  },
-  {
-    "version": "0.8.5",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: DOA turn-to-sound direction inverted - now turns correctly toward sound source",
-      "Fix: Graceful shutdown prevents daemon crash on app stop"
-    ]
-  },
-  {
-    "version": "0.8.4",
-    "date": "2026-01-18",
-    "changes": [
-      "Improve: Smooth idle animation with interpolation phase (matches reference BreathingMove)",
-      "Improve: Two-phase animation - interpolates to neutral before oscillation",
-      "Fix: Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway"
-    ]
-  },
-  {
-    "version": "0.8.3",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: Body now properly follows head rotation during face tracking",
-      "Fix: body_yaw extracted from final head pose matrix and synced with head_yaw",
-      "Fix: Matches reference project sweep_look behavior for natural body movement"
-    ]
-  },
-  {
-    "version": "0.8.2",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: Body now follows head rotation during face tracking - body_yaw syncs with head_yaw",
-      "Fix: Matches reference project sweep_look behavior for natural body movement"
-    ]
-  },
-  {
-    "version": "0.8.1",
-    "date": "2026-01-18",
-    "changes": [
-      "Fix: face_detected entity now pushes state updates to Home Assistant in real-time",
-      "Fix: Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention",
-      "Fix: Idle animation now starts immediately on app launch",
-      "Fix: Smooth antenna animation - removed pose change threshold for continuous motion"
-    ]
-  },
-  {
-    "version": "0.8.0",
-    "date": "2026-01-17",
-    "changes": [
-      "New: Comprehensive emotion keyword mapping with 280+ Chinese and English keywords",
-      "New: 35 emotion categories mapped to robot expressions",
-      "New: Auto-trigger expressions from conversation text patterns"
-    ]
-  },
   {
     "version": "0.7.3",
     "date": "2026-01-12",
@@ -663,4 +393,3 @@
     ]
   }
 ]

+[
   {
     "version": "0.7.3",
     "date": "2026-01-12",
     ]
   }
 ]

docs/USER_MANUAL_CN.md DELETED Viewed

@@ -1,244 +0,0 @@
-# Reachy Mini 语音助手 - 用户手册
-## 系统要求
-### 硬件
-- Reachy Mini 机器人（带 ReSpeaker XVF3800 麦克风）
-- WiFi 网络连接
-### 软件
-- Home Assistant（2024.1 或更高版本）
-- Home Assistant 中已启用 ESPHome 集成
----
-## 安装步骤
-### 第一步：安装应用
-从 Reachy Mini 应用商店安装 `reachy_mini_home_assistant`。
-### 第二步：启动应用
-应用将自动：
-- 在端口 6053 启动 ESPHome 服务器
-- 加载预打包的唤醒词模型
-- 通过 mDNS 注册以便自动发现
-- 如果网络上有 Sendspin 服务器则自动连接
-### 第三步：连接 Home Assistant
-**自动连接（推荐）：**
-Home Assistant 会通过 mDNS 自动发现 Reachy Mini。
-**手动连接：**
-1. 进入 设置 → 设备与服务
-2. 点击"添加集成"
-3. 选择"ESPHome"
-4. 输入机器人的 IP 地址和端口 6053
----
-## 功能介绍
-### 语音助手
-- **唤醒词检测**：说 "Okay Nabu" 激活（本地处理）
-- **停止词**：说 "Stop" 结束对话
-- **连续对话模式**：无需重复唤醒词即可持续对话
-- **语音识别/合成**：使用 Home Assistant 配置的语音引擎
-**支持的唤醒词：**
-- Okay Nabu（默认）
-- Hey Jarvis
-- Alexa
-- Hey Luna
-### 人脸追踪
-- 基于 YOLO 的人脸检测
-- 头部跟随检测到的人脸
-- 头部转动时身体随之旋转
-- 自适应帧率：活跃时 15fps，空闲时 2fps
-- 可在 Home Assistant 中运行时开关
-### 手势检测
-检测到的手势会作为实体状态同步到 Home Assistant。
-当前默认运行时不会直接用手势触发机器人动作。
-| 输出 | 说明 |
-|------|------|
-| `gesture_detected` | 当前识别到的手势标签 |
-| `gesture_confidence` | 手势识别置信度 |
-### 情绪响应
-机器人可播放 35 种不同情绪：
-- 基础：开心、难过、愤怒、恐惧、惊讶、厌恶
-- 扩展：大笑、爱慕、骄傲、感激、热情、好奇、惊叹、害羞、困惑、沉思、焦虑、害怕、沮丧、烦躁、狂怒、轻蔑、无聊、疲倦、精疲力竭、孤独、沮丧、顺从、不确定、不舒服
-### 音频功能
-- 扬声器音量控制（0-100%）
-- 静音开关，可暂停/恢复语音链路
-- 支持唤醒提示音与计时器完成提示音
-- STT/TTS 由 Home Assistant 负责
-### Sendspin 多房间音频
-- 通过 mDNS 自动发现 Sendspin 服务器
-- 同步多房间音频播放
-- Reachy Mini 作为 PLAYER 接收音频流
-- 语音对话时自动暂停
-- 无需用户配置
-### DOA 声源追踪
-- 声源方向检测
-- 唤醒时机器人转向声源
-- 可通过开关启用/禁用
----
-## Home Assistant 实体
-### 阶段 1：基础状态
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Daemon State | 文本传感器 | 机器人守护进程状态 |
-| Backend Ready | 二进制传感器 | 后端连接状态 |
-| Mute | 开关 | 暂停/恢复语音链路 |
-| Speaker Volume | 数值 (0-100%) | 扬声器音量控制 |
-| Disable Camera | 开关 | 暂停/恢复摄像头服务 |
-| Idle Behavior | 开关 | 统一空闲行为：头部、天线、微动作 |
-| Sendspin | 开关 | 启用/禁用 Sendspin 发现与播放 |
-| Face Tracking | 开关 | 启用/禁用人脸跟踪 |
-| Gesture Detection | 开关 | 启用/禁用手势检测 |
-| Face Confidence | 数值 (0-1) | 人脸跟踪置信度阈值 |
-### 阶段 2：睡眠与运行状态
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Sleep Control | 开关 | 打开表示进入睡眠，关闭表示唤醒 |
-| Sleep Mode | 二进制传感器 | 运行中表示唤醒，非运行表示睡眠 |
-| Services Suspended | 二进制传感器 | 运行中表示服务活跃 |
-### 阶段 3：姿态控制
-| 实体 | 类型 | 范围 |
-|------|------|------|
-| Head X/Y/Z | 数值 | ±50mm |
-| Head Roll/Pitch/Yaw | 数值 | ±40° |
-| Body Yaw | 数值 | ±160° |
-| Antenna Left/Right | 数值 | ±90° |
-### 阶段 4：注视控制
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Look At X/Y/Z | 数值 | 注视目标的世界坐标 |
-### 阶段 5：DOA（声源定位）
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| DOA Angle | 传感器 (°) | 声源方向 |
-| Speech Detected | 二进制传感器 | 语音活动检测 |
-| DOA Sound Tracking | 开关 | 启用/禁用 DOA 追踪 |
-### 阶段 6：诊断信息
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Control Loop Frequency | 传感器 (Hz) | 运动控制循环频率 |
-| SDK Version | 文本传感器 | Reachy Mini SDK 版本 |
-| Robot Name | 文本传感器 | 设备名称 |
-| Wireless Version | 二进制传感器 | 无线版本标志 |
-| Simulation Mode | 二进制传感器 | 仿真模式标志 |
-| WLAN IP | 文本传感器 | WiFi IP 地址 |
-| Error Message | 文本传感器 | 当前错误 |
-### 阶段 7：IMU 传感器（仅无线版本）
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| IMU Accel X/Y/Z | 传感器 (m/s²) | 加速度计 |
-| IMU Gyro X/Y/Z | 传感器 (rad/s) | 陀螺仪 |
-| IMU Temperature | 传感器 (°C) | IMU 温度 |
-### 阶段 8：情绪控制
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Emotion | 选择器 | 选择要播放的情绪（35 个选项）|
-### 阶段 10：摄像头
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Camera | 摄像头 | 实时 MJPEG 流 |
-### 3D 可视化卡片
-可在 Home Assistant 中安装自定义 Lovelace 卡片，实时 3D 可视化 Reachy Mini 机器人。
-安装地址：[ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
-功能：
-- 实时 3D 机器人可视化
-- 交互式机器人状态视图
-- 连接机器人守护进程获取实时更新
-### 阶段 21：对话
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Continuous Conversation | 开关 | 多轮对话模式 |
-### 阶段 22：手势检测
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Gesture Detected | 文本传感器 | 当前手势名称 |
-| Gesture Confidence | 传感器 (%) | 检测置信度 |
-### 阶段 23：人脸检测
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| Face Detected | 二进制传感器 | 视野中是否有人脸 |
-### 阶段 24：系统诊断
-| 实体 | 类型 | 说明 |
-|------|------|------|
-| CPU Percent | 传感器 (%) | CPU 使用率 |
-| CPU Temperature | 传感器 (°C) | CPU 温度 |
-| Memory Percent | 传感器 (%) | 内存使用率 |
-| Memory Used | 传感器 (GB) | 已用内存 |
-| Disk Percent | 传感器 (%) | 磁盘使用率 |
-| Disk Free | 传感器 (GB) | 磁盘可用空间 |
-| Uptime | 传感器 (hours) | 系统运行时间 |
-| Process CPU | 传感器 (%) | 应用 CPU 使用率 |
-| Process Memory | 传感器 (MB) | 应用内存使用 |
----
-## 睡眠模式
-运行时反应是零配置的：语音阶段、计时器提醒和 HA 状态触发情绪，共用同一套内建行为模型。
-### 进入睡眠
-- 在 Home Assistant 中打开 `Sleep Control` 开关
-- 机器人放松电机、停止摄像头、暂停语音检测
-### 唤醒
-- 在 Home Assistant 中关闭 `Sleep Control` 开关
-- 或说唤醒词
-- 机器人恢复所有功能
----
-## 故障排除
-| 问题 | 解决方案 |
-|------|----------|
-| 不响应唤醒词 | 检查 Mute 是否关闭，减少背景噪音，并确认已连接 Home Assistant |
-| 人脸追踪不工作 | 确保光线充足，检查 Face Detected 传感器 |
-| 没有音频输出 | 检查 Speaker Volume，验证 HA 中的 TTS 引擎 |
-| 无法连接 HA | 确认在同一网络，检查端口 6053 |
-| 手势检测不到 | 确保光线充足，正对摄像头 |
----
-## 快速参考
-```
-唤醒词：       "Okay Nabu"
-停止词：       "Stop"
-ESPHome 端口： 6053
-摄像头端口：   8081 (MJPEG)
-```
----
-*Reachy Mini 语音助手 v1.0.4*

docs/USER_MANUAL_EN.md DELETED Viewed

@@ -1,244 +0,0 @@
-# Reachy Mini Voice Assistant - User Manual
-## Requirements
-### Hardware
-- Reachy Mini robot (with ReSpeaker XVF3800 microphone)
-- WiFi network connection
-### Software
-- Home Assistant (2024.1 or later)
-- ESPHome integration enabled in Home Assistant
----
-## Installation
-### Step 1: Install the App
-Install `reachy_mini_home_assistant` from the Reachy Mini App Store.
-### Step 2: Start the App
-The app will automatically:
-- Start the ESPHome server on port 6053
-- Load pre-packaged wake word models
-- Register with mDNS for auto-discovery
-- Connect to Sendspin server if available on network
-### Step 3: Connect to Home Assistant
-**Automatic (Recommended):**
-Home Assistant will auto-discover Reachy Mini via mDNS.
-**Manual:**
-1. Go to Settings → Devices & Services
-2. Click "Add Integration"
-3. Select "ESPHome"
-4. Enter the robot's IP address and port 6053
----
-## Features
-### Voice Assistant
-- **Wake Word Detection**: Say "Okay Nabu" to activate (local processing)
-- **Stop Word**: Say "Stop" to end conversation
-- **Continuous Conversation Mode**: Keep talking without repeating wake word
-- **STT/TTS**: Uses Home Assistant's configured speech engines
-**Supported Wake Words:**
-- Okay Nabu (default)
-- Hey Jarvis
-- Alexa
-- Hey Luna
-### Face Tracking
-- YOLO-based face detection
-- Head follows detected face
-- Body follows head when turned far
-- Adaptive frame rate: 15fps active, 2fps idle
-- Runtime switchable from Home Assistant
-### Gesture Detection
-Detected gestures are published to Home Assistant as entity state updates.
-The default runtime does not trigger built-in robot actions from gestures.
-| Output | Description |
-|--------|-------------|
-| `gesture_detected` | Current gesture label |
-| `gesture_confidence` | Detection confidence |
-### Emotion Responses
-The robot can play 35 different emotions:
-- Basic: Happy, Sad, Angry, Fear, Surprise, Disgust
-- Extended: Laughing, Loving, Proud, Grateful, Enthusiastic, Curious, Amazed, Shy, Confused, Thoughtful, Anxious, Scared, Frustrated, Irritated, Furious, Contempt, Bored, Tired, Exhausted, Lonely, Downcast, Resigned, Uncertain, Uncomfortable
-### Audio Features
-- Speaker volume control (0-100%)
-- Mute switch for voice pipeline pause/resume
-- Wake sound and timer-finished sound playback
-- Home Assistant handles STT/TTS engines
-### Sendspin Multi-Room Audio
-- Automatic discovery of Sendspin servers via mDNS
-- Synchronized multi-room audio playback
-- Reachy Mini acts as a PLAYER to receive audio streams
-- Auto-pause during voice conversations
-- No user configuration required
-### DOA Sound Tracking
-- Direction of Arrival detection
-- Robot turns toward sound source on wake word
-- Can be enabled/disabled via switch
----
-## Home Assistant Entities
-### Phase 1: Basic Status
-| Entity | Type | Description |
-|--------|------|-------------|
-| Daemon State | Text Sensor | Robot daemon status |
-| Backend Ready | Binary Sensor | Backend connection status |
-| Mute | Switch | Suspend/resume voice pipeline |
-| Speaker Volume | Number (0-100%) | Speaker volume control |
-| Disable Camera | Switch | Suspend/resume camera service |
-| Idle Behavior | Switch | Unified idle motion + idle antenna + idle micro-actions |
-| Sendspin | Switch | Enable/disable Sendspin discovery and playback |
-| Face Tracking | Switch | Enable/disable face tracking |
-| Gesture Detection | Switch | Enable/disable gesture detection |
-| Face Confidence | Number (0-1) | Face tracking confidence threshold |
-### Phase 2: Sleep and Runtime State
-| Entity | Type | Description |
-|--------|------|-------------|
-| Sleep Control | Switch | Turn on to sleep, turn off to wake |
-| Sleep Mode | Binary Sensor | Running when awake, not running when sleeping |
-| Services Suspended | Binary Sensor | Running when services are active |
-### Phase 3: Pose Control
-| Entity | Type | Range |
-|--------|------|-------|
-| Head X/Y/Z | Number | ±50mm |
-| Head Roll/Pitch/Yaw | Number | ±40° |
-| Body Yaw | Number | ±160° |
-| Antenna Left/Right | Number | ±90° |
-### Phase 4: Look At Control
-| Entity | Type | Description |
-|--------|------|-------------|
-| Look At X/Y/Z | Number | World coordinates for gaze target |
-### Phase 5: DOA (Direction of Arrival)
-| Entity | Type | Description |
-|--------|------|-------------|
-| DOA Angle | Sensor (°) | Sound source direction |
-| Speech Detected | Binary Sensor | Voice activity detection |
-| DOA Sound Tracking | Switch | Enable/disable DOA tracking |
-### Phase 6: Diagnostics
-| Entity | Type | Description |
-|--------|------|-------------|
-| Control Loop Frequency | Sensor (Hz) | Motion control loop rate |
-| SDK Version | Text Sensor | Reachy Mini SDK version |
-| Robot Name | Text Sensor | Device name |
-| Wireless Version | Binary Sensor | Wireless model flag |
-| Simulation Mode | Binary Sensor | Simulation flag |
-| WLAN IP | Text Sensor | WiFi IP address |
-| Error Message | Text Sensor | Current error |
-### Phase 7: IMU Sensors (Wireless version only)
-| Entity | Type | Description |
-|--------|------|-------------|
-| IMU Accel X/Y/Z | Sensor (m/s²) | Accelerometer |
-| IMU Gyro X/Y/Z | Sensor (rad/s) | Gyroscope |
-| IMU Temperature | Sensor (°C) | IMU temperature |
-### Phase 8: Emotion Control
-| Entity | Type | Description |
-|--------|------|-------------|
-| Emotion | Select | Choose emotion to play (35 options) |
-### Phase 10: Camera
-| Entity | Type | Description |
-|--------|------|-------------|
-| Camera | Camera | Live MJPEG stream |
-### 3D Visualization Card
-A custom Lovelace card is available for real-time 3D visualization of the Reachy Mini robot in Home Assistant.
-Install from: [ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
-Features:
-- Real-time 3D robot visualization
-- Interactive view of robot state
-- Connects to robot daemon for live updates
-### Phase 21: Conversation
-| Entity | Type | Description |
-|--------|------|-------------|
-| Continuous Conversation | Switch | Multi-turn conversation mode |
-### Phase 22: Gesture Detection
-| Entity | Type | Description |
-|--------|------|-------------|
-| Gesture Detected | Text Sensor | Current gesture name |
-| Gesture Confidence | Sensor (%) | Detection confidence |
-### Phase 23: Face Detection
-| Entity | Type | Description |
-|--------|------|-------------|
-| Face Detected | Binary Sensor | Face in view |
-### Phase 24: System Diagnostics
-| Entity | Type | Description |
-|--------|------|-------------|
-| CPU Percent | Sensor (%) | CPU usage |
-| CPU Temperature | Sensor (°C) | CPU temperature |
-| Memory Percent | Sensor (%) | RAM usage |
-| Memory Used | Sensor (GB) | RAM used |
-| Disk Percent | Sensor (%) | Disk usage |
-| Disk Free | Sensor (GB) | Disk free space |
-| Uptime | Sensor (hours) | System uptime |
-| Process CPU | Sensor (%) | App CPU usage |
-| Process Memory | Sensor (MB) | App memory usage |
----
-## Sleep Mode
-Runtime reactions are zero-config: voice phases, timer alerts, and HA state-triggered emotions use the same built-in behavior model.
-### Enter Sleep
-- Turn on the `Sleep Control` switch in Home Assistant
-- Robot relaxes motors, stops camera, pauses voice detection
-### Wake Up
-- Turn off the `Sleep Control` switch in Home Assistant
-- Or say the wake word
-- Robot resumes all functions
----
-## Troubleshooting
-| Problem | Solution |
-|---------|----------|
-| Not responding to wake word | Check Mute is off, reduce background noise, verify Home Assistant is connected |
-| Face tracking not working | Ensure adequate lighting, check Face Detected sensor |
-| No audio output | Check Speaker Volume, verify TTS engine in HA |
-| Can't connect to HA | Verify same network, check port 6053 |
-| Gestures not detected | Ensure good lighting, face the camera directly |
----
-## Quick Reference
-```
-Wake Word:     "Okay Nabu"
-Stop Word:     "Stop"
-ESPHome Port:  6053
-Camera Port:   8081 (MJPEG)
-```
----
-*Reachy Mini Voice Assistant v1.0.4*

home_assistant_blueprints/reachy_mini_presence_companion.yaml DELETED Viewed

@@ -1,246 +0,0 @@
-blueprint:
-  name: Reachy Mini Presence Companion
-  description: >-
-    Presence-driven automation for Reachy Mini in Home Assistant.
-    How to use:
-    1) Select Home occupancy entity (person/group/binary_sensor).
-    2) Select Reachy ESPHome device (recommended).
-    3) Leave optional fallback entity inputs empty unless auto-binding fails.
-    4) Set away delay and day/night volume.
-    What this automation does:
-    - Occupied: Wake Reachy, enable unified idle behavior, set day volume.
-    - Empty (after delay): Disable unified idle behavior, send Reachy to sleep.
-    - Quiet hours start/end: Apply night/day volume while occupied.
-    Auto-binding rules (when Reachy device is selected):
-    - Sleep switch suffix: sleep_control
-    - Idle behavior switch suffix: idle_behavior_enabled
-    - Volume number suffix: speaker_volume
-    If your entities use different names, fill optional fallback entity inputs manually.
-  domain: automation
-  input:
-    occupancy_entity:
-      name: Home occupancy entity
-      description: Person, group, or binary sensor representing home presence.
-      selector:
-        entity: {}
-    reachy_device:
-      name: Reachy device (recommended)
-      description: Select your Reachy ESPHome device for automatic entity binding.
-      default: ""
-      selector:
-        device:
-          filter:
-            - integration: esphome
-    reachy_sleep_switch:
-      name: Sleep Control switch (optional fallback)
-      description: Leave empty to auto-bind from Reachy device.
-      default: ""
-      selector:
-        entity:
-          domain: switch
-    idle_behavior_switch:
-      name: Idle Behavior switch (optional fallback)
-      description: Leave empty to auto-bind from Reachy device.
-      default: ""
-      selector:
-        entity:
-          domain: switch
-    reachy_volume_number:
-      name: Speaker Volume number (optional fallback)
-      description: Leave empty to auto-bind from Reachy device.
-      default: ""
-      selector:
-        entity:
-          domain: number
-    away_delay_minutes:
-      name: Away delay (minutes)
-      description: Wait before sleeping after everyone leaves.
-      default: 20
-      selector:
-        number:
-          min: 1
-          max: 180
-          mode: box
-          unit_of_measurement: min
-    day_volume:
-      name: Day volume
-      default: 80
-      selector:
-        number:
-          min: 0
-          max: 100
-          step: 1
-          mode: slider
-    night_volume:
-      name: Night volume
-      default: 35
-      selector:
-        number:
-          min: 0
-          max: 100
-          step: 1
-          mode: slider
-    quiet_start:
-      name: Quiet hours start
-      default: "22:30:00"
-      selector:
-        time: {}
-    quiet_end:
-      name: Quiet hours end
-      default: "07:30:00"
-      selector:
-        time: {}
-mode: restart
-variables:
-  occupancy_entity: !input occupancy_entity
-  reachy_device: !input reachy_device
-  manual_sleep_switch: !input reachy_sleep_switch
-  manual_idle_behavior_switch: !input idle_behavior_switch
-  manual_volume_number: !input reachy_volume_number
-  day_volume: !input day_volume
-  night_volume: !input night_volume
-  device_entities_list: >-
-    {{ device_entities(reachy_device) if reachy_device else [] }}
-  sleep_switch_auto: >-
-    {{ (device_entities_list | select('match', '^switch\..*sleep_control$') | list | first) or '' }}
-  idle_behavior_switch_auto: >-
-    {{ (device_entities_list | select('match', '^switch\..*idle_behavior_enabled$') | list | first) or '' }}
-  volume_number_auto: >-
-    {{ (device_entities_list | select('match', '^number\..*speaker_volume$') | list | first) or '' }}
-  sleep_switch: >-
-    {{ manual_sleep_switch if manual_sleep_switch else sleep_switch_auto }}
-  idle_behavior_switch: >-
-    {{ manual_idle_behavior_switch if manual_idle_behavior_switch else idle_behavior_switch_auto }}
-  volume_number: >-
-    {{ manual_volume_number if manual_volume_number else volume_number_auto }}
-  is_occupied: >-
-    {{ states(occupancy_entity) in ['home', 'on'] }}
-trigger:
-  - platform: state
-    id: occupied_home
-    entity_id: !input occupancy_entity
-    to: "home"
-  - platform: state
-    id: occupied_on
-    entity_id: !input occupancy_entity
-    to: "on"
-  - platform: state
-    id: empty_not_home
-    entity_id: !input occupancy_entity
-    to: "not_home"
-    for:
-      minutes: !input away_delay_minutes
-  - platform: state
-    id: empty_off
-    entity_id: !input occupancy_entity
-    to: "off"
-    for:
-      minutes: !input away_delay_minutes
-  - platform: time
-    id: quiet_start
-    at: !input quiet_start
-  - platform: time
-    id: quiet_end
-    at: !input quiet_end
-action:
-  - choose:
-      - conditions:
-          - condition: template
-            value_template: "{{ trigger.id in ['occupied_home', 'occupied_on'] }}"
-        sequence:
-          - if:
-              - condition: template
-                value_template: "{{ sleep_switch != '' }}"
-            then:
-              - service: switch.turn_off
-                target:
-                  entity_id: "{{ sleep_switch }}"
-          - if:
-              - condition: template
-                value_template: "{{ idle_behavior_switch != '' }}"
-            then:
-              - service: switch.turn_on
-                target:
-                  entity_id: "{{ idle_behavior_switch }}"
-          - if:
-              - condition: template
-                value_template: "{{ volume_number != '' }}"
-            then:
-              - service: number.set_value
-                target:
-                  entity_id: "{{ volume_number }}"
-                data:
-                  value: "{{ day_volume }}"
-      - conditions:
-          - condition: template
-            value_template: "{{ trigger.id in ['empty_not_home', 'empty_off'] }}"
-        sequence:
-          - if:
-              - condition: template
-                value_template: "{{ idle_behavior_switch != '' }}"
-            then:
-              - service: switch.turn_off
-                target:
-                  entity_id: "{{ idle_behavior_switch }}"
-          - if:
-              - condition: template
-                value_template: "{{ sleep_switch != '' }}"
-            then:
-              - service: switch.turn_on
-                target:
-                  entity_id: "{{ sleep_switch }}"
-      - conditions:
-          - condition: template
-            value_template: "{{ trigger.id == 'quiet_start' and is_occupied }}"
-        sequence:
-          - if:
-              - condition: template
-                value_template: "{{ volume_number != '' }}"
-            then:
-              - service: number.set_value
-                target:
-                  entity_id: "{{ volume_number }}"
-                data:
-                  value: "{{ night_volume }}"
-      - conditions:
-          - condition: template
-            value_template: "{{ trigger.id == 'quiet_end' and is_occupied }}"
-        sequence:
-          - if:
-              - condition: template
-                value_template: "{{ volume_number != '' }}"
-            then:
-              - service: number.set_value
-                target:
-                  entity_id: "{{ volume_number }}"
-                data:
-                  value: "{{ day_volume }}"

index.html CHANGED Viewed

@@ -18,24 +18,21 @@
 				<span class="brand-name">Reachy Mini for Home Assistant</span>
 			</div>
 			<div class="pill">Voice · Gestures · Smart Home</div>
-			<div class="version-pill" id="version-pill">v1.0.4</div>
 		</div>
 		<div class="hero-grid">
 			<div class="hero-copy">
 				<p class="eyebrow">Reachy Mini App</p>
 				<h1>Your robot meets your Home Assistant.</h1>
 				<p class="lede">
-					Transform Reachy Mini Wi-Fi into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
 				</p>
 				<div class="hero-actions">
-					<a class="btn primary" href="#requirements">Requirements</a>
 					<a class="btn ghost" href="#install">Quick Start</a>
-					<a class="btn ghost" href="#features">Features</a>
 				</div>
 				<div class="hero-badges">
 					<span>🎤 Wake Word</span>
 					<span>👀 Face Tracking</span>
-					<span>🔄 Body Following</span>
 					<span>🤚 18 Gestures</span>
 					<span>🔊 Multi-room Audio</span>
 					<span>⚡ Zero Config</span>
@@ -50,74 +47,11 @@
 		</div>
 	</header>
-	<section id="requirements" class="section">
-		<div class="section-header">
-			<p class="eyebrow">Before You Start</p>
-			<h2>Requirements</h2>
-			<p class="intro">Make sure you have everything ready for a smooth setup.</p>
-		</div>
-		<div class="requirements-grid">
-			<div class="requirement-card">
-				<span class="icon">🤖</span>
-				<h3>Reachy Mini Wi-Fi</h3>
-				<p>This app requires the <strong>Wi-Fi version</strong> of Reachy Mini. The USB version has not been validated</p>
-			</div>
-			<div class="requirement-card">
-				<span class="icon">🏠</span>
-				<h3>Home Assistant</h3>
-				<p>A running Home Assistant instance </p>
-			</div>
-			<div class="requirement-card">
-				<span class="icon">📶</span>
-				<h3>Same Network</h3>
-				<p>Both Reachy Mini and Home Assistant must be on the <strong>same local network</strong>.</p>
-			</div>
-			<div class="requirement-card">
-				<span class="icon">🎙️</span>
-				<h3>Voice Pipeline</h3>
-				<p>Configure a <strong>Voice Assistant pipeline</strong> in Home Assistant (STT + TTS + LLM).</p>
-			</div>
-		</div>
-	</section>
-	<section id="install" class="section story">
-		<div class="section-header">
-			<p class="eyebrow">Getting Started</p>
-			<h2>Quick Start</h2>
-			<p class="intro">Install and connect in under a minute. No configuration needed.</p>
-		</div>
-		<div class="story-grid">
-			<div class="story-card">
-				<p class="eyebrow">Installation</p>
-				<h3>Up and running in 1 minute</h3>
-				<ul class="story-list">
-					<li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
-					<li><span>2️⃣</span> Enable "Show community apps"</li>
-					<li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
-					<li><span>4️⃣</span> Home Assistant discovers automatically</li>
-				</ul>
-			</div>
-			<div class="story-card secondary">
-				<p class="eyebrow">How it works</p>
-				<h3>Seamless integration</h3>
-				<p class="story-text">
-					This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds the robot entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
-				</p>
-				<div class="chips">
-					<span class="chip">ESPHome Protocol</span>
-					<span class="chip">mDNS Discovery</span>
-					<span class="chip">Robot Entities</span>
-					<span class="chip">Zero Config</span>
-				</div>
-			</div>
-		</div>
-	</section>
 	<section id="features" class="section features">
 		<div class="section-header">
 			<p class="eyebrow">Capabilities</p>
 			<h2>Everything you need for smart home control</h2>
-			<p class="intro">Zero-configuration robot entities, built-in reactions, and auto-discovery via mDNS.</p>
 		</div>
 		<div class="feature-grid">
 			<div class="feature-card">
@@ -133,17 +67,17 @@
 			<div class="feature-card">
 				<span class="icon">👀</span>
 				<h3>Face Tracking</h3>
-				<p>YOLO-based face detection with body following. Head and body move together naturally to track you during conversations.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">🤚</span>
 				<h3>Gesture Detection</h3>
-				<p>HaGRID ONNX models recognize hand gestures and publish the detected gesture label and confidence to Home Assistant entities.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">😊</span>
 				<h3>Expressive Motion</h3>
-				<p>Built-in listening, thinking, speaking, timer, and emotion reactions with natural head sway and non-blocking motion during conversations.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">📹</span>
@@ -158,22 +92,40 @@
 			<div class="feature-card">
 				<span class="icon">⚡</span>
 				<h3>Zero Configuration</h3>
-				<p>Install and go. mDNS auto-discovery and built-in HA reactions mean the default experience works without extra setup.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">🃏</span>
 				<h3>Dashboard Card</h3>
 				<p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
 			</div>
-			<div class="feature-card">
-				<span class="icon">🧩</span>
-				<h3>HA Blueprint</h3>
-				<p>Device-first Home Assistant blueprint for presence automations using the current zero-config model: sleep control, idle behavior, and speaker volume.</p>
 			</div>
-			<div class="feature-card">
-				<span class="icon">🚀</span>
-				<h3>Auto Release</h3>
-				<p>Version-driven GitHub release workflow. Update pyproject/changelog, then release is created automatically.</p>
 			</div>
 		</div>
 	</section>
@@ -197,15 +149,6 @@
 		fetch('changelog.json')
 			.then(res => res.json())
 			.then(data => {
-				// Update version pill with latest version
-				if (data.length > 0) {
-					const versionPill = document.getElementById('version-pill');
-					if (versionPill) {
-						versionPill.textContent = `v${data[0].version}`;
-					}
-				}
-				// Populate changelog grid
 				const mainGrid = document.getElementById('changelog-grid');
 				const olderGrid = document.getElementById('changelog-older');
 				data.forEach((item, index) => {
@@ -236,15 +179,10 @@
 				<h3>HA Dashboard Card</h3>
 				<p>Lovelace Card for HA</p>
 			</a>
-			<a href="https://github.com/ha-china/Reachy_Mini_For_Home_Assistant" target="_blank" class="link-card">
 				<span class="icon">📦</span>
 				<h3>Source Code</h3>
-				<p>GitHub Repository</p>
-			</a>
-			<a href="home_assistant_blueprints/reachy_mini_presence_companion.yaml" target="_blank" class="link-card">
-				<span class="icon">🧩</span>
-				<h3>HA Blueprint</h3>
-				<p>Presence Companion YAML</p>
 			</a>
 			<a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
 				<span class="icon">🤖</span>
@@ -279,7 +217,7 @@
 			<a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
 				<span class="icon">✋</span>
 				<h3>Dynamic Gestures</h3>
-				<p>Reference Project</p>
 			</a>
 			<a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
 				<span class="icon">🔊</span>

 				<span class="brand-name">Reachy Mini for Home Assistant</span>
 			</div>
 			<div class="pill">Voice · Gestures · Smart Home</div>
 		</div>
 		<div class="hero-grid">
 			<div class="hero-copy">
 				<p class="eyebrow">Reachy Mini App</p>
 				<h1>Your robot meets your Home Assistant.</h1>
 				<p class="lede">
+					Transform Reachy Mini into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
 				</p>
 				<div class="hero-actions">
+					<a class="btn primary" href="#features">Explore Features</a>
 					<a class="btn ghost" href="#install">Quick Start</a>
 				</div>
 				<div class="hero-badges">
 					<span>🎤 Wake Word</span>
 					<span>👀 Face Tracking</span>
 					<span>🤚 18 Gestures</span>
 					<span>🔊 Multi-room Audio</span>
 					<span>⚡ Zero Config</span>
 		</div>
 	</header>
 	<section id="features" class="section features">
 		<div class="section-header">
 			<p class="eyebrow">Capabilities</p>
 			<h2>Everything you need for smart home control</h2>
+			<p class="intro">45+ Home Assistant entities. Zero configuration. Auto-discovery via mDNS.</p>
 		</div>
 		<div class="feature-grid">
 			<div class="feature-card">
 			<div class="feature-card">
 				<span class="icon">👀</span>
 				<h3>Face Tracking</h3>
+				<p>YOLO-based face detection. Reachy looks at you during conversations with adaptive frame rate optimization.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">🤚</span>
 				<h3>Gesture Detection</h3>
+				<p>HaGRID ONNX models recognize 18 hand gestures: 👍👎✌️🤘👌✊🤙🤫 and more.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">😊</span>
 				<h3>Expressive Motion</h3>
+				<p>Real-time audio-driven animations. Natural head sway and antenna movements during conversations.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">📹</span>
 			<div class="feature-card">
 				<span class="icon">⚡</span>
 				<h3>Zero Configuration</h3>
+				<p>Install and go. mDNS auto-discovery means Home Assistant finds your robot automatically.</p>
 			</div>
 			<div class="feature-card">
 				<span class="icon">🃏</span>
 				<h3>Dashboard Card</h3>
 				<p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
 			</div>
+		</div>
+	</section>
+	<section id="install" class="section story">
+		<div class="story-grid">
+			<div class="story-card">
+				<p class="eyebrow">Installation</p>
+				<h3>Up and running in 1 minute</h3>
+				<ul class="story-list">
+					<li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
+					<li><span>2️⃣</span> Enable "Show community apps"</li>
+					<li><span>3️⃣</span> Install "reachy_mini_ha_voice"</li>
+					<li><span>4️⃣</span> Home Assistant discovers automatically</li>
+				</ul>
 			</div>
+			<div class="story-card secondary">
+				<p class="eyebrow">How it works</p>
+				<h3>Seamless integration</h3>
+				<p class="story-text">
+					This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
+				</p>
+				<div class="chips">
+					<span class="chip">ESPHome Protocol</span>
+					<span class="chip">mDNS Discovery</span>
+					<span class="chip">45+ Entities</span>
+					<span class="chip">Zero Config</span>
+				</div>
 			</div>
 		</div>
 	</section>
 		fetch('changelog.json')
 			.then(res => res.json())
 			.then(data => {
 				const mainGrid = document.getElementById('changelog-grid');
 				const olderGrid = document.getElementById('changelog-older');
 				data.forEach((item, index) => {
 				<h3>HA Dashboard Card</h3>
 				<p>Lovelace Card for HA</p>
 			</a>
+			<a href="https://huggingface.co/spaces/djhui5710/reachy_mini_ha_voice/tree/main" target="_blank" class="link-card">
 				<span class="icon">📦</span>
 				<h3>Source Code</h3>
+				<p>HuggingFace Spaces</p>
 			</a>
 			<a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
 				<span class="icon">🤖</span>
 			<a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
 				<span class="icon">✋</span>
 				<h3>Dynamic Gestures</h3>
+				<p>ONNX Models</p>
 			</a>
 			<a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
 				<span class="icon">🔊</span>

pyproject.toml CHANGED Viewed

@@ -1,24 +1,25 @@
 [build-system]
-requires = ["setuptools"]
 build-backend = "setuptools.build_meta"
 [project]
-name = "reachy_mini_home_assistant"
-version = "1.0.7"
-description = "Deep integration of Reachy Mini robot with Home Assistant"
 readme = "README.md"
-requires-python = ">=3.12"
 license = {text = "Apache-2.0"}
 dependencies = [
-    # Reachy Mini SDK with gstreamer support (for camera streaming)
-    "reachy-mini>=1.7.1",
-    # Audio processing (for audio file analysis)
     "soundfile>=0.13.0",
-    "numpy>=2.2.5,<=2.2.5",
     # Camera streaming
-    "opencv-python>=4.12.0.88",
     # Wake word detection (local)
     # STT/TTS is handled by Home Assistant, not locally
@@ -27,36 +28,26 @@ dependencies = [
     # ESPHome protocol (communication with Home Assistant)
     "aioesphomeapi>=43.10.1",
-    "zeroconf>=0.131,<1",
-    "websockets>=12,<16",
-    "aiohttp",
     # Motion control (head movements)
-    "scipy>=1.15.3,<2.0.0",
     # Face tracking (YOLO-based head detection)
-    "ultralytics",
-    "supervision",
     # Sendspin synchronized audio (optional, for multi-room playback)
-    "aiosendspin>=5.1,<6.0",
     # Gesture detection (ONNX runtime for HaGRID models)
     "onnxruntime>=1.18.0",
-    # PyTorch (for vision models)
-    "torch==2.5.1",
-    "torchvision==0.20.1",
-    # Compatibility with system packages (gradio, etc.)
-    "pillow<12.0",
-    "pydantic<=2.12.5",
-    "requests>=2.33.0",
 ]
 keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
 [project.entry-points."reachy_mini_apps"]
-reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"
 [tool.setuptools]
 package-dir = { "" = "." }
@@ -66,114 +57,4 @@ include-package-data = true
 where = ["."]
 [tool.setuptools.package-data]
-"*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx", "*.pt"]
-# ============================================================================
-# Ruff - Fast Python linter and formatter
-# ============================================================================
-[tool.ruff]
-target-version = "py312"
-line-length = 120
-src = ["reachy_mini_home_assistant"]
-# Exclude reference code and generated files
-exclude = [
-    "reference/",
-    "__pycache__",
-    ".git",
-    "*.egg-info",
-]
-[dependency-groups]
-dev = [
-    "ruff==0.15.4",
-    "mypy==1.20.0",
-]
-[tool.uv]
-dependency-metadata = [
-    { name = "gstreamer-libs", version = "1.28.1", requires-dist = ["gstreamer-msvc-runtime; sys_platform == 'win32'", "setuptools"] },
-]
-[tool.ruff.lint]
-select = [
-    "E",      # pycodestyle errors
-    "W",      # pycodestyle warnings
-    "F",      # Pyflakes
-    "I",      # isort (import sorting)
-    "B",      # flake8-bugbear (common bugs)
-    "C4",     # flake8-comprehensions
-    "UP",     # pyupgrade (modern Python syntax)
-    "SIM",    # flake8-simplify
-    "TCH",    # flake8-type-checking (TYPE_CHECKING optimization)
-    "RUF",    # Ruff-specific rules
-    "PTH",    # flake8-use-pathlib
-    "PL",     # Pylint
-]
-ignore = [
-    "E501",     # line too long (handled by formatter)
-    "PLR0913",  # too many arguments (common in robot control)
-    "PLR2004",  # magic value comparison (many thresholds in motion code)
-    "PLR0912",  # too many branches
-    "PLR0915",  # too many statements
-    "PLR0911",  # too many return statements
-    "SIM108",   # use ternary operator (sometimes less readable)
-    "B008",     # function call in default argument (used for field factories)
-    # The following are intentional patterns in this codebase:
-    "PLC0415",  # import-outside-top-level (lazy imports for optional deps)
-    "PLW0603",  # global-statement (used for singletons)
-    "SIM102",   # collapsible-if (sometimes more readable expanded)
-    "SIM105",   # suppressible-exception (explicit try/except is clearer)
-    "PTH123",   # builtin-open (pathlib not always better)
-    "PTH108",   # os-unlink (pathlib not always better)
-    "RUF013",   # implicit-optional (legacy code)
-    "TC002",    # third-party import (numpy is required at runtime)
-]
-[tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["F401"]  # unused imports in __init__ are intentional
-[tool.ruff.lint.isort]
-known-first-party = ["reachy_mini_home_assistant"]
-# ============================================================================
-# Mypy - Static type checker
-# ============================================================================
-[tool.mypy]
-python_version = "3.12"
-warn_return_any = false  # Too noisy for mixed typed/untyped codebase
-warn_unused_ignores = true
-disallow_untyped_defs = false  # Start lenient, can tighten later
-check_untyped_defs = false  # Too strict for initial setup
-ignore_missing_imports = true  # Many robot SDK libs lack type stubs
-no_implicit_optional = false  # Allow implicit Optional for now
-# Disable some checks that are too strict for this codebase
-disable_error_code = [
-    "union-attr",  # Too many Optional accesses without None checks
-    "no-redef",    # Class redefinitions for SDK compatibility
-    "attr-defined",  # Some dynamic attributes from SDK
-    "assignment",  # Variable type changes (common in Python)
-    "arg-type",    # Argument type mismatches (often SDK issues)
-    "unused-ignore",  # Type ignore comments from before config
-    "return-value",  # Return type mismatches (often fine)
-    "no-untyped-def",  # Missing type annotations (too strict initially)
-    "valid-type",  # Type validity (some edge cases)
-    "has-type",    # Cannot determine type
-    "call-arg",    # Too few/many arguments
-    "import-untyped",  # Missing stubs for third-party libs
-    "misc",        # Miscellaneous errors
-]
-exclude = [
-    "reference/",
-    "tests/",
-]
-# Stricter checking for core modules (can enable gradually)
-[[tool.mypy.overrides]]
-module = [
-    "reachy_mini_home_assistant.core.*",
-    "reachy_mini_home_assistant.motion.smoothing",
-    "reachy_mini_home_assistant.motion.pose_composer",
-]
-disallow_untyped_defs = true
-warn_unreachable = true

 [build-system]
+requires = ["setuptools>=61.0"]
 build-backend = "setuptools.build_meta"
 [project]
+name = "reachy_mini_ha_voice"
+version = "0.7.3"
+description = "Home Assistant Voice Assistant for Reachy Mini"
 readme = "README.md"
+requires-python = ">=3.10"
 license = {text = "Apache-2.0"}
 dependencies = [
+    # Reachy Mini SDK (provides audio via media system)
+    "reachy-mini",
+    # Audio processing (fallback when not on Reachy Mini)
+    "sounddevice>=0.5.0",
     "soundfile>=0.13.0",
+    "numpy>=2.0.0",
     # Camera streaming
+    "opencv-python>=4.10.0",
     # Wake word detection (local)
     # STT/TTS is handled by Home Assistant, not locally
     # ESPHome protocol (communication with Home Assistant)
     "aioesphomeapi>=43.10.1",
+    "zeroconf>=0.140.0",
     # Motion control (head movements)
+    "scipy>=1.14.0",
     # Face tracking (YOLO-based head detection)
+    "ultralytics>=8.3.0",
+    "supervision>=0.25.0",
+    "huggingface_hub>=0.27.0",
     # Sendspin synchronized audio (optional, for multi-room playback)
+    "aiosendspin>=2.0.1",
     # Gesture detection (ONNX runtime for HaGRID models)
     "onnxruntime>=1.18.0",
 ]
 keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
 [project.entry-points."reachy_mini_apps"]
+reachy_mini_ha_voice = "reachy_mini_ha_voice.main:ReachyMiniHaVoice"
 [tool.setuptools]
 package-dir = { "" = "." }
 where = ["."]
 [tool.setuptools.package-data]
+"*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx"]

{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py RENAMED Viewed

@@ -1,29 +1,24 @@
-"""
-Reachy Mini for Home Assistant
-A deep integration app combining Reachy Mini robot with Home Assistant,
-enabling voice control, smart home automation, and expressive robot interactions.
-Key features:
-- Local wake word detection (microWakeWord/openWakeWord)
-- ESPHome protocol for seamless Home Assistant communication
-- STT/TTS powered by Home Assistant voice pipeline
-- Reachy Mini motion control with expressive animations
-- Camera streaming and gesture detection
-- Smart home entity control through natural voice commands
-"""
-try:
-    from importlib.metadata import version
-    __version__ = version("reachy_mini_home_assistant")
-except Exception:
-    __version__ = "0.0.0"  # Fallback for development
-__author__ = "Desmond Dong"
-# Don't import main module here to avoid runpy warning
-# The app is loaded via entry point: reachy_mini_home_assistant.main:ReachyMiniHaVoiceApp
-__all__ = [
-    "__version__",
-]

+"""
+Reachy Mini for Home Assistant
+A deep integration app combining Reachy Mini robot with Home Assistant,
+enabling voice control, smart home automation, and expressive robot interactions.
+Key features:
+- Local wake word detection (microWakeWord/openWakeWord)
+- ESPHome protocol for seamless Home Assistant communication
+- STT/TTS powered by Home Assistant voice pipeline
+- Reachy Mini motion control with expressive animations
+- Camera streaming and gesture detection
+- Smart home entity control through natural voice commands
+"""
+__version__ = "0.7.3"
+__author__ = "Desmond Dong"
+# Don't import main module here to avoid runpy warning
+# The app is loaded via entry point: reachy_mini_ha_voice.main:ReachyMiniHaVoice
+__all__ = [
+    "__version__",
+]

{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py RENAMED Viewed

@@ -2,7 +2,7 @@
 """Main entry point for Reachy Mini for Home Assistant.
 This module provides a command-line interface for running the voice assistant
-without the ReachyMini App framework.
 """
 import argparse
@@ -10,17 +10,17 @@ import asyncio
 import logging
 import threading
-from .protocol.zeroconf import get_default_friendly_name
 _LOGGER = logging.getLogger(__name__)
 async def main() -> None:
-    parser = argparse.ArgumentParser(description="Reachy Mini for Home Assistant")
     parser.add_argument(
         "--name",
-        default=get_default_friendly_name(),
-        help="Name of the voice assistant (default: auto-generated from MAC)",
     )
     parser.add_argument(
         "--host",
@@ -49,6 +49,11 @@ async def main() -> None:
         action="store_true",
         help="Disable camera server",
     )
     parser.add_argument(
         "--debug",
         action="store_true",
@@ -63,53 +68,59 @@ async def main() -> None:
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
-    # Initialize Reachy Mini (required)
-    from reachy_mini import ReachyMini
-    with ReachyMini() as reachy_mini:
-        _LOGGER.info("Reachy Mini connected")
-        # Import and create VoiceAssistantService
-        from .voice_assistant import VoiceAssistantService
-        service = VoiceAssistantService(
-            reachy_mini=reachy_mini,
-            name=args.name,
-            host=args.host,
-            port=args.port,
-            wake_model=args.wake_model,
-            camera_port=args.camera_port,
-            camera_enabled=not args.no_camera,
-        )
-        # Create stop event for graceful shutdown
-        stop_event = threading.Event()
         try:
-            await service.start()
-            _LOGGER.info("=" * 50)
-            _LOGGER.info("Reachy Mini Voice Assistant Started")
-            _LOGGER.info("=" * 50)
-            _LOGGER.info("Name: %s", args.name)
-            _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
-            _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
-            _LOGGER.info("Motion control: enabled")
-            _LOGGER.info("=" * 50)
-            _LOGGER.info("Add this device in Home Assistant:")
-            _LOGGER.info("  Settings -> Devices & Services -> Add Integration -> ESPHome")
-            _LOGGER.info("  Enter: <this-device-ip>:%s", args.port)
-            _LOGGER.info("=" * 50)
-            # Wait for stop signal
-            while not stop_event.is_set():
-                await asyncio.sleep(0.5)
-        except KeyboardInterrupt:
-            _LOGGER.info("Shutting down...")
-        finally:
-            await service.stop()
-            _LOGGER.info("Voice assistant stopped")
 def run():

 """Main entry point for Reachy Mini for Home Assistant.
 This module provides a command-line interface for running the voice assistant
+in standalone mode (without the ReachyMini App framework).
 """
 import argparse
 import logging
 import threading
 _LOGGER = logging.getLogger(__name__)
 async def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Reachy Mini for Home Assistant"
+    )
     parser.add_argument(
         "--name",
+        default="Reachy Mini",
+        help="Name of the voice assistant (default: Reachy Mini)",
     )
     parser.add_argument(
         "--host",
         action="store_true",
         help="Disable camera server",
     )
+    parser.add_argument(
+        "--no-motion",
+        action="store_true",
+        help="Disable Reachy Mini motion control",
+    )
     parser.add_argument(
         "--debug",
         action="store_true",
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
+    # Initialize Reachy Mini (if available)
+    reachy_mini = None
+    if not args.no_motion:
         try:
+            from reachy_mini import ReachyMini
+            reachy_mini = ReachyMini()
+            _LOGGER.info("Reachy Mini connected")
+        except ImportError:
+            _LOGGER.warning("reachy-mini not installed, motion control disabled")
+        except Exception as e:
+            _LOGGER.warning("Failed to connect to Reachy Mini: %s", e)
+    # Import and create VoiceAssistantService
+    from .voice_assistant import VoiceAssistantService
+    service = VoiceAssistantService(
+        reachy_mini=reachy_mini,
+        name=args.name,
+        host=args.host,
+        port=args.port,
+        wake_model=args.wake_model,
+        camera_port=args.camera_port,
+        camera_enabled=not args.no_camera,
+    )
+    # Create stop event for graceful shutdown
+    stop_event = threading.Event()
+    try:
+        await service.start()
+        _LOGGER.info("=" * 50)
+        _LOGGER.info("Reachy Mini Voice Assistant Started")
+        _LOGGER.info("=" * 50)
+        _LOGGER.info("Name: %s", args.name)
+        _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
+        _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
+        _LOGGER.info("Motion control: %s", "enabled" if reachy_mini else "disabled")
+        _LOGGER.info("=" * 50)
+        _LOGGER.info("Add this device in Home Assistant:")
+        _LOGGER.info("  Settings -> Devices & Services -> Add Integration -> ESPHome")
+        _LOGGER.info("  Enter: <this-device-ip>:%s", args.port)
+        _LOGGER.info("=" * 50)
+        # Wait for stop signal
+        while not stop_event.is_set():
+            await asyncio.sleep(0.5)
+    except KeyboardInterrupt:
+        _LOGGER.info("Shutting down...")
+    finally:
+        await service.stop()
+        _LOGGER.info("Voice assistant stopped")
 def run():

{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py RENAMED Viewed

@@ -8,6 +8,7 @@ on top of other movements. The speaking animation uses multi-frequency
 oscillators for more natural head sway.
 """
 import logging
 import math
 import random
@@ -15,20 +16,17 @@ import threading
 import time
 from dataclasses import dataclass
 from pathlib import Path
-from ..animations.animation_config import load_animation_config
 _LOGGER = logging.getLogger(__name__)
 _MODULE_DIR = Path(__file__).parent
-_PACKAGE_DIR = _MODULE_DIR.parent  # reachy_mini_home_assistant/
-_ANIMATIONS_FILE = _PACKAGE_DIR / "animations" / "conversation_animations.json"
 @dataclass
 class AnimationParams:
     """Parameters for a single animation with per-axis frequencies."""
     name: str
     description: str
     # Position amplitudes (meters)
@@ -50,7 +48,6 @@ class AnimationParams:
     # Antenna
     antenna_amplitude_rad: float = 0.0
     antenna_move_name: str = "both"
-    antenna_frequency_hz: float = 0.0  # If not specified, uses main frequency_hz
     # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
     frequency_hz: float = 0.5
     pitch_frequency_hz: float = 0.0
@@ -70,17 +67,14 @@ class AnimationPlayer:
     - Multi-frequency oscillators for natural motion
     - Random phase offsets per animation start for variation
     - Smooth transitions between animations
-    - Interpolation phase: smooth transition from current pose to neutral before oscillation
-      (same as BreathingMove in reference project)
     """
     def __init__(self):
-        self._animations: dict[str, AnimationParams] = {}
         self._amplitude_scale: float = 1.0
         self._transition_duration: float = 0.3
-        self._interpolation_duration: float = 0.2
-        self._current_animation: str | None = None
-        self._target_animation: str | None = None
         self._transition_start: float = 0.0
         self._phase_start: float = 0.0
         self._lock = threading.Lock()
@@ -91,29 +85,6 @@ class AnimationPlayer:
         self._phase_x: float = 0.0
         self._phase_y: float = 0.0
         self._phase_z: float = 0.0
-        # Interpolation state (for smooth transition to neutral before oscillation)
-        self._in_interpolation: bool = False
-        self._interpolation_start_time: float = 0.0
-        self._interpolation_start_offsets: dict[str, float] = {
-            "pitch": 0.0,
-            "yaw": 0.0,
-            "roll": 0.0,
-            "x": 0.0,
-            "y": 0.0,
-            "z": 0.0,
-            "antenna_left": 0.0,
-            "antenna_right": 0.0,
-        }
-        self._last_offsets: dict[str, float] = {
-            "pitch": 0.0,
-            "yaw": 0.0,
-            "roll": 0.0,
-            "x": 0.0,
-            "y": 0.0,
-            "z": 0.0,
-            "antenna_left": 0.0,
-            "antenna_right": 0.0,
-        }
         self._load_config()
     def _load_config(self) -> None:
@@ -122,7 +93,8 @@ class AnimationPlayer:
             _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
             return
         try:
-            data = load_animation_config(_ANIMATIONS_FILE)
             settings = data.get("settings", {})
             self._amplitude_scale = settings.get("amplitude_scale", 1.0)
@@ -148,7 +120,6 @@ class AnimationPlayer:
                     yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
                     antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
                     antenna_move_name=params.get("antenna_move_name", "both"),
-                    antenna_frequency_hz=params.get("antenna_frequency_hz", 0.0),
                     frequency_hz=params.get("frequency_hz", 0.5),
                     pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
                     yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
@@ -173,29 +144,18 @@ class AnimationPlayer:
         self._phase_z = random.random() * 2 * math.pi
     def set_animation(self, name: str) -> bool:
-        """Set the current animation with smooth transition.
-        Like BreathingMove in reference project, this starts an interpolation
-        phase that smoothly transitions from the current pose to neutral before
-        starting the oscillation animation.
-        """
         with self._lock:
             if name not in self._animations and name is not None:
                 _LOGGER.warning("Unknown animation: %s", name)
                 return False
-            if name == self._current_animation and not self._in_interpolation:
                 return True
-            # Capture current offsets for interpolation start
-            self._interpolation_start_offsets = self._last_offsets.copy()
-            self._interpolation_start_time = time.perf_counter()
-            self._in_interpolation = True
             self._target_animation = name
             self._transition_start = time.perf_counter()
             # Randomize phases for new animation
             self._randomize_phases()
-            _LOGGER.debug("Transitioning to animation: %s (interpolation phase)", name)
             return True
     def stop(self) -> None:
@@ -204,13 +164,10 @@ class AnimationPlayer:
             self._current_animation = None
             self._target_animation = None
-    def get_offsets(self, dt: float = 0.0) -> dict[str, float]:
         """Calculate current animation offsets.
-        Uses two-phase animation like BreathingMove in reference project:
-        1. Interpolation phase: smoothly transition from current pose to neutral
-        2. Oscillation phase: continuous sinusoidal breathing motion
         Each axis can have its own frequency for more organic movement.
         Args:
@@ -222,7 +179,7 @@ class AnimationPlayer:
         with self._lock:
             now = time.perf_counter()
-            # Handle transition to new animation
             if self._target_animation != self._current_animation:
                 elapsed = now - self._transition_start
                 if elapsed >= self._transition_duration:
@@ -231,59 +188,20 @@ class AnimationPlayer:
             # No animation
             if self._current_animation is None:
-                result = {
-                    "pitch": 0.0,
-                    "yaw": 0.0,
-                    "roll": 0.0,
-                    "x": 0.0,
-                    "y": 0.0,
-                    "z": 0.0,
-                    "antenna_left": 0.0,
-                    "antenna_right": 0.0,
                 }
-                self._last_offsets = result.copy()
-                return result
             params = self._animations.get(self._current_animation)
             if params is None:
-                result = {
-                    "pitch": 0.0,
-                    "yaw": 0.0,
-                    "roll": 0.0,
-                    "x": 0.0,
-                    "y": 0.0,
-                    "z": 0.0,
-                    "antenna_left": 0.0,
-                    "antenna_right": 0.0,
                 }
-                self._last_offsets = result.copy()
-                return result
-            # Check if in interpolation phase
-            if self._in_interpolation:
-                interp_elapsed = now - self._interpolation_start_time
-                if interp_elapsed < self._interpolation_duration:
-                    # Phase 1: Linear interpolation from current pose to neutral (offset=0)
-                    # Use smooth ease-in-out for natural motion
-                    t = interp_elapsed / self._interpolation_duration
-                    # Smooth step: t * t * (3 - 2 * t)
-                    smooth_t = t * t * (3 - 2 * t)
-                    result = {}
-                    for key in self._interpolation_start_offsets:
-                        start_val = self._interpolation_start_offsets[key]
-                        # Interpolate toward 0 (neutral)
-                        result[key] = start_val * (1.0 - smooth_t)
-                    self._last_offsets = result.copy()
-                    return result
-                else:
-                    # Interpolation complete, start oscillation phase
-                    self._in_interpolation = False
-                    self._phase_start = now
-                    _LOGGER.debug("Interpolation complete, starting oscillation phase")
-            # Phase 2: Oscillation animation
             elapsed = now - self._phase_start
             base_freq = params.frequency_hz
@@ -301,27 +219,32 @@ class AnimationPlayer:
             z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
             # Calculate oscillations with per-axis frequencies and random phases
-            pitch = params.pitch_offset_rad + params.pitch_amplitude_rad * math.sin(
-                2 * math.pi * pitch_freq * elapsed + self._phase_pitch
-            )
-            yaw = params.yaw_offset_rad + params.yaw_amplitude_rad * math.sin(
-                2 * math.pi * yaw_freq * elapsed + self._phase_yaw
-            )
-            roll = params.roll_offset_rad + params.roll_amplitude_rad * math.sin(
-                2 * math.pi * roll_freq * elapsed + self._phase_roll
-            )
-            x = params.x_offset_m + params.x_amplitude_m * math.sin(2 * math.pi * x_freq * elapsed + self._phase_x)
-            y = params.y_offset_m + params.y_amplitude_m * math.sin(2 * math.pi * y_freq * elapsed + self._phase_y)
-            z = params.z_offset_m + params.z_amplitude_m * math.sin(2 * math.pi * z_freq * elapsed + self._phase_z)
-            # Antenna movement with its own frequency
-            antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
-            antenna_phase = 2 * math.pi * antenna_freq * elapsed
             if params.antenna_move_name == "both":
                 left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
             elif params.antenna_move_name == "wiggle":
@@ -333,7 +256,7 @@ class AnimationPlayer:
             # Apply scale and blend
             scale = self._amplitude_scale * blend
-            result = {
                 "pitch": pitch * scale,
                 "yaw": yaw * scale,
                 "roll": roll * scale,
@@ -343,11 +266,9 @@ class AnimationPlayer:
                 "antenna_left": left * scale,
                 "antenna_right": right * scale,
             }
-            self._last_offsets = result.copy()
-            return result
     @property
-    def current_animation(self) -> str | None:
         """Get the current animation name."""
         with self._lock:
             return self._current_animation

 oscillators for more natural head sway.
 """
+import json
 import logging
 import math
 import random
 import time
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Dict, Optional
 _LOGGER = logging.getLogger(__name__)
 _MODULE_DIR = Path(__file__).parent
+_ANIMATIONS_FILE = _MODULE_DIR / "animations" / "conversation_animations.json"
 @dataclass
 class AnimationParams:
     """Parameters for a single animation with per-axis frequencies."""
     name: str
     description: str
     # Position amplitudes (meters)
     # Antenna
     antenna_amplitude_rad: float = 0.0
     antenna_move_name: str = "both"
     # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
     frequency_hz: float = 0.5
     pitch_frequency_hz: float = 0.0
     - Multi-frequency oscillators for natural motion
     - Random phase offsets per animation start for variation
     - Smooth transitions between animations
     """
     def __init__(self):
+        self._animations: Dict[str, AnimationParams] = {}
         self._amplitude_scale: float = 1.0
         self._transition_duration: float = 0.3
+        self._current_animation: Optional[str] = None
+        self._target_animation: Optional[str] = None
         self._transition_start: float = 0.0
         self._phase_start: float = 0.0
         self._lock = threading.Lock()
         self._phase_x: float = 0.0
         self._phase_y: float = 0.0
         self._phase_z: float = 0.0
         self._load_config()
     def _load_config(self) -> None:
             _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
             return
         try:
+            with open(_ANIMATIONS_FILE, "r", encoding="utf-8") as f:
+                data = json.load(f)
             settings = data.get("settings", {})
             self._amplitude_scale = settings.get("amplitude_scale", 1.0)
                     yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
                     antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
                     antenna_move_name=params.get("antenna_move_name", "both"),
                     frequency_hz=params.get("frequency_hz", 0.5),
                     pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
                     yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
         self._phase_z = random.random() * 2 * math.pi
     def set_animation(self, name: str) -> bool:
+        """Set the current animation with smooth transition."""
         with self._lock:
             if name not in self._animations and name is not None:
                 _LOGGER.warning("Unknown animation: %s", name)
                 return False
+            if name == self._current_animation:
                 return True
             self._target_animation = name
             self._transition_start = time.perf_counter()
             # Randomize phases for new animation
             self._randomize_phases()
+            _LOGGER.debug("Transitioning to animation: %s", name)
             return True
     def stop(self) -> None:
             self._current_animation = None
             self._target_animation = None
+    def get_offsets(self, dt: float = 0.0) -> Dict[str, float]:
         """Calculate current animation offsets.
+        Uses multi-frequency oscillators for natural motion.
         Each axis can have its own frequency for more organic movement.
         Args:
         with self._lock:
             now = time.perf_counter()
+            # Handle transition
             if self._target_animation != self._current_animation:
                 elapsed = now - self._transition_start
                 if elapsed >= self._transition_duration:
             # No animation
             if self._current_animation is None:
+                return {
+                    "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
+                    "x": 0.0, "y": 0.0, "z": 0.0,
+                    "antenna_left": 0.0, "antenna_right": 0.0,
                 }
             params = self._animations.get(self._current_animation)
             if params is None:
+                return {
+                    "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
+                    "x": 0.0, "y": 0.0, "z": 0.0,
+                    "antenna_left": 0.0, "antenna_right": 0.0,
                 }
             elapsed = now - self._phase_start
             base_freq = params.frequency_hz
             z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
             # Calculate oscillations with per-axis frequencies and random phases
+            pitch = (params.pitch_offset_rad +
+                     params.pitch_amplitude_rad *
+                     math.sin(2 * math.pi * pitch_freq * elapsed + self._phase_pitch))
+            yaw = (params.yaw_offset_rad +
+                   params.yaw_amplitude_rad *
+                   math.sin(2 * math.pi * yaw_freq * elapsed + self._phase_yaw))
+            roll = (params.roll_offset_rad +
+                    params.roll_amplitude_rad *
+                    math.sin(2 * math.pi * roll_freq * elapsed + self._phase_roll))
+            x = (params.x_offset_m +
+                 params.x_amplitude_m *
+                 math.sin(2 * math.pi * x_freq * elapsed + self._phase_x))
+            y = (params.y_offset_m +
+                 params.y_amplitude_m *
+                 math.sin(2 * math.pi * y_freq * elapsed + self._phase_y))
+            z = (params.z_offset_m +
+                 params.z_amplitude_m *
+                 math.sin(2 * math.pi * z_freq * elapsed + self._phase_z))
+            # Antenna movement
+            antenna_phase = 2 * math.pi * base_freq * elapsed
             if params.antenna_move_name == "both":
                 left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
             elif params.antenna_move_name == "wiggle":
             # Apply scale and blend
             scale = self._amplitude_scale * blend
+            return {
                 "pitch": pitch * scale,
                 "yaw": yaw * scale,
                 "roll": roll * scale,
                 "antenna_left": left * scale,
                 "antenna_right": right * scale,
             }
     @property
+    def current_animation(self) -> Optional[str]:
         """Get the current animation name."""
         with self._lock:
             return self._current_animation

reachy_mini_ha_voice/animations/conversation_animations.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "animations": {
+    "idle": {
+      "description": "No movement when idle - robot stays at neutral position",
+      "z_amplitude_m": 0.0,
+      "antenna_amplitude_rad": 0.0,
+      "frequency_hz": 0.0
+    },
+    "listening": {
+      "description": "Attentive pose while listening to user - slight forward lean",
+      "pitch_offset_rad": -0.05,
+      "pitch_amplitude_rad": 0.03,
+      "z_amplitude_m": 0.003,
+      "antenna_amplitude_rad": 0.2,
+      "antenna_move_name": "both",
+      "frequency_hz": 0.6
+    },
+    "thinking": {
+      "description": "Processing/thinking animation - head tilted with gentle sway",
+      "roll_offset_rad": 0.08,
+      "pitch_amplitude_rad": 0.03,
+      "yaw_amplitude_rad": 0.05,
+      "roll_amplitude_rad": 0.04,
+      "z_amplitude_m": 0.003,
+      "antenna_amplitude_rad": 0.25,
+      "antenna_move_name": "wiggle",
+      "frequency_hz": 0.4
+    },
+    "speaking": {
+      "description": "Speaking animation - multi-frequency natural head sway",
+      "pitch_amplitude_rad": 0.08,
+      "pitch_frequency_hz": 2.2,
+      "yaw_amplitude_rad": 0.13,
+      "yaw_frequency_hz": 0.6,
+      "roll_amplitude_rad": 0.04,
+      "roll_frequency_hz": 1.3,
+      "x_amplitude_m": 0.0045,
+      "x_frequency_hz": 0.35,
+      "y_amplitude_m": 0.00375,
+      "y_frequency_hz": 0.45,
+      "z_amplitude_m": 0.00225,
+      "z_frequency_hz": 0.25,
+      "antenna_amplitude_rad": 0.5,
+      "antenna_move_name": "wiggle",
+      "frequency_hz": 1.0
+    },
+    "happy": {
+      "description": "Happy/positive response",
+      "pitch_amplitude_rad": 0.08,
+      "z_amplitude_m": 0.01,
+      "antenna_amplitude_rad": 0.5,
+      "antenna_move_name": "both",
+      "frequency_hz": 1.2
+    },
+    "sad": {
+      "description": "Sad/negative response - head droops",
+      "pitch_offset_rad": 0.1,
+      "pitch_amplitude_rad": 0.04,
+      "z_offset_m": -0.01,
+      "z_amplitude_m": 0.002,
+      "antenna_amplitude_rad": 0.1,
+      "antenna_move_name": "both",
+      "frequency_hz": 0.3
+    },
+    "confused": {
+      "description": "Confused/error state - head tilts",
+      "roll_amplitude_rad": 0.1,
+      "yaw_amplitude_rad": 0.12,
+      "pitch_amplitude_rad": 0.05,
+      "antenna_amplitude_rad": 0.4,
+      "antenna_move_name": "wiggle",
+      "frequency_hz": 0.7
+    },
+    "alert": {
+      "description": "Alert/timer finished - quick movements",
+      "pitch_amplitude_rad": 0.1,
+      "z_amplitude_m": 0.012,
+      "antenna_amplitude_rad": 0.6,
+      "antenna_move_name": "both",
+      "frequency_hz": 1.5
+    }
+  },
+  "settings": {
+    "amplitude_scale": 1.0,
+    "transition_duration_s": 0.3
+  }
+}

{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py RENAMED Viewed

@@ -4,7 +4,7 @@ import asyncio
 import logging
 from abc import abstractmethod
 from collections.abc import Iterable
-from typing import TYPE_CHECKING
 # pylint: disable=no-name-in-module
 from aioesphomeapi._frame_helper.packets import make_plain_text_packets
@@ -31,7 +31,7 @@ class APIServer(asyncio.Protocol):
     def __init__(self, name: str) -> None:
         self.name = name
-        self._buffer: bytes | None = None
         self._buffer_len: int = 0
         self._pos: int = 0
         self._transport = None
@@ -41,77 +41,52 @@ class APIServer(asyncio.Protocol):
     def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
         pass
-    def on_authenticated(self) -> None:
-        """Hook called after authentication succeeds."""
-        return
     def process_packet(self, msg_type: int, packet_data: bytes) -> None:
-        try:
-            msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
-            msg_inst = msg_class.FromString(packet_data)
-            _LOGGER.debug("Received message: %s", msg_class.__name__)
-            if isinstance(msg_inst, HelloRequest):
-                _LOGGER.info("HelloRequest received, sending HelloResponse")
-                self.send_messages(
-                    [
-                        HelloResponse(
-                            api_version_major=1,
-                            api_version_minor=10,
-                            name=self.name,
-                        )
-                    ]
-                )
-                return
-            if isinstance(msg_inst, AuthenticationRequest):
-                _LOGGER.info("AuthenticationRequest received, sending AuthenticationResponse")
-                self.send_messages([AuthenticationResponse()])
-                self.on_authenticated()
-            elif isinstance(msg_inst, DisconnectRequest):
-                self.send_messages([DisconnectResponse()])
-                _LOGGER.debug("Disconnect requested")
-                if self._transport:
-                    self._transport.close()
-                    self._transport = None
-                    self._writelines = None
-            elif isinstance(msg_inst, PingRequest):
-                self.send_messages([PingResponse()])
-            elif msgs := self.handle_message(msg_inst):
-                if isinstance(msgs, message.Message):
-                    msgs = [msgs]
-                self.send_messages(msgs)
-        except Exception:
-            _LOGGER.exception("Unhandled ESPHome protocol error while processing message type %s", msg_type)
             if self._transport:
                 self._transport.close()
                 self._transport = None
                 self._writelines = None
-    def send_messages(self, msgs: list[message.Message]):
         if self._writelines is None:
             return
-        try:
-            packets = [(PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString()) for msg in msgs]
-            packet_bytes = make_plain_text_packets(packets)
-            self._writelines(packet_bytes)
-        except (IndexError, OSError, BrokenPipeError, ConnectionResetError) as e:
-            _LOGGER.warning(
-                "Error sending message (%s): %s - connection may be lost",
-                msgs[0].__class__.__name__ if msgs else "unknown",
-                e,
-            )
-            # Mark transport as invalid to prevent further writes
-            self._writelines = None
-            if self._transport:
-                self._transport.close()
-                self._transport = None
     def connection_made(self, transport) -> None:
         self._transport = transport
         self._writelines = transport.writelines
-        _LOGGER.info("ESPHome client connected from %s", transport.get_extra_info("peername"))
     def data_received(self, data: bytes):
         if self._buffer is None:
@@ -165,13 +140,8 @@ class APIServer(asyncio.Protocol):
         return cstr[original_pos:new_pos]
     def connection_lost(self, exc):
-        _LOGGER.info("ESPHome client disconnected: %s", exc)
         self._transport = None
         self._writelines = None
-        # Clear buffer to prevent memory leak
-        self._buffer = None
-        self._buffer_len = 0
-        self._pos = 0
     def _read_varuint(self) -> int:
         """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""

 import logging
 from abc import abstractmethod
 from collections.abc import Iterable
+from typing import TYPE_CHECKING, List, Optional
 # pylint: disable=no-name-in-module
 from aioesphomeapi._frame_helper.packets import make_plain_text_packets
     def __init__(self, name: str) -> None:
         self.name = name
+        self._buffer: Optional[bytes] = None
         self._buffer_len: int = 0
         self._pos: int = 0
         self._transport = None
     def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
         pass
     def process_packet(self, msg_type: int, packet_data: bytes) -> None:
+        """Decode one received packet and send the matching response(s).
+        Hello, authentication, disconnect and ping requests are answered here;
+        every other message is passed to ``handle_message`` and whatever it
+        returns is sent back.
+        Args:
+            msg_type: Numeric message type used to look up the protobuf class.
+            packet_data: Serialized protobuf payload of the message.
+        """
+        # NOTE(review): an unknown msg_type is not handled here - the lookup
+        # below raises (presumably KeyError; confirm against the mapping type).
+        msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
+        msg_inst = msg_class.FromString(packet_data)
+        if isinstance(msg_inst, HelloRequest):
+            # Reply with API version 1.10 and this server's name, then stop
+            self.send_messages(
+                [
+                    HelloResponse(
+                        api_version_major=1,
+                        api_version_minor=10,
+                        name=self.name,
+                    )
+                ]
+            )
+            return
+        if isinstance(msg_inst, AuthenticationRequest):
+            self.send_messages([AuthenticationResponse()])
+        elif isinstance(msg_inst, DisconnectRequest):
+            # Acknowledge first, then close and forget the transport
+            self.send_messages([DisconnectResponse()])
+            _LOGGER.debug("Disconnect requested")
             if self._transport:
                 self._transport.close()
                 self._transport = None
                 self._writelines = None
+        elif isinstance(msg_inst, PingRequest):
+            self.send_messages([PingResponse()])
+        elif msgs := self.handle_message(msg_inst):
+            # handle_message may return a single message; wrap it in a list
+            if isinstance(msgs, message.Message):
+                msgs = [msgs]
+            self.send_messages(msgs)
+    def send_messages(self, msgs: List[message.Message]):
+        """Serialize ``msgs`` into plain-text packets and write them out.
+        Does nothing when ``_writelines`` is None (no transport attached).
+        Args:
+            msgs: Protobuf messages to send, in order.
+        """
         if self._writelines is None:
             return
+        # Pair each message's numeric type with its serialized payload
+        packets = [
+            (PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString())
+            for msg in msgs
+        ]
+        packet_bytes = make_plain_text_packets(packets)
+        self._writelines(packet_bytes)
     def connection_made(self, transport) -> None:
         self._transport = transport
         self._writelines = transport.writelines
     def data_received(self, data: bytes):
         if self._buffer is None:
         return cstr[original_pos:new_pos]
     def connection_lost(self, exc):
         self._transport = None
         self._writelines = None
     def _read_varuint(self) -> int:
         """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""

reachy_mini_ha_voice/audio_player.py ADDED Viewed

	@@ -0,0 +1,578 @@

+"""Audio player using Reachy Mini's media system with automatic Sendspin support.
+Sendspin integration allows synchronized multi-room audio playback through
+a Sendspin server. Reachy Mini connects as a PLAYER to receive audio streams
+from Home Assistant or other Sendspin controllers.
+Sendspin is automatically enabled by default - no user configuration needed.
+The system uses mDNS to discover Sendspin servers on the local network.
+"""
+import hashlib
+import logging
+import socket
+import threading
+import time
+from collections.abc import Callable
+from typing import List, Optional, TYPE_CHECKING, Union
+if TYPE_CHECKING:
+    from .zeroconf import SendspinDiscovery
+_LOGGER = logging.getLogger(__name__)
+# Check if aiosendspin is available
+try:
+    from aiosendspin.client import SendspinClient, PCMFormat
+    from aiosendspin.models.types import Roles, AudioCodec, PlayerCommand
+    from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
+    from aiosendspin.models.core import StreamStartMessage
+    SENDSPIN_AVAILABLE = True
+except ImportError:
+    SENDSPIN_AVAILABLE = False
+    _LOGGER.debug("aiosendspin not installed, Sendspin support disabled")
+def _get_stable_client_id() -> str:
+    """Generate a stable client ID for this machine.
+    The ID is the first 16 hex characters of the SHA-256 digest of
+    ``"reachy-mini-<hostname>"``, so it stays the same across restarts as long
+    as the hostname does not change. Only the hostname is used; no MAC address
+    is read.
+    Returns:
+        The 16-character hex ID, or ``"reachy-mini-default"`` if the hostname
+        lookup or hashing raises.
+    """
+    try:
+        hostname = socket.gethostname()
+        # Hash the prefixed hostname and keep the first 16 hex characters
+        hash_input = f"reachy-mini-{hostname}"
+        return hashlib.sha256(hash_input.encode()).hexdigest()[:16]
+    except Exception:
+        return "reachy-mini-default"
+class AudioPlayer:
+    """Audio player using Reachy Mini's media system with automatic Sendspin support.
+    Supports audio playback modes:
+    1. Reachy Mini's built-in media system (default)
+    2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
+    3. Sounddevice fallback (when Reachy Mini not available)
+    When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
+    from Home Assistant or other controllers for synchronized playback.
+    """
+    def __init__(self, reachy_mini=None) -> None:
+        """Initialize audio player.
+        Args:
+            reachy_mini: Reachy Mini SDK instance.
+        """
+        self.reachy_mini = reachy_mini
+        self.is_playing = False
+        self._playlist: List[str] = []
+        self._done_callback: Optional[Callable[[], None]] = None
+        self._done_callback_lock = threading.Lock()
+        self._duck_volume: float = 0.5
+        self._unduck_volume: float = 1.0
+        self._current_volume: float = 1.0
+        self._stop_flag = threading.Event()
+        # Speech sway callback for audio-driven head motion
+        self._sway_callback: Optional[Callable[[dict], None]] = None
+        # Sendspin support (auto-enabled via mDNS discovery)
+        # Uses stable client_id so HA recognizes the same device after restart
+        self._sendspin_client_id = _get_stable_client_id()
+        self._sendspin_client: Optional["SendspinClient"] = None
+        self._sendspin_enabled = False
+        self._sendspin_url: Optional[str] = None
+        self._sendspin_discovery: Optional["SendspinDiscovery"] = None
+        self._sendspin_unsubscribers: List[Callable] = []
+        # Audio buffer for Sendspin playback
+        self._sendspin_audio_format: Optional["PCMFormat"] = None
+        self._sendspin_playback_started = False
+        self._sendspin_paused = False  # Pause Sendspin when voice assistant is active
+    def set_sway_callback(self, callback: Optional[Callable[[dict], None]]) -> None:
+        """Set callback for speech-driven sway animation.
+        Args:
+            callback: Function called with sway dict containing
+                      pitch_rad, yaw_rad, roll_rad, x_m, y_m, z_m
+        """
+        self._sway_callback = callback
+    def set_reachy_mini(self, reachy_mini) -> None:
+        """Set the Reachy Mini instance."""
+        self.reachy_mini = reachy_mini
+    # ========== Sendspin Integration (Auto-enabled via mDNS) ==========
+    @property
+    def sendspin_available(self) -> bool:
+        """Check if Sendspin library is available."""
+        return SENDSPIN_AVAILABLE
+    @property
+    def sendspin_enabled(self) -> bool:
+        """Check if Sendspin output is enabled and connected."""
+        return self._sendspin_enabled and self._sendspin_client is not None
+    @property
+    def sendspin_url(self) -> Optional[str]:
+        """Get current Sendspin server URL."""
+        return self._sendspin_url
+    def pause_sendspin(self) -> None:
+        """Pause Sendspin audio playback.
+        Called when voice assistant is activated to prevent audio conflicts.
+        Incoming Sendspin audio chunks will be dropped until resumed.
+        """
+        if self._sendspin_paused:
+            return
+        self._sendspin_paused = True
+        _LOGGER.debug("Sendspin audio paused (voice assistant active)")
+    def resume_sendspin(self) -> None:
+        """Resume Sendspin audio playback.
+        Called when voice assistant returns to idle state.
+        """
+        if not self._sendspin_paused:
+            return
+        self._sendspin_paused = False
+        self._logged_resample = False  # Reset resample log flag for new stream
+        _LOGGER.debug("Sendspin audio resumed")
+    async def start_sendspin_discovery(self) -> None:
+        """Start mDNS discovery for Sendspin servers.
+        This runs in the background and automatically connects when a server is found.
+        Called automatically during voice assistant startup.
+        """
+        if not SENDSPIN_AVAILABLE:
+            _LOGGER.debug("aiosendspin not installed, skipping Sendspin discovery")
+            return
+        if self._sendspin_discovery is not None and self._sendspin_discovery.is_running:
+            _LOGGER.debug("Sendspin discovery already running")
+            return
+        # Import here to avoid circular imports
+        from .zeroconf import SendspinDiscovery
+        _LOGGER.info("Starting Sendspin server discovery...")
+        self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
+        await self._sendspin_discovery.start()
+    async def _on_sendspin_server_found(self, server_url: str) -> None:
+        """Callback when a Sendspin server is discovered via mDNS.
+        Args:
+            server_url: WebSocket URL of the discovered server.
+        """
+        await self._connect_to_server(server_url)
+    async def _connect_to_server(self, server_url: str) -> bool:
+        """Connect to a discovered Sendspin server as PLAYER.
+        Args:
+            server_url: WebSocket URL of the Sendspin server.
+        Returns:
+            True if connected successfully.
+        """
+        if not SENDSPIN_AVAILABLE:
+            return False
+        # Already connected to this server
+        if self._sendspin_enabled and self._sendspin_url == server_url:
+            return True
+        # Disconnect from previous server if any
+        if self._sendspin_client is not None:
+            await self._disconnect_sendspin()
+        try:
+            # Use stable client_id so HA recognizes the same device after restart
+            # Configure player support with audio formats
+            # Prioritize 16kHz since ReSpeaker hardware only supports 16kHz output
+            # Higher sample rates will be resampled down, causing quality loss
+            player_support = ClientHelloPlayerSupport(
+                supported_formats=[
+                    # Prefer 16kHz (native ReSpeaker sample rate - no resampling needed)
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=2, sample_rate=16000, bit_depth=16
+                    ),
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=1, sample_rate=16000, bit_depth=16
+                    ),
+                    # Also support higher sample rates (will be resampled to 16kHz)
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=2, sample_rate=48000, bit_depth=16
+                    ),
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=2, sample_rate=44100, bit_depth=16
+                    ),
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=1, sample_rate=48000, bit_depth=16
+                    ),
+                    SupportedAudioFormat(
+                        codec=AudioCodec.PCM, channels=1, sample_rate=44100, bit_depth=16
+                    ),
+                ],
+                buffer_capacity=32_000_000,
+                supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
+            )
+            self._sendspin_client = SendspinClient(
+                client_id=self._sendspin_client_id,
+                client_name="Reachy Mini",
+                roles=[Roles.PLAYER],  # PLAYER role to receive audio
+                player_support=player_support,
+            )
+            await self._sendspin_client.connect(server_url)
+            # Register audio listeners
+            self._sendspin_unsubscribers = [
+                self._sendspin_client.add_audio_chunk_listener(self._on_sendspin_audio_chunk),
+                self._sendspin_client.add_stream_start_listener(self._on_sendspin_stream_start),
+                self._sendspin_client.add_stream_end_listener(self._on_sendspin_stream_end),
+                self._sendspin_client.add_stream_clear_listener(self._on_sendspin_stream_clear),
+            ]
+            self._sendspin_url = server_url
+            self._sendspin_enabled = True
+            _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)",
+                         server_url, self._sendspin_client_id)
+            return True
+        except Exception as e:
+            _LOGGER.warning("Failed to connect to Sendspin server %s: %s", server_url, e)
+            self._sendspin_client = None
+            self._sendspin_enabled = False
+            return False
+    def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: "PCMFormat") -> None:
+        """Handle incoming audio chunks from Sendspin server.
+        Plays the audio through Reachy Mini's speaker using push_audio_sample().
+        Resamples audio if needed (Reachy Mini uses 16kHz).
+        Note: Audio is dropped when Sendspin is paused (e.g., during voice assistant interaction).
+        """
+        if self.reachy_mini is None:
+            return
+        # Drop audio when paused (voice assistant is active)
+        if self._sendspin_paused:
+            return
+        try:
+            # Store format for potential use
+            self._sendspin_audio_format = fmt
+            import numpy as np
+            # Convert bytes to numpy array based on format
+            if fmt.bit_depth == 16:
+                dtype = np.int16
+                max_val = 32768.0
+            elif fmt.bit_depth == 32:
+                dtype = np.int32
+                max_val = 2147483648.0
+            else:
+                dtype = np.int16
+                max_val = 32768.0
+            audio_array = np.frombuffer(audio_data, dtype=dtype)
+            # Convert to float32 for playback (SDK expects float32)
+            audio_float = audio_array.astype(np.float32) / max_val
+            # Reshape for channels if needed
+            if fmt.channels > 1:
+                # Reshape to (samples, channels)
+                audio_float = audio_float.reshape(-1, fmt.channels)
+            else:
+                # Mono: reshape to (samples, 1)
+                audio_float = audio_float.reshape(-1, 1)
+            # Resample if needed (ReSpeaker hardware only supports 16kHz)
+            target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
+            if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
+                import scipy.signal
+                # Calculate new length
+                new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
+                if new_length > 0:
+                    audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
+                    # Log resampling only once per stream
+                    if not hasattr(self, '_logged_resample') or not self._logged_resample:
+                        _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz",
+                                      fmt.sample_rate, target_sample_rate)
+                        self._logged_resample = True
+            # Apply volume
+            audio_float = audio_float * self._current_volume
+            # Ensure media playback is started
+            if not self._sendspin_playback_started:
+                try:
+                    self.reachy_mini.media.start_playing()
+                    self._sendspin_playback_started = True
+                    _LOGGER.info("Started media playback for Sendspin audio (target: %d Hz)", target_sample_rate)
+                except Exception as e:
+                    _LOGGER.warning("Failed to start media playback: %s", e)
+            # Play through Reachy Mini's media system using push_audio_sample
+            self.reachy_mini.media.push_audio_sample(audio_float)
+        except Exception as e:
+            _LOGGER.debug("Error playing Sendspin audio: %s", e)
+    def _on_sendspin_stream_start(self, message: "StreamStartMessage") -> None:
+        """Handle stream start from Sendspin server."""
+        _LOGGER.debug("Sendspin stream started")
+        # No need to clear buffer - just start fresh
+    def _on_sendspin_stream_end(self, roles: Optional[List[Roles]]) -> None:
+        """Handle stream end from Sendspin server."""
+        if roles is None or Roles.PLAYER in roles:
+            _LOGGER.debug("Sendspin stream ended")
+    def _on_sendspin_stream_clear(self, roles: Optional[List[Roles]]) -> None:
+        """Handle stream clear from Sendspin server."""
+        if roles is None or Roles.PLAYER in roles:
+            _LOGGER.debug("Sendspin stream cleared")
+            if self.reachy_mini is not None:
+                try:
+                    self.reachy_mini.media.stop_playing()
+                    self._sendspin_playback_started = False
+                except Exception:
+                    pass
+    async def _disconnect_sendspin(self) -> None:
+        """Disconnect from current Sendspin server."""
+        # Unsubscribe from listeners
+        for unsub in self._sendspin_unsubscribers:
+            try:
+                unsub()
+            except Exception:
+                pass
+        self._sendspin_unsubscribers.clear()
+        if self._sendspin_client is not None:
+            try:
+                await self._sendspin_client.disconnect()
+            except Exception as e:
+                _LOGGER.debug("Error disconnecting from Sendspin: %s", e)
+            self._sendspin_client = None
+        self._sendspin_enabled = False
+        self._sendspin_url = None
+        self._sendspin_audio_format = None
+    async def stop_sendspin(self) -> None:
+        """Stop Sendspin discovery and disconnect from server."""
+        # Stop discovery
+        if self._sendspin_discovery is not None:
+            await self._sendspin_discovery.stop()
+            self._sendspin_discovery = None
+        # Disconnect from server
+        await self._disconnect_sendspin()
+        _LOGGER.info("Sendspin stopped")
+    # ========== Core Playback Methods ==========
+    def play(
+        self,
+        url: Union[str, List[str]],
+        done_callback: Optional[Callable[[], None]] = None,
+        stop_first: bool = True,
+    ) -> None:
+        """Play audio from URL(s).
+        Args:
+            url: Single URL or list of URLs to play.
+            done_callback: Called when playback finishes.
+            stop_first: Stop current playback before starting new.
+        """
+        if stop_first:
+            self.stop()
+        if isinstance(url, str):
+            self._playlist = [url]
+        else:
+            self._playlist = list(url)
+        self._done_callback = done_callback
+        self._stop_flag.clear()
+        self._play_next()
+    def _play_next(self) -> None:
+        """Play next item in playlist."""
+        if not self._playlist or self._stop_flag.is_set():
+            self._on_playback_finished()
+            return
+        next_url = self._playlist.pop(0)
+        _LOGGER.debug("Playing %s", next_url)
+        self.is_playing = True
+        # Start playback in a thread
+        thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
+        thread.start()
+    def _play_file(self, file_path: str) -> None:
+        """Play an audio file with optional speech-driven sway animation."""
+        try:
+            # Handle URLs - download first
+            if file_path.startswith(("http://", "https://")):
+                import urllib.request
+                import tempfile
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+                    urllib.request.urlretrieve(file_path, tmp.name)
+                    file_path = tmp.name
+            if self._stop_flag.is_set():
+                return
+            # Play locally using Reachy Mini's media system
+            if self.reachy_mini is not None:
+                try:
+                    # Read audio data for duration calculation and sway analysis
+                    import soundfile as sf
+                    data, sample_rate = sf.read(file_path)
+                    duration = len(data) / sample_rate
+                    # Pre-analyze audio for speech sway if callback is set
+                    sway_frames = []
+                    if self._sway_callback is not None:
+                        from .speech_sway import SpeechSwayRT
+                        sway = SpeechSwayRT()
+                        sway_frames = sway.feed(data, sample_rate)
+                        _LOGGER.debug("Generated %d sway frames for %.2fs audio",
+                                      len(sway_frames), duration)
+                    # Start playback
+                    self.reachy_mini.media.play_sound(file_path)
+                    # Playback loop with sway animation
+                    start_time = time.time()
+                    frame_duration = 0.05  # 50ms per sway frame (HOP_MS)
+                    frame_idx = 0
+                    while time.time() - start_time < duration:
+                        if self._stop_flag.is_set():
+                            self.reachy_mini.media.stop_playing()
+                            break
+                        # Apply sway frame if available
+                        if self._sway_callback and frame_idx < len(sway_frames):
+                            elapsed = time.time() - start_time
+                            target_frame = int(elapsed / frame_duration)
+                            while frame_idx <= target_frame and frame_idx < len(sway_frames):
+                                self._sway_callback(sway_frames[frame_idx])
+                                frame_idx += 1
+                        time.sleep(0.02)  # 20ms sleep for responsive sway
+                    # Reset sway to zero when done
+                    if self._sway_callback:
+                        self._sway_callback({
+                            "pitch_rad": 0.0, "yaw_rad": 0.0, "roll_rad": 0.0,
+                            "x_m": 0.0, "y_m": 0.0, "z_m": 0.0,
+                        })
+                except Exception as e:
+                    _LOGGER.warning("Reachy Mini audio failed, falling back: %s", e)
+                    self._play_file_fallback(file_path)
+            else:
+                self._play_file_fallback(file_path)
+        except Exception as e:
+            _LOGGER.error("Error playing audio: %s", e)
+        finally:
+            self.is_playing = False
+            if self._playlist and not self._stop_flag.is_set():
+                self._play_next()
+            else:
+                self._on_playback_finished()
+    def _play_file_fallback(self, file_path: str) -> None:
+        """Fallback to sounddevice for audio playback."""
+        import sounddevice as sd
+        import soundfile as sf
+        data, samplerate = sf.read(file_path)
+        data = data * self._current_volume
+        if not self._stop_flag.is_set():
+            sd.play(data, samplerate)
+            sd.wait()
+    def _on_playback_finished(self) -> None:
+        """Called when playback is finished."""
+        self.is_playing = False
+        todo_callback: Optional[Callable[[], None]] = None
+        with self._done_callback_lock:
+            if self._done_callback:
+                todo_callback = self._done_callback
+                self._done_callback = None
+        if todo_callback:
+            try:
+                todo_callback()
+            except Exception:
+                _LOGGER.exception("Unexpected error running done callback")
+    def pause(self) -> None:
+        """Pause playback.
+        Stops current audio output but preserves playlist for resume.
+        """
+        self._stop_flag.set()
+        if self.reachy_mini is not None:
+            try:
+                self.reachy_mini.media.stop_playing()
+            except Exception:
+                pass
+        self.is_playing = False
+    def resume(self) -> None:
+        """Resume playback from where it was paused."""
+        self._stop_flag.clear()
+        if self._playlist:
+            self._play_next()
+    def stop(self) -> None:
+        """Stop playback and clear playlist."""
+        self._stop_flag.set()
+        if self.reachy_mini is not None:
+            try:
+                self.reachy_mini.media.stop_playing()
+            except Exception:
+                pass
+        self._playlist.clear()
+        self.is_playing = False
+    def duck(self) -> None:
+        """Reduce volume for announcements."""
+        self._current_volume = self._duck_volume
+    def unduck(self) -> None:
+        """Restore normal volume."""
+        self._current_volume = self._unduck_volume
+    def set_volume(self, volume: int) -> None:
+        """Set volume level (0-100)."""
+        volume = max(0, min(100, volume))
+        self._unduck_volume = volume / 100.0
+        self._duck_volume = self._unduck_volume / 2
+        self._current_volume = self._unduck_volume

reachy_mini_ha_voice/camera_server.py ADDED Viewed

	@@ -0,0 +1,842 @@

+"""
+MJPEG Camera Server for Reachy Mini with Face Tracking.
+This module provides an HTTP server that streams camera frames from Reachy Mini
+as MJPEG, which can be integrated with Home Assistant via Generic Camera.
+Also provides face tracking for head movement control.
+Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
+"""
+import asyncio
+import logging
+import threading
+import time
+from typing import Optional, Tuple, List, TYPE_CHECKING
+import cv2
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+# Import SDK interpolation utilities (same as conversation_app)
+try:
+    from reachy_mini.utils.interpolation import linear_pose_interpolation
+    SDK_INTERPOLATION_AVAILABLE = True
+except ImportError:
+    SDK_INTERPOLATION_AVAILABLE = False
+if TYPE_CHECKING:
+    from reachy_mini import ReachyMini
+_LOGGER = logging.getLogger(__name__)
+# MJPEG boundary string
+MJPEG_BOUNDARY = "frame"
+class MJPEGCameraServer:
+    """
+    MJPEG streaming server for Reachy Mini camera with face tracking.
+    Provides HTTP endpoints:
+    - /stream - MJPEG video stream
+    - /snapshot - Single JPEG image
+    - / - Simple status page
+    Also provides face tracking offsets for head movement control.
+    Resource Optimization:
+    - Adaptive frame rate: high (15fps) when face detected or in conversation,
+      low (3fps) when idle and no face for extended period
+    - Face detection pauses after prolonged absence to save CPU
+    """
+    def __init__(
+        self,
+        reachy_mini: Optional["ReachyMini"] = None,
+        host: str = "0.0.0.0",
+        port: int = 8081,
+        fps: int = 15,  # 15fps for smooth face tracking
+        quality: int = 80,
+        enable_face_tracking: bool = True,
+    ):
+        """
+        Initialize the MJPEG camera server.
+        Only state is set up here; the tracker, the gesture detector, the
+        capture thread and the HTTP listener are created by start().
+        Args:
+            reachy_mini: Reachy Mini robot instance (can be None for testing)
+            host: Host address to bind to
+            port: Port number for the HTTP server
+            fps: Target frames per second for the stream (must be > 0)
+            quality: JPEG quality (1-100)
+            enable_face_tracking: Enable face tracking for head movement
+        Raises:
+            ValueError: If fps is not a positive number.
+        """
+        # fps is used as a divisor here and in the capture loop; reject bad
+        # values up front instead of failing with ZeroDivisionError or a
+        # negative sleep inside the capture thread.
+        if fps <= 0:
+            raise ValueError(f"fps must be positive, got {fps!r}")
+        self.reachy_mini = reachy_mini
+        self.host = host
+        self.port = port
+        self.fps = fps
+        self.quality = quality
+        self.enable_face_tracking = enable_face_tracking
+        self._server: Optional[asyncio.Server] = None
+        self._running = False
+        self._frame_interval = 1.0 / fps
+        # Latest encoded JPEG and its capture time, shared between the capture
+        # thread (writer) and the HTTP handlers (readers) under _frame_lock.
+        self._last_frame: Optional[bytes] = None
+        self._last_frame_time: float = 0
+        self._frame_lock = threading.Lock()
+        # Frame capture thread
+        self._capture_thread: Optional[threading.Thread] = None
+        # Face tracking state
+        self._head_tracker = None
+        self._face_tracking_enabled = True  # Enabled by default for always-on face tracking
+        # Offsets are stored as [x, y, z, roll, pitch, yaw].
+        self._face_tracking_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        self._face_tracking_lock = threading.Lock()
+        # Gesture detection state
+        self._gesture_detector = None
+        self._gesture_detection_enabled = True
+        self._current_gesture = "none"
+        self._gesture_confidence = 0.0
+        self._gesture_lock = threading.Lock()
+        self._gesture_frame_counter = 0
+        self._gesture_detection_interval = 3  # Run gesture detection every N frames
+        self._gesture_state_callback = None  # Callback to notify entity registry
+        # Face tracking timing (smooth interpolation when face lost)
+        self._last_face_detected_time: Optional[float] = None
+        self._interpolation_start_time: Optional[float] = None
+        self._interpolation_start_pose: Optional[np.ndarray] = None
+        self._face_lost_delay = 1.0  # Reduced from 2.0s to 1.0s for faster response
+        self._interpolation_duration = 0.8  # Reduced from 1.0s to 0.8s for faster return
+        # Offset scaling (same as conversation_app)
+        self._offset_scale = 0.6
+        # =====================================================================
+        # Resource optimization: Adaptive frame rate for face tracking
+        # =====================================================================
+        # High frequency when: face detected, in conversation, or recently active
+        # Low frequency when: idle and no face for extended period
+        # Ultra-low when: idle for very long time (just MJPEG stream, minimal AI)
+        self._fps_high = fps  # Normal tracking rate (15fps)
+        self._fps_low = 2     # Low power rate (2fps) - periodic face check
+        self._fps_idle = 0.5  # Ultra-low power (0.5fps) - minimal CPU usage
+        self._current_fps = fps
+        # Conversation state (set by voice assistant)
+        self._in_conversation = False
+        self._conversation_lock = threading.Lock()
+        # Adaptive tracking timing
+        self._no_face_duration = 0.0  # How long since last face detection
+        self._low_power_threshold = 5.0   # Switch to low power after 5s without face
+        self._idle_threshold = 30.0       # Switch to idle mode after 30s without face
+        self._last_face_check_time = 0.0
+        # Skip AI inference in idle mode (only stream MJPEG)
+        self._ai_enabled = True
+    async def start(self) -> None:
+        """Start the MJPEG camera server.
+        Loads the optional head tracker and gesture detector, starts the
+        background capture thread, then opens the HTTP listener. Calling it
+        while already running only logs a warning.
+        Raises:
+            Exception: Whatever asyncio.start_server raises (typically OSError
+                when the port is already in use). The capture thread is
+                stopped and the running flag cleared first, so start() can be
+                retried.
+        """
+        if self._running:
+            _LOGGER.warning("Camera server already running")
+            return
+        self._running = True
+        # Initialize head tracker if face tracking enabled
+        if self.enable_face_tracking:
+            try:
+                from .head_tracker import HeadTracker
+                self._head_tracker = HeadTracker()
+                _LOGGER.info("Face tracking enabled with YOLO head tracker")
+            except ImportError as e:
+                _LOGGER.error("Failed to import head tracker: %s", e)
+                self._head_tracker = None
+            except Exception as e:
+                _LOGGER.warning("Failed to initialize head tracker: %s", e)
+                self._head_tracker = None
+        else:
+            _LOGGER.info("Face tracking disabled by configuration")
+        # Initialize gesture detector
+        if self._gesture_detection_enabled:
+            try:
+                from .gesture_detector import GestureDetector
+                self._gesture_detector = GestureDetector()
+                if self._gesture_detector.is_available:
+                    _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
+                else:
+                    _LOGGER.warning("Gesture detection not available")
+                    self._gesture_detector = None
+            except ImportError as e:
+                _LOGGER.warning("Failed to import gesture detector: %s", e)
+                self._gesture_detector = None
+            except Exception as e:
+                _LOGGER.warning("Failed to initialize gesture detector: %s", e)
+                self._gesture_detector = None
+        # Start frame capture thread
+        self._capture_thread = threading.Thread(
+            target=self._capture_frames,
+            daemon=True,
+            name="camera-capture"
+        )
+        self._capture_thread.start()
+        # Start HTTP server
+        try:
+            self._server = await asyncio.start_server(
+                self._handle_client,
+                self.host,
+                self.port,
+            )
+        except Exception:
+            # Roll back: without this the capture thread keeps running and
+            # _running stays True, so a later start() would refuse to retry.
+            self._running = False
+            self._capture_thread.join(timeout=0.5)
+            self._capture_thread = None
+            raise
+        _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
+        _LOGGER.info("  Stream URL: http://<ip>:%d/stream", self.port)
+        _LOGGER.info("  Snapshot URL: http://<ip>:%d/snapshot", self.port)
+    async def stop(self) -> None:
+        """Stop the MJPEG camera server.
+        Clears the running flag, waits for the capture thread to exit and
+        closes the HTTP listener. Safe to call when the server is not running.
+        """
+        self._running = False
+        capture_thread = self._capture_thread
+        self._capture_thread = None
+        if capture_thread is not None:
+            # The capture loop re-checks the running flag only after its sleep:
+            # 1/_fps_idle seconds in idle mode, 1s after an error. Wait longer
+            # than that so a quick restart cannot revive the old thread, and
+            # join in a worker thread so the event loop is not blocked.
+            join_timeout = max(1.0, 1.0 / self._fps_idle) + 0.5
+            loop = asyncio.get_running_loop()
+            await loop.run_in_executor(None, capture_thread.join, join_timeout)
+            if capture_thread.is_alive():
+                _LOGGER.warning(
+                    "Camera capture thread did not stop within %.1fs", join_timeout
+                )
+        if self._server:
+            self._server.close()
+            await self._server.wait_closed()
+            self._server = None
+        _LOGGER.info("MJPEG Camera server stopped")
+    def _capture_frames(self) -> None:
+        """Background thread loop: capture frames, publish JPEGs, run tracking.
+        Each iteration grabs a frame, stores it JPEG-encoded for the HTTP
+        handlers, optionally runs face tracking / gesture detection, then
+        sleeps for 1/_current_fps seconds.
+        Resource optimization:
+        - High frequency (15fps) when face detected or in conversation
+        - Low frequency (2fps) when idle and no face for short period
+        - Ultra-low (0.5fps) when idle for extended period - minimal AI inference
+        """
+        _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)
+        frame_count = 0
+        face_detect_count = 0
+        last_log_time = time.time()
+        while self._running:
+            try:
+                current_time = time.time()
+                # Snapshot the conversation flag once per iteration
+                with self._conversation_lock:
+                    in_conversation = self._in_conversation
+                # Determine if we should run AI inference this frame
+                should_run_ai = self._should_run_ai_inference(current_time)
+                # Only get frame if needed (AI inference or MJPEG streaming)
+                frame = self._get_camera_frame() if should_run_ai or self._has_stream_clients() else None
+                if frame is not None:
+                    frame_count += 1
+                    # Encode frame as JPEG for streaming
+                    encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
+                    success, jpeg_data = cv2.imencode('.jpg', frame, encode_params)
+                    if success:
+                        with self._frame_lock:
+                            self._last_frame = jpeg_data.tobytes()
+                            self._last_frame_time = time.time()
+                    # Only run AI inference when enabled
+                    if should_run_ai:
+                        # Face tracking
+                        if self._face_tracking_enabled and self._head_tracker is not None:
+                            face_detect_count += 1
+                            face_detected = self._process_face_tracking(frame, current_time)
+                            # Update adaptive timing based on detection result
+                            if face_detected:
+                                self._no_face_duration = 0.0
+                                self._current_fps = self._fps_high
+                                self._ai_enabled = True
+                            else:
+                                # Accumulate no-face duration
+                                if self._last_face_detected_time is not None:
+                                    self._no_face_duration = current_time - self._last_face_detected_time
+                                else:
+                                    self._no_face_duration += 1.0 / self._current_fps
+                                # Adaptive power mode. A conversation pins the
+                                # loop to the high rate: the sleep below uses
+                                # _current_fps, so dropping it here would slow
+                                # tracking even though inference is requested
+                                # on every iteration.
+                                if in_conversation:
+                                    self._current_fps = self._fps_high
+                                elif self._no_face_duration > self._idle_threshold:
+                                    self._current_fps = self._fps_idle
+                                elif self._no_face_duration > self._low_power_threshold:
+                                    self._current_fps = self._fps_low
+                            self._last_face_check_time = current_time
+                        # Handle smooth interpolation when face lost
+                        self._process_face_lost_interpolation(current_time)
+                        # Gesture detection (only when face detected recently)
+                        if (self._gesture_detection_enabled and
+                            self._gesture_detector is not None and
+                            self._no_face_duration < 5.0):  # Only detect gestures when someone is present
+                            self._gesture_frame_counter += 1
+                            if self._gesture_frame_counter >= self._gesture_detection_interval:
+                                self._gesture_frame_counter = 0
+                                self._process_gesture_detection(frame)
+                    # Log stats every 30 seconds
+                    if current_time - last_log_time >= 30.0:
+                        fps = frame_count / (current_time - last_log_time)
+                        detect_fps = face_detect_count / (current_time - last_log_time)
+                        mode = "HIGH" if self._current_fps == self._fps_high else ("LOW" if self._current_fps == self._fps_low else "IDLE")
+                        _LOGGER.debug("Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs",
+                                     fps, detect_fps, mode, self._no_face_duration)
+                        frame_count = 0
+                        face_detect_count = 0
+                        last_log_time = current_time
+                # Sleep to maintain target FPS (use current adaptive rate)
+                sleep_time = 1.0 / self._current_fps
+                time.sleep(sleep_time)
+            except Exception as e:
+                # Keep the thread alive on any per-frame failure; back off 1s
+                _LOGGER.error("Error capturing frame: %s", e)
+                time.sleep(1.0)
+        _LOGGER.info("Camera capture thread stopped")
+    def _should_run_ai_inference(self, current_time: float) -> bool:
+        """Decide whether face/gesture inference should run on this iteration.
+        Args:
+            current_time: Timestamp of the current loop iteration (seconds).
+        Returns:
+            True while in conversation mode, True while the loop runs at the
+            high rate, otherwise True only once at least one period of the
+            current (low/idle) rate has passed since the last face check.
+        """
+        with self._conversation_lock:
+            conversing = self._in_conversation
+        # Conversation or high-rate mode: every frame is analysed
+        if conversing or self._current_fps == self._fps_high:
+            return True
+        # Reduced-rate modes: analyse at most once per period
+        check_period = 1.0 / self._current_fps
+        return (current_time - self._last_face_check_time) >= check_period
+    def _has_stream_clients(self) -> bool:
+        """Report whether frames are needed for MJPEG streaming.
+        Placeholder: client connections are not tracked, so this always
+        answers True and the capture loop fetches a frame on every iteration.
+        Returns:
+            Always True.
+        """
+        # For now, always return True to keep stream available
+        # Could be optimized to track actual client connections
+        return True
+    def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
+        """Run the head tracker on one frame and refresh the tracking offsets.
+        Args:
+            frame: Camera image; its first two shape entries are used as
+                height and width.
+            current_time: Timestamp recorded as the last face detection time.
+        Returns:
+            True if a face was found and the offsets were updated, False if
+            no tracker/robot is available, no face was found, or any step
+            raised (the error is logged at debug level).
+        """
+        if self._head_tracker is None or self.reachy_mini is None:
+            return False
+        try:
+            head_xy, _score = self._head_tracker.get_head_position(frame)
+            if head_xy is None:
+                return False
+            # A face is visible: remember when, and cancel any return-to-neutral
+            self._last_face_detected_time = current_time
+            self._interpolation_start_time = None
+            # Map the [-1, 1] normalized position onto pixel coordinates
+            height, width = frame.shape[:2]
+            unit_xy = (head_xy + 1) / 2
+            pixel_x = unit_xy[0] * width
+            pixel_y = unit_xy[1] * height
+            # Ask the SDK for the head pose that looks at that pixel (no motion)
+            look_pose = self.reachy_mini.look_at_image(
+                pixel_x,
+                pixel_y,
+                duration=0.0,
+                perform_movement=False,
+            )
+            # Damp both components for smoother tracking (same as conversation_app)
+            shift = look_pose[:3, 3] * self._offset_scale
+            euler = R.from_matrix(look_pose[:3, :3]).as_euler("xyz", degrees=False)
+            euler = euler * self._offset_scale
+            # Pitch compensation: index 1 in xyz order, +9 degrees looks down
+            # (the robot tends to look up)
+            euler[1] += np.radians(9.0)
+            # Yaw compensation: index 2 in xyz order, -7 degrees turns right
+            # (the robot tends to look to the user's right)
+            euler[2] += np.radians(-7.0)
+            new_offsets = [float(shift[i]) for i in range(3)]
+            new_offsets += [float(euler[i]) for i in range(3)]
+            with self._face_tracking_lock:
+                self._face_tracking_offsets = new_offsets
+            return True
+        except Exception as e:
+            _LOGGER.debug("Face tracking error: %s", e)
+            return False
+    def _process_face_lost_interpolation(self, current_time: float) -> None:
+        """Ease the tracking offsets back to neutral after the face is lost.
+        Nothing happens until _face_lost_delay seconds have passed since the
+        last detection. After that the offsets are blended from their value
+        at that moment towards zero over _interpolation_duration seconds, and
+        the tracking timers are cleared once the blend completes.
+        Args:
+            current_time: Timestamp of the current loop iteration (seconds).
+        """
+        last_seen = self._last_face_detected_time
+        if last_seen is None:
+            return
+        if current_time - last_seen < self._face_lost_delay:
+            return  # Grace period: hold the current offsets
+        if self._interpolation_start_time is None:
+            # First call after the grace period: freeze the starting pose
+            self._interpolation_start_time = current_time
+            with self._face_tracking_lock:
+                frozen = self._face_tracking_offsets.copy()
+            start_pose = np.eye(4, dtype=np.float32)
+            start_pose[:3, 3] = frozen[:3]
+            start_pose[:3, :3] = R.from_euler("xyz", frozen[3:]).as_matrix()
+            self._interpolation_start_pose = start_pose
+        # Blend progress in [0, 1]
+        progress = min(
+            1.0,
+            (current_time - self._interpolation_start_time) / self._interpolation_duration,
+        )
+        if self._interpolation_start_pose is not None:
+            # Target is the identity pose, i.e. all offsets zero
+            blended = self._linear_pose_interpolation(
+                self._interpolation_start_pose, np.eye(4, dtype=np.float32), progress
+            )
+            shift = blended[:3, 3]
+            euler = R.from_matrix(blended[:3, :3]).as_euler("xyz", degrees=False)
+            new_offsets = [float(shift[i]) for i in range(3)]
+            new_offsets += [float(euler[i]) for i in range(3)]
+            with self._face_tracking_lock:
+                self._face_tracking_offsets = new_offsets
+        if progress >= 1.0:
+            # Finished: forget the lost face until a new one is detected
+            self._last_face_detected_time = None
+            self._interpolation_start_time = None
+            self._interpolation_start_pose = None
+    def _linear_pose_interpolation(
+        self, start: np.ndarray, end: np.ndarray, t: float
+    ) -> np.ndarray:
+        """Blend two 4x4 pose matrices.
+        Delegates to the SDK's linear_pose_interpolation when it was
+        importable; otherwise mixes the translations linearly and the
+        rotations with scipy's Slerp.
+        Args:
+            start: Pose returned for t = 0.
+            end: Pose returned for t = 1.
+            t: Blend factor between the two poses.
+        Returns:
+            The interpolated 4x4 pose (float32 in the fallback path).
+        """
+        if SDK_INTERPOLATION_AVAILABLE:
+            return linear_pose_interpolation(start, end, t)
+        # Fallback path
+        from scipy.spatial.transform import Slerp
+        blended = np.eye(4, dtype=np.float32)
+        # Translation: straight linear mix
+        blended[:3, 3] = start[:3, 3] * (1 - t) + end[:3, 3] * t
+        # Rotation: spherical interpolation between the two endpoint quaternions
+        endpoint_quats = np.array([
+            R.from_matrix(start[:3, :3]).as_quat(),
+            R.from_matrix(end[:3, :3]).as_quat(),
+        ])
+        rotation_path = Slerp([0, 1], R.from_quat(endpoint_quats))
+        blended[:3, :3] = rotation_path(t).as_matrix()
+        return blended
+    # =========================================================================
+    # Public API for face tracking
+    # =========================================================================
+    def get_face_tracking_offsets(self) -> Tuple[float, float, float, float, float, float]:
+        """Return a snapshot of the face tracking offsets.
+        The offsets are read while holding the face tracking lock.
+        Returns:
+            The offsets as an (x, y, z, roll, pitch, yaw) tuple.
+        """
+        with self._face_tracking_lock:
+            current = self._face_tracking_offsets
+            return tuple(current[axis] for axis in range(6))
+    def set_face_tracking_enabled(self, enabled: bool) -> None:
+        """Switch face tracking on or off at runtime.
+        A call that does not change the current state is ignored. Disabling
+        stamps the current time as the last detection and clears any running
+        interpolation, so the face-lost handling eases back to neutral.
+        Args:
+            enabled: Desired tracking state.
+        """
+        if enabled == self._face_tracking_enabled:
+            return  # Already in the requested state; stay quiet
+        self._face_tracking_enabled = enabled
+        if enabled:
+            state_label = "enabled"
+        else:
+            state_label = "disabled"
+            # Arm the return-to-neutral interpolation
+            self._last_face_detected_time = time.time()
+            self._interpolation_start_time = None
+        _LOGGER.info("Face tracking %s", state_label)
+    def set_conversation_mode(self, in_conversation: bool) -> None:
+        """Tell the camera loop whether a conversation is in progress.
+        Entering conversation mode switches to the high frame rate at once
+        and resets the no-face timer; leaving it only clears the flag.
+        Args:
+            in_conversation: True when voice assistant is actively conversing
+        """
+        with self._conversation_lock:
+            self._in_conversation = in_conversation
+        if not in_conversation:
+            _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
+            return
+        # Conversation started: go to full rate right away
+        self._current_fps = self._fps_high
+        self._ai_enabled = True
+        self._no_face_duration = 0.0  # Restart the no-face timer
+        _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
+    # =========================================================================
+    # Gesture detection
+    # =========================================================================
+    def _process_gesture_detection(self, frame: np.ndarray) -> None:
+        """Run gesture detection on one frame and publish state changes.
+        Updates the current gesture and confidence under the gesture lock.
+        When the gesture name changes, the registered state callback is
+        invoked outside the lock. Detector and callback failures are logged
+        and never propagate to the capture loop.
+        Args:
+            frame: Camera image handed to the gesture detector.
+        """
+        if self._gesture_detector is None:
+            return
+        try:
+            # Detect gesture
+            detected_gesture, confidence = self._gesture_detector.detect(frame)
+            # Update current gesture state
+            state_changed = False
+            with self._gesture_lock:
+                old_gesture = self._current_gesture
+                if detected_gesture.value != "no_gesture":
+                    self._current_gesture = detected_gesture.value
+                    self._gesture_confidence = confidence
+                    if old_gesture != detected_gesture.value:
+                        state_changed = True
+                        _LOGGER.debug("Gesture: %s (%.0f%%)",
+                                     detected_gesture.value, confidence * 100)
+                else:
+                    if self._current_gesture != "none":
+                        state_changed = True
+                    self._current_gesture = "none"
+                    self._gesture_confidence = 0.0
+            # Notify entity registry to push update to Home Assistant
+            if state_changed and self._gesture_state_callback:
+                try:
+                    self._gesture_state_callback()
+                except Exception as callback_error:
+                    # Best effort: a broken listener must not stop detection,
+                    # but the failure should be visible in the logs.
+                    _LOGGER.warning("Gesture state callback failed: %s", callback_error)
+        except Exception as e:
+            _LOGGER.warning("Gesture detection error: %s", e)
+    def get_current_gesture(self) -> str:
+        """Return the name of the most recently detected gesture.
+        The value is read while holding the gesture lock.
+        Returns:
+            Gesture name string (e.g., "like", "peace"), or "none" when no
+            gesture is active.
+        """
+        with self._gesture_lock:
+            gesture_name = self._current_gesture
+        return gesture_name
+    def get_gesture_confidence(self) -> float:
+        """Return the confidence of the current gesture as a percentage.
+        The stored confidence is read under the gesture lock and scaled by
+        100 for display.
+        Returns:
+            The stored confidence multiplied by 100 (0.0 when no gesture is
+            active).
+        """
+        with self._gesture_lock:
+            percent = self._gesture_confidence * 100.0
+        return percent
+    def set_gesture_detection_enabled(self, enabled: bool) -> None:
+        """Switch gesture detection on or off at runtime.
+        Disabling also resets the published gesture to "none" with zero
+        confidence. The state callback is not invoked by this method.
+        Args:
+            enabled: Desired detection state.
+        """
+        self._gesture_detection_enabled = enabled
+        if enabled:
+            state_label = "enabled"
+        else:
+            state_label = "disabled"
+            with self._gesture_lock:
+                self._current_gesture = "none"
+                self._gesture_confidence = 0.0
+        _LOGGER.info("Gesture detection %s", state_label)
+    def set_gesture_state_callback(self, callback) -> None:
+        """Register the callable invoked when the detected gesture changes.
+        The callback is called without arguments from the gesture processing
+        step of the capture loop, i.e. on the capture thread.
+        Args:
+            callback: Zero-argument callable; a falsy value (e.g. None)
+                disables notifications.
+        """
+        self._gesture_state_callback = callback
+    def _get_camera_frame(self) -> Optional[np.ndarray]:
+        """Fetch one frame from the robot camera.
+        Returns:
+            The frame from reachy_mini.media.get_frame(), a generated test
+            pattern when no robot is attached, or None if the camera call
+            raised (logged at debug level).
+        """
+        robot = self.reachy_mini
+        if robot is None:
+            # No robot connected: serve the placeholder image instead
+            return self._generate_test_frame()
+        try:
+            return robot.media.get_frame()
+        except Exception as e:
+            _LOGGER.debug("Failed to get camera frame: %s", e)
+            return None
+    def _generate_test_frame(self) -> np.ndarray:
+        """Build the placeholder image used when no robot is attached.
+        Returns:
+            A 480x640 black 3-channel uint8 frame with a title, a
+            "No camera connected" notice and the current local timestamp.
+        """
+        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
+        # (text, origin, font scale, colour, thickness) for each caption
+        captions = [
+            ("Reachy Mini Camera", (150, 200), 1.2, (255, 255, 255), 2),
+            ("No camera connected", (180, 280), 0.8, (128, 128, 128), 1),
+            (time.strftime("%Y-%m-%d %H:%M:%S"), (220, 350), 0.6, (0, 255, 0), 1),
+        ]
+        for text, origin, scale, colour, thickness in captions:
+            cv2.putText(
+                canvas,
+                text,
+                origin,
+                cv2.FONT_HERSHEY_SIMPLEX,
+                scale,
+                colour,
+                thickness,
+            )
+        return canvas
+    def get_snapshot(self) -> Optional[bytes]:
+        """Return the most recent JPEG-encoded frame.
+        Returns:
+            The JPEG bytes stored by the capture loop, or None if no frame
+            has been captured yet.
+        """
+        with self._frame_lock:
+            latest_jpeg = self._last_frame
+        return latest_jpeg
+    async def _handle_client(
+        self,
+        reader: asyncio.StreamReader,
+        writer: asyncio.StreamWriter,
+    ) -> None:
+        """Serve one HTTP connection.
+        Reads the request line and headers, then dispatches on the path:
+        /stream (MJPEG), /snapshot (single JPEG), anything else (index page).
+        A query string is ignored for routing, so cache-busting URLs such as
+        /snapshot?t=123 still reach the right handler. The connection is
+        always closed before returning.
+        Args:
+            reader: Stream the request is read from.
+            writer: Stream the response is written to.
+        """
+        try:
+            # Read HTTP request
+            request_line = await asyncio.wait_for(
+                reader.readline(),
+                timeout=10.0
+            )
+            if not request_line:
+                # Peer closed the connection without sending a request
+                return
+            request = request_line.decode('utf-8', errors='ignore').strip()
+            # Read headers (we don't need them but must consume them)
+            while True:
+                line = await asyncio.wait_for(reader.readline(), timeout=5.0)
+                if line == b'\r\n' or line == b'\n' or line == b'':
+                    break
+            # Parse request target
+            parts = request.split(' ')
+            target = parts[1] if len(parts) >= 2 else '/'
+            # Route on the path only; drop any "?query" suffix
+            path = target.split('?', 1)[0]
+            _LOGGER.debug("HTTP request: %s", request)
+            if path == '/stream':
+                await self._handle_stream(writer)
+            elif path == '/snapshot':
+                await self._handle_snapshot(writer)
+            else:
+                await self._handle_index(writer)
+        except asyncio.TimeoutError:
+            _LOGGER.debug("Client connection timeout")
+        except (ConnectionResetError, BrokenPipeError):
+            _LOGGER.debug("Client connection reset")
+        except Exception as e:
+            _LOGGER.error("Error handling client: %s", e)
+        finally:
+            try:
+                writer.close()
+                await writer.wait_closed()
+            except Exception:
+                # The peer may already be gone; nothing left to clean up
+                pass
+    async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
+        """Serve the HTML status page.
+        The page embeds the live stream and lists the available endpoints
+        using the configured port.
+        Args:
+            writer: Stream the HTTP response is written to.
+        """
+        html = f"""<!DOCTYPE html>
+<html>
+<head>
+    <title>Reachy Mini Camera</title>
+    <style>
+        body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
+        h1 {{ color: #00d4ff; }}
+        .container {{ max-width: 800px; margin: 0 auto; }}
+        .stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
+        a {{ color: #00d4ff; }}
+        .info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Reachy Mini Camera</h1>
+        <img class="stream" src="/stream" alt="Camera Stream">
+        <div class="info">
+            <h3>Endpoints:</h3>
+            <ul>
+                <li><a href="/stream">/stream</a> - MJPEG video stream</li>
+                <li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
+            </ul>
+            <h3>Home Assistant Integration:</h3>
+            <p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
+        </div>
+    </div>
+</body>
+</html>"""
+        # Encode first: Content-Length must count bytes, not characters
+        body = html.encode('utf-8')
+        response = (
+            "HTTP/1.1 200 OK\r\n"
+            "Content-Type: text/html; charset=utf-8\r\n"
+            f"Content-Length: {len(body)}\r\n"
+            "Connection: close\r\n"
+            "\r\n"
+        )
+        writer.write(response.encode('utf-8'))
+        writer.write(body)
+        await writer.drain()
+    async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
+        """Answer a snapshot request with the latest JPEG frame.
+        Sends a 200 response with the image, or a 503 plain-text response
+        when no frame has been captured yet.
+        Args:
+            writer: Stream the HTTP response is written to.
+        """
+        frame_bytes = self.get_snapshot()
+        if frame_bytes is not None:
+            header_lines = [
+                "HTTP/1.1 200 OK",
+                "Content-Type: image/jpeg",
+                f"Content-Length: {len(frame_bytes)}",
+                "Cache-Control: no-cache, no-store, must-revalidate",
+                "Connection: close",
+                "",
+                "",
+            ]
+            writer.write("\r\n".join(header_lines).encode('utf-8'))
+            writer.write(frame_bytes)
+        else:
+            # Nothing captured yet: report the service as unavailable
+            unavailable_lines = [
+                "HTTP/1.1 503 Service Unavailable",
+                "Content-Type: text/plain",
+                "Connection: close",
+                "",
+                "No frame available",
+            ]
+            writer.write("\r\n".join(unavailable_lines).encode('utf-8'))
+        await writer.drain()
+    async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
+        """Serve an MJPEG stream until the client leaves or the server stops.
+        Sends the multipart header, then polls the latest encoded frame every
+        10 ms and forwards it whenever its capture time is newer than the
+        last one sent.
+        Args:
+            writer: Stream the HTTP response is written to.
+        """
+        # Send MJPEG headers
+        response = (
+            "HTTP/1.1 200 OK\r\n"
+            f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
+            "Cache-Control: no-cache, no-store, must-revalidate\r\n"
+            "Connection: keep-alive\r\n"
+            "\r\n"
+        )
+        writer.write(response.encode('utf-8'))
+        await writer.drain()
+        _LOGGER.debug("Started MJPEG stream")
+        last_sent_time = 0.0
+        try:
+            while self._running:
+                # Get latest frame
+                with self._frame_lock:
+                    jpeg_data = self._last_frame
+                    frame_time = self._last_frame_time
+                # Only send if we have a new frame
+                if jpeg_data is not None and frame_time > last_sent_time:
+                    # Send MJPEG frame
+                    frame_header = (
+                        f"--{MJPEG_BOUNDARY}\r\n"
+                        "Content-Type: image/jpeg\r\n"
+                        f"Content-Length: {len(jpeg_data)}\r\n"
+                        "\r\n"
+                    )
+                    writer.write(frame_header.encode('utf-8'))
+                    writer.write(jpeg_data)
+                    writer.write(b"\r\n")
+                    await writer.drain()
+                    last_sent_time = frame_time
+                # Small delay to prevent busy loop
+                await asyncio.sleep(0.01)
+        except ConnectionError:
+            # Covers reset, broken pipe and aborted connections alike: a
+            # client going away is routine, not an error.
+            _LOGGER.debug("Client disconnected from stream")
+        except Exception as e:
+            _LOGGER.error("Error in MJPEG stream: %s", e)
+        _LOGGER.debug("Ended MJPEG stream")

{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py RENAMED Viewed

@@ -1,56 +1,56 @@
 """ESPHome entity definitions."""
-import logging
 from abc import abstractmethod
-from collections.abc import Callable, Iterable
-from typing import TYPE_CHECKING
 # pylint: disable=no-name-in-module
 from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
-    BinarySensorStateResponse,
-    CameraImageRequest,
-    CameraImageResponse,
     ListEntitiesBinarySensorResponse,
     ListEntitiesCameraResponse,
     ListEntitiesMediaPlayerResponse,
     ListEntitiesNumberResponse,
     ListEntitiesRequest,
     ListEntitiesTextSensorResponse,
     MediaPlayerCommandRequest,
     MediaPlayerStateResponse,
     NumberCommandRequest,
     NumberStateResponse,
     SubscribeHomeAssistantStatesRequest,
     SubscribeStatesRequest,
     TextSensorStateResponse,
 )
-from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerEntityFeature, MediaPlayerState
 from google.protobuf import message
-from ..audio.audio_player import AudioPlayer
-from ..core.util import call_all
 if TYPE_CHECKING:
-    from ..protocol.api_server import APIServer
 logger = logging.getLogger(__name__)
-def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
-    """Read an entity value without letting getter failures break the ESPHome session."""
-    if getter is None:
-        return current_value
-    try:
-        return getter()
-    except Exception as e:
-        logger.error("Entity getter failed for %s: %s", entity_name, e)
-        return current_value
 class ESPHomeEntity:
     """Base class for ESPHome entities."""
-    def __init__(self, server: "APIServer") -> None:
         self.server = server
     @abstractmethod
@@ -63,7 +63,7 @@ class MediaPlayerEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
@@ -82,9 +82,9 @@ class MediaPlayerEntity(ESPHomeEntity):
     def play(
         self,
-        url: str | list[str],
         announcement: bool = False,
-        done_callback: Callable[[], None] | None = None,
     ) -> Iterable[message.Message]:
         if announcement:
             if self.music_player.is_playing:
@@ -92,14 +92,18 @@ class MediaPlayerEntity(ESPHomeEntity):
                 self.music_player.pause()
                 self.announce_player.play(
                     url,
-                    done_callback=lambda: call_all(self.music_player.resume, done_callback),
                 )
             else:
                 # Announce, idle
                 self.announce_player.play(
                     url,
                     done_callback=lambda: call_all(
-                        lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
                         done_callback,
                     ),
                 )
@@ -108,7 +112,9 @@ class MediaPlayerEntity(ESPHomeEntity):
             self.music_player.play(
                 url,
                 done_callback=lambda: call_all(
-                    lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
                     done_callback,
                 ),
             )
@@ -127,9 +133,6 @@ class MediaPlayerEntity(ESPHomeEntity):
                 elif msg.command == MediaPlayerCommand.PLAY:
                     self.music_player.resume()
                     yield self._update_state(MediaPlayerState.PLAYING)
-                elif msg.command == MediaPlayerCommand.STOP:
-                    self.music_player.stop()
-                    yield self._update_state(MediaPlayerState.IDLE)
             elif msg.has_volume:
                 volume = int(msg.volume * 100)
                 self.music_player.set_volume(volume)
@@ -173,13 +176,13 @@ class TextSensorEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], str] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -192,7 +195,9 @@ class TextSensorEntity(ESPHomeEntity):
     @property
     def value(self) -> str:
-        return str(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: str) -> None:
@@ -227,14 +232,14 @@ class BinarySensorEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], bool] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -248,7 +253,9 @@ class BinarySensorEntity(ESPHomeEntity):
     @property
     def value(self) -> bool:
-        return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: bool) -> None:
@@ -284,7 +291,7 @@ class NumberEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
@@ -295,8 +302,8 @@ class NumberEntity(ESPHomeEntity):
         unit_of_measurement: str = "",
         mode: int = 0,  # 0 = auto, 1 = box, 2 = slider
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], float] | None = None,
-        value_setter: Callable[[float], None] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -315,7 +322,9 @@ class NumberEntity(ESPHomeEntity):
     @property
     def value(self) -> float:
-        return float(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: float) -> None:
@@ -362,12 +371,12 @@ class CameraEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
         icon: str = "mdi:camera",
-        image_getter: Callable[[], bytes | None] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -376,7 +385,7 @@ class CameraEntity(ESPHomeEntity):
         self.icon = icon
         self._image_getter = image_getter
-    def get_image(self) -> bytes | None:
         """Get the current camera image as JPEG bytes."""
         if self._image_getter:
             return self._image_getter()
@@ -407,3 +416,4 @@ class CameraEntity(ESPHomeEntity):
                     data=b"",
                     done=True,
                 )

 """ESPHome entity definitions."""
 from abc import abstractmethod
+from collections.abc import Iterable
+from typing import Callable, List, Optional, Union, TYPE_CHECKING
+import logging
 # pylint: disable=no-name-in-module
 from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
     ListEntitiesBinarySensorResponse,
+    ListEntitiesButtonResponse,
     ListEntitiesCameraResponse,
     ListEntitiesMediaPlayerResponse,
     ListEntitiesNumberResponse,
     ListEntitiesRequest,
+    ListEntitiesSelectResponse,
+    ListEntitiesSensorResponse,
+    ListEntitiesSwitchResponse,
     ListEntitiesTextSensorResponse,
+    BinarySensorStateResponse,
+    ButtonCommandRequest,
+    CameraImageRequest,
+    CameraImageResponse,
     MediaPlayerCommandRequest,
     MediaPlayerStateResponse,
     NumberCommandRequest,
     NumberStateResponse,
+    SelectCommandRequest,
+    SelectStateResponse,
+    SensorStateResponse,
     SubscribeHomeAssistantStatesRequest,
     SubscribeStatesRequest,
+    SwitchCommandRequest,
+    SwitchStateResponse,
     TextSensorStateResponse,
 )
+from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerState, MediaPlayerEntityFeature
 from google.protobuf import message
+from .api_server import APIServer
+from .audio_player import AudioPlayer
+from .util import call_all
 if TYPE_CHECKING:
+    from reachy_mini import ReachyMini
 logger = logging.getLogger(__name__)
 class ESPHomeEntity:
     """Base class for ESPHome entities."""
+    def __init__(self, server: APIServer) -> None:
         self.server = server
     @abstractmethod
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
     def play(
         self,
+        url: Union[str, List[str]],
         announcement: bool = False,
+        done_callback: Optional[Callable[[], None]] = None,
     ) -> Iterable[message.Message]:
         if announcement:
             if self.music_player.is_playing:
                 self.music_player.pause()
                 self.announce_player.play(
                     url,
+                    done_callback=lambda: call_all(
+                        self.music_player.resume, done_callback
+                    ),
                 )
             else:
                 # Announce, idle
                 self.announce_player.play(
                     url,
                     done_callback=lambda: call_all(
+                        lambda: self.server.send_messages(
+                            [self._update_state(MediaPlayerState.IDLE)]
+                        ),
                         done_callback,
                     ),
                 )
             self.music_player.play(
                 url,
                 done_callback=lambda: call_all(
+                    lambda: self.server.send_messages(
+                        [self._update_state(MediaPlayerState.IDLE)]
+                    ),
                     done_callback,
                 ),
             )
                 elif msg.command == MediaPlayerCommand.PLAY:
                     self.music_player.resume()
                     yield self._update_state(MediaPlayerState.PLAYING)
             elif msg.has_volume:
                 volume = int(msg.volume * 100)
                 self.music_player.set_volume(volume)
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], str]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
     @property
     def value(self) -> str:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: str) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], bool]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
     @property
     def value(self) -> bool:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: bool) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         unit_of_measurement: str = "",
         mode: int = 0,  # 0 = auto, 1 = box, 2 = slider
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], float]] = None,
+        value_setter: Optional[Callable[[float], None]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
     @property
     def value(self) -> float:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: float) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         icon: str = "mdi:camera",
+        image_getter: Optional[Callable[[], Optional[bytes]]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
         self.icon = icon
         self._image_getter = image_getter
+    def get_image(self) -> Optional[bytes]:
         """Get the current camera image as JPEG bytes."""
         if self._image_getter:
             return self._image_getter()
                     data=b"",
                     done=True,
                 )

{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py RENAMED Viewed

@@ -1,16 +1,16 @@
 """Extended ESPHome entity types for Reachy Mini control."""
 import logging
-from collections.abc import Callable, Iterable
-from typing import TYPE_CHECKING
 from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
-    ButtonCommandRequest,
     ListEntitiesButtonResponse,
     ListEntitiesRequest,
     ListEntitiesSelectResponse,
     ListEntitiesSensorResponse,
     ListEntitiesSwitchResponse,
     SelectCommandRequest,
     SelectStateResponse,
     SensorStateResponse,
@@ -21,28 +21,14 @@ from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
 )
 from google.protobuf import message
 from .entity import ESPHomeEntity
-if TYPE_CHECKING:
-    from ..protocol.api_server import APIServer
 logger = logging.getLogger(__name__)
-def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
-    """Read an entity value without letting getter failures break the ESPHome session."""
-    if getter is None:
-        return current_value
-    try:
-        return getter()
-    except Exception as e:
-        logger.error("Entity getter failed for %s: %s", entity_name, e)
-        return current_value
 class SensorStateClass:
     """ESPHome SensorStateClass enum values."""
     NONE = 0
     MEASUREMENT = 1
     TOTAL_INCREASING = 2
@@ -54,7 +40,7 @@ class SensorEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
@@ -64,7 +50,7 @@ class SensorEntity(ESPHomeEntity):
         device_class: str = "",
         state_class: int = SensorStateClass.NONE,
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], float] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -75,7 +61,7 @@ class SensorEntity(ESPHomeEntity):
         self.accuracy_decimals = accuracy_decimals
         self.device_class = device_class
         self.entity_category = entity_category
-        # Convert string state_class to enum
         if isinstance(state_class, str):
             state_class_map = {
                 "": SensorStateClass.NONE,
@@ -91,7 +77,9 @@ class SensorEntity(ESPHomeEntity):
     @property
     def value(self) -> float:
-        return float(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: float) -> None:
@@ -130,15 +118,15 @@ class SwitchEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], bool] | None = None,
-        value_setter: Callable[[bool], None] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -153,7 +141,9 @@ class SwitchEntity(ESPHomeEntity):
     @property
     def value(self) -> bool:
-        return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: bool) -> None:
@@ -193,15 +183,15 @@ class SelectEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
-        options: list[str],
         icon: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        value_getter: Callable[[], str] | None = None,
-        value_setter: Callable[[str], None] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
@@ -216,7 +206,9 @@ class SelectEntity(ESPHomeEntity):
     @property
     def value(self) -> str:
-        return str(_safe_get_value(self._value_getter, self._value, self.object_id))
     @value.setter
     def value(self, new_value: str) -> None:
@@ -260,14 +252,14 @@ class ButtonEntity(ESPHomeEntity):
     def __init__(
         self,
-        server: "APIServer",
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
-        on_press: Callable[[], None] | None = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key

 """Extended ESPHome entity types for Reachy Mini control."""
+from collections.abc import Iterable
+from typing import Callable, List, Optional
 import logging
 from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
     ListEntitiesButtonResponse,
     ListEntitiesRequest,
     ListEntitiesSelectResponse,
     ListEntitiesSensorResponse,
     ListEntitiesSwitchResponse,
+    ButtonCommandRequest,
     SelectCommandRequest,
     SelectStateResponse,
     SensorStateResponse,
 )
 from google.protobuf import message
+from .api_server import APIServer
 from .entity import ESPHomeEntity
 logger = logging.getLogger(__name__)
 class SensorStateClass:
     """ESPHome SensorStateClass enum values."""
     NONE = 0
     MEASUREMENT = 1
     TOTAL_INCREASING = 2
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         device_class: str = "",
         state_class: int = SensorStateClass.NONE,
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], float]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
         self.accuracy_decimals = accuracy_decimals
         self.device_class = device_class
         self.entity_category = entity_category
+        # Convert string state_class to int if needed (for backward compatibility)
         if isinstance(state_class, str):
             state_class_map = {
                 "": SensorStateClass.NONE,
     @property
     def value(self) -> float:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: float) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], bool]] = None,
+        value_setter: Optional[Callable[[bool], None]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
     @property
     def value(self) -> bool:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: bool) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
+        options: List[str],
         icon: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        value_getter: Optional[Callable[[], str]] = None,
+        value_setter: Optional[Callable[[str], None]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key
     @property
     def value(self) -> str:
+        if self._value_getter:
+            return self._value_getter()
+        return self._value
     @value.setter
     def value(self, new_value: str) -> None:
     def __init__(
         self,
+        server: APIServer,
         key: int,
         name: str,
         object_id: str,
         icon: str = "",
         device_class: str = "",
         entity_category: int = 0,  # 0 = none, 1 = config, 2 = diagnostic
+        on_press: Optional[Callable[[], None]] = None,
     ) -> None:
         ESPHomeEntity.__init__(self, server)
         self.key = key

reachy_mini_ha_voice/entity_registry.py ADDED Viewed

	@@ -0,0 +1,945 @@

+"""Entity registry for ESPHome entities.
+This module handles the registration and management of all ESPHome entities
+for the Reachy Mini voice assistant.
+"""
+import logging
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional
+from .entity import BinarySensorEntity, CameraEntity, NumberEntity, TextSensorEntity
+from .entity_extensions import SensorEntity, SwitchEntity, SelectEntity, ButtonEntity
+if TYPE_CHECKING:
+    from .reachy_controller import ReachyController
+    from .camera_server import MJPEGCameraServer
+_LOGGER = logging.getLogger(__name__)
+# Fixed entity key mapping - ensures consistent keys across restarts.
+# Keys are hand-assigned integers grouped by feature phase (100s, 200s, ...);
+# object_ids missing from this table get a hash-derived fallback key from
+# get_entity_key().
+ENTITY_KEYS: Dict[str, int] = {
+    # Media player (key 0 reserved)
+    "reachy_mini_media_player": 0,
+    # Phase 1: Basic status and volume
+    "daemon_state": 100,
+    "backend_ready": 101,
+    "speaker_volume": 103,
+    # Phase 2: Motor control
+    "motors_enabled": 200,
+    "motor_mode": 201,
+    "wake_up": 202,
+    "go_to_sleep": 203,
+    # Phase 3: Pose control
+    "head_x": 300,
+    "head_y": 301,
+    "head_z": 302,
+    "head_roll": 303,
+    "head_pitch": 304,
+    "head_yaw": 305,
+    "body_yaw": 306,
+    "antenna_left": 307,
+    "antenna_right": 308,
+    # Phase 4: Look at control
+    "look_at_x": 400,
+    "look_at_y": 401,
+    "look_at_z": 402,
+    # Phase 5: DOA (Direction of Arrival) - re-added for wakeup turn-to-sound
+    "doa_angle": 500,
+    "speech_detected": 501,
+    # Phase 6: Diagnostic information
+    "control_loop_frequency": 600,
+    "sdk_version": 601,
+    "robot_name": 602,
+    "wireless_version": 603,
+    "simulation_mode": 604,
+    "wlan_ip": 605,
+    "error_message": 606,  # Moved to diagnostic
+    # Phase 7: IMU sensors
+    "imu_accel_x": 700,
+    "imu_accel_y": 701,
+    "imu_accel_z": 702,
+    "imu_gyro_x": 703,
+    "imu_gyro_y": 704,
+    "imu_gyro_z": 705,
+    "imu_temperature": 706,
+    # Phase 8: Emotion selector
+    "emotion": 800,
+    # Phase 9: Audio controls
+    "microphone_volume": 900,
+    # Phase 10: Camera
+    "camera_url": 1000,  # Keep for backward compatibility
+    "camera": 1001,      # New camera entity
+    # Phase 11: LED control (disabled - not visible)
+    # "led_brightness": 1100,
+    # "led_effect": 1101,
+    # "led_color_r": 1102,
+    # "led_color_g": 1103,
+    # "led_color_b": 1104,
+    # Phase 12: Audio processing
+    "agc_enabled": 1200,
+    "agc_max_gain": 1201,
+    "noise_suppression": 1202,
+    "echo_cancellation_converged": 1203,
+    # Phase 13: Sendspin - auto-enabled via mDNS, no user entities needed
+    # Phase 21: Continuous conversation
+    "continuous_conversation": 1500,
+    # Phase 22: Gesture detection
+    "gesture_detected": 1600,
+    "gesture_confidence": 1601,
+}
+def get_entity_key(object_id: str) -> int:
+    """Get a consistent entity key for the given object_id."""
+    if object_id in ENTITY_KEYS:
+        return ENTITY_KEYS[object_id]
+    # Fallback: generate key from hash (should not happen if all entities are registered)
+    _LOGGER.warning(f"Entity key not found for {object_id}, generating from hash")
+    return abs(hash(object_id)) % 10000 + 2000
+class EntityRegistry:
+    """Registry for managing ESPHome entities."""
+    def __init__(
+        self,
+        server,
+        reachy_controller: "ReachyController",
+        camera_server: Optional["MJPEGCameraServer"] = None,
+        play_emotion_callback: Optional[Callable[[str], None]] = None,
+    ) -> None:
+        """Initialize the entity registry.
+
+        Stores the collaborators and sets the initial gesture and emotion
+        state; no entities are created here.
+
+        Args:
+            server: The VoiceSatelliteProtocol server instance
+            reachy_controller: The ReachyController instance
+            camera_server: Optional camera server for camera entity
+            play_emotion_callback: Optional callback for playing emotions
+        """
+        self.server = server
+        self.reachy_controller = reachy_controller
+        self.camera_server = camera_server
+        self._play_emotion_callback = play_emotion_callback
+        # Gesture detection state
+        self._current_gesture = "none"
+        self._gesture_confidence = 0.0
+        # Emotion state
+        self._current_emotion = "None"
+        # Map display names to robot emotion identifiers.
+        # "None" maps to Python None, i.e. no robot emotion name.
+        self._emotion_map: Dict[str, Optional[str]] = {
+            "None": None,
+            # Basic emotions
+            "Happy": "cheerful1",
+            "Sad": "sad1",
+            "Angry": "rage1",
+            "Fear": "fear1",
+            "Surprise": "surprised1",
+            "Disgust": "disgusted1",
+            # Extended emotions
+            "Laughing": "laughing1",
+            "Loving": "loving1",
+            "Proud": "proud1",
+            "Grateful": "grateful1",
+            "Enthusiastic": "enthusiastic1",
+            "Curious": "curious1",
+            "Amazed": "amazed1",
+            "Shy": "shy1",
+            "Confused": "confused1",
+            "Thoughtful": "thoughtful1",
+            "Anxious": "anxiety1",
+            "Scared": "scared1",
+            "Frustrated": "frustrated1",
+            "Irritated": "irritated1",
+            "Furious": "furious1",
+            "Contempt": "contempt1",
+            "Bored": "boredom1",
+            "Tired": "tired1",
+            "Exhausted": "exhausted1",
+            "Lonely": "lonely1",
+            "Downcast": "downcast1",
+            "Resigned": "resigned1",
+            "Uncertain": "uncertain1",
+            "Uncomfortable": "uncomfortable1",
+            "Lost": "lost1",
+            "Indifferent": "indifferent1",
+            # Positive actions
+            "Yes": "yes1",
+            "No": "no1",
+            "Welcoming": "welcoming1",
+            "Helpful": "helpful1",
+            "Attentive": "attentive1",
+            "Understanding": "understanding1",
+            "Calming": "calming1",
+            "Relief": "relief1",
+            "Success": "success1",
+            "Serenity": "serenity1",
+            # Negative actions
+            "Oops": "oops1",
+            "Displeased": "displeased1",
+            "Impatient": "impatient1",
+            "Reprimand": "reprimand1",
+            "GoAway": "go_away1",
+            # Special
+            "Come": "come1",
+            "Inquiring": "inquiring1",
+            "Sleep": "sleep1",
+            "Dance": "dance1",
+            "Electric": "electric1",
+            "Dying": "dying1",
+        }
+    def setup_all_entities(self, entities: List) -> None:
+        """Setup all entity phases.
+        Args:
+            entities: The list to append entities to
+        """
+        self._setup_phase1_entities(entities)
+        self._setup_phase2_entities(entities)
+        self._setup_phase3_entities(entities)
+        self._setup_phase4_entities(entities)
+        self._setup_phase5_entities(entities)  # DOA for wakeup turn-to-sound
+        self._setup_phase6_entities(entities)
+        self._setup_phase7_entities(entities)
+        self._setup_phase8_entities(entities)
+        self._setup_phase9_entities(entities)
+        self._setup_phase10_entities(entities)
+        # Phase 11 (LED control) disabled - LEDs are inside the robot and not visible
+        self._setup_phase12_entities(entities)
+        # Phase 13 (Sendspin) - auto-enabled via mDNS discovery, no user entities
+        # Phase 14 (head_joints, passive_joints) removed - not needed
+        # Phase 20 (Tap detection) disabled - too many false triggers
+        self._setup_phase21_entities(entities)
+        self._setup_phase22_entities(entities)
+        _LOGGER.info("All entities registered: %d total", len(entities))
+    def _setup_phase1_entities(self, entities: List) -> None:
+        """Setup Phase 1 entities: Basic status and volume control."""
+        rc = self.reachy_controller
+        entities.append(TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("daemon_state"),
+            name="Daemon State",
+            object_id="daemon_state",
+            icon="mdi:robot",
+            value_getter=rc.get_daemon_state,
+        ))
+        entities.append(BinarySensorEntity(
+            server=self.server,
+            key=get_entity_key("backend_ready"),
+            name="Backend Ready",
+            object_id="backend_ready",
+            icon="mdi:check-circle",
+            device_class="connectivity",
+            value_getter=rc.get_backend_ready,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("speaker_volume"),
+            name="Speaker Volume",
+            object_id="speaker_volume",
+            min_value=0.0,
+            max_value=100.0,
+            step=1.0,
+            icon="mdi:volume-high",
+            unit_of_measurement="%",
+            mode=2,  # Slider mode
+            entity_category=1,  # config
+            value_getter=rc.get_speaker_volume,
+            value_setter=rc.set_speaker_volume,
+        ))
+        _LOGGER.debug("Phase 1 entities registered: daemon_state, backend_ready, speaker_volume")
+    def _setup_phase2_entities(self, entities: List) -> None:
+        """Setup Phase 2 entities: Motor control."""
+        rc = self.reachy_controller
+        entities.append(SwitchEntity(
+            server=self.server,
+            key=get_entity_key("motors_enabled"),
+            name="Motors Enabled",
+            object_id="motors_enabled",
+            icon="mdi:engine",
+            device_class="switch",
+            value_getter=rc.get_motors_enabled,
+            value_setter=rc.set_motors_enabled,
+        ))
+        entities.append(ButtonEntity(
+            server=self.server,
+            key=get_entity_key("wake_up"),
+            name="Wake Up",
+            object_id="wake_up",
+            icon="mdi:alarm",
+            device_class="restart",
+            on_press=rc.wake_up,
+        ))
+        entities.append(ButtonEntity(
+            server=self.server,
+            key=get_entity_key("go_to_sleep"),
+            name="Go to Sleep",
+            object_id="go_to_sleep",
+            icon="mdi:sleep",
+            device_class="restart",
+            on_press=rc.go_to_sleep,
+        ))
+        _LOGGER.debug("Phase 2 entities registered: motors_enabled, wake_up, go_to_sleep")
+    def _setup_phase3_entities(self, entities: List) -> None:
+        """Setup Phase 3 entities: Pose control."""
+        rc = self.reachy_controller
+        # Head position controls (X, Y, Z in mm)
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_x"),
+            name="Head X Position",
+            object_id="head_x",
+            min_value=-50.0,
+            max_value=50.0,
+            step=1.0,
+            icon="mdi:axis-x-arrow",
+            unit_of_measurement="mm",
+            mode=2,
+            value_getter=rc.get_head_x,
+            value_setter=rc.set_head_x,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_y"),
+            name="Head Y Position",
+            object_id="head_y",
+            min_value=-50.0,
+            max_value=50.0,
+            step=1.0,
+            icon="mdi:axis-y-arrow",
+            unit_of_measurement="mm",
+            mode=2,
+            value_getter=rc.get_head_y,
+            value_setter=rc.set_head_y,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_z"),
+            name="Head Z Position",
+            object_id="head_z",
+            min_value=-50.0,
+            max_value=50.0,
+            step=1.0,
+            icon="mdi:axis-z-arrow",
+            unit_of_measurement="mm",
+            mode=2,
+            value_getter=rc.get_head_z,
+            value_setter=rc.set_head_z,
+        ))
+        # Head orientation controls (Roll, Pitch, Yaw in degrees)
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_roll"),
+            name="Head Roll",
+            object_id="head_roll",
+            min_value=-40.0,
+            max_value=40.0,
+            step=1.0,
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_head_roll,
+            value_setter=rc.set_head_roll,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_pitch"),
+            name="Head Pitch",
+            object_id="head_pitch",
+            min_value=-40.0,
+            max_value=40.0,
+            step=1.0,
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_head_pitch,
+            value_setter=rc.set_head_pitch,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("head_yaw"),
+            name="Head Yaw",
+            object_id="head_yaw",
+            min_value=-180.0,
+            max_value=180.0,
+            step=1.0,
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_head_yaw,
+            value_setter=rc.set_head_yaw,
+        ))
+        # Body yaw control
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("body_yaw"),
+            name="Body Yaw",
+            object_id="body_yaw",
+            min_value=-160.0,
+            max_value=160.0,
+            step=1.0,
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_body_yaw,
+            value_setter=rc.set_body_yaw,
+        ))
+        # Antenna controls
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("antenna_left"),
+            name="Antenna(L)",
+            object_id="antenna_left",
+            min_value=-90.0,
+            max_value=90.0,
+            step=1.0,
+            icon="mdi:antenna",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_antenna_left,
+            value_setter=rc.set_antenna_left,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("antenna_right"),
+            name="Antenna(R)",
+            object_id="antenna_right",
+            min_value=-90.0,
+            max_value=90.0,
+            step=1.0,
+            icon="mdi:antenna",
+            unit_of_measurement="°",
+            mode=2,
+            value_getter=rc.get_antenna_right,
+            value_setter=rc.set_antenna_right,
+        ))
+        _LOGGER.debug("Phase 3 entities registered: head position/orientation, body_yaw, antennas")
+    def _setup_phase4_entities(self, entities: List) -> None:
+        """Setup Phase 4 entities: Look at control."""
+        rc = self.reachy_controller
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("look_at_x"),
+            name="Look At X",
+            object_id="look_at_x",
+            min_value=-2.0,
+            max_value=2.0,
+            step=0.1,
+            icon="mdi:crosshairs-gps",
+            unit_of_measurement="m",
+            mode=1,  # Box mode for precise input
+            value_getter=rc.get_look_at_x,
+            value_setter=rc.set_look_at_x,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("look_at_y"),
+            name="Look At Y",
+            object_id="look_at_y",
+            min_value=-2.0,
+            max_value=2.0,
+            step=0.1,
+            icon="mdi:crosshairs-gps",
+            unit_of_measurement="m",
+            mode=1,
+            value_getter=rc.get_look_at_y,
+            value_setter=rc.set_look_at_y,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("look_at_z"),
+            name="Look At Z",
+            object_id="look_at_z",
+            min_value=-2.0,
+            max_value=2.0,
+            step=0.1,
+            icon="mdi:crosshairs-gps",
+            unit_of_measurement="m",
+            mode=1,
+            value_getter=rc.get_look_at_z,
+            value_setter=rc.set_look_at_z,
+        ))
+        _LOGGER.debug("Phase 4 entities registered: look_at_x/y/z")
+    def _setup_phase5_entities(self, entities: List) -> None:
+        """Setup Phase 5 entities: DOA (Direction of Arrival) for wakeup turn-to-sound."""
+        rc = self.reachy_controller
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("doa_angle"),
+            name="DOA Angle",
+            object_id="doa_angle",
+            icon="mdi:surround-sound",
+            unit_of_measurement="°",
+            accuracy_decimals=1,
+            state_class="measurement",
+            value_getter=rc.get_doa_angle_degrees,
+        ))
+        entities.append(BinarySensorEntity(
+            server=self.server,
+            key=get_entity_key("speech_detected"),
+            name="Speech Detected",
+            object_id="speech_detected",
+            icon="mdi:account-voice",
+            device_class="sound",
+            value_getter=rc.get_speech_detected,
+        ))
+        _LOGGER.debug("Phase 5 entities registered: doa_angle, speech_detected")
+    def _setup_phase6_entities(self, entities: List) -> None:
+        """Setup Phase 6 entities: Diagnostic information."""
+        rc = self.reachy_controller
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("control_loop_frequency"),
+            name="Control Loop Frequency",
+            object_id="control_loop_frequency",
+            icon="mdi:speedometer",
+            unit_of_measurement="Hz",
+            accuracy_decimals=1,
+            state_class="measurement",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_control_loop_frequency,
+        ))
+        entities.append(TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("sdk_version"),
+            name="SDK Version",
+            object_id="sdk_version",
+            icon="mdi:information",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_sdk_version,
+        ))
+        entities.append(TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("robot_name"),
+            name="Robot Name",
+            object_id="robot_name",
+            icon="mdi:robot",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_robot_name,
+        ))
+        entities.append(BinarySensorEntity(
+            server=self.server,
+            key=get_entity_key("wireless_version"),
+            name="Wireless Version",
+            object_id="wireless_version",
+            icon="mdi:wifi",
+            device_class="connectivity",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_wireless_version,
+        ))
+        entities.append(BinarySensorEntity(
+            server=self.server,
+            key=get_entity_key("simulation_mode"),
+            name="Simulation Mode",
+            object_id="simulation_mode",
+            icon="mdi:virtual-reality",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_simulation_mode,
+        ))
+        entities.append(TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("wlan_ip"),
+            name="WLAN IP",
+            object_id="wlan_ip",
+            icon="mdi:ip-network",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_wlan_ip,
+        ))
+        entities.append(TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("error_message"),
+            name="Error Message",
+            object_id="error_message",
+            icon="mdi:alert-circle",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_error_message,
+        ))
+        _LOGGER.debug(
+            "Phase 6 entities registered: control_loop_frequency, sdk_version, "
+            "robot_name, wireless_version, simulation_mode, wlan_ip, error_message"
+        )
+    def _setup_phase7_entities(self, entities: List) -> None:
+        """Setup Phase 7 entities: IMU sensors (wireless only)."""
+        rc = self.reachy_controller
+        # IMU Accelerometer
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_accel_x"),
+            name="IMU Accel X",
+            object_id="imu_accel_x",
+            icon="mdi:axis-x-arrow",
+            unit_of_measurement="m/s²",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_accel_x,
+        ))
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_accel_y"),
+            name="IMU Accel Y",
+            object_id="imu_accel_y",
+            icon="mdi:axis-y-arrow",
+            unit_of_measurement="m/s²",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_accel_y,
+        ))
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_accel_z"),
+            name="IMU Accel Z",
+            object_id="imu_accel_z",
+            icon="mdi:axis-z-arrow",
+            unit_of_measurement="m/s²",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_accel_z,
+        ))
+        # IMU Gyroscope
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_gyro_x"),
+            name="IMU Gyro X",
+            object_id="imu_gyro_x",
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="rad/s",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_gyro_x,
+        ))
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_gyro_y"),
+            name="IMU Gyro Y",
+            object_id="imu_gyro_y",
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="rad/s",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_gyro_y,
+        ))
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_gyro_z"),
+            name="IMU Gyro Z",
+            object_id="imu_gyro_z",
+            icon="mdi:rotate-3d-variant",
+            unit_of_measurement="rad/s",
+            accuracy_decimals=3,
+            state_class="measurement",
+            value_getter=rc.get_imu_gyro_z,
+        ))
+        # IMU Temperature
+        entities.append(SensorEntity(
+            server=self.server,
+            key=get_entity_key("imu_temperature"),
+            name="IMU Temperature",
+            object_id="imu_temperature",
+            icon="mdi:thermometer",
+            unit_of_measurement="°C",
+            accuracy_decimals=1,
+            device_class="temperature",
+            state_class="measurement",
+            value_getter=rc.get_imu_temperature,
+        ))
+        _LOGGER.debug("Phase 7 entities registered: IMU accelerometer, gyroscope, temperature")
+    def _setup_phase8_entities(self, entities: List) -> None:
+        """Setup Phase 8 entities: Emotion selector."""
+        def get_emotion() -> str:
+            return self._current_emotion
+        def set_emotion(emotion: str) -> None:
+            self._current_emotion = emotion
+            emotion_name = self._emotion_map.get(emotion)
+            if emotion_name and self._play_emotion_callback:
+                self._play_emotion_callback(emotion_name)
+                # Reset to None after playing
+                self._current_emotion = "None"
+        entities.append(SelectEntity(
+            server=self.server,
+            key=get_entity_key("emotion"),
+            name="Emotion",
+            object_id="emotion",
+            options=list(self._emotion_map.keys()),
+            icon="mdi:emoticon",
+            value_getter=get_emotion,
+            value_setter=set_emotion,
+        ))
+        _LOGGER.debug("Phase 8 entities registered: emotion selector")
+    def _setup_phase9_entities(self, entities: List) -> None:
+        """Setup Phase 9 entities: Audio controls."""
+        rc = self.reachy_controller
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("microphone_volume"),
+            name="Microphone Volume",
+            object_id="microphone_volume",
+            min_value=0.0,
+            max_value=100.0,
+            step=1.0,
+            icon="mdi:microphone",
+            unit_of_measurement="%",
+            mode=2,  # Slider mode
+            entity_category=1,  # config
+            value_getter=rc.get_microphone_volume,
+            value_setter=rc.set_microphone_volume,
+        ))
+        _LOGGER.debug("Phase 9 entities registered: microphone_volume")
+    def _setup_phase10_entities(self, entities: List) -> None:
+        """Setup Phase 10 entities: Camera for Home Assistant integration."""
+        def get_camera_image() -> Optional[bytes]:
+            """Get camera snapshot as JPEG bytes."""
+            if self.camera_server:
+                return self.camera_server.get_snapshot()
+            return None
+        entities.append(CameraEntity(
+            server=self.server,
+            key=get_entity_key("camera"),
+            name="Camera",
+            object_id="camera",
+            icon="mdi:camera",
+            image_getter=get_camera_image,
+        ))
+        _LOGGER.debug("Phase 10 entities registered: camera (ESPHome Camera entity)")
+    def _setup_phase12_entities(self, entities: List) -> None:
+        """Setup Phase 12 entities: Audio processing parameters (via local SDK)."""
+        rc = self.reachy_controller
+        def set_agc_enabled_with_save(enabled: bool) -> None:
+            """Set AGC enabled and save to preferences."""
+            rc.set_agc_enabled(enabled)
+            if hasattr(self.server, 'state') and self.server.state:
+                self.server.state.preferences.agc_enabled = enabled
+                self.server.state.save_preferences()
+                _LOGGER.debug("AGC enabled saved to preferences: %s", enabled)
+        def set_agc_max_gain_with_save(gain: float) -> None:
+            """Set AGC max gain and save to preferences."""
+            rc.set_agc_max_gain(gain)
+            if hasattr(self.server, 'state') and self.server.state:
+                self.server.state.preferences.agc_max_gain = gain
+                self.server.state.save_preferences()
+                _LOGGER.debug("AGC max gain saved to preferences: %.1f dB", gain)
+        def set_noise_suppression_with_save(level: float) -> None:
+            """Set noise suppression and save to preferences."""
+            rc.set_noise_suppression(level)
+            if hasattr(self.server, 'state') and self.server.state:
+                self.server.state.preferences.noise_suppression = level
+                self.server.state.save_preferences()
+                _LOGGER.debug("Noise suppression saved to preferences: %.1f%%", level)
+        entities.append(SwitchEntity(
+            server=self.server,
+            key=get_entity_key("agc_enabled"),
+            name="AGC Enabled",
+            object_id="agc_enabled",
+            icon="mdi:tune-vertical",
+            device_class="switch",
+            entity_category=1,  # config
+            value_getter=rc.get_agc_enabled,
+            value_setter=set_agc_enabled_with_save,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("agc_max_gain"),
+            name="AGC Max Gain",
+            object_id="agc_max_gain",
+            min_value=0.0,
+            max_value=40.0,  # XVF3800 supports up to 40dB
+            step=1.0,
+            icon="mdi:volume-plus",
+            unit_of_measurement="dB",
+            mode=2,
+            entity_category=1,  # config
+            value_getter=rc.get_agc_max_gain,
+            value_setter=set_agc_max_gain_with_save,
+        ))
+        entities.append(NumberEntity(
+            server=self.server,
+            key=get_entity_key("noise_suppression"),
+            name="Noise Suppression",
+            object_id="noise_suppression",
+            min_value=0.0,
+            max_value=100.0,
+            step=1.0,
+            icon="mdi:volume-off",
+            unit_of_measurement="%",
+            mode=2,
+            entity_category=1,  # config
+            value_getter=rc.get_noise_suppression,
+            value_setter=set_noise_suppression_with_save,
+        ))
+        entities.append(BinarySensorEntity(
+            server=self.server,
+            key=get_entity_key("echo_cancellation_converged"),
+            name="Echo Cancellation Converged",
+            object_id="echo_cancellation_converged",
+            icon="mdi:waveform",
+            device_class="running",
+            entity_category=2,  # diagnostic
+            value_getter=rc.get_echo_cancellation_converged,
+        ))
+        _LOGGER.debug(
+            "Phase 12 entities registered: agc_enabled, agc_max_gain, "
+            "noise_suppression, echo_cancellation_converged"
+        )
+    def _setup_phase21_entities(self, entities: List) -> None:
+        """Setup Phase 21 entities: Continuous conversation mode."""
+        def get_continuous_conversation() -> bool:
+            """Get current continuous conversation mode state."""
+            if hasattr(self.server, 'state') and self.server.state:
+                prefs = self.server.state.preferences
+                return getattr(prefs, 'continuous_conversation', False)
+            return False
+        def set_continuous_conversation(enabled: bool) -> None:
+            """Set continuous conversation mode and save to preferences."""
+            if hasattr(self.server, 'state') and self.server.state:
+                self.server.state.preferences.continuous_conversation = enabled
+                self.server.state.save_preferences()
+                _LOGGER.info("Continuous conversation mode %s", "enabled" if enabled else "disabled")
+        entities.append(SwitchEntity(
+            server=self.server,
+            key=get_entity_key("continuous_conversation"),
+            name="Continuous Conversation",
+            object_id="continuous_conversation",
+            icon="mdi:message-reply-text",
+            device_class="switch",
+            entity_category=1,  # config
+            value_getter=get_continuous_conversation,
+            value_setter=set_continuous_conversation,
+        ))
+        _LOGGER.debug("Phase 21 entities registered: continuous_conversation")
+    def _setup_phase22_entities(self, entities: List) -> None:
+        """Setup Phase 22 entities: Gesture detection."""
+        def get_gesture() -> str:
+            """Get current detected gesture."""
+            if self.camera_server:
+                return self.camera_server.get_current_gesture()
+            return "none"
+        def get_gesture_confidence() -> float:
+            """Get gesture detection confidence."""
+            if self.camera_server:
+                return self.camera_server.get_gesture_confidence()
+            return 0.0
+        gesture_entity = TextSensorEntity(
+            server=self.server,
+            key=get_entity_key("gesture_detected"),
+            name="Gesture Detected",
+            object_id="gesture_detected",
+            icon="mdi:hand-wave",
+            value_getter=get_gesture,
+        )
+        entities.append(gesture_entity)
+        self._gesture_entity = gesture_entity
+        confidence_entity = SensorEntity(
+            server=self.server,
+            key=get_entity_key("gesture_confidence"),
+            name="Gesture Confidence",
+            object_id="gesture_confidence",
+            icon="mdi:percent",
+            unit_of_measurement="%",
+            accuracy_decimals=1,
+            state_class="measurement",
+            value_getter=get_gesture_confidence,
+        )
+        entities.append(confidence_entity)
+        self._gesture_confidence_entity = confidence_entity
+        _LOGGER.debug("Phase 22 entities registered: gesture_detected, gesture_confidence")
+    def update_gesture_state(self) -> None:
+        """Push gesture state update to Home Assistant."""
+        if hasattr(self, '_gesture_entity') and self._gesture_entity:
+            self._gesture_entity.update_state()
+        if hasattr(self, '_gesture_confidence_entity') and self._gesture_confidence_entity:
+            self._gesture_confidence_entity.update_state()
+    def find_entity_references(self, entities: List) -> None:
+        """Find and store references to special entities from existing list.
+        Args:
+            entities: The list of existing entities to search
+        """
+        # DOA entities are read-only sensors, no special references needed
+        pass

reachy_mini_ha_voice/gesture_detector.py ADDED Viewed

	@@ -0,0 +1,183 @@

+"""Gesture detection using HaGRID ONNX models."""
+from __future__ import annotations
+import logging
+from enum import Enum
+from pathlib import Path
+from typing import Optional, Tuple
+import cv2
+import numpy as np
+from numpy.typing import NDArray
+logger = logging.getLogger(__name__)
+class Gesture(Enum):
+    """Hand gestures that the gesture detector can report.
+    Each member's value is a class-name string; ``NONE`` ("no_gesture") is
+    the result used when nothing was recognised.
+    """
+    # Fallback result: no hand, low confidence, or an unmapped class.
+    NONE = "no_gesture"
+    CALL = "call"
+    DISLIKE = "dislike"
+    FIST = "fist"
+    FOUR = "four"
+    LIKE = "like"
+    MUTE = "mute"
+    OK = "ok"
+    ONE = "one"
+    PALM = "palm"
+    PEACE = "peace"
+    PEACE_INVERTED = "peace_inverted"
+    ROCK = "rock"
+    STOP = "stop"
+    STOP_INVERTED = "stop_inverted"
+    THREE = "three"
+    THREE2 = "three2"
+    TWO_UP = "two_up"
+    TWO_UP_INVERTED = "two_up_inverted"
+_GESTURE_CLASSES = [
+    'hand_down', 'hand_right', 'hand_left', 'thumb_index', 'thumb_left',
+    'thumb_right', 'thumb_down', 'half_up', 'half_left', 'half_right',
+    'half_down', 'part_hand_heart', 'part_hand_heart2', 'fist_inverted',
+    'two_left', 'two_right', 'two_down', 'grabbing', 'grip', 'point',
+    'call', 'three3', 'little_finger', 'middle_finger', 'dislike', 'fist',
+    'four', 'like', 'mute', 'ok', 'one', 'palm', 'peace', 'peace_inverted',
+    'rock', 'stop', 'stop_inverted', 'three', 'three2', 'two_up',
+    'two_up_inverted', 'three_gun', 'one_left', 'one_right', 'one_down'
+]
+_NAME_TO_GESTURE = {
+    'call': Gesture.CALL, 'dislike': Gesture.DISLIKE, 'fist': Gesture.FIST,
+    'four': Gesture.FOUR, 'like': Gesture.LIKE, 'mute': Gesture.MUTE,
+    'ok': Gesture.OK, 'one': Gesture.ONE, 'palm': Gesture.PALM,
+    'peace': Gesture.PEACE, 'peace_inverted': Gesture.PEACE_INVERTED,
+    'rock': Gesture.ROCK, 'stop': Gesture.STOP,
+    'stop_inverted': Gesture.STOP_INVERTED, 'three': Gesture.THREE,
+    'three2': Gesture.THREE2, 'two_up': Gesture.TWO_UP,
+    'two_up_inverted': Gesture.TWO_UP_INVERTED,
+}
+class GestureDetector:
+    def __init__(self, confidence_threshold: float = 0.3, detection_threshold: float = 0.3):
+        self._confidence_threshold = confidence_threshold
+        self._detection_threshold = detection_threshold
+        models_dir = Path(__file__).parent / "models"
+        self._detector_path = models_dir / "hand_detector.onnx"
+        self._classifier_path = models_dir / "crops_classifier.onnx"
+        self._detector = None
+        self._classifier = None
+        self._available = False
+        self._mean = np.array([127, 127, 127], dtype=np.float32)
+        self._std = np.array([128, 128, 128], dtype=np.float32)
+        self._detector_size = (320, 240)
+        self._classifier_size = (128, 128)
+        self._load_models()
+    def _load_models(self) -> None:
+        try:
+            import onnxruntime as ort
+        except ImportError:
+            logger.warning("onnxruntime not installed")
+            return
+        if not self._detector_path.exists() or not self._classifier_path.exists():
+            logger.warning("Model files not found")
+            return
+        try:
+            providers = ['CPUExecutionProvider']
+            logger.info("Loading gesture models...")
+            self._detector = ort.InferenceSession(str(self._detector_path), providers=providers)
+            self._classifier = ort.InferenceSession(str(self._classifier_path), providers=providers)
+            self._det_input = self._detector.get_inputs()[0].name
+            self._det_outputs = [o.name for o in self._detector.get_outputs()]
+            self._cls_input = self._classifier.get_inputs()[0].name
+            self._available = True
+            logger.info("Gesture detection ready")
+        except Exception as e:
+            logger.error("Failed to load models: %s", e)
+    @property
+    def is_available(self) -> bool:
+        return self._available
+    def _preprocess(self, frame: NDArray, size: Tuple[int, int]) -> NDArray:
+        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, size)
+        img = (img.astype(np.float32) - self._mean) / self._std
+        img = np.transpose(img, [2, 0, 1])
+        return np.expand_dims(img, axis=0)
+    def _detect_hand(self, frame: NDArray) -> Optional[Tuple[int, int, int, int, float]]:
+        if self._detector is None:
+            return None
+        h, w = frame.shape[:2]
+        inp = self._preprocess(frame, self._detector_size)
+        outs = self._detector.run(self._det_outputs, {self._det_input: inp})
+        boxes = outs[0]
+        scores = outs[2]
+        if len(boxes) == 0:
+            return None
+        best_i, best_c = -1, self._detection_threshold
+        for i, c in enumerate(scores):
+            if c > best_c:
+                best_c, best_i = float(c), i
+        if best_i < 0:
+            return None
+        b = boxes[best_i]
+        # Model outputs normalized coordinates (0-1), scale to original frame size
+        x1, y1 = int(b[0] * w), int(b[1] * h)
+        x2, y2 = int(b[2] * w), int(b[3] * h)
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(w-1, x2), min(h-1, y2)
+        if x2 <= x1 or y2 <= y1:
+            return None
+        return (x1, y1, x2, y2, best_c)
+    def _get_square_crop(self, frame: NDArray, box: Tuple[int, int, int, int]) -> NDArray:
+        h, w = frame.shape[:2]
+        x1, y1, x2, y2 = box
+        bw, bh = x2 - x1, y2 - y1
+        if bh < bw:
+            y1, y2 = y1 - (bw - bh) // 2, y1 - (bw - bh) // 2 + bw
+        elif bh > bw:
+            x1, x2 = x1 - (bh - bw) // 2, x1 - (bh - bw) // 2 + bh
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(w-1, x2), min(h-1, y2)
+        return frame[y1:y2, x1:x2]
+    def _classify(self, crop: NDArray) -> Tuple[Gesture, float]:
+        if self._classifier is None or crop.size == 0:
+            return Gesture.NONE, 0.0
+        inp = self._preprocess(crop, self._classifier_size)
+        logits = self._classifier.run(None, {self._cls_input: inp})[0][0]
+        idx = int(np.argmax(logits))
+        exp_l = np.exp(logits - np.max(logits))
+        conf = float(exp_l[idx] / np.sum(exp_l))
+        if idx >= len(_GESTURE_CLASSES) or conf < self._confidence_threshold:
+            return Gesture.NONE, conf
+        name = _GESTURE_CLASSES[idx]
+        return _NAME_TO_GESTURE.get(name, Gesture.NONE), conf
+    def detect(self, frame: NDArray) -> Tuple[Gesture, float]:
+        if not self._available:
+            return Gesture.NONE, 0.0
+        try:
+            det = self._detect_hand(frame)
+            if det is None:
+                return Gesture.NONE, 0.0
+            x1, y1, x2, y2, det_c = det
+            logger.debug("Hand: box=(%d,%d,%d,%d) conf=%.2f", x1, y1, x2, y2, det_c)
+            crop = self._get_square_crop(frame, (x1, y1, x2, y2))
+            if crop.size == 0:
+                return Gesture.NONE, 0.0
+            gest, cls_c = self._classify(crop)
+            if gest != Gesture.NONE:
+                logger.debug("Gesture: %s (det=%.2f cls=%.2f)", gest.value, det_c, cls_c)
+            return gest, det_c * cls_c
+        except Exception as e:
+            logger.warning("Gesture error: %s", e)
+            return Gesture.NONE, 0.0
+    def close(self) -> None:
+        self._detector = self._classifier = None
+        self._available = False

{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py RENAMED Viewed

@@ -3,35 +3,24 @@
 Ported from reachy_mini_conversation_app for voice assistant integration.
 Model is loaded at initialization time (not lazy) to ensure face tracking
 is ready immediately when the camera server starts.
-Performance Optimizations:
-- Optional frame downscaling for faster inference on low-power devices
-- Frame skip support for reduced CPU usage when tracking is stable
-- Configurable inference resolution (default: native resolution)
 """
 from __future__ import annotations
 import logging
-from typing import TYPE_CHECKING
 import numpy as np
-if TYPE_CHECKING:
-    from numpy.typing import NDArray
 logger = logging.getLogger(__name__)
 class HeadTracker:
     """Lightweight head tracker using YOLO for face detection.
     Model is loaded at initialization time to ensure face tracking
     is ready immediately (matching conversation_app behavior).
-    Performance Features:
-    - Frame downscaling: Reduces inference resolution for ~4x speedup
-    - Frame skipping: Reuses last detection result for stable tracking
     """
     def __init__(
@@ -40,7 +29,6 @@ class HeadTracker:
         model_filename: str = "model.pt",
         confidence_threshold: float = 0.3,
         device: str = "cpu",
-        inference_scale: float = 1.0,  # Scale factor for inference (0.5 = half resolution)
     ) -> None:
         """Initialize YOLO-based head tracker.
@@ -49,7 +37,6 @@ class HeadTracker:
             model_filename: Model file name
             confidence_threshold: Minimum confidence for face detection
             device: Device to run inference on ('cpu' or 'cuda')
-            inference_scale: Scale factor for inference (0.5 = half res for ~4x speedup)
         """
         self.confidence_threshold = confidence_threshold
         self.model = None
@@ -58,57 +45,57 @@ class HeadTracker:
         self._device = device
         self._detections_class = None
         self._model_load_attempted = False
-        self._model_load_error: str | None = None
-        # Performance optimization settings
-        self._inference_scale = min(1.0, max(0.25, inference_scale))
-        # Frame skip support for stable tracking
-        self._last_detection: tuple[NDArray, float] | None = None
-        self._frames_since_detection = 0
-        self._max_skip_frames = 0  # 0 = no skipping (can be set externally)
         # Load model immediately at init (not lazy)
         self._load_model()
     def _load_model(self) -> None:
-        """Load YOLO model for face detection."""
         if self._model_load_attempted:
             return
         self._model_load_attempted = True
         try:
-            from pathlib import Path
-            from supervision import Detections
             from ultralytics import YOLO
             self._detections_class = Detections
-            # Load local model from models directory
-            models_dir = Path(__file__).resolve().parents[1] / "models"
-            local_model_path = models_dir / self._model_filename
-            if not local_model_path.exists():
-                raise FileNotFoundError(
-                    f"Model file not found: {local_model_path}. "
-                    f"Please place {self._model_filename} in the models directory."
-                )
-            model_path = str(local_model_path)
-            logger.info("Loading local YOLO model: %s", model_path)
             self.model = YOLO(model_path).to(self._device)
-            logger.info("YOLO face detection model loaded successfully")
         except ImportError as e:
             self._model_load_error = f"Missing dependencies: {e}"
             logger.warning("Face tracking disabled - missing dependencies: %s", e)
             self.model = None
-        except FileNotFoundError as e:
-            self._model_load_error = str(e)
-            logger.error("Failed to load YOLO model: %s", e)
-            self.model = None
         except Exception as e:
             self._model_load_error = str(e)
             logger.error("Failed to load YOLO model: %s", e)
@@ -119,7 +106,7 @@ class HeadTracker:
         """Check if the head tracker is available and ready."""
         return self.model is not None and self._detections_class is not None
-    def _select_best_face(self, detections) -> int | None:
         """Select the best face based on confidence and area.
         Args:
@@ -152,7 +139,9 @@ class HeadTracker:
         best_idx = valid_indices[np.argmax(scores)]
         return int(best_idx)
-    def _bbox_to_normalized_coords(self, bbox: NDArray[np.float32], w: int, h: int) -> NDArray[np.float32]:
         """Convert bounding box center to normalized coordinates [-1, 1].
         Args:
@@ -172,7 +161,9 @@ class HeadTracker:
         return np.array([norm_x, norm_y], dtype=np.float32)
-    def get_head_position(self, img: NDArray[np.uint8]) -> tuple[NDArray[np.float32] | None, float | None]:
         """Get head position from face detection.
         Args:
@@ -186,36 +177,14 @@ class HeadTracker:
         h, w = img.shape[:2]
-        # Frame skip optimization: return last detection if within skip limit
-        if (
-            self._max_skip_frames > 0
-            and self._last_detection is not None
-            and self._frames_since_detection < self._max_skip_frames
-        ):
-            self._frames_since_detection += 1
-            return self._last_detection
         try:
-            # Downscale image for faster inference if scale < 1.0
-            if self._inference_scale < 1.0:
-                import cv2
-                new_w = int(w * self._inference_scale)
-                new_h = int(h * self._inference_scale)
-                inference_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
-            else:
-                inference_img = img
-                new_w, new_h = w, h
             # Run YOLO inference
-            results = self.model(inference_img, verbose=False)
             detections = self._detections_class.from_ultralytics(results[0])
             # Select best face
             face_idx = self._select_best_face(detections)
             if face_idx is None:
-                self._last_detection = None
-                self._frames_since_detection = 0
                 return None, None
             bbox = detections.xyxy[face_idx]
@@ -223,90 +192,11 @@ class HeadTracker:
             if detections.confidence is not None:
                 confidence = float(detections.confidence[face_idx])
-            # Scale bbox back to original resolution if downscaled
-            if self._inference_scale < 1.0:
-                scale_factor = 1.0 / self._inference_scale
-                bbox = bbox * scale_factor
-            # Get face center in [-1, 1] coordinates (using original dimensions)
             face_center = self._bbox_to_normalized_coords(bbox, w, h)
-            # Cache result for frame skipping
-            self._last_detection = (face_center, confidence)
-            self._frames_since_detection = 0
             return face_center, confidence
         except Exception as e:
             logger.debug("Error in head position detection: %s", e)
             return None, None
-    def set_inference_scale(self, scale: float) -> None:
-        """Set the inference resolution scale factor.
-        Args:
-            scale: Scale factor (0.25 to 1.0). Lower = faster but less accurate.
-        """
-        self._inference_scale = min(1.0, max(0.25, scale))
-        logger.debug("Inference scale set to %.2f", self._inference_scale)
-    def set_max_skip_frames(self, skip: int) -> None:
-        """Set maximum frames to skip between detections.
-        Args:
-            skip: Number of frames to skip (0 = no skipping).
-                  Higher values reduce CPU but may cause tracking lag.
-        """
-        self._max_skip_frames = max(0, skip)
-        logger.debug("Max skip frames set to %d", self._max_skip_frames)
-    def clear_detection_cache(self) -> None:
-        """Clear cached detection result."""
-        self._last_detection = None
-        self._frames_since_detection = 0
-    def suspend(self) -> None:
-        """Suspend the head tracker to release YOLO model from memory.
-        Call resume() to reload the model.
-        """
-        if self.model is None:
-            logger.debug("HeadTracker model not loaded, nothing to suspend")
-            return
-        logger.info("Suspending HeadTracker - releasing YOLO model...")
-        try:
-            # Release YOLO model from memory
-            del self.model
-            self.model = None
-            # Also clear the detections class reference
-            self._detections_class = None
-            # Reset load state so resume can reload
-            self._model_load_attempted = False
-            self._model_load_error = None
-            # Clear detection cache
-            self.clear_detection_cache()
-            logger.info("HeadTracker suspended - YOLO model released")
-        except Exception as e:
-            logger.warning("Error suspending HeadTracker: %s", e)
-    def resume(self) -> None:
-        """Resume the head tracker by reloading the YOLO model."""
-        if self.model is not None:
-            logger.debug("HeadTracker model already loaded")
-            return
-        logger.info("Resuming HeadTracker - reloading YOLO model...")
-        # Reload the model
-        self._load_model()
-        if self.is_available:
-            logger.info("HeadTracker resumed - YOLO model loaded")
-        else:
-            logger.warning("HeadTracker resume failed - model not available")

 Ported from reachy_mini_conversation_app for voice assistant integration.
 Model is loaded at initialization time (not lazy) to ensure face tracking
 is ready immediately when the camera server starts.
 """
 from __future__ import annotations
 import logging
+from typing import Tuple, Optional
 import numpy as np
+from numpy.typing import NDArray
 logger = logging.getLogger(__name__)
 class HeadTracker:
     """Lightweight head tracker using YOLO for face detection.
     Model is loaded at initialization time to ensure face tracking
     is ready immediately (matching conversation_app behavior).
     """
     def __init__(
         model_filename: str = "model.pt",
         confidence_threshold: float = 0.3,
         device: str = "cpu",
     ) -> None:
         """Initialize YOLO-based head tracker.
             model_filename: Model file name
             confidence_threshold: Minimum confidence for face detection
             device: Device to run inference on ('cpu' or 'cuda')
         """
         self.confidence_threshold = confidence_threshold
         self.model = None
         self._device = device
         self._detections_class = None
         self._model_load_attempted = False
+        self._model_load_error: Optional[str] = None
         # Load model immediately at init (not lazy)
         self._load_model()
     def _load_model(self) -> None:
+        """Load YOLO model with retry logic."""
         if self._model_load_attempted:
             return
         self._model_load_attempted = True
         try:
             from ultralytics import YOLO
+            from supervision import Detections
+            from huggingface_hub import hf_hub_download
+            import time
             self._detections_class = Detections
+            # Download with retries
+            max_retries = 3
+            retry_delay = 5
+            model_path = None
+            last_error = None
+            for attempt in range(max_retries):
+                try:
+                    model_path = hf_hub_download(
+                        repo_id=self._model_repo,
+                        filename=self._model_filename,
+                    )
+                    break
+                except Exception as e:
+                    last_error = e
+                    if attempt < max_retries - 1:
+                        logger.warning(
+                            "Model download failed (attempt %d/%d): %s. Retrying in %ds...",
+                            attempt + 1, max_retries, e, retry_delay
+                        )
+                        time.sleep(retry_delay)
+            if model_path is None:
+                raise last_error
             self.model = YOLO(model_path).to(self._device)
+            logger.info("YOLO face detection model loaded")
         except ImportError as e:
             self._model_load_error = f"Missing dependencies: {e}"
             logger.warning("Face tracking disabled - missing dependencies: %s", e)
             self.model = None
         except Exception as e:
             self._model_load_error = str(e)
             logger.error("Failed to load YOLO model: %s", e)
         """Check if the head tracker is available and ready."""
         return self.model is not None and self._detections_class is not None
+    def _select_best_face(self, detections) -> Optional[int]:
         """Select the best face based on confidence and area.
         Args:
         best_idx = valid_indices[np.argmax(scores)]
         return int(best_idx)
+    def _bbox_to_normalized_coords(
+        self, bbox: NDArray[np.float32], w: int, h: int
+    ) -> NDArray[np.float32]:
         """Convert bounding box center to normalized coordinates [-1, 1].
         Args:
         return np.array([norm_x, norm_y], dtype=np.float32)
+    def get_head_position(
+        self, img: NDArray[np.uint8]
+    ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
         """Get head position from face detection.
         Args:
         h, w = img.shape[:2]
         try:
             # Run YOLO inference
+            results = self.model(img, verbose=False)
             detections = self._detections_class.from_ultralytics(results[0])
             # Select best face
             face_idx = self._select_best_face(detections)
             if face_idx is None:
                 return None, None
             bbox = detections.xyxy[face_idx]
             if detections.confidence is not None:
                 confidence = float(detections.confidence[face_idx])
+            # Get face center in [-1, 1] coordinates
             face_center = self._bbox_to_normalized_coords(bbox, w, h)
             return face_center, confidence
         except Exception as e:
             logger.debug("Error in head position detection: %s", e)
             return None, None

{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py RENAMED Viewed

@@ -7,14 +7,48 @@ with Home Assistant via ESPHome protocol for voice control.
 import asyncio
 import logging
-import sys
 import threading
-from reachy_mini import ReachyMiniApp
-from .voice_assistant import VoiceAssistantService
-logger = logging.getLogger(__name__)
 class ReachyMiniHaVoice(ReachyMiniApp):
@@ -27,40 +61,60 @@ class ReachyMiniHaVoice(ReachyMiniApp):
     """
     # No custom web UI needed - configuration is automatic via Home Assistant
-    custom_app_url: str | None = None
     def __init__(self, *args, **kwargs):
         """Initialize the app."""
         super().__init__(*args, **kwargs)
-        self.stop_event = threading.Event()
     def wrapped_run(self, *args, **kwargs) -> None:
         """
-        Override wrapped_run to handle Reachy Mini connection failures.
         """
         logger.info("Starting Reachy Mini HA Voice App...")
-        # Connect to ReachyMini
-        try:
-            logger.info("Attempting to connect to Reachy Mini...")
-            super().wrapped_run(*args, **kwargs)
-        except TimeoutError as e:
-            logger.error(f"Timeout connecting to Reachy Mini: {e}")
-            sys.exit(1)
-        except Exception as e:
-            error_str = str(e)
-            if "Unable to connect" in error_str or "Timeout" in error_str:
-                logger.error(f"Failed to connect to Reachy Mini: {e}")
-                sys.exit(1)
-            else:
-                raise
     def run(self, reachy_mini, stop_event: threading.Event) -> None:
         """
         Main application entry point.
         Args:
-            reachy_mini: The Reachy Mini robot instance (required, cannot be None)
             stop_event: Event to signal graceful shutdown
         """
         logger.info("Starting Reachy Mini for Home Assistant...")
@@ -82,8 +136,12 @@ class ReachyMiniHaVoice(ReachyMiniApp):
             logger.info("ESPHome Server: 0.0.0.0:6053")
             logger.info("Camera Server: 0.0.0.0:8081")
             logger.info("Wake word: Okay Nabu")
-            logger.info("Motion control: enabled")
-            logger.info("Camera: enabled (Reachy Mini)")
             logger.info("=" * 50)
             logger.info("To connect from Home Assistant:")
             logger.info("  Settings -> Devices & Services -> Add Integration")
@@ -120,19 +178,13 @@ class ReachyMiniHaVoice(ReachyMiniApp):
             logger.info("Reachy Mini HA stopped.")
-# This is called when running as: python -m reachy_mini_home_assistant.main
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
-    # Reduce verbosity for some noisy modules
-    logging.getLogger("reachy_mini.media.media_manager").setLevel(logging.WARNING)
-    logging.getLogger("reachy_mini.media.camera_base").setLevel(logging.WARNING)
-    logging.getLogger("reachy_mini.media.audio_base").setLevel(logging.WARNING)
-    logging.getLogger("matplotlib").setLevel(logging.WARNING)
     app = ReachyMiniHaVoice()
     try:
         app.wrapped_run()

 import asyncio
 import logging
+import socket
 import threading
+import time
+from typing import Optional
+logger = logging.getLogger(__name__)
+def _check_zenoh_available(timeout: float = 1.0) -> bool:
+    """Check if Zenoh service is available.
+    Availability is probed by opening (and immediately closing) a TCP
+    connection to 127.0.0.1:7447.
+    Args:
+        timeout: Maximum time in seconds to wait for the connection.
+    Returns:
+        True if the connection succeeded; False if it timed out, was
+        refused, or failed with any other OSError.
+    """
+    try:
+        with socket.create_connection(("127.0.0.1", 7447), timeout=timeout):
+            return True
+    # socket.timeout and ConnectionRefusedError are both OSError subclasses,
+    # so OSError alone already covers every entry in this tuple.
+    except (socket.timeout, ConnectionRefusedError, OSError):
+        return False
+# Only import ReachyMiniApp if we're running as an app
+try:
+    from reachy_mini import ReachyMini, ReachyMiniApp
+    REACHY_MINI_AVAILABLE = True
+except ImportError:
+    REACHY_MINI_AVAILABLE = False
+    # Create a dummy base class
+    class ReachyMiniApp:
+        custom_app_url = None
+        def __init__(self):
+            self.stop_event = threading.Event()
+        def wrapped_run(self, *args, **kwargs):
+            pass
+        def stop(self):
+            self.stop_event.set()
+    ReachyMini = None
+from .voice_assistant import VoiceAssistantService
+from .motion import ReachyMiniMotion
 class ReachyMiniHaVoice(ReachyMiniApp):
     """
     # No custom web UI needed - configuration is automatic via Home Assistant
+    custom_app_url: Optional[str] = None
     def __init__(self, *args, **kwargs):
         """Initialize the app."""
         super().__init__(*args, **kwargs)
+        if not hasattr(self, 'stop_event'):
+            self.stop_event = threading.Event()
     def wrapped_run(self, *args, **kwargs) -> None:
         """
+        Override wrapped_run to handle Zenoh connection failures gracefully.
+        If Zenoh is not available, run in standalone mode without robot control.
+        Standalone mode (run() called with reachy_mini=None) is entered when:
+        - the Zenoh port probe fails,
+        - the reachy_mini SDK could not be imported, or
+        - the base wrapped_run raises TimeoutError or an exception whose
+          message contains "Unable to connect", "ZError" or "Timeout".
+        Any other exception from the base wrapped_run is re-raised.
+        Args:
+            *args: Forwarded unchanged to the base class wrapped_run.
+            **kwargs: Forwarded unchanged to the base class wrapped_run.
         """
         logger.info("Starting Reachy Mini HA Voice App...")
+        # Check if Zenoh is available before trying to connect
+        if not _check_zenoh_available():
+            logger.warning("Zenoh service not available (port 7447)")
+            logger.info("Running in standalone mode without robot control")
+            self._run_standalone()
+            return
+        # Zenoh is available, try normal startup with ReachyMini
+        if REACHY_MINI_AVAILABLE:
+            try:
+                logger.info("Attempting to connect to Reachy Mini...")
+                super().wrapped_run(*args, **kwargs)
+            except TimeoutError as e:
+                logger.warning(f"Timeout connecting to Reachy Mini: {e}")
+                logger.info("Falling back to standalone mode")
+                self._run_standalone()
+            except Exception as e:
+                # Connection failures are recognised by substring match on the
+                # exception text; anything else propagates to the caller.
+                # NOTE(review): this also matches errors raised after a
+                # successful connection if their message contains one of
+                # these substrings - confirm that is intended.
+                error_str = str(e)
+                if "Unable to connect" in error_str or "ZError" in error_str or "Timeout" in error_str:
+                    logger.warning(f"Failed to connect to Reachy Mini: {e}")
+                    logger.info("Falling back to standalone mode")
+                    self._run_standalone()
+                else:
+                    raise
+        else:
+            logger.info("Reachy Mini SDK not available, running standalone")
+            self._run_standalone()
+    def _run_standalone(self) -> None:
+        """Run in standalone mode without robot.
+        Calls run() with reachy_mini=None and this instance's stop_event.
+        """
+        self.run(None, self.stop_event)
     def run(self, reachy_mini, stop_event: threading.Event) -> None:
         """
         Main application entry point.
         Args:
+            reachy_mini: The Reachy Mini robot instance (can be None)
             stop_event: Event to signal graceful shutdown
         """
         logger.info("Starting Reachy Mini for Home Assistant...")
             logger.info("ESPHome Server: 0.0.0.0:6053")
             logger.info("Camera Server: 0.0.0.0:8081")
             logger.info("Wake word: Okay Nabu")
+            if reachy_mini:
+                logger.info("Motion control: enabled")
+                logger.info("Camera: enabled (Reachy Mini)")
+            else:
+                logger.info("Motion control: disabled (no robot)")
+                logger.info("Camera: test pattern (no robot)")
             logger.info("=" * 50)
             logger.info("To connect from Home Assistant:")
             logger.info("  Settings -> Devices & Services -> Add Integration")
             logger.info("Reachy Mini HA stopped.")
+# This is called when running as: python -m reachy_mini_ha_voice.main
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
     )
     app = ReachyMiniHaVoice()
     try:
         app.wrapped_run()

{reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py RENAMED Viewed

@@ -5,18 +5,15 @@ import logging
 from dataclasses import asdict, dataclass, field
 from enum import Enum
 from pathlib import Path
-from typing import TYPE_CHECKING
 if TYPE_CHECKING:
-    import threading
-    from queue import Queue
     from pymicro_wakeword import MicroWakeWord
     from pyopen_wakeword import OpenWakeWord
-    from .audio.audio_player import AudioPlayer
-    from .entities.entity import ESPHomeEntity, MediaPlayerEntity
-    from .protocol.satellite import VoiceSatelliteProtocol
 _LOGGER = logging.getLogger(__name__)
@@ -31,21 +28,18 @@ class AvailableWakeWord:
     id: str
     type: WakeWordType
     wake_word: str
-    trained_languages: list[str]
     wake_word_path: Path
-    probability_cutoff: float = 0.7
-    def load(self) -> "MicroWakeWord | OpenWakeWord":
         if self.type == WakeWordType.MICRO_WAKE_WORD:
             from pymicro_wakeword import MicroWakeWord
             return MicroWakeWord.from_config(config_path=self.wake_word_path)
         if self.type == WakeWordType.OPEN_WAKE_WORD:
             from pyopen_wakeword import OpenWakeWord
             oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
-            oww_model.wake_word = self.wake_word
             return oww_model
         raise ValueError(f"Unexpected wake word type: {self.type}")
@@ -53,34 +47,25 @@ class AvailableWakeWord:
 @dataclass
 class Preferences:
-    active_wake_words: list[str] = field(default_factory=list)
     # Continuous conversation mode (controlled from Home Assistant)
     continuous_conversation: bool = False
-    # Unified idle behavior toggle (controlled from Home Assistant)
-    idle_behavior_enabled: bool = False
-    # Sendspin discovery and playback toggle (controlled from Home Assistant)
-    sendspin_enabled: bool = False
-    # Vision toggles and parameters (controlled from Home Assistant)
-    face_tracking_enabled: bool = False
-    gesture_detection_enabled: bool = False
-    face_confidence_threshold: float = 0.5
-    def set_idle_behavior_enabled(self, enabled: bool) -> None:
-        """Update the unified idle behavior toggle."""
-        self.idle_behavior_enabled = enabled
 @dataclass
 class ServerState:
     """Global server state."""
     name: str
     mac_address: str
-    audio_queue: "Queue[bytes | None]"
-    entities: "list[ESPHomeEntity]"
-    available_wake_words: "dict[str, AvailableWakeWord]"
-    wake_words: "dict[str, MicroWakeWord | OpenWakeWord]"
-    active_wake_words: set[str]
     stop_word: "MicroWakeWord"
     music_player: "AudioPlayer"
     tts_player: "AudioPlayer"
@@ -91,88 +76,20 @@ class ServerState:
     download_dir: Path
     # Reachy Mini specific
-    reachy_mini: object
     motion_enabled: bool = True
-    motion: object | None = None  # ReachyMiniMotion instance
-    media_player_entity: "MediaPlayerEntity | None" = None
-    satellite: "VoiceSatelliteProtocol | None" = None
     wake_words_changed: bool = False
     refractory_seconds: float = 2.0
-    timer_max_ring_seconds: float = 900.0
-    _entities_initialized: bool = False
-    _services_suspended: bool = False
-    # Mute state (controlled from Home Assistant) - thread-safe via properties
-    _is_muted: bool = False
-    # Camera state (controlled from Home Assistant) - thread-safe via properties
-    _camera_enabled: bool = True
-    # Thread safety
-    _state_lock: "threading.Lock | None" = None
-    def __post_init__(self):
-        """Initialize state lock after dataclass creation."""
-        import threading
-        object.__setattr__(self, "_state_lock", threading.Lock())
-    @property
-    def services_suspended(self) -> bool:
-        """Thread-safe getter for services_suspended."""
-        if self._state_lock is None:
-            return self._services_suspended
-        with self._state_lock:
-            return self._services_suspended
-    @services_suspended.setter
-    def services_suspended(self, value: bool) -> None:
-        """Thread-safe setter for services_suspended."""
-        if self._state_lock is None:
-            object.__setattr__(self, "_services_suspended", value)
-        else:
-            with self._state_lock:
-                object.__setattr__(self, "_services_suspended", value)
-    @property
-    def is_muted(self) -> bool:
-        """Thread-safe getter for is_muted."""
-        if self._state_lock is None:
-            return self._is_muted
-        with self._state_lock:
-            return self._is_muted
-    @is_muted.setter
-    def is_muted(self, value: bool) -> None:
-        """Thread-safe setter for is_muted."""
-        if self._state_lock is None:
-            object.__setattr__(self, "_is_muted", value)
-        else:
-            with self._state_lock:
-                object.__setattr__(self, "_is_muted", value)
-    @property
-    def camera_enabled(self) -> bool:
-        """Thread-safe getter for camera_enabled."""
-        if self._state_lock is None:
-            return self._camera_enabled
-        with self._state_lock:
-            return self._camera_enabled
-    @camera_enabled.setter
-    def camera_enabled(self, value: bool) -> None:
-        """Thread-safe setter for camera_enabled."""
-        if self._state_lock is None:
-            object.__setattr__(self, "_camera_enabled", value)
-        else:
-            with self._state_lock:
-                object.__setattr__(self, "_camera_enabled", value)
     def save_preferences(self) -> None:
         """Save preferences as JSON."""
         _LOGGER.debug("Saving preferences: %s", self.preferences_path)
         self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
         with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
-            json.dump(asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4)

 from dataclasses import asdict, dataclass, field
 from enum import Enum
 from pathlib import Path
+from queue import Queue
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
 if TYPE_CHECKING:
     from pymicro_wakeword import MicroWakeWord
     from pyopen_wakeword import OpenWakeWord
+    from .entity import ESPHomeEntity, MediaPlayerEntity
+    from .audio_player import AudioPlayer
+    from .satellite import VoiceSatelliteProtocol
 _LOGGER = logging.getLogger(__name__)
     id: str
     type: WakeWordType
     wake_word: str
+    trained_languages: List[str]
     wake_word_path: Path
+    def load(self) -> "Union[MicroWakeWord, OpenWakeWord]":
         if self.type == WakeWordType.MICRO_WAKE_WORD:
             from pymicro_wakeword import MicroWakeWord
             return MicroWakeWord.from_config(config_path=self.wake_word_path)
         if self.type == WakeWordType.OPEN_WAKE_WORD:
             from pyopen_wakeword import OpenWakeWord
             oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
+            setattr(oww_model, "wake_word", self.wake_word)
             return oww_model
         raise ValueError(f"Unexpected wake word type: {self.type}")
 @dataclass
 class Preferences:
+    """User-adjustable settings with defaults for every field.
+    NOTE(review): presumably the object serialized to JSON by
+    ServerState.save_preferences - confirm against the caller.
+    """
+    active_wake_words: List[str] = field(default_factory=list)
+    # Audio processing settings (persisted from Home Assistant)
+    agc_enabled: Optional[bool] = None  # None = use hardware default
+    agc_max_gain: Optional[float] = None  # None = use hardware default
+    noise_suppression: Optional[float] = None  # None = use hardware default
     # Continuous conversation mode (controlled from Home Assistant)
     continuous_conversation: bool = False
 @dataclass
 class ServerState:
     """Global server state."""
     name: str
     mac_address: str
+    audio_queue: "Queue[Optional[bytes]]"
+    entities: "List[ESPHomeEntity]"
+    available_wake_words: "Dict[str, AvailableWakeWord]"
+    wake_words: "Dict[str, Union[MicroWakeWord, OpenWakeWord]]"
+    active_wake_words: Set[str]
     stop_word: "MicroWakeWord"
     music_player: "AudioPlayer"
     tts_player: "AudioPlayer"
     download_dir: Path
     # Reachy Mini specific
+    reachy_mini: Optional[object] = None
     motion_enabled: bool = True
+    motion: Optional[object] = None  # ReachyMiniMotion instance
+    media_player_entity: "Optional[MediaPlayerEntity]" = None
+    satellite: "Optional[VoiceSatelliteProtocol]" = None
     wake_words_changed: bool = False
     refractory_seconds: float = 2.0
     def save_preferences(self) -> None:
         """Save preferences as JSON."""
         _LOGGER.debug("Saving preferences: %s", self.preferences_path)
         self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
         with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
+            json.dump(
+                asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4
+            )

{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx RENAMED Viewed

File without changes

reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py RENAMED Viewed

@@ -5,6 +5,7 @@ MovementManager for unified 5Hz control with face tracking.
 """
 import logging
 from .movement_manager import MovementManager, RobotState
@@ -18,28 +19,31 @@ class ReachyMiniMotion:
     to the MovementManager which handles them in its 5Hz control loop.
     """
-    def __init__(self, reachy_mini):
         self.reachy_mini = reachy_mini
-        self._movement_manager: MovementManager | None = None
         self._camera_server = None  # Reference to camera server for face tracking control
         self._is_speaking = False
         _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
-        # Initialize movement manager
-        try:
-            self._movement_manager = MovementManager(reachy_mini)
-            _LOGGER.debug("MovementManager created successfully")
-        except Exception as e:
-            _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
-            self._movement_manager = None
     def set_reachy_mini(self, reachy_mini):
         """Set the Reachy Mini instance."""
         self.reachy_mini = reachy_mini
-        if self._movement_manager is None:
             self._movement_manager = MovementManager(reachy_mini)
-        else:
             self._movement_manager.robot = reachy_mini
     def set_camera_server(self, camera_server):
@@ -68,7 +72,7 @@ class ReachyMiniMotion:
             _LOGGER.info("Motion control stopped")
     @property
-    def movement_manager(self) -> MovementManager | None:
         """Get the movement manager instance."""
         return self._movement_manager
@@ -164,31 +168,13 @@ class ReachyMiniMotion:
         self._is_speaking = False
         self._movement_manager.set_state(RobotState.IDLE)
-        if self._movement_manager.get_idle_behavior_enabled():
-            self._movement_manager.reset_to_neutral(duration=2.0)
-        else:
-            self._movement_manager.transition_to_idle_rest(duration=2.0)
         # Note: Face tracking remains enabled for continuous tracking
         # This allows the robot to always look at the user when they approach
         _LOGGER.debug("Reachy Mini: Idle pose")
-    def on_pause_motion(self):
-        """Called when motion should settle immediately.
-        Used for zero-config gesture reactions such as the palm gesture.
-        The robot smoothly returns to a neutral pose and then resumes its
-        normal idle behavior.
-        """
-        if self._movement_manager is None:
-            return
-        self._is_speaking = False
-        self._movement_manager.reset_to_neutral(duration=0.6)
-        self._movement_manager.set_state(RobotState.IDLE)
-        _LOGGER.debug("Reachy Mini: Motion paused to neutral idle")
     def on_timer_finished(self):
         """Called when a timer finishes - alert animation.

 """
 import logging
+from typing import Optional
 from .movement_manager import MovementManager, RobotState
     to the MovementManager which handles them in its 5Hz control loop.
     """
+    def __init__(self, reachy_mini=None):
         self.reachy_mini = reachy_mini
+        self._movement_manager: Optional[MovementManager] = None
         self._camera_server = None  # Reference to camera server for face tracking control
         self._is_speaking = False
         _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
+        # Initialize movement manager if robot is available
+        if reachy_mini is not None:
+            try:
+                self._movement_manager = MovementManager(reachy_mini)
+                _LOGGER.debug("MovementManager created successfully")
+            except Exception as e:
+                _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
+                self._movement_manager = None
+        else:
+            _LOGGER.debug("reachy_mini is None, MovementManager not created")
     def set_reachy_mini(self, reachy_mini):
         """Set the Reachy Mini instance."""
         self.reachy_mini = reachy_mini
+        if reachy_mini is not None and self._movement_manager is None:
             self._movement_manager = MovementManager(reachy_mini)
+        elif reachy_mini is not None and self._movement_manager is not None:
             self._movement_manager.robot = reachy_mini
     def set_camera_server(self, camera_server):
             _LOGGER.info("Motion control stopped")
     @property
+    def movement_manager(self) -> Optional[MovementManager]:
         """Get the movement manager instance."""
         return self._movement_manager
         self._is_speaking = False
         self._movement_manager.set_state(RobotState.IDLE)
+        self._movement_manager.reset_to_neutral(duration=0.5)
         # Note: Face tracking remains enabled for continuous tracking
         # This allows the robot to always look at the user when they approach
         _LOGGER.debug("Reachy Mini: Idle pose")
     def on_timer_finished(self):
         """Called when a timer finishes - alert animation.

reachy_mini_ha_voice/movement_manager.py ADDED Viewed

	@@ -0,0 +1,861 @@

+"""
+Unified Movement Manager for Reachy Mini.
+This module provides a centralized control system for robot movements,
+inspired by the reachy_mini_conversation_app architecture.
+Key features:
+- Single fixed-rate control loop (rate set by CONTROL_LOOP_FREQUENCY_HZ, currently 100Hz)
+- Command queue pattern (thread-safe external API)
+- Error throttling (prevents log explosion)
+- JSON-driven animation system (conversation state animations)
+- Graceful shutdown
+- Pose change detection (skip sending if no significant change)
+- Robust connection recovery (faster reconnection attempts)
+- Proper pose composition using SDK's compose_world_offset (same as conversation_app)
+- Antenna freeze during listening mode with smooth blend back
+"""
+import logging
+import math
+import threading
+import time
+from dataclasses import dataclass, field
+from enum import Enum
+from queue import Queue, Empty
+from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+if TYPE_CHECKING:
+    from reachy_mini import ReachyMini
+logger = logging.getLogger(__name__)
+# Import SDK utilities for pose composition (same as conversation_app)
+try:
+    from reachy_mini.utils import create_head_pose
+    from reachy_mini.utils.interpolation import compose_world_offset
+    SDK_UTILS_AVAILABLE = True
+except ImportError:
+    SDK_UTILS_AVAILABLE = False
+    logger.warning("SDK utils not available, using fallback pose composition")
+# Import animation player
+from .animation_player import AnimationPlayer
+# =============================================================================
+# Constants
+# =============================================================================
+# Control loop frequency - daemon now supports higher rates
+CONTROL_LOOP_FREQUENCY_HZ = 100  # 100Hz control loop (same as conversation_app)
+# Length of one control-loop tick in seconds (reciprocal of the frequency above)
+TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
+# Antenna freeze parameters (listening mode)
+ANTENNA_BLEND_DURATION = 0.5  # Seconds to blend back from frozen state
+# State to animation mapping: keys are RobotState values, values are the
+# animation names passed to AnimationPlayer.set_animation()
+STATE_ANIMATION_MAP = {
+    "idle": "idle",
+    "listening": "listening",
+    "thinking": "thinking",
+    "speaking": "speaking",
+}
+class RobotState(Enum):
+    """Robot state machine states.
+    Each member's string value is the key used to look up the matching
+    animation name in STATE_ANIMATION_MAP.
+    """
+    IDLE = "idle"
+    LISTENING = "listening"
+    THINKING = "thinking"
+    SPEAKING = "speaking"
+@dataclass
+class MovementState:
+    """Internal movement state (only modified by control loop).
+    Fields are grouped by where their values come from: animation offsets,
+    speech sway offsets, the externally requested target pose, timing stamps
+    and the antenna freeze state. Per the public setters' documentation,
+    positions are in meters and angles in radians.
+    """
+    # Current robot state
+    robot_state: RobotState = RobotState.IDLE
+    # Animation offsets (from AnimationPlayer)
+    anim_pitch: float = 0.0
+    anim_yaw: float = 0.0
+    anim_roll: float = 0.0
+    anim_x: float = 0.0
+    anim_y: float = 0.0
+    anim_z: float = 0.0
+    anim_antenna_left: float = 0.0
+    anim_antenna_right: float = 0.0
+    # Speech sway offsets (from audio analysis)
+    sway_pitch: float = 0.0
+    sway_yaw: float = 0.0
+    sway_roll: float = 0.0
+    sway_x: float = 0.0
+    sway_y: float = 0.0
+    sway_z: float = 0.0
+    # Target pose (from actions)
+    target_pitch: float = 0.0
+    target_yaw: float = 0.0
+    target_roll: float = 0.0
+    target_x: float = 0.0
+    target_y: float = 0.0
+    target_z: float = 0.0
+    target_antenna_left: float = 0.0
+    target_antenna_right: float = 0.0
+    target_body_yaw: float = 0.0
+    # Timing (timestamps taken from the manager's monotonic clock)
+    last_activity_time: float = 0.0
+    idle_start_time: float = 0.0
+    # Antenna freeze state (listening mode)
+    antenna_frozen: bool = False
+    frozen_antenna_left: float = 0.0
+    frozen_antenna_right: float = 0.0
+    antenna_blend: float = 1.0  # 0=frozen, 1=normal
+    antenna_blend_start_time: float = 0.0  # When the blend back to normal began
+@dataclass
+class PendingAction:
+    """A pending motion action.
+    Describes a head pose to interpolate towards. Axes that are not given
+    default to 0.0, so an action always drives every axis to a definite value.
+    """
+    name: str  # Label used in debug logging
+    target_pitch: float = 0.0
+    target_yaw: float = 0.0
+    target_roll: float = 0.0
+    target_x: float = 0.0
+    target_y: float = 0.0
+    target_z: float = 0.0
+    duration: float = 0.5  # Seconds over which the pose is interpolated
+    callback: Optional[Callable] = None  # Called with no arguments when the action completes
+class MovementManager:
+    """
+    Unified movement manager with a single fixed-rate control loop.
+    All external interactions go through the command queue,
+    ensuring thread safety and preventing race conditions.
+    Note: The loop rate is defined by CONTROL_LOOP_FREQUENCY_HZ (currently
+    100Hz). It was previously reduced to 10Hz to prevent daemon crashes
+    caused by excessive Zenoh message traffic.
+    """
+    def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
+        """Set up queues, state and bookkeeping; does not start the control thread.
+        Args:
+            reachy_mini: Robot handle that receives set_target() calls. May be
+                None, in which case control commands are silently skipped.
+        """
+        self.robot = reachy_mini
+        # Clock used for all internal timing (monotonic, so immune to wall-clock jumps)
+        self._now = time.monotonic
+        # Command queue - all external threads communicate through this
+        self._command_queue: Queue[Tuple[str, Any]] = Queue()
+        # Internal state (only modified by control loop)
+        self.state = MovementState()
+        self.state.last_activity_time = self._now()
+        self.state.idle_start_time = self._now()
+        # Animation player (JSON-driven animations)
+        self._animation_player = AnimationPlayer()
+        # Thread control
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+        # Error throttling
+        self._last_error_time = 0.0
+        self._error_interval = 1.0  # Log at most once per second
+        self._suppressed_errors = 0
+        # Connection health tracking
+        self._connection_lost = False
+        self._last_successful_command = self._now()
+        self._connection_timeout = 3.0
+        self._reconnect_attempt_interval = 2.0  # Seconds between retries while the connection is lost
+        self._last_reconnect_attempt = 0.0
+        self._consecutive_errors = 0
+        self._max_consecutive_errors = 5  # Connection errors tolerated before declaring the link lost
+        # Pending action
+        self._pending_action: Optional[PendingAction] = None
+        self._action_start_time: float = 0.0
+        self._action_start_pose: Dict[str, float] = {}
+        # Pose change detection threshold
+        self._last_sent_pose: Optional[Dict[str, float]] = None
+        self._pose_change_threshold = 0.005  # Applied to every pose component alike
+        # Face tracking offsets (from camera worker)
+        self._face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+        self._face_tracking_lock = threading.Lock()
+        # Camera server reference for face tracking
+        self._camera_server = None
+        # Face tracking smoothing (exponential moving average)
+        self._smoothed_face_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        self._face_smoothing_factor = 0.3  # Weight given to the newest sample
+        logger.info("MovementManager initialized with AnimationPlayer")
+    # =========================================================================
+    # Thread-safe public API (called from any thread)
+    # =========================================================================
+    def set_state(self, new_state: RobotState) -> None:
+        """Thread-safe: Set robot state."""
+        self._command_queue.put(("set_state", new_state))
+    def set_listening(self, listening: bool) -> None:
+        """Thread-safe: Set listening state."""
+        state = RobotState.LISTENING if listening else RobotState.IDLE
+        self._command_queue.put(("set_state", state))
+    def set_thinking(self) -> None:
+        """Thread-safe: Set thinking state."""
+        self._command_queue.put(("set_state", RobotState.THINKING))
+    def set_speaking(self, speaking: bool) -> None:
+        """Thread-safe: Set speaking state."""
+        state = RobotState.SPEAKING if speaking else RobotState.IDLE
+        self._command_queue.put(("set_state", state))
+    def set_idle(self) -> None:
+        """Thread-safe: Return to idle state."""
+        self._command_queue.put(("set_state", RobotState.IDLE))
+    def queue_action(self, action: PendingAction) -> None:
+        """Thread-safe: Queue a motion action."""
+        self._command_queue.put(("action", action))
+    def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
+        """Thread-safe: Turn head to face a direction."""
+        action = PendingAction(
+            name="turn_to",
+            target_yaw=math.radians(yaw_deg),
+            duration=duration,
+        )
+        self._command_queue.put(("action", action))
+    def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
+        """Thread-safe: Perform a nod gesture."""
+        self._command_queue.put(("nod", (amplitude_deg, duration)))
+    def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
+        """Thread-safe: Perform a head shake gesture."""
+        self._command_queue.put(("shake", (amplitude_deg, duration)))
+    def set_speech_sway(
+        self, x: float, y: float, z: float,
+        roll: float, pitch: float, yaw: float
+    ) -> None:
+        """Thread-safe: Set speech-driven sway offsets.
+        These offsets are applied on top of the current animation
+        to create audio-synchronized head motion during TTS playback.
+        Args:
+            x, y, z: Position offsets in meters
+            roll, pitch, yaw: Orientation offsets in radians
+        """
+        self._command_queue.put(("speech_sway", (x, y, z, roll, pitch, yaw)))
+    def reset_to_neutral(self, duration: float = 0.5) -> None:
+        """Thread-safe: Reset to neutral position."""
+        action = PendingAction(
+            name="neutral",
+            target_pitch=0.0,
+            target_yaw=0.0,
+            target_roll=0.0,
+            target_x=0.0,
+            target_y=0.0,
+            target_z=0.0,
+            duration=duration,
+        )
+        self._command_queue.put(("action", action))
+    def set_camera_server(self, camera_server) -> None:
+        """Set the camera server for face tracking offsets.
+        The reference is stored directly (it does not go through the command
+        queue) and is later polled via its get_face_tracking_offsets() method.
+        Args:
+            camera_server: MJPEGCameraServer instance with face tracking
+        """
+        self._camera_server = camera_server
+        logger.info("Camera server set for face tracking")
+    def set_face_tracking_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
+        """Thread-safe: Update face tracking offsets manually.
+        Args:
+            offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
+        """
+        with self._face_tracking_lock:
+            self._face_tracking_offsets = offsets
+    def set_target_pose(
+        self,
+        x: Optional[float] = None,
+        y: Optional[float] = None,
+        z: Optional[float] = None,
+        roll: Optional[float] = None,
+        pitch: Optional[float] = None,
+        yaw: Optional[float] = None,
+        body_yaw: Optional[float] = None,
+        antenna_left: Optional[float] = None,
+        antenna_right: Optional[float] = None,
+    ) -> None:
+        """Thread-safe: Set target pose components.
+        Only provided values will be updated. Values are in meters for position
+        and radians for angles.
+        Args:
+            x, y, z: Head position in meters
+            roll, pitch, yaw: Head orientation in radians
+            body_yaw: Body yaw in radians
+            antenna_left, antenna_right: Antenna angles in radians
+        """
+        self._command_queue.put(("set_pose", {
+            "x": x,
+            "y": y,
+            "z": z,
+            "roll": roll,
+            "pitch": pitch,
+            "yaw": yaw,
+            "body_yaw": body_yaw,
+            "antenna_left": antenna_left,
+            "antenna_right": antenna_right,
+        }))
+    # =========================================================================
+    # Internal: Command processing (runs in control loop)
+    # =========================================================================
+    def _poll_commands(self) -> None:
+        """Process all pending commands from the queue."""
+        while True:
+            try:
+                cmd, payload = self._command_queue.get_nowait()
+            except Empty:
+                break
+            self._handle_command(cmd, payload)
+    def _handle_command(self, cmd: str, payload: Any) -> None:
+        """Handle a single command."""
+        if cmd == "set_state":
+            old_state = self.state.robot_state
+            self.state.robot_state = payload
+            self.state.last_activity_time = self._now()
+            # Update animation based on state
+            animation_name = STATE_ANIMATION_MAP.get(payload.value, "idle")
+            self._animation_player.set_animation(animation_name)
+            # State transition logic
+            if payload == RobotState.IDLE and old_state != RobotState.IDLE:
+                self.state.idle_start_time = self._now()
+                # Unfreeze antennas when returning to idle
+                self._start_antenna_unfreeze()
+            # Freeze antennas when entering listening mode
+            if payload == RobotState.LISTENING:
+                self._freeze_antennas()
+            elif old_state == RobotState.LISTENING and payload != RobotState.LISTENING:
+                # Start unfreezing when leaving listening mode
+                self._start_antenna_unfreeze()
+            logger.debug("State changed: %s -> %s, animation: %s",
+                        old_state.value, payload.value, animation_name)
+        elif cmd == "action":
+            self._start_action(payload)
+        elif cmd == "nod":
+            amplitude_deg, duration = payload
+            self._do_nod(amplitude_deg, duration)
+        elif cmd == "shake":
+            amplitude_deg, duration = payload
+            self._do_shake(amplitude_deg, duration)
+        elif cmd == "set_pose":
+            # Update target pose from external control (e.g., Home Assistant)
+            if payload.get("x") is not None:
+                self.state.target_x = payload["x"]
+            if payload.get("y") is not None:
+                self.state.target_y = payload["y"]
+            if payload.get("z") is not None:
+                self.state.target_z = payload["z"]
+            if payload.get("roll") is not None:
+                self.state.target_roll = payload["roll"]
+            if payload.get("pitch") is not None:
+                self.state.target_pitch = payload["pitch"]
+            if payload.get("yaw") is not None:
+                self.state.target_yaw = payload["yaw"]
+            if payload.get("body_yaw") is not None:
+                self.state.target_body_yaw = payload["body_yaw"]
+            if payload.get("antenna_left") is not None:
+                self.state.target_antenna_left = payload["antenna_left"]
+            if payload.get("antenna_right") is not None:
+                self.state.target_antenna_right = payload["antenna_right"]
+            logger.debug("External pose update: %s", payload)
+        elif cmd == "speech_sway":
+            # Update speech-driven sway offsets
+            x, y, z, roll, pitch, yaw = payload
+            self.state.sway_x = x
+            self.state.sway_y = y
+            self.state.sway_z = z
+            self.state.sway_roll = roll
+            self.state.sway_pitch = pitch
+            self.state.sway_yaw = yaw
+    def _start_action(self, action: PendingAction) -> None:
+        """Start a new motion action."""
+        self._pending_action = action
+        self._action_start_time = self._now()
+        self._action_start_pose = {
+            "pitch": self.state.target_pitch,
+            "yaw": self.state.target_yaw,
+            "roll": self.state.target_roll,
+            "x": self.state.target_x,
+            "y": self.state.target_y,
+            "z": self.state.target_z,
+        }
+        logger.debug("Starting action: %s", action.name)
+    def _do_nod(self, amplitude_deg: float, duration: float) -> None:
+        """Execute nod gesture (blocking in control loop context)."""
+        # This is simplified - in production, use action queue
+        amplitude_rad = math.radians(amplitude_deg)
+        half_duration = duration / 2
+        # Nod down
+        action_down = PendingAction(
+            name="nod_down",
+            target_pitch=amplitude_rad,
+            duration=half_duration,
+        )
+        self._start_action(action_down)
+    def _do_shake(self, amplitude_deg: float, duration: float) -> None:
+        """Execute shake gesture (blocking in control loop context)."""
+        amplitude_rad = math.radians(amplitude_deg)
+        half_duration = duration / 2
+        # Shake left
+        action_left = PendingAction(
+            name="shake_left",
+            target_yaw=-amplitude_rad,
+            duration=half_duration,
+        )
+        self._start_action(action_left)
+    # =========================================================================
+    # Internal: Motion updates (runs in control loop)
+    # =========================================================================
+    def _update_action(self, dt: float) -> None:
+        """Update pending action interpolation."""
+        if self._pending_action is None:
+            return
+        elapsed = self._now() - self._action_start_time
+        progress = min(1.0, elapsed / self._pending_action.duration)
+        # Smooth interpolation (ease in-out)
+        t = progress * progress * (3 - 2 * progress)
+        # Interpolate pose
+        start = self._action_start_pose
+        action = self._pending_action
+        self.state.target_pitch = start["pitch"] + t * (action.target_pitch - start["pitch"])
+        self.state.target_yaw = start["yaw"] + t * (action.target_yaw - start["yaw"])
+        self.state.target_roll = start["roll"] + t * (action.target_roll - start["roll"])
+        self.state.target_x = start["x"] + t * (action.target_x - start["x"])
+        self.state.target_y = start["y"] + t * (action.target_y - start["y"])
+        self.state.target_z = start["z"] + t * (action.target_z - start["z"])
+        # Action complete
+        if progress >= 1.0:
+            if self._pending_action.callback:
+                try:
+                    self._pending_action.callback()
+                except Exception as e:
+                    logger.error("Action callback error: %s", e)
+            self._pending_action = None
+    def _update_animation(self, dt: float) -> None:
+        """Update animation offsets from AnimationPlayer."""
+        offsets = self._animation_player.get_offsets(dt)
+        self.state.anim_pitch = offsets["pitch"]
+        self.state.anim_yaw = offsets["yaw"]
+        self.state.anim_roll = offsets["roll"]
+        self.state.anim_x = offsets["x"]
+        self.state.anim_y = offsets["y"]
+        self.state.anim_z = offsets["z"]
+        self.state.anim_antenna_left = offsets["antenna_left"]
+        self.state.anim_antenna_right = offsets["antenna_right"]
+    def _freeze_antennas(self) -> None:
+        """Freeze antennas at current position (for listening mode)."""
+        # Capture current antenna positions
+        current_left = self.state.target_antenna_left + self.state.anim_antenna_left
+        current_right = self.state.target_antenna_right + self.state.anim_antenna_right
+        self.state.antenna_frozen = True
+        self.state.frozen_antenna_left = current_left
+        self.state.frozen_antenna_right = current_right
+        self.state.antenna_blend = 0.0  # Fully frozen
+        logger.debug("Antennas frozen at left=%.2f, right=%.2f",
+                    math.degrees(current_left), math.degrees(current_right))
+    def _start_antenna_unfreeze(self) -> None:
+        """Start unfreezing antennas (smooth blend back to normal)."""
+        if not self.state.antenna_frozen:
+            return
+        self.state.antenna_blend_start_time = self._now()
+        logger.debug("Starting antenna unfreeze")
+    def _update_antenna_blend(self, dt: float) -> None:
+        """Update antenna blend state for smooth unfreezing."""
+        if not self.state.antenna_frozen:
+            return
+        if self.state.antenna_blend >= 1.0:
+            # Fully unfrozen
+            self.state.antenna_frozen = False
+            return
+        # Calculate blend progress
+        elapsed = self._now() - self.state.antenna_blend_start_time
+        if elapsed > 0:
+            self.state.antenna_blend = min(1.0, elapsed / ANTENNA_BLEND_DURATION)
+            if self.state.antenna_blend >= 1.0:
+                self.state.antenna_frozen = False
+                logger.debug("Antennas unfrozen")
+    def _update_face_tracking(self) -> None:
+        """Get face tracking offsets from camera server with smoothing."""
+        if self._camera_server is not None:
+            try:
+                raw_offsets = self._camera_server.get_face_tracking_offsets()
+                # Apply exponential moving average smoothing
+                alpha = self._face_smoothing_factor
+                for i in range(6):
+                    self._smoothed_face_offsets[i] = (
+                        alpha * raw_offsets[i] +
+                        (1 - alpha) * self._smoothed_face_offsets[i]
+                    )
+                with self._face_tracking_lock:
+                    self._face_tracking_offsets = tuple(self._smoothed_face_offsets)
+            except Exception as e:
+                logger.debug("Error getting face tracking offsets: %s", e)
+    def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
+        """Compose final pose from all sources using SDK's compose_world_offset.
+        The primary pose comes from the target_* state; the secondary pose is
+        the sum of animation, speech sway and face tracking offsets. When the
+        SDK utilities are unavailable a manual matrix composition is used.
+        Returns:
+            Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
+        """
+        # Build primary head pose from target state
+        if SDK_UTILS_AVAILABLE:
+            primary_head = create_head_pose(
+                x=self.state.target_x,
+                y=self.state.target_y,
+                z=self.state.target_z,
+                roll=self.state.target_roll,
+                pitch=self.state.target_pitch,
+                yaw=self.state.target_yaw,
+                degrees=False,
+                mm=False,
+            )
+        else:
+            # Fallback: build matrix manually
+            # Euler order 'xyz' pairs with [roll, pitch, yaw]
+            rotation = R.from_euler('xyz', [
+                self.state.target_roll,
+                self.state.target_pitch,
+                self.state.target_yaw,
+            ])
+            primary_head = np.eye(4)
+            primary_head[:3, :3] = rotation.as_matrix()
+            primary_head[0, 3] = self.state.target_x
+            primary_head[1, 3] = self.state.target_y
+            primary_head[2, 3] = self.state.target_z
+        # Build secondary pose from animation + face tracking + speech sway
+        # Take a consistent snapshot of the offsets under the lock
+        with self._face_tracking_lock:
+            face_offsets = self._face_tracking_offsets
+        # face_offsets is indexed as (x, y, z, roll, pitch, yaw)
+        secondary_x = self.state.anim_x + self.state.sway_x + face_offsets[0]
+        secondary_y = self.state.anim_y + self.state.sway_y + face_offsets[1]
+        secondary_z = self.state.anim_z + self.state.sway_z + face_offsets[2]
+        secondary_roll = self.state.anim_roll + self.state.sway_roll + face_offsets[3]
+        secondary_pitch = self.state.anim_pitch + self.state.sway_pitch + face_offsets[4]
+        secondary_yaw = self.state.anim_yaw + self.state.sway_yaw + face_offsets[5]
+        if SDK_UTILS_AVAILABLE:
+            secondary_head = create_head_pose(
+                x=secondary_x,
+                y=secondary_y,
+                z=secondary_z,
+                roll=secondary_roll,
+                pitch=secondary_pitch,
+                yaw=secondary_yaw,
+                degrees=False,
+                mm=False,
+            )
+            # Compose using SDK's compose_world_offset (same as conversation_app)
+            final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
+        else:
+            # Fallback: simple addition (less accurate but works)
+            secondary_rotation = R.from_euler('xyz', [secondary_roll, secondary_pitch, secondary_yaw])
+            secondary_head = np.eye(4)
+            secondary_head[:3, :3] = secondary_rotation.as_matrix()
+            secondary_head[0, 3] = secondary_x
+            secondary_head[1, 3] = secondary_y
+            secondary_head[2, 3] = secondary_z
+            # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
+            final_head = np.eye(4)
+            final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
+            final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]
+        # Antenna pose with freeze blending
+        target_antenna_left = self.state.target_antenna_left + self.state.anim_antenna_left
+        target_antenna_right = self.state.target_antenna_right + self.state.anim_antenna_right
+        # Apply antenna freeze blending (listening mode)
+        blend = self.state.antenna_blend
+        if blend < 1.0:
+            # Blend between frozen position and target position
+            antenna_left = (self.state.frozen_antenna_left * (1.0 - blend) +
+                          target_antenna_left * blend)
+            antenna_right = (self.state.frozen_antenna_right * (1.0 - blend) +
+                           target_antenna_right * blend)
+        else:
+            antenna_left = target_antenna_left
+            antenna_right = target_antenna_right
+        # Note the antenna order in the result: right first, then left
+        return final_head, (antenna_right, antenna_left), self.state.target_body_yaw
+    # =========================================================================
+    # Internal: Robot control (runs in control loop)
+    # =========================================================================
+    def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
+        """Send control command to robot with error throttling and connection health tracking.
+        The command is skipped when there is no robot, when the pose differs
+        from the last successfully sent pose by less than the change threshold
+        on every component, or when the connection is marked lost and the
+        retry interval has not yet elapsed. Exceptions from the robot are
+        caught and logged; they never propagate to the caller.
+        Args:
+            head_pose: 4x4 head pose matrix.
+            antennas: Antenna angles as (antenna_right, antenna_left).
+            body_yaw: Body yaw angle.
+        """
+        if self.robot is None:
+            return
+        # Check if pose changed significantly (prevent unnecessary commands)
+        # Extract euler angles for comparison
+        rotation = R.from_matrix(head_pose[:3, :3])
+        euler = rotation.as_euler('xyz')  # [roll, pitch, yaw]
+        current_pose = {
+            "x": head_pose[0, 3],
+            "y": head_pose[1, 3],
+            "z": head_pose[2, 3],
+            "roll": euler[0],
+            "pitch": euler[1],
+            "yaw": euler[2],
+            "antenna_right": antennas[0],
+            "antenna_left": antennas[1],
+            "body_yaw": body_yaw,
+        }
+        if self._last_sent_pose is not None:
+            # Largest per-component change since the last successful send
+            max_diff = max(
+                abs(current_pose[k] - self._last_sent_pose.get(k, 0.0))
+                for k in current_pose.keys()
+            )
+            if max_diff < self._pose_change_threshold:
+                # No significant change, skip sending command
+                return
+        now = self._now()
+        # Check if we should skip due to connection loss (but always try periodically)
+        if self._connection_lost:
+            if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
+                # Skip sending commands to reduce error spam
+                return
+            # Time to try reconnecting
+            self._last_reconnect_attempt = now
+            logger.debug("Attempting to send command after connection loss...")
+        try:
+            # Send to robot (single control point!)
+            # head_pose is already a 4x4 matrix from _compose_final_pose
+            self.robot.set_target(
+                head=head_pose,
+                antennas=list(antennas),
+                body_yaw=body_yaw,
+            )
+            # Command succeeded - update connection health and cache
+            self._last_successful_command = now
+            self._last_sent_pose = current_pose.copy()  # Cache sent pose
+            self._consecutive_errors = 0  # Reset error counter
+            if self._connection_lost:
+                logger.info("✓ Connection to robot restored")
+                self._connection_lost = False
+                self._suppressed_errors = 0
+        except Exception as e:
+            error_msg = str(e)
+            self._consecutive_errors += 1
+            # Check if this is a connection error
+            # (classified by substring match on the exception text)
+            is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg
+            if is_connection_error:
+                if not self._connection_lost:
+                    # First time detecting connection loss
+                    if self._consecutive_errors >= self._max_consecutive_errors:
+                        logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
+                        logger.warning("  Will retry connection every %.1fs...", self._reconnect_attempt_interval)
+                        self._connection_lost = True
+                        self._last_reconnect_attempt = now
+                    else:
+                        # Transient error, log but don't mark as lost yet
+                        self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
+                else:
+                    # Already in lost state, use throttled logging
+                    self._log_error_throttled(f"Connection still lost: {error_msg}")
+            else:
+                # Non-connection error - log but don't affect connection state
+                self._log_error_throttled(f"Failed to set robot target: {error_msg}")
+    def _log_error_throttled(self, message: str) -> None:
+        """Emit ``message`` at error level, rate-limited by ``_error_interval``.
+        Calls arriving before the interval has elapsed since the last emitted
+        error are only counted; that count is appended to the next message
+        that does get logged and is then reset to zero.
+        Args:
+            message: Text to log.
+        """
+        timestamp = self._now()
+        if timestamp - self._last_error_time < self._error_interval:
+            # Still inside the quiet window: count the repeat, emit nothing.
+            self._suppressed_errors += 1
+            return
+        pending = self._suppressed_errors
+        if pending > 0:
+            message = f"{message} (suppressed {pending} repeats)"
+            self._suppressed_errors = 0
+        logger.error(message)
+        self._last_error_time = timestamp
+    # =========================================================================
+    # Control loop
+    # =========================================================================
+    def _control_loop(self) -> None:
+        """Run the periodic control loop until the stop event is set.
+        The nominal rate is CONTROL_LOOP_FREQUENCY_HZ (one cycle per
+        TARGET_PERIOD). Each cycle polls queued commands, advances the action,
+        animation and antenna-blend state by the elapsed time, refreshes the
+        face-tracking offsets, composes the final pose and issues a single
+        command to the robot. An exception raised inside a cycle is logged
+        (throttled) and the loop carries on with the next cycle.
+        """
+        logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)
+        last_time = self._now()
+        while not self._stop_event.is_set():
+            loop_start = self._now()
+            dt = loop_start - last_time
+            last_time = loop_start
+            try:
+                # 1. Process commands from queue
+                self._poll_commands()
+                # 2. Update action interpolation
+                self._update_action(dt)
+                # 3. Update animation offsets (JSON-driven)
+                self._update_animation(dt)
+                # 4. Update antenna blend (listening mode freeze/unfreeze)
+                self._update_antenna_blend(dt)
+                # 5. Update face tracking offsets from camera server
+                self._update_face_tracking()
+                # 6. Compose final pose (returns head_pose matrix, antennas tuple, body_yaw)
+                head_pose, antennas, body_yaw = self._compose_final_pose()
+                # 7. Send to robot (single control point!)
+                self._issue_control_command(head_pose, antennas, body_yaw)
+            except Exception as e:
+                # Loop boundary: one failing cycle must not kill the control thread.
+                self._log_error_throttled(f"Control loop error: {e}")
+            # Pause for whatever is left of this period. Waiting on the stop
+            # event instead of time.sleep() lets stop() cut the pause short.
+            remaining = TARGET_PERIOD - (self._now() - loop_start)
+            if remaining > 0:
+                self._stop_event.wait(remaining)
+        logger.info("Movement manager control loop stopped")
+    # =========================================================================
+    # Lifecycle
+    # =========================================================================
+    def start(self) -> None:
+        """Launch the control loop on a daemon thread named "MovementManager".
+        If a previously started thread is still alive, only a warning is
+        logged and no new thread is created.
+        """
+        existing = self._thread
+        if existing is not None and existing.is_alive():
+            logger.warning("Movement manager already running")
+            return
+        # Clear any stop request left over from an earlier stop().
+        self._stop_event.clear()
+        worker = threading.Thread(target=self._control_loop, name="MovementManager", daemon=True)
+        self._thread = worker
+        worker.start()
+        logger.info("Movement manager started")
+    def stop(self) -> None:
+        """Signal the control loop to stop and wait briefly for its thread.
+        The robot is deliberately NOT moved back to neutral here; that is left
+        to the app manager, which speeds up shutdown significantly. Does
+        nothing when the control loop is not running.
+        """
+        thread = self._thread
+        if thread is None or not thread.is_alive():
+            return
+        logger.info("Stopping movement manager...")
+        # Signal stop
+        self._stop_event.set()
+        # Wait for thread with a short timeout so shutdown is never held up
+        thread.join(timeout=0.5)
+        if thread.is_alive():
+            # Do not report "stopped" while the thread is still running.
+            logger.warning("Movement manager thread did not stop in time")
+            return
+        logger.info("Movement manager stopped")
+    def _reset_to_neutral_blocking(self) -> None:
+        """Send the robot to its neutral pose through ``goto_target``.
+        Neutral is an identity 4x4 head pose, both antennas at 0.0 and body
+        yaw 0.0, requested with duration=0.3. Does nothing when no robot is
+        attached; a failing call is logged instead of raised.
+        """
+        if self.robot is None:
+            return
+        try:
+            identity_head = np.eye(4)
+            self.robot.goto_target(
+                head=identity_head,
+                antennas=[0.0, 0.0],
+                body_yaw=0.0,
+                duration=0.3,  # Faster reset
+            )
+            logger.info("Robot reset to neutral position")
+        except Exception as e:
+            logger.error("Failed to reset robot: %s", e)
+    @property
+    def is_running(self) -> bool:
+        """True while a control-loop thread exists and is still alive."""
+        worker = self._thread
+        if worker is None:
+            return False
+        return worker.is_alive()

{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py RENAMED Viewed

@@ -1,735 +1,869 @@
-"""Reachy Mini controller wrapper for ESPHome entities."""
-import logging
-import math
-import time
-from typing import TYPE_CHECKING, Any
-import numpy as np
-import requests
-from scipy.spatial.transform import Rotation as R
-from .core.config import Config
-if TYPE_CHECKING:
-    from reachy_mini import ReachyMini
-logger = logging.getLogger(__name__)
-class ReachyController:
-    """
-    Wrapper class for Reachy Mini control operations.
-    Provides safe access to Reachy Mini SDK functions with error handling.
-    """
-    def __init__(self, reachy_mini: "ReachyMini"):
-        """
-        Initialize the controller.
-        Args:
-            reachy_mini: ReachyMini instance (required)
-        """
-        self.reachy = reachy_mini
-        self._speaker_volume = 100  # Default volume
-        self._movement_manager = None  # Set later via set_movement_manager()
-        # Shared session to reduce per-request overhead
-        self._http_session = requests.Session()
-        self._http_timeout = 5.0  # seconds
-        self._cache_ttl = Config.daemon.status_cache_ttl
-        self._daemon_base_url = Config.daemon.url.rstrip("/")
-        # Status caching - only for get_status() which may trigger I/O
-        # Note: get_current_head_pose() and get_current_joint_positions() are
-        # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
-        self._state_cache: dict[str, Any] = {}
-        self._last_status_query = 0.0
-        self._look_at_x = 0.0
-        self._look_at_y = 0.0
-        self._look_at_z = 0.0
-    def set_movement_manager(self, movement_manager) -> None:
-        """Set the MovementManager instance for pose control.
-        Args:
-            movement_manager: MovementManager instance
-        """
-        self._movement_manager = movement_manager
-        logger.info("MovementManager set for ReachyController")
-    @property
-    def is_available(self) -> bool:
-        """Check if robot is available."""
-        return self.reachy is not None
-    def _with_movement_manager(self, caller: str):
-        if self._movement_manager is None:
-            logger.warning("%s failed - MovementManager not set", caller)
-            return None
-        return self._movement_manager
-    def _get_movement_bool(self, getter_name: str, log_label: str) -> bool:
-        movement_manager = self._movement_manager
-        if movement_manager is None:
-            return False
-        try:
-            return bool(getattr(movement_manager, getter_name)())
-        except Exception as e:
-            logger.debug("Error getting %s state: %s", log_label, e)
-            return False
-    def get_idle_behavior_enabled(self) -> bool:
-        """Get whether any idle behavior subsystem is enabled."""
-        return self._get_movement_bool("get_idle_behavior_enabled", "idle behavior")
-    def set_idle_behavior_enabled(self, enabled: bool) -> None:
-        """Enable or disable all idle behavior subsystems together."""
-        movement_manager = self._with_movement_manager("set_idle_behavior_enabled")
-        if movement_manager is not None:
-            movement_manager.set_idle_behavior_enabled(enabled)
-    # ========== Phase 1: Basic Status & Volume ==========
-    @staticmethod
-    def _status_value(status: Any, key: str, default: Any = None) -> Any:
-        if status is None:
-            return default
-        if isinstance(status, dict):
-            return status.get(key, default)
-        return getattr(status, key, default)
-    @classmethod
-    def _nested_status_value(cls, status: Any, parent_key: str, child_key: str, default: Any = None) -> Any:
-        parent = cls._status_value(status, parent_key, None)
-        if parent is None:
-            return default
-        if isinstance(parent, dict):
-            return parent.get(child_key, default)
-        return getattr(parent, child_key, default)
-    def _get_cached_status(self) -> Any:
-        """Get cached daemon status to reduce query frequency.
-        Note: get_status() may trigger I/O, so we cache it.
-        Unlike get_current_head_pose() and get_current_joint_positions()
-        which are non-blocking in the SDK.
-        """
-        now = time.time()
-        if now - self._last_status_query < self._cache_ttl:
-            return self._state_cache.get("status")
-        if not self.is_available:
-            return None
-        try:
-            status = self.reachy.client.get_status(wait=False)
-            self._state_cache["status"] = status
-            self._last_status_query = now
-            return status
-        except Exception as e:
-            logger.error(f"Error getting status: {e}")
-            return None
-    def get_daemon_state(self) -> str:
-        """Get daemon state with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "not_available"
-        return str(self._status_value(status, "state", "unknown"))
-    def get_backend_ready(self) -> bool:
-        """Check if backend is ready with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return False
-        return self._status_value(status, "state") == "running"
-    def get_error_message(self) -> str:
-        """Get current error message with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "Robot not available"
-        return str(self._status_value(status, "error", "") or "")
-    def _get_volume_via_api(self, path: str, cached_value: float, label: str) -> float:
-        """Fetch a volume value from the daemon API, falling back to the cached value."""
-        try:
-            resp = self._http_session.get(
-                f"{self._daemon_base_url}{path}",
-                timeout=self._http_timeout,
-            )
-            resp.raise_for_status()
-            data = resp.json()
-            if isinstance(data, dict) and "volume" in data:
-                return float(data["volume"])
-        except Exception as e:
-            logger.warning("Failed to get %s volume via daemon API: %s", label, e)
-        return cached_value
-    def _set_volume_via_api(self, path: str, volume: float, label: str) -> float:
-        """Write a volume value through the daemon API and return the confirmed level."""
-        try:
-            resp = self._http_session.post(
-                f"{self._daemon_base_url}{path}",
-                json={"volume": int(volume)},
-                timeout=self._http_timeout,
-            )
-            resp.raise_for_status()
-            data = resp.json()
-            if isinstance(data, dict) and "volume" in data:
-                return float(data["volume"])
-            return volume
-        except Exception as e:
-            logger.error("Failed to set %s volume via daemon API: %s", label, e)
-            return volume
-    def _motor_mode_from_status(self, status: Any) -> str | None:
-        motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
-        if motor_mode is not None:
-            return str(motor_mode)
-        return None
-    def get_speaker_volume(self) -> float:
-        """Get speaker volume (0-100) from the daemon volume API."""
-        self._speaker_volume = self._get_volume_via_api("/api/volume/current", self._speaker_volume, "speaker")
-        return self._speaker_volume
-    def set_speaker_volume(self, volume: float) -> None:
-        """Set speaker volume (0-100) through the daemon volume API."""
-        volume = max(0.0, min(100.0, volume))
-        self._speaker_volume = self._set_volume_via_api("/api/volume/set", volume, "speaker")
-        logger.info("Speaker volume set to %.1f%% via daemon API", self._speaker_volume)
-    # ========== Phase 2: Motor Control ==========
-    def get_motors_enabled(self) -> bool:
-        """Check if motors are enabled with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return False
-        try:
-            motor_mode = self._motor_mode_from_status(status)
-            return motor_mode == "enabled"
-        except Exception as e:
-            logger.error(f"Error getting motor state: {e}")
-            return False
-    def set_motors_enabled(self, enabled: bool) -> None:
-        """
-        Enable or disable motors.
-        Args:
-            enabled: True to enable, False to disable
-        """
-        if not self.is_available:
-            logger.warning("Cannot control motors: robot not available")
-            return
-        try:
-            if enabled:
-                self.reachy.enable_motors()
-                logger.info("Motors enabled")
-            else:
-                self.reachy.disable_motors()
-                logger.info("Motors disabled")
-        except Exception as e:
-            logger.error(f"Error setting motor state: {e}")
-    def get_motor_mode(self) -> str:
-        """Get current motor control mode with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "disabled"
-        try:
-            return self._motor_mode_from_status(status) or "disabled"
-        except Exception as e:
-            logger.error(f"Error getting motor mode: {e}")
-            return "error"
-    def set_motor_mode(self, mode: str) -> None:
-        """
-        Set motor control mode.
-        Args:
-            mode: One of "enabled", "disabled", "gravity_compensation"
-        """
-        if not self.is_available:
-            logger.warning("Cannot set motor mode: robot not available")
-            return
-        try:
-            if mode == "enabled":
-                self.reachy.enable_motors()
-            elif mode == "disabled":
-                self.reachy.disable_motors()
-            elif mode == "gravity_compensation":
-                self.reachy.enable_gravity_compensation()
-            else:
-                logger.warning(f"Invalid motor mode: {mode}")
-                return
-            logger.info(f"Motor mode set to {mode}")
-        except Exception as e:
-            logger.error(f"Error setting motor mode: {e}")
-    def get_doa_enabled(self) -> bool:
-        """Get whether DOA sound tracking is enabled."""
-        return self._get_movement_bool("get_doa_enabled", "DOA tracking")
-    def set_doa_enabled(self, enabled: bool) -> None:
-        """Enable or disable DOA sound tracking."""
-        movement_manager = self._with_movement_manager("set_doa_enabled")
-        if movement_manager is not None:
-            movement_manager.set_doa_enabled(enabled)
-    def _daemon_command(self, path: str, params: dict[str, str] | None = None) -> None:
-        """Send a daemon command request and wait for the daemon state to settle."""
-        url = f"{self._daemon_base_url}{path}"
-        resp = self._http_session.post(url, params=params or {}, timeout=self._http_timeout)
-        resp.raise_for_status()
-        desired_state = None
-        if path.endswith("/start"):
-            desired_state = "running"
-        elif path.endswith("/stop"):
-            desired_state = "stopped"
-        if desired_state is not None:
-            self._wait_for_daemon_state(desired_state)
-    def _wait_for_daemon_state(self, desired_state: str, timeout: float = 10.0) -> None:
-        """Poll daemon status until the requested state is reached."""
-        deadline = time.time() + timeout
-        while time.time() < deadline:
-            try:
-                resp = self._http_session.get(
-                    f"{self._daemon_base_url}/api/daemon/status",
-                    timeout=self._http_timeout,
-                )
-                resp.raise_for_status()
-                data = resp.json()
-                current_state = str(data.get("state", "")).lower()
-                if current_state == desired_state:
-                    self._last_status_query = 0.0
-                    return
-            except Exception as e:
-                logger.debug("Waiting for daemon state %s failed: %s", desired_state, e)
-            time.sleep(0.2)
-        logger.warning("Timed out waiting for daemon state '%s'", desired_state)
-    # ========== Phase 3: Pose Control ==========
-    def _get_head_pose(self) -> np.ndarray | None:
-        """Get current head pose from SDK.
-        Note: SDK's get_current_head_pose() is non-blocking - it returns
-        cached data from Zenoh subscriptions, so no throttling needed.
-        """
-        if not self.is_available:
-            return None
-        try:
-            return self.reachy.get_current_head_pose()
-        except Exception as e:
-            logger.error(f"Error getting head pose: {e}")
-            return None
-    def _get_joint_positions(self) -> tuple | None:
-        """Get current joint positions from SDK.
-        Note: SDK's get_current_joint_positions() is non-blocking - it returns
-        cached data from Zenoh subscriptions, so no throttling needed.
-        """
-        if not self.is_available:
-            return None
-        try:
-            return self.reachy.get_current_joint_positions()
-        except Exception as e:
-            logger.error(f"Error getting joint positions: {e}")
-            return None
-    def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
-        """
-        Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
-        Args:
-            pose_matrix: 4x4 homogeneous transformation matrix
-        Returns:
-            tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
-        """
-        # Extract position from the last column
-        x = pose_matrix[0, 3]
-        y = pose_matrix[1, 3]
-        z = pose_matrix[2, 3]
-        # Extract rotation matrix and convert to euler angles
-        rotation_matrix = pose_matrix[:3, :3]
-        rotation = R.from_matrix(rotation_matrix)
-        # Use 'xyz' convention for roll, pitch, yaw
-        roll, pitch, yaw = rotation.as_euler("xyz")
-        return x, y, z, roll, pitch, yaw
-    def _get_head_pose_component(self, component: str) -> float:
-        """Get a specific component from head pose.
-        Args:
-            component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
-        Returns:
-            The component value, or 0.0 on error
-        """
-        pose = self._get_head_pose()
-        if pose is None:
-            return 0.0
-        try:
-            x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
-            components = {
-                "x": x * 1000,  # m to mm
-                "y": y * 1000,
-                "z": z * 1000,
-                "roll": math.degrees(roll),
-                "pitch": math.degrees(pitch),
-                "yaw": math.degrees(yaw),
-            }
-            return components.get(component, 0.0)
-        except Exception as e:
-            logger.error(f"Error getting head {component}: {e}")
-            return 0.0
-    def _disabled_pose_setter(self, name: str) -> None:
-        """Log warning when MovementManager is not available."""
-        logger.warning(f"set_{name} failed - MovementManager not set")
-    def _set_pose_via_manager(self, **kwargs) -> bool:
-        """Set pose via MovementManager if available.
-        Returns True if successful, False if MovementManager not available.
-        """
-        if self._movement_manager is None:
-            return False
-        self._movement_manager.set_target_pose(**kwargs)
-        return True
-    # Head position getters and setters
-    def get_head_x(self) -> float:
-        """Get head X position in mm."""
-        return self._get_head_pose_component("x")
-    def set_head_x(self, x_mm: float) -> None:
-        """Set head X position in mm via MovementManager."""
-        if not self._set_pose_via_manager(x=x_mm / 1000.0):  # mm to m
-            self._disabled_pose_setter("head_x")
-    def get_head_y(self) -> float:
-        """Get head Y position in mm."""
-        return self._get_head_pose_component("y")
-    def set_head_y(self, y_mm: float) -> None:
-        """Set head Y position in mm via MovementManager."""
-        if not self._set_pose_via_manager(y=y_mm / 1000.0):  # mm to m
-            self._disabled_pose_setter("head_y")
-    def get_head_z(self) -> float:
-        """Get head Z position in mm."""
-        return self._get_head_pose_component("z")
-    def set_head_z(self, z_mm: float) -> None:
-        """Set head Z position in mm via MovementManager."""
-        if not self._set_pose_via_manager(z=z_mm / 1000.0):  # mm to m
-            self._disabled_pose_setter("head_z")
-    # Head orientation getters and setters
-    def get_head_roll(self) -> float:
-        """Get head roll angle in degrees."""
-        return self._get_head_pose_component("roll")
-    def set_head_roll(self, roll_deg: float) -> None:
-        """Set head roll angle in degrees via MovementManager."""
-        if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
-            self._disabled_pose_setter("head_roll")
-    def get_head_pitch(self) -> float:
-        """Get head pitch angle in degrees."""
-        return self._get_head_pose_component("pitch")
-    def set_head_pitch(self, pitch_deg: float) -> None:
-        """Set head pitch angle in degrees via MovementManager."""
-        if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
-            self._disabled_pose_setter("head_pitch")
-    def get_head_yaw(self) -> float:
-        """Get head yaw angle in degrees."""
-        return self._get_head_pose_component("yaw")
-    def set_head_yaw(self, yaw_deg: float) -> None:
-        """Set head yaw angle in degrees via MovementManager."""
-        if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
-            self._disabled_pose_setter("head_yaw")
-    def get_body_yaw(self) -> float:
-        """Get body yaw angle in degrees."""
-        joints = self._get_joint_positions()
-        if joints is None:
-            return 0.0
-        try:
-            head_joints, _ = joints
-            return math.degrees(head_joints[0])
-        except Exception as e:
-            logger.error(f"Error getting body yaw: {e}")
-            return 0.0
-    def set_body_yaw(self, yaw_deg: float) -> None:
-        """Set body yaw angle in degrees.
-        Note: This directly calls SDK's set_target_body_yaw since automatic body yaw
-        is enabled. Manual control will temporarily override automatic mode.
-        """
-        if self.reachy is None:
-            self._disabled_pose_setter("body_yaw")
-            return
-        try:
-            self.reachy.set_target_body_yaw(math.radians(yaw_deg))
-        except Exception as e:
-            logger.error(f"Error setting body yaw: {e}")
-    def get_antenna_left(self) -> float:
-        """Get left antenna angle in degrees."""
-        joints = self._get_joint_positions()
-        if joints is None:
-            return 0.0
-        try:
-            _, antennas = joints
-            return math.degrees(antennas[1])  # left is index 1
-        except Exception as e:
-            logger.error(f"Error getting left antenna: {e}")
-            return 0.0
-    def set_antenna_left(self, angle_deg: float) -> None:
-        """Set left antenna angle in degrees via MovementManager."""
-        if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
-            self._disabled_pose_setter("antenna_left")
-    def get_antenna_right(self) -> float:
-        """Get right antenna angle in degrees."""
-        joints = self._get_joint_positions()
-        if joints is None:
-            return 0.0
-        try:
-            _, antennas = joints
-            return math.degrees(antennas[0])  # right is index 0
-        except Exception as e:
-            logger.error(f"Error getting right antenna: {e}")
-            return 0.0
-    def set_antenna_right(self, angle_deg: float) -> None:
-        """Set right antenna angle in degrees via MovementManager."""
-        if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
-            self._disabled_pose_setter("antenna_right")
-    # ========== Phase 4: Look At Control ==========
-    def get_look_at_x(self) -> float:
-        """Get look at target X coordinate in world frame (meters)."""
-        return self._look_at_x
-    def set_look_at_x(self, x: float) -> None:
-        """Set look at target X coordinate."""
-        self._look_at_x = x
-        self._update_look_at()
-    def get_look_at_y(self) -> float:
-        """Get look at target Y coordinate in world frame (meters)."""
-        return self._look_at_y
-    def set_look_at_y(self, y: float) -> None:
-        """Set look at target Y coordinate."""
-        self._look_at_y = y
-        self._update_look_at()
-    def get_look_at_z(self) -> float:
-        """Get look at target Z coordinate in world frame (meters)."""
-        return self._look_at_z
-    def set_look_at_z(self, z: float) -> None:
-        """Set look at target Z coordinate."""
-        self._look_at_z = z
-        self._update_look_at()
-    def _update_look_at(self) -> None:
-        """Update robot to look at the target coordinates.
-        NOTE: Disabled to prevent conflict with MovementManager's control loop.
-        """
-        logger.warning("_update_look_at is disabled - MovementManager controls head pose")
-        # if not self.is_available:
-        #     return
-        # try:
-        #     x = getattr(self, '_look_at_x', 0.0)
-        #     y = getattr(self, '_look_at_y', 0.0)
-        #     z = getattr(self, '_look_at_z', 0.0)
-        #     self.reachy.look_at_world(x, y, z)
-        #     logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
-        # except Exception as e:
-        #     logger.error(f"Error updating look at: {e}")
-    # ========== Phase 6: Diagnostic Information ==========
-    def get_control_loop_frequency(self) -> float:
-        """Get control loop frequency in Hz with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return 0.0
-        try:
-            control_loop_stats = self._nested_status_value(status, "backend_status", "control_loop_stats", None)
-            if isinstance(control_loop_stats, dict):
-                return float(control_loop_stats.get("mean_control_loop_frequency", 0.0))
-            if control_loop_stats is not None:
-                return float(getattr(control_loop_stats, "mean_control_loop_frequency", 0.0))
-            return 0.0
-        except Exception as e:
-            logger.error(f"Error getting control loop frequency: {e}")
-            return 0.0
-    def get_sdk_version(self) -> str:
-        """Get SDK version with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "N/A"
-        return str(self._status_value(status, "version", "unknown") or "unknown")
-    def get_robot_name(self) -> str:
-        """Get robot name with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "N/A"
-        return str(self._status_value(status, "robot_name", "unknown") or "unknown")
-    def get_wireless_version(self) -> bool:
-        """Check if this is a wireless version with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return False
-        return bool(self._status_value(status, "wireless_version", False))
-    def get_simulation_mode(self) -> bool:
-        """Check if simulation mode is enabled with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return False
-        return bool(self._status_value(status, "simulation_enabled", False))
-    def get_wlan_ip(self) -> str:
-        """Get WLAN IP address with caching."""
-        status = self._get_cached_status()
-        if status is None:
-            return "N/A"
-        return str(self._status_value(status, "wlan_ip", "N/A") or "N/A")
-    # ========== Phase 7: IMU Sensors (Wireless only) ==========
-    def _get_imu_value(self, sensor_type: str, index: int) -> float:
-        """Get a specific IMU sensor value.
-        Args:
-            sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
-            index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
-        Returns:
-            The sensor value, or 0.0 on error
-        """
-        if not self.is_available:
-            return 0.0
-        try:
-            imu_data = self.reachy.imu
-            if imu_data is None or sensor_type not in imu_data:
-                return 0.0
-            value = imu_data[sensor_type]
-            return float(value[index]) if index >= 0 else float(value)
-        except Exception as e:
-            logger.debug(f"Error getting IMU {sensor_type}: {e}")
-            return 0.0
-    def get_imu_accel_x(self) -> float:
-        """Get IMU X-axis acceleration in m/s²."""
-        return self._get_imu_value("accelerometer", 0)
-    def get_imu_accel_y(self) -> float:
-        """Get IMU Y-axis acceleration in m/s²."""
-        return self._get_imu_value("accelerometer", 1)
-    def get_imu_accel_z(self) -> float:
-        """Get IMU Z-axis acceleration in m/s²."""
-        return self._get_imu_value("accelerometer", 2)
-    def get_imu_gyro_x(self) -> float:
-        """Get IMU X-axis angular velocity in rad/s."""
-        return self._get_imu_value("gyroscope", 0)
-    def get_imu_gyro_y(self) -> float:
-        """Get IMU Y-axis angular velocity in rad/s."""
-        return self._get_imu_value("gyroscope", 1)
-    def get_imu_gyro_z(self) -> float:
-        """Get IMU Z-axis angular velocity in rad/s."""
-        return self._get_imu_value("gyroscope", 2)
-    def get_imu_temperature(self) -> float:
-        """Get IMU temperature in °C."""
-        return self._get_imu_value("temperature", -1)
-    # ========== Phase 11: LED Control (DISABLED) ==========
-    # LED control is disabled because LEDs are hidden inside the robot.
-    # See PROJECT_PLAN.md principle 8.
-    # ========== DOA (Direction of Arrival) ==========
-    def get_doa_angle(self) -> tuple[float, bool] | None:
-        """Get Direction of Arrival angle from microphone array.
-        The DOA angle indicates the direction of the sound source relative to the robot.
-        Angle is in radians: 0 = left, π/2 = front/back, π = right.
-        Returns:
-            Tuple of (angle_radians, speech_detected), or None if unavailable.
-            - angle_radians: Sound source direction in radians
-            - speech_detected: Whether speech is currently detected
-        """
-        if not self.is_available:
-            return None
-        try:
-            return self.reachy.media.get_DoA()
-        except Exception as e:
-            logger.debug(f"Error getting DOA: {e}")
-        return None
-    def get_doa_angle_degrees(self) -> float:
-        """Get DOA angle in degrees for Home Assistant entity.
-        Returns the raw DOA angle in degrees (0-180°).
-        SDK convention: 0° = left, 90° = front, 180° = right
-        """
-        doa = self.get_doa_angle()
-        if doa is None:
-            return 0.0
-        angle_rad, _ = doa
-        # Return raw angle in degrees (0-180°)
-        angle_deg = math.degrees(angle_rad)
-        return angle_deg
-    def get_speech_detected(self) -> bool:
-        """Get speech detection status from DOA.
-        Returns True if speech is currently detected.
-        """
-        doa = self.get_doa_angle()
-        if doa is None:
-            return False
-        _, speech_detected = doa
-        return speech_detected

+"""Reachy Mini controller wrapper for ESPHome entities."""
+import logging
+import math
+import threading
+import time
+from typing import Any, Dict, Optional, TYPE_CHECKING
+import numpy as np
+import requests
+from scipy.spatial.transform import Rotation as R
+if TYPE_CHECKING:
+    from reachy_mini import ReachyMini
+logger = logging.getLogger(__name__)
+class _ReSpeakerContext:
+    """Pair a ReSpeaker handle with a lock for use in a ``with`` statement.
+    Entering acquires the lock and yields the handle (which may be None);
+    leaving releases the lock and lets any exception propagate.
+    """
+    def __init__(self, respeaker, lock):
+        # Stored as a pair; the lock is only touched in __enter__/__exit__
+        self._respeaker, self._lock = respeaker, lock
+    def __enter__(self):
+        """Acquire the lock, then hand back the wrapped device."""
+        lock = self._lock
+        lock.acquire()
+        return self._respeaker
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Release the lock; returning False never suppresses exceptions."""
+        lock = self._lock
+        lock.release()
+        return False
+class ReachyController:
+    """
+    Wrapper class for Reachy Mini control operations.
+    Provides safe access to Reachy Mini SDK functions with error handling
+    and fallback for standalone mode (when robot is not available).
+    """
+    def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
+        """
+        Initialize the controller.
+        Args:
+            reachy_mini: ReachyMini instance, or None for standalone mode
+        """
+        self.reachy = reachy_mini
+        self._speaker_volume = 100  # Default volume; also the fallback when the API is unreachable
+        self._movement_manager = None  # Set later via set_movement_manager()
+        # Status caching - only for get_status() which may trigger I/O
+        # Note: get_current_head_pose() and get_current_joint_positions() are
+        # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
+        self._state_cache: Dict[str, Any] = {}
+        self._cache_ttl = 2.0  # 2 second cache TTL for status queries (increased from 1s)
+        self._last_status_query = 0.0  # time.time() of the last successful status query
+        # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
+        self._respeaker_lock = threading.Lock()
+    def set_movement_manager(self, movement_manager) -> None:
+        """Attach the MovementManager used by the pose setters.
+        Until this is called the pose setters only log a warning, because
+        they route every target through the manager's set_target_pose().
+        Args:
+            movement_manager: MovementManager instance
+        """
+        self._movement_manager = movement_manager
+        logger.info("MovementManager set for ReachyController")
+    @property
+    def is_available(self) -> bool:
+        """Whether a ReachyMini instance is attached (False in standalone mode).
+        This only checks that self.reachy is not None; it does not probe the robot.
+        """
+        return self.reachy is not None
+    # ========== Phase 1: Basic Status & Volume ==========
+    def _get_cached_status(self) -> Optional[Dict]:
+        """Get cached daemon status to reduce query frequency.
+        Note: get_status() may trigger I/O, so we cache it.
+        Unlike get_current_head_pose() and get_current_joint_positions()
+        which are non-blocking in the SDK.
+        Returns:
+            The cached status while it is younger than the TTL, otherwise a
+            freshly queried one. None when the robot is not available, and
+            the last cached value (possibly None) when the query fails.
+        """
+        now = time.time()
+        elapsed = now - self._last_status_query
+        # A negative elapsed time means the wall clock was stepped backwards
+        # since the last query; treat the cache as expired instead of serving
+        # the old status until the clock catches up again.
+        if 0.0 <= elapsed < self._cache_ttl:
+            return self._state_cache.get('status')
+        if not self.is_available:
+            return None
+        try:
+            status = self.reachy.client.get_status(wait=False)
+        except Exception as e:
+            logger.error(f"Error getting status: {e}")
+            return self._state_cache.get('status')  # Return stale cache on error
+        # Only a successful query refreshes the cache and its timestamp
+        self._state_cache['status'] = status
+        self._last_status_query = now
+        return status
+    def get_daemon_state(self) -> str:
+        """Return the daemon's 'state' field from the cached status.
+        Yields "not_available" when no status exists and 'unknown' when the
+        status has no 'state' key.
+        """
+        snapshot = self._get_cached_status()
+        return "not_available" if snapshot is None else snapshot.get('state', 'unknown')
+    def get_backend_ready(self) -> bool:
+        """Report whether the cached daemon status says state == 'running'.
+        A missing status counts as not ready.
+        """
+        snapshot = self._get_cached_status()
+        return snapshot is not None and snapshot.get('state') == 'running'
+    def get_error_message(self) -> str:
+        """Return the 'error' field of the cached daemon status.
+        Gives "Robot not available" when there is no status, and an empty
+        string when the field is missing or falsy.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return "Robot not available"
+        message = snapshot.get('error')
+        return message if message else ""
+    def get_speaker_volume(self) -> float:
+        """Get speaker volume (0-100) from the daemon HTTP API.
+        Only the daemon address comes from the cached status; the volume
+        request itself is sent on every call.
+        Returns:
+            The volume reported by the API, or the last known volume when the
+            robot, the daemon status or the API is unavailable.
+        """
+        if not self.is_available:
+            return self._speaker_volume
+        try:
+            # Get volume from daemon API (use cached status for IP)
+            status = self._get_cached_status()
+            if status is None:
+                return self._speaker_volume
+            # Treat an empty/None 'wlan_ip' like a missing one, as get_wlan_ip()
+            # does, so the URL never becomes "http://None:8000/..."
+            wlan_ip = status.get('wlan_ip') or 'localhost'
+            response = requests.get(f"http://{wlan_ip}:8000/api/volume/current", timeout=2)
+            if response.status_code == 200:
+                data = response.json()
+                self._speaker_volume = float(data.get('volume', self._speaker_volume))
+        except Exception as e:
+            logger.debug(f"Could not get volume from API: {e}")
+        return self._speaker_volume
+    def set_speaker_volume(self, volume: float) -> None:
+        """
+        Set speaker volume (0-100) through the daemon HTTP API.
+        The value is clamped to 0-100 and remembered locally even when the
+        robot or the API cannot be reached. The API receives it as an integer.
+        Args:
+            volume: Volume level 0-100
+        """
+        volume = max(0.0, min(100.0, volume))
+        self._speaker_volume = volume
+        if not self.is_available:
+            logger.warning("Cannot set volume: robot not available")
+            return
+        try:
+            # Set volume via daemon API (use cached status for IP)
+            status = self._get_cached_status()
+            if status is None:
+                logger.error("Cannot get daemon status for volume control")
+                return
+            # Treat an empty/None 'wlan_ip' like a missing one, as get_wlan_ip()
+            # does, so the URL never becomes "http://None:8000/..."
+            wlan_ip = status.get('wlan_ip') or 'localhost'
+            response = requests.post(
+                f"http://{wlan_ip}:8000/api/volume/set",
+                json={"volume": int(volume)},
+                timeout=5
+            )
+            if response.status_code == 200:
+                logger.info(f"Speaker volume set to {volume}%")
+            else:
+                logger.error(f"Failed to set volume: {response.status_code} {response.text}")
+        except Exception as e:
+            logger.error(f"Error setting speaker volume: {e}")
+    def get_microphone_volume(self) -> float:
+        """Get microphone volume (0-100) using daemon HTTP API.
+        Returns:
+            The volume reported by the API, or the last known value (50.0 if
+            none was ever stored) when the robot, the daemon status or the
+            API is unavailable.
+        """
+        # _microphone_volume is created lazily, hence the getattr default
+        fallback = getattr(self, '_microphone_volume', 50.0)
+        if not self.is_available:
+            return fallback
+        try:
+            # Get WLAN IP from cached daemon status
+            status = self._get_cached_status()
+            if status is None:
+                return fallback
+            # Treat an empty/None 'wlan_ip' like a missing one, as get_wlan_ip()
+            # does, so the URL never becomes "http://None:8000/..."
+            wlan_ip = status.get('wlan_ip') or 'localhost'
+            # Call the daemon API to get microphone volume
+            response = requests.get(
+                f"http://{wlan_ip}:8000/api/volume/microphone/current",
+                timeout=2
+            )
+            if response.status_code == 200:
+                data = response.json()
+                # Keep the previous value when the reply carries no 'volume',
+                # matching get_speaker_volume(), instead of resetting to 50
+                self._microphone_volume = float(data.get('volume', fallback))
+                return self._microphone_volume
+        except Exception as e:
+            logger.debug(f"Could not get microphone volume from API: {e}")
+        return getattr(self, '_microphone_volume', 50.0)
+    def set_microphone_volume(self, volume: float) -> None:
+        """
+        Set microphone volume (0-100) using daemon HTTP API.
+        The value is clamped to 0-100 and remembered locally even when the
+        robot or the API cannot be reached. The API receives it as an integer.
+        Args:
+            volume: Volume level 0-100
+        """
+        volume = max(0.0, min(100.0, volume))
+        self._microphone_volume = volume
+        if not self.is_available:
+            logger.warning("Cannot set microphone volume: robot not available")
+            return
+        try:
+            # Get WLAN IP from cached daemon status
+            status = self._get_cached_status()
+            if status is None:
+                logger.error("Cannot get daemon status for microphone volume control")
+                return
+            # Treat an empty/None 'wlan_ip' like a missing one, as get_wlan_ip()
+            # does, so the URL never becomes "http://None:8000/..."
+            wlan_ip = status.get('wlan_ip') or 'localhost'
+            # Call the daemon API to set microphone volume
+            response = requests.post(
+                f"http://{wlan_ip}:8000/api/volume/microphone/set",
+                json={"volume": int(volume)},
+                timeout=5
+            )
+            if response.status_code == 200:
+                logger.info(f"Microphone volume set to {volume}%")
+            else:
+                logger.error(f"Failed to set microphone volume: {response.status_code} {response.text}")
+        except Exception as e:
+            logger.error(f"Error setting microphone volume: {e}")
+    # ========== Phase 2: Motor Control ==========
+    def get_motors_enabled(self) -> bool:
+        """Report whether the motors are enabled, based on the cached status.
+        Uses backend_status['motor_control_mode'] when a non-empty
+        backend_status dict is present; otherwise falls back to whether the
+        daemon state is 'running'. Returns False with no status or on error.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return False
+        try:
+            backend = snapshot.get('backend_status')
+            if backend and isinstance(backend, dict):
+                return backend.get('motor_control_mode', 'disabled') == 'enabled'
+            # No usable backend details: infer from the daemon state
+            return snapshot.get('state') == 'running'
+        except Exception as e:
+            logger.error(f"Error getting motor state: {e}")
+            return False
+    def set_motors_enabled(self, enabled: bool) -> None:
+        """
+        Switch the motors on or off through the SDK.
+        Logs a warning and does nothing when no robot is attached; SDK errors
+        are logged, not raised.
+        Args:
+            enabled: True to enable, False to disable
+        """
+        if not self.is_available:
+            logger.warning("Cannot control motors: robot not available")
+            return
+        try:
+            # Pick the SDK call first, then run it and log the outcome
+            action = self.reachy.enable_motors if enabled else self.reachy.disable_motors
+            action()
+            logger.info("Motors enabled" if enabled else "Motors disabled")
+        except Exception as e:
+            logger.error(f"Error setting motor state: {e}")
+    def get_motor_mode(self) -> str:
+        """Return the motor control mode from the cached daemon status.
+        Uses backend_status['motor_control_mode'] when a non-empty
+        backend_status dict is present; otherwise reports "enabled" if the
+        daemon state is 'running' and "disabled" if not. Returns "disabled"
+        with no status and "error" if reading the status raises.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return "disabled"
+        try:
+            backend = snapshot.get('backend_status')
+            if backend and isinstance(backend, dict):
+                return backend.get('motor_control_mode', 'disabled')
+            # No usable backend details: infer from the daemon state
+            return "enabled" if snapshot.get('state') == 'running' else "disabled"
+        except Exception as e:
+            logger.error(f"Error getting motor mode: {e}")
+            return "error"
+    def set_motor_mode(self, mode: str) -> None:
+        """
+        Switch the motor control mode through the SDK.
+        Logs a warning and does nothing when no robot is attached or the mode
+        is not recognised; SDK errors are logged, not raised.
+        Args:
+            mode: One of "enabled", "disabled", "gravity_compensation"
+        """
+        if not self.is_available:
+            logger.warning("Cannot set motor mode: robot not available")
+            return
+        # Mode name -> SDK method name; resolved lazily so only the requested
+        # method is looked up on the robot object
+        sdk_calls = {
+            "enabled": "enable_motors",
+            "disabled": "disable_motors",
+            "gravity_compensation": "enable_gravity_compensation",
+        }
+        try:
+            method_name = sdk_calls.get(mode)
+            if method_name is None:
+                logger.warning(f"Invalid motor mode: {mode}")
+                return
+            getattr(self.reachy, method_name)()
+            logger.info(f"Motor mode set to {mode}")
+        except Exception as e:
+            logger.error(f"Error setting motor mode: {e}")
+    def wake_up(self) -> None:
+        """Run the SDK wake-up animation.
+        Logs a warning and does nothing when no robot is attached; SDK errors
+        are logged, not raised.
+        """
+        if not self.is_available:
+            logger.warning("Cannot wake up: robot not available")
+            return
+        try:
+            self.reachy.wake_up()
+        except Exception as e:
+            logger.error(f"Error executing wake up: {e}")
+        else:
+            logger.info("Wake up animation executed")
+    def go_to_sleep(self) -> None:
+        """Run the SDK sleep animation (goto_sleep).
+        Logs a warning and does nothing when no robot is attached; SDK errors
+        are logged, not raised.
+        """
+        if not self.is_available:
+            logger.warning("Cannot sleep: robot not available")
+            return
+        try:
+            self.reachy.goto_sleep()
+        except Exception as e:
+            logger.error(f"Error executing sleep: {e}")
+        else:
+            logger.info("Sleep animation executed")
+    # ========== Phase 3: Pose Control ==========
+    def _get_head_pose(self) -> Optional[np.ndarray]:
+        """Fetch the current head pose from the SDK.
+        Note: SDK's get_current_head_pose() is non-blocking - it returns
+        cached data from Zenoh subscriptions, so no throttling needed.
+        Returns:
+            Whatever the SDK returns, or None when no robot is attached or
+            the call raises (the error is logged).
+        """
+        if self.is_available:
+            try:
+                return self.reachy.get_current_head_pose()
+            except Exception as e:
+                logger.error(f"Error getting head pose: {e}")
+        return None
+    def _get_joint_positions(self) -> Optional[tuple]:
+        """Fetch the current joint positions from the SDK.
+        Note: SDK's get_current_joint_positions() is non-blocking - it returns
+        cached data from Zenoh subscriptions, so no throttling needed.
+        Returns:
+            Whatever the SDK returns, or None when no robot is attached or
+            the call raises (the error is logged).
+        """
+        if self.is_available:
+            try:
+                return self.reachy.get_current_joint_positions()
+            except Exception as e:
+                logger.error(f"Error getting joint positions: {e}")
+        return None
+    def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
+        """
+        Split a 4x4 pose matrix into translation and 'xyz' Euler angles.
+        Args:
+            pose_matrix: 4x4 homogeneous transformation matrix
+        Returns:
+            tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
+        """
+        # Translation: first three rows of the last column
+        x, y, z = pose_matrix[:3, 3]
+        # Orientation: upper-left 3x3 block, read as 'xyz' Euler angles
+        roll, pitch, yaw = R.from_matrix(pose_matrix[:3, :3]).as_euler('xyz')
+        return x, y, z, roll, pitch, yaw
+    def _get_head_pose_component(self, component: str) -> float:
+        """Read one component of the current head pose.
+        Positions are converted from metres to millimetres and angles from
+        radians to degrees.
+        Args:
+            component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
+        Returns:
+            The component value, or 0.0 when the pose is unavailable, the
+            name is unknown, or extraction fails
+        """
+        pose = self._get_head_pose()
+        if pose is None:
+            return 0.0
+        try:
+            names = ('x', 'y', 'z', 'roll', 'pitch', 'yaw')
+            raw = dict(zip(names, self._extract_pose_from_matrix(pose)))
+            if component in ('x', 'y', 'z'):
+                return raw[component] * 1000  # m to mm
+            if component in ('roll', 'pitch', 'yaw'):
+                return math.degrees(raw[component])
+            return 0.0
+        except Exception as e:
+            logger.error(f"Error getting head {component}: {e}")
+            return 0.0
+    def _disabled_pose_setter(self, name: str) -> None:
+        """Log a warning that a pose setter had no MovementManager to use.
+        Args:
+            name: Setter suffix inserted into the message as "set_<name>"
+        """
+        logger.warning(f"set_{name} failed - MovementManager not set")
+    def _set_pose_via_manager(self, **kwargs) -> bool:
+        """Forward a target pose to the MovementManager, if one is attached.
+        Args:
+            **kwargs: Passed unchanged to MovementManager.set_target_pose()
+        Returns:
+            True when the call was forwarded, False when no manager is set.
+        """
+        manager = self._movement_manager
+        if manager is not None:
+            manager.set_target_pose(**kwargs)
+            return True
+        return False
+    # Head position getters and setters
+    def get_head_x(self) -> float:
+        """Get the current head X position in mm (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('x')
+    def set_head_x(self, x_mm: float) -> None:
+        """Set the head X target via MovementManager.
+        Args:
+            x_mm: Target X position in mm (converted to meters for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(x=x_mm / 1000.0):  # mm to m
+            self._disabled_pose_setter('head_x')
+    def get_head_y(self) -> float:
+        """Get the current head Y position in mm (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('y')
+    def set_head_y(self, y_mm: float) -> None:
+        """Set the head Y target via MovementManager.
+        Args:
+            y_mm: Target Y position in mm (converted to meters for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(y=y_mm / 1000.0):  # mm to m
+            self._disabled_pose_setter('head_y')
+    def get_head_z(self) -> float:
+        """Get the current head Z position in mm (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('z')
+    def set_head_z(self, z_mm: float) -> None:
+        """Set the head Z target via MovementManager.
+        Args:
+            z_mm: Target Z position in mm (converted to meters for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(z=z_mm / 1000.0):  # mm to m
+            self._disabled_pose_setter('head_z')
+    # Head orientation getters and setters
+    def get_head_roll(self) -> float:
+        """Get the current head roll angle in degrees (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('roll')
+    def set_head_roll(self, roll_deg: float) -> None:
+        """Set the head roll target via MovementManager.
+        Args:
+            roll_deg: Target roll in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
+            self._disabled_pose_setter('head_roll')
+    def get_head_pitch(self) -> float:
+        """Get the current head pitch angle in degrees (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('pitch')
+    def set_head_pitch(self, pitch_deg: float) -> None:
+        """Set the head pitch target via MovementManager.
+        Args:
+            pitch_deg: Target pitch in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
+            self._disabled_pose_setter('head_pitch')
+    def get_head_yaw(self) -> float:
+        """Get the current head yaw angle in degrees (0.0 if the pose cannot be read)."""
+        return self._get_head_pose_component('yaw')
+    def set_head_yaw(self, yaw_deg: float) -> None:
+        """Set the head yaw target via MovementManager.
+        Args:
+            yaw_deg: Target yaw in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
+            self._disabled_pose_setter('head_yaw')
+    def get_body_yaw(self) -> float:
+        """Get body yaw angle in degrees.
+        Reads the first entry of the first joint group returned by the SDK
+        and converts it from radians. Returns 0.0 when the joints are
+        unavailable or cannot be unpacked.
+        """
+        positions = self._get_joint_positions()
+        if positions is None:
+            return 0.0
+        try:
+            first_group, _ = positions
+            yaw_rad = first_group[0]
+            return math.degrees(yaw_rad)
+        except Exception as e:
+            logger.error(f"Error getting body yaw: {e}")
+            return 0.0
+    def set_body_yaw(self, yaw_deg: float) -> None:
+        """Set the body yaw target via MovementManager.
+        Args:
+            yaw_deg: Target body yaw in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(body_yaw=math.radians(yaw_deg)):
+            self._disabled_pose_setter('body_yaw')
+    def get_antenna_left(self) -> float:
+        """Get left antenna angle in degrees.
+        Reads index 1 of the second joint group returned by the SDK and
+        converts it from radians. Returns 0.0 when the joints are
+        unavailable or cannot be unpacked.
+        """
+        positions = self._get_joint_positions()
+        if positions is None:
+            return 0.0
+        try:
+            _, antenna_pair = positions
+            left_rad = antenna_pair[1]  # left is index 1
+            return math.degrees(left_rad)
+        except Exception as e:
+            logger.error(f"Error getting left antenna: {e}")
+            return 0.0
+    def set_antenna_left(self, angle_deg: float) -> None:
+        """Set the left antenna target via MovementManager.
+        Args:
+            angle_deg: Target angle in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
+            self._disabled_pose_setter('antenna_left')
+    def get_antenna_right(self) -> float:
+        """Get right antenna angle in degrees.
+        Reads index 0 of the second joint group returned by the SDK and
+        converts it from radians. Returns 0.0 when the joints are
+        unavailable or cannot be unpacked.
+        """
+        positions = self._get_joint_positions()
+        if positions is None:
+            return 0.0
+        try:
+            _, antenna_pair = positions
+            right_rad = antenna_pair[0]  # right is index 0
+            return math.degrees(right_rad)
+        except Exception as e:
+            logger.error(f"Error getting right antenna: {e}")
+            return 0.0
+    def set_antenna_right(self, angle_deg: float) -> None:
+        """Set the right antenna target via MovementManager.
+        Args:
+            angle_deg: Target angle in degrees (converted to radians for the manager)
+        """
+        # Without a MovementManager the request is dropped and a warning is logged
+        if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
+            self._disabled_pose_setter('antenna_right')
+    # ========== Phase 4: Look At Control ==========
+    def get_look_at_x(self) -> float:
+        """Get look at target X coordinate in world frame (meters).
+        Returns the value last stored by set_look_at_x(), or 0.0 if none.
+        """
+        # This is a target position, not a current state
+        # It is stored on the instance and created lazily by the setter
+        return getattr(self, '_look_at_x', 0.0)
+    def set_look_at_x(self, x: float) -> None:
+        """Store the look at target X coordinate and call _update_look_at().
+        Args:
+            x: Target X coordinate
+        """
+        self._look_at_x = x
+        self._update_look_at()
+    def get_look_at_y(self) -> float:
+        """Get look at target Y coordinate in world frame (meters).
+        Returns the value last stored by set_look_at_y(), or 0.0 if none.
+        """
+        return getattr(self, '_look_at_y', 0.0)
+    def set_look_at_y(self, y: float) -> None:
+        """Store the look at target Y coordinate and call _update_look_at().
+        Args:
+            y: Target Y coordinate
+        """
+        self._look_at_y = y
+        self._update_look_at()
+    def get_look_at_z(self) -> float:
+        """Get look at target Z coordinate in world frame (meters).
+        Returns the value last stored by set_look_at_z(), or 0.0 if none.
+        """
+        return getattr(self, '_look_at_z', 0.0)
+    def set_look_at_z(self, z: float) -> None:
+        """Store the look at target Z coordinate and call _update_look_at().
+        Args:
+            z: Target Z coordinate
+        """
+        self._look_at_z = z
+        self._update_look_at()
+    def _update_look_at(self) -> None:
+        """Update robot to look at the target coordinates.
+        NOTE: Disabled to prevent conflict with MovementManager's control loop.
+        The method currently only logs a warning; the stored look-at
+        coordinates are not sent to the robot.
+        """
+        logger.warning("_update_look_at is disabled - MovementManager controls head pose")
+        # Previous implementation, kept for reference only (not executed):
+        # if not self.is_available:
+        #     return
+        # try:
+        #     x = getattr(self, '_look_at_x', 0.0)
+        #     y = getattr(self, '_look_at_y', 0.0)
+        #     z = getattr(self, '_look_at_z', 0.0)
+        #     self.reachy.look_at_world(x, y, z)
+        #     logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
+        # except Exception as e:
+        #     logger.error(f"Error updating look at: {e}")
+    # ========== Phase 6: Diagnostic Information ==========
+    def get_control_loop_frequency(self) -> float:
+        """Return the mean control loop frequency (Hz) from the cached status.
+        Reads backend_status['control_loop_stats']['mean_control_loop_frequency'].
+        Returns 0.0 when the status or any of those entries is missing, or on error.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return 0.0
+        try:
+            backend = snapshot.get('backend_status')
+            if not (backend and isinstance(backend, dict)):
+                return 0.0
+            loop_stats = backend.get('control_loop_stats', {})
+            return loop_stats.get('mean_control_loop_frequency', 0.0)
+        except Exception as e:
+            logger.error(f"Error getting control loop frequency: {e}")
+            return 0.0
+    def get_sdk_version(self) -> str:
+        """Return the 'version' field of the cached daemon status.
+        Gives "N/A" without a status and "unknown" when the field is missing or falsy.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return "N/A"
+        version = snapshot.get('version')
+        return version if version else "unknown"
+    def get_robot_name(self) -> str:
+        """Return the 'robot_name' field of the cached daemon status.
+        Gives "N/A" without a status and "unknown" when the field is missing or falsy.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return "N/A"
+        robot_name = snapshot.get('robot_name')
+        return robot_name if robot_name else "unknown"
+    def get_wireless_version(self) -> bool:
+        """Return the 'wireless_version' flag of the cached daemon status.
+        False when there is no status or the flag is absent.
+        """
+        snapshot = self._get_cached_status()
+        return False if snapshot is None else snapshot.get('wireless_version', False)
+    def get_simulation_mode(self) -> bool:
+        """Return the 'simulation_enabled' flag of the cached daemon status.
+        False when there is no status or the flag is absent.
+        """
+        snapshot = self._get_cached_status()
+        return False if snapshot is None else snapshot.get('simulation_enabled', False)
+    def get_wlan_ip(self) -> str:
+        """Return the 'wlan_ip' field of the cached daemon status.
+        Gives "N/A" when there is no status or the field is missing or falsy.
+        """
+        snapshot = self._get_cached_status()
+        if snapshot is None:
+            return "N/A"
+        address = snapshot.get('wlan_ip')
+        return address if address else "N/A"
+    # ========== Phase 7: IMU Sensors (Wireless only) ==========
+    def _get_imu_value(self, sensor_type: str, index: int) -> float:
+        """Read one value from the robot's IMU data.
+        Args:
+            sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
+            index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
+        Returns:
+            The sensor value as a float, or 0.0 when the robot, the IMU data
+            or the requested sensor is missing, or on error
+        """
+        if not self.is_available:
+            return 0.0
+        try:
+            readings = self.reachy.imu
+            if readings is None or sensor_type not in readings:
+                return 0.0
+            sample = readings[sensor_type]
+            if index < 0:
+                # Negative index marks a scalar reading
+                return float(sample)
+            return float(sample[index])
+        except Exception as e:
+            logger.debug(f"Error getting IMU {sensor_type}: {e}")
+            return 0.0
+    def get_imu_accel_x(self) -> float:
+        """Get IMU X-axis acceleration in m/s² (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('accelerometer', 0)
+    def get_imu_accel_y(self) -> float:
+        """Get IMU Y-axis acceleration in m/s² (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('accelerometer', 1)
+    def get_imu_accel_z(self) -> float:
+        """Get IMU Z-axis acceleration in m/s² (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('accelerometer', 2)
+    def get_imu_gyro_x(self) -> float:
+        """Get IMU X-axis angular velocity in rad/s (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('gyroscope', 0)
+    def get_imu_gyro_y(self) -> float:
+        """Get IMU Y-axis angular velocity in rad/s (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('gyroscope', 1)
+    def get_imu_gyro_z(self) -> float:
+        """Get IMU Z-axis angular velocity in rad/s (0.0 if the IMU data is unavailable)."""
+        return self._get_imu_value('gyroscope', 2)
+    def get_imu_temperature(self) -> float:
+        """Get IMU temperature in °C (0.0 if the IMU data is unavailable)."""
+        # Index -1 tells _get_imu_value() to read the entry as a scalar
+        return self._get_imu_value('temperature', -1)
+    # ========== Phase 11: LED Control (DISABLED) ==========
+    # LED control is disabled because LEDs are hidden inside the robot.
+    # See PROJECT_PLAN.md principle 8.
+    def _get_respeaker(self):
+        """Wrap the ReSpeaker device and the shared lock in a context manager.
+        The returned context manager acquires the lock on entry and yields
+        the device, or None when the robot, its media manager or its audio
+        backend is missing, or when looking the device up raises.
+        Usage:
+            with self._get_respeaker() as respeaker:
+                if respeaker:
+                    respeaker.read("...")
+        """
+        device = None
+        if self.is_available:
+            try:
+                media = self.reachy.media
+                if media and media.audio:
+                    device = media.audio._respeaker
+            except Exception:
+                device = None
+        return _ReSpeakerContext(device, self._respeaker_lock)
+    # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
+    def get_agc_enabled(self) -> bool:
+        """Get AGC (Automatic Gain Control) enabled status.
+        Reads "PP_AGCONOFF" from the ReSpeaker and remembers the result.
+        Falls back to the last known value (True if none) when the device is
+        missing, returns nothing, or the read fails.
+        """
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    reply = device.read("PP_AGCONOFF")
+                    if reply is not None:
+                        self._agc_enabled = bool(reply[1])
+                except Exception as e:
+                    logger.debug(f"Error getting AGC status: {e}")
+        return getattr(self, '_agc_enabled', True)  # Default to enabled
+    def set_agc_enabled(self, enabled: bool) -> None:
+        """Set AGC (Automatic Gain Control) enabled status.
+        The flag is remembered locally first; it is then written to
+        "PP_AGCONOFF" as [1] or [0] if a ReSpeaker device is present.
+        Write errors are logged, not raised.
+        """
+        self._agc_enabled = enabled
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    flag = 1 if enabled else 0
+                    device.write("PP_AGCONOFF", [flag])
+                    logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
+                except Exception as e:
+                    logger.error(f"Error setting AGC status: {e}")
+    def get_agc_max_gain(self) -> float:
+        """Get AGC maximum gain in dB (0-40 dB range).
+        Reads "PP_AGCMAXGAIN" from the ReSpeaker and remembers the result.
+        Falls back to the last known value (30.0 if none) when the device is
+        missing, returns nothing, or the read fails.
+        """
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    reply = device.read("PP_AGCMAXGAIN")
+                    if reply is not None:
+                        self._agc_max_gain = float(reply[0])
+                except Exception as e:
+                    logger.debug(f"Error getting AGC max gain: {e}")
+        return getattr(self, '_agc_max_gain', 30.0)  # Default to optimized value
+    def set_agc_max_gain(self, gain: float) -> None:
+        """Set AGC maximum gain in dB (0-40 dB range).
+        The value is clamped to 0-40 and remembered locally first; it is
+        then written to "PP_AGCMAXGAIN" if a ReSpeaker device is present.
+        Write errors are logged, not raised.
+        """
+        gain = min(40.0, max(0.0, gain))  # XVF3800 supports up to 40dB
+        self._agc_max_gain = gain
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    device.write("PP_AGCMAXGAIN", [gain])
+                    logger.info(f"AGC max gain set to {gain} dB")
+                except Exception as e:
+                    logger.error(f"Error setting AGC max gain: {e}")
+    def get_noise_suppression(self) -> float:
+        """Get noise suppression level (0-100%).
+        PP_MIN_NS represents "minimum signal preservation ratio":
+        - PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% suppression
+        - PP_MIN_NS = 0.15 means "keep at least 15% of signal" = 85% suppression
+        We display "noise suppression strength" to user, so:
+        - suppression_percent = (1.0 - PP_MIN_NS) * 100
+        Falls back to the last known level (15.0 if none) when the device is
+        missing, returns nothing, or the read fails.
+        """
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    reply = device.read("PP_MIN_NS")
+                    if reply is not None:
+                        raw_value = reply[0]
+                        # Invert the preservation ratio and clamp to 0-100
+                        percent = (1.0 - raw_value) * 100.0
+                        self._noise_suppression = max(0.0, min(100.0, percent))
+                        logger.debug(f"Noise suppression: PP_MIN_NS={raw_value:.2f} -> {self._noise_suppression:.1f}%")
+                        return self._noise_suppression
+                except Exception as e:
+                    logger.debug(f"Error getting noise suppression: {e}")
+        return getattr(self, '_noise_suppression', 15.0)
+    def set_noise_suppression(self, level: float) -> None:
+        """Set noise suppression level (0-100%).
+        The level is clamped to 0-100 and remembered locally first; it is
+        then written to "PP_MIN_NS" as 1.0 - level / 100 if a ReSpeaker
+        device is present. Write errors are logged, not raised.
+        """
+        level = min(100.0, max(0.0, level))
+        self._noise_suppression = level
+        with self._get_respeaker() as device:
+            if device is not None:
+                try:
+                    # Convert percentage to PP_MIN_NS value (inverted)
+                    min_ns = 1.0 - (level / 100.0)
+                    device.write("PP_MIN_NS", [min_ns])
+                    logger.info(f"Noise suppression set to {level}%")
+                except Exception as e:
+                    logger.error(f"Error setting noise suppression: {e}")
+    def get_echo_cancellation_converged(self) -> bool:
+        """Check if echo cancellation has converged."""
+        with self._get_respeaker() as respeaker:
+            if respeaker is None:
+                return False
+            try:
+                result = respeaker.read("AEC_AECCONVERGED")
+                if result is not None:
+                    return bool(result[1])
+            except Exception as e:
+                logger.debug(f"Error getting AEC converged status: {e}")
+        return False
+    # ========== DOA (Direction of Arrival) ==========
+    def get_doa_angle(self) -> tuple[float, bool] | None:
+        """Get Direction of Arrival angle from microphone array.
+        The DOA angle indicates the direction of the sound source relative to the robot.
+        Angle is in radians: 0 = left, π/2 = front/back, π = right.
+        Returns:
+            Tuple of (angle_radians, speech_detected), or None if unavailable.
+            - angle_radians: Sound source direction in radians
+            - speech_detected: Whether speech is currently detected
+        """
+        if not self.is_available:
+            return None
+        try:
+            if self.reachy.media and self.reachy.media.audio:
+                return self.reachy.media.audio.get_DoA()
+        except Exception as e:
+            logger.debug(f"Error getting DOA: {e}")
+        return None
+    def get_doa_angle_degrees(self) -> float:
+        """Get DOA angle in degrees for Home Assistant entity.
+        Returns the raw DOA angle in degrees (0-180°).
+        SDK convention: 0° = left, 90° = front, 180° = right
+        """
+        doa = self.get_doa_angle()
+        if doa is None:
+            return 0.0
+        angle_rad, _ = doa
+        # Return raw angle in degrees (0-180°)
+        angle_deg = math.degrees(angle_rad)
+        return angle_deg
+    def get_speech_detected(self) -> bool:
+        """Get speech detection status from DOA.
+        Returns True if speech is currently detected.
+        """
+        doa = self.get_doa_angle()
+        if doa is None:
+            return False
+        _, speech_detected = doa
+        return speech_detected

reachy_mini_ha_voice/satellite.py ADDED Viewed

	@@ -0,0 +1,784 @@

+"""Voice satellite protocol for Reachy Mini."""
+import hashlib
+import logging
+import math
+import posixpath
+import shutil
+import time
+from collections.abc import Iterable
+from typing import Dict, Optional, Set, Union, TYPE_CHECKING
+from urllib.parse import urlparse, urlunparse
+from urllib.request import urlopen
+if TYPE_CHECKING:
+    from .camera_server import MJPEGCameraServer
+# pylint: disable=no-name-in-module
+from aioesphomeapi.api_pb2 import (  # type: ignore[attr-defined]
+    ButtonCommandRequest,
+    CameraImageRequest,
+    DeviceInfoRequest,
+    DeviceInfoResponse,
+    ListEntitiesDoneResponse,
+    ListEntitiesRequest,
+    MediaPlayerCommandRequest,
+    NumberCommandRequest,
+    SelectCommandRequest,
+    SubscribeHomeAssistantStatesRequest,
+    SubscribeStatesRequest,
+    SwitchCommandRequest,
+    VoiceAssistantAnnounceFinished,
+    VoiceAssistantAnnounceRequest,
+    VoiceAssistantAudio,
+    VoiceAssistantConfigurationRequest,
+    VoiceAssistantConfigurationResponse,
+    VoiceAssistantEventResponse,
+    VoiceAssistantExternalWakeWord,
+    VoiceAssistantRequest,
+    VoiceAssistantSetConfiguration,
+    VoiceAssistantTimerEventResponse,
+    VoiceAssistantWakeWord,
+)
+from aioesphomeapi.model import (
+    VoiceAssistantEventType,
+    VoiceAssistantFeature,
+    VoiceAssistantTimerEventType,
+)
+from google.protobuf import message
+from pymicro_wakeword import MicroWakeWord
+from pyopen_wakeword import OpenWakeWord
+from .api_server import APIServer
+from .entity import MediaPlayerEntity
+from .entity_registry import EntityRegistry, get_entity_key
+from .models import AvailableWakeWord, ServerState, WakeWordType
+from .util import call_all
+from .reachy_controller import ReachyController
+_LOGGER = logging.getLogger(__name__)
+class VoiceSatelliteProtocol(APIServer):
+    """Voice satellite protocol handler for ESPHome."""
+    def __init__(self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None) -> None:
+        """Create a protocol handler bound to the shared server state.
+        Args:
+            state: Shared server state; this instance registers itself as
+                ``state.satellite``.
+            camera_server: Optional camera server, used for the gesture state
+                callback and for face-tracking control during conversations.
+        Entities are created only the first time a handler is built for a
+        given ``state``; later instances just re-point the existing entities
+        at themselves.
+        """
+        super().__init__(state.name)
+        self.state = state
+        self.state.satellite = self
+        self.camera_server = camera_server
+        # Initialize streaming state early (before entity setup)
+        self._is_streaming_audio = False
+        self._tts_url: Optional[str] = None
+        self._tts_played = False
+        self._continue_conversation = False
+        self._timer_finished = False
+        # External wake words announced by Home Assistant, keyed by wake word id
+        self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
+        # Conversation tracking for continuous conversation
+        self._conversation_id: Optional[str] = None
+        self._conversation_timeout = 300.0  # 5 minutes, same as ESPHome default
+        self._last_conversation_time = 0.0
+        # Initialize Reachy controller
+        self.reachy_controller = ReachyController(state.reachy_mini)
+        # Connect MovementManager to ReachyController for pose control from HA
+        if state.motion is not None and state.motion.movement_manager is not None:
+            self.reachy_controller.set_movement_manager(state.motion.movement_manager)
+            # Setup speech sway callback for audio-driven head motion
+            def sway_callback(sway: dict) -> None:
+                # Look the manager up on every call; missing sway keys default to 0.0
+                mm = state.motion.movement_manager
+                if mm is not None:
+                    mm.set_speech_sway(
+                        sway.get("x_m", 0.0),
+                        sway.get("y_m", 0.0),
+                        sway.get("z_m", 0.0),
+                        sway.get("roll_rad", 0.0),
+                        sway.get("pitch_rad", 0.0),
+                        sway.get("yaw_rad", 0.0),
+                    )
+            state.tts_player.set_sway_callback(sway_callback)
+            _LOGGER.info("Speech sway callback configured for TTS player")
+        # Initialize entity registry
+        self._entity_registry = EntityRegistry(
+            server=self,
+            reachy_controller=self.reachy_controller,
+            camera_server=camera_server,
+            play_emotion_callback=self._play_emotion,
+        )
+        # Connect gesture state callback
+        if camera_server:
+            camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
+        # Only setup entities once (check if already initialized)
+        # This prevents duplicate entity registration on reconnection
+        if not getattr(self.state, '_entities_initialized', False):
+            if self.state.media_player_entity is None:
+                self.state.media_player_entity = MediaPlayerEntity(
+                    server=self,
+                    key=get_entity_key("reachy_mini_media_player"),
+                    name="Media Player",
+                    object_id="reachy_mini_media_player",
+                    music_player=state.music_player,
+                    announce_player=state.tts_player,
+                )
+                self.state.entities.append(self.state.media_player_entity)
+            # Setup all entities using the registry
+            self._entity_registry.setup_all_entities(self.state.entities)
+            # Mark entities as initialized
+            self.state._entities_initialized = True
+            _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
+        else:
+            _LOGGER.debug("Entities already initialized, skipping setup")
+            # Update server reference in existing entities
+            for entity in self.state.entities:
+                entity.server = self
+    def handle_voice_event(
+        self, event_type: VoiceAssistantEventType, data: Dict[str, str]
+    ) -> None:
+        """React to one voice pipeline event from Home Assistant.
+        Args:
+            event_type: The pipeline event that occurred.
+            data: The event's name/value arguments.
+        Updates the TTS/streaming flags on this instance and triggers the
+        matching robot animation; event types not listed below are only logged.
+        """
+        _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
+        if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_START:
+            # New pipeline run: reset the per-run TTS state
+            self._tts_url = data.get("url")
+            self._tts_played = False
+            self._continue_conversation = False
+            # Reachy Mini: Start listening animation
+            self._reachy_on_listening()
+            # Note: TTS URL requires HA authentication, cannot pre-download
+            # Speaking animation uses JSON-defined multi-frequency sway instead
+        elif event_type in (
+            VoiceAssistantEventType.VOICE_ASSISTANT_STT_VAD_END,
+            VoiceAssistantEventType.VOICE_ASSISTANT_STT_END,
+        ):
+            # Speech has ended: stop forwarding microphone audio
+            self._is_streaming_audio = False
+            # Reachy Mini: Stop listening, start thinking
+            self._reachy_on_thinking()
+        elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_PROGRESS:
+            if data.get("tts_start_streaming") == "1":
+                # Start streaming early
+                self.play_tts()
+        elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_INTENT_END:
+            if data.get("continue_conversation") == "1":
+                self._continue_conversation = True
+        elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
+            # Reachy Mini: Start speaking animation (JSON-defined multi-frequency sway)
+            _LOGGER.debug("TTS_START event received, triggering speaking animation")
+            self._reachy_on_speaking()
+        elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
+            # play_tts() is a no-op if playback was already started early
+            self._tts_url = data.get("url")
+            self.play_tts()
+        elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_RUN_END:
+            # Pipeline run ended
+            self._is_streaming_audio = False
+            # Following reference project pattern
+            # If no TTS was played in this run, finish the turn here instead
+            if not self._tts_played:
+                self._tts_finished()
+            self._tts_played = False
+    def handle_timer_event(
+        self,
+        event_type: VoiceAssistantTimerEventType,
+        msg: VoiceAssistantTimerEventResponse,
+    ) -> None:
+        _LOGGER.debug("Timer event: type=%s", event_type.name)
+        if event_type == VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
+            if not self._timer_finished:
+                self.state.active_wake_words.add(self.state.stop_word.id)
+                self._timer_finished = True
+                self.duck()
+                self._play_timer_finished()
+                # Reachy Mini: Timer finished animation
+                self._reachy_on_timer_finished()
+    def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
+        """Dispatch one incoming API message and yield any response messages.
+        Args:
+            msg: A decoded protobuf message from the connected client.
+        Yields:
+            Response messages for the request types that produce one; message
+            types not handled below yield nothing.
+        """
+        if isinstance(msg, VoiceAssistantEventResponse):
+            # Pipeline event
+            # Flatten the repeated name/value arguments into a dict
+            data: Dict[str, str] = {}
+            for arg in msg.data:
+                data[arg.name] = arg.value
+            self.handle_voice_event(VoiceAssistantEventType(msg.event_type), data)
+        elif isinstance(msg, VoiceAssistantAnnounceRequest):
+            _LOGGER.debug("Announcing: %s", msg.text)
+            assert self.state.media_player_entity is not None
+            # Optional pre-announce media is played before the announcement itself
+            urls = []
+            if msg.preannounce_media_id:
+                urls.append(msg.preannounce_media_id)
+            urls.append(msg.media_id)
+            # Enable the stop word for the duration of the announcement
+            self.state.active_wake_words.add(self.state.stop_word.id)
+            self._continue_conversation = msg.start_conversation
+            self.duck()
+            yield from self.state.media_player_entity.play(
+                urls, announcement=True, done_callback=self._tts_finished
+            )
+        elif isinstance(msg, VoiceAssistantTimerEventResponse):
+            self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)
+        elif isinstance(msg, DeviceInfoRequest):
+            yield DeviceInfoResponse(
+                uses_password=False,
+                name=self.state.name,
+                mac_address=self.state.mac_address,
+                voice_assistant_feature_flags=(
+                    VoiceAssistantFeature.VOICE_ASSISTANT
+                    | VoiceAssistantFeature.API_AUDIO
+                    | VoiceAssistantFeature.ANNOUNCE
+                    | VoiceAssistantFeature.START_CONVERSATION
+                    | VoiceAssistantFeature.TIMERS
+                ),
+            )
+        elif isinstance(
+            msg,
+            (
+                ListEntitiesRequest,
+                SubscribeHomeAssistantStatesRequest,
+                SubscribeStatesRequest,
+                MediaPlayerCommandRequest,
+                NumberCommandRequest,
+                SwitchCommandRequest,
+                SelectCommandRequest,
+                ButtonCommandRequest,
+                CameraImageRequest,
+            ),
+        ):
+            # Entity-level requests are offered to every entity in turn
+            for entity in self.state.entities:
+                yield from entity.handle_message(msg)
+            if isinstance(msg, ListEntitiesRequest):
+                yield ListEntitiesDoneResponse()
+        elif isinstance(msg, VoiceAssistantConfigurationRequest):
+            available_wake_words = [
+                VoiceAssistantWakeWord(
+                    id=ww.id,
+                    wake_word=ww.wake_word,
+                    trained_languages=ww.trained_languages,
+                )
+                for ww in self.state.available_wake_words.values()
+            ]
+            # Also advertise external wake words; only "micro" models are accepted.
+            # They are remembered so they can be downloaded when activated.
+            for eww in msg.external_wake_words:
+                if eww.model_type != "micro":
+                    continue
+                available_wake_words.append(
+                    VoiceAssistantWakeWord(
+                        id=eww.id,
+                        wake_word=eww.wake_word,
+                        trained_languages=eww.trained_languages,
+                    )
+                )
+                self._external_wake_words[eww.id] = eww
+            yield VoiceAssistantConfigurationResponse(
+                available_wake_words=available_wake_words,
+                active_wake_words=[
+                    ww.id
+                    for ww in self.state.wake_words.values()
+                    if ww.id in self.state.active_wake_words
+                ],
+                max_active_wake_words=2,
+            )
+            _LOGGER.info("Connected to Home Assistant")
+        elif isinstance(msg, VoiceAssistantSetConfiguration):
+            # Change active wake words
+            active_wake_words: Set[str] = set()
+            for wake_word_id in msg.active_wake_words:
+                if wake_word_id in self.state.wake_words:
+                    # Already loaded, just add to active set
+                    active_wake_words.add(wake_word_id)
+                    continue
+                model_info = self.state.available_wake_words.get(wake_word_id)
+                if not model_info:
+                    # Check external wake words (may require download)
+                    external_wake_word = self._external_wake_words.get(wake_word_id)
+                    if not external_wake_word:
+                        _LOGGER.warning("Wake word not found: %s", wake_word_id)
+                        continue
+                    model_info = self._download_external_wake_word(external_wake_word)
+                    if not model_info:
+                        # Download failed; skip this wake word
+                        continue
+                    self.state.available_wake_words[wake_word_id] = model_info
+                _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
+                loaded_model = model_info.load()
+                # Set id attribute on the model for later identification
+                setattr(loaded_model, 'id', wake_word_id)
+                self.state.wake_words[wake_word_id] = loaded_model
+                _LOGGER.info("Wake word loaded: %s", wake_word_id)
+                active_wake_words.add(wake_word_id)
+                # Don't break - load ALL requested wake words, not just the first one
+            # Replace the active set, persist it, and flag the change on the shared state
+            self.state.active_wake_words = active_wake_words
+            _LOGGER.debug("Active wake words: %s", active_wake_words)
+            self.state.preferences.active_wake_words = list(active_wake_words)
+            self.state.save_preferences()
+            self.state.wake_words_changed = True
+    def handle_audio(self, audio_chunk: bytes) -> None:
+        if not self._is_streaming_audio:
+            return
+        self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
+    def _get_or_create_conversation_id(self) -> str:
+        """Get existing conversation_id or create a new one.
+        Reuses conversation_id if within timeout period, otherwise creates new one.
+        """
+        now = time.time()
+        if (self._conversation_id is None or
+                now - self._last_conversation_time > self._conversation_timeout):
+            # Create new conversation_id
+            import uuid
+            self._conversation_id = str(uuid.uuid4())
+            _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)
+        self._last_conversation_time = now
+        return self._conversation_id
+    def _clear_conversation(self) -> None:
+        """Clear conversation state when exiting conversation mode."""
+        self._conversation_id = None
+        self._continue_conversation = False
+    def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
+        """Handle wake word detection - start voice pipeline.
+        Args:
+            wake_word: The detected wake word model; its ``wake_word`` phrase is
+                sent to Home Assistant.
+        If the timer-finished sound is currently active, the wake word only
+        stops that sound and no pipeline is started.
+        """
+        if self._timer_finished:
+            # Stop timer instead
+            self._timer_finished = False
+            self.state.tts_player.stop()
+            _LOGGER.debug("Stopping timer finished sound")
+            return
+        wake_word_phrase = wake_word.wake_word
+        _LOGGER.debug("Detected wake word: %s", wake_word_phrase)
+        # Turn toward sound source using DOA (Direction of Arrival)
+        self._turn_to_sound_source()
+        # Get or create conversation_id for context tracking
+        conv_id = self._get_or_create_conversation_id()
+        self.send_messages(
+            [VoiceAssistantRequest(
+                start=True,
+                wake_word_phrase=wake_word_phrase,
+                conversation_id=conv_id,
+            )]
+        )
+        self.duck()
+        # From here on handle_audio() forwards microphone chunks
+        self._is_streaming_audio = True
+        self.state.tts_player.play(self.state.wakeup_sound)
+    def stop(self) -> None:
+        """Stop current TTS playback (e.g., user said stop word)."""
+        self.state.active_wake_words.discard(self.state.stop_word.id)
+        self.state.tts_player.stop()
+        if self._timer_finished:
+            self._timer_finished = False
+            _LOGGER.debug("Stopping timer finished sound")
+        else:
+            _LOGGER.debug("TTS response stopped manually")
+            self._tts_finished()
+    def play_tts(self) -> None:
+        if (not self._tts_url) or self._tts_played:
+            return
+        self._tts_played = True
+        _LOGGER.debug("Playing TTS response: %s", self._tts_url)
+        self.state.active_wake_words.add(self.state.stop_word.id)
+        self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
+    def duck(self) -> None:
+        _LOGGER.debug("Ducking music")
+        self.state.music_player.duck()
+        # Pause Sendspin to prevent audio conflicts during voice interaction
+        self.state.music_player.pause_sendspin()
+    def unduck(self) -> None:
+        _LOGGER.debug("Unducking music")
+        self.state.music_player.unduck()
+        # Resume Sendspin audio
+        self.state.music_player.resume_sendspin()
+    def _tts_finished(self) -> None:
+        """Called when TTS audio playback finishes.
+        Following reference project pattern: handle continue conversation here.
+        Deactivates the stop word and notifies Home Assistant that the
+        announcement finished. Then either starts a new pipeline run with the
+        same conversation id (continuous conversation) or clears the
+        conversation, unducks the music and returns the robot to idle.
+        """
+        self.state.active_wake_words.discard(self.state.stop_word.id)
+        self.send_messages([VoiceAssistantAnnounceFinished()])
+        # Check if should continue conversation
+        # 1. Our switch is ON: Always continue (unconditional)
+        # 2. Our switch is OFF: Follow HA's continue_conversation request
+        continuous_mode = self.state.preferences.continuous_conversation
+        should_continue = continuous_mode or self._continue_conversation
+        if should_continue:
+            _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
+                         continuous_mode, self._continue_conversation)
+            # Play prompt sound to indicate ready for next input
+            self.state.tts_player.play(self.state.wakeup_sound)
+            # Use same conversation_id for context continuity
+            conv_id = self._get_or_create_conversation_id()
+            # No wake word phrase here: this run was not started by a wake word
+            self.send_messages([VoiceAssistantRequest(
+                start=True,
+                conversation_id=conv_id,
+            )])
+            self._is_streaming_audio = True
+            # Stay in listening mode
+            self._reachy_on_listening()
+        else:
+            self._clear_conversation()
+            self.unduck()
+            _LOGGER.debug("Conversation finished")
+            # Reachy Mini: Return to idle
+            self._reachy_on_idle()
+    def _play_timer_finished(self) -> None:
+        if not self._timer_finished:
+            self.unduck()
+            return
+        self.state.tts_player.play(
+            self.state.timer_finished_sound,
+            done_callback=lambda: call_all(
+                lambda: time.sleep(1.0), self._play_timer_finished
+            ),
+        )
+    def connection_lost(self, exc):
+        super().connection_lost(exc)
+        _LOGGER.info("Disconnected from Home Assistant")
+        # Clear streaming state on disconnect
+        self._is_streaming_audio = False
+        self._tts_url = None
+        self._tts_played = False
+        self._continue_conversation = False
+    def _download_external_wake_word(
+        self, external_wake_word: VoiceAssistantExternalWakeWord
+    ) -> Optional[AvailableWakeWord]:
+        eww_dir = self.state.download_dir / "external_wake_words"
+        eww_dir.mkdir(parents=True, exist_ok=True)
+        config_path = eww_dir / f"{external_wake_word.id}.json"
+        should_download_config = not config_path.exists()
+        # Check if we need to download the model file
+        model_path = eww_dir / f"{external_wake_word.id}.tflite"
+        should_download_model = True
+        if model_path.exists():
+            model_size = model_path.stat().st_size
+            if model_size == external_wake_word.model_size:
+                with open(model_path, "rb") as model_file:
+                    model_hash = hashlib.sha256(model_file.read()).hexdigest()
+                if model_hash == external_wake_word.model_hash:
+                    should_download_model = False
+                    _LOGGER.debug(
+                        "Model size and hash match for %s. Skipping download.",
+                        external_wake_word.id,
+                    )
+        if should_download_config or should_download_model:
+            # Download config
+            _LOGGER.debug("Downloading %s to %s", external_wake_word.url, config_path)
+            with urlopen(external_wake_word.url) as request:
+                if request.status != 200:
+                    _LOGGER.warning(
+                        "Failed to download: %s, status=%s",
+                        external_wake_word.url,
+                        request.status,
+                    )
+                    return None
+                with open(config_path, "wb") as model_file:
+                    shutil.copyfileobj(request, model_file)
+        if should_download_model:
+            # Download model file
+            parsed_url = urlparse(external_wake_word.url)
+            parsed_url = parsed_url._replace(
+                path=posixpath.join(posixpath.dirname(parsed_url.path), model_path.name)
+            )
+            model_url = urlunparse(parsed_url)
+            _LOGGER.debug("Downloading %s to %s", model_url, model_path)
+            with urlopen(model_url) as request:
+                if request.status != 200:
+                    _LOGGER.warning(
+                        "Failed to download: %s, status=%s", model_url, request.status
+                    )
+                    return None
+                with open(model_path, "wb") as model_file:
+                    shutil.copyfileobj(request, model_file)
+        return AvailableWakeWord(
+            id=external_wake_word.id,
+            type=WakeWordType.MICRO_WAKE_WORD,
+            wake_word=external_wake_word.wake_word,
+            trained_languages=external_wake_word.trained_languages,
+            wake_word_path=config_path,
+        )
+    # -------------------------------------------------------------------------
+    # Reachy Mini Motion Control
+    # -------------------------------------------------------------------------
+    def _turn_to_sound_source(self) -> None:
+        """Turn robot head toward sound source using DOA at wakeup.
+        This is called once at wakeup to orient the robot toward the speaker.
+        Face tracking will take over after the initial turn.
+        DOA angle convention (from SDK):
+        - 0 radians = left (Y+ direction in head frame)
+        - π/2 radians = front (X+ direction in head frame)
+        - π radians = right (Y- direction in head frame)
+        The SDK uses: p_head = [sin(doa), cos(doa), 0]
+        So we need to convert this to yaw angle.
+        Note: We don't check speech_detected because by the time wake word
+        detection completes, the user may have stopped speaking.
+        Does nothing when motion is disabled, no robot is present, no DOA
+        reading is available, or the computed yaw is below 10 degrees. Any
+        exception is logged and swallowed.
+        """
+        if not self.state.motion_enabled or not self.state.reachy_mini:
+            _LOGGER.info("DOA turn-to-sound: motion disabled or no robot")
+            return
+        try:
+            # Get DOA from reachy_controller (only read once)
+            doa = self.reachy_controller.get_doa_angle()
+            if doa is None:
+                _LOGGER.info("DOA not available, skipping turn-to-sound")
+                return
+            angle_rad, speech_detected = doa
+            _LOGGER.debug("DOA raw: angle=%.3f rad (%.1f°), speech=%s",
+                         angle_rad, math.degrees(angle_rad), speech_detected)
+            # Convert DOA to direction vector in head frame
+            # SDK convention: p_head = [sin(doa), cos(doa), 0]
+            # where X+ is front, Y+ is left
+            # (dir_x / dir_y are only used for the debug log below)
+            dir_x = math.sin(angle_rad)  # Front component
+            dir_y = math.cos(angle_rad)  # Left component
+            # Calculate yaw angle from direction vector
+            # DOA convention: 0 = left, π/2 = front, π = right
+            # Robot yaw: positive = turn left, negative = turn right
+            # yaw = doa - π/2 maps: left(0) → -90°, front(π/2) → 0°, right(π) → +90°
+            # NOTE(review): the two lines above contradict each other - with
+            # "positive = turn left", a source on the left (doa = 0) should give
+            # +90°, i.e. yaw = π/2 - doa (= atan2(dir_y, dir_x)). Confirm the sign
+            # convention of movement_manager.turn_to_angle() before changing this.
+            yaw_rad = angle_rad - math.pi / 2
+            yaw_deg = math.degrees(yaw_rad)
+            _LOGGER.debug("DOA direction: x=%.2f, y=%.2f, yaw=%.1f°",
+                         dir_x, dir_y, yaw_deg)
+            # Only turn if angle is significant (> 10°) to avoid noise
+            DOA_THRESHOLD_DEG = 10.0
+            if abs(yaw_deg) < DOA_THRESHOLD_DEG:
+                _LOGGER.debug("DOA angle %.1f° below threshold (%.1f°), skipping turn",
+                             yaw_deg, DOA_THRESHOLD_DEG)
+                return
+            # Apply 80% of DOA angle as conservative strategy
+            # This accounts for potential DOA inaccuracy
+            DOA_SCALE = 0.8
+            target_yaw_deg = yaw_deg * DOA_SCALE
+            _LOGGER.info("Turning toward sound source: DOA=%.1f°, target=%.1f°",
+                         yaw_deg, target_yaw_deg)
+            # Use MovementManager to turn (non-blocking)
+            if self.state.motion and self.state.motion.movement_manager:
+                self.state.motion.movement_manager.turn_to_angle(
+                    target_yaw_deg,
+                    duration=0.5  # Quick turn
+                )
+        except Exception as e:
+            _LOGGER.error("Error in turn-to-sound: %s", e)
+    def _reachy_on_listening(self) -> None:
+        """Called when listening for speech (HA state: Listening)."""
+        # Enable high-frequency face tracking during listening
+        self._set_conversation_mode(True)
+        # Resume face tracking (may have been paused during speaking)
+        if self.camera_server is not None:
+            try:
+                self.camera_server.set_face_tracking_enabled(True)
+            except Exception as e:
+                _LOGGER.debug("Failed to resume face tracking: %s", e)
+        if not self.state.motion_enabled or not self.state.reachy_mini:
+            return
+        try:
+            _LOGGER.debug("Reachy Mini: Listening animation")
+            if self.state.motion:
+                self.state.motion.on_listening()
+        except Exception as e:
+            _LOGGER.error("Reachy Mini motion error: %s", e)
+    def _reachy_on_thinking(self) -> None:
+        """Called when processing speech (HA state: Processing)."""
+        # Resume face tracking (may have been paused during speaking)
+        if self.camera_server is not None:
+            try:
+                self.camera_server.set_face_tracking_enabled(True)
+            except Exception as e:
+                _LOGGER.debug("Failed to resume face tracking: %s", e)
+        if not self.state.motion_enabled or not self.state.reachy_mini:
+            return
+        try:
+            _LOGGER.debug("Reachy Mini: Thinking animation")
+            if self.state.motion:
+                self.state.motion.on_thinking()
+        except Exception as e:
+            _LOGGER.error("Reachy Mini motion error: %s", e)
+    def _reachy_on_speaking(self) -> None:
+        """Called when TTS is playing (HA state: Responding)."""
+        # Pause face tracking during speaking - robot will use speaking animation instead
+        if self.camera_server is not None:
+            try:
+                self.camera_server.set_face_tracking_enabled(False)
+                _LOGGER.debug("Face tracking paused during speaking")
+            except Exception as e:
+                _LOGGER.debug("Failed to pause face tracking: %s", e)
+        if not self.state.motion_enabled:
+            _LOGGER.warning("Motion disabled, skipping speaking animation")
+            return
+        if not self.state.reachy_mini:
+            _LOGGER.warning("No reachy_mini instance, skipping speaking animation")
+            return
+        if not self.state.motion:
+            _LOGGER.warning("No motion controller, skipping speaking animation")
+            return
+        try:
+            _LOGGER.debug("Reachy Mini: Starting speaking animation")
+            self.state.motion.on_speaking_start()
+        except Exception as e:
+            _LOGGER.error("Reachy Mini motion error: %s", e)
+    def _reachy_on_idle(self) -> None:
+        """Called when returning to idle state (HA state: Idle)."""
+        # Disable high-frequency face tracking, switch to adaptive mode
+        self._set_conversation_mode(False)
+        # Resume face tracking (may have been paused during speaking)
+        if self.camera_server is not None:
+            try:
+                self.camera_server.set_face_tracking_enabled(True)
+            except Exception as e:
+                _LOGGER.debug("Failed to resume face tracking: %s", e)
+        if not self.state.motion_enabled or not self.state.reachy_mini:
+            return
+        try:
+            _LOGGER.debug("Reachy Mini: Idle animation")
+            if self.state.motion:
+                self.state.motion.on_idle()
+        except Exception as e:
+            _LOGGER.error("Reachy Mini motion error: %s", e)
+    def _set_conversation_mode(self, in_conversation: bool) -> None:
+        """Set conversation mode for adaptive face tracking.
+        When in conversation, face tracking runs at high frequency.
+        When idle, face tracking uses adaptive rate to save CPU.
+        """
+        if self.camera_server is not None:
+            try:
+                self.camera_server.set_conversation_mode(in_conversation)
+            except Exception as e:
+                _LOGGER.debug("Failed to set conversation mode: %s", e)
+    def _reachy_on_timer_finished(self) -> None:
+        """Called when a timer finishes."""
+        if not self.state.motion_enabled or not self.state.reachy_mini:
+            return
+        try:
+            _LOGGER.debug("Reachy Mini: Timer finished animation")
+            if self.state.motion:
+                self.state.motion.on_timer_finished()
+        except Exception as e:
+            _LOGGER.error("Reachy Mini motion error: %s", e)
+    def _play_emotion(self, emotion_name: str) -> None:
+        """Play an emotion/expression from the emotions library.
+        Args:
+            emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
+        """
+        try:
+            import requests
+            # Get WLAN IP from daemon status
+            wlan_ip = "localhost"
+            if self.state.reachy_mini is not None:
+                try:
+                    status = self.state.reachy_mini.client.get_status(wait=False)
+                    wlan_ip = status.get('wlan_ip', 'localhost')
+                except Exception:
+                    wlan_ip = "localhost"
+            # Call the emotion playback API
+            # Dataset: pollen-robotics/reachy-mini-emotions-library
+            base_url = f"http://{wlan_ip}:8000/api/move/play/recorded-move-dataset"
+            dataset = "pollen-robotics/reachy-mini-emotions-library"
+            url = f"{base_url}/{dataset}/{emotion_name}"
+            response = requests.post(url, timeout=5)
+            if response.status_code == 200:
+                result = response.json()
+                move_uuid = result.get('uuid')
+                _LOGGER.info(f"Playing emotion: {emotion_name} (uuid={move_uuid})")
+            else:
+                _LOGGER.warning(f"Failed to play emotion {emotion_name}: HTTP {response.status_code}")
+        except Exception as e:
+            _LOGGER.error(f"Error playing emotion {emotion_name}: {e}")

{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac RENAMED Viewed

File without changes

{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py RENAMED Viewed

@@ -6,9 +6,8 @@ Analyzes audio loudness to drive natural head movements during TTS playback.
 import math
 from collections import deque
-from collections.abc import Callable
 from itertools import islice
-from typing import Any
 import numpy as np
 from numpy.typing import NDArray
@@ -65,7 +64,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
     """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
     t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
     t = max(0.0, min(1.0, t))
-    return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
 def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
@@ -94,7 +93,7 @@ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray
     """Lightweight linear resampler for short buffers."""
     if sr_in == sr_out or x.size == 0:
         return x
-    n_out = round(x.size * sr_out / sr_in)
     if n_out <= 1:
         return np.zeros(0, dtype=np.float32)
     t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
@@ -144,7 +143,7 @@ class SpeechSwayRT:
         self.sway_down = 0
         self.t = 0.0
-    def feed(self, pcm: NDArray[Any], sr: int | None = None) -> list[dict[str, float]]:
         """Stream in PCM chunk. Returns list of sway dicts, one per hop.
         Args:
@@ -168,7 +167,7 @@ class SpeechSwayRT:
         else:
             self.carry = x
-        out: list[dict[str, float]] = []
         while self.carry.size >= HOP:
             hop = self.carry[:HOP]
@@ -216,35 +215,27 @@ class SpeechSwayRT:
             self.t += HOP_MS / 1000.0
             # Oscillators
-            pitch = (
-                math.radians(SWAY_A_PITCH_DEG)
-                * loud
-                * env
-                * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
-            )
-            yaw = (
-                math.radians(SWAY_A_YAW_DEG) * loud * env * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
-            )
-            roll = (
-                math.radians(SWAY_A_ROLL_DEG)
-                * loud
-                * env
-                * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
-            )
-            x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
-            y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
-            z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
-            out.append(
-                {
-                    "pitch_rad": pitch,
-                    "yaw_rad": yaw,
-                    "roll_rad": roll,
-                    "x_m": x_m,
-                    "y_m": y_m,
-                    "z_m": z_m,
-                }
-            )
         return out
@@ -252,7 +243,7 @@ class SpeechSwayRT:
 def analyze_audio_for_sway(
     audio_data: NDArray[Any],
     sample_rate: int,
-    callback: Callable[[dict[str, float]], None],
 ) -> None:
     """Analyze entire audio and call callback for each sway frame.

 import math
 from collections import deque
 from itertools import islice
+from typing import Any, Callable, Dict, List, Optional
 import numpy as np
 from numpy.typing import NDArray
     """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
     t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
     t = max(0.0, min(1.0, t))
+    return t ** LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
 def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
     """Lightweight linear resampler for short buffers."""
     if sr_in == sr_out or x.size == 0:
         return x
+    n_out = int(round(x.size * sr_out / sr_in))
     if n_out <= 1:
         return np.zeros(0, dtype=np.float32)
     t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
         self.sway_down = 0
         self.t = 0.0
+    def feed(self, pcm: NDArray[Any], sr: Optional[int] = None) -> List[Dict[str, float]]:
         """Stream in PCM chunk. Returns list of sway dicts, one per hop.
         Args:
         else:
             self.carry = x
+        out: List[Dict[str, float]] = []
         while self.carry.size >= HOP:
             hop = self.carry[:HOP]
             self.t += HOP_MS / 1000.0
             # Oscillators
+            pitch = (math.radians(SWAY_A_PITCH_DEG) * loud * env *
+                     math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch))
+            yaw = (math.radians(SWAY_A_YAW_DEG) * loud * env *
+                   math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw))
+            roll = (math.radians(SWAY_A_ROLL_DEG) * loud * env *
+                    math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll))
+            x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(
+                2 * math.pi * SWAY_F_X * self.t + self.phase_x)
+            y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(
+                2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
+            z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(
+                2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
+            out.append({
+                "pitch_rad": pitch,
+                "yaw_rad": yaw,
+                "roll_rad": roll,
+                "x_m": x_m,
+                "y_m": y_m,
+                "z_m": z_m,
+            })
         return out
 def analyze_audio_for_sway(
     audio_data: NDArray[Any],
     sample_rate: int,
+    callback: Callable[[Dict[str, float]], None],
 ) -> None:
     """Analyze entire audio and call callback for each sway frame.

{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css RENAMED Viewed

File without changes

reachy_mini_ha_voice/util.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Utility functions."""
+import hashlib
+import uuid
+from collections.abc import Callable
+from pathlib import Path
+from typing import Optional
+def call_all(*funcs: Optional[Callable[[], None]]) -> None:
+    """Invoke each given callback in order, ignoring ``None`` entries."""
+    for callback in funcs:
+        if callback is None:
+            continue
+        callback()
+def get_mac() -> str:
+    """Return a stable device identifier for Home Assistant.
+    The identifier is cached in ``<package parent>/local/.device_id`` so the
+    same value is reported across restarts; otherwise Home Assistant would
+    see the device as new each time.
+    Returns:
+        The cached identifier when a non-empty one is stored, otherwise the
+        first 12 hex digits of the MD5 digest of ``uuid.getnode()``.
+    """
+    device_id_file = Path(__file__).parent.parent / "local" / ".device_id"
+    # Reuse the cached ID when present. A missing, unreadable or empty cache
+    # file is ignored so an empty string is never used as the device identity.
+    try:
+        cached_id = device_id_file.read_text().strip()
+    except (OSError, UnicodeDecodeError):
+        cached_id = ""
+    if cached_id:
+        return cached_id
+    # uuid.getnode() may fall back to a random number when no hardware
+    # address can be found, which is why the result is persisted below.
+    # MD5 only normalises the format here; it is not used for security.
+    device_id = hashlib.md5(str(uuid.getnode()).encode()).hexdigest()[:12]
+    # Persisting is best-effort: on a read-only install the ID is still
+    # returned, it just cannot be cached.
+    try:
+        device_id_file.parent.mkdir(parents=True, exist_ok=True)
+        device_id_file.write_text(device_id)
+    except OSError:
+        pass
+    return device_id

reachy_mini_ha_voice/voice_assistant.py ADDED Viewed

	@@ -0,0 +1,810 @@

+"""
+Voice Assistant Service for Reachy Mini.
+This module provides the main voice assistant service that integrates
+with Home Assistant via ESPHome protocol.
+"""
+import asyncio
+import json
+import logging
+import threading
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from queue import Queue
+from typing import Dict, List, Optional, Set, Union
+import numpy as np
+from reachy_mini import ReachyMini
+from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
+from .audio_player import AudioPlayer
+from .satellite import VoiceSatelliteProtocol
+from .util import get_mac
+from .zeroconf import HomeAssistantZeroconf
+from .motion import ReachyMiniMotion
+from .camera_server import MJPEGCameraServer
+_LOGGER = logging.getLogger(__name__)
+_MODULE_DIR = Path(__file__).parent
+_WAKEWORDS_DIR = _MODULE_DIR / "wakewords"
+_SOUNDS_DIR = _MODULE_DIR / "sounds"
+_LOCAL_DIR = _MODULE_DIR.parent / "local"
+@dataclass
+class AudioProcessingContext:
+    """Context for audio processing, holding mutable state.
+    An instance is created by the audio thread and passed to the audio loop
+    helpers, which read and update it between chunks.
+    """
+    # Wake word models currently being evaluated (element type is not declared).
+    wake_words: List = field(default_factory=list)
+    # Feature extractor for MicroWakeWord; the audio thread assigns a
+    # MicroWakeWordFeatures instance before entering its loop.
+    micro_features: Optional[object] = None
+    # Presumably the feature frames fed to MicroWakeWord models - TODO confirm.
+    micro_inputs: List = field(default_factory=list)
+    # Presumably the OpenWakeWord counterpart of micro_features - TODO confirm.
+    oww_features: Optional[object] = None
+    # Presumably the feature frames fed to OpenWakeWord models - TODO confirm.
+    oww_inputs: List = field(default_factory=list)
+    # NOTE(review): looks like a flag for "an OpenWakeWord model is active" - confirm.
+    has_oww: bool = False
+    # NOTE(review): likely a timestamp of the last activation - confirm units/clock.
+    last_active: Optional[float] = None
+# Audio chunk size for consistent streaming (matches reference project)
+AUDIO_BLOCK_SIZE = 1024  # samples at 16kHz = 64ms
+class VoiceAssistantService:
+    """Voice assistant service that runs ESPHome protocol server."""
+    def __init__(
+        self,
+        reachy_mini: Optional[ReachyMini] = None,
+        name: str = "Reachy Mini",
+        host: str = "0.0.0.0",
+        port: int = 6053,
+        wake_model: str = "okay_nabu",
+        camera_port: int = 8081,
+        camera_enabled: bool = True,
+    ):
+        """Store the configuration and create the motion controller.
+        Args:
+            reachy_mini: Robot handle, or None to run without a robot.
+            name: Device name used for the server state and mDNS record.
+            host: Address the ESPHome and camera servers bind to.
+            port: TCP port of the ESPHome server.
+            wake_model: Wake word id loaded when preferences select none.
+            camera_port: Port handed to the MJPEG camera server.
+            camera_enabled: Whether start() launches the camera server.
+        """
+        # --- user-supplied configuration ---
+        self.reachy_mini, self.name = reachy_mini, name
+        self.host, self.port = host, port
+        self.wake_model = wake_model
+        self.camera_port, self.camera_enabled = camera_port, camera_enabled
+        # --- runtime handles, populated by start() ---
+        self._server = self._discovery = self._audio_thread = None
+        self._running = False
+        self._state: Optional[ServerState] = None
+        self._camera_server: Optional[MJPEGCameraServer] = None
+        # The motion controller is created eagerly, even without a robot.
+        self._motion = ReachyMiniMotion(reachy_mini)
+        # Accumulates samples so audio can be emitted in fixed-size chunks.
+        self._audio_buffer: np.ndarray = np.zeros(0, dtype=np.float32)
+    async def start(self) -> None:
+        """Start the voice assistant service.
+        Steps, in order: create the asset directories, check the bundled
+        files, load wake words / preferences / models, build the shared
+        ServerState, start the Reachy Mini media system (when a robot is
+        attached), start the motion controller and the audio thread, start
+        the camera server (when enabled), open the ESPHome TCP server,
+        register mDNS and start Sendspin discovery on the music player.
+        """
+        _LOGGER.info("Initializing voice assistant service...")
+        # Ensure directories exist
+        _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
+        _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
+        _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
+        # Verify required files (bundled with package)
+        await self._verify_required_files()
+        # Load wake words
+        available_wake_words = self._load_available_wake_words()
+        _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
+        # Load preferences
+        preferences_path = _LOCAL_DIR / "preferences.json"
+        preferences = self._load_preferences(preferences_path)
+        # Load wake word models
+        wake_models, active_wake_words = self._load_wake_models(
+            available_wake_words, preferences
+        )
+        # Load stop model
+        stop_model = self._load_stop_model()
+        # Create audio players with Reachy Mini reference
+        music_player = AudioPlayer(self.reachy_mini)
+        tts_player = AudioPlayer(self.reachy_mini)
+        # Create server state
+        self._state = ServerState(
+            name=self.name,
+            mac_address=get_mac(),
+            audio_queue=Queue(),
+            entities=[],
+            available_wake_words=available_wake_words,
+            wake_words=wake_models,
+            active_wake_words=active_wake_words,
+            stop_word=stop_model,
+            music_player=music_player,
+            tts_player=tts_player,
+            wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
+            timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
+            preferences=preferences,
+            preferences_path=preferences_path,
+            refractory_seconds=2.0,
+            download_dir=_LOCAL_DIR,
+            reachy_mini=self.reachy_mini,
+            motion_enabled=self.reachy_mini is not None,
+        )
+        # Set motion controller reference in state
+        self._state.motion = self._motion
+        # Start Reachy Mini media system if available
+        if self.reachy_mini is not None:
+            try:
+                # Check if media system is already running to avoid conflicts
+                media = self.reachy_mini.media
+                if media.audio is not None:
+                    # Check recording state
+                    # NOTE(review): relies on the private `_recording` attribute;
+                    # when it is absent the default False means recording is started.
+                    is_recording = getattr(media, '_recording', False)
+                    if not is_recording:
+                        media.start_recording()
+                        _LOGGER.info("Started Reachy Mini recording")
+                    else:
+                        _LOGGER.debug("Reachy Mini recording already active")
+                    # Check playback state
+                    # NOTE(review): same reliance on the private `_playing` attribute.
+                    is_playing = getattr(media, '_playing', False)
+                    if not is_playing:
+                        media.start_playing()
+                        _LOGGER.info("Started Reachy Mini playback")
+                    else:
+                        _LOGGER.debug("Reachy Mini playback already active")
+                    _LOGGER.info("Reachy Mini media system initialized")
+                    # Optimize microphone settings for voice recognition
+                    self._optimize_microphone_settings()
+                else:
+                    _LOGGER.warning("Reachy Mini audio system not available")
+            except Exception as e:
+                # Media failures are logged only; startup continues without them.
+                _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
+        # Start motion controller (5Hz control loop)
+        if self._motion is not None:
+            self._motion.start()
+        # Start audio processing thread (non-daemon for proper cleanup)
+        # NOTE(review): the thread is started before the camera and ESPHome
+        # servers are created; if a later step raises, this non-daemon thread
+        # keeps running until stop() clears _running - confirm callers always
+        # call stop() after a failed start().
+        self._running = True
+        self._audio_thread = threading.Thread(
+            target=self._process_audio,
+            daemon=False,
+        )
+        self._audio_thread.start()
+        # Start camera server if enabled (must be before ESPHome server)
+        if self.camera_enabled:
+            self._camera_server = MJPEGCameraServer(
+                reachy_mini=self.reachy_mini,
+                host=self.host,
+                port=self.camera_port,
+                fps=15,
+                quality=80,
+                enable_face_tracking=True,
+            )
+            await self._camera_server.start()
+            # Connect camera server to motion controller for face tracking
+            if self._motion is not None:
+                self._motion.set_camera_server(self._camera_server)
+        # Create ESPHome server (pass camera_server for camera entity)
+        loop = asyncio.get_running_loop()
+        camera_server = self._camera_server  # Capture for lambda
+        # The factory is called once per incoming connection; every protocol
+        # instance receives the same ServerState object.
+        self._server = await loop.create_server(
+            lambda: VoiceSatelliteProtocol(self._state, camera_server=camera_server),
+            host=self.host,
+            port=self.port,
+        )
+        # Start mDNS discovery
+        self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
+        await self._discovery.register_server()
+        # Start Sendspin auto-discovery (auto-enabled, no user config needed)
+        # Sendspin is for music playback, so connect to music_player
+        await music_player.start_sendspin_discovery()
+        _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
+    def _optimize_microphone_settings(self) -> None:
+        """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
+        This method configures the XMOS XVF3800 audio processor for optimal
+        voice command recognition at distances up to 2-3 meters.
+        If user has previously set values via Home Assistant, those values are
+        restored from preferences. Otherwise, default optimized values are used.
+        Key optimizations:
+        1. Enable AGC with higher max gain for distant speech
+        2. Reduce noise suppression to preserve quiet speech
+        3. Increase base microphone gain
+        4. Optimize AGC response times for voice commands
+        The method returns without doing anything when no robot is attached or
+        when the audio object exposes no ReSpeaker handle. Each parameter write
+        is attempted independently: a failed write is logged at debug level and
+        the remaining parameters are still written.
+        Reference: reachy_mini/src/reachy_mini/media/audio_control_utils.py
+        XMOS docs: https://www.xmos.com/documentation/XM-014888-PC/
+        """
+        if self.reachy_mini is None:
+            return
+        try:
+            # Access ReSpeaker through the media audio system
+            # NOTE(review): `_respeaker` is a private attribute of the audio
+            # object; confirm it is stable across reachy_mini releases.
+            audio = self.reachy_mini.media.audio
+            if audio is None or not hasattr(audio, '_respeaker'):
+                _LOGGER.debug("ReSpeaker not available for optimization")
+                return
+            respeaker = audio._respeaker
+            if respeaker is None:
+                _LOGGER.debug("ReSpeaker device not found")
+                return
+            # Get saved preferences (if any)
+            prefs = self._state.preferences if self._state else None
+            # ========== 1. AGC (Automatic Gain Control) Settings ==========
+            # Use saved value if available, otherwise use default (enabled)
+            agc_enabled = prefs.agc_enabled if (prefs and prefs.agc_enabled is not None) else True
+            try:
+                respeaker.write("PP_AGCONOFF", [1 if agc_enabled else 0])
+                _LOGGER.info("AGC %s (PP_AGCONOFF=%d)%s",
+                             "enabled" if agc_enabled else "disabled",
+                             1 if agc_enabled else 0,
+                             " [from preferences]" if (prefs and prefs.agc_enabled is not None) else " [default]")
+            except Exception as e:
+                _LOGGER.debug("Could not set AGC: %s", e)
+            # Use saved value if available, otherwise use default (30dB)
+            agc_max_gain = prefs.agc_max_gain if (prefs and prefs.agc_max_gain is not None) else 30.0
+            try:
+                respeaker.write("PP_AGCMAXGAIN", [agc_max_gain])
+                _LOGGER.info("AGC max gain set (PP_AGCMAXGAIN=%.1fdB)%s",
+                             agc_max_gain,
+                             " [from preferences]" if (prefs and prefs.agc_max_gain is not None) else " [default]")
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_AGCMAXGAIN: %s", e)
+            # Set AGC desired output level (target level after gain)
+            # More negative = quieter output, less negative = louder
+            # Default is around -25dB, set to -18dB for stronger output
+            try:
+                respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
+                _LOGGER.debug("AGC desired level set (PP_AGCDESIREDLEVEL=-18.0dB)")
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_AGCDESIREDLEVEL: %s", e)
+            # Optimize AGC time constants for voice commands
+            # Faster attack time helps capture sudden speech onset
+            try:
+                respeaker.write("PP_AGCTIME", [0.5])  # Main time constant (seconds)
+                _LOGGER.debug("AGC time constant set (PP_AGCTIME=0.5s)")
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_AGCTIME: %s", e)
+            # ========== 2. Base Microphone Gain ==========
+            # Increase base microphone gain for better sensitivity
+            # Default is 1.0, increase to 2.0 for distant speech
+            # Range: 0.0-4.0 (float, linear gain multiplier)
+            try:
+                respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
+                _LOGGER.info("Microphone gain increased (AUDIO_MGR_MIC_GAIN=2.0)")
+            except Exception as e:
+                _LOGGER.debug("Could not set AUDIO_MGR_MIC_GAIN: %s", e)
+            # ========== 3. Noise Suppression Settings ==========
+            # Use saved value if available, otherwise use default (15%)
+            # PP_MIN_NS: minimum noise suppression threshold
+            # Higher values = less aggressive suppression = better voice pickup
+            # PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% max suppression
+            # UI shows "noise suppression strength" so 15% = PP_MIN_NS of 0.85
+            # NOTE(review): the stored percentage is not range-checked here, so a
+            # value above 100 yields a negative PP_MIN_NS - confirm it is
+            # validated where the preference is set.
+            noise_suppression = prefs.noise_suppression if (prefs and prefs.noise_suppression is not None) else 15.0
+            pp_min_ns = 1.0 - (noise_suppression / 100.0)  # Convert percentage to PP_MIN_NS value
+            try:
+                respeaker.write("PP_MIN_NS", [pp_min_ns])
+                _LOGGER.info("Noise suppression set to %.0f%% strength (PP_MIN_NS=%.2f)%s",
+                             noise_suppression, pp_min_ns,
+                             " [from preferences]" if (prefs and prefs.noise_suppression is not None) else " [default]")
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_MIN_NS: %s", e)
+            # PP_MIN_NN: minimum noise floor estimation
+            # Higher values = less aggressive noise floor tracking
+            try:
+                respeaker.write("PP_MIN_NN", [pp_min_ns])  # Match PP_MIN_NS
+                _LOGGER.debug("Noise floor threshold set (PP_MIN_NN=%.2f)", pp_min_ns)
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_MIN_NN: %s", e)
+            # ========== 4. Echo Cancellation Settings ==========
+            # Ensure echo cancellation is enabled (important for TTS playback)
+            try:
+                respeaker.write("PP_ECHOONOFF", [1])
+                _LOGGER.debug("Echo cancellation enabled (PP_ECHOONOFF=1)")
+            except Exception as e:
+                _LOGGER.debug("Could not set PP_ECHOONOFF: %s", e)
+            # ========== 5. High-pass filter (remove low frequency noise) ==========
+            try:
+                respeaker.write("AEC_HPFONOFF", [1])
+                _LOGGER.debug("High-pass filter enabled (AEC_HPFONOFF=1)")
+            except Exception as e:
+                _LOGGER.debug("Could not set AEC_HPFONOFF: %s", e)
+            # This summary reports the requested values; it is logged even when
+            # individual writes above failed.
+            _LOGGER.info("Microphone settings initialized (AGC=%s, MaxGain=%.0fdB, NoiseSuppression=%.0f%%)",
+                         "ON" if agc_enabled else "OFF", agc_max_gain, noise_suppression)
+        except Exception as e:
+            _LOGGER.warning("Failed to optimize microphone settings: %s", e)
+    async def stop(self) -> None:
+        """Shut the service down, releasing resources in a fixed order.
+        Order: stop recording, clear the run flag, join the audio thread (up
+        to one second), stop playback, close the ESPHome server, unregister
+        mDNS, stop Sendspin, stop the camera server, shut down motion.
+        """
+        _LOGGER.info("Stopping voice assistant service...")
+        robot = self.reachy_mini
+        # Cut off the microphone first so no new audio arrives during teardown.
+        if robot is not None:
+            try:
+                robot.media.stop_recording()
+                _LOGGER.debug("Reachy Mini recording stopped")
+            except Exception as e:
+                _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)
+        # Ask the audio loop to exit, then give it a moment to do so.
+        self._running = False
+        worker = self._audio_thread
+        if worker:
+            worker.join(timeout=1.0)
+            if worker.is_alive():
+                _LOGGER.warning("Audio thread did not stop in time")
+        # Playback is stopped only after the audio thread has been joined.
+        if robot is not None:
+            try:
+                robot.media.stop_playing()
+                _LOGGER.debug("Reachy Mini playback stopped")
+            except Exception as e:
+                _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)
+        # Network side: ESPHome listener, then the mDNS advertisement.
+        server = self._server
+        if server:
+            server.close()
+            await server.wait_closed()
+        discovery = self._discovery
+        if discovery:
+            await discovery.unregister_server()
+        # Sendspin is attached to the music player held by the server state.
+        state = self._state
+        if state and state.music_player:
+            await state.music_player.stop_sendspin()
+        # Camera server reference is dropped once it has stopped.
+        camera = self._camera_server
+        if camera:
+            await camera.stop()
+            self._camera_server = None
+        # Motion controller goes last.
+        motion = self._motion
+        if motion:
+            motion.shutdown()
+        _LOGGER.info("Voice assistant service stopped.")
+    async def _verify_required_files(self) -> None:
+        """Check that the bundled wake word and sound assets are present.
+        Missing files are reported with a warning per category; nothing is
+        raised and nothing is downloaded.
+        """
+        # Model and config files expected in the wakewords/ directory.
+        expected_wakewords = (
+            "okay_nabu.tflite",
+            "okay_nabu.json",
+            "hey_jarvis.tflite",
+            "hey_jarvis.json",
+            "stop.tflite",
+            "stop.json",
+        )
+        # Audio cues expected in the sounds/ directory.
+        expected_sounds = (
+            "wake_word_triggered.flac",
+            "timer_finished.flac",
+        )
+        missing_wakewords = [
+            name for name in expected_wakewords
+            if not (_WAKEWORDS_DIR / name).exists()
+        ]
+        if missing_wakewords:
+            _LOGGER.warning(
+                "Missing wake word files: %s. These should be bundled with the package.",
+                missing_wakewords
+            )
+        missing_sounds = [
+            name for name in expected_sounds
+            if not (_SOUNDS_DIR / name).exists()
+        ]
+        if missing_sounds:
+            _LOGGER.warning(
+                "Missing sound files: %s. These should be bundled with the package.",
+                missing_sounds
+            )
+        if not (missing_wakewords or missing_sounds):
+            _LOGGER.info("All required files verified successfully.")
+    def _load_available_wake_words(self) -> Dict[str, AvailableWakeWord]:
+        """Scan the wake word directories and build the catalogue of models.
+        Every ``*.json`` config except ``stop.json`` yields one entry keyed by
+        the file stem. Configs that cannot be read or parsed are logged with a
+        warning and skipped.
+        """
+        catalogue: Dict[str, AvailableWakeWord] = {}
+        # Directories are scanned in the order listed, and an entry found
+        # later replaces an earlier one with the same id. The bundled
+        # MicroWakeWord configs in _WAKEWORDS_DIR are scanned last, so they
+        # win over external and OpenWakeWord entries.
+        search_dirs = (
+            _WAKEWORDS_DIR / "openWakeWord",
+            _LOCAL_DIR / "external_wake_words",
+            _WAKEWORDS_DIR,
+        )
+        for search_dir in search_dirs:
+            if not search_dir.exists():
+                continue
+            for config_path in search_dir.glob("*.json"):
+                model_id = config_path.stem
+                # The stop word is loaded separately and is not selectable.
+                if model_id == "stop":
+                    continue
+                try:
+                    config = json.loads(config_path.read_text(encoding="utf-8"))
+                    model_type = WakeWordType(config.get("type", "micro"))
+                    # OpenWakeWord configs name a model file next to the
+                    # config; for other types the config path itself is used.
+                    wake_word_path = (
+                        config_path.parent / config["model"]
+                        if model_type == WakeWordType.OPEN_WAKE_WORD
+                        else config_path
+                    )
+                    catalogue[model_id] = AvailableWakeWord(
+                        id=model_id,
+                        type=model_type,
+                        wake_word=config.get("wake_word", model_id),
+                        trained_languages=config.get("trained_languages", []),
+                        wake_word_path=wake_word_path,
+                    )
+                except Exception as e:
+                    _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
+        return catalogue
+    def _load_preferences(self, preferences_path: Path) -> Preferences:
+        """Read saved preferences, falling back to defaults.
+        Args:
+            preferences_path: JSON file holding keyword arguments for Preferences.
+        Returns:
+            The stored preferences, or a default Preferences() when the file
+            is missing or cannot be read, parsed or applied.
+        """
+        if not preferences_path.exists():
+            return Preferences()
+        try:
+            stored = json.loads(preferences_path.read_text(encoding="utf-8"))
+            return Preferences(**stored)
+        except Exception as e:
+            _LOGGER.warning("Failed to load preferences: %s", e)
+        return Preferences()
+    def _load_wake_models(
+        self,
+        available_wake_words: Dict[str, AvailableWakeWord],
+        preferences: Preferences,
+    ):
+        """Load the wake word models selected in preferences, else the default.
+        Args:
+            available_wake_words: Catalogue of loadable wake words keyed by id.
+            preferences: User preferences; ``active_wake_words`` lists the ids to load.
+        Returns:
+            A ``(models, active_ids)`` tuple: the loaded models keyed by id and
+            the set of ids that loaded successfully. Both are empty when
+            nothing could be loaded.
+        """
+        from pymicro_wakeword import MicroWakeWord
+        from pyopen_wakeword import OpenWakeWord
+        loaded: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}
+        active_ids: Set[str] = set()
+        # First pass: whatever the user selected (nothing when unset or empty).
+        for wake_word_id in preferences.active_wake_words or ():
+            entry = available_wake_words.get(wake_word_id)
+            if entry is None:
+                _LOGGER.warning("Unknown wake word: %s", wake_word_id)
+                continue
+            try:
+                _LOGGER.debug("Loading wake model: %s", wake_word_id)
+                model = entry.load()
+                # Tag the model with its id so it can be identified later.
+                model.id = wake_word_id
+                loaded[wake_word_id] = model
+                active_ids.add(wake_word_id)
+            except Exception as e:
+                _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
+        if loaded:
+            return loaded, active_ids
+        # Second pass: nothing loaded, so try the configured default model.
+        default_id = self.wake_model
+        default_entry = available_wake_words.get(default_id)
+        if default_entry:
+            try:
+                _LOGGER.debug("Loading default wake model: %s", default_id)
+                model = default_entry.load()
+                model.id = default_id
+                loaded[default_id] = model
+                active_ids.add(default_id)
+            except Exception as e:
+                _LOGGER.error("Failed to load default wake model: %s", e)
+        return loaded, active_ids
+    def _load_stop_model(self):
+        """Load the stop word model, falling back to the "okay_nabu" model.
+        Returns:
+            A MicroWakeWord whose ``id`` attribute is set to ``"stop"``, or
+            None when neither ``stop.json`` nor ``okay_nabu.json`` can be loaded.
+        """
+        from pymicro_wakeword import MicroWakeWord
+        stop_config = _WAKEWORDS_DIR / "stop.json"
+        if stop_config.exists():
+            try:
+                model = MicroWakeWord.from_config(stop_config)
+                setattr(model, 'id', 'stop')
+                return model
+            except Exception as e:
+                _LOGGER.warning("Failed to load stop model: %s", e)
+        # Fallback: reuse the "okay_nabu" wake word model under the "stop" id.
+        # This is a real model, not a dummy, so it reacts to that phrase.
+        _LOGGER.warning("Stop model not available, using fallback")
+        okay_nabu_config = _WAKEWORDS_DIR / "okay_nabu.json"
+        if okay_nabu_config.exists():
+            # Guarded like the primary load: a broken fallback file should not
+            # abort service startup when a broken stop.json is tolerated.
+            try:
+                model = MicroWakeWord.from_config(okay_nabu_config)
+                setattr(model, 'id', 'stop')
+                return model
+            except Exception as e:
+                _LOGGER.warning("Failed to load fallback stop model: %s", e)
+        return None
+    def _process_audio(self) -> None:
+        """Audio thread entry point: pick a microphone source and run its loop.
+        Uses the Reachy Mini microphone when a robot is attached, otherwise
+        the system microphone. An exception escaping the loop is logged with
+        its traceback and ends the thread.
+        """
+        from pymicro_wakeword import MicroWakeWordFeatures
+        ctx = AudioProcessingContext(micro_features=MicroWakeWordFeatures())
+        try:
+            _LOGGER.info("Starting audio processing...")
+            if self.reachy_mini is None:
+                _LOGGER.info("Using system microphone (fallback)")
+                self._audio_loop_fallback(ctx)
+            else:
+                _LOGGER.info("Using Reachy Mini's microphone")
+                self._audio_loop_reachy(ctx)
+        except Exception:
+            _LOGGER.exception("Error processing audio")
+    def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
+        """Pull audio from the Reachy Mini microphone until the service stops.
+        Errors inside one iteration are logged and followed by a short pause,
+        so a transient failure does not end the loop.
+        """
+        while self._running:
+            try:
+                if not self._wait_for_satellite():
+                    continue
+                self._update_wake_words_list(ctx)
+                chunk = self._get_reachy_audio_chunk()
+                if chunk is not None:
+                    self._process_audio_chunk(ctx, chunk)
+                else:
+                    # Nothing captured yet; back off briefly instead of spinning.
+                    time.sleep(0.01)
+            except Exception as e:
+                _LOGGER.error("Error in Reachy audio processing: %s", e)
+                time.sleep(0.1)
+    def _audio_loop_fallback(self, ctx: AudioProcessingContext) -> None:
+        """Audio loop using system microphone (fallback)."""
+        import sounddevice as sd
+        block_size = 1024
+        with sd.InputStream(
+            samplerate=16000,
+            channels=1,
+            blocksize=block_size,
+            dtype="float32",
+        ) as stream:
+            while self._running:
+                if not self._wait_for_satellite():
+                    continue
+                self._update_wake_words_list(ctx)
+                # Get audio from system microphone
+                audio_chunk_array, overflowed = stream.read(block_size)
+                if overflowed:
+                    _LOGGER.warning("Audio buffer overflow")
+                audio_chunk_array = audio_chunk_array.reshape(-1)
+                audio_chunk = self._convert_to_pcm(audio_chunk_array)
+                self._process_audio_chunk(ctx, audio_chunk)
+    def _wait_for_satellite(self) -> bool:
+        """Wait for satellite connection. Returns True if connected."""
+        if self._state is None or self._state.satellite is None:
+            time.sleep(0.1)
+            return False
+        return True
+    def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
+        """Rebuild ``ctx.wake_words`` from the shared state when needed.
+        A rebuild runs when ``ctx.wake_words`` is empty, or when the state has
+        ``wake_words_changed`` set and a non-empty ``wake_words`` mapping. On
+        rebuild the feature extractors and pending inputs are reset and
+        ``ctx.last_active`` is set to the current monotonic time.
+        Args:
+            ctx: Audio processing context updated in place.
+        """
+        from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
+        from pymicro_wakeword import MicroWakeWordFeatures
+        # NOTE(review): while no model ends up in ctx.wake_words (e.g. nothing in
+        # active_wake_words matches), `not ctx.wake_words` stays true and this
+        # whole reset re-runs on every call - confirm that is intended.
+        if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
+            self._state.wake_words_changed = False
+            ctx.wake_words.clear()
+            # Reset feature extractors to clear any residual audio data
+            # This prevents false triggers when switching wake words
+            ctx.micro_features = MicroWakeWordFeatures()
+            ctx.micro_inputs.clear()
+            # Only recreate the OpenWakeWord extractor if one was already in use
+            if ctx.oww_features is not None:
+                ctx.oww_features = OpenWakeWordFeatures.from_builtin()
+            ctx.oww_inputs.clear()
+            # Also reset the refractory period to prevent immediate trigger
+            ctx.last_active = time.monotonic()
+            # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
+            # We need to filter by active_wake_words (which contains the IDs/keys)
+            for ww_id, ww_model in self._state.wake_words.items():
+                if ww_id in self._state.active_wake_words:
+                    # Ensure the model has an 'id' attribute for later use
+                    if not hasattr(ww_model, 'id'):
+                        setattr(ww_model, 'id', ww_id)
+                    ctx.wake_words.append(ww_model)
+            # Create the OpenWakeWord extractor only when an active model needs it
+            ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
+            if ctx.has_oww and ctx.oww_features is None:
+                ctx.oww_features = OpenWakeWordFeatures.from_builtin()
+            _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
+    def _get_reachy_audio_chunk(self) -> Optional[bytes]:
+        """Get fixed-size audio chunk from Reachy Mini's microphone.
+        Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
+        internally to ensure consistent chunk sizes for streaming.
+        Returns:
+            PCM audio bytes of fixed size, or None if not enough data.
+        """
+        # Get new audio data from SDK
+        audio_data = self.reachy_mini.media.get_audio_sample()
+        # Append new data to buffer if valid
+        if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
+            try:
+                if audio_data.dtype.kind not in ('S', 'U', 'O', 'V', 'b'):
+                    if audio_data.dtype != np.float32:
+                        audio_data = np.asarray(audio_data, dtype=np.float32)
+                    # Convert stereo to mono
+                    if audio_data.ndim == 2 and audio_data.shape[1] == 2:
+                        audio_data = audio_data.mean(axis=1)
+                    elif audio_data.ndim == 2:
+                        audio_data = audio_data[:, 0].copy()
+                    if audio_data.ndim == 1:
+                        self._audio_buffer = np.concatenate([self._audio_buffer, audio_data])
+            except (TypeError, ValueError):
+                pass
+        # Return fixed-size chunk if we have enough data
+        if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
+            chunk = self._audio_buffer[:AUDIO_BLOCK_SIZE]
+            self._audio_buffer = self._audio_buffer[AUDIO_BLOCK_SIZE:]
+            return self._convert_to_pcm(chunk)
+        return None
+    def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
+        """Convert float32 audio array to 16-bit PCM bytes."""
+        return (
+            (np.clip(audio_chunk_array, -1.0, 1.0) * 32767.0)
+            .astype("<i2")
+            .tobytes()
+        )
+    def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
+        """Process an audio chunk for wake word detection.
+        Following reference project pattern: always process wake words.
+        Refractory period prevents duplicate triggers.
+        Args:
+            ctx: Audio processing context
+            audio_chunk: PCM audio bytes
+        """
+        # Stream audio to Home Assistant
+        self._state.satellite.handle_audio(audio_chunk)
+        # Process wake word features
+        self._process_features(ctx, audio_chunk)
+        # Detect wake words
+        self._detect_wake_words(ctx)
+        # Detect stop word
+        self._detect_stop_word(ctx)
+    def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
+        """Process audio features for wake word detection."""
+        ctx.micro_inputs.clear()
+        ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
+        if ctx.has_oww and ctx.oww_features is not None:
+            ctx.oww_inputs.clear()
+            ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
+    def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
+        """Detect wake words in the processed audio features.
+        Uses refractory period to prevent duplicate triggers.
+        Following reference project pattern.
+        """
+        from pymicro_wakeword import MicroWakeWord
+        from pyopen_wakeword import OpenWakeWord
+        for wake_word in ctx.wake_words:
+            activated = False
+            if isinstance(wake_word, MicroWakeWord):
+                for micro_input in ctx.micro_inputs:
+                    if wake_word.process_streaming(micro_input):
+                        activated = True
+            elif isinstance(wake_word, OpenWakeWord):
+                for oww_input in ctx.oww_inputs:
+                    for prob in wake_word.process_streaming(oww_input):
+                        if prob > 0.5:
+                            activated = True
+            if activated:
+                # Check refractory period to prevent duplicate triggers
+                now = time.monotonic()
+                if (ctx.last_active is None) or (
+                    (now - ctx.last_active) > self._state.refractory_seconds
+                ):
+                    _LOGGER.info("Wake word detected: %s", wake_word.id)
+                    self._state.satellite.wakeup(wake_word)
+                    # Face tracking will handle looking at user automatically
+                    self._motion.on_wakeup()
+                    ctx.last_active = now
+    def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
+        """Detect stop word in the processed audio features."""
+        if not self._state.stop_word:
+            return
+        stopped = False
+        for micro_input in ctx.micro_inputs:
+            if self._state.stop_word.process_streaming(micro_input):
+                stopped = True
+        if stopped and (self._state.stop_word.id in self._state.active_wake_words):
+            _LOGGER.info("Stop word detected")
+            self._state.satellite.stop()

{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep RENAMED Viewed

File without changes

{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md RENAMED Viewed

File without changes