This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +6 -3
  2. .github/dependabot.yml +0 -13
  3. .github/workflows/sync_develop_to_hf_edge.yml +0 -86
  4. .github/workflows/sync_to_hf.yml +0 -36
  5. .gitignore +5 -10
  6. .pre-commit-config.yaml +0 -20
  7. CHANGELOG.md +0 -713
  8. PROJECT_PLAN.md +1186 -0
  9. Project_Summary.md +0 -1439
  10. README.md +1 -0
  11. changelog.json +1 -227
  12. docs/USER_MANUAL_CN.md +0 -244
  13. docs/USER_MANUAL_EN.md +0 -244
  14. home_assistant_blueprints/reachy_mini_presence_companion.yaml +0 -246
  15. index.html +11 -26
  16. pyproject.toml +21 -140
  17. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py +24 -29
  18. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py +63 -52
  19. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py +43 -58
  20. reachy_mini_ha_voice/animations/conversation_animations.json +176 -0
  21. reachy_mini_ha_voice/animations/emotion_keywords.json +424 -0
  22. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py +35 -65
  23. reachy_mini_ha_voice/audio_player.py +589 -0
  24. reachy_mini_ha_voice/camera_server.py +877 -0
  25. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py +54 -44
  26. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py +25 -33
  27. reachy_mini_ha_voice/entity_registry.py +976 -0
  28. reachy_mini_ha_voice/gesture_detector.py +183 -0
  29. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py +48 -158
  30. {reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py +83 -31
  31. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py +25 -108
  32. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx +0 -0
  33. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx +0 -0
  34. reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py +17 -31
  35. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/movement_manager.py +654 -694
  36. {reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py +878 -735
  37. reachy_mini_ha_voice/satellite.py +856 -0
  38. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep +0 -0
  39. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md +0 -0
  40. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md +0 -0
  41. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac +0 -0
  42. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac +0 -0
  43. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py +27 -36
  44. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html +0 -0
  45. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js +0 -0
  46. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css +0 -0
  47. reachy_mini_ha_voice/util.py +45 -0
  48. reachy_mini_ha_voice/voice_assistant.py +813 -0
  49. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep +0 -0
  50. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md +0 -0
.gitattributes CHANGED
@@ -1,5 +1,8 @@
1
- # LFS tracking for large binary files
 
 
 
 
 
2
  *.tflite filter=lfs diff=lfs merge=lfs -text
3
  *.onnx filter=lfs diff=lfs merge=lfs -text
4
- *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.flac filter=lfs diff=lfs merge=lfs -text
 
1
+ reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
2
+ reachy_mini_ha_voice/sounds/**/*.flac filter=lfs diff=lfs merge=lfs -text
3
+ "reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
4
+ reachy_mini_ha_voice/sounds/**/*.flac" filter=lfs diff=lfs merge=lfs -text
5
+ "ha/assets/meshes/*.stl" filter=lfs diff=lfs merge=lfs -text
6
+ "ha/assets/*.urdf" filter=lfs diff=lfs merge=lfs -text
7
  *.tflite filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
.github/dependabot.yml DELETED
@@ -1,13 +0,0 @@
1
- version: 2
2
- updates:
3
- # Enable version updates for pip
4
- - package-ecosystem: "pip"
5
- directory: "/"
6
- schedule:
7
- interval: "weekly"
8
- # Ignore PyTorch updates - locked version required for compatibility
9
- ignore:
10
- - dependency-name: "torch"
11
- versions: [">2.5.1"]
12
- - dependency-name: "torchvision"
13
- versions: [">0.20.1"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_develop_to_hf_edge.yml DELETED
@@ -1,86 +0,0 @@
1
- name: Sync Develop to Hugging Face Edge
2
-
3
- on:
4
- push:
5
- branches: [develop]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync-edge:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Transform project name for edge channel
18
- run: |
19
- python - <<'PY'
20
- from pathlib import Path
21
-
22
- # Keep runtime module path unchanged, only rewrite package/app naming metadata.
23
- pyproject = Path('pyproject.toml')
24
- text = pyproject.read_text(encoding='utf-8')
25
- text = text.replace(
26
- 'name = "reachy_mini_home_assistant"',
27
- 'name = "reachy_mini_home_assistant_edge"',
28
- 1,
29
- )
30
- text = text.replace(
31
- 'reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
32
- 'reachy_mini_home_assistant_edge = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
33
- 1,
34
- )
35
- pyproject.write_text(text, encoding='utf-8')
36
-
37
- init_file = Path('reachy_mini_home_assistant/__init__.py')
38
- init_text = init_file.read_text(encoding='utf-8')
39
- init_text = init_text.replace(
40
- 'version("reachy_mini_home_assistant")',
41
- 'version("reachy_mini_home_assistant_edge")',
42
- 1,
43
- )
44
- init_file.write_text(init_text, encoding='utf-8')
45
-
46
- readme = Path('README.md')
47
- if readme.exists():
48
- readme_text = readme.read_text(encoding='utf-8')
49
- readme_text = readme_text.replace(
50
- 'title: Reachy Mini for Home Assistant',
51
- 'title: Reachy Mini for Home Assistant (Edge)',
52
- 1,
53
- )
54
- readme_text = readme_text.replace(
55
- 'short_description: Deep integration of Reachy Mini robot with Home Assistant',
56
- 'short_description: Edge channel for Reachy Mini Home Assistant integration',
57
- 1,
58
- )
59
- readme_text = readme_text.replace(
60
- ' - reachy_mini_home_assistant',
61
- ' - reachy_mini_home_assistant_edge',
62
- 1,
63
- )
64
- readme.write_text(readme_text, encoding='utf-8')
65
- PY
66
-
67
- - name: Create fresh commit and push to Hugging Face edge space
68
- env:
69
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
70
- run: |
71
- git config --global user.email "action@github.com"
72
- git config --global user.name "GitHub Action"
73
-
74
- # Create a new orphan branch with no history
75
- git checkout --orphan hf-edge-sync
76
- git add -A
77
- git commit -m "Fresh edge sync: $(date +%Y-%m-%d_%H:%M:%S)"
78
-
79
- # Add Hugging Face edge remote
80
- git remote add hf-edge https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant_edge
81
-
82
- # Push LFS objects first
83
- git lfs push hf-edge hf-edge-sync --all
84
-
85
- # Force push as main to HF edge space
86
- git push hf-edge hf-edge-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_to_hf.yml DELETED
@@ -1,36 +0,0 @@
1
- name: Sync to Hugging Face
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Create fresh commit and push to Hugging Face
18
- env:
19
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: |
21
- git config --global user.email "action@github.com"
22
- git config --global user.name "GitHub Action"
23
-
24
- # Create a new orphan branch with no history
25
- git checkout --orphan hf-sync
26
- git add -A
27
- git commit -m "Fresh sync: $(date +%Y-%m-%d\ %H:%M:%S)"
28
-
29
- # Add Hugging Face remote
30
- git remote add hf https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant
31
-
32
- # Push LFS objects first
33
- git lfs push hf hf-sync --all
34
-
35
- # Force push as main to HF (overwrites all history)
36
- git push hf hf-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -65,19 +65,14 @@ htmlcov/
65
  !reachy_mini_ha_voice/sounds/*.flac
66
 
67
  # Models (exclude package bundled files)
68
- # models/ - ignore external models directory
69
  models/
70
- # Package bundled models
71
- !reachy_mini_ha_voice/models/
72
- reachy_mini_ha_voice/models/*.tflite
73
- reachy_mini_ha_voice/models/*.onnx
74
- reachy_mini_ha_voice/models/*.pt
75
 
76
  # SDK Reference (local development only)
77
  reference/
78
- local/
79
  # ha/ - temporarily commented out for path fixes
80
  # ha/ will be moved to separate repository soon
81
-
82
- # Temporary check scripts
83
- temp_check_scripts/
 
65
  !reachy_mini_ha_voice/sounds/*.flac
66
 
67
  # Models (exclude package bundled files)
 
68
  models/
69
+ # *.tflite - bundled in package
70
+ !reachy_mini_ha_voice/wakewords/*.tflite
71
+ !reachy_mini_ha_voice/wakewords/**/*.tflite
72
+ *.onnx
73
+ !reachy_mini_ha_voice/models/*.onnx
74
 
75
  # SDK Reference (local development only)
76
  reference/
 
77
  # ha/ - temporarily commented out for path fixes
78
  # ha/ will be moved to separate repository soon
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,20 +0,0 @@
1
- # Pre-commit hooks for code quality
2
- # Install: pip install pre-commit && pre-commit install
3
- # Run manually: pre-commit run --all-files
4
-
5
- repos:
6
- - repo: https://github.com/astral-sh/ruff-pre-commit
7
- rev: v0.8.6
8
- hooks:
9
- - id: ruff
10
- args: [--fix]
11
- - id: ruff-format
12
-
13
- - repo: https://github.com/pre-commit/mirrors-mypy
14
- rev: v1.14.1
15
- hooks:
16
- - id: mypy
17
- additional_dependencies: []
18
- args: [--ignore-missing-imports]
19
- # Only check changed files for speed
20
- pass_filenames: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CHANGELOG.md DELETED
@@ -1,713 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to the Reachy Mini HA Voice project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [Unreleased]
9
-
10
- ### Fixed
11
- - **NameError** - Add missing deque import in gesture smoother
12
- - **Syntax Error** - Add missing class indentation for volume methods in audio_player.py
13
- - **Audio Card Name Detection** - Use SDK's detection logic instead of hardcoded values
14
- - **SDK Port 8000 Blocking** - Use amixer directly for volume control to avoid SDK HTTP API blocking
15
- - **Memory Leak Root Cause** - Audio buffer array creation in loop causing unbounded memory growth
16
- - **Indentation Error** - Fix indentation in audio_player.py stop_sendspin method
17
-
18
- ## [1.0.7] - 2026-05-05
19
-
20
- ### Changed
21
- - Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers
22
- - Raise the Reachy Mini SDK baseline to `reachy-mini>=1.7.1`
23
-
24
- ### Fixed
25
- - Keep wakeup/TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers
26
- - Synchronize `Idle Behavior` shutdown with ESPHome face/gesture switches and runtime state updates
27
- - Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior
28
-
29
- ### Optimized
30
- - Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling
31
-
32
- ## [1.0.6] - 2026-05-01
33
-
34
- ### Changed
35
- - Align `pyproject.toml` with the current Reachy Mini SDK baseline by requiring `reachy-mini>=1.7.0`, `Python>=3.12`, `zeroconf>=0.131,<1`, `aiohttp`, `websockets>=12,<16`, and `gstreamer-bundle==1.28.1` on non-Linux platforms
36
- - Align Sendspin client dependency with the current upstream line via `aiosendspin>=5.1,<6.0`
37
-
38
- ### Fixed
39
- - Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model
40
-
41
- ### Optimized
42
- - Stop the camera server entirely when `Idle Behavior` is disabled instead of only unloading vision models, so idle-without-animation behaves more like a low-resource sleep state
43
-
44
- ## [1.0.5] - 2026-04-12
45
-
46
- ### Changed
47
- Remove app-managed robot sleep/wake handling because the current Reachy Mini SDK no longer supports mini apps remaining active while the robot enters sleep
48
- - Keep resource suspend/resume limited to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery
49
- - Align `pyproject.toml` runtime constraints with the current Reachy Mini reference SDK package (`reachy-mini>=1.6.3`, `websockets>=12,<16`, Python baseline `>=3.10`, and uv gstreamer metadata)
50
-
51
- ### Removed
52
- - Remove `SleepManager` integration and app-side sleep/wake callback flow from the voice assistant runtime
53
- - Remove Home Assistant sleep control entities and internal robot sleep state tracking from the mini app
54
-
55
- ## [1.0.4] - 2026-03-19
56
-
57
- ### Fixed
58
- - Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks
59
- - Replace direct SDK private `_respeaker` access with `audio_control_utils`-based ReSpeaker initialization
60
- - Tighten camera and pose composition to require current SDK media/utils APIs and valid `look_at_image` inputs
61
-
62
- ### Improved
63
- - Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases
64
- - Replace separate wake/sleep buttons with a single sleep control entity
65
- - Update Sendspin integration for current `aiosendspin` lifecycle, stream handling, listener cleanup, and synchronized buffering
66
- - Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor
67
-
68
- ## [1.0.3] - 2026-03-07
69
-
70
- ### Added
71
- - Idle Random Actions switch in Home Assistant with preferences persistence and startup restore
72
- - Configurable `idle_random_actions` presets in `conversation_animations.json` for centralized idle motion tuning
73
-
74
- ### Fixed
75
- - Remove duplicate `idle_random_actions` fields/methods and complete runtime control wiring in controller/entity registry/movement manager
76
-
77
- ### Optimized
78
- - Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion
79
- - Remove `set_target` global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick
80
- - Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness
81
-
82
- ## [1.0.2] - 2026-03-06
83
-
84
- ### Fixed
85
- - Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness
86
- - Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise
87
- - Switch sleep/wake control to daemon API (`start` with `wake_up=true`, `stop` with `goto_sleep=true`) so `/api/daemon/status` reflects real sleep state on SDK 1.5
88
- - Normalize daemon status parsing for SDK 1.5 object-based status responses
89
- - Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts
90
- - Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome
91
- - Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection
92
- - Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)
93
-
94
- ### Changed
95
- - Keep idle antenna behavior as animation-only control (no torque coupling)
96
- - Tighten preference loading to current schema (no legacy config fallback filtering)
97
-
98
- ### Added
99
- - Home Assistant blueprint for Reachy presence companion automation
100
- - GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag
101
-
102
- ### Improved
103
- - Blueprint supports device-first auto-binding and richer usage instructions
104
- - Refresh landing page (`index.html`) with current version, GitHub source link, and new Blueprint/Auto Release capability cards
105
-
106
- ## [1.0.1] - 2026-03-05
107
-
108
- ### Changed
109
- - Update runtime dependency baseline to `reachy-mini>=1.5.0`
110
-
111
- ### Fixed
112
- - Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility
113
- - Remove legacy ZError string matching from connection error handling
114
- - Adapt daemon status handling to SDK v1.5 `DaemonStatus` object (prevents `AttributeError` on `status.get`)
115
- - Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating
116
- - Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)
117
- - Improve TTS streaming robustness and reduce cutoffs with retry-based audio push
118
-
119
- ### Optimized
120
- - Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)
121
- - Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio
122
-
123
- ## [1.0.0] - 2026-03-04
124
-
125
- ### Changed
126
- - Require `reachy-mini[gstreamer]>=1.4.1`
127
-
128
- ### Added
129
- - Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)
130
- - Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)
131
- - Face Confidence number entity (0.0-1.0, persistent)
132
-
133
- ### Fixed
134
- - Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)
135
- - Auto-match ONNX gesture input size from model shape to prevent `INVALID_ARGUMENT` dimension errors
136
- - Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise
137
- - Enforce deterministic audio startup path and fail fast when microphone capture is not ready
138
- - Add on-demand `/snapshot` JPEG generation when no cached stream frame is available
139
-
140
- ### Optimized
141
- - Unload/reload face and gesture models when toggled off/on to save resources
142
- - Update idle behavior to breathing + look-around alternation, idle antenna sway disabled
143
- - Adjust idle breathing to human-like cadence
144
- - Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)
145
- - Keep face/gesture AI processing active even when stream viewers are absent
146
-
147
- ### Changed
148
- - Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps
149
-
150
- ## [0.9.9] - 2026-01-28
151
-
152
- ### Fixed
153
- - **SDK Buffer Overflow During Idle**
154
- - Add SDK buffer flush on GStreamer lock timeout
155
- - Prevents buffer overflow during long idle periods when lock contention prevents buffer drainage
156
- - Audio thread flushes SDK audio buffer when lock acquisition times out
157
- - Camera thread flushes SDK video buffer when lock acquisition times out
158
- - Audio playback flushes SDK playback buffer when lock acquisition times out
159
- - Resolves SDK crashes during extended wake-up idle periods without conversation
160
- - Requires Reachy Mini hardware (not applicable to simulation mode)
161
-
162
- ### Fixed
163
- - **Memory Leaks**
164
- - Audio buffer memory leak - added size limit to prevent unbounded growth
165
- - Temp file leak - downloaded audio files now cleaned up after playback
166
- - Multiple memory leak and resource leak issues fixed
167
- - Thread-safe draining flag using threading.Event
168
- - Silent failures now logged for debugging
169
-
170
- ### Optimized
171
- - **Gesture Recognition Sensitivity**
172
- - Simplify GestureSmoother to frequency-based confirmation (1 frame)
173
- - Remove all confidence filtering - return all detections to Home Assistant
174
- - Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)
175
- - Remove duplicate empty check in gesture detection
176
- - Add GestureSmoother class with history tracking for stable output
177
- - Reduce gesture detection interval from 3 frames to 1 frame for higher frequency
178
- - Fix: Gesture detection now returns all detected hands instead of only the highest confidence one
179
- - Matches reference implementation behavior for improved detection rate
180
- - No conflicts with face tracking (shared frame, independent processing)
181
-
182
- ### Code Quality
183
- - Fix Ruff linter issues (import ordering, missing newlines, __all__ sorting)
184
- - Format code with Ruff formatter (5 files reformatted)
185
- - Fix slice index error in gesture detection (convert coordinates to integers)
186
- - Fix Python 3.12 type annotation compatibility
187
-
188
- ## [0.9.8] - 2026-01-27
189
-
190
- ### New
191
- - Mute switch entity - suspends voice services only (not camera/motion)
192
- - Disable Camera switch entity - suspends camera and AI processing
193
- - Home Assistant connection-driven feature loading
194
- - Automatic suspend/resume on HA disconnect/reconnect
195
-
196
- ### Fixed
197
- - Camera disable logic - corrected inverted conditions for proper operation
198
- - Prevent daemon crash when entering idle state
199
- - Camera preview in Home Assistant
200
- - SDK crash during idle - optimized audio processing to skip get_frame() when not streaming to Home Assistant, reducing GStreamer resource competition
201
- - Add GStreamer threading lock to prevent pipeline competition between audio, playback, and camera threads
202
- - Audio thread gets priority during conversations - bypasses lock when conversation is active
203
- - Remove GStreamer lock to fix wake word detection in idle state (lock was preventing wake word detection)
204
-
205
- ### Optimized
206
- - Reduce log output by 30-40%
207
- - Bundle face tracking model with package - eliminated HuggingFace download dependency, removed huggingface_hub from requirements, models now load from local package directory for offline operation
208
- - Replace HTTP API polling with SDK Zenoh for daemon status monitoring to reduce uvicorn blocking and improve stability
209
- - Device ID now reads /etc/machine-id directly - removed uuid.getnode() and file persistence
210
- - Implement high-priority SDK improvements
211
- - Remove aiohttp dependency from daemon_monitor - fully migrated to SDK Zenoh
212
-
213
- ### Removed
214
- - Temporarily disable emotion playback during TTS
215
- - Unused config items (connection_timeout)
216
-
217
- ### Code Quality
218
- - Code quality improvements
219
-
220
- ## [0.9.7] - 2026-01-20
221
-
222
- ### Fixed
223
- - Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)
224
- - Animation file path corrected (was looking in wrong directory)
225
- - Remove hey_jarvis from required wake words (it's optional in openWakeWord/)
226
-
227
- ## [0.9.6] - 2026-01-20
228
-
229
- ### New
230
- - Add ruff linter/formatter and mypy type checker configuration
231
- - Add pre-commit hooks for automated code quality checks
232
-
233
- ### Fixed
234
- - Remove duplicate resume() method in audio_player.py
235
- - Remove duplicate connection_lost() method in satellite.py
236
- - Store asyncio task reference in sleep_manager.py to prevent garbage collection
237
-
238
- ### Optimized
239
- - Use dict.items() for efficient iteration in smoothing.py
240
-
241
- ## [0.9.5] - 2026-01-19
242
-
243
- ### Refactored
244
- - Modularize codebase - new core/motion/vision/audio/entities module structure
245
- - Remove legacy/compatibility code
246
- - Remove audio diagnostics debug code
247
-
248
- ### New
249
- - Direct callbacks for HA sleep/wake buttons to suspend/resume services
250
-
251
- ### Optimized
252
- - Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)
253
- - Audio loop delay reduced from 10ms to 1ms for faster VAD response
254
- - Stereo to mono conversion uses first channel instead of mean for cleaner signal
255
-
256
- ### Improved
257
- - Camera resume_from_suspend now synchronous for reliable wake from sleep
258
- - Rotation clamping in face tracking to prevent IK collisions
259
- - Audio gain boosted for faster VAD detection
260
- Fixed audio NaN/Inf values that were causing STT issues
261
-
262
- ## [0.9.0] - 2026-01-18
263
-
264
- ### New
265
- - Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect
266
- - System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors
267
- - Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)
268
-
269
- ### Fixed
270
- - Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam
271
-
272
- ### Improved
273
- - Graceful service lifecycle management with RobotStateMonitor callbacks
274
-
275
- ## [0.8.7] - 2026-01-18
276
-
277
- ### Fixed
278
- - Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback
279
- - Emotion moves and face tracking now respect SDK safety limits
280
-
281
- ### Improved
282
- - Face tracking smoothness - removed EMA smoothing (matches reference project)
283
- - Face tracking timing updated to match reference (2s delay, 1s interpolation)
284
-
285
- ## [0.8.6] - 2026-01-18
286
-
287
- ### Fixed
288
- - Audio buffer memory leak - added size limit to prevent unbounded growth
289
- - Temp file leak - downloaded audio files now cleaned up after playback
290
- - Camera thread termination timeout increased for clean shutdown
291
- - Thread-safe draining flag using threading.Event
292
- - Silent failures now logged for debugging
293
-
294
- ## [0.8.5] - 2026-01-18
295
-
296
- ### Fixed
297
- - DOA turn-to-sound direction inverted - now turns correctly toward sound source
298
- - Graceful shutdown prevents daemon crash on app stop
299
-
300
- ## [0.8.4] - 2026-01-18
301
-
302
- ### Improved
303
- - Smooth idle animation with interpolation phase (matches reference BreathingMove)
304
- - Two-phase animation - interpolates to neutral before oscillation
305
- - Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway
306
-
307
- ## [0.8.3] - 2026-01-18
308
-
309
- ### Fixed
310
- - Body now properly follows head rotation during face tracking
311
- - body_yaw extracted from final head pose matrix and synced with head_yaw
312
- - Matches reference project sweep_look behavior for natural body movement
313
-
314
- ## [0.8.2] - 2026-01-18
315
-
316
- ### Fixed
317
- - Body follows head rotation during face tracking - body_yaw syncs with head_yaw
318
- - Matches reference project sweep_look behavior for natural body movement
319
-
320
- ## [0.8.1] - 2026-01-18
321
-
322
- ### Fixed
323
- - face_detected entity now pushes state updates to Home Assistant in real-time
324
- - Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention
325
- - Idle animation now starts immediately on app launch
326
- - Smooth antenna animation - removed pose change threshold for continuous motion
327
-
328
- ## [0.8.0] - 2026-01-17
329
-
330
- ### New
331
- - Comprehensive emotion keyword mapping with 280+ Chinese and English keywords
332
- - 35 emotion categories mapped to robot expressions
333
- - Auto-trigger expressions from conversation text patterns
334
-
335
- ## [0.7.3] - 2026-01-12
336
-
337
- ### Fixed
338
- - Revert to reference project pattern - use refractory period instead of state flags
339
- - Remove broken _in_pipeline and _tts_playing state management
340
- - Restore correct RUN_END event handling from linux-voice-assistant
341
-
342
- ## [0.7.2] - 2026-01-12
343
-
344
- ### Fixed
345
- - Remove premature _tts_played reset in RUN_END event
346
- - Ensure _in_pipeline stays True until TTS playback completes
347
-
348
- ## [0.7.1] - 2026-01-12
349
-
350
- ### Fixed
351
- - Prevent wake word detection during TTS playback
352
- - Add _tts_playing flag to track TTS audio state precisely
353
-
354
- ## [0.7.0] - 2026-01-12
355
-
356
- ### New
357
- - Gesture detection using HaGRID ONNX models (18 gesture classes)
358
- - gesture_detected and gesture_confidence entities in Home Assistant
359
-
360
- ### Fixed
361
- - Gesture state now properly pushed to Home Assistant in real-time
362
-
363
- ### Optimized
364
- - Aggressive power saving - 0.5fps idle mode after 30s without face
365
- - Gesture detection only runs when face detected (saves CPU)
366
-
367
- ## [0.6.1] - 2026-01-12
368
-
369
- ### Fixed
370
- - Prioritize MicroWakeWord over OpenWakeWord for same-name wake words
371
- - OpenWakeWord wake words now visible in Home Assistant selection
372
- - Stop word detection now works correctly
373
- - STT/LLM response time improved with fixed audio chunk size
374
-
375
- ## [0.6.0] - 2026-01-11
376
-
377
- ### New
378
- - Real-time audio-driven speech animation (SwayRollRT algorithm)
379
- - JSON-driven animation system - all animations configurable
380
-
381
- ### Refactored
382
- - Remove hardcoded actions, use animation offsets only
383
-
384
- ### Fixed
385
- - TTS audio analysis now works with local playback
386
-
387
- ## [0.5.16] - 2026-01-11
388
-
389
- ### Removed
390
- - Tap-to-wake feature (too many false triggers)
391
-
392
- ### New
393
- - Continuous Conversation switch in Home Assistant
394
-
395
- ### Refactored
396
- - Simplified satellite.py and voice_assistant.py
397
-
398
- ## [0.5.15] - 2026-01-11
399
-
400
- ### New
401
- - Audio settings persistence (AGC, Noise Suppression, Tap Sensitivity)
402
-
403
- ### Refactored
404
- - Move Sendspin mDNS discovery to zeroconf.py
405
-
406
- ### Fixed
407
- - Tap detection not re-enabled during emotion playback in conversation
408
-
409
- ## [0.5.14] - 2026-01-11
410
-
411
- ### Fixed
412
- - Skip ALL wake word processing when pipeline is active
413
- - Eliminate race condition in pipeline state during continuous conversation
414
-
415
- ### Improved
416
- - Control loop increased to 100Hz (daemon updated)
417
-
418
- ## [0.5.13] - 2026-01-10
419
-
420
- ### New
421
- - JSON-driven animation system for conversation states
422
- - AnimationPlayer class inspired by SimpleDances project
423
-
424
- ### Refactored
425
- - Replace SpeechSwayGenerator and BreathingAnimation with unified animation system
426
-
427
- ## [0.5.12] - 2026-01-10
428
-
429
- ### Removed
430
- - Deleted broken hey_reachy wake word model
431
-
432
- ### Revert
433
- - Default wake word back to "Okay Nabu"
434
-
435
- ## [0.5.11] - 2026-01-10
436
-
437
- ### Fixed
438
- - Reset feature extractors when switching wake words
439
- - Add refractory period after wake word switch
440
-
441
- ## [0.5.10] - 2026-01-10
442
-
443
- ### Fixed
444
- - Wake word models now have 'id' attribute set correctly
445
- - Wake word switching from Home Assistant now works
446
-
447
- ## [0.5.9] - 2026-01-10
448
-
449
- ### New
450
- - Default wake word changed to hey_reachy
451
-
452
- ### Fixed
453
- - Wake word switching bug
454
-
455
- ## [0.5.8] - 2026-01-09
456
-
457
- ### Fixed
458
- - Tap detection waits for emotion playback to complete
459
- - Poll daemon API for move completion
460
-
461
- ## [0.5.7] - 2026-01-09
462
-
463
- ### New
464
- - DOA turn-to-sound at wakeup
465
-
466
- ### Fixed
467
- - Show raw DOA angle in Home Assistant (0-180)
468
- - Invert DOA yaw direction
469
-
470
- ## [0.5.6] - 2026-01-08
471
-
472
- ### Fixed
473
- - Better pipeline state tracking to prevent duplicate audio
474
-
475
- ## [0.5.5] - 2026-01-08
476
-
477
- ### New
478
- - Prevent concurrent pipelines
479
- - Add prompt sound for continuous conversation
480
-
481
- ## [0.5.4] - 2026-01-08
482
-
483
- ### Fixed
484
- - Wait for RUN_END before starting new conversation
485
-
486
- ## [0.5.3] - 2026-01-08
487
-
488
- ### Fixed
489
- - Improve continuous conversation with conversation_id tracking
490
-
491
- ## [0.5.2] - 2026-01-08
492
-
493
- ### Fixed
494
- - Enable HA control of robot pose
495
- - Continuous conversation improvements
496
-
497
- ## [0.5.1] - 2026-01-08
498
-
499
- ### Fixed
500
- - Sendspin connects to music_player instead of tts_player
501
- - Persist tap_sensitivity settings
502
- - Pause Sendspin during voice assistant wakeup
503
- - Sendspin prioritize 16kHz sample rate
504
-
505
- ## [0.5.0] - 2026-01-07
506
-
507
- ### New
508
- - Face tracking with adaptive frequency
509
- - Sendspin multi-room audio integration
510
-
511
- ### Optimized
512
- - Shutdown mechanism improvements
513
-
514
- ## [0.4.0] - 2026-01-07
515
-
516
- ### Fixed
517
- - Daemon stability fixes
518
-
519
- ### New
520
- - Face tracking enabled by default
521
-
522
- ### Optimized
523
- - Microphone settings for better sensitivity
524
-
525
- ## [0.3.0] - 2026-01-06
526
-
527
- ### New
528
- - Tap sensitivity slider entity
529
-
530
- ### Fixed
531
- - Music Assistant compatibility
532
-
533
- ### Optimized
534
- - Face tracking and tap detection
535
-
536
- ## [0.2.21] - 2026-01-06
537
-
538
- ### Fixed
539
- - Daemon crash - reduce control loop to 2Hz
540
- - Pause control loop during audio playback
541
-
542
- ## [0.2.20] - 2026-01-06
543
-
544
- ### Revert
545
- - Audio/satellite/voice_assistant to v0.2.9 working state
546
-
547
- ## [0.2.19] - 2026-01-06
548
-
549
- ### Fixed
550
- - Force localhost connection mode to prevent WebRTC errors
551
-
552
- ## [0.2.18] - 2026-01-06
553
-
554
- ### Fixed
555
- - Audio playback - restore wakeup sound
556
- - Use push_audio_sample for TTS
557
-
558
- ## [0.2.17] - 2026-01-06
559
-
560
- ### Removed
561
- - head_joints/passive_joints entities
562
- - error_message to diagnostic category
563
-
564
- ## [0.2.16] - 2026-01-06
565
-
566
- ### Fixed
567
- - TTS playback - pause recording during playback
568
-
569
- ## [0.2.15] - 2026-01-06
570
-
571
- ### Fixed
572
- - Use play_sound() instead of push_audio_sample() for TTS
573
-
574
- ## [0.2.14] - 2026-01-06
575
-
576
- ### Fixed
577
- - Pause audio recording during TTS playback
578
-
579
- ## [0.2.13] - 2026-01-06
580
-
581
- ### Fixed
582
- - Don't manually start/stop media - let SDK/daemon manage it
583
-
584
- ## [0.2.12] - 2026-01-05
585
-
586
- ### Fixed
587
- - Disable breathing animation to prevent serial port overflow
588
-
589
- ## [0.2.11] - 2026-01-05
590
-
591
- ### Fixed
592
- - Disable wakeup sound to prevent daemon crash
593
- - Add debug logging for troubleshooting
594
-
595
- ## [0.2.10] - 2026-01-05
596
-
597
- ### Added
598
- - Debug logging for motion init
599
-
600
- ### Fixed
601
- - Audio fallback samplerate
602
-
603
- ## [0.2.9] - 2026-01-05
604
-
605
- ### Removed
606
- - DOA/speech detection - replaced by face tracking
607
-
608
- ## [0.2.8] - 2026-01-05
609
-
610
- ### New
611
- - Replace DOA with YOLO face tracking
612
-
613
- ## [0.2.7] - 2026-01-05
614
-
615
- ### Fixed
616
- - Add DOA caching to prevent ReSpeaker query overload
617
-
618
- ## [0.2.6] - 2026-01-05
619
-
620
- ### New
621
- - Thread-safe ReSpeaker USB access to prevent daemon deadlock
622
-
623
- ## [0.2.4] - 2026-01-05
624
-
625
- ### Fixed
626
- - Microphone volume control via daemon HTTP API
627
-
628
- ## [0.2.3] - 2026-01-05
629
-
630
- ### Fixed
631
- - Daemon crash caused by conflicting pose commands
632
- - Disable: Pose setter methods in ReachyController
633
-
634
- ## [0.2.2] - 2026-01-05
635
-
636
- ### Fixed
637
- - Second conversation motion failure
638
- - Reduce: Control loop from 20Hz to 10Hz
639
- - Improve: Connection recovery (faster reconnect)
640
-
641
- ## [0.2.1] - 2026-01-05
642
-
643
- ### Fixed
644
- - Daemon crash issue
645
- - Optimize: Code structure
646
-
647
- ## [0.2.0] - 2026-01-05
648
-
649
- ### New
650
- - Automatic facial expressions during conversation
651
- - New: Emotion playback integration
652
-
653
- ### Refactored
654
- - Integrate emotion playback into MovementManager
655
-
656
- ## [0.1.5] - 2026-01-04
657
-
658
- ### Optimized
659
- - Code splitting and organization
660
-
661
- ### Fixed
662
- - Program crash issues
663
-
664
- ## [0.1.0] - 2026-01-01
665
-
666
- ### New
667
- - Initial release
668
- - ESPHome protocol server implementation
669
- - mDNS auto-discovery for Home Assistant
670
- - Local wake word detection (microWakeWord)
671
- - Voice assistant pipeline integration
672
- - Basic motion feedback (nod, shake)
673
-
674
- ---
675
-
676
- ## Version History Summary
677
-
678
- | Version | Date | Major Changes |
679
- |---------|------|--------------|
680
- | 0.9.9 | 2026-01-28 | SDK buffer overflow fixes, memory leak fixes, gesture detection optimization |
681
- | 0.9.8 | 2026-01-27 | Mute/Disable entities, HA connection-driven features, log reduction |
682
- | 0.9.7 | 2026-01-20 | Device ID path fix, animation path fix |
683
- | 0.9.6 | 2026-01-20 | Code quality tools (ruff, mypy, pre-commit) |
684
- | 0.9.5 | 2026-01-19 | Modular architecture refactoring, audio latency optimization |
685
- | 0.9.0 | 2026-01-18 | Robot state monitor, system diagnostics entities |
686
- | 0.8.7 | 2026-01-18 | Body yaw clamping, face tracking smoothness |
687
- | 0.8.0 | 2026-01-17 | Emotion keyword mapping (280+ keywords, 35 categories) |
688
- | 0.7.0 | 2026-01-12 | Gesture detection with HaGRID ONNX models (18 gestures) |
689
- | 0.6.0 | 2026-01-11 | Real-time audio-driven speech animation, JSON animation system |
690
- | 0.5.0 | 2026-01-07 | Face tracking, Sendspin multi-room audio |
691
- | 0.4.0 | 2026-01-07 | Daemon stability, microphone optimization |
692
- | 0.3.0 | 2026-01-06 | Tap sensitivity slider |
693
- | 0.2.0 | 2026-01-05 | Emotion playback integration |
694
- | 0.1.0 | 2026-01-01 | Initial release |
695
-
696
- ## Project Statistics
697
-
698
- - **Total Versions**: 29 (from 0.1.0 to 0.9.9)
699
- - **Development Period**: ~30 days (2026-01-01 to 2026-01-28)
700
- - **Average Release Rate**: ~1 version per day
701
- - **Lines of Code**: ~18,000 lines across 52 Python files
702
- - **ESPHome Entities**: 54 entities implemented
703
- - **Supported Features**:
704
- - Voice assistant pipeline integration
705
- - Local wake word detection (multiple models)
706
- - Face tracking with YOLO
707
- - Gesture detection (18 classes)
708
- - Multi-room audio (Sendspin)
709
- - Real-time speech animation
710
- - Emotion keyword detection (280+ keywords)
711
- - System diagnostics
712
-
713
- For detailed implementation notes, see [PROJECT_PLAN.md](./PROJECT_PLAN.md).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
PROJECT_PLAN.md ADDED
@@ -0,0 +1,1186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reachy Mini for Home Assistant - Project Plan
2
+
3
+ ## Project Overview
4
+
5
+ Integrate Home Assistant voice assistant functionality into the Reachy Mini Wi-Fi robot, communicating with Home Assistant via the ESPHome protocol.
6
+
7
+ ## Local Reference Directories (DO NOT modify any files in reference directories)
8
+ 1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
9
+ 2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
10
+ 3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
11
+ 4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
12
+ 5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
13
+
14
+ ## Core Design Principles
15
+
16
+ 1. **Zero Configuration** - Users only need to install the app, no manual configuration required
17
+ 2. **Native Hardware** - Use robot's built-in microphone and speaker
18
+ 3. **Home Assistant Centralized Management** - All configuration done on Home Assistant side
19
+ 4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
20
+ 5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reference/reachy_mini) architecture design and constraints
21
+ 6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
22
+ 7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
23
+ 8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
24
+ 9. **Preserve Functionality** - Any code modifications should optimize while preserving completed features; do not remove features to solve problems. When issues occur, prioritize solving them by consulting the reference examples first, rather than by adding more log output
25
+
26
+ ## Technical Architecture
27
+
28
+ ```
29
+ ┌─────────────────────────────────────────────────────────────────────────────┐
30
+ │ Reachy Mini (ARM64) │
31
+ │ │
32
+ │ ┌─────────────────────────────── AUDIO INPUT ───────────────────────────┐ │
33
+ │ │ ReSpeaker XVF3800 (16kHz) │ │
34
+ │ │ ┌──────────────┐ ┌──────────────────────────────────────────────┐ │ │
35
+ │ │ │ 4-Mic Array │ → │ XVF3800 DSP │ │ │
36
+ │ │ └──────────────┘ │ • Echo Cancellation (AEC) │ │ │
37
+ │ │ │ • Noise Suppression (NS) │ │ │
38
+ │ │ │ • Auto Gain Control (AGC, max 30dB) │ │ │
39
+ │ │ │ • Direction of Arrival (DOA) │ │ │
40
+ │ │ │ • Voice Activity Detection (VAD) │ │ │
41
+ │ │ └──────────────────────────────────────────────┘ │ │
42
+ │ │ │ │ │
43
+ │ │ ▼ │ │
44
+ │ │ ┌──────────────────────────────────────────────┐ │ │
45
+ │ │ │ Wake Word Detection (microWakeWord) │ │ │
46
+ │ │ │ • "Okay Nabu" / "Hey Jarvis" │ │ │
47
+ │ │ │ • Stop word detection │ │ │
48
+ │ │ └──────────────────────────────────────────────┘ │ │
49
+ │ └───────────────────────────────────────────────────────────────────────┘ │
50
+ │ │
51
+ │ ┌─────────────────────────────── AUDIO OUTPUT ──────────────────────────┐ │
52
+ │ │ ┌──────────────────────────┐ ┌──────────────────────────────────┐ │ │
53
+ │ │ │ TTS Player │ │ Music Player (Sendspin) │ │ │
54
+ │ │ │ • Voice assistant speech │ │ • Multi-room audio streaming │ │ │
55
+ │ │ │ • Sound effects │ │ • Auto-discovery via mDNS │ │ │
56
+ │ │ │ • Priority over music │ │ • Auto-pause during conversation │ │ │
57
+ │ │ └──────────────────────────┘ └──────────────────────────────────┘ │ │
58
+ │ │ │ │ │ │
59
+ │ │ └──────────────┬───────────────┘ │ │
60
+ │ │ ▼ │ │
61
+ │ │ ┌──────────────────────────────────────────────────┐ │ │
62
+ │ │ │ ReSpeaker Speaker (16kHz) │ │ │
63
+ │ │ └──────────────────────────────────────────────────┘ │ │
64
+ │ └───────────────────────────────────────────────────────────────────────┘ │
65
+ │ │
66
+ │ ┌─────────────────────────── VISION & TRACKING ─────────────────────────┐ │
67
+ │ │ ┌──────────────────────────┐ ┌──────────────────────────────────┐ │ │
68
+ │ │ │ Camera (VPU accelerated) │ → │ YOLO Face Detection │ │ │
69
+ │ │ │ • MJPEG stream server │ │ • AdamCodd/YOLOv11n-face │ │ │
70
+ │ │ │ • ESPHome Camera entity │ │ • Adaptive frame rate: │ │ │
71
+ │ │ └──────────────────────────┘ │ - 15fps: conversation/face │ │ │
72
+ │ │ │ - 2fps: idle (power saving) │ │ │
73
+ │ │ │ • look_at_image() pose calc │ │ │
74
+ │ │ │ • Smooth return after face lost │ │ │
75
+ │ │ └──────────────────────────────────┘ │ │
76
+ │ └───────────────────────────────────────────────────────────────────────┘ │
77
+ │ │
78
+ │ ┌─────────────────────────── MOTION CONTROL ────────────────────────────┐ │
79
+ │ │ MovementManager (100Hz Control Loop) │ │
80
+ │ │ ┌────────────────────────────────────────────────────────────────┐ │ │
81
+ │ │ │ Motion Layers (Priority: Move > Action > SpeechSway > Breath) │ │ │
82
+ │ │ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌──────────────┐ │ │ │
83
+ │ │ │ │ Move Queue │ │ Actions │ │ SpeechSway │ │ Breathing │ │ │ │
84
+ │ │ │ │ (Emotions) │ │ (Nod/Shake)│ │ (Voice VAD)│ │ (Idle anim) │ │ │ │
85
+ │ │ │ └────────────┘ └────────────┘ └────────────┘ └──────────────┘ │ │ │
86
+ │ │ └────────────────────────────────────────────────────────────────┘ │ │
87
+ │ │ │ │
88
+ │ │ ┌────────────────────────────────────────────────────────────────┐ │ │
89
+ │ │ │ Face Tracking Offsets (Secondary Pose Overlay) │ │ │
90
+ │ │ │ • Pitch offset: +9° (down compensation) │ │ │
91
+ │ │ │ • Yaw offset: -7° (right compensation) │ │ │
92
+ │ │ └────────────────────────────────────────────────────────────────┘ │ │
93
+ │ │ │ │
94
+ │ │ State Machine: on_wakeup → on_listening → on_speaking → on_idle │ │
95
+ │ │ │ │
96
+ │ │ ┌────────────────────────────────────────────────────────────────┐ │ │
97
+ │ │ │ Body Following (v0.8.3) │ │ │
98
+ │ │ │ • Body yaw syncs with head yaw for natural tracking │ │ │
99
+ │ │ │ • Extracted from final head pose matrix │ │ │
100
+ │ │ └────────────────────────────────────────────────────────────────┘ │ │
101
+ │ └───────────────────────────────────────────────────────────────────────┘ │
102
+ │ │
103
+ │ ┌─────────────────────────── GESTURE DETECTION ────────────────────────┐ │
104
+ │ │ HaGRID ONNX Models │ │
105
+ │ │ • 18 gesture classes (call, like, dislike, fist, ok, palm, etc.) │ │
106
+ │ │ • Only runs when face detected (power saving) │ │
107
+ │ │ • Real-time state push to Home Assistant │ │
108
+ │ └───────────────────────────────────────────────────────────────────────┘ │
109
+ │ │
110
+ │ ┌─────────────────────────── ESPHOME SERVER ────────────────────────────┐ │
111
+ │ │ Port 6053 (mDNS auto-discovery) │ │
112
+ │ │ • 43+ entities (sensors, controls, media player, camera) │ │
113
+ │ │ • Voice Assistant pipeline integration │ │
114
+ │ │ • Real-time state synchronization │ │
115
+ │ └───────────────────────────────────────────────────────────────────────┘ │
116
+ └─────────────────────────────────────────────────────────────────────────────┘
117
+
118
+ │ ESPHome Protocol (protobuf)
119
+
120
+ ┌─────────────────────────────────────────────────────────────────────────────┐
121
+ │ Home Assistant │
122
+ │ ┌──────────────────┐ ┌──────────────────┐ ┌────────────────────────────┐ │
123
+ │ │ STT Engine │ │ Intent Processing│ │ TTS Engine │ │
124
+ │ │ (User configured)│ │ (Conversation) │ │ (User configured) │ │
125
+ │ └──────────────────┘ └──────────────────┘ └────────────────────────────┘ │
126
+ └─────────────────────────────────────────────────────────────────────────────┘
127
+ ```
128
+
129
+ ## Completed Features
130
+
131
+ ### Core Features
132
+ - [x] ESPHome protocol server implementation
133
+ - [x] mDNS service discovery (auto-discovered by Home Assistant)
134
+ - [x] Local wake word detection (microWakeWord)
135
+ - [x] Continuous conversation mode (controlled via Home Assistant switch)
136
+ - [x] Audio stream transmission to Home Assistant
137
+ - [x] TTS audio playback
138
+ - [x] Stop word detection
139
+
140
+ ### Reachy Mini Integration
141
+ - [x] Use Reachy Mini SDK microphone input
142
+ - [x] Use Reachy Mini SDK speaker output
143
+ - [x] Head motion control (nod, shake, gaze)
144
+ - [x] Antenna animation control
145
+ - [x] Voice state feedback actions
146
+ - [x] YOLO face tracking (replaces DOA sound source localization)
147
+ - [x] 100Hz unified motion control loop
148
+
149
+ ### Application Architecture
150
+ - [x] Compliant with Reachy Mini App architecture
151
+
152
+
153
+
154
+ ## File List
155
+
156
+ ```
157
+ reachy_mini_ha_voice/
158
+ ├── reachy_mini_ha_voice/
159
+ │ ├── __init__.py # Package initialization
160
+ │ ├── __main__.py # Command line entry
161
+ │ ├── main.py # ReachyMiniApp entry
162
+ │ ├── voice_assistant.py # Voice assistant service
163
+ │ ├── satellite.py # ESPHome protocol handling
164
+ │ ├── audio_player.py # Audio player (TTS + Sendspin)
165
+ │ ├── camera_server.py # MJPEG camera stream server + face tracking
166
+ │ ├── head_tracker.py # YOLO face detector
167
+ │ ├── motion.py # Motion control (high-level API)
168
+ │ ├── movement_manager.py # Unified movement manager (100Hz control loop)
169
+ │ ├── animation_player.py # JSON-driven animation system
170
+ │ ├── speech_sway.py # Voice-driven head micro-movements
171
+ │ ├── models.py # Data models
172
+ │ ├── entity.py # ESPHome base entity
173
+ │ ├── entity_extensions.py # Extended entity types
174
+ │ ├── entity_registry.py # Entity registry
175
+ │ ├── reachy_controller.py # Reachy Mini controller wrapper
176
+ │ ├── gesture_detector.py # HaGRID gesture detection
177
+ │ ├── api_server.py # HTTP API server
178
+ │ ├── zeroconf.py # mDNS discovery (ESPHome + Sendspin)
179
+ │ └── util.py # Utility functions
180
+ │ └── animations/ # Animation definitions
181
+ │ ├── conversation_animations.json # Conversation state animations
182
+ │ └── emotion_keywords.json # Emotion keyword mapping (280+ keywords)
183
+ │ └── wakewords/ # Wake word models
184
+ │ ├── okay_nabu.json/.tflite
185
+ │ ├── hey_jarvis.json/.tflite (openWakeWord)
186
+ │ ├── alexa.json/.tflite
187
+ │ ├── hey_luna.json/.tflite
188
+ │ └── stop.json/.tflite # Stop word detection
189
+ ├── sounds/ # Sound effect files (auto-download)
190
+ │ ├── wake_word_triggered.flac
191
+ │ └── timer_finished.flac
192
+ ├── pyproject.toml # Project configuration
193
+ ├── README.md # Documentation
194
+ ├── changelog.json # Version changelog
195
+ └── PROJECT_PLAN.md # Project plan
196
+ ```
197
+
198
+ ## Dependencies
199
+
200
+ ```toml
201
+ dependencies = [
202
+ # Reachy Mini SDK (provides audio via media system)
203
+ "reachy-mini",
204
+
205
+ # Audio processing (fallback when not on Reachy Mini)
206
+ "sounddevice>=0.5.0",
207
+ "soundfile>=0.13.0",
208
+ "numpy>=2.0.0",
209
+
210
+ # Camera streaming
211
+ "opencv-python>=4.10.0",
212
+
213
+ # Wake word detection (local)
214
+ "pymicro-wakeword>=2.0.0,<3.0.0",
215
+ "pyopen-wakeword>=1.0.0,<2.0.0",
216
+
217
+ # ESPHome protocol (communication with Home Assistant)
218
+ "aioesphomeapi>=43.10.1",
219
+ "zeroconf>=0.140.0",
220
+
221
+ # Motion control (head movements)
222
+ "scipy>=1.14.0",
223
+
224
+ # Face tracking (YOLO-based head detection)
225
+ "ultralytics>=8.3.0",
226
+ "supervision>=0.25.0",
227
+ "huggingface_hub>=0.27.0",
228
+
229
+ # Sendspin synchronized audio (optional, for multi-room playback)
230
+ "aiosendspin>=2.0.1",
231
+
232
+ # Gesture detection (ONNX runtime for HaGRID models)
233
+ "onnxruntime>=1.18.0",
234
+ ]
235
+ ```
236
+
237
+ ## Usage Flow
238
+
239
+ 1. **Install App**
240
+ - Install `reachy_mini_ha_voice` from Reachy Mini App Store
241
+
242
+ 2. **Start App**
243
+ - App auto-starts ESPHome server (port 6053)
244
+ - Auto-downloads required models and sounds
245
+
246
+ 3. **Connect Home Assistant**
247
+ - Home Assistant auto-discovers device (mDNS)
248
+ - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
249
+
250
+ 4. **Use Voice Assistant**
251
+ - Say "Okay Nabu" to wake
252
+ - Speak command
253
+ - Reachy Mini provides motion feedback
254
+
255
+ ## ESPHome Entity Planning
256
+
257
+ Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
258
+
259
+ ### Implemented Entities
260
+
261
+ | Entity Type | Name | Description |
262
+ |-------------|------|-------------|
263
+ | Media Player | `media_player` | Audio playback control |
264
+ | Voice Assistant | `voice_assistant` | Voice assistant pipeline |
265
+
266
+ ### Implemented Control Entities (Read/Write)
267
+
268
+ #### Phase 1-3: Basic Controls and Pose
269
+
270
+ | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
271
+ |---------------------|------|---------|---------------|-------------|
272
+ | `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
273
+ | `Select` | `motor_mode` | `set_motor_control_mode()` | enabled/disabled/gravity_compensation | Motor mode selection |
274
+ | `Switch` | `motors_enabled` | `enable_motors()` / `disable_motors()` | on/off | Motor torque switch |
275
+ | `Button` | `wake_up` | `mini.wake_up()` | - | Wake robot action |
276
+ | `Button` | `go_to_sleep` | `mini.goto_sleep()` | - | Sleep robot action |
277
+ | `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
278
+ | `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
279
+ | `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
280
+ | `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
281
+ | `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
282
+ | `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
283
+ | `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
284
+ | `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
285
+ | `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
286
+
287
+ #### Phase 4: Gaze Control
288
+
289
+ | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
290
+ |---------------------|------|---------|---------------|-------------|
291
+ | `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
292
+ | `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
293
+ | `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
294
+
295
+
296
+ ### Implemented Sensor Entities (Read-only)
297
+
298
+ #### Phase 1 & 5: Basic Status and Audio Sensors
299
+
300
+ | ESPHome Entity Type | Name | SDK API | Description |
301
+ |---------------------|------|---------|-------------|
302
+ | `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
303
+ | `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
304
+ | `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
305
+ | `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
306
+ | `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
307
+
308
+ #### Phase 6: Diagnostic Information
309
+
310
+ | ESPHome Entity Type | Name | SDK API | Description |
311
+ |---------------------|------|---------|-------------|
312
+ | `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
313
+ | `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
314
+ | `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
315
+ | `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
316
+ | `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
317
+ | `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
318
+
319
+ #### Phase 7: IMU Sensors (Wireless version only)
320
+
321
+ | ESPHome Entity Type | Name | SDK API | Description |
322
+ |---------------------|------|---------|-------------|
323
+ | `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
324
+ | `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
325
+ | `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
326
+ | `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
327
+ | `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
328
+ | `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
329
+ | `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
330
+
331
+ #### Phase 8-12: Extended Features
332
+
333
+ | ESPHome Entity Type | Name | Description |
334
+ |---------------------|------|-------------|
335
+ | `Select` | `emotion` | Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust) |
336
+ | `Number` | `microphone_volume` | Microphone volume (0-100%) |
337
+ | `Camera` | `camera` | ESPHome Camera entity (live preview) |
338
+ | `Number` | `led_brightness` | LED brightness (0-100%) - disabled, see Phase 11 |
339
+ | `Select` | `led_effect` | LED effect (off/solid/breathing/rainbow/doa) - disabled, see Phase 11 |
340
+ | `Number` | `led_color_r` | LED red component (0-255) - disabled, see Phase 11 |
341
+ | `Number` | `led_color_g` | LED green component (0-255) - disabled, see Phase 11 |
342
+ | `Number` | `led_color_b` | LED blue component (0-255) - disabled, see Phase 11 |
343
+ | `Switch` | `agc_enabled` | Auto gain control switch |
344
+ | `Number` | `agc_max_gain` | AGC max gain (0-30 dB) |
345
+ | `Number` | `noise_suppression` | Noise suppression level (0-100%) |
346
+ | `Binary Sensor` | `echo_cancellation_converged` | Echo cancellation convergence status |
347
+
348
+ > **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
349
+ > using `Number` type for bidirectional control. Call `goto_target()` when setting new values, call `get_current_head_pose()` etc. when reading current values.
350
+
351
+ ### Implementation Priority
352
+
353
+ 1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
354
+ - [x] `daemon_state` - Daemon status sensor
355
+ - [x] `backend_ready` - Backend ready status
356
+ - [x] `error_message` - Error message
357
+ - [x] `speaker_volume` - Speaker volume control
358
+
359
+ 2. **Phase 2 - Motor Control** (High Priority) ✅ **Completed**
360
+ - [x] `motors_enabled` - Motor switch
361
+ - [x] `motor_mode` - Motor mode selection (enabled/disabled/gravity_compensation)
362
+ - [x] `wake_up` / `go_to_sleep` - Wake/sleep buttons
363
+
364
+ 3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
365
+ - [x] `head_x/y/z` - Head position control
366
+ - [x] `head_roll/pitch/yaw` - Head angle control
367
+ - [x] `body_yaw` - Body yaw angle control
368
+ - [x] `antenna_left/right` - Antenna angle control
369
+
370
+ 4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
371
+ - [x] `look_at_x/y/z` - Gaze point coordinate control
372
+
373
+ 5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
374
+ - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
375
+ - [x] `speech_detected` - Speech detection status
376
+ - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
377
+ - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
378
+ - Note: DOA is read only once at wakeup to avoid putting pressure on the daemon; face tracking takes over afterwards
379
+
380
+ 6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
381
+ - [x] `control_loop_frequency` - Control loop frequency
382
+ - [x] `sdk_version` - SDK version
383
+ - [x] `robot_name` - Robot name
384
+ - [x] `wireless_version` - Wireless version flag
385
+ - [x] `simulation_mode` - Simulation mode flag
386
+ - [x] `wlan_ip` - Wireless IP address
387
+
388
+ 7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
389
+ - [x] `imu_accel_x/y/z` - Accelerometer
390
+ - [x] `imu_gyro_x/y/z` - Gyroscope
391
+ - [x] `imu_temperature` - IMU temperature
392
+
393
+ 8. **Phase 8 - Emotion Control** ✅ **Completed**
394
+ - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
395
+
396
+ 9. **Phase 9 - Audio Control** ✅ **Completed**
397
+ - [x] `microphone_volume` - Microphone volume control (0-100%)
398
+
399
+ 10. **Phase 10 - Camera Integration** ✅ **Completed**
400
+ - [x] `camera` - ESPHome Camera entity (live preview)
401
+
402
+ 11. **Phase 11 - LED Control** ❌ **Disabled (LEDs hidden inside robot)**
403
+ - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
404
+ - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
405
+ - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
406
+
407
+ 12. **Phase 12 - Audio Processing Parameters** ✅ **Completed**
408
+ - [x] `agc_enabled` - Auto gain control switch
409
+ - [x] `agc_max_gain` - AGC max gain (0-30 dB)
410
+ - [x] `noise_suppression` - Noise suppression level (0-100%)
411
+ - [x] `echo_cancellation_converged` - Echo cancellation convergence status (read-only)
412
+
413
+ 13. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
414
+ - [x] `sendspin_enabled` - Sendspin switch (Switch)
415
+ - [x] `sendspin_url` - Sendspin server URL (Text Sensor)
416
+ - [x] `sendspin_connected` - Sendspin connection status (Binary Sensor)
417
+ - [x] AudioPlayer integrates aiosendspin library
418
+ - [x] TTS audio sent to both local speaker and Sendspin server
419
+
420
+ 14. **Phase 22 - Gesture Detection** ✅ **Completed**
421
+ - [x] `gesture_detected` - Detected gesture name (Text Sensor)
422
+ - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
423
+ - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
424
+ - [x] Real-time state push to Home Assistant
425
+ - [x] 18 supported gestures:
426
+ | Gesture | Emoji | Gesture | Emoji |
427
+ |---------|-------|---------|-------|
428
+ | call | 🤙 | like | 👍 |
429
+ | dislike | 👎 | mute | 🤫 |
430
+ | fist | ✊ | ok | 👌 |
431
+ | four | 🖐️ | one | ☝️ |
432
+ | palm | ✋ | peace | ✌️ |
433
+ | peace_inverted | 🔻✌️ | rock | 🤘 |
434
+ | stop | 🛑 | stop_inverted | 🔻🛑 |
435
+ | three | 3️⃣ | three2 | 🤟 |
436
+ | two_up | ✌️☝️ | two_up_inverted | 🔻✌️☝️ |
437
+
438
+ ---
439
+
440
+ ## 🎉 Phase 1-13 + Phase 22 Entities Completed!
441
+
442
+ **Total Completed: 47 entities**
443
+ - Phase 1: 4 entities (Basic status and volume)
444
+ - Phase 2: 4 entities (Motor control)
445
+ - Phase 3: 9 entities (Pose control)
446
+ - Phase 4: 3 entities (Gaze control)
447
+ - Phase 5: 2 entities (Audio sensors)
448
+ - Phase 6: 6 entities (Diagnostic information)
449
+ - Phase 7: 7 entities (IMU sensors)
450
+ - Phase 8: 1 entity (Emotion control)
451
+ - Phase 9: 1 entity (Microphone volume)
452
+ - Phase 10: 1 entity (Camera)
453
+ - Phase 11: 0 entities (LED control - Disabled)
454
+ - Phase 12: 4 entities (Audio processing parameters)
455
+ - Phase 13: 3 entities (Sendspin audio output)
456
+ - Phase 22: 2 entities (Gesture detection)
457
+
458
+
459
+ ---
460
+
461
+ ## 🚀 Voice Assistant Enhancement Features Implementation Status
462
+
463
+ ### Phase 14 - Emotion Action Feedback System (Enhanced) ✅
464
+
465
+ **Implementation Status**: Full keyword-based emotion detection implemented with 280+ Chinese/English keywords mapped to 35 emotion categories
466
+
467
+ **Implemented Features**:
468
+ - ✅ Phase 8 Emotion Selector entity (`emotion`)
469
+ - ✅ Basic emotion action playback API (`_play_emotion`)
470
+ - ✅ Emotion mapping: Happy/Sad/Angry/Fear/Surprise/Disgust
471
+ - ✅ Integration with HuggingFace action library (`pollen-robotics/reachy-mini-emotions-library`)
472
+ - ✅ SpeechSway system for natural head micro-movements during conversation (non-blocking)
473
+ - ✅ Tap detection disabled during emotion playback (polls daemon API for completion)
474
+ - ✅ **NEW (v0.8.0)**: Comprehensive emotion keyword detection from conversation text
475
+ - ✅ **NEW (v0.8.0)**: 280+ Chinese and English keywords mapped to 35 emotion categories
476
+ - ✅ **NEW (v0.8.0)**: Auto-trigger expressions based on text patterns in LLM responses
477
+
478
+ **Emotion Keyword Categories (v0.8.0)**:
479
+
480
+ | Expression ID | Category | Chinese Keywords | English Keywords |
481
+ |---------------|----------|------------------|------------------|
482
+ | `cheerful1` | Happy | 太棒了、开心、高兴 | great, awesome, happy |
483
+ | `laughing1` | Laughing | 哈哈、笑死、好笑 | haha, lol, funny |
484
+ | `enthusiastic1` | Excited | 兴奋、激动、耶 | excited, yay, cool |
485
+ | `amazed1` | Amazed | 神奇、厉害、牛 | amazing, incredible |
486
+ | `surprised1` | Surprised | 哇、天啊、真的吗 | wow, omg, really |
487
+ | `loving1` | Love | 爱、喜欢、可爱 | love, cute, adore |
488
+ | `grateful1` | Grateful | 谢谢、感谢 | thanks, appreciate |
489
+ | `welcoming1` | Welcome | 欢迎、你好 | hello, welcome |
490
+ | `helpful1` | Helpful | 当然、好的、没问题 | sure, of course |
491
+ | `curious1` | Curious | 好奇、有趣 | curious, interesting |
492
+ | `thoughtful1` | Thinking | 嗯、让我想想 | hmm, let me think |
493
+ | `sad1` | Sad | 难过、伤心、可惜 | sad, unfortunately |
494
+ | `oops1` | Oops | 抱歉、糟糕、哎呀 | sorry, oops |
495
+ | `confused1` | Confused | 困惑、搞不懂 | confused, puzzled |
496
+ | `fear1` | Fear | 害怕、可怕 | afraid, scared |
497
+ | `rage1` | Angry | 生气、愤怒 | angry, mad |
498
+ | `yes1` | Yes | 是的、对、没错 | yes, correct |
499
+ | `no1` | No | 不是、不行 | no, wrong |
500
+ | ... | ... | ... | ... |
501
+
502
+ **Design Decisions**:
503
+ - 🎯 No auto-play of full emotion actions during conversation to avoid blocking
504
+ - 🎯 Use voice-driven head sway (SpeechSway) for natural motion feedback
505
+ - 🎯 Emotion actions retained as manual trigger feature via ESPHome entity
506
+ - 🎯 Tap detection waits for actual move completion via `/api/move/running` polling
507
+ - 🎯 **NEW**: Keyword detection is case-insensitive and configurable via JSON
508
+
509
+ **Partially Implemented**:
510
+ - 🟡 Intent recognition and emotion matching (basic keyword matching implemented)
511
+ - ❌ Dance action library integration
512
+ - ❌ Context awareness (e.g., weather query - sunny plays happy, rainy plays sad)
513
+
514
+ **Code Locations**:
515
+ - `animations/emotion_keywords.json` - **NEW**: Emotion keyword mapping configuration (280+ keywords)
516
+ - `entity_registry.py:633-658` - Emotion Selector entity
517
+ - `satellite.py:_load_emotion_keywords()` - Load emotion keywords from JSON
518
+ - `satellite.py:_detect_and_play_emotion()` - Auto-detect emotions from text
519
+ - `satellite.py:_play_emotion()` - Emotion playback with move UUID tracking
520
+ - `satellite.py:_wait_for_move_completion()` - Polls daemon API for move completion
521
+ - `motion.py:132-156` - Conversation start motion control (uses SpeechSway)
522
+ - `movement_manager.py:541-595` - Move queue management (allows SpeechSway overlay)
523
+
524
+ **Actual Behavior**:
525
+
526
+ | Voice Assistant Event | Actual Action | Implementation Status |
527
+ |----------------------|---------------|----------------------|
528
+ | Wake word detected | Turn toward sound source + nod confirmation | ✅ Implemented |
529
+ | Conversation start | Voice-driven head micro-movements (SpeechSway) | ✅ Implemented |
530
+ | During conversation | Continuous voice-driven micro-movements + breathing animation | ✅ Implemented |
531
+ | Conversation end | Return to neutral position + breathing animation | ✅ Implemented |
532
+ | Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
533
+
534
+ **Technical Details**:
535
+ ```python
536
+ # motion.py - Use SpeechSway instead of full emotion actions during conversation
537
+ def on_speaking_start(self):
538
+ self._is_speaking = True
539
+ self._movement_manager.set_state(RobotState.SPEAKING)
540
+ # SpeechSway automatically generates natural head micro-movements based on audio loudness
541
+ # No full emotion actions played to avoid blocking conversation experience
542
+
543
+ # movement_manager.py - Motion layering system
544
+ # 1. Move queue (emotion actions) - Sets base pose
545
+ # 2. Action (nod/shake etc.) - Overlays on base pose
546
+ # 3. SpeechSway - Voice-driven micro-movements, can coexist with Move
547
+ # 4. Breathing - Idle breathing animation
548
+ ```
549
+
550
+ **Original Plan** (Decided not to implement to avoid blocking conversation):
551
+
552
+ | Voice Assistant Event | Original Planned Action | Reason Not Implemented |
553
+ |----------------------|------------------------|------------------------|
554
+ | Positive response received | Play "happy" action | Full action would block conversation fluency |
555
+ | Negative response received | Play "sad" action | Full action would block conversation fluency |
556
+ | Play music/entertainment | Play "dance" action | Full action would block conversation fluency |
557
+ | Timer completed | Play "alert" action | Full action would block conversation fluency |
558
+ | Error/cannot understand | Play "confused" action | Full action would block conversation fluency |
559
+
560
+ **Manual Emotion Trigger Example**:
561
+ ```yaml
562
+ # Home Assistant automation example - Manual emotion trigger
563
+ automation:
564
+ - alias: "Reachy Good Morning Greeting"
565
+ trigger:
566
+ - platform: time
567
+ at: "07:00:00"
568
+ action:
569
+ - service: select.select_option
570
+ target:
571
+ entity_id: select.reachy_mini_emotion
572
+ data:
573
+ option: "Happy"
574
+ ```
575
+
576
+ ### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) ✅ **Completed**
577
+
578
+ **Goal**: Implement natural face tracking so robot looks at speaker during conversation.
579
+
580
+ **Design Decision**:
581
+ - ✅ DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
582
+ - ✅ YOLO face detection: Takes over after initial turn for continuous tracking
583
+ - ✅ Body follows head rotation: Body yaw automatically syncs with head yaw for natural tracking
584
+ - Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking, body following enables natural whole-body tracking similar to human behavior
585
+
586
+ **Wakeup Turn-to-Sound Flow**:
587
+ 1. Wake word detected → Read DOA angle once (avoid daemon pressure)
588
+ 2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
589
+ 3. Face tracking takes over for continuous tracking during conversation
590
+
591
+ **Implemented Features**:
592
+
593
+ | Feature | Description | Implementation Location | Status |
594
+ |---------|-------------|------------------------|--------|
595
+ | DOA turn-to-sound | Turn toward speaker at wakeup | `satellite.py:_turn_to_sound_source()` | ✅ Implemented |
596
+ | YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `head_tracker.py` | ✅ Implemented |
597
+ | Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
598
+ | look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
599
+ | Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
600
+ | face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
601
+ | Body follows head rotation | Body yaw syncs with head yaw extracted from final pose matrix | `movement_manager.py:_compose_final_pose()` | ✅ Implemented (v0.8.3) |
602
+ | DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
603
+ | face_detected entity | Binary sensor for face detection state | `entity_registry.py` | ✅ Implemented |
604
+ | Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
605
+ | Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
606
+
607
+ **Resource Optimization (v0.5.1, updated v0.6.2)**:
608
+ - During conversation (listening/thinking/speaking): High-frequency tracking 15fps
609
+ - Idle with face detected: High-frequency tracking 15fps
610
+ - Idle without face for 5s: Low-power mode 2fps
611
+ - Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
612
+ - Gesture detection only runs when face detected recently (within 5s)
613
+ - Immediately restore high-frequency tracking when face detected
614
+
615
+ **Code Locations**:
616
+ - `satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
617
+ - `head_tracker.py` - YOLO face detector (`HeadTracker` class)
618
+ - `camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
619
+ - `camera_server.py:set_conversation_mode()` - Conversation mode switch API
620
+ - `satellite.py:_set_conversation_mode()` - Voice assistant state integration
621
+ - `movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
622
+ - `movement_manager.py:_compose_final_pose()` - Body yaw follows head yaw (v0.8.3)
623
+
624
+ **Technical Details**:
625
+ ```python
626
+ # camera_server.py - Adaptive frame rate face tracking
627
+ class MJPEGCameraServer:
628
+ def __init__(self):
629
+ self._fps_high = 15 # During conversation/face detected
630
+ self._fps_low = 2 # Idle without face (5-30s)
631
+ self._fps_idle = 0.5 # Ultra-low power (>30s without face)
632
+ self._low_power_threshold = 5.0 # 5s without face switches to low power
633
+ self._idle_threshold = 30.0 # 30s without face switches to idle mode
634
+
635
+ def _should_run_ai_inference(self, current_time):
636
+ # Conversation mode: Always high-frequency tracking
637
+ if self._in_conversation:
638
+ return True
639
+ # High-frequency mode: Track every frame
640
+ if self._current_fps == self._fps_high:
641
+ return True
642
+ # Low/idle power mode: Periodic detection
643
+ return time.since_last_check >= 1/self._current_fps
644
+
645
+ # satellite.py - Voice assistant state integration
646
+ def _reachy_on_listening(self):
647
+ self._set_conversation_mode(True) # Start conversation, high-frequency tracking
648
+
649
+ def _reachy_on_idle(self):
650
+ self._set_conversation_mode(False) # End conversation, adaptive tracking
651
+
652
+ # movement_manager.py - Body follows head rotation (v0.8.3)
653
+ # This enables natural body rotation when tracking faces, similar to how
654
+ # the reference project's sweep_look tool synchronizes body_yaw with head_yaw.
655
+ def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
656
+ # ... compose head pose from all motion sources ...
657
+
658
+ # Extract yaw from final head pose rotation matrix
659
+ # The rotation matrix uses xyz euler convention
660
+ final_rotation = R.from_matrix(final_head[:3, :3])
661
+ _, _, final_head_yaw = final_rotation.as_euler('xyz')
662
+
663
+ # Body follows head yaw directly
664
+ # SDK's automatic_body_yaw (inverse_kinematics_safe) only handles collision
665
+ # prevention by clamping relative angle to max 65°, not active following
666
+ body_yaw = final_head_yaw
667
+
668
+ return final_head, (antenna_right, antenna_left), body_yaw
669
+ ```
670
+
671
+ **Body Following Head Rotation (v0.8.3)**:
672
+ - SDK's `automatic_body_yaw` is only **collision protection**, not active body following
673
+ - The `inverse_kinematics_safe` function with `max_relative_yaw=65°` only prevents head-body collision
674
+ - To enable natural body following, `body_yaw` must be explicitly set to match `head_yaw`
675
+ - Body yaw is extracted from final head pose matrix using scipy's `R.from_matrix().as_euler('xyz')`
676
+ - This matches the reference project's `sweep_look.py` behavior where `target_body_yaw = head_yaw`
677
+
678
+
679
+ ### Phase 16 - Cartoon Style Motion Mode (Partial) 🟡
680
+
681
+ **Goal**: Use SDK interpolation techniques for more expressive robot movements.
682
+
683
+ **SDK Support**: `InterpolationTechnique` enum
684
+ - `LINEAR` - Linear, mechanical feel
685
+ - `MIN_JERK` - Minimum jerk, natural and smooth (default)
686
+ - `EASE_IN_OUT` - Ease in-out, elegant
687
+ - `CARTOON` - Cartoon style, with bounce effect, lively and cute
688
+
689
+ **Implemented Features**:
690
+ - ✅ 100Hz unified control loop (`movement_manager.py`) - Restored to 100Hz after daemon update
691
+ - ✅ JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
692
+ - ✅ Conversation state animations (idle/listening/thinking/speaking)
693
+ - ✅ Pose change detection - Only send commands on significant changes (threshold 0.005)
694
+ - ✅ State query caching - 2s TTL, reduces daemon load
695
+ - ✅ Smooth interpolation (ease in-out curve)
696
+ - ✅ Command queue mode - Thread-safe external API
697
+ - ✅ Error throttling - Prevents log explosion
698
+ - ✅ Connection health monitoring - Auto-detect and recover from connection loss
699
+
700
+ **Animation System (v0.5.13)**:
701
+ - `AnimationPlayer` class loads animations from `conversation_animations.json`
702
+ - Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
703
+ - Smooth transitions between animations (configurable duration)
704
+ - State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
705
+
706
+ **Not Implemented**:
707
+ - ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
708
+ - ❌ Exaggerated cartoon bounce effects
709
+
710
+ **Code Locations**:
711
+ - `animation_player.py` - AnimationPlayer class
712
+ - `animations/conversation_animations.json` - Animation definitions
713
+ - `movement_manager.py` - 100Hz control loop with animation integration
714
+
715
+ **Scene Implementation Status**:
716
+
717
+ | Scene | Recommended Interpolation | Effect | Status |
718
+ |-------|--------------------------|--------|--------|
719
+ | Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
720
+ | Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
721
+ | Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
722
+ | Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
723
+ | Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
724
+ | Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
725
+
726
+ ### Phase 17 - Antenna Sync Animation During Speech (Completed) ✅
727
+
728
+ **Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
729
+
730
+ **Implemented Features**:
731
+ - ✅ JSON-driven animation system with antenna movements
732
+ - ✅ Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
733
+ - ✅ State-specific antenna animations (listening/thinking/speaking)
734
+ - ✅ Smooth transitions between animation states
735
+
736
+ **Code Locations**:
737
+ - `animation_player.py` - AnimationPlayer with antenna offset calculation
738
+ - `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
739
+ - `movement_manager.py` - Antenna offset composition in final pose
740
+
741
+ ### Phase 18 - Visual Gaze Interaction (Not Implemented) ❌
742
+
743
+ **Goal**: Use camera to detect faces for eye contact.
744
+
745
+ **SDK Support**:
746
+ - `look_at_image(u, v)` - Look at point in image
747
+ - `look_at_world(x, y, z)` - Look at world coordinate point
748
+ - `media.get_frame()` - Get camera frame (✅ Already implemented in `camera_server.py:146`)
749
+
750
+ **Not Implemented Features**:
751
+
752
+ | Feature | Description | Status |
753
+ |---------|-------------|--------|
754
+ | Face detection | Use OpenCV/MediaPipe to detect faces | ❌ Not implemented |
755
+ | Eye tracking | Look at speaker's face during conversation | ❌ Not implemented |
756
+ | Multi-person switching | When multiple people detected, look at current speaker | ❌ Not implemented |
757
+ | Idle scanning | Randomly look around when idle | ❌ Not implemented |
758
+
759
+ ### Phase 19 - Gravity Compensation Interactive Mode (Partial) 🟡
760
+
761
+ **Goal**: Allow users to physically touch and guide robot head for "teaching" style interaction.
762
+
763
+ **SDK Support**: `enable_gravity_compensation()` - Motors enter gravity compensation mode, can be manually moved
764
+
765
+ **Implemented Features**:
766
+ - ✅ Gravity compensation mode switch (`motor_mode` Select entity, option "gravity_compensation")
767
+ - ✅ `reachy_controller.py:236-237` - Gravity compensation API call
768
+
769
+ **Not Implemented**:
770
+ - ❌ Teaching mode - Record motion trajectory
771
+ - ❌ Save/playback custom actions
772
+ - ❌ Voice command triggered teaching flow
773
+
774
+ **Application Scenarios**:
775
+ - ❌ User says "Let me teach you a move" → Enter gravity compensation mode
776
+ - ❌ User manually moves head → Record motion trajectory
777
+ - ❌ User says "Remember this" → Save action
778
+ - ❌ User says "Do that action again" → Playback recorded action
779
+
780
+ ### Phase 20 - Environment Awareness Response (Partial) 🟡
781
+
782
+ **Goal**: Use IMU sensors to sense environment changes and respond.
783
+
784
+ **SDK Support**:
785
+ - ✅ `mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
786
+ - ✅ `mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
787
+
788
+ **Implemented Features**:
789
+
790
+ | Feature | Description | Status |
791
+ |---------|-------------|--------|
792
+ | Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
793
+ | IMU sensor entities | Accelerometer and gyroscope exposed to HA | ✅ Implemented |
794
+
795
+ > **Note**: Tap-to-wake feature was removed in v0.5.16 due to false triggers from robot movement. Continuous conversation is now controlled via Home Assistant switch.
796
+
797
+ **Not Implemented**:
798
+
799
+ | Detection Event | Response Action | Status |
800
+ |-----------------|-----------------|--------|
801
+ | Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
802
+ | Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
803
+ | Long idle | Enter sleep animation | ❌ Not implemented |
804
+
805
+ ### Phase 21 - Home Assistant Scene Integration (Not Implemented) ❌
806
+
807
+ **Goal**: Trigger robot actions based on Home Assistant scenes/automations.
808
+
809
+ **Implementation**: Via ESPHome service calls
810
+
811
+ **Not Implemented Scenes**:
812
+
813
+ | HA Scene | Robot Response | Status |
814
+ |----------|----------------|--------|
815
+ | Good morning scene | Play wake action + "Good morning!" | ❌ Not implemented |
816
+ | Good night scene | Play sleep action + "Good night~" | ❌ Not implemented |
817
+ | Someone home | Turn toward door + wave + "Welcome home!" | ❌ Not implemented |
818
+ | Doorbell rings | Turn toward door + alert action | ❌ Not implemented |
819
+ | Play music | Sway with music rhythm | ❌ Not implemented |
820
+
821
+
822
+ ---
823
+
824
+ ## 📊 Feature Implementation Summary
825
+
826
+ ### ✅ Completed Features
827
+
828
+ #### Core Voice Assistant (Phase 1-12)
829
+ - **40+ ESPHome entities** - All implemented (Phase 11 LED disabled)
830
+ - **Basic voice interaction** - Wake word detection (microWakeWord/openWakeWord), STT/TTS integration
831
+ - **Motion feedback** - Nod, shake, gaze and other basic actions
832
+ - **Audio processing** - AGC, noise suppression, echo cancellation
833
+ - **Camera stream** - MJPEG live preview with ESPHome Camera entity
834
+
835
+ #### Extended Features (Phase 13-22)
836
+ - **Phase 13** ✅ - Sendspin multi-room audio support
837
+ - **Phase 14** ✅ - Emotion keyword detection (280+ keywords, 35 categories)
838
+ - **Phase 15** ✅ - Face tracking with body following (DOA + YOLO + body_yaw sync)
839
+ - **Phase 16** ✅ - JSON-driven animation system (100Hz control loop)
840
+ - **Phase 17** ✅ - Antenna sync animation during speech
841
+ - **Phase 22** ✅ - Gesture detection (HaGRID ONNX, 18 gestures)
842
+
843
+ ### 🟡 Partially Implemented Features
844
+
845
+ - **Phase 19** - Gravity compensation mode switch (teaching flow not implemented)
846
+ - **Phase 20** - IMU sensor entities (trigger logic not implemented)
847
+
848
+ ### ❌ Not Implemented Features
849
+
850
+ - **Phase 18** - Visual gaze interaction (eye contact with multiple people)
851
+ - **Phase 21** - Home Assistant scene integration (morning/night routines)
852
+
853
+ ---
854
+
855
+ ## Feature Priority Summary (Updated v0.8.3)
856
+
857
+ ### Completed ✅
858
+ - ✅ **Phase 1-12**: Core ESPHome entities and voice assistant
859
+ - ✅ **Phase 13**: Sendspin audio playback
860
+ - ✅ **Phase 14**: Emotion keyword detection and auto-trigger
861
+ - ✅ **Phase 15**: Face tracking with body following
862
+ - ✅ **Phase 16**: JSON-driven animation system
863
+ - ✅ **Phase 17**: Antenna sync animation
864
+ - ✅ **Phase 22**: Gesture detection
865
+
866
+ ### Partial 🟡
867
+ - 🟡 **Phase 19**: Gravity compensation mode (teaching flow pending)
868
+ - 🟡 **Phase 20**: Environment awareness (IMU entities done, triggers pending)
869
+
870
+ ### Not Implemented ❌
871
+ - ❌ **Phase 18**: Visual gaze interaction
872
+ - ❌ **Phase 21**: Home Assistant scene integration
873
+
874
+ ---
875
+
876
+ ## 📈 Completion Statistics
877
+
878
+ | Phase | Status | Completion | Notes |
879
+ |-------|--------|------------|-------|
880
+ | Phase 1-12 | ✅ Complete | 100% | 42 ESPHome entities implemented (Phase 11 LED disabled) |
881
+ | Phase 13 | ✅ Complete | 100% | Sendspin audio playback support |
882
+ | Phase 14 | ✅ Complete | 95% | Emotion keyword detection with 280+ keywords, 35 categories |
883
+ | Phase 15 | ✅ Complete | 100% | Face tracking with DOA, YOLO detection, body follows head (v0.8.3) |
884
+ | Phase 16 | ✅ Complete | 100% | JSON-driven animation system (100Hz control loop) |
885
+ | Phase 17 | ✅ Complete | 100% | Antenna sync animation during speech |
886
+ | Phase 18 | ❌ Not done | 10% | Camera implemented, missing multi-person gaze |
887
+ | Phase 19 | 🟡 Partial | 40% | Gravity compensation mode switch, missing teaching flow |
888
+ | Phase 20 | 🟡 Partial | 30% | IMU sensors exposed, missing trigger logic |
889
+ | Phase 21 | ❌ Not done | 0% | Home Assistant scene integration not implemented |
890
+ | Phase 22 | ✅ Complete | 100% | Gesture detection with HaGRID ONNX models |
891
+
892
+ **Overall Completion**: **Phase 1-17 + 22: ~98%** | **Phase 18-21: ~20%**
893
+
894
+
895
+ ---
896
+
897
+ ## 🔧 Daemon Crash Fix (2026-01-05)
898
+
899
+ ### Problem Description
900
+ During long-term operation, the `reachy_mini daemon` would crash, causing the robot to become unresponsive.
901
+
902
+ ### Root Cause
903
+ 1. **100Hz control loop too frequent** - Calling `robot.set_target()` every 10ms, even when pose hasn't changed
904
+ 2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
905
+ 3. **Missing change detection** - Even when pose hasn't changed, continues sending same commands
906
+ 4. **Zenoh message queue blocking** - Accumulated 150+ messages per second, daemon cannot process in time
907
+
908
+ ### Fix Solution
909
+
910
+ #### 1. Control loop frequency (movement_manager.py)
911
+ ```python
912
+ # Initially reduced from 100Hz to 20Hz, then later restored to 100Hz
913
+ # See "Update (2026-01-12)" below for current status
914
+ CONTROL_LOOP_FREQUENCY_HZ = 100 # Now restored to 100Hz
915
+ ```
916
+
917
+ #### 2. Add pose change detection (movement_manager.py)
918
+ ```python
919
+ # Only send commands on significant pose changes
920
+ if self._last_sent_pose is not None:
921
+ max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
922
+ if max_diff < 0.001: # Threshold: 0.001 rad or 0.001 m
923
+ return # Skip sending
924
+ ```
925
+
926
+ #### 3. State query caching (reachy_controller.py)
927
+ ```python
928
+ # Cache daemon status query results
929
+ self._cache_ttl = 0.1 # 100ms TTL
930
+ self._last_status_query = 0.0
931
+
932
+ def _get_cached_status(self):
933
+ now = time.time()
934
+ if now - self._last_status_query < self._cache_ttl:
935
+ return self._state_cache.get('status') # Use cache
936
+ # ... query and update cache
937
+ ```
938
+
939
+ #### 4. Head pose query caching (reachy_controller.py)
940
+ ```python
941
+ # Cache get_current_head_pose() and get_current_joint_positions() results
942
+ def _get_cached_head_pose(self):
943
+ # Reuse cached results within 100ms
944
+ ```
945
+
946
+ ### Fix Results
947
+
948
+ | Metric | Before Fix | After Fix | Improvement |
949
+ |--------|------------|-----------|-------------|
950
+ | Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
951
+ | State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
952
+ | Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
953
+ | Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
954
+ | Expected stability | Crash within hours | Stable for days | Major improvement |
955
+
956
+ ### Related Files
957
+ - `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
958
+ - `movement_manager.py` - Control loop optimization
959
+ - `reachy_controller.py` - State query caching
960
+
961
+ ### Future Optimization Suggestions
962
+ 1. ⏳ Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
963
+ 2. ⏳ Batch state queries - Get all states at once
964
+ 3. ⏳ Performance monitoring and alerts - Real-time daemon health monitoring
965
+
966
+ ---
967
+
968
+ ## 🔧 Daemon Crash Deep Fix (2026-01-07)
969
+
970
+ > **Update (2026-01-12)**: After daemon updates and further testing, control loop frequency has been restored to 100Hz (same as `reachy_mini_conversation_app`). The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
971
+
972
+ ### Problem Description
973
+ During long-term operation, the `reachy_mini daemon` still crashed; the previous fix was not thorough enough.
974
+
975
+ ### Root Cause Analysis
976
+
977
+ Through deep analysis of SDK source code:
978
+
979
+ 1. **Each `set_target()` sends 3 Zenoh messages**
980
+ - `set_target_head_pose()` - 1 message
981
+ - `set_target_antenna_joint_positions()` - 1 message
982
+ - `set_target_body_yaw()` - 1 message
983
+
984
+ 2. **Daemon control loop is 50Hz**
985
+ - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
986
+ - If message send frequency exceeds 50Hz, daemon may not process in time
987
+
988
+ 3. **Previous 20Hz control loop still too high**
989
+ - 20Hz × 3 messages = 60 messages/second
990
+ - Already exceeds daemon's 50Hz processing capacity
991
+
992
+ 4. **Pose change threshold too small (0.002)**
993
+ - Breathing animation, speech sway, face tracking continuously produce tiny changes
994
+ - Almost every loop triggers `set_target()`
995
+
996
+ ### Fix Solution
997
+
998
+ #### 1. Control loop frequency history (movement_manager.py)
999
+ ```python
1000
+ # Evolution: 100Hz -> 20Hz -> 10Hz -> 100Hz (restored)
1001
+ # After daemon updates, 100Hz is now stable
1002
+ CONTROL_LOOP_FREQUENCY_HZ = 100 # Restored to 100Hz (2026-01-12)
1003
+ ```
1004
+
1005
+ #### 2. Increase pose change threshold (movement_manager.py)
1006
+ ```python
1007
+ # Increased from 0.002 to 0.005
1008
+ # 0.005 rad ≈ 0.29 degrees, still smooth enough
1009
+ self._pose_change_threshold = 0.005
1010
+ ```
1011
+
1012
+ #### 3. Reduce camera/face tracking frequency (camera_server.py)
1013
+ ```python
1014
+ # Reduced from 15fps to 10fps
1015
+ fps: int = 10
1016
+ ```
1017
+
1018
+ #### 4. Increase state cache TTL (reachy_controller.py)
1019
+ ```python
1020
+ # Increased from 1 second to 2 seconds
1021
+ self._cache_ttl = 2.0
1022
+ ```
1023
+
1024
+ ### Fix Results
1025
+
1026
+ > **Note**: Control loop has been restored to 100Hz as of 2026-01-12. The table below shows historical values before restoration.
1027
+
1028
+ | Metric | Before (20Hz) | After (10Hz) | Current (100Hz) |
1029
+ |--------|---------------|--------------|-----------------|
1030
+ | Control loop frequency | 20 Hz | 10 Hz | 100 Hz (restored) |
1031
+ | Max Zenoh messages | 60 msg/s | 30 msg/s | ~100 msg/s (optimized) |
1032
+ | Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ~30 msg/s |
1033
+ | Face tracking frequency | 15 Hz | 10 Hz | Adaptive (2-15 Hz) |
1034
+ | State cache TTL | 1 second | 2 seconds | 2 seconds |
1035
+ | Expected stability | Crash within hours | Stable operation | Stable (daemon updated) |
1036
+
1037
+ ### Key Finding
1038
+
1039
+ The reference app `reachy_mini_conversation_app` uses a 100Hz control loop. After daemon updates and our optimizations (pose change threshold 0.005, state cache TTL 2s), our app now also runs stably at 100Hz.
1040
+
1041
+ ### Related Files
1042
+ - `movement_manager.py` - Control loop frequency and pose threshold
1043
+ - `camera_server.py` - Face tracking frequency
1044
+ - `reachy_controller.py` - State cache TTL
1045
+
1046
+
1047
+ ---
1048
+
1049
+ ## 🔧 Microphone Sensitivity Optimization (2026-01-07)
1050
+
1051
+ ### Problem
1052
+ Low microphone sensitivity: the speaker had to be very close to the robot for voice recognition to work.
1053
+
1054
+ ### Solution
1055
+ Comprehensive ReSpeaker XVF3800 microphone optimization:
1056
+
1057
+ | Parameter | Default | Optimized | Notes |
1058
+ |-----------|---------|-----------|-------|
1059
+ | AGC | Off | On | Auto volume normalization |
1060
+ | AGC max gain | ~15dB | 30dB | Better distant speech pickup |
1061
+ | AGC target level | -25dB | -18dB | Stronger output signal |
1062
+ | Microphone gain | 1.0x | 2.0x | Base gain doubled |
1063
+ | Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
1064
+
1065
+ ### Result
1066
+ Microphone sensitivity improved from ~30cm to ~2-3m effective range.
1067
+
1068
+ ---
1069
+
1070
+ ## 🔧 v0.5.1 Bug Fixes (2026-01-08)
1071
+
1072
+ ### Issue 1: Music Not Resuming After Voice Conversation
1073
+
1074
+ **Fix**: Sendspin now connects to `music_player` instead of `tts_player`
1075
+
1076
+ ### Issue 2: Audio Conflict During Voice Assistant Wakeup
1077
+
1078
+ **Fix**: Added `pause_sendspin()` and `resume_sendspin()` methods to `audio_player.py`
1079
+
1080
+ ### Issue 3: Sendspin Sample Rate Optimization
1081
+
1082
+ **Fix**: Prioritize 16kHz in Sendspin supported formats (hardware limitation)
1083
+
1084
+ ---
1085
+
1086
+ ## 🔧 v0.5.15 Updates (2026-01-11)
1087
+
1088
+ ### Feature 1: Audio Settings Persistence
1089
+
1090
+ AGC Enabled, AGC Max Gain, Noise Suppression settings now persist to `preferences.json`.
1091
+
1092
+ ### Feature 2: Sendspin Discovery Refactoring
1093
+
1094
+ Moved mDNS discovery to `zeroconf.py` for better separation of concerns.
1095
+
1096
+
1097
+ ---
1098
+
1099
+ ### SDK Data Structure Reference
1100
+
1101
+ ```python
1102
+ # Motor control mode
1103
+ class MotorControlMode(str, Enum):
1104
+ Enabled = "enabled" # Torque on, position control
1105
+ Disabled = "disabled" # Torque off
1106
+ GravityCompensation = "gravity_compensation" # Gravity compensation mode
1107
+
1108
+ # Daemon state
1109
+ class DaemonState(Enum):
1110
+ NOT_INITIALIZED = "not_initialized"
1111
+ STARTING = "starting"
1112
+ RUNNING = "running"
1113
+ STOPPING = "stopping"
1114
+ STOPPED = "stopped"
1115
+ ERROR = "error"
1116
+
1117
+ # Full state
1118
+ class FullState:
1119
+ control_mode: MotorControlMode
1120
+ head_pose: XYZRPYPose # x, y, z (m), roll, pitch, yaw (rad)
1121
+ head_joints: list[float] # 7 joint angles
1122
+ body_yaw: float
1123
+ antennas_position: list[float] # [right, left]
1124
+ doa: DoAInfo # angle (rad), speech_detected (bool)
1125
+
1126
+ # IMU data (wireless version only)
1127
+ imu_data = {
1128
+ "accelerometer": [x, y, z], # m/s²
1129
+ "gyroscope": [x, y, z], # rad/s
1130
+ "quaternion": [w, x, y, z], # Attitude quaternion
1131
+ "temperature": float # °C
1132
+ }
1133
+
1134
+ # Safety limits
1135
+ HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
1136
+ HEAD_YAW_LIMIT = [-180°, +180°]
1137
+ BODY_YAW_LIMIT = [-160°, +160°]
1138
+ YAW_DELTA_MAX = 65° # Max difference between head and body yaw
1139
+ ```
1140
+
1141
+ ### ESPHome Protocol Implementation Notes
1142
+
1143
+ ESPHome protocol communicates with Home Assistant via protobuf messages. The following message types need to be implemented:
1144
+
1145
+ ```python
1146
+ from aioesphomeapi.api_pb2 import (
1147
+ # Number entity (volume/angle control)
1148
+ ListEntitiesNumberResponse,
1149
+ NumberStateResponse,
1150
+ NumberCommandRequest,
1151
+
1152
+ # Select entity (motor mode)
1153
+ ListEntitiesSelectResponse,
1154
+ SelectStateResponse,
1155
+ SelectCommandRequest,
1156
+
1157
+ # Button entity (wake/sleep)
1158
+ ListEntitiesButtonResponse,
1159
+ ButtonCommandRequest,
1160
+
1161
+ # Switch entity (motor switch)
1162
+ ListEntitiesSwitchResponse,
1163
+ SwitchStateResponse,
1164
+ SwitchCommandRequest,
1165
+
1166
+ # Sensor entity (numeric sensors)
1167
+ ListEntitiesSensorResponse,
1168
+ SensorStateResponse,
1169
+
1170
+ # Binary Sensor entity (boolean sensors)
1171
+ ListEntitiesBinarySensorResponse,
1172
+ BinarySensorStateResponse,
1173
+
1174
+ # Text Sensor entity (text sensors)
1175
+ ListEntitiesTextSensorResponse,
1176
+ TextSensorStateResponse,
1177
+ )
1178
+ ```
1179
+
1180
+ ## Reference Projects
1181
+
1182
+ - [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
1183
+ - [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
1184
+ - [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
1185
+ - [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
1186
+ - [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)
Project_Summary.md DELETED
@@ -1,1439 +0,0 @@
1
- # Reachy Mini for Home Assistant - Project Plan (Current snapshot: v1.0.6)
2
-
3
- ## Project Overview
4
-
5
- Integrate Home Assistant voice assistant functionality into Reachy Mini Wi-Fi robot, communicating with Home Assistant via ESPHome protocol.
6
-
7
- ## Local Reference Directories (DO NOT modify any files in reference directories)
8
- 1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
9
- 2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
10
- 3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
11
- 4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
12
- 5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
13
- 6. [aiosendspin](reference/aiosendspin/) - Sendspin protocol client library reference
14
- 7. [dynamic_gestures](reference/dynamic_gestures/) - Dynamic gesture reference
15
- 8. [SimpleDances](reference/SimpleDances/) - Local reference snapshot
16
-
17
- ## Core Design Principles
18
-
19
- 1. **Zero Configuration** - Users only need to install the app, no manual configuration required
20
- 2. **Native Hardware** - Use robot's built-in microphone and speaker
21
- 3. **Home Assistant Centralized Management** - STT/TTS/intent configuration stays on Home Assistant side
22
- 4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
23
- 5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reachy_mini) architecture design and constraints
24
- 6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
25
- 7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
26
- 8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
27
- 9. **Preserve Functionality** - Any code modification should optimize while preserving completed features; do not remove features to solve problems. When issues occur, consult the reference examples first and fix the root cause rather than adding extra log output
28
- 10. **No App-Managed Sleep/Wake** - The app no longer manages robot sleep/wake transitions; current SDK behavior is treated as source of truth
29
-
30
- ## Technical Architecture
31
-
32
- ```
33
- 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
34
- 鈹? Reachy Mini (ARM64) 鈹?
35
- 鈹? 鈹?
36
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ AUDIO INPUT 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
37
- 鈹? 鈹? ReSpeaker XVF3800 (16kHz) 鈹? 鈹?
38
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
39
- 鈹? 鈹? 鈹?4-Mic Array 鈹?鈫?鈹?XVF3800 DSP 鈹? 鈹? 鈹?
40
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?鈥?Hardware DSP path available 鈹? 鈹? 鈹?
41
- 鈹? 鈹? 鈹?鈥?App currently relies on HA STT/TTS 鈹? 鈹? 鈹?
42
- 鈹? 鈹? 鈹?鈥?DOA/VAD used by the current runtime 鈹? 鈹? 鈹?
43
- 鈹? 鈹? 鈹?鈥?Direction of Arrival (DOA) 鈹? 鈹? 鈹?
44
- 鈹? 鈹? 鈹?鈥?Voice Activity Detection (VAD) 鈹? 鈹? 鈹?
45
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
46
- 鈹? 鈹? 鈹? 鈹? 鈹?
47
- 鈹? 鈹? 鈻? 鈹? 鈹?
48
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
49
- 鈹? 鈹? 鈹?Wake Word Detection (microWakeWord) 鈹? 鈹? 鈹?
50
- 鈹? 鈹? 鈹?鈥?"Okay Nabu" / "Hey Jarvis" 鈹? 鈹? 鈹?
51
- 鈹? 鈹? 鈹?鈥?Stop word detection 鈹? 鈹? 鈹?
52
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
53
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
54
- 鈹? 鈹?
55
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ AUDIO OUTPUT 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
56
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
57
- 鈹? 鈹? 鈹?TTS Player 鈹? 鈹?Music Player (Sendspin) 鈹?鈹? 鈹?
58
- 鈹? 鈹? 鈹?鈥?Voice assistant speech 鈹? 鈹?鈥?Multi-room audio streaming 鈹?鈹? 鈹?
59
- 鈹? 鈹? 鈹?鈥?Sound effects 鈹? 鈹?鈥?Auto-discovery via mDNS 鈹?鈹? 鈹?
60
- 鈹? 鈹? 鈹?鈥?Priority over music 鈹? 鈹?鈥?Auto-pause during conversation 鈹?鈹? 鈹?
61
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
62
- 鈹? 鈹? 鈹? 鈹? 鈹? 鈹?
63
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
64
- 鈹? 鈹? 鈻? 鈹? 鈹?
65
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
66
- 鈹? 鈹? 鈹?ReSpeaker Speaker (16kHz) 鈹? 鈹? 鈹?
67
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
68
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
69
- 鈹? 鈹?
70
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ VISION & TRACKING 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
71
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
72
- 鈹? 鈹? 鈹?Camera (VPU accelerated) 鈹?鈫? 鈹?YOLO Face Detection 鈹?鈹? 鈹?
73
- 鈹? 鈹? 鈹?鈥?MJPEG stream server 鈹? 鈹?鈥?AdamCodd/YOLOv11n-face 鈹?鈹? 鈹?
74
- 鈹? 鈹? 鈹?鈥?ESPHome Camera entity 鈹? 鈹?鈥?Adaptive frame rate: 鈹?鈹? 鈹?
75
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? - 15fps: conversation/face 鈹?鈹? 鈹?
76
- 鈹? 鈹? 鈹? - 2fps: idle (power saving) 鈹?鈹? 鈹?
77
- 鈹? 鈹? 鈹?鈥?look_at_image() pose calc 鈹?鈹? 鈹?
78
- 鈹? 鈹? 鈹?鈥?Smooth return after face lost 鈹?鈹? 鈹?
79
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹? 鈹?
80
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
81
- 鈹? 鈹?
82
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ MOTION CONTROL 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
83
- 鈹? 鈹? MovementManager (50Hz Control Loop) 鈹? 鈹?
84
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
85
- 鈹? 鈹? 鈹?Motion Layers (Priority: Move > Action > SpeechSway > Breath) 鈹? 鈹? 鈹?
86
- 鈹? 鈹? 鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹? 鈹?
87
- 鈹? 鈹? 鈹?鈹?Move Queue 鈹?鈹?Actions 鈹?鈹?SpeechSway 鈹?鈹?Breathing 鈹? 鈹? 鈹? 鈹?
88
- 鈹? 鈹? 鈹?鈹?(Emotions) 鈹?鈹?(Nod/Shake)鈹?鈹?(Voice VAD)鈹?鈹?(Idle anim) 鈹? 鈹? 鈹? 鈹?
89
- 鈹? 鈹? 鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹? 鈹?
90
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
91
- 鈹? 鈹? 鈹? 鈹?
92
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
93
- 鈹? 鈹? 鈹?Face Tracking Offsets (Secondary Pose Overlay) 鈹? 鈹? 鈹?
94
- 鈹? 鈹? 鈹?鈥?Pitch offset: +9掳 (down compensation) 鈹? 鈹? 鈹?
95
- 鈹? 鈹? 鈹?鈥?Yaw offset: -7掳 (right compensation) 鈹? 鈹? 鈹?
96
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
97
- 鈹? 鈹? 鈹? 鈹?
98
- 鈹? 鈹? State Machine: on_wakeup 鈫?on_listening 鈫?on_speaking 鈫?on_idle 鈹? 鈹?
99
- 鈹? 鈹? 鈹? 鈹?
100
- 鈹? 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
101
- 鈹? 鈹? 鈹?Body Following 鈹? 鈹? 鈹?
102
- 鈹? 鈹? 鈹?鈥?Body yaw syncs with head yaw for natural tracking 鈹? 鈹? 鈹?
103
- 鈹? 鈹? 鈹?鈥?Extracted from final head pose matrix 鈹? 鈹? 鈹?
104
- 鈹? 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹? 鈹?
105
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
106
- 鈹? 鈹?
107
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ GESTURE DETECTION 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
108
- 鈹? 鈹? HaGRID ONNX Models 鈹? 鈹?
109
- 鈹? 鈹? 鈥?18 gesture classes (call, like, dislike, fist, ok, palm, etc.) 鈹? 鈹?
110
- 鈹? 鈹? 鈥?Runtime result publishing only 鈹? 鈹?
111
- 鈹? 鈹? 鈥?Batch detection: all hands (not just highest confidence) 鈹? 鈹?
112
- 鈹? 鈹? 鈥?Detection cadence: adaptive scheduler + minimum processing FPS 鈹? 鈹?
113
- 鈹? 鈹? 鈥?No confidence filtering - all detections passed to Home Assistant鈹? 鈹?
114
- 鈹? 鈹? 鈥?Runtime switchable (default OFF, model unloaded when disabled) 鈹? 鈹?
115
- 鈹? 鈹? 鈥?Real-time state push to Home Assistant 鈹? 鈹?
116
- 鈹? 鈹? 鈥?No conflicts with face tracking (shared frame, independent) 鈹? 鈹?
117
- 鈹? 鈹? 鈥?SDK integration: MediaBackend detection, proper resource cleanup 鈹? 鈹?
118
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
119
- 鈹? 鈹?
120
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€ ESPHOME SERVER 鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
121
- 鈹? 鈹? Port 6053 (mDNS auto-discovery) 鈹? 鈹?
122
- 鈹? 鈹? 鈥?Entity count evolves by release (sensors, controls, media, camera) 鈹? 鈹?
123
- 鈹? 鈹? 鈥?Voice Assistant pipeline integration 鈹? 鈹?
124
- 鈹? 鈹? 鈥?Real-time state synchronization 鈹? 鈹?
125
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹���鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹?
126
- 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
127
- 鈹?
128
- 鈹?ESPHome Protocol (protobuf)
129
- 鈻?
130
- 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
131
- 鈹? Home Assistant 鈹?
132
- 鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹屸攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹?
133
- 鈹? 鈹?STT Engine 鈹? 鈹?Intent Processing鈹? 鈹?TTS Engine 鈹?鈹?
134
- 鈹? 鈹?(User configured)鈹? 鈹?(Conversation) 鈹? 鈹?(User configured) 鈹?鈹?
135
- 鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹? 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?鈹?
136
- 鈹斺攢鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹€鈹?
137
- ```
138
-
139
- ### Software Module Architecture (v1.0.6)
140
-
141
- ```
142
- reachy_mini_home_assistant/
143
- │
144
- ├── main.py  # ReachyMiniApp entry point
145
- ├── __main__.py  # Standalone CLI entry point
146
- ├── voice_assistant.py  # Voice assistant service orchestrator
147
- ├── reachy_controller.py  # Reachy Mini SDK wrapper
148
- ├── models.py  # Data models / preferences / server state
149
- │
150
- ├── core/  # Core Infrastructure
151
- │   ├── config.py  # Centralized nested configuration
152
- │   ├── service_base.py  # Suspend/resume-aware service helpers
153
- │   ├── system_diagnostics.py  # System diagnostics
154
- │   ├── exceptions.py  # Custom exception classes
155
- │   └── util.py  # Utility functions
156
- │
157
- ├── motion/  # Motion Control
158
- │   ├── movement_manager.py  # 50Hz unified motion control loop
159
- │   ├── command_runtime.py  # Command queue handling / state transitions
160
- │   ├── control_runtime.py  # Control-loop runtime helpers
161
- │   ├── idle_runtime.py  # Idle behavior / idle rest handling
162
- │   ├── antenna.py  # Antenna control / freeze logic
163
- │   ├── pose_composer.py  # Pose composition from multiple sources
164
- │   ├── smoothing.py  # Motion smoothing algorithms
165
- │   ├── state_machine.py  # Robot state definitions / idle config parsing
166
- │   ├── animation_player.py  # Animation player
167
- │   ├── emotion_moves.py  # Emotion moves
168
- │   ├── speech_sway.py  # Speech-driven head micro-movements
169
- │   └── reachy_motion.py  # Reachy motion API
170
- │
171
- ├── vision/  # Vision Processing
172
- │   ├── camera_server.py  # MJPEG camera stream server facade
173
- │   ├── camera_runtime.py  # Camera lifecycle helpers
174
- │   ├── camera_processing.py  # Frame capture / AI processing helpers
175
- │   ├── camera_http.py  # HTTP handlers for stream/snapshot
176
- │   ├── head_tracker.py  # YOLO face detector
177
- │   ├── gesture_detector.py  # HaGRID gesture detection
178
- │   ├── face_tracking_interpolator.py  # Smooth face tracking
179
- │   └── frame_processor.py  # Adaptive frame rate management
180
- │
181
- ├── audio/  # Audio runtime support
182
- │   ├── audio_player.py  # AudioPlayer facade
183
- │   ├── audio_player_shared.py  # Shared audio/sendspin constants + helpers
184
- │   ├── audio_player_playback.py  # Playback orchestration / lifecycle
185
- │   ├── audio_player_local.py  # Local file + fallback playback
186
- │   ├── audio_player_stream_pcm.py  # PCM streaming playback
187
- │   ├── audio_player_stream_decoded.py  # Decoded/GStreamer streaming playback
188
- │   ├── audio_player_sendspin.py  # Sendspin runtime integration
189
- │   ├── microphone.py  # Hardware audio helper / legacy tuning code
190
- │   └── doa_tracker.py  # Direction of Arrival tracking
191
- │
192
- ├── entities/  # Home Assistant Entities
193
- │   ├── entity.py  # ESPHome base entity
194
- │   ├── entity_registry.py  # ESPHome entity registry
195
- │   ├── entity_factory.py  # Entity creation factory
196
- │   ├── entity_keys.py  # Entity key constants
197
- │   ├── entity_extensions.py  # Extended entity types
198
- │   ├── runtime_entity_setup.py  # Runtime/control entity wiring
199
- │   ├── sensor_entity_setup.py  # Sensor/diagnostic entity wiring
200
- │   ├── event_emotion_mapper.py  # HA event → Emotion mapping
201
- │   └── emotion_detector.py  # Disabled runtime path for text emotion detection
202
- │
203
- ├── protocol/  # Protocol Handling
204
- │   ├── satellite.py  # ESPHome protocol handler facade
205
- │   ├── api_server.py  # HTTP API server
206
- │   ├── zeroconf.py  # mDNS discovery
207
- │   ├── entity_bridge.py  # Protocol/entity bridge helpers
208
- │   ├── message_dispatch.py  # ESPHome message dispatch
209
- │   ├── motion_bridge.py  # Voice → motion bridge
210
- │   ├── session_flow.py  # Conversation lifecycle helpers
211
- │   ├── voice_pipeline.py  # Voice event handling / TTS / stop / ducking
212
- │   └── wakeword_assets.py  # Wake word asset helpers
213
- │
214
- ├── animations/  # Animation definitions
215
- │   └── conversation_animations.json  # Unified built-in behavior resource file
216
- │
217
- └── wakewords/  # Wake word models
218
-     ├── okay_nabu.json/.tflite
219
-     ├── hey_jarvis.json/.tflite
220
-     ├── alexa.json/.tflite
221
-     ├── hey_luna.json/.tflite
222
-     └── stop.json/.tflite
223
- ```
224
-
225
-
226
- ### Current Runtime Defaults (v1.0.6)
227
-
228
- - `idle_behavior_enabled`: user-controlled
229
- - `sendspin_enabled`: OFF
230
- - `face_tracking_enabled`: OFF
231
- - `gesture_detection_enabled`: OFF
232
- - `face_confidence_threshold`: 0.5 (persistent)
233
- - `continuous_conversation`: user-controlled
234
- - `Idle Behavior = OFF` means a parked no-animation state aligned to configured idle rest pose
235
- - When `Idle Behavior = OFF`, camera server is stopped entirely to save resources
236
- - When `Idle Behavior = ON`, camera server can run and `/snapshot` supports on-demand frame capture when cache is empty
237
- - Idle antenna behavior: torque disabled in `IDLE`, re-enabled when leaving `IDLE`
238
- - Voice phases and HA-triggered emotions are routed through one built-in zero-config behavior layer
239
-
240
- When face/gesture switches are OFF, their models are unloaded to save resources.
241
-
242
- ### Current Audio Startup Note (SDK 1.7.0)
243
-
244
- - The app now aligns to the current Reachy Mini SDK media model instead of carrying older compatibility paths.
245
- - Camera snapshots can be fetched on demand when the MJPEG cache is empty and the camera server is still running.
246
- - Audio block size is currently `512` samples to reduce CPU overhead versus the earlier `256`-sample path.
247
-
248
- ### Latest Incremental Update (2026-03-04) - Viewer-Aware Camera Streaming
249
-
250
- - MJPEG encoding/push is now viewer-aware: when no `/stream` client is connected, continuous MJPEG encoding is skipped to reduce CPU usage.
251
- - Face tracking and gesture detection still run without active stream viewers, so AI behavior remains available.
252
- - `/snapshot` now supports on-demand frame encode when no cached stream frame exists.
253
- - Stream output no longer forces fixed 1080p/25fps; it follows camera backend defaults (resolution/FPS) and only falls back when backend FPS is unavailable.
254
- - Transition from "watching" to "not watching" returns to adaptive idle pacing for resource saving.
255
-
256
- ## Completed Features
257
-
258
- ### Core Features
259
- - [x] ESPHome protocol server implementation
260
- - [x] mDNS service discovery (auto-discovered by Home Assistant)
261
- - [x] Local wake word detection (microWakeWord)
262
- - [x] Continuous conversation mode (controlled via Home Assistant switch)
263
- - [x] Audio stream transmission to Home Assistant
264
- - [x] TTS audio playback
265
- - [x] Stop word detection
266
-
267
- ### Reachy Mini Integration
268
- - [x] Use Reachy Mini SDK microphone input
269
- - [x] Use Reachy Mini SDK speaker output
270
- - [x] Head motion control (nod, shake, gaze)
271
- - [x] Antenna animation control
272
- - [x] Voice state feedback actions
273
- - [x] YOLO face tracking (complements DOA wakeup orientation)
274
- - [x] 50Hz unified motion control loop
275
-
276
- ### Application Architecture
277
- - [x] Compliant with Reachy Mini App architecture
278
-
279
-
280
-
281
- ## File List
282
-
283
- ```
284
- reachy_mini_ha_voice/
285
- 鈹溾攢鈹€ reachy_mini_ha_voice/
286
- 鈹? 鈹溾攢鈹€ __init__.py # Package initialization (v0.9.9)
287
- 鈹? 鈹溾攢鈹€ __main__.py # Command line entry
288
- 鈹? 鈹溾攢鈹€ main.py # ReachyMiniApp entry
289
- 鈹? 鈹溾攢鈹€ voice_assistant.py # Voice assistant service (1270 lines)
290
- 鈹? 鈹溾攢鈹€ protocol/ # ESPHome protocol handling
291
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports (13 lines)
292
- 鈹? 鈹? 鈹溾攢鈹€ satellite.py # ESPHome protocol handler facade
293
- 鈹? 鈹? 鈹溾攢鈹€ api_server.py # HTTP API server
294
- 鈹? 鈹? 鈹溾攢鈹€ zeroconf.py # mDNS discovery
295
- 鈹? 鈹? 鈹溾攢鈹€ entity_bridge.py # Protocol/entity bridge helpers
296
- 鈹? 鈹? 鈹溾攢鈹€ message_dispatch.py # ESPHome message dispatch
297
- 鈹? 鈹? 鈹溾攢鈹€ motion_bridge.py # Voice 鈫?motion bridge
298
- 鈹? 鈹? 鈹溾攢鈹€ session_flow.py # Conversation lifecycle helpers
299
- 鈹? 鈹? 鈹溾攢鈹€ voice_pipeline.py # Voice event handling / TTS / stop / ducking
300
- 鈹? 鈹? 鈹斺攢鈹€ wakeword_assets.py # Wake word asset helpers
301
- 鈹? 鈹溾攢鈹€ models.py # Data models
302
- 鈹? 鈹斺攢鈹€ reachy_controller.py # Reachy Mini controller wrapper (961 lines)
303
- 鈹? 鈹?
304
- 鈹? 鈹溾攢鈹€ core/ # Core infrastructure modules
305
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports
306
- 鈹? 鈹? 鈹溾攢鈹€ config.py # Centralized configuration (368 lines)
307
- 鈹? 鈹? 鈹溾攢鈹€ service_base.py # Suspend/resume-aware service helpers
308
- 鈹? 鈹? 鈹溾攢鈹€ system_diagnostics.py # System diagnostics (250 lines)
309
- 鈹? 鈹? 鈹斺攢鈹€ exceptions.py # Custom exception classes (68 lines)
310
- 鈹? 鈹? 鈹斺攢鈹€ util.py # Utility functions (28 lines)
311
- 鈹? 鈹?
312
- 鈹? 鈹溾攢鈹€ motion/ # Motion control modules
313
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports
314
- 鈹? 鈹? 鈹溾攢鈹€ antenna.py # Antenna freeze/unfreeze control
315
- 鈹? 鈹? 鈹溾攢鈹€ pose_composer.py # Pose composition utilities
316
- 鈹? 鈹? 鈹溾攢鈹€ command_runtime.py # Command queue handling / state transitions
317
- 鈹? 鈹? 鈹溾攢鈹€ control_runtime.py # Control-loop runtime helpers
318
- 鈹? 鈹? 鈹溾攢鈹€ idle_runtime.py # Idle behavior / idle rest handling
319
- 鈹? 鈹? 鈹溾攢鈹€ smoothing.py # Smoothing/transition algorithms
320
- 鈹? 鈹? 鈹溾攢鈹€ state_machine.py # State machine definitions
321
- 鈹? 鈹? 鈹溾攢鈹€ animation_player.py # Animation player
322
- 鈹? 鈹? 鈹溾攢鈹€ emotion_moves.py # Emotion moves
323
- 鈹? 鈹? 鈹溾攢鈹€ speech_sway.py # Speech-driven head micro-movements (338 lines)
324
- 鈹? 鈹? 鈹斺攢鈹€ reachy_motion.py # Reachy motion API
325
- 鈹? 鈹?
326
- 鈹? 鈹溾攢鈹€ vision/ # Vision processing modules
327
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports (30 lines)
328
- 鈹? 鈹? 鈹溾攢鈹€ frame_processor.py # Adaptive frame rate management (227 lines)
329
- 鈹? 鈹? 鈹溾攢鈹€ face_tracking_interpolator.py # Face lost interpolation (253 lines)
330
- 鈹? 鈹? 鈹溾攢鈹€ gesture_detector.py # HaGRID gesture detection
331
- 鈹? 鈹? 鈹溾攢鈹€ head_tracker.py # YOLO face detector
332
- 鈹? 鈹? 鈹溾攢鈹€ camera_runtime.py # Camera lifecycle helpers
333
- 鈹? 鈹? 鈹溾攢鈹€ camera_processing.py # Frame capture / AI processing helpers
334
- 鈹? 鈹? 鈹溾攢鈹€ camera_http.py # HTTP handlers for stream/snapshot
335
- 鈹? 鈹? 鈹斺攢鈹€ camera_server.py # MJPEG camera stream server facade
336
- 鈹? 鈹?
337
- 鈹? 鈹溾攢鈹€ audio/ # Audio runtime modules
338
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports (21 lines)
339
- 鈹? 鈹? 鈹溾攢鈹€ microphone.py # Hardware audio helper / legacy tuning code
340
- 鈹? 鈹? 鈹溾攢鈹€ doa_tracker.py # Direction of Arrival tracking
341
- 鈹? 鈹? 鈹溾攢鈹€ audio_player.py # AudioPlayer facade
342
- 鈹? 鈹? 鈹溾攢鈹€ audio_player_shared.py # Shared audio/sendspin constants + helpers
343
- 鈹? 鈹? 鈹溾攢鈹€ audio_player_playback.py # Playback orchestration / lifecycle
344
- 鈹? 鈹? 鈹溾攢鈹€ audio_player_local.py # Local file + fallback playback
345
- 鈹? 鈹? 鈹溾攢鈹€ audio_player_stream_pcm.py # PCM streaming playback
346
- 鈹? 鈹? 鈹溾攢鈹€ audio_player_stream_decoded.py # Decoded/GStreamer streaming playback
347
- 鈹? 鈹? 鈹斺攢鈹€ audio_player_sendspin.py # Sendspin runtime integration
348
- 鈹? 鈹?
349
- 鈹? 鈹溾攢鈹€ entities/ # Home Assistant entity modules
350
- 鈹? 鈹? 鈹溾攢鈹€ __init__.py # Module exports (38 lines)
351
- 鈹? 鈹? 鈹溾攢鈹€ entity.py # ESPHome base entity (402 lines)
352
- 鈹? 鈹? 鈹溾攢鈹€ entity_factory.py # Entity factory pattern (440 lines)
353
- 鈹? 鈹? 鈹溾攢鈹€ entity_keys.py # Entity key constants (155 lines)
354
- 鈹? 鈹? 鈹溾攢鈹€ entity_extensions.py # Extended entity types (258 lines)
355
- 鈹? 鈹? 鈹溾攢鈹€ entity_registry.py # ESPHome entity registry
356
- 鈹? 鈹? 鈹溾攢鈹€ runtime_entity_setup.py # Runtime/control entity wiring
357
- 鈹? 鈹? 鈹溾攢鈹€ sensor_entity_setup.py # Sensor/diagnostic entity wiring
358
- 鈹? 鈹? 鈹溾攢鈹€ event_emotion_mapper.py # HA event to emotion mapping
359
- 鈹? 鈹? 鈹斺攢鈹€ emotion_detector.py # Disabled runtime path for text emotion detection
360
- 鈹? 鈹?
361
- 鈹? 鈹溾攢鈹€ animations/ # Animation definitions
362
- 鈹? 鈹? 鈹斺攢鈹€ conversation_animations.json # Unified animations / gestures / HA events / keyword resources
363
- 鈹? 鈹?
364
- 鈹? 鈹斺攢鈹€ wakewords/ # Wake word models
365
- 鈹? 鈹溾攢鈹€ okay_nabu.json/.tflite
366
- 鈹? 鈹溾攢鈹€ hey_jarvis.json/.tflite (openWakeWord)
367
- 鈹? 鈹溾攢鈹€ alexa.json/.tflite
368
- 鈹? 鈹溾攢鈹€ hey_luna.json/.tflite
369
- 鈹? 鈹斺攢鈹€ stop.json/.tflite # Stop word detection
370
- 鈹?
371
- 鈹溾攢鈹€ sounds/ # Sound effect files (auto-download)
372
- 鈹? 鈹溾攢鈹€ wake_word_triggered.flac
373
- 鈹? 鈹斺攢鈹€ timer_finished.flac
374
- 鈹溾攢鈹€ pyproject.toml # Project configuration
375
- 鈹溾攢鈹€ README.md # Documentation
376
- 鈹溾攢鈹€ changelog.json # Version changelog
377
- 鈹斺攢鈹€ PROJECT_PLAN.md # Project plan
378
- ```
379
-
380
- ## Dependencies
381
-
382
- ```toml
383
- dependencies = [
384
- "reachy-mini>=1.7.0",
385
- "soundfile>=0.13.0",
386
- "numpy>=2.2.5,<=2.2.5",
387
- "opencv-python>=4.12.0.88",
388
- "pymicro-wakeword>=2.0.0,<3.0.0",
389
- "pyopen-wakeword>=1.0.0,<2.0.0",
390
- "aioesphomeapi>=43.10.1",
391
- "zeroconf>=0.131,<1",
392
- "websockets>=12,<16",
393
- "aiohttp",
394
- "scipy>=1.15.3,<2.0.0",
395
- "ultralytics",
396
- "supervision",
397
- "aiosendspin>=5.1,<6.0",
398
- "onnxruntime>=1.18.0",
399
- "torch==2.5.1",
400
- "torchvision==0.20.1",
401
- "pillow<12.0",
402
- "pydantic<=2.12.5",
403
- "requests>=2.33.0",
404
- "gstreamer-bundle==1.28.1; sys_platform != 'linux'",
405
- ]
406
- ```
407
-
408
- ## Usage Flow
409
-
410
- 1. **Install App**
411
- - Install `reachy_mini_ha_voice` from Reachy Mini App Store
412
-
413
- 2. **Start App**
414
- - App auto-starts ESPHome server (port 6053)
415
- - Auto-downloads required models and sounds
416
-
417
- 3. **Connect Home Assistant**
418
- - Home Assistant auto-discovers device (mDNS)
419
-    - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
420
-
421
- 4. **Use Voice Assistant**
422
- - Say "Okay Nabu" to wake
423
- - Speak command
424
- - Reachy Mini provides motion feedback
425
-
426
- ## ESPHome Entity Planning
427
-
428
- Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
429
-
430
- ### Implemented Entities
431
-
432
- | Entity Type | Name | Description |
433
- |-------------|------|-------------|
434
- | Media Player | `media_player` | Audio playback control |
435
- | Voice Assistant | `voice_assistant` | Voice assistant pipeline |
436
-
437
- ### Implemented Control Entities (Read/Write)
438
-
439
- #### Phase 1-3: Basic Controls and Pose
440
-
441
- | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
442
- |---------------------|------|---------|---------------|-------------|
443
- | `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
444
- | `Switch` | `idle_behavior_enabled` | `set_idle_behavior_enabled()` | off=parked/on=idle runtime enabled | Unified idle behavior toggle |
445
- | `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
446
- | `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
447
- | `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
448
- | `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
449
- | `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
450
- | `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
451
- | `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
452
- | `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
453
- | `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
454
-
455
- #### Phase 4: Gaze Control
456
-
457
- | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
458
- |---------------------|------|---------|---------------|-------------|
459
- | `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
460
- | `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
461
- | `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
462
-
463
-
464
- ### Implemented Sensor Entities (Read-only)
465
-
466
- #### Phase 1 & 5: Basic Status and Audio Sensors
467
-
468
- | ESPHome Entity Type | Name | SDK API | Description |
469
- |---------------------|------|---------|-------------|
470
- | `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
471
- | `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
472
- | `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
473
- | `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
474
- | `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
475
-
476
- #### Phase 6: Diagnostic Information
477
-
478
- | ESPHome Entity Type | Name | SDK API | Description |
479
- |---------------------|------|---------|-------------|
480
- | `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
481
- | `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
482
- | `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
483
- | `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
484
- | `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
485
- | `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
486
-
487
- #### Phase 7: IMU Sensors (Wireless version only)
488
-
489
- | ESPHome Entity Type | Name | SDK API | Description |
490
- |---------------------|------|---------|-------------|
491
- | `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
492
- | `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
493
- | `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
494
- | `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
495
- | `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
496
- | `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
497
- | `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
498
-
499
- #### Current Runtime Control and Sensor Entities
500
-
501
- | Phase | ESPHome Entity Type | Name | Description |
502
- |------|---------------------|------|-------------|
503
- | 1 | `Switch` | `mute` | Suspend/resume the voice pipeline |
504
- | 1 | `Switch` | `camera_disabled` | Disable/enable camera runtime |
505
- | 1 | `Switch` | `idle_behavior_enabled` | Unified idle motion / antenna / micro-actions toggle |
506
- | 1 | `Switch` | `sendspin_enabled` | Enable/disable Sendspin playback integration |
507
- | 1 | `Switch` | `face_tracking_enabled` | Enable/disable face tracking models |
508
- | 1 | `Switch` | `gesture_detection_enabled` | Enable/disable gesture detection models |
509
- | 1 | `Number` | `face_confidence_threshold` | Face tracking confidence threshold (0-1) |
510
- | 2 | `Binary Sensor` | `services_suspended` | Runtime suspension state |
511
- | 8 | `Select` | `emotion` | Manual emotion trigger |
512
- | 10 | `Camera` | `camera` | ESPHome camera entity / live preview |
513
- | 21 | `Switch` | `continuous_conversation` | Multi-turn conversation mode |
514
- | 22 | `Text Sensor` | `gesture_detected` | Current detected gesture |
515
- | 22 | `Sensor` | `gesture_confidence` | Current gesture confidence |
516
- | 23 | `Binary Sensor` | `face_detected` | Face currently visible |
517
-
518
- > **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
519
- > using `Number` type for bidirectional control. Call `goto_target()` when setting new values, call `get_current_head_pose()` etc. when reading current values.
520
-
521
- ### Implementation Priority
522
-
523
- 1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
524
- - [x] `daemon_state` - Daemon status sensor
525
- - [x] `backend_ready` - Backend ready status
526
- - [x] `error_message` - Error message
527
- - [x] `speaker_volume` - Speaker volume control
528
-
529
- 2. **Phase 2 - Runtime State** (High Priority) ✅ **Completed**
530
- - [x] `services_suspended` - Service suspension state sensor
531
- - [x] App-managed sleep/wake entities removed from the current runtime
532
-
533
- 3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
534
- - [x] `head_x/y/z` - Head position control
535
- - [x] `head_roll/pitch/yaw` - Head angle control
536
- - [x] `body_yaw` - Body yaw angle control
537
- - [x] `antenna_left/right` - Antenna angle control
538
-
539
- 4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
540
- - [x] `look_at_x/y/z` - Gaze point coordinate control
541
-
542
- 5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
543
- - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
544
- - [x] `speech_detected` - Speech detection status
545
- - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
546
- - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
547
- - Note: DOA only read once at wakeup to avoid daemon pressure; face tracking takes over after
548
-
549
- 6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
550
- - [x] `control_loop_frequency` - Control loop frequency
551
- - [x] `sdk_version` - SDK version
552
- - [x] `robot_name` - Robot name
553
- - [x] `wireless_version` - Wireless version flag
554
- - [x] `simulation_mode` - Simulation mode flag
555
- - [x] `wlan_ip` - Wireless IP address
556
-
557
- 7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
558
- - [x] `imu_accel_x/y/z` - Accelerometer
559
- - [x] `imu_gyro_x/y/z` - Gyroscope
560
- - [x] `imu_temperature` - IMU temperature
561
-
562
- 8. **Phase 8 - Emotion Control** ✅ **Completed**
563
- - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
564
-
565
- 9. **Phase 10 - Camera Integration** ✅ **Completed**
566
- - [x] `camera` - ESPHome Camera entity (live preview)
567
-
568
- 10. **Phase 11 - LED Control** ❌ **Disabled (LEDs hidden inside robot)**
569
- - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
570
- - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
571
- - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
572
-
573
- 11. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
574
- - [x] `sendspin_enabled` - Sendspin switch (Switch)
575
- - [x] AudioPlayer integrates aiosendspin library
576
- - [x] Local music/sendspin path coexists with voice playback and is auto-paused during conversation
577
-
578
- 12. **Phase 21 - Continuous Conversation** ✅ **Completed**
579
- - [x] `continuous_conversation` - Conversation continuation switch
580
-
581
- 13. **Phase 22 - Gesture Detection** ✅ **Completed (current runtime behavior)**
582
- - [x] `gesture_detected` - Detected gesture name (Text Sensor)
583
- - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
584
- - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
585
- - [x] Real-time state push to Home Assistant
586
- - [x] Runtime gesture result publishing only (no gesture-driven robot actions)
587
- - [x] Runtime toggle supported (default OFF, model unload on disable)
588
- - [x] Batch detection: returns all detected hands (not just highest confidence)
589
- - [x] Minimum processing cadence preserved for responsiveness
590
- - [x] No conflicts with face tracking (shared frame, independent processing)
591
- - [x] SDK integration: MediaBackend detection, proper resource cleanup on shutdown
592
- - [x] 18 supported gestures:
593
- | Gesture | Emoji | Gesture | Emoji |
594
- |---------|-------|---------|-------|
595
- | call | 棣冾樉 | like | 棣冩啢 |
596
- | dislike | 棣冩啣 | mute | 棣冦亱 |
597
- | fist | 閴?| ok | 棣冩啠 |
598
- | four | 棣冩瀾閿?| one | 閳芥繐绗?|
599
- | palm | 閴?| peace | 閴佸矉绗?|
600
- | peace_inverted | 棣冩暰閴佸矉绗?| rock | 棣冾樈 |
601
- | stop | 棣冩磧 | stop_inverted | 棣冩暰棣冩磧 |
602
- | three | 3閿斿繆鍎?| three2 | 棣冾檮 |
603
- | two_up | 閴佸矉绗嶉埥婵撶瑣 | two_up_inverted | 棣冩暰閴佸矉绗嶉埥婵撶瑣 |
604
-
605
- 14. **Phase 23 - Face Detection** ✅ **Completed**
606
- - [x] `face_detected` - Face visibility sensor
607
-
608
- 15. **Phase 24 - System Diagnostics** ✅ **Completed**
609
- - [x] `sys_cpu_percent` - CPU usage percentage (Sensor, diagnostic)
610
- - [x] `sys_cpu_temperature` - CPU temperature in Celsius (Sensor, diagnostic)
611
- - [x] `sys_memory_percent` - Memory usage percentage (Sensor, diagnostic)
612
- - [x] `sys_memory_used` - Used memory in GB (Sensor, diagnostic)
613
- - [x] `sys_disk_percent` - Disk usage percentage (Sensor, diagnostic)
614
- - [x] `sys_disk_free` - Free disk space in GB (Sensor, diagnostic)
615
- - [x] `sys_uptime` - System uptime in hours (Sensor, diagnostic)
616
- - [x] `sys_process_cpu` - This process CPU usage (Sensor, diagnostic)
617
- - [x] `sys_process_memory` - This process memory in MB (Sensor, diagnostic)
618
-
619
- ---
620
-
621
- ## 🎯 Current Runtime Entity Coverage
622
-
623
- **Total Completed: See runtime registry (count evolves with releases)**
624
- - Phase 1: 10 entities (status, zero-config runtime switches, volume)
625
- - Phase 2: runtime state entities only (`services_suspended`; sleep entities removed)
626
- - Phase 3: 9 entities (Pose control)
627
- - Phase 4: 3 entities (Gaze control)
628
- - Phase 5: 3 entities (DOA sensors and tracking switch)
629
- - Phase 6: 7 entities (Diagnostic information)
630
- - Phase 7: 7 entities (IMU sensors)
631
- - Phase 8: 1 entity (Emotion control)
632
- - Phase 10: 1 entity (Camera)
633
- - Phase 11: 0 entities (LED control - Disabled)
634
- - Phase 13: 1 entity (Sendspin toggle)
635
- - Phase 21: 1 entity (Continuous conversation)
636
- - Phase 22: 2 entities (Gesture detection)
637
- - Phase 23: 1 entity (Face detection)
638
- - Phase 24: 9 entities (System diagnostics)
639
-
640
-
641
- ---
642
-
643
- ## 🚀 Voice Assistant Enhancement Features Implementation Status
644
-
645
- ### Phase 14 - Emotion and Motion Feedback ✅
646
- **Current Status**: Manual emotion playback and non-blocking motion feedback are implemented. Automatic keyword-based emotion triggering is currently disabled in the runtime.
647
-
648
- **Implemented Features**:
649
- - ✅ Phase 8 Emotion Selector entity (`emotion`)
650
- - ✅ `_play_emotion()` queues emotion moves through `MovementManager`
651
- - ✅ Wake/listen/think/speak/idle motion transitions are non-blocking
652
- - ✅ Timer-finished motion feedback is implemented
653
- - ✅ Gesture detection publishes recognized gesture label and confidence to Home Assistant entities
654
- - ✅ Voice phases and HA state reactions share one built-in behavior dispatcher
655
-
656
- **Current Behavior**:
657
-
658
- | Voice Assistant Event | Actual Action | Implementation Status |
659
- |----------------------|---------------|----------------------|
660
- | Wake word detected | Turn toward sound source + listening pose | ✅ Implemented |
661
- | Listening | Attentive listening state | ✅ Implemented |
662
- | Thinking | Thinking state animation | ✅ Implemented |
663
- | Speaking | Speech-reactive motion | ✅ Implemented |
664
- | Timer completed | Alert shake motion | ✅ Implemented |
665
- | Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
666
-
667
- **Deliberately Not Active In Runtime**:
668
- - Automatic emotion keyword detection from assistant text
669
- - Blocking full-action choreography during conversation
670
- - Dance/personalization layers that require user configuration
671
-
672
- **Manual Emotion Trigger Example**:
673
- ```yaml
674
- # Home Assistant automation example - Manual emotion trigger
675
- automation:
676
- - alias: "Reachy Good Morning Greeting"
677
- trigger:
678
- - platform: time
679
- at: "07:00:00"
680
- action:
681
- - service: select.select_option
682
- target:
683
- entity_id: select.reachy_mini_emotion
684
- data:
685
- option: "Happy"
686
- ```
687
-
688
- ### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) ✅ **Completed**
689
-
690
- **Goal**: Implement natural face tracking so robot looks at speaker during conversation.
691
-
692
- **Design Decision**:
693
- - ✅ DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
694
- - ✅ YOLO face detection: Takes over after initial turn for continuous tracking
695
- - ✅ Body follows head rotation: Body yaw automatically syncs with head yaw for natural tracking
696
- - Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking, body following enables natural whole-body tracking similar to human behavior
697
-
698
- **Wakeup Turn-to-Sound Flow**:
699
- 1. Wake word detected → Read DOA angle once (avoid daemon pressure)
700
- 2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
701
- 3. Face tracking takes over for continuous tracking during conversation
702
-
703
- **Implemented Features**:
704
-
705
- | Feature | Description | Implementation Location | Status |
706
- |---------|-------------|------------------------|--------|
707
- | DOA turn-to-sound | Turn toward speaker at wakeup | `protocol/satellite.py:_turn_to_sound_source()` | ✅ Implemented |
708
- | YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `vision/head_tracker.py` | ✅ Implemented |
709
- | Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
710
- | look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
711
- | Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
712
- | face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
713
- | Body follows head rotation | Body yaw syncs with head yaw extracted from final pose matrix | `motion/movement_manager.py:_compose_final_pose()` | ✅ Implemented (v0.8.3) |
714
- | DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
715
- | face_detected entity | Binary sensor for face detection state | `entity_registry.py` | ✅ Implemented |
716
- | Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
717
- | Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
718
-
719
- **Resource Optimization (v0.5.1, updated v0.6.2)**:
720
- - During conversation (listening/thinking/speaking): High-frequency tracking 15fps
721
- - Idle with face detected: High-frequency tracking 15fps
722
- - Idle without face for 5s: Low-power mode 2fps
723
- - Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
724
- - Gesture detection is switch-controlled and can run independently of face tracking
725
- - Immediately restore high-frequency tracking when face detected
726
-
727
- **Code Locations**:
728
- - `protocol/satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
729
- - `vision/head_tracker.py` - YOLO face detector (`HeadTracker` class)
730
- - `vision/camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
731
- - `vision/camera_server.py:set_conversation_mode()` - Conversation mode switch API
732
- - `protocol/satellite.py:_set_conversation_mode()` - Voice assistant state integration
733
- - `motion/movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
734
- - `motion/movement_manager.py:_compose_final_pose()` - Body yaw follows head yaw (v0.8.3)
735
-
736
- **Technical Details**:
737
- ```python
738
- # vision/camera_server.py - Adaptive frame rate face tracking
739
- class MJPEGCameraServer:
740
- def __init__(self):
741
- self._fps_high = 15 # During conversation/face detected
742
- self._fps_low = 2 # Idle without face (5-30s)
743
- self._fps_idle = 0.5 # Ultra-low power (>30s without face)
744
- self._low_power_threshold = 5.0 # 5s without face switches to low power
745
- self._idle_threshold = 30.0 # 30s without face switches to idle mode
746
-
747
- def _should_run_ai_inference(self, current_time):
748
- # Conversation mode: Always high-frequency tracking
749
- if self._in_conversation:
750
- return True
751
- # High-frequency mode: Track every frame
752
- if self._current_fps == self._fps_high:
753
- return True
754
- # Low/idle power mode: Periodic detection
755
- return time.since_last_check >= 1/self._current_fps
756
-
757
- # protocol/satellite.py - Voice assistant state integration
758
- def _reachy_on_listening(self):
759
- self._set_conversation_mode(True) # Start conversation, high-frequency tracking
760
-
761
- def _reachy_on_idle(self):
762
- self._set_conversation_mode(False) # End conversation, adaptive tracking
763
-
764
- # motion/movement_manager.py - Body follows head rotation (v0.8.3)
765
- # This enables natural body rotation when tracking faces, similar to how
766
- # the reference project's sweep_look tool synchronizes body_yaw with head_yaw.
767
- def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
768
- # ... compose head pose from all motion sources ...
769
-
770
- # Extract yaw from final head pose rotation matrix
771
- # The rotation matrix uses xyz euler convention
772
- final_rotation = R.from_matrix(final_head[:3, :3])
773
- _, _, final_head_yaw = final_rotation.as_euler('xyz')
774
-
775
- # Body follows head yaw directly
776
- # SDK's automatic_body_yaw (inverse_kinematics_safe) only handles collision
777
- # prevention by clamping relative angle to max 65°, not active following
778
- body_yaw = final_head_yaw
779
-
780
- return final_head, (antenna_right, antenna_left), body_yaw
781
- ```
782
-
783
- **Body Following Head Rotation (v0.8.3)**:
784
- - SDK's `automatic_body_yaw` is only **collision protection**, not active body following
785
- - The `inverse_kinematics_safe` function with `max_relative_yaw=65°` only prevents head-body collision
786
- - To enable natural body following, `body_yaw` must be explicitly set to match `head_yaw`
787
- - Body yaw is extracted from final head pose matrix using scipy's `R.from_matrix().as_euler('xyz')`
788
- - This matches the reference project's `sweep_look.py` behavior where `target_body_yaw = head_yaw`
789
-
790
-
791
- ### Phase 16 - Cartoon Style Motion Mode (Partial) 🟡
792
-
793
- **Goal**: Use SDK interpolation techniques for more expressive robot movements.
794
-
795
- **SDK Support**: `InterpolationTechnique` enum
796
- - `LINEAR` - Linear, mechanical feel
797
- - `MIN_JERK` - Minimum jerk, natural and smooth (default)
798
- - `EASE_IN_OUT` - Ease in-out, elegant
799
- - `CARTOON` - Cartoon style, with bounce effect, lively and cute
800
-
801
- **Implemented Features**:
802
- - ✅ 50Hz unified control loop (`motion/movement_manager.py`) - Current stable frequency
803
- - ✅ JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
804
- - ✅ Conversation state animations (idle/listening/thinking/speaking)
805
- - ✅ Pose change detection - Only send commands on significant changes (threshold 0.005)
806
- - ✅ State query caching - 2s TTL, reduces daemon load
807
- - ✅ Smooth interpolation (ease in-out curve)
808
- - ✅ Command queue mode - Thread-safe external API
809
- - ✅ Error throttling - Prevents log explosion
810
- - ✅ Connection health monitoring - Auto-detect and recover from connection loss
811
-
812
- **Animation System (v0.5.13)**:
813
- - `AnimationPlayer` class loads animations from `conversation_animations.json`
814
- - Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
815
- - Smooth transitions between animations (configurable duration)
816
- - State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
817
-
818
- **Not Implemented**:
819
- - ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
820
- - ❌ Exaggerated cartoon bounce effects
821
-
822
- **Code Locations**:
823
- - `motion/animation_player.py` - AnimationPlayer class
824
- - `animations/conversation_animations.json` - Animation definitions
825
- - `motion/movement_manager.py` - 50Hz control loop with animation integration
826
-
827
- **Scene Implementation Status**:
828
-
829
- | Scene | Recommended Interpolation | Effect | Status |
830
- |-------|--------------------------|--------|--------|
831
- | Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
832
- | Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
833
- | Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
834
- | Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
835
- | Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
836
- | Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
837
-
838
- ### Phase 17 - Antenna Sync Animation During Speech (Completed) ✅
839
- **Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
840
-
841
- **Implemented Features**:
842
- - ✅ JSON-driven animation system with antenna movements
843
- - ✅ Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
844
- - ✅ State-specific antenna animations (listening/thinking/speaking)
845
- - ✅ Smooth transitions between animation states
846
- - ✅ v1.0.0 idle refinement: idle antenna sway disabled while conversation-state antenna behaviors are retained
847
- - ✅ v1.0.0 hardware refinement: antenna torque disabled in `IDLE` to reduce idle chatter/noise
848
-
849
- **Code Locations**:
850
- - `motion/animation_player.py` - AnimationPlayer with antenna offset calculation
851
- - `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
852
- - `motion/movement_manager.py` - Antenna offset composition in final pose
853
-
854
- ### Phase 18 - Visual Gaze Interaction (Single-face only) ✅
855
- **Goal**: Use camera to detect faces for eye contact.
856
-
857
- **SDK Support**:
858
- - `look_at_image(u, v)` - Look at point in image
859
- - `look_at_world(x, y, z)` - Look at world coordinate point
860
- - `media.get_frame()` - Get camera frame (✅ Already implemented in `vision/camera_server.py:146`)
861
-
862
- **Current Status**:
863
-
864
- | Feature | Description | Status |
865
- |---------|-------------|--------|
866
- | Face detection | YOLO-based face detection (`AdamCodd/YOLOv11n-face-detection`) | ✅ Implemented |
867
- | Eye tracking | Robot tracks detected face during conversation/active mode | ✅ Implemented |
868
- | Idle scanning | Random look-around in idle cycles (switch-controlled) | ✅ Implemented |
869
-
870
- > Scope note: Current implementation is intentionally single-face tracking for stability and device performance.
871
-
872
- ### Phase 19 - Gravity Compensation Interactive Mode (Historical / Not Current Target)
873
-
874
- This was an exploration direction for manual teaching workflows.
875
-
876
- **Current Runtime Position**:
877
- - The zero-config runtime does not depend on a teaching flow
878
- - No user-facing teaching interaction is exposed as a core feature
879
- - If gravity-compensation support is revisited, it should remain optional and not become a required setup path
880
-
881
- ### Phase 20 - Environment Awareness Response (Partial) 🟡
882
-
883
- **Goal**: Use IMU sensors to sense environment changes and respond.
884
-
885
- **SDK Support**:
886
- - ✅ `mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
887
- - ✅ `mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
888
-
889
- **Implemented Features**:
890
-
891
- | Feature | Description | Status |
892
- |---------|-------------|--------|
893
- | Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
894
- | IMU sensor entities | Accelerometer and gyroscope exposed to HA | ✅ Implemented |
895
-
896
- > **Note**: Tap-to-wake feature was removed in v0.5.16 due to false triggers from robot movement. Continuous conversation is now controlled via Home Assistant switch.
897
-
898
- **Not Implemented**:
899
-
900
- | Detection Event | Response Action | Status |
901
- |-----------------|-----------------|--------|
902
- | Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
903
- | Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
904
- | Long idle | Enter sleep animation | ❌ Not implemented |
905
-
906
- ### Phase 21 - Home Assistant Orchestration Scope
907
-
908
- The current runtime already exposes the main zero-config controls needed by Home Assistant:
909
-
910
- - `services_suspended`
911
- - `idle_behavior_enabled`
912
- - `continuous_conversation`
913
- - `emotion`
914
- - gesture / face / diagnostic sensors
915
-
916
- More elaborate scene orchestration remains intentionally outside the core runtime scope unless it can be delivered without introducing user configuration burden.
917
-
918
-
919
- ---
920
-
921
- ## 📊 Feature Implementation Summary
922
-
923
- ### ✅ Completed Features
924
-
925
- #### Core Voice Assistant (Phase 1-12)
926
- - **ESPHome entities** - Core phases implemented (Phase 11 LED intentionally disabled); exact count evolves by release
927
- - **Basic voice interaction** - Wake word detection (microWakeWord/openWakeWord), STT/TTS integration
928
- - **Motion feedback** - Nod, shake, gaze and other basic actions
929
- - **Audio path** - local wake word / stop word detection plus HA-managed STT/TTS
930
- - **Camera stream** - MJPEG live preview with ESPHome Camera entity
931
-
932
- #### Extended Features (Phase 13-22)
933
- - **Phase 13** ✅ - Sendspin multi-room audio support
934
- - **Phase 14** ✅ - Manual emotion playback + non-blocking motion feedback
935
- - **Phase 15** ✅ - Face tracking with body following (DOA + YOLO + body_yaw sync)
936
- - **Phase 16** ✅ - JSON-driven animation system (50Hz control loop)
937
- - **Phase 17** ✅ - Antenna sync animation during speech
938
- - **Phase 22** ✅ - Gesture detection (HaGRID ONNX, 18 gestures)
939
-
940
- ### 🟡 Partially Implemented Features
941
-
942
- - **Phase 20** - IMU sensor entities are exposed; higher-level trigger logic is intentionally minimal
943
-
944
- ### ❌ Not Implemented Features
945
-
946
- - Zero-config scene orchestration beyond the provided runtime switches and blueprint defaults
947
-
948
- ---
949
-
950
- ## Feature Priority Summary (Updated v1.0.6)
951
-
952
- ### Completed ✅
953
- - ✅ **Phase 1-12**: Core ESPHome entities and voice assistant
954
- - ✅ **Phase 13**: Sendspin audio playback
955
- - ✅ **Phase 14**: Emotion playback and motion feedback
956
- - ✅ **Phase 15**: Face tracking with body following
957
- - ✅ **Phase 16**: JSON-driven animation system
958
- - ✅ **Phase 17**: Antenna sync animation + v1.0.0 idle antenna behavior refinements
959
- - ✅ **Phase 21**: Continuous conversation switch
960
- - ✅ **Phase 22**: Gesture detection
961
- - ✅ **Phase 23**: Face detection sensor
962
- - ✅ **Phase 24**: System diagnostics entities
963
-
964
- ### Partial 🟡
965
- - 🟡 **Phase 20**: Environment awareness (IMU entities done, triggers pending)
966
-
967
- ### Not Implemented ❌
- - ❌ Zero-config scene orchestration layer beyond current runtime behavior
968
-
969
- ---
970
-
971
- ## 📈 Completion Statistics
972
-
973
- | Phase | Status | Completion | Notes |
974
- |-------|--------|------------|-------|
975
- | Phase 1-12 | ✅ Complete | 100% | Core ESPHome entities implemented (Phase 11 LED intentionally disabled) |
976
- | Phase 13 | ✅ Complete | 100% | Sendspin audio playback support |
977
- | Phase 14 | ✅ Complete | 100% | Manual emotion playback and non-blocking motion feedback |
978
- | Phase 15 | ✅ Complete | 100% | Face tracking with DOA, YOLO detection, body follows head |
979
- | Phase 16 | ✅ Complete | 100% | JSON-driven animation system (50Hz control loop) |
980
- | Phase 17 | ✅ Complete | 100% | Antenna sync animation during speech |
981
- | Phase 18 | ✅ Complete | 100% | Single-face visual gaze interaction with idle scanning |
982
- | Phase 19 | Not a current runtime target | - | Historical planning item, not part of the zero-config runtime model |
983
- | Phase 20 | 🟡 Partial | 30% | IMU sensors exposed, missing trigger logic |
984
- | Phase 21 | ✅ Complete | 100% | Continuous conversation switch implemented |
985
- | Phase 22 | ✅ Complete | 100% | Gesture detection with HaGRID ONNX models |
986
- | Phase 23 | ✅ Complete | 100% | Face detection sensor exposed |
987
- | Phase 24 | ✅ Complete | 100% | System diagnostics entities (9 sensors) |
988
- | **v0.9.5** | ✅ Complete | 100% | Modular architecture refactoring |
989
- | **v1.0.0** | ✅ Complete | 100% | Runtime toggles/persistence (Sendspin, face, gesture, confidence) + idle and gesture stability updates |
990
-
991
- **Overall Completion**: current zero-config runtime path is functionally complete; remaining gaps are optional orchestration ideas rather than missing core runtime features.
992
-
993
-
994
- ---
995
-
996
- ## 🔧 Daemon Crash Fix (2025-01-05)
997
-
998
- ### Problem Description
999
- During long-term operation, `reachy_mini daemon` would crash, causing robot to become unresponsive.
1000
-
1001
- ### Root Cause
1002
- 1. **50Hz control loop** - Current stable frequency for motion control
1003
- 2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
1004
- 3. **Missing change detection** - Even when pose hasn't changed, continues sending same commands
1005
- 4. **Zenoh message queue blocking** - Accumulated 150+ messages per second, daemon cannot process in time
1006
-
1007
- ### Fix Solution
1008
-
1009
- #### 1. Control loop frequency (motion/movement_manager.py)
1010
- ```python
1011
- # Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
1012
- # Current stable frequency for production use
1013
- CONTROL_LOOP_FREQUENCY_HZ = 50 # Current stable frequency
1014
- ```
1015
-
1016
- #### 2. Add pose change detection (movement_manager.py)
1017
- ```python
1018
- # Only send commands on significant pose changes
1019
- if self._last_sent_pose is not None:
1020
- max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
1021
- if max_diff < 0.001: # Threshold: 0.001 rad or 0.001 m
1022
- return # Skip sending
1023
- ```
1024
-
1025
- #### 3. State query caching (reachy_controller.py)
1026
- ```python
1027
- # Cache daemon status query results
1028
- self._cache_ttl = 0.1 # 100ms TTL
1029
- self._last_status_query = 0.0
1030
-
1031
- def _get_cached_status(self):
1032
- now = time.time()
1033
- if now - self._last_status_query < self._cache_ttl:
1034
- return self._state_cache.get('status') # Use cache
1035
- # ... query and update cache
1036
- ```
1037
-
1038
- #### 4. Head pose query caching (reachy_controller.py)
1039
- ```python
1040
- # Cache get_current_head_pose() and get_current_joint_positions() results
1041
- def _get_cached_head_pose(self):
1042
- # Reuse cached results within 100ms
1043
- ```
1044
-
1045
- ### Fix Results
1046
-
1047
- | Metric | Before Fix | After Fix | Improvement |
1048
- |--------|------------|-----------|-------------|
1049
- | Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
1050
- | State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
1051
- | Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
1052
- | Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
1053
- | Expected stability | Crash within hours | Stable for days | Major improvement |
1054
-
1055
- ### Related Files
1056
- - `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
1057
- - `movement_manager.py` - Control loop optimization
1058
- - `reachy_controller.py` - State query caching
1059
-
1060
- ### Future Optimization Suggestions
1061
- 1. ⏳ Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
1062
- 2. ⏳ Batch state queries - Get all states at once
1063
- 3. ⏳ Further runtime efficiency tuning after real usage profiling
1064
-
1065
- ---
1066
-
1067
- ## 🔧 Daemon Crash Deep Fix (2026-01-07)
1068
-
1069
- > **Update (2026-01-30)**: Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity. The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
1070
-
1071
- ### Problem Description
1072
- During long-term operation, `reachy_mini daemon` still crashes, previous fix not thorough enough.
1073
-
1074
- ### Root Cause Analysis
1075
-
1076
- Through deep analysis of SDK source code:
1077
-
1078
- 1. **Each `set_target()` sends 3 Zenoh messages**
1079
- - `set_target_head_pose()` - 1 message
1080
- - `set_target_antenna_joint_positions()` - 1 message
1081
- - `set_target_body_yaw()` - 1 message
1082
-
1083
- 2. **Daemon control loop is 50Hz**
1084
- - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
1085
- - If message send frequency exceeds 50Hz, daemon may not process in time
1086
-
1087
- 3. **Previous 20Hz control loop still too high**
1088
- - 20Hz × 3 messages = 60 messages/second
1089
- - Already exceeds daemon's 50Hz processing capacity
1090
-
1091
- 4. **Pose change threshold too small (0.002)**
1092
- - Breathing animation, speech sway, face tracking continuously produce tiny changes
1093
- - Almost every loop triggers `set_target()`
1094
-
1095
- ### Fix Solution
1096
-
1097
- #### 1. Control loop frequency history (motion/movement_manager.py)
1098
- ```python
1099
- # Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
1100
- # Current stable frequency for production use
1101
- CONTROL_LOOP_FREQUENCY_HZ = 50 # Current (2026-01-30)
1102
- ```
1103
-
1104
- #### 2. Increase pose change threshold (movement_manager.py)
1105
- ```python
1106
- # Increased from 0.002 to 0.005
1107
- # 0.005 rad ≈ 0.29 degrees, still smooth enough
1108
- self._pose_change_threshold = 0.005
1109
- ```
1110
-
1111
- #### 3. Reduce camera/face tracking frequency (camera_server.py)
1112
- ```python
1113
- # Reduced from 15fps to 10fps
1114
- fps: int = 10
1115
- ```
1116
-
1117
- #### 4. Increase state cache TTL (reachy_controller.py)
1118
- ```python
1119
- # Increased from 1 second to 2 seconds
1120
- self._cache_ttl = 2.0
1121
- ```
1122
-
1123
- ### Fix Results
1124
-
1125
- > **Note**: Current implementation uses 50Hz control loop as of 2026-01-30. The table below shows historical evolution.
1126
-
1127
- | Metric | Before (20Hz) | After (10Hz) | Current (50Hz) |
1128
- |--------|---------------|--------------|-----------------|
1129
- | Control loop frequency | 20 Hz | 10 Hz | 50 Hz (current) |
1130
- | Max Zenoh messages | 60 msg/s | 30 msg/s | ~50 msg/s (optimized) |
1131
- | Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ~30 msg/s |
1132
- | Face tracking frequency | 15 Hz | 10 Hz | Adaptive (2-15 Hz) |
1133
- | State cache TTL | 1 second | 2 seconds | 2 seconds |
1134
- | Expected stability | Crash within hours | Stable operation | Stable (daemon updated) |
1135
-
1136
- ### Key Finding
1137
-
1138
- Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity.
1139
-
1140
- ### Related Files
1141
- - `motion/movement_manager.py` - Control loop frequency and pose threshold
1142
- - `vision/camera_server.py` - Face tracking frequency
1143
- - `reachy_controller.py` - State cache TTL
1144
-
1145
-
1146
- ---
1147
-
1148
- ## 🔧 Microphone Sensitivity Optimization (2026-01-07)
1149
-
1150
- > Historical background only. These notes describe earlier low-level microphone tuning experiments and should not be read as current Home Assistant entity capabilities.
1151
-
1152
- ### Problem
1153
- Low microphone sensitivity - Need to be very close for voice recognition.
1154
-
1155
- ### Solution
1156
- Comprehensive ReSpeaker XVF3800 microphone optimization:
1157
-
1158
- | Parameter | Default | Optimized | Notes |
1159
- |-----------|---------|-----------|-------|
1160
- | AGC | Off | On | Auto volume normalization |
1161
- | AGC max gain | ~15dB | 30dB | Better distant speech pickup |
1162
- | AGC target level | -25dB | -18dB | Stronger output signal |
1163
- | Microphone gain | 1.0x | 2.0x | Base gain doubled |
1164
- | Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
1165
-
1166
- ### Result
1167
- Microphone sensitivity improved from ~30cm to ~2-3m effective range.
1168
-
1169
- ---
1170
-
1171
- ## 🔧 v0.5.1 Bug Fixes (2026-01-08)
1172
-
1173
- ### Issue 1: Music Not Resuming After Voice Conversation
1174
-
1175
- **Fix**: Sendspin now connects to `music_player` instead of `tts_player`
1176
-
1177
- ### Issue 2: Audio Conflict During Voice Assistant Wakeup
1178
-
1179
- **Fix**: Added `pause_sendspin()` and `resume_sendspin()` methods to `audio/audio_player.py`
1180
-
1181
- ### Issue 3: Sendspin Sample Rate Optimization
1182
-
1183
- **Fix**: Prioritize 16kHz in Sendspin supported formats (hardware limitation)
1184
-
1185
- ---
1186
-
1187
- ## 🔧 v0.5.15 Updates (2026-01-11)
1188
-
1189
- ### Feature 1: Audio Settings Persistence
1190
-
1191
- Historical note: older audio processing preferences were once persisted here. The current app no longer exposes AGC or noise suppression entities.
1192
-
1193
- ### Feature 2: Sendspin Discovery Refactoring
1194
-
1195
- Moved mDNS discovery to `zeroconf.py` for better separation of concerns.
1196
-
1197
-
1198
- ---
1199
-
1200
- ### SDK Data Structure Reference
1201
-
1202
- ```python
1203
- # Motor control mode
1204
- class MotorControlMode(str, Enum):
1205
- Enabled = "enabled" # Torque on, position control
1206
- Disabled = "disabled" # Torque off
1207
- GravityCompensation = "gravity_compensation" # Gravity compensation mode
1208
-
1209
- # Daemon state
1210
- class DaemonState(Enum):
1211
- NOT_INITIALIZED = "not_initialized"
1212
- STARTING = "starting"
1213
- RUNNING = "running"
1214
- STOPPING = "stopping"
1215
- STOPPED = "stopped"
1216
- ERROR = "error"
1217
-
1218
- # Full state
1219
- class FullState:
1220
- control_mode: MotorControlMode
1221
- head_pose: XYZRPYPose # x, y, z (m), roll, pitch, yaw (rad)
1222
- head_joints: list[float] # 7 joint angles
1223
- body_yaw: float
1224
- antennas_position: list[float] # [right, left]
1225
- doa: DoAInfo # angle (rad), speech_detected (bool)
1226
-
1227
- # IMU data (wireless version only)
1228
- imu_data = {
1228
- "accelerometer": [x, y, z], # m/s²
1229
- "gyroscope": [x, y, z], # rad/s
1230
- "quaternion": [w, x, y, z], # Attitude quaternion
1231
- "temperature": float # °C
1233
- }
1234
-
1235
- # Safety limits
1236
- HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
1236
- HEAD_YAW_LIMIT = [-180°, +180°]
1237
- BODY_YAW_LIMIT = [-160°, +160°]
1238
- YAW_DELTA_MAX = 65°  # Max difference between head and body yaw
1240
- ```
1241
-
1242
- ### ESPHome Protocol Implementation Notes
1243
-
1244
- ESPHome protocol communicates with Home Assistant via protobuf messages. The runtime primarily uses switch/number/select/sensor/binary_sensor/text_sensor/camera entities; button-only wake/sleep flows are historical and no longer the main control model.
1245
-
1246
- ```python
1247
- from aioesphomeapi.api_pb2 import (
1248
- # Number entity (volume/angle/confidence control)
1249
- ListEntitiesNumberResponse,
1250
- NumberStateResponse,
1251
- NumberCommandRequest,
1252
-
1253
- # Select entity (emotion)
1254
- ListEntitiesSelectResponse,
1255
- SelectStateResponse,
1256
- SelectCommandRequest,
1257
-
1258
- # Switch entity (sleep/runtime toggles)
1259
- ListEntitiesSwitchResponse,
1260
- SwitchStateResponse,
1261
- SwitchCommandRequest,
1262
-
1263
- # Sensor entity (numeric sensors)
1264
- ListEntitiesSensorResponse,
1265
- SensorStateResponse,
1266
-
1267
- # Binary Sensor entity (boolean sensors)
1268
- ListEntitiesBinarySensorResponse,
1269
- BinarySensorStateResponse,
1270
-
1271
- # Text Sensor entity (text sensors)
1272
- ListEntitiesTextSensorResponse,
1273
- TextSensorStateResponse,
1274
- )
1275
- ```
1276
-
1277
- ## Reference Projects
1278
-
1279
- - [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
1280
- - [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
1281
- - [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
1282
- - [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
1283
- - [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)
1284
-
1285
- ---
1286
-
1287
- ## 🔧 Code Refactoring & Improvement Plan (v0.9.5)
1288
-
1289
- > Comprehensive improvement plan based on code analysis
1290
- > Target Platform: Raspberry Pi CM4 (4GB RAM, 4-core CPU)
1291
-
1292
- ### Code Size Statistics (Updated 2026-01-19)
1293
-
1294
- | File | Original | Current | Status |
1295
- |------|----------|---------|--------|
1296
- | `movement_manager.py` | 1205 | 1260 | ⚠️ Modularized but still large |
1296
- | `voice_assistant.py` | 1097 | 1270 | ✅ Enhanced with new features |
1297
- | `satellite.py` | 1003 | 1022 | ✅ Optimized (-2%) |
1298
- | `camera_server.py` | 1070 | 1009 | ✅ Optimized (-6%) |
1299
- | `reachy_controller.py` | 878 | 961 | ✅ Enhanced |
1300
- | `entity_registry.py` | 1129 | 844 | ✅ Optimized (-25%) |
1301
- | `audio_player.py` | 599 | 679 | ✅ Acceptable |
1302
- | `core/service_base.py` | - | 552 | 🆕 New module |
1303
- | `entities/entity_factory.py` | - | 440 | 🆕 New module |
1305
-
1306
- > **Optimization Notes**:
1307
- > - `entity_registry.py`: Factory pattern refactoring reduced 285 lines
1308
- > - `camera_server.py`: Using `FaceTrackingInterpolator` module reduced 61 lines
1309
- > - `protocol/satellite.py`: Runtime paths are now centered on voice state handling and HA event reactions
1310
- > - New modular architecture with 6 sub-packages: `core/`, `motion/`, `vision/`, `audio/`, `entities/`, `protocol/`
1311
-
1312
- ### New Module List (Updated 2026-01-19)
1313
-
1314
- | Directory | Module | Lines | Description |
1315
- |-----------|--------|-------|-------------|
1316
- | `core/` | `config.py` | 454 | Centralized nested configuration |
1317
- | `core/` | `service_base.py` | 552 | Suspend/resume service helpers + RobustOperationMixin |
1318
- | `core/` | `system_diagnostics.py` | 250 | System diagnostics |
1319
- | `core/` | `exceptions.py` | 68 | Custom exception classes |
1320
- | `core/` | `util.py` | 28 | Utility functions |
1321
- | `motion/` | `antenna.py` | - | Antenna freeze/unfreeze control |
1322
- | `motion/` | `pose_composer.py` | - | Pose composition utilities |
1323
- | `motion/` | `command_runtime.py` | - | Command queue handling / state transitions |
1324
- | `motion/` | `control_runtime.py` | - | Control-loop runtime helpers |
1325
- | `motion/` | `idle_runtime.py` | - | Idle behavior / idle rest handling |
1326
- | `motion/` | `state_machine.py` | - | State machine definitions |
1327
- | `motion/` | `smoothing.py` | - | Smoothing/transition algorithms |
1328
- | `motion/` | `animation_player.py` | - | Animation player |
1329
- | `motion/` | `emotion_moves.py` | - | Emotion moves |
1330
- | `motion/` | `speech_sway.py` | 338 | Speech-driven head micro-movements |
1331
- | `motion/` | `reachy_motion.py` | - | Reachy motion API |
1332
- | `vision/` | `frame_processor.py` | 227 | Adaptive frame rate management |
1333
- | `vision/` | `face_tracking_interpolator.py` | 253 | Face lost interpolation |
1334
- | `vision/` | `gesture_smoother.py` | 80 | Historical gesture smoothing module; current runtime no longer depends on it |
1335
- | `vision/` | `gesture_detector.py` | 285 | HaGRID gesture detection |
1336
- | `vision/` | `head_tracker.py` | 367 | YOLO face detector |
1337
- | `vision/` | `camera_server.py` | 1009 | MJPEG camera stream server facade |
1338
- | `audio/` | `doa_tracker.py` | 206 | Direction of Arrival tracking |
1339
- | `audio/` | `microphone.py` | 219 | Hardware audio helper / legacy tuning code |
1340
- | `audio/` | `audio_player.py` | facade | AudioPlayer facade (split into playback/sendspin/local streaming modules) |
1341
- | `entities/` | `entity.py` | 402 | ESPHome base entity |
1342
- | `entities/` | `entity_factory.py` | 440 | Entity factory pattern |
1343
- | `entities/` | `entity_keys.py` | 155 | Entity key constants |
1344
- | `entities/` | `entity_extensions.py` | 258 | Extended entity types |
1345
- | `entities/` | `event_emotion_mapper.py` | 351 | HA event to emotion mapping |
1346
- | `protocol/` | `satellite.py` | 1022 | ESPHome protocol handler |
1347
- | `protocol/` | `api_server.py` | 172 | HTTP API server |
1348
- | `protocol/` | `zeroconf.py` | - | mDNS discovery |
1349
-
1350
- ### Improvement Plan Status
1351
-
1352
- #### Phase 1: Runtime Suspend/Resume Foundation ✅ Complete
1353
-
1354
- - [x] Create `core/service_base.py` - runtime suspend/resume service helpers
1355
- - [x] All required services implement `suspend()` / `resume()` methods where needed
1356
- - [x] Historical app-managed sleep/wake flow was later removed to align with the current SDK
1357
-
1358
- #### Phase 2: Code Modularization ✅ Complete
1359
-
1360
- - [x] Create new directory structure (`core/`, `motion/`, `audio/`, `vision/`, `entities/`)
1361
- - [x] Extract from `movement_manager.py` → `motion/antenna.py`, `motion/pose_composer.py`
1362
- - [x] Extract from `camera_server.py` → `vision/frame_processor.py`, `vision/face_tracking_interpolator.py`
1363
- - [x] Extract from `entity_registry.py` → `entities/entity_factory.py`, `entities/entity_keys.py`
1364
- - [x] Create `core/config.py` for centralized configuration
1365
- - [x] Ensure no circular dependencies
1366
-
1367
- #### Phase 3: Stability & Performance ✅ Complete
1368
-
1369
- - [x] Create `core/exceptions.py` - Custom exception classes
1370
- - [x] Implement `RobustOperationMixin` - Unified error handling
1371
- - [x] `CameraServer` implements Context Manager pattern
1372
- - [x] Improve `CameraServer` resource cleanup
1373
- - [x] Fix MJPEG client tracking (proper register/unregister)
1374
- - [x] Historical health/memory monitor modules were added during earlier SDK instability periods
1375
- - [x] Health/memory monitor modules were later removed after runtime simplification
1376
- - [ ] Long-running stability test (24h+)
1377
-
1378
- #### Phase 4: Feature Enhancements ✅ Complete
1379
-
1380
- - [x] Historical gesture-action runtime path explored
1381
- - [x] Gesture runtime later simplified to publish recognition results only
1382
- - [x] Create `audio/doa_tracker.py` - DOATracker
1383
- - [x] Implement sound source tracking with motion control integration
1384
- - [x] Create `entities/event_emotion_mapper.py` - EventEmotionMapper
1385
- - [x] Fold HA event behavior config into `animations/conversation_animations.json`
1386
- - [x] Add DOA tracking toggle HA entity
1387
-
1388
- ### SDK Compatibility Verification ✅ Passed
1389
-
1390
- | API Call | Status | Notes |
1391
- |----------|--------|-------|
1392
- | `set_target(head, antennas, body_yaw)` | ✅ | Correct usage |
1392
- | `goto_target()` | ✅ | Correct usage |
1393
- | `look_at_image(u: int, v: int)` | ✅ | Fixed float→int |
1394
- | `create_head_pose(degrees=False)` | ✅ | Using radians |
1395
- | `compose_world_offset()` | ✅ | SDK function correctly called |
1396
- | `linear_pose_interpolation()` | ✅ | Has fallback implementation |
1397
- | Body yaw range | ✅ | Clamped to ±160° |
1399
-
1400
- ---
1401
-
1402
- ## 🔧 v0.9.5 Updates (2026-01-19)
1403
-
1404
- ### Major Changes: Modular Architecture Refactoring
1405
-
1406
- The codebase has been restructured into a modular architecture with 5 sub-packages:
1407
-
1408
- | Package | Purpose | Key Modules |
1409
- |---------|---------|-------------|
1410
- | `core/` | Core infrastructure | `config.py`, `service_base.py`, `system_diagnostics.py` |
1411
- | `motion/` | Motion control | `antenna.py`, `pose_composer.py`, `command_runtime.py`, `control_runtime.py`, `idle_runtime.py`, `smoothing.py` |
1412
- | `vision/` | Vision processing | `frame_processor.py`, `face_tracking_interpolator.py` |
1413
- | `audio/` | Audio processing | `microphone.py`, `doa_tracker.py` |
1414
- | `entities/` | HA entity management | `entity_factory.py`, `entity_keys.py`, `event_emotion_mapper.py` |
1415
-
1416
- ### New Features
1417
-
1418
- 1. **Historical note**
1419
- - Earlier versions explored direct sleep/wake callbacks and polling-based state handling
1420
- - Current runtime no longer uses app-managed sleep/wake callbacks
1421
-
1422
- 2. **Camera runtime evolution**
1423
- - Camera lifecycle was later split into dedicated runtime/processing/http helpers
1424
- - Current runtime can fully stop camera service when `Idle Behavior` is disabled
1425
-
1426
- ### Audio Optimizations
1427
-
1428
- | Parameter | Before | After | Improvement |
1429
- |-----------|--------|-------|-------------|
1430
- | Audio chunk size | 1024 samples | 512 samples | 64ms → 32ms latency with lower CPU load |
1431
- | Audio loop delay | 10ms | 1ms | Faster VAD response |
1432
- | Stereo→Mono | Mean of channels | First channel | Cleaner signal |
1433
-
1434
- ### Code Quality Improvements
1435
-
1436
- - Removed all legacy/compatibility code
1437
- - Centralized configuration in nested dataclasses
1438
- - NaN/Inf cleaning in audio pipeline
1439
- - Rotation clamping in face tracking to prevent IK collisions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -9,6 +9,7 @@ short_description: Deep integration of Reachy Mini robot with Home Assistant
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
 
12
  - reachy_mini_home_assistant
13
  - home_assistant
14
  - homeassistant
 
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
12
+ - reachy_mini_ha_voice
13
  - reachy_mini_home_assistant
14
  - home_assistant
15
  - homeassistant
changelog.json CHANGED
@@ -1,229 +1,4 @@
1
- [ {
2
- "version": "1.0.7",
3
- "date": "2026-05-05",
4
- "changes": [
5
- "Build: Bump package version to 1.0.7",
6
- "Change: Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers",
7
- "Change: Raise the Reachy Mini SDK baseline to reachy-mini>=1.7.1",
8
- "Fix: Keep wakeup and TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers",
9
- "Fix: Synchronize Idle Behavior shutdown with ESPHome face and gesture switches plus runtime state updates",
10
- "Fix: Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior",
11
- "Optimize: Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling"
12
- ]
13
- },
14
- {
15
- "version": "1.0.6",
16
- "date": "2026-05-01",
17
- "changes": [
18
- "Build: Bump package version to 1.0.6",
19
- "Change: Align pyproject.toml with the current Reachy Mini SDK baseline (reachy-mini>=1.7.0, Python>=3.12, zeroconf>=0.131,<1, aiohttp, websockets>=12,<16, and gstreamer-bundle==1.28.1 on non-Linux)",
20
- "Change: Align Sendspin dependency with the current upstream client line via aiosendspin>=5.1,<6.0",
21
- "Fix: Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model",
22
- "Optimize: Stop the camera server entirely when Idle Behavior is disabled instead of only unloading vision models"
23
- ]
24
- },
25
- {
26
- "version": "1.0.5",
27
- "date": "2026-04-12",
28
- "changes": [
29
- "Build: Bump package version to 1.0.5",
30
- "Change: Remove app-managed robot sleep/wake handling because the current Reachy Mini SDK no longer allows mini apps to stay active while the robot enters sleep",
31
- "Change: Limit resource suspend/resume to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery",
32
- "Change: Align pyproject.toml runtime constraints with the current Reachy Mini reference SDK package (reachy-mini>=1.6.3, websockets>=12,<16, Python baseline >=3.10, and uv gstreamer metadata)",
33
- "Remove: Delete SleepManager integration and app-side sleep/wake callback flow from the voice assistant runtime",
34
- "Remove: Delete Home Assistant sleep control entities and internal robot sleep state tracking from the mini app"
35
- ]
36
- },
37
- {
38
- "version": "1.0.4",
39
- "date": "2026-03-19",
40
- "changes": [
41
- "Build: Bump package version to 1.0.4",
42
- "Fix: Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks",
43
- "Fix: Replace direct SDK private _respeaker access with audio_control_utils-based ReSpeaker initialization",
44
- "Fix: Tighten camera and pose composition to require current SDK media/utils APIs and valid look_at_image inputs",
45
- "Improve: Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases",
46
- "Improve: Replace separate wake/sleep buttons with a single sleep control entity",
47
- "Improve: Update Sendspin integration for current aiosendspin lifecycle, stream handling, listener cleanup, and synchronized buffering",
48
- "Improve: Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor"
49
- ]
50
- },
51
- {
52
- "version": "1.0.3",
53
- "date": "2026-03-07",
54
- "changes": [
55
- "Build: Bump package version to 1.0.3",
56
- "New: Add Idle Random Actions switch in Home Assistant with preferences persistence and startup restore",
57
- "New: Add configurable idle_random_actions action presets in conversation_animations.json for centralized idle motion tuning",
58
- "Fix: Remove duplicate idle_random_actions fields/methods and complete runtime control wiring in controller/entity registry/movement manager",
59
- "Improve: Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion",
60
- "Optimize: Remove set_target global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick",
61
- "Optimize: Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness"
62
- ]
63
- },
64
- {
65
- "version": "1.0.2",
66
- "date": "2026-03-06",
67
- "changes": [
68
- "Build: Bump package version to 1.0.2",
69
- "Fix: Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness",
70
- "Fix: Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise",
71
- "Fix: Switch sleep/wake control to daemon API (start/stop with wake_up/goto_sleep) so /api/daemon/status reflects real sleep state on SDK 1.5",
72
- "Fix: Normalize daemon status parsing for SDK 1.5 object-based status responses",
73
- "Fix: Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts",
74
- "Change: Keep idle antenna behavior as animation-only control (no torque coupling)",
75
- "Change: Tighten preference loading to current schema (no legacy config fallback filtering)",
76
- "Fix: Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome",
77
- "Fix: Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection",
78
- "New: Add Home Assistant blueprint for Reachy presence companion automation",
79
- "Improve: Blueprint supports device-first auto-binding and richer usage instructions",
80
- "Docs: Refresh landing page (index.html) with current version, GitHub source link, and new Blueprint/Auto Release capability cards",
81
- "New: Add GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag",
82
- "Chore: Ignore local wiki workspace artifacts (local/) from repository tracking"
83
- ]
84
- },
85
- {
86
- "version": "1.0.1",
87
- "date": "2026-03-05",
88
- "changes": [
89
- "Build: Bump package version to 1.0.1",
90
- "Deps: Update runtime dependency baseline to reachy-mini>=1.5.0",
91
- "Fix: Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility",
92
- "Fix: Remove legacy ZError string matching from connection error handling",
93
- "Fix: Adapt daemon status handling to SDK v1.5 DaemonStatus object (prevents AttributeError on status.get)",
94
- "Fix: Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating",
95
- "Fix: Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)",
96
- "Fix: Improve TTS streaming robustness and reduce cutoffs with retry-based audio push",
97
- "Optimize: Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)",
98
- "Optimize: Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio"
99
- ]
100
- },
101
- {
102
- "version": "1.0.0",
103
- "date": "2026-03-04",
104
- "changes": [
105
- "Build: Bump package version to 1.0.0",
106
- "Deps: Require reachy-mini[gstreamer]>=1.4.1",
107
- "Fix: Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)",
108
- "Fix: Auto-match ONNX gesture input size from model shape to prevent INVALID_ARGUMENT dimension errors",
109
- "New: Add Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)",
110
- "New: Add Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)",
111
- "New: Add Face Confidence number entity (0.0-1.0, persistent)",
112
- "Optimize: Unload/reload face and gesture models when toggled off/on to save resources",
113
- "Optimize: Idle behavior updated to breathing + look-around alternation, idle antenna sway disabled",
114
- "Optimize: Adjust idle breathing to human-like cadence",
115
- "Fix: Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise",
116
- "Fix: Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)",
117
- "Fix: Enforce deterministic audio startup path and fail fast when microphone capture is not ready",
118
- "Optimize: Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)",
119
- "Optimize: Keep face/gesture AI processing active even when stream viewers are absent",
120
- "Fix: Add on-demand /snapshot JPEG generation when no cached stream frame is available",
121
- "Change: Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps"
122
- ]
123
- },
124
- {
125
- "version": "0.9.9",
126
- "date": "2026-01-28",
127
- "changes": [
128
- "Fix: Audio buffer overflow - require Reachy Mini hardware, use only Reachy microphone with 50ms sleep",
129
- "Optimize: Gesture detection sensitivity - remove all confidence filtering, return all detections to Home Assistant",
130
- "Optimize: Gesture detection now runs at 1 frame interval for maximum responsiveness",
131
- "Refactor: Simplify GestureSmoother to frequency-based confirmation (1 frame)",
132
- "Refactor: Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)",
133
- "Fix: Remove duplicate empty check in gesture detection",
134
- "Optimize: SDK integration - add MediaBackend detection and proper resource cleanup",
135
- "Document: ReSpeaker private attribute access risk with TODO comments"
136
- ]
137
- },
138
- {
139
- "version": "0.9.8",
140
- "date": "2026-01-27",
141
- "changes": [
142
- "New: Mute switch and Disable Camera entities for granular control",
143
- "Fix: Camera disable logic and daemon crash prevention",
144
- "New: Home Assistant connection-driven feature loading with auto suspend/resume",
145
- "Optimize: Reduce log output by 30-40%",
146
- "Fix: Code quality improvements",
147
- "Fix: SDK crash during idle - optimize audio processing and add GStreamer threading lock",
148
- "Optimize: Bundle face tracking model, use SDK Zenoh for daemon monitoring",
149
- "Simplify: Device ID reads /etc/machine-id directly",
150
- "Clean up: Remove unused config items"
151
- ]
152
- },
153
- {
154
- "version": "0.9.7",
155
- "date": "2026-01-20",
156
- "changes": [
157
- "Fix: Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)",
158
- "Fix: Animation file path corrected (was looking in wrong directory)",
159
- "Fix: Remove hey_jarvis from required wake words (it's optional in openWakeWord/)"
160
- ]
161
- },
162
- {
163
- "version": "0.9.6",
164
- "date": "2026-01-20",
165
- "changes": [
166
- "New: Add ruff linter/formatter and mypy type checker configuration",
167
- "New: Add pre-commit hooks for automated code quality checks",
168
- "Fix: Remove duplicate resume() method in audio_player.py",
169
- "Fix: Remove duplicate connection_lost() method in satellite.py",
170
- "Fix: Store asyncio task reference in sleep_manager.py to prevent garbage collection",
171
- "Optimize: Use dict.items() for efficient iteration in smoothing.py"
172
- ]
173
- },
174
- {
175
- "version": "0.9.5",
176
- "date": "2026-01-19",
177
- "changes": [
178
- "Refactor: Modularize codebase - new core/motion/vision/audio/entities module structure",
179
- "New: Direct callbacks for HA sleep/wake buttons to suspend/resume services",
180
- "Optimize: Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)",
181
- "Optimize: Audio loop delay reduced from 10ms to 1ms for faster VAD response",
182
- "Optimize: Stereo to mono conversion uses first channel instead of mean for cleaner signal",
183
- "Improve: Camera resume_from_suspend now synchronous for reliable wake from sleep",
184
- "Improve: Rotation clamping in face tracking to prevent IK collisions"
185
- ]
186
- },
187
- {
188
- "version": "0.9.0",
189
- "date": "2026-01-18",
190
- "changes": [
191
- "New: Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect",
192
- "New: System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors",
193
- "New: Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)",
194
- "Fix: Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam",
195
- "Improve: Graceful service lifecycle management with RobotStateMonitor callbacks"
196
- ]
197
- },
198
- {
199
- "version": "0.8.7",
200
- "date": "2026-01-18",
201
- "changes": [
202
- "Fix: Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback",
203
- "Fix: Emotion moves and face tracking now respect SDK safety limits",
204
- "Improve: Face tracking smoothness - removed EMA smoothing (matches reference project)",
205
- "Improve: Face tracking timing updated to match reference (2s delay, 1s interpolation)"
206
- ]
207
- },
208
- {
209
- "version": "0.8.6",
210
- "date": "2026-01-18",
211
- "changes": [
212
- "Fix: Audio buffer memory leak - added size limit to prevent unbounded growth",
213
- "Fix: Temp file leak - downloaded audio files now cleaned up after playback",
214
- "Fix: Camera thread termination timeout increased for clean shutdown",
215
- "Fix: Thread-safe draining flag using threading.Event",
216
- "Fix: Silent failures now logged for debugging"
217
- ]
218
- },
219
- {
220
- "version": "0.8.5",
221
- "date": "2026-01-18",
222
- "changes": [
223
- "Fix: DOA turn-to-sound direction inverted - now turns correctly toward sound source",
224
- "Fix: Graceful shutdown prevents daemon crash on app stop"
225
- ]
226
- },
227
  {
228
  "version": "0.8.4",
229
  "date": "2026-01-18",
@@ -663,4 +438,3 @@
663
  ]
664
  }
665
  ]
666
-
 
1
+ [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "version": "0.8.4",
4
  "date": "2026-01-18",
 
438
  ]
439
  }
440
  ]
 
docs/USER_MANUAL_CN.md DELETED
@@ -1,244 +0,0 @@
1
- # Reachy Mini 语音助手 - 用户手册
2
-
3
- ## 系统要求
4
-
5
- ### 硬件
6
- - Reachy Mini 机器人(带 ReSpeaker XVF3800 麦克风)
7
- - WiFi 网络连接
8
-
9
- ### 软件
10
- - Home Assistant(2024.1 或更高版本)
11
- - Home Assistant 中已启用 ESPHome 集成
12
-
13
- ---
14
-
15
- ## 安装步骤
16
-
17
- ### 第一步:安装应用
18
- 从 Reachy Mini 应用商店安装 `reachy_mini_home_assistant`。
19
-
20
- ### 第二步:启动应用
21
- 应用将自动:
22
- - 在端口 6053 启动 ESPHome 服务器
23
- - 加载预打包的唤醒词模型
24
- - 通过 mDNS 注册以便自动发现
25
- - 如果网络上有 Sendspin 服务器则自动连接
26
-
27
- ### 第三步:连接 Home Assistant
28
- **自动连接(推荐):**
29
- Home Assistant 会通过 mDNS 自动发现 Reachy Mini。
30
-
31
- **手动连接:**
32
- 1. 进入 设置 → 设备与服务
33
- 2. 点击"添加集成"
34
- 3. 选择"ESPHome"
35
- 4. 输入机器人的 IP 地址和端口 6053
36
-
37
- ---
38
-
39
- ## 功能介绍
40
-
41
- ### 语音助手
42
- - **唤醒词检测**:说 "Okay Nabu" 激活(本地处理)
43
- - **停止词**:说 "Stop" 结束对话
44
- - **连续对话模式**:无需重复唤醒词即可持续对话
45
- - **语音识别/合成**:使用 Home Assistant 配置的语音引擎
46
-
47
- **支持的唤醒词:**
48
- - Okay Nabu(默认)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### 人脸追踪
54
- - 基于 YOLO 的人脸检测
55
- - 头部跟随检测到的人脸
56
- - 头部转动时身体随之旋转
57
- - 自适应帧率:活跃时 15fps,空闲时 2fps
58
- - 可在 Home Assistant 中运行时开关
59
-
60
- ### 手势检测
61
- 检测到的手势会作为实体状态同步到 Home Assistant。
62
- 当前默认运行时不会直接用手势触发机器人动作。
63
-
64
- | 输出 | 说明 |
65
- |------|------|
66
- | `gesture_detected` | 当前识别到的手势标签 |
67
- | `gesture_confidence` | 手势识别置信度 |
68
-
69
- ### 情绪响应
70
- 机器人可播放 35 种不同情绪:
71
- - 基础:开心、难过、愤怒、恐惧、惊讶、厌恶
72
- - 扩展:大笑、爱慕、骄傲、感激、热情、好奇、惊叹、害羞、困惑、沉思、焦虑、害怕、挫败、烦躁、狂怒、轻蔑、无聊、疲倦、精疲力竭、孤独、沮丧、顺从、不确定、不舒服
73
-
74
- ### 音频功能
75
- - 扬声器音量控制(0-100%)
76
- - 静音开关,可暂停/恢复语音链路
77
- - 支持唤醒提示音与计时器完成提示音
78
- - STT/TTS 由 Home Assistant 负责
79
-
80
- ### Sendspin 多房间音频
81
- - 通过 mDNS 自动发现 Sendspin 服务器
82
- - 同步多房间音频播放
83
- - Reachy Mini 作为 PLAYER 接收音频流
84
- - 语音对话时自动暂停
85
- - 无需用户配置
86
-
87
- ### DOA 声源追踪
88
- - 声源方向检测
89
- - 唤醒时机器人转向声源
90
- - 可通过开关启用/禁用
91
-
92
- ---
93
-
94
- ## Home Assistant 实体
95
-
96
- ### 阶段 1:基础状态
97
- | 实体 | 类型 | 说明 |
98
- |------|------|------|
99
- | Daemon State | 文本传感器 | 机器人守护进程状态 |
100
- | Backend Ready | 二进制传感器 | 后端连接状态 |
101
- | Mute | 开关 | 暂停/恢复语音链路 |
102
- | Speaker Volume | 数值 (0-100%) | 扬声器音量控制 |
103
- | Disable Camera | 开关 | 暂停/恢复摄像头服务 |
104
- | Idle Behavior | 开关 | 统一空闲行为:头部、天线、微动作 |
105
- | Sendspin | 开关 | 启用/禁用 Sendspin 发现与播放 |
106
- | Face Tracking | 开关 | 启用/禁用人脸跟踪 |
107
- | Gesture Detection | 开关 | 启用/禁用手势检测 |
108
- | Face Confidence | 数值 (0-1) | 人脸跟踪置信度阈值 |
109
-
110
- ### 阶段 2:睡眠与运行状态
111
- | 实体 | 类型 | 说明 |
112
- |------|------|------|
113
- | Sleep Control | 开关 | 打开表示进入睡眠,关闭表示唤醒 |
114
- | Sleep Mode | 二进制传感器 | 运行中表示唤醒,非运行表示睡眠 |
115
- | Services Suspended | 二进制传感器 | 运行中表示服务活跃 |
116
-
117
- ### 阶段 3:姿态控制
118
- | 实体 | 类型 | 范围 |
119
- |------|------|------|
120
- | Head X/Y/Z | 数值 | ±50mm |
121
- | Head Roll/Pitch/Yaw | 数值 | ±40° |
122
- | Body Yaw | 数值 | ±160° |
123
- | Antenna Left/Right | 数值 | ±90° |
124
-
125
- ### 阶段 4:注视控制
126
- | 实体 | 类型 | 说明 |
127
- |------|------|------|
128
- | Look At X/Y/Z | 数值 | 注视目标的世界坐标 |
129
-
130
- ### 阶段 5:DOA(声源定位)
131
- | 实体 | 类型 | 说明 |
132
- |------|------|------|
133
- | DOA Angle | 传感器 (°) | 声源方向 |
134
- | Speech Detected | 二进制传感器 | 语音活动检测 |
135
- | DOA Sound Tracking | 开关 | 启用/禁用 DOA 追踪 |
136
-
137
- ### 阶段 6:诊断信息
138
- | 实体 | 类型 | 说明 |
139
- |------|------|------|
140
- | Control Loop Frequency | 传感器 (Hz) | 运动控制循环频率 |
141
- | SDK Version | 文本传感器 | Reachy Mini SDK 版本 |
142
- | Robot Name | 文本传感器 | 设备名称 |
143
- | Wireless Version | 二进制传感器 | 无线版本标志 |
144
- | Simulation Mode | 二进制传感器 | 仿真模式标志 |
145
- | WLAN IP | 文本传感器 | WiFi IP 地址 |
146
- | Error Message | 文本传感器 | 当前错误 |
147
-
148
- ### 阶段 7:IMU 传感器(仅无线版本)
149
- | 实体 | 类型 | 说明 |
150
- |------|------|------|
151
- | IMU Accel X/Y/Z | 传感器 (m/s²) | 加速度计 |
152
- | IMU Gyro X/Y/Z | 传感器 (rad/s) | 陀螺仪 |
153
- | IMU Temperature | 传感器 (°C) | IMU 温度 |
154
-
155
- ### 阶段 8:情绪控制
156
- | 实体 | 类型 | 说明 |
157
- |------|------|------|
158
- | Emotion | 选择器 | 选择要播放的情绪(35 个选项)|
159
-
160
- ### 阶段 10:摄像头
161
- | 实体 | 类型 | 说明 |
162
- |------|------|------|
163
- | Camera | 摄像头 | 实时 MJPEG 流 |
164
-
165
- ### 3D 可视化卡片
166
- 可在 Home Assistant 中安装自定义 Lovelace 卡片,实时 3D 可视化 Reachy Mini 机器人。
167
-
168
- 安装地址:[ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
169
-
170
- 功能:
171
- - 实时 3D 机器人可视化
172
- - 交互式机器人状态视图
173
- - 连接机器人守护进程获取实时更新
174
-
175
- ### 阶段 21:对话
176
- | 实体 | 类型 | 说明 |
177
- |------|------|------|
178
- | Continuous Conversation | 开关 | 多轮对话模式 |
179
-
180
- ### 阶段 22:手势检测
181
- | 实体 | 类型 | 说明 |
182
- |------|------|------|
183
- | Gesture Detected | 文本传感器 | 当前手势名称 |
184
- | Gesture Confidence | 传感器 (%) | 检测置信度 |
185
-
186
- ### 阶段 23:人脸检测
187
- | 实体 | 类型 | 说明 |
188
- |------|------|------|
189
- | Face Detected | 二进制传感器 | 视野中是否有人脸 |
190
-
191
- ### 阶段 24:系统诊断
192
- | 实体 | 类型 | 说明 |
193
- |------|------|------|
194
- | CPU Percent | 传感器 (%) | CPU 使用率 |
195
- | CPU Temperature | 传感器 (°C) | CPU 温度 |
196
- | Memory Percent | 传感器 (%) | 内存使用率 |
197
- | Memory Used | 传感器 (GB) | 已用内存 |
198
- | Disk Percent | 传感器 (%) | 磁盘使用率 |
199
- | Disk Free | 传感器 (GB) | 磁盘可用空间 |
200
- | Uptime | 传感器 (hours) | 系统运行时间 |
201
- | Process CPU | 传感器 (%) | 应用 CPU 使用率 |
202
- | Process Memory | 传感器 (MB) | 应用内存使用 |
203
-
204
- ---
205
-
206
- ## 睡眠模式
207
-
208
- 运行时反应是零配置的:语音阶段、计时器提醒和 HA 状态触发情绪,共用同一套内建行为模型。
209
-
210
- ### 进入睡眠
211
- - 在 Home Assistant 中打开 `Sleep Control` 开关
212
- - 机器人放松电机、停止摄像头、暂停语音检测
213
-
214
- ### 唤醒
215
- - 在 Home Assistant 中关闭 `Sleep Control` 开关
216
- - 或说唤醒词
217
- - 机器人恢复所有功能
218
-
219
- ---
220
-
221
- ## 故障排除
222
-
223
- | 问题 | 解决方案 |
224
- |------|----------|
225
- | 不响应唤醒词 | 检查 Mute 是否关闭,减少背景噪音,并确认已连接 Home Assistant |
226
- | 人脸追踪不工作 | 确保光线充足,检查 Face Detected 传感器 |
227
- | 没有音频输出 | 检查 Speaker Volume,验证 HA 中的 TTS 引擎 |
228
- | 无法连接 HA | 确认在同一网络,检查端口 6053 |
229
- | 手势检测不到 | 确保光线充足,正对摄像头 |
230
-
231
- ---
232
-
233
- ## 快速参考
234
-
235
- ```
236
- 唤醒词: "Okay Nabu"
237
- 停止词: "Stop"
238
- ESPHome 端口: 6053
239
- 摄像头端口: 8081 (MJPEG)
240
- ```
241
-
242
- ---
243
-
244
- *Reachy Mini 语音助手 v1.0.4*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/USER_MANUAL_EN.md DELETED
@@ -1,244 +0,0 @@
1
- # Reachy Mini Voice Assistant - User Manual
2
-
3
- ## Requirements
4
-
5
- ### Hardware
6
- - Reachy Mini robot (with ReSpeaker XVF3800 microphone)
7
- - WiFi network connection
8
-
9
- ### Software
10
- - Home Assistant (2024.1 or later)
11
- - ESPHome integration enabled in Home Assistant
12
-
13
- ---
14
-
15
- ## Installation
16
-
17
- ### Step 1: Install the App
18
- Install `reachy_mini_home_assistant` from the Reachy Mini App Store.
19
-
20
- ### Step 2: Start the App
21
- The app will automatically:
22
- - Start the ESPHome server on port 6053
23
- - Load pre-packaged wake word models
24
- - Register with mDNS for auto-discovery
25
- - Connect to Sendspin server if available on network
26
-
27
- ### Step 3: Connect to Home Assistant
28
- **Automatic (Recommended):**
29
- Home Assistant will auto-discover Reachy Mini via mDNS.
30
-
31
- **Manual:**
32
- 1. Go to Settings → Devices & Services
33
- 2. Click "Add Integration"
34
- 3. Select "ESPHome"
35
- 4. Enter the robot's IP address and port 6053
36
-
37
- ---
38
-
39
- ## Features
40
-
41
- ### Voice Assistant
42
- - **Wake Word Detection**: Say "Okay Nabu" to activate (local processing)
43
- - **Stop Word**: Say "Stop" to end conversation
44
- - **Continuous Conversation Mode**: Keep talking without repeating wake word
45
- - **STT/TTS**: Uses Home Assistant's configured speech engines
46
-
47
- **Supported Wake Words:**
48
- - Okay Nabu (default)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### Face Tracking
54
- - YOLO-based face detection
55
- - Head follows detected face
56
- - Body follows head when turned far
57
- - Adaptive frame rate: 15fps active, 2fps idle
58
- - Runtime switchable from Home Assistant
59
-
60
- ### Gesture Detection
61
- Detected gestures are published to Home Assistant as entity state updates.
62
- The default runtime does not trigger built-in robot actions from gestures.
63
-
64
- | Output | Description |
65
- |--------|-------------|
66
- | `gesture_detected` | Current gesture label |
67
- | `gesture_confidence` | Detection confidence |
68
-
69
- ### Emotion Responses
70
- The robot can play 35 different emotions:
71
- - Basic: Happy, Sad, Angry, Fear, Surprise, Disgust
72
- - Extended: Laughing, Loving, Proud, Grateful, Enthusiastic, Curious, Amazed, Shy, Confused, Thoughtful, Anxious, Scared, Frustrated, Irritated, Furious, Contempt, Bored, Tired, Exhausted, Lonely, Downcast, Resigned, Uncertain, Uncomfortable
73
-
74
- ### Audio Features
75
- - Speaker volume control (0-100%)
76
- - Mute switch for voice pipeline pause/resume
77
- - Wake sound and timer-finished sound playback
78
- - Home Assistant handles STT/TTS engines
79
-
80
- ### Sendspin Multi-Room Audio
81
- - Automatic discovery of Sendspin servers via mDNS
82
- - Synchronized multi-room audio playback
83
- - Reachy Mini acts as a PLAYER to receive audio streams
84
- - Auto-pause during voice conversations
85
- - No user configuration required
86
-
87
- ### DOA Sound Tracking
88
- - Direction of Arrival detection
89
- - Robot turns toward sound source on wake word
90
- - Can be enabled/disabled via switch
91
-
92
- ---
93
-
94
- ## Home Assistant Entities
95
-
96
- ### Phase 1: Basic Status
97
- | Entity | Type | Description |
98
- |--------|------|-------------|
99
- | Daemon State | Text Sensor | Robot daemon status |
100
- | Backend Ready | Binary Sensor | Backend connection status |
101
- | Mute | Switch | Suspend/resume voice pipeline |
102
- | Speaker Volume | Number (0-100%) | Speaker volume control |
103
- | Disable Camera | Switch | Suspend/resume camera service |
104
- | Idle Behavior | Switch | Unified idle motion + idle antenna + idle micro-actions |
105
- | Sendspin | Switch | Enable/disable Sendspin discovery and playback |
106
- | Face Tracking | Switch | Enable/disable face tracking |
107
- | Gesture Detection | Switch | Enable/disable gesture detection |
108
- | Face Confidence | Number (0-1) | Face tracking confidence threshold |
109
-
110
- ### Phase 2: Sleep and Runtime State
111
- | Entity | Type | Description |
112
- |--------|------|-------------|
113
- | Sleep Control | Switch | Turn on to sleep, turn off to wake |
114
- | Sleep Mode | Binary Sensor | Running when awake, not running when sleeping |
115
- | Services Suspended | Binary Sensor | Running when services are active |
116
-
117
- ### Phase 3: Pose Control
118
- | Entity | Type | Range |
119
- |--------|------|-------|
120
- | Head X/Y/Z | Number | ±50mm |
121
- | Head Roll/Pitch/Yaw | Number | ±40° |
122
- | Body Yaw | Number | ±160° |
123
- | Antenna Left/Right | Number | ±90° |
124
-
125
- ### Phase 4: Look At Control
126
- | Entity | Type | Description |
127
- |--------|------|-------------|
128
- | Look At X/Y/Z | Number | World coordinates for gaze target |
129
-
130
- ### Phase 5: DOA (Direction of Arrival)
131
- | Entity | Type | Description |
132
- |--------|------|-------------|
133
- | DOA Angle | Sensor (°) | Sound source direction |
134
- | Speech Detected | Binary Sensor | Voice activity detection |
135
- | DOA Sound Tracking | Switch | Enable/disable DOA tracking |
136
-
137
- ### Phase 6: Diagnostics
138
- | Entity | Type | Description |
139
- |--------|------|-------------|
140
- | Control Loop Frequency | Sensor (Hz) | Motion control loop rate |
141
- | SDK Version | Text Sensor | Reachy Mini SDK version |
142
- | Robot Name | Text Sensor | Device name |
143
- | Wireless Version | Binary Sensor | Wireless model flag |
144
- | Simulation Mode | Binary Sensor | Simulation flag |
145
- | WLAN IP | Text Sensor | WiFi IP address |
146
- | Error Message | Text Sensor | Current error |
147
-
148
- ### Phase 7: IMU Sensors (Wireless version only)
149
- | Entity | Type | Description |
150
- |--------|------|-------------|
151
- | IMU Accel X/Y/Z | Sensor (m/s²) | Accelerometer |
152
- | IMU Gyro X/Y/Z | Sensor (rad/s) | Gyroscope |
153
- | IMU Temperature | Sensor (°C) | IMU temperature |
154
-
155
- ### Phase 8: Emotion Control
156
- | Entity | Type | Description |
157
- |--------|------|-------------|
158
- | Emotion | Select | Choose emotion to play (35 options) |
159
-
160
- ### Phase 10: Camera
161
- | Entity | Type | Description |
162
- |--------|------|-------------|
163
- | Camera | Camera | Live MJPEG stream |
164
-
165
- ### 3D Visualization Card
166
- A custom Lovelace card is available for real-time 3D visualization of the Reachy Mini robot in Home Assistant.
167
-
168
- Install from: [ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
169
-
170
- Features:
171
- - Real-time 3D robot visualization
172
- - Interactive view of robot state
173
- - Connects to robot daemon for live updates
174
-
175
- ### Phase 21: Conversation
176
- | Entity | Type | Description |
177
- |--------|------|-------------|
178
- | Continuous Conversation | Switch | Multi-turn conversation mode |
179
-
180
- ### Phase 22: Gesture Detection
181
- | Entity | Type | Description |
182
- |--------|------|-------------|
183
- | Gesture Detected | Text Sensor | Current gesture name |
184
- | Gesture Confidence | Sensor (%) | Detection confidence |
185
-
186
- ### Phase 23: Face Detection
187
- | Entity | Type | Description |
188
- |--------|------|-------------|
189
- | Face Detected | Binary Sensor | Face in view |
190
-
191
- ### Phase 24: System Diagnostics
192
- | Entity | Type | Description |
193
- |--------|------|-------------|
194
- | CPU Percent | Sensor (%) | CPU usage |
195
- | CPU Temperature | Sensor (°C) | CPU temperature |
196
- | Memory Percent | Sensor (%) | RAM usage |
197
- | Memory Used | Sensor (GB) | RAM used |
198
- | Disk Percent | Sensor (%) | Disk usage |
199
- | Disk Free | Sensor (GB) | Disk free space |
200
- | Uptime | Sensor (hours) | System uptime |
201
- | Process CPU | Sensor (%) | App CPU usage |
202
- | Process Memory | Sensor (MB) | App memory usage |
203
-
204
- ---
205
-
206
- ## Sleep Mode
207
-
208
- Runtime reactions are zero-config: voice phases, timer alerts, and HA state-triggered emotions use the same built-in behavior model.
209
-
210
- ### Enter Sleep
211
- - Turn on the `Sleep Control` switch in Home Assistant
212
- - Robot relaxes motors, stops camera, pauses voice detection
213
-
214
- ### Wake Up
215
- - Turn off the `Sleep Control` switch in Home Assistant
216
- - Or say the wake word
217
- - Robot resumes all functions
218
-
219
- ---
220
-
221
- ## Troubleshooting
222
-
223
- | Problem | Solution |
224
- |---------|----------|
225
- | Not responding to wake word | Check Mute is off, reduce background noise, verify Home Assistant is connected |
226
- | Face tracking not working | Ensure adequate lighting, check Face Detected sensor |
227
- | No audio output | Check Speaker Volume, verify TTS engine in HA |
228
- | Can't connect to HA | Verify same network, check port 6053 |
229
- | Gestures not detected | Ensure good lighting, face the camera directly |
230
-
231
- ---
232
-
233
- ## Quick Reference
234
-
235
- ```
236
- Wake Word: "Okay Nabu"
237
- Stop Word: "Stop"
238
- ESPHome Port: 6053
239
- Camera Port: 8081 (MJPEG)
240
- ```
241
-
242
- ---
243
-
244
- *Reachy Mini Voice Assistant v1.0.4*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
home_assistant_blueprints/reachy_mini_presence_companion.yaml DELETED
@@ -1,246 +0,0 @@
1
- blueprint:
2
- name: Reachy Mini Presence Companion
3
- description: >-
4
- Presence-driven automation for Reachy Mini in Home Assistant.
5
-
6
- How to use:
7
- 1) Select Home occupancy entity (person/group/binary_sensor).
8
- 2) Select Reachy ESPHome device (recommended).
9
- 3) Leave optional fallback entity inputs empty unless auto-binding fails.
10
- 4) Set away delay and day/night volume.
11
-
12
- What this automation does:
13
- - Occupied: Wake Reachy, enable unified idle behavior, set day volume.
14
- - Empty (after delay): Disable unified idle behavior, send Reachy to sleep.
15
- - Quiet hours start/end: Apply night/day volume while occupied.
16
-
17
- Auto-binding rules (when Reachy device is selected):
18
- - Sleep switch suffix: sleep_control
19
- - Idle behavior switch suffix: idle_behavior_enabled
20
- - Volume number suffix: speaker_volume
21
-
22
- If your entities use different names, fill optional fallback entity inputs manually.
23
- domain: automation
24
- input:
25
- occupancy_entity:
26
- name: Home occupancy entity
27
- description: Person, group, or binary sensor representing home presence.
28
- selector:
29
- entity: {}
30
-
31
- reachy_device:
32
- name: Reachy device (recommended)
33
- description: Select your Reachy ESPHome device for automatic entity binding.
34
- default: ""
35
- selector:
36
- device:
37
- filter:
38
- - integration: esphome
39
-
40
- reachy_sleep_switch:
41
- name: Sleep Control switch (optional fallback)
42
- description: Leave empty to auto-bind from Reachy device.
43
- default: ""
44
- selector:
45
- entity:
46
- domain: switch
47
-
48
- idle_behavior_switch:
49
- name: Idle Behavior switch (optional fallback)
50
- description: Leave empty to auto-bind from Reachy device.
51
- default: ""
52
- selector:
53
- entity:
54
- domain: switch
55
-
56
- reachy_volume_number:
57
- name: Speaker Volume number (optional fallback)
58
- description: Leave empty to auto-bind from Reachy device.
59
- default: ""
60
- selector:
61
- entity:
62
- domain: number
63
-
64
- away_delay_minutes:
65
- name: Away delay (minutes)
66
- description: Wait before sleeping after everyone leaves.
67
- default: 20
68
- selector:
69
- number:
70
- min: 1
71
- max: 180
72
- mode: box
73
- unit_of_measurement: min
74
-
75
- day_volume:
76
- name: Day volume
77
- default: 80
78
- selector:
79
- number:
80
- min: 0
81
- max: 100
82
- step: 1
83
- mode: slider
84
-
85
- night_volume:
86
- name: Night volume
87
- default: 35
88
- selector:
89
- number:
90
- min: 0
91
- max: 100
92
- step: 1
93
- mode: slider
94
-
95
- quiet_start:
96
- name: Quiet hours start
97
- default: "22:30:00"
98
- selector:
99
- time: {}
100
-
101
- quiet_end:
102
- name: Quiet hours end
103
- default: "07:30:00"
104
- selector:
105
- time: {}
106
-
107
- mode: restart
108
-
109
- variables:
110
- occupancy_entity: !input occupancy_entity
111
- reachy_device: !input reachy_device
112
- manual_sleep_switch: !input reachy_sleep_switch
113
- manual_idle_behavior_switch: !input idle_behavior_switch
114
- manual_volume_number: !input reachy_volume_number
115
- day_volume: !input day_volume
116
- night_volume: !input night_volume
117
-
118
- device_entities_list: >-
119
- {{ device_entities(reachy_device) if reachy_device else [] }}
120
-
121
- sleep_switch_auto: >-
122
- {{ (device_entities_list | select('match', '^switch\..*sleep_control$') | list | first) or '' }}
123
- idle_behavior_switch_auto: >-
124
- {{ (device_entities_list | select('match', '^switch\..*idle_behavior_enabled$') | list | first) or '' }}
125
- volume_number_auto: >-
126
- {{ (device_entities_list | select('match', '^number\..*speaker_volume$') | list | first) or '' }}
127
-
128
- sleep_switch: >-
129
- {{ manual_sleep_switch if manual_sleep_switch else sleep_switch_auto }}
130
- idle_behavior_switch: >-
131
- {{ manual_idle_behavior_switch if manual_idle_behavior_switch else idle_behavior_switch_auto }}
132
- volume_number: >-
133
- {{ manual_volume_number if manual_volume_number else volume_number_auto }}
134
-
135
- is_occupied: >-
136
- {{ states(occupancy_entity) in ['home', 'on'] }}
137
-
138
- trigger:
139
- - platform: state
140
- id: occupied_home
141
- entity_id: !input occupancy_entity
142
- to: "home"
143
-
144
- - platform: state
145
- id: occupied_on
146
- entity_id: !input occupancy_entity
147
- to: "on"
148
-
149
- - platform: state
150
- id: empty_not_home
151
- entity_id: !input occupancy_entity
152
- to: "not_home"
153
- for:
154
- minutes: !input away_delay_minutes
155
-
156
- - platform: state
157
- id: empty_off
158
- entity_id: !input occupancy_entity
159
- to: "off"
160
- for:
161
- minutes: !input away_delay_minutes
162
-
163
- - platform: time
164
- id: quiet_start
165
- at: !input quiet_start
166
-
167
- - platform: time
168
- id: quiet_end
169
- at: !input quiet_end
170
-
171
- action:
172
- - choose:
173
- - conditions:
174
- - condition: template
175
- value_template: "{{ trigger.id in ['occupied_home', 'occupied_on'] }}"
176
- sequence:
177
- - if:
178
- - condition: template
179
- value_template: "{{ sleep_switch != '' }}"
180
- then:
181
- - service: switch.turn_off
182
- target:
183
- entity_id: "{{ sleep_switch }}"
184
- - if:
185
- - condition: template
186
- value_template: "{{ idle_behavior_switch != '' }}"
187
- then:
188
- - service: switch.turn_on
189
- target:
190
- entity_id: "{{ idle_behavior_switch }}"
191
- - if:
192
- - condition: template
193
- value_template: "{{ volume_number != '' }}"
194
- then:
195
- - service: number.set_value
196
- target:
197
- entity_id: "{{ volume_number }}"
198
- data:
199
- value: "{{ day_volume }}"
200
-
201
- - conditions:
202
- - condition: template
203
- value_template: "{{ trigger.id in ['empty_not_home', 'empty_off'] }}"
204
- sequence:
205
- - if:
206
- - condition: template
207
- value_template: "{{ idle_behavior_switch != '' }}"
208
- then:
209
- - service: switch.turn_off
210
- target:
211
- entity_id: "{{ idle_behavior_switch }}"
212
- - if:
213
- - condition: template
214
- value_template: "{{ sleep_switch != '' }}"
215
- then:
216
- - service: switch.turn_on
217
- target:
218
- entity_id: "{{ sleep_switch }}"
219
-
220
- - conditions:
221
- - condition: template
222
- value_template: "{{ trigger.id == 'quiet_start' and is_occupied }}"
223
- sequence:
224
- - if:
225
- - condition: template
226
- value_template: "{{ volume_number != '' }}"
227
- then:
228
- - service: number.set_value
229
- target:
230
- entity_id: "{{ volume_number }}"
231
- data:
232
- value: "{{ night_volume }}"
233
-
234
- - conditions:
235
- - condition: template
236
- value_template: "{{ trigger.id == 'quiet_end' and is_occupied }}"
237
- sequence:
238
- - if:
239
- - condition: template
240
- value_template: "{{ volume_number != '' }}"
241
- then:
242
- - service: number.set_value
243
- target:
244
- entity_id: "{{ volume_number }}"
245
- data:
246
- value: "{{ day_volume }}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html CHANGED
@@ -18,7 +18,7 @@
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
21
- <div class="version-pill" id="version-pill">v1.0.4</div>
22
  </div>
23
  <div class="hero-grid">
24
  <div class="hero-copy">
@@ -89,7 +89,7 @@
89
  <div class="story-grid">
90
  <div class="story-card">
91
  <p class="eyebrow">Installation</p>
92
- <h3>Up and running in 1 minute</h3>
93
  <ul class="story-list">
94
  <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
95
  <li><span>2️⃣</span> Enable "Show community apps"</li>
@@ -101,12 +101,12 @@
101
  <p class="eyebrow">How it works</p>
102
  <h3>Seamless integration</h3>
103
  <p class="story-text">
104
- This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds the robot entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
105
  </p>
106
  <div class="chips">
107
  <span class="chip">ESPHome Protocol</span>
108
  <span class="chip">mDNS Discovery</span>
109
- <span class="chip">Robot Entities</span>
110
  <span class="chip">Zero Config</span>
111
  </div>
112
  </div>
@@ -117,7 +117,7 @@
117
  <div class="section-header">
118
  <p class="eyebrow">Capabilities</p>
119
  <h2>Everything you need for smart home control</h2>
120
- <p class="intro">Zero-configuration robot entities, built-in reactions, and auto-discovery via mDNS.</p>
121
  </div>
122
  <div class="feature-grid">
123
  <div class="feature-card">
@@ -138,12 +138,12 @@
138
  <div class="feature-card">
139
  <span class="icon">🤚</span>
140
  <h3>Gesture Detection</h3>
141
- <p>HaGRID ONNX models recognize hand gestures and publish the detected gesture label and confidence to Home Assistant entities.</p>
142
  </div>
143
  <div class="feature-card">
144
  <span class="icon">😊</span>
145
  <h3>Expressive Motion</h3>
146
- <p>Built-in listening, thinking, speaking, timer, and emotion reactions with natural head sway and non-blocking motion during conversations.</p>
147
  </div>
148
  <div class="feature-card">
149
  <span class="icon">📹</span>
@@ -158,23 +158,13 @@
158
  <div class="feature-card">
159
  <span class="icon">⚡</span>
160
  <h3>Zero Configuration</h3>
161
- <p>Install and go. mDNS auto-discovery and built-in HA reactions mean the default experience works without extra setup.</p>
162
  </div>
163
  <div class="feature-card">
164
  <span class="icon">🃏</span>
165
  <h3>Dashboard Card</h3>
166
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
167
  </div>
168
- <div class="feature-card">
169
- <span class="icon">🧩</span>
170
- <h3>HA Blueprint</h3>
171
- <p>Device-first Home Assistant blueprint for presence automations using the current zero-config model: sleep control, idle behavior, and speaker volume.</p>
172
- </div>
173
- <div class="feature-card">
174
- <span class="icon">🚀</span>
175
- <h3>Auto Release</h3>
176
- <p>Version-driven GitHub release workflow. Update pyproject/changelog, then release is created automatically.</p>
177
- </div>
178
  </div>
179
  </section>
180
 
@@ -236,15 +226,10 @@
236
  <h3>HA Dashboard Card</h3>
237
  <p>Lovelace Card for HA</p>
238
  </a>
239
- <a href="https://github.com/ha-china/Reachy_Mini_For_Home_Assistant" target="_blank" class="link-card">
240
  <span class="icon">📦</span>
241
  <h3>Source Code</h3>
242
- <p>GitHub Repository</p>
243
- </a>
244
- <a href="home_assistant_blueprints/reachy_mini_presence_companion.yaml" target="_blank" class="link-card">
245
- <span class="icon">🧩</span>
246
- <h3>HA Blueprint</h3>
247
- <p>Presence Companion YAML</p>
248
  </a>
249
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
250
  <span class="icon">🤖</span>
@@ -279,7 +264,7 @@
279
  <a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
280
  <span class="icon">✋</span>
281
  <h3>Dynamic Gestures</h3>
282
- <p>Reference Project</p>
283
  </a>
284
  <a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
285
  <span class="icon">🔊</span>
 
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
21
+ <div class="version-pill" id="version-pill">v0.8.3</div>
22
  </div>
23
  <div class="hero-grid">
24
  <div class="hero-copy">
 
89
  <div class="story-grid">
90
  <div class="story-card">
91
  <p class="eyebrow">Installation</p>
92
+ <h3>Up and running in 1 minute</h3>
93
  <ul class="story-list">
94
  <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
95
  <li><span>2️⃣</span> Enable "Show community apps"</li>
 
101
  <p class="eyebrow">How it works</p>
102
  <h3>Seamless integration</h3>
103
  <p class="story-text">
104
+ This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
105
  </p>
106
  <div class="chips">
107
  <span class="chip">ESPHome Protocol</span>
108
  <span class="chip">mDNS Discovery</span>
109
+ <span class="chip">45+ Entities</span>
110
  <span class="chip">Zero Config</span>
111
  </div>
112
  </div>
 
117
  <div class="section-header">
118
  <p class="eyebrow">Capabilities</p>
119
  <h2>Everything you need for smart home control</h2>
120
+ <p class="intro">45+ Home Assistant entities. Zero configuration. Auto-discovery via mDNS.</p>
121
  </div>
122
  <div class="feature-grid">
123
  <div class="feature-card">
 
138
  <div class="feature-card">
139
  <span class="icon">🤚</span>
140
  <h3>Gesture Detection</h3>
141
+ <p>HaGRID ONNX models recognize 18 hand gestures: 👍👎✌️🤘👌✊🤙🤫 and more.</p>
142
  </div>
143
  <div class="feature-card">
144
  <span class="icon">😊</span>
145
  <h3>Expressive Motion</h3>
146
+ <p>280+ emotion keywords trigger 35 expressions. Real-time audio-driven animations with natural head sway during conversations.</p>
147
  </div>
148
  <div class="feature-card">
149
  <span class="icon">📹</span>
 
158
  <div class="feature-card">
159
  <span class="icon">⚡</span>
160
  <h3>Zero Configuration</h3>
161
+ <p>Install and go. mDNS auto-discovery means Home Assistant finds your robot automatically.</p>
162
  </div>
163
  <div class="feature-card">
164
  <span class="icon">🃏</span>
165
  <h3>Dashboard Card</h3>
166
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
167
  </div>
 
 
 
 
 
 
 
 
 
 
168
  </div>
169
  </section>
170
 
 
226
  <h3>HA Dashboard Card</h3>
227
  <p>Lovelace Card for HA</p>
228
  </a>
229
+ <a href="https://huggingface.co/spaces/djhui5710/reachy_mini_ha_voice/tree/main" target="_blank" class="link-card">
230
  <span class="icon">📦</span>
231
  <h3>Source Code</h3>
232
+ <p>HuggingFace Spaces</p>
 
 
 
 
 
233
  </a>
234
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
235
  <span class="icon">🤖</span>
 
264
  <a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
265
  <span class="icon">✋</span>
266
  <h3>Dynamic Gestures</h3>
267
+ <p>ONNX Models</p>
268
  </a>
269
  <a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
270
  <span class="icon">🔊</span>
pyproject.toml CHANGED
@@ -1,24 +1,25 @@
1
  [build-system]
2
- requires = ["setuptools"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
- name = "reachy_mini_home_assistant"
7
- version = "1.0.7"
8
- description = "Deep integration of Reachy Mini robot with Home Assistant"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
- # Reachy Mini SDK with gstreamer support (for camera streaming)
14
- "reachy-mini>=1.7.1",
15
 
16
- # Audio processing (for audio file analysis)
 
17
  "soundfile>=0.13.0",
18
- "numpy>=2.2.5,<=2.2.5",
19
 
20
  # Camera streaming
21
- "opencv-python>=4.12.0.88",
22
 
23
  # Wake word detection (local)
24
  # STT/TTS is handled by Home Assistant, not locally
@@ -27,36 +28,26 @@ dependencies = [
27
 
28
  # ESPHome protocol (communication with Home Assistant)
29
  "aioesphomeapi>=43.10.1",
30
- "zeroconf>=0.131,<1",
31
- "websockets>=12,<16",
32
- "aiohttp",
33
 
34
  # Motion control (head movements)
35
- "scipy>=1.15.3,<2.0.0",
36
-
37
  # Face tracking (YOLO-based head detection)
38
- "ultralytics",
39
- "supervision",
40
-
 
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
- "aiosendspin>=5.1,<6.0",
43
-
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
46
-
47
- # PyTorch (for vision models)
48
- "torch==2.5.1",
49
- "torchvision==0.20.1",
50
-
51
- # Compatibility with system packages (gradio, etc.)
52
- "pillow<12.0",
53
- "pydantic<=2.12.5",
54
- "requests>=2.33.0",
55
  ]
56
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
57
 
58
  [project.entry-points."reachy_mini_apps"]
59
- reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"
60
 
61
  [tool.setuptools]
62
  package-dir = { "" = "." }
@@ -66,114 +57,4 @@ include-package-data = true
66
  where = ["."]
67
 
68
  [tool.setuptools.package-data]
69
- "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx", "*.pt"]
70
-
71
- # ============================================================================
72
- # Ruff - Fast Python linter and formatter
73
- # ============================================================================
74
- [tool.ruff]
75
- target-version = "py312"
76
- line-length = 120
77
- src = ["reachy_mini_home_assistant"]
78
-
79
- # Exclude reference code and generated files
80
- exclude = [
81
- "reference/",
82
- "__pycache__",
83
- ".git",
84
- "*.egg-info",
85
- ]
86
-
87
- [dependency-groups]
88
- dev = [
89
- "ruff==0.15.4",
90
- "mypy==1.20.0",
91
- ]
92
-
93
- [tool.uv]
94
- dependency-metadata = [
95
- { name = "gstreamer-libs", version = "1.28.1", requires-dist = ["gstreamer-msvc-runtime; sys_platform == 'win32'", "setuptools"] },
96
- ]
97
-
98
- [tool.ruff.lint]
99
- select = [
100
- "E", # pycodestyle errors
101
- "W", # pycodestyle warnings
102
- "F", # Pyflakes
103
- "I", # isort (import sorting)
104
- "B", # flake8-bugbear (common bugs)
105
- "C4", # flake8-comprehensions
106
- "UP", # pyupgrade (modern Python syntax)
107
- "SIM", # flake8-simplify
108
- "TCH", # flake8-type-checking (TYPE_CHECKING optimization)
109
- "RUF", # Ruff-specific rules
110
- "PTH", # flake8-use-pathlib
111
- "PL", # Pylint
112
- ]
113
- ignore = [
114
- "E501", # line too long (handled by formatter)
115
- "PLR0913", # too many arguments (common in robot control)
116
- "PLR2004", # magic value comparison (many thresholds in motion code)
117
- "PLR0912", # too many branches
118
- "PLR0915", # too many statements
119
- "PLR0911", # too many return statements
120
- "SIM108", # use ternary operator (sometimes less readable)
121
- "B008", # function call in default argument (used for field factories)
122
- # The following are intentional patterns in this codebase:
123
- "PLC0415", # import-outside-top-level (lazy imports for optional deps)
124
- "PLW0603", # global-statement (used for singletons)
125
- "SIM102", # collapsible-if (sometimes more readable expanded)
126
- "SIM105", # suppressible-exception (explicit try/except is clearer)
127
- "PTH123", # builtin-open (pathlib not always better)
128
- "PTH108", # os-unlink (pathlib not always better)
129
- "RUF013", # implicit-optional (legacy code)
130
- "TC002", # third-party import (numpy is required at runtime)
131
- ]
132
-
133
- [tool.ruff.lint.per-file-ignores]
134
- "__init__.py" = ["F401"] # unused imports in __init__ are intentional
135
-
136
- [tool.ruff.lint.isort]
137
- known-first-party = ["reachy_mini_home_assistant"]
138
-
139
- # ============================================================================
140
- # Mypy - Static type checker
141
- # ============================================================================
142
- [tool.mypy]
143
- python_version = "3.12"
144
- warn_return_any = false # Too noisy for mixed typed/untyped codebase
145
- warn_unused_ignores = true
146
- disallow_untyped_defs = false # Start lenient, can tighten later
147
- check_untyped_defs = false # Too strict for initial setup
148
- ignore_missing_imports = true # Many robot SDK libs lack type stubs
149
- no_implicit_optional = false # Allow implicit Optional for now
150
- # Disable some checks that are too strict for this codebase
151
- disable_error_code = [
152
- "union-attr", # Too many Optional accesses without None checks
153
- "no-redef", # Class redefinitions for SDK compatibility
154
- "attr-defined", # Some dynamic attributes from SDK
155
- "assignment", # Variable type changes (common in Python)
156
- "arg-type", # Argument type mismatches (often SDK issues)
157
- "unused-ignore", # Type ignore comments from before config
158
- "return-value", # Return type mismatches (often fine)
159
- "no-untyped-def", # Missing type annotations (too strict initially)
160
- "valid-type", # Type validity (some edge cases)
161
- "has-type", # Cannot determine type
162
- "call-arg", # Too few/many arguments
163
- "import-untyped", # Missing stubs for third-party libs
164
- "misc", # Miscellaneous errors
165
- ]
166
- exclude = [
167
- "reference/",
168
- "tests/",
169
- ]
170
-
171
- # Stricter checking for core modules (can enable gradually)
172
- [[tool.mypy.overrides]]
173
- module = [
174
- "reachy_mini_home_assistant.core.*",
175
- "reachy_mini_home_assistant.motion.smoothing",
176
- "reachy_mini_home_assistant.motion.pose_composer",
177
- ]
178
- disallow_untyped_defs = true
179
- warn_unreachable = true
 
1
  [build-system]
2
+ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
+ name = "reachy_mini_ha_voice"
7
+ version = "0.8.4"
8
+ description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
+ # Reachy Mini SDK (provides audio via media system)
14
+ "reachy-mini",
15
 
16
+ # Audio processing (fallback when not on Reachy Mini)
17
+ "sounddevice>=0.5.0",
18
  "soundfile>=0.13.0",
19
+ "numpy>=2.0.0",
20
 
21
  # Camera streaming
22
+ "opencv-python>=4.10.0",
23
 
24
  # Wake word detection (local)
25
  # STT/TTS is handled by Home Assistant, not locally
 
28
 
29
  # ESPHome protocol (communication with Home Assistant)
30
  "aioesphomeapi>=43.10.1",
31
+ "zeroconf>=0.140.0",
 
 
32
 
33
  # Motion control (head movements)
34
+ "scipy>=1.14.0",
35
+
36
  # Face tracking (YOLO-based head detection)
37
+ "ultralytics>=8.3.0",
38
+ "supervision>=0.25.0",
39
+ "huggingface_hub>=0.27.0",
40
+
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
+ "aiosendspin>=2.0.1",
43
+
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
 
 
 
 
 
 
 
 
 
46
  ]
47
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
48
 
49
  [project.entry-points."reachy_mini_apps"]
50
+ reachy_mini_ha_voice = "reachy_mini_ha_voice.main:ReachyMiniHaVoice"
51
 
52
  [tool.setuptools]
53
  package-dir = { "" = "." }
 
57
  where = ["."]
58
 
59
  [tool.setuptools.package-data]
60
+ "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py RENAMED
@@ -1,29 +1,24 @@
1
- """
2
- Reachy Mini for Home Assistant
3
-
4
- A deep integration app combining Reachy Mini robot with Home Assistant,
5
- enabling voice control, smart home automation, and expressive robot interactions.
6
-
7
- Key features:
8
- - Local wake word detection (microWakeWord/openWakeWord)
9
- - ESPHome protocol for seamless Home Assistant communication
10
- - STT/TTS powered by Home Assistant voice pipeline
11
- - Reachy Mini motion control with expressive animations
12
- - Camera streaming and gesture detection
13
- - Smart home entity control through natural voice commands
14
- """
15
-
16
- try:
17
- from importlib.metadata import version
18
-
19
- __version__ = version("reachy_mini_home_assistant")
20
- except Exception:
21
- __version__ = "0.0.0" # Fallback for development
22
- __author__ = "Desmond Dong"
23
-
24
- # Don't import main module here to avoid runpy warning
25
- # The app is loaded via entry point: reachy_mini_home_assistant.main:ReachyMiniHaVoiceApp
26
-
27
- __all__ = [
28
- "__version__",
29
- ]
 
1
+ """
2
+ Reachy Mini for Home Assistant
3
+
4
+ A deep integration app combining Reachy Mini robot with Home Assistant,
5
+ enabling voice control, smart home automation, and expressive robot interactions.
6
+
7
+ Key features:
8
+ - Local wake word detection (microWakeWord/openWakeWord)
9
+ - ESPHome protocol for seamless Home Assistant communication
10
+ - STT/TTS powered by Home Assistant voice pipeline
11
+ - Reachy Mini motion control with expressive animations
12
+ - Camera streaming and gesture detection
13
+ - Smart home entity control through natural voice commands
14
+ """
15
+
16
+ __version__ = "0.8.4"
17
+ __author__ = "Desmond Dong"
18
+
19
+ # Don't import main module here to avoid runpy warning
20
+ # The app is loaded via entry point: reachy_mini_ha_voice.main:ReachyMiniHaVoice
21
+
22
+ __all__ = [
23
+ "__version__",
24
+ ]
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py RENAMED
@@ -2,7 +2,7 @@
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
- without the ReachyMini App framework.
6
  """
7
 
8
  import argparse
@@ -10,17 +10,17 @@ import asyncio
10
  import logging
11
  import threading
12
 
13
- from .protocol.zeroconf import get_default_friendly_name
14
-
15
  _LOGGER = logging.getLogger(__name__)
16
 
17
 
18
  async def main() -> None:
19
- parser = argparse.ArgumentParser(description="Reachy Mini for Home Assistant")
 
 
20
  parser.add_argument(
21
  "--name",
22
- default=get_default_friendly_name(),
23
- help="Name of the voice assistant (default: auto-generated from MAC)",
24
  )
25
  parser.add_argument(
26
  "--host",
@@ -49,6 +49,11 @@ async def main() -> None:
49
  action="store_true",
50
  help="Disable camera server",
51
  )
 
 
 
 
 
52
  parser.add_argument(
53
  "--debug",
54
  action="store_true",
@@ -63,53 +68,59 @@ async def main() -> None:
63
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
64
  )
65
 
66
- # Initialize Reachy Mini (required)
67
- from reachy_mini import ReachyMini
68
-
69
- with ReachyMini() as reachy_mini:
70
- _LOGGER.info("Reachy Mini connected")
71
-
72
- # Import and create VoiceAssistantService
73
- from .voice_assistant import VoiceAssistantService
74
-
75
- service = VoiceAssistantService(
76
- reachy_mini=reachy_mini,
77
- name=args.name,
78
- host=args.host,
79
- port=args.port,
80
- wake_model=args.wake_model,
81
- camera_port=args.camera_port,
82
- camera_enabled=not args.no_camera,
83
- )
84
-
85
- # Create stop event for graceful shutdown
86
- stop_event = threading.Event()
87
-
88
  try:
89
- await service.start()
90
-
91
- _LOGGER.info("=" * 50)
92
- _LOGGER.info("Reachy Mini Voice Assistant Started")
93
- _LOGGER.info("=" * 50)
94
- _LOGGER.info("Name: %s", args.name)
95
- _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
96
- _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
97
- _LOGGER.info("Motion control: enabled")
98
- _LOGGER.info("=" * 50)
99
- _LOGGER.info("Add this device in Home Assistant:")
100
- _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
101
- _LOGGER.info(" Enter: <this-device-ip>:%s", args.port)
102
- _LOGGER.info("=" * 50)
103
-
104
- # Wait for stop signal
105
- while not stop_event.is_set():
106
- await asyncio.sleep(0.5)
107
-
108
- except KeyboardInterrupt:
109
- _LOGGER.info("Shutting down...")
110
- finally:
111
- await service.stop()
112
- _LOGGER.info("Voice assistant stopped")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  def run():
 
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
+ in standalone mode (without the ReachyMini App framework).
6
  """
7
 
8
  import argparse
 
10
  import logging
11
  import threading
12
 
 
 
13
  _LOGGER = logging.getLogger(__name__)
14
 
15
 
16
  async def main() -> None:
17
+ parser = argparse.ArgumentParser(
18
+ description="Reachy Mini for Home Assistant"
19
+ )
20
  parser.add_argument(
21
  "--name",
22
+ default="Reachy Mini",
23
+ help="Name of the voice assistant (default: Reachy Mini)",
24
  )
25
  parser.add_argument(
26
  "--host",
 
49
  action="store_true",
50
  help="Disable camera server",
51
  )
52
+ parser.add_argument(
53
+ "--no-motion",
54
+ action="store_true",
55
+ help="Disable Reachy Mini motion control",
56
+ )
57
  parser.add_argument(
58
  "--debug",
59
  action="store_true",
 
68
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
69
  )
70
 
71
+ # Initialize Reachy Mini (if available)
72
+ reachy_mini = None
73
+ if not args.no_motion:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  try:
75
+ from reachy_mini import ReachyMini
76
+ reachy_mini = ReachyMini()
77
+ _LOGGER.info("Reachy Mini connected")
78
+ except ImportError:
79
+ _LOGGER.warning("reachy-mini not installed, motion control disabled")
80
+ except Exception as e:
81
+ _LOGGER.warning("Failed to connect to Reachy Mini: %s", e)
82
+
83
+ # Import and create VoiceAssistantService
84
+ from .voice_assistant import VoiceAssistantService
85
+
86
+ service = VoiceAssistantService(
87
+ reachy_mini=reachy_mini,
88
+ name=args.name,
89
+ host=args.host,
90
+ port=args.port,
91
+ wake_model=args.wake_model,
92
+ camera_port=args.camera_port,
93
+ camera_enabled=not args.no_camera,
94
+ )
95
+
96
+ # Create stop event for graceful shutdown
97
+ stop_event = threading.Event()
98
+
99
+ try:
100
+ await service.start()
101
+
102
+ _LOGGER.info("=" * 50)
103
+ _LOGGER.info("Reachy Mini Voice Assistant Started")
104
+ _LOGGER.info("=" * 50)
105
+ _LOGGER.info("Name: %s", args.name)
106
+ _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
107
+ _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
108
+ _LOGGER.info("Motion control: %s", "enabled" if reachy_mini else "disabled")
109
+ _LOGGER.info("=" * 50)
110
+ _LOGGER.info("Add this device in Home Assistant:")
111
+ _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
112
+ _LOGGER.info(" Enter: <this-device-ip>:%s", args.port)
113
+ _LOGGER.info("=" * 50)
114
+
115
+ # Wait for stop signal
116
+ while not stop_event.is_set():
117
+ await asyncio.sleep(0.5)
118
+
119
+ except KeyboardInterrupt:
120
+ _LOGGER.info("Shutting down...")
121
+ finally:
122
+ await service.stop()
123
+ _LOGGER.info("Voice assistant stopped")
124
 
125
 
126
  def run():
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py RENAMED
@@ -8,6 +8,7 @@ on top of other movements. The speaking animation uses multi-frequency
8
  oscillators for more natural head sway.
9
  """
10
 
 
11
  import logging
12
  import math
13
  import random
@@ -15,20 +16,17 @@ import threading
15
  import time
16
  from dataclasses import dataclass
17
  from pathlib import Path
18
-
19
- from ..animations.animation_config import load_animation_config
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
- _PACKAGE_DIR = _MODULE_DIR.parent # reachy_mini_home_assistant/
25
- _ANIMATIONS_FILE = _PACKAGE_DIR / "animations" / "conversation_animations.json"
26
 
27
 
28
  @dataclass
29
  class AnimationParams:
30
  """Parameters for a single animation with per-axis frequencies."""
31
-
32
  name: str
33
  description: str
34
  # Position amplitudes (meters)
@@ -75,12 +73,12 @@ class AnimationPlayer:
75
  """
76
 
77
  def __init__(self):
78
- self._animations: dict[str, AnimationParams] = {}
79
  self._amplitude_scale: float = 1.0
80
  self._transition_duration: float = 0.3
81
- self._interpolation_duration: float = 0.2
82
- self._current_animation: str | None = None
83
- self._target_animation: str | None = None
84
  self._transition_start: float = 0.0
85
  self._phase_start: float = 0.0
86
  self._lock = threading.Lock()
@@ -94,25 +92,15 @@ class AnimationPlayer:
94
  # Interpolation state (for smooth transition to neutral before oscillation)
95
  self._in_interpolation: bool = False
96
  self._interpolation_start_time: float = 0.0
97
- self._interpolation_start_offsets: dict[str, float] = {
98
- "pitch": 0.0,
99
- "yaw": 0.0,
100
- "roll": 0.0,
101
- "x": 0.0,
102
- "y": 0.0,
103
- "z": 0.0,
104
- "antenna_left": 0.0,
105
- "antenna_right": 0.0,
106
  }
107
- self._last_offsets: dict[str, float] = {
108
- "pitch": 0.0,
109
- "yaw": 0.0,
110
- "roll": 0.0,
111
- "x": 0.0,
112
- "y": 0.0,
113
- "z": 0.0,
114
- "antenna_left": 0.0,
115
- "antenna_right": 0.0,
116
  }
117
  self._load_config()
118
 
@@ -122,7 +110,8 @@ class AnimationPlayer:
122
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
123
  return
124
  try:
125
- data = load_animation_config(_ANIMATIONS_FILE)
 
126
 
127
  settings = data.get("settings", {})
128
  self._amplitude_scale = settings.get("amplitude_scale", 1.0)
@@ -204,7 +193,7 @@ class AnimationPlayer:
204
  self._current_animation = None
205
  self._target_animation = None
206
 
207
- def get_offsets(self, dt: float = 0.0) -> dict[str, float]:
208
  """Calculate current animation offsets.
209
 
210
  Uses two-phase animation like BreathingMove in reference project:
@@ -232,14 +221,9 @@ class AnimationPlayer:
232
  # No animation
233
  if self._current_animation is None:
234
  result = {
235
- "pitch": 0.0,
236
- "yaw": 0.0,
237
- "roll": 0.0,
238
- "x": 0.0,
239
- "y": 0.0,
240
- "z": 0.0,
241
- "antenna_left": 0.0,
242
- "antenna_right": 0.0,
243
  }
244
  self._last_offsets = result.copy()
245
  return result
@@ -247,14 +231,9 @@ class AnimationPlayer:
247
  params = self._animations.get(self._current_animation)
248
  if params is None:
249
  result = {
250
- "pitch": 0.0,
251
- "yaw": 0.0,
252
- "roll": 0.0,
253
- "x": 0.0,
254
- "y": 0.0,
255
- "z": 0.0,
256
- "antenna_left": 0.0,
257
- "antenna_right": 0.0,
258
  }
259
  self._last_offsets = result.copy()
260
  return result
@@ -301,23 +280,29 @@ class AnimationPlayer:
301
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
302
 
303
  # Calculate oscillations with per-axis frequencies and random phases
304
- pitch = params.pitch_offset_rad + params.pitch_amplitude_rad * math.sin(
305
- 2 * math.pi * pitch_freq * elapsed + self._phase_pitch
306
- )
307
 
308
- yaw = params.yaw_offset_rad + params.yaw_amplitude_rad * math.sin(
309
- 2 * math.pi * yaw_freq * elapsed + self._phase_yaw
310
- )
311
 
312
- roll = params.roll_offset_rad + params.roll_amplitude_rad * math.sin(
313
- 2 * math.pi * roll_freq * elapsed + self._phase_roll
314
- )
315
 
316
- x = params.x_offset_m + params.x_amplitude_m * math.sin(2 * math.pi * x_freq * elapsed + self._phase_x)
 
 
317
 
318
- y = params.y_offset_m + params.y_amplitude_m * math.sin(2 * math.pi * y_freq * elapsed + self._phase_y)
 
 
319
 
320
- z = params.z_offset_m + params.z_amplitude_m * math.sin(2 * math.pi * z_freq * elapsed + self._phase_z)
 
 
321
 
322
  # Antenna movement with its own frequency
323
  antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
@@ -347,7 +332,7 @@ class AnimationPlayer:
347
  return result
348
 
349
  @property
350
- def current_animation(self) -> str | None:
351
  """Get the current animation name."""
352
  with self._lock:
353
  return self._current_animation
 
8
  oscillators for more natural head sway.
9
  """
10
 
11
+ import json
12
  import logging
13
  import math
14
  import random
 
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
19
+ from typing import Dict, Optional
 
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
+ _ANIMATIONS_FILE = _MODULE_DIR / "animations" / "conversation_animations.json"
 
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
 
30
  name: str
31
  description: str
32
  # Position amplitudes (meters)
 
73
  """
74
 
75
  def __init__(self):
76
+ self._animations: Dict[str, AnimationParams] = {}
77
  self._amplitude_scale: float = 1.0
78
  self._transition_duration: float = 0.3
79
+ self._interpolation_duration: float = 1.0 # Time to interpolate to neutral (same as BreathingMove)
80
+ self._current_animation: Optional[str] = None
81
+ self._target_animation: Optional[str] = None
82
  self._transition_start: float = 0.0
83
  self._phase_start: float = 0.0
84
  self._lock = threading.Lock()
 
92
  # Interpolation state (for smooth transition to neutral before oscillation)
93
  self._in_interpolation: bool = False
94
  self._interpolation_start_time: float = 0.0
95
+ self._interpolation_start_offsets: Dict[str, float] = {
96
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
97
+ "x": 0.0, "y": 0.0, "z": 0.0,
98
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
99
  }
100
+ self._last_offsets: Dict[str, float] = {
101
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
102
+ "x": 0.0, "y": 0.0, "z": 0.0,
103
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
104
  }
105
  self._load_config()
106
 
 
110
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
111
  return
112
  try:
113
+ with open(_ANIMATIONS_FILE, "r", encoding="utf-8") as f:
114
+ data = json.load(f)
115
 
116
  settings = data.get("settings", {})
117
  self._amplitude_scale = settings.get("amplitude_scale", 1.0)
 
193
  self._current_animation = None
194
  self._target_animation = None
195
 
196
+ def get_offsets(self, dt: float = 0.0) -> Dict[str, float]:
197
  """Calculate current animation offsets.
198
 
199
  Uses two-phase animation like BreathingMove in reference project:
 
221
  # No animation
222
  if self._current_animation is None:
223
  result = {
224
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
225
+ "x": 0.0, "y": 0.0, "z": 0.0,
226
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
227
  }
228
  self._last_offsets = result.copy()
229
  return result
 
231
  params = self._animations.get(self._current_animation)
232
  if params is None:
233
  result = {
234
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
235
+ "x": 0.0, "y": 0.0, "z": 0.0,
236
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
237
  }
238
  self._last_offsets = result.copy()
239
  return result
 
280
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
281
 
282
  # Calculate oscillations with per-axis frequencies and random phases
283
+ pitch = (params.pitch_offset_rad +
284
+ params.pitch_amplitude_rad *
285
+ math.sin(2 * math.pi * pitch_freq * elapsed + self._phase_pitch))
286
 
287
+ yaw = (params.yaw_offset_rad +
288
+ params.yaw_amplitude_rad *
289
+ math.sin(2 * math.pi * yaw_freq * elapsed + self._phase_yaw))
290
 
291
+ roll = (params.roll_offset_rad +
292
+ params.roll_amplitude_rad *
293
+ math.sin(2 * math.pi * roll_freq * elapsed + self._phase_roll))
294
 
295
+ x = (params.x_offset_m +
296
+ params.x_amplitude_m *
297
+ math.sin(2 * math.pi * x_freq * elapsed + self._phase_x))
298
 
299
+ y = (params.y_offset_m +
300
+ params.y_amplitude_m *
301
+ math.sin(2 * math.pi * y_freq * elapsed + self._phase_y))
302
 
303
+ z = (params.z_offset_m +
304
+ params.z_amplitude_m *
305
+ math.sin(2 * math.pi * z_freq * elapsed + self._phase_z))
306
 
307
  # Antenna movement with its own frequency
308
  antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
 
332
  return result
333
 
334
  @property
335
+ def current_animation(self) -> Optional[str]:
336
  """Get the current animation name."""
337
  with self._lock:
338
  return self._current_animation
reachy_mini_ha_voice/animations/conversation_animations.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "animations": {
3
+ "idle": {
4
+ "description": "Breathing animation when idle - gentle z-axis movement and antenna sway (same as conversation_app BreathingMove)",
5
+ "z_amplitude_m": 0.005,
6
+ "z_frequency_hz": 0.1,
7
+ "antenna_amplitude_rad": 0.262,
8
+ "antenna_move_name": "wiggle",
9
+ "antenna_frequency_hz": 0.5,
10
+ "frequency_hz": 0.5
11
+ },
12
+ "listening": {
13
+ "description": "Attentive pose while listening to user - slight forward lean",
14
+ "pitch_offset_rad": -0.05,
15
+ "pitch_amplitude_rad": 0.03,
16
+ "z_amplitude_m": 0.003,
17
+ "antenna_amplitude_rad": 0.2,
18
+ "antenna_move_name": "both",
19
+ "frequency_hz": 0.6
20
+ },
21
+ "thinking": {
22
+ "description": "Processing/thinking animation - head tilted with gentle sway",
23
+ "roll_offset_rad": 0.08,
24
+ "pitch_amplitude_rad": 0.03,
25
+ "yaw_amplitude_rad": 0.05,
26
+ "roll_amplitude_rad": 0.04,
27
+ "z_amplitude_m": 0.003,
28
+ "antenna_amplitude_rad": 0.25,
29
+ "antenna_move_name": "wiggle",
30
+ "frequency_hz": 0.4
31
+ }
32
+ },
33
+ "emotions": {
34
+ "happy": {
35
+ "description": "Happy/joyful expression - bouncy head movement with excited antennas",
36
+ "duration": 2.0,
37
+ "pitch_amplitude": 0.15,
38
+ "z_amplitude": 0.015,
39
+ "antenna_left": 0.5,
40
+ "antenna_right": 0.5,
41
+ "frequency": 1.5
42
+ },
43
+ "sad": {
44
+ "description": "Sad/disappointed expression - drooping head and antennas",
45
+ "duration": 2.5,
46
+ "pitch": 0.15,
47
+ "z": -0.01,
48
+ "pitch_amplitude": 0.05,
49
+ "antenna_left": -0.2,
50
+ "antenna_right": -0.2,
51
+ "frequency": 0.3
52
+ },
53
+ "surprised": {
54
+ "description": "Surprised/shocked expression - head pulls back with alert antennas",
55
+ "duration": 1.5,
56
+ "pitch": -0.1,
57
+ "z": 0.01,
58
+ "antenna_left": 0.7,
59
+ "antenna_right": 0.7,
60
+ "frequency": 2.0
61
+ },
62
+ "confused": {
63
+ "description": "Confused/puzzled expression - head tilts with asymmetric antennas",
64
+ "duration": 2.0,
65
+ "roll": 0.12,
66
+ "yaw_amplitude": 0.15,
67
+ "antenna_left": 0.3,
68
+ "antenna_right": -0.3,
69
+ "frequency": 0.8
70
+ },
71
+ "excited": {
72
+ "description": "Excited/enthusiastic expression - energetic multi-axis movement",
73
+ "duration": 2.0,
74
+ "pitch_amplitude": 0.12,
75
+ "yaw_amplitude": 0.1,
76
+ "z_amplitude": 0.02,
77
+ "antenna_left": 0.6,
78
+ "antenna_right": 0.6,
79
+ "frequency": 2.0
80
+ },
81
+ "thinking_emotion": {
82
+ "description": "Thinking/pondering expression - slight tilt with slow movement",
83
+ "duration": 2.0,
84
+ "roll": 0.08,
85
+ "pitch": -0.05,
86
+ "yaw_amplitude": 0.08,
87
+ "antenna_left": 0.4,
88
+ "antenna_right": -0.2,
89
+ "frequency": 0.4
90
+ },
91
+ "nod": {
92
+ "description": "Nodding gesture - quick up-down head movement",
93
+ "duration": 0.8,
94
+ "pitch_amplitude": 0.2,
95
+ "frequency": 2.5
96
+ },
97
+ "shake": {
98
+ "description": "Shaking gesture - quick left-right head movement",
99
+ "duration": 0.8,
100
+ "yaw_amplitude": 0.25,
101
+ "frequency": 3.0
102
+ },
103
+ "curious": {
104
+ "description": "Curious/interested expression - head tilts forward with alert antennas",
105
+ "duration": 1.5,
106
+ "pitch": -0.08,
107
+ "roll": 0.1,
108
+ "antenna_left": 0.5,
109
+ "antenna_right": 0.3,
110
+ "frequency": 0.6
111
+ },
112
+ "sleepy": {
113
+ "description": "Sleepy/tired expression - slow drooping movement",
114
+ "duration": 3.0,
115
+ "pitch": 0.12,
116
+ "z": -0.015,
117
+ "pitch_amplitude": 0.03,
118
+ "antenna_left": -0.4,
119
+ "antenna_right": -0.4,
120
+ "frequency": 0.15
121
+ },
122
+ "angry": {
123
+ "description": "Angry/frustrated expression - intense forward lean with tense antennas",
124
+ "duration": 1.5,
125
+ "pitch": -0.12,
126
+ "roll_amplitude": 0.08,
127
+ "antenna_left": 0.8,
128
+ "antenna_right": 0.8,
129
+ "frequency": 1.8
130
+ },
131
+ "shy": {
132
+ "description": "Shy/embarrassed expression - head turns away slightly",
133
+ "duration": 2.0,
134
+ "yaw": 0.15,
135
+ "pitch": 0.08,
136
+ "roll": 0.05,
137
+ "antenna_left": -0.1,
138
+ "antenna_right": -0.1,
139
+ "frequency": 0.3
140
+ },
141
+ "love": {
142
+ "description": "Loving/affectionate expression - gentle swaying with happy antennas",
143
+ "duration": 2.5,
144
+ "yaw_amplitude": 0.08,
145
+ "pitch_amplitude": 0.06,
146
+ "z_amplitude": 0.008,
147
+ "antenna_left": 0.4,
148
+ "antenna_right": 0.4,
149
+ "frequency": 0.8
150
+ },
151
+ "bored": {
152
+ "description": "Bored/uninterested expression - slow side-to-side with droopy antennas",
153
+ "duration": 3.0,
154
+ "yaw_amplitude": 0.1,
155
+ "pitch": 0.05,
156
+ "antenna_left": -0.15,
157
+ "antenna_right": -0.15,
158
+ "frequency": 0.2
159
+ },
160
+ "alert": {
161
+ "description": "Alert/attentive expression - quick upward movement with perky antennas",
162
+ "duration": 1.0,
163
+ "pitch": -0.15,
164
+ "z": 0.015,
165
+ "antenna_left": 0.7,
166
+ "antenna_right": 0.7,
167
+ "frequency": 2.5
168
+ }
169
+ },
170
+ "settings": {
171
+ "amplitude_scale": 1.0,
172
+ "transition_duration_s": 0.3,
173
+ "default_emotion_duration": 2.0,
174
+ "default_emotion_frequency": 1.0
175
+ }
176
+ }
reachy_mini_ha_voice/animations/emotion_keywords.json ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "description": "Emotion keyword detection for automatic emotion triggering. Maps text patterns to robot emotion names from pollen-robotics/reachy-mini-emotions-library.",
3
+ "keywords": {
4
+ "===== 开心/高兴 (Cheerful) =====": "cheerful1",
5
+ "太棒了": "cheerful1",
6
+ "太好了": "cheerful1",
7
+ "好开心": "cheerful1",
8
+ "真高兴": "cheerful1",
9
+ "开心": "cheerful1",
10
+ "高兴": "cheerful1",
11
+ "快乐": "cheerful1",
12
+ "棒": "cheerful1",
13
+ "恭喜": "cheerful1",
14
+ "真好": "cheerful1",
15
+ "nice": "cheerful1",
16
+ "great": "cheerful1",
17
+ "awesome": "cheerful1",
18
+ "wonderful": "cheerful1",
19
+ "excellent": "cheerful1",
20
+ "happy": "cheerful1",
21
+ "congratulations": "cheerful1",
22
+ "congrats": "cheerful1",
23
+ "good job": "cheerful1",
24
+ "well done": "cheerful1",
25
+
26
+ "===== 大笑 (Laughing) =====": "laughing1",
27
+ "哈哈": "laughing1",
28
+ "哈哈哈": "laughing1",
29
+ "嘿嘿": "laughing1",
30
+ "呵呵": "laughing1",
31
+ "笑死": "laughing1",
32
+ "太搞笑": "laughing1",
33
+ "好笑": "laughing1",
34
+ "笑了": "laughing1",
35
+ "haha": "laughing1",
36
+ "hehe": "laughing1",
37
+ "lol": "laughing1",
38
+ "lmao": "laughing1",
39
+ "rofl": "laughing1",
40
+ "funny": "laughing1",
41
+ "hilarious": "laughing1",
42
+
43
+ "===== 热情/兴奋 (Enthusiastic) =====": "enthusiastic1",
44
+ "太激动": "enthusiastic1",
45
+ "太兴奋": "enthusiastic1",
46
+ "兴奋": "enthusiastic1",
47
+ "激动": "enthusiastic1",
48
+ "耶": "enthusiastic1",
49
+ "太酷了": "enthusiastic1",
50
+ "酷": "enthusiastic1",
51
+ "fantastic": "enthusiastic1",
52
+ "excited": "enthusiastic1",
53
+ "exciting": "enthusiastic1",
54
+ "yay": "enthusiastic1",
55
+ "woohoo": "enthusiastic1",
56
+ "cool": "enthusiastic1",
57
+
58
+ "===== 惊讶/惊叹 (Amazed) =====": "amazed1",
59
+ "太神奇": "amazed1",
60
+ "神奇": "amazed1",
61
+ "不可思议": "amazed1",
62
+ "太厉害": "amazed1",
63
+ "厉害": "amazed1",
64
+ "牛": "amazed1",
65
+ "amazing": "amazed1",
66
+ "incredible": "amazed1",
67
+ "unbelievable": "amazed1",
68
+ "impressive": "amazed1",
69
+
70
+ "===== 惊讶 (Surprised) =====": "surprised1",
71
+ "哇": "surprised1",
72
+ "哇塞": "surprised1",
73
+ "真的吗": "surprised1",
74
+ "天啊": "surprised1",
75
+ "我的天": "surprised1",
76
+ "什么": "surprised1",
77
+ "啊": "surprised1",
78
+ "wow": "surprised1",
79
+ "really": "surprised1",
80
+ "oh my god": "surprised1",
81
+ "omg": "surprised1",
82
+ "seriously": "surprised1",
83
+ "no way": "surprised1",
84
+
85
+ "===== 爱/喜欢 (Loving) =====": "loving1",
86
+ "爱": "loving1",
87
+ "爱你": "loving1",
88
+ "喜欢": "loving1",
89
+ "太爱了": "loving1",
90
+ "可爱": "loving1",
91
+ "萌": "loving1",
92
+ "爱死了": "loving1",
93
+ "love": "loving1",
94
+ "lovely": "loving1",
95
+ "adore": "loving1",
96
+ "cute": "loving1",
97
+
98
+ "===== 感谢 (Grateful) =====": "grateful1",
99
+ "谢谢": "grateful1",
100
+ "感谢": "grateful1",
101
+ "多谢": "grateful1",
102
+ "太感谢": "grateful1",
103
+ "非常感谢": "grateful1",
104
+ "感激": "grateful1",
105
+ "thank": "grateful1",
106
+ "thanks": "grateful1",
107
+ "appreciate": "grateful1",
108
+ "grateful": "grateful1",
109
+
110
+ "===== 欢迎 (Welcoming) =====": "welcoming1",
111
+ "欢迎": "welcoming1",
112
+ "你好": "welcoming1",
113
+ "早上好": "welcoming1",
114
+ "下午好": "welcoming1",
115
+ "晚上好": "welcoming1",
116
+ "嗨": "welcoming1",
117
+ "welcome": "welcoming1",
118
+ "hello": "welcoming1",
119
+ "hi": "welcoming1",
120
+ "hey": "welcoming1",
121
+ "good morning": "welcoming1",
122
+ "good afternoon": "welcoming1",
123
+ "good evening": "welcoming1",
124
+
125
+ "===== 乐于助人 (Helpful) =====": "helpful1",
126
+ "当然": "helpful1",
127
+ "好的": "helpful1",
128
+ "没问题": "helpful1",
129
+ "可以": "helpful1",
130
+ "行": "helpful1",
131
+ "好啊": "helpful1",
132
+ "乐意": "helpful1",
133
+ "sure": "helpful1",
134
+ "of course": "helpful1",
135
+ "no problem": "helpful1",
136
+ "certainly": "helpful1",
137
+ "absolutely": "helpful1",
138
+ "glad to help": "helpful1",
139
+
140
+ "===== 好奇 (Curious) =====": "curious1",
141
+ "好奇": "curious1",
142
+ "有意思": "curious1",
143
+ "有趣": "curious1",
144
+ "奇怪": "curious1",
145
+ "怎么回事": "curious1",
146
+ "curious": "curious1",
147
+ "interesting": "curious1",
148
+ "intriguing": "curious1",
149
+ "wonder": "curious1",
150
+
151
+ "===== 思考 (Thoughtful) =====": "thoughtful1",
152
+ "嗯": "thoughtful1",
153
+ "让我想想": "thoughtful1",
154
+ "想一想": "thoughtful1",
155
+ "考虑": "thoughtful1",
156
+ "思考": "thoughtful1",
157
+ "想想看": "thoughtful1",
158
+ "我觉得": "thoughtful1",
159
+ "hmm": "thoughtful1",
160
+ "let me think": "thoughtful1",
161
+ "thinking": "thoughtful1",
162
+ "i think": "thoughtful1",
163
+ "perhaps": "thoughtful1",
164
+ "maybe": "thoughtful1",
165
+
166
+ "===== 理解 (Understanding) =====": "understanding1",
167
+ "明白": "understanding1",
168
+ "懂了": "understanding1",
169
+ "理解": "understanding1",
170
+ "原来如此": "understanding1",
171
+ "我明白了": "understanding1",
172
+ "i see": "understanding1",
173
+ "i understand": "understanding1",
174
+ "got it": "understanding1",
175
+ "makes sense": "understanding1",
176
+
177
+ "===== 骄傲/自豪 (Proud) =====": "proud1",
178
+ "骄傲": "proud1",
179
+ "自豪": "proud1",
180
+ "了不起": "proud1",
181
+ "真棒": "proud1",
182
+ "做得好": "proud1",
183
+ "proud": "proud1",
184
+ "bravo": "proud1",
185
+
186
+ "===== 成功 (Success) =====": "success1",
187
+ "成功": "success1",
188
+ "搞定": "success1",
189
+ "完成": "success1",
190
+ "做到了": "success1",
191
+ "success": "success1",
192
+ "done": "success1",
193
+ "finished": "success1",
194
+ "accomplished": "success1",
195
+
196
+ "===== 悲伤 (Sad) =====": "sad1",
197
+ "难过": "sad1",
198
+ "伤心": "sad1",
199
+ "悲伤": "sad1",
200
+ "可惜": "sad1",
201
+ "遗憾": "sad1",
202
+ "唉": "sad1",
203
+ "不幸": "sad1",
204
+ "sad": "sad1",
205
+ "unfortunately": "sad1",
206
+ "too bad": "sad1",
207
+ "pity": "sad1",
208
+
209
+ "===== 抱歉 (Sorry/Oops) =====": "oops1",
210
+ "抱歉": "oops1",
211
+ "对不起": "oops1",
212
+ "不好意思": "oops1",
213
+ "糟糕": "oops1",
214
+ "哎呀": "oops1",
215
+ "失误": "oops1",
216
+ "搞砸": "oops1",
217
+ "sorry": "oops1",
218
+ "oops": "oops1",
219
+ "my bad": "oops1",
220
+ "excuse me": "oops1",
221
+ "pardon": "oops1",
222
+
223
+ "===== 不确定 (Uncertain) =====": "uncertain1",
224
+ "不知道": "uncertain1",
225
+ "不确定": "uncertain1",
226
+ "不太清楚": "uncertain1",
227
+ "说不准": "uncertain1",
228
+ "不一定": "uncertain1",
229
+ "don't know": "uncertain1",
230
+ "not sure": "uncertain1",
231
+ "uncertain": "uncertain1",
232
+ "unsure": "uncertain1",
233
+ "i'm not sure": "uncertain1",
234
+
235
+ "===== 困惑 (Confused) =====": "confused1",
236
+ "困惑": "confused1",
237
+ "迷惑": "confused1",
238
+ "搞不懂": "confused1",
239
+ "看不懂": "confused1",
240
+ "不理解": "confused1",
241
+ "晕": "confused1",
242
+ "懵": "confused1",
243
+ "confused": "confused1",
244
+ "confusing": "confused1",
245
+ "puzzled": "confused1",
246
+ "what": "confused1",
247
+
248
+ "===== 害怕/恐惧 (Fear) =====": "fear1",
249
+ "害怕": "fear1",
250
+ "恐惧": "fear1",
251
+ "可怕": "fear1",
252
+ "吓人": "fear1",
253
+ "担心": "fear1",
254
+ "afraid": "fear1",
255
+ "scared": "fear1",
256
+ "fear": "fear1",
257
+ "scary": "fear1",
258
+ "frightening": "fear1",
259
+
260
+ "===== 焦虑 (Anxiety) =====": "anxiety1",
261
+ "焦虑": "anxiety1",
262
+ "紧张": "anxiety1",
263
+ "忐忑": "anxiety1",
264
+ "不安": "anxiety1",
265
+ "anxious": "anxiety1",
266
+ "nervous": "anxiety1",
267
+ "worried": "anxiety1",
268
+ "anxiety": "anxiety1",
269
+
270
+ "===== 愤怒 (Rage) =====": "rage1",
271
+ "生气": "rage1",
272
+ "愤怒": "rage1",
273
+ "气死": "rage1",
274
+ "太过分": "rage1",
275
+ "可恶": "rage1",
276
+ "angry": "rage1",
277
+ "mad": "rage1",
278
+ "rage": "rage1",
279
+ "outrageous": "rage1",
280
+
281
+ "===== 烦躁/不耐烦 (Irritated) =====": "irritated1",
282
+ "烦": "irritated1",
283
+ "烦躁": "irritated1",
284
+ "烦死了": "irritated1",
285
+ "讨厌": "irritated1",
286
+ "annoyed": "irritated1",
287
+ "irritated": "irritated1",
288
+ "annoying": "irritated1",
289
+
290
+ "===== 沮丧 (Frustrated) =====": "frustrated1",
291
+ "沮丧": "frustrated1",
292
+ "挫败": "frustrated1",
293
+ "郁闷": "frustrated1",
294
+ "心烦": "frustrated1",
295
+ "frustrated": "frustrated1",
296
+ "frustrating": "frustrated1",
297
+
298
+ "===== 厌恶 (Disgusted) =====": "disgusted1",
299
+ "恶心": "disgusted1",
300
+ "厌恶": "disgusted1",
301
+ "反感": "disgusted1",
302
+ "太恶心": "disgusted1",
303
+ "disgusted": "disgusted1",
304
+ "disgusting": "disgusted1",
305
+ "gross": "disgusted1",
306
+ "yuck": "disgusted1",
307
+ "eww": "disgusted1",
308
+
309
+ "===== 无聊 (Boredom) =====": "boredom1",
310
+ "无聊": "boredom1",
311
+ "好无聊": "boredom1",
312
+ "没意思": "boredom1",
313
+ "闷": "boredom1",
314
+ "bored": "boredom1",
315
+ "boring": "boredom1",
316
+
317
+ "===== 疲惫 (Tired) =====": "tired1",
318
+ "累": "tired1",
319
+ "好累": "tired1",
320
+ "累了": "tired1",
321
+ "疲惫": "tired1",
322
+ "困": "tired1",
323
+ "tired": "tired1",
324
+ "exhausted": "tired1",
325
+ "sleepy": "tired1",
326
+
327
+ "===== 困倦/睡眠 (Sleep) =====": "sleep1",
328
+ "想睡": "sleep1",
329
+ "睡觉": "sleep1",
330
+ "晚安": "sleep1",
331
+ "good night": "sleep1",
332
+ "sleep": "sleep1",
333
+ "nap": "sleep1",
334
+
335
+ "===== 害羞 (Shy) =====": "shy1",
336
+ "害羞": "shy1",
337
+ "不好意思": "shy1",
338
+ "脸红": "shy1",
339
+ "shy": "shy1",
340
+ "embarrassed": "shy1",
341
+ "blush": "shy1",
342
+
343
+ "===== 孤独 (Lonely) =====": "lonely1",
344
+ "孤独": "lonely1",
345
+ "寂寞": "lonely1",
346
+ "孤单": "lonely1",
347
+ "lonely": "lonely1",
348
+ "alone": "lonely1",
349
+
350
+ "===== 放松/释然 (Relief) =====": "relief1",
351
+ "放松": "relief1",
352
+ "松一口气": "relief1",
353
+ "终于": "relief1",
354
+ "幸好": "relief1",
355
+ "还好": "relief1",
356
+ "relief": "relief1",
357
+ "relieved": "relief1",
358
+ "finally": "relief1",
359
+ "phew": "relief1",
360
+
361
+ "===== 平静 (Serenity) =====": "serenity1",
362
+ "平静": "serenity1",
363
+ "安静": "serenity1",
364
+ "冷静": "serenity1",
365
+ "淡定": "serenity1",
366
+ "calm": "serenity1",
367
+ "peaceful": "serenity1",
368
+ "serene": "serenity1",
369
+ "relax": "serenity1",
370
+
371
+ "===== 肯定/是 (Yes) =====": "yes1",
372
+ "是的": "yes1",
373
+ "对": "yes1",
374
+ "没错": "yes1",
375
+ "正确": "yes1",
376
+ "嗯嗯": "yes1",
377
+ "是啊": "yes1",
378
+ "对的": "yes1",
379
+ "yes": "yes1",
380
+ "yeah": "yes1",
381
+ "yep": "yes1",
382
+ "correct": "yes1",
383
+ "right": "yes1",
384
+ "exactly": "yes1",
385
+
386
+ "===== 否定/不 (No) =====": "no1",
387
+ "不是": "no1",
388
+ "不行": "no1",
389
+ "不可以": "no1",
390
+ "不对": "no1",
391
+ "错": "no1",
392
+ "no": "no1",
393
+ "nope": "no1",
394
+ "wrong": "no1",
395
+ "incorrect": "no1",
396
+
397
+ "===== 过来 (Come) =====": "come1",
398
+ "过来": "come1",
399
+ "来": "come1",
400
+ "来吧": "come1",
401
+ "come": "come1",
402
+ "come here": "come1",
403
+
404
+ "===== 走开 (Go away) =====": "go_away1",
405
+ "走开": "go_away1",
406
+ "滚": "go_away1",
407
+ "走": "go_away1",
408
+ "go away": "go_away1",
409
+ "leave": "go_away1",
410
+ "get out": "go_away1",
411
+
412
+ "===== 跳舞 (Dance) =====": "dance1",
413
+ "跳舞": "dance1",
414
+ "舞蹈": "dance1",
415
+ "蹦迪": "dance1",
416
+ "dance": "dance1",
417
+ "dancing": "dance1"
418
+ },
419
+ "settings": {
420
+ "enabled": true,
421
+ "case_sensitive": false,
422
+ "max_emotions_per_response": 1
423
+ }
424
+ }
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py RENAMED
@@ -4,7 +4,7 @@ import asyncio
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
- from typing import TYPE_CHECKING
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
@@ -31,7 +31,7 @@ class APIServer(asyncio.Protocol):
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
- self._buffer: bytes | None = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
@@ -41,77 +41,52 @@ class APIServer(asyncio.Protocol):
41
  def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
42
  pass
43
 
44
- def on_authenticated(self) -> None:
45
- """Hook called after authentication succeeds."""
46
- return
47
-
48
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
49
- try:
50
- msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
51
- msg_inst = msg_class.FromString(packet_data)
52
- _LOGGER.debug("Received message: %s", msg_class.__name__)
53
-
54
- if isinstance(msg_inst, HelloRequest):
55
- _LOGGER.info("HelloRequest received, sending HelloResponse")
56
- self.send_messages(
57
- [
58
- HelloResponse(
59
- api_version_major=1,
60
- api_version_minor=10,
61
- name=self.name,
62
- )
63
- ]
64
- )
65
- return
66
 
67
- if isinstance(msg_inst, AuthenticationRequest):
68
- _LOGGER.info("AuthenticationRequest received, sending AuthenticationResponse")
69
- self.send_messages([AuthenticationResponse()])
70
- self.on_authenticated()
71
- elif isinstance(msg_inst, DisconnectRequest):
72
- self.send_messages([DisconnectResponse()])
73
- _LOGGER.debug("Disconnect requested")
74
- if self._transport:
75
- self._transport.close()
76
- self._transport = None
77
- self._writelines = None
78
- elif isinstance(msg_inst, PingRequest):
79
- self.send_messages([PingResponse()])
80
- elif msgs := self.handle_message(msg_inst):
81
- if isinstance(msgs, message.Message):
82
- msgs = [msgs]
83
- self.send_messages(msgs)
84
- except Exception:
85
- _LOGGER.exception("Unhandled ESPHome protocol error while processing message type %s", msg_type)
86
  if self._transport:
87
  self._transport.close()
88
  self._transport = None
89
  self._writelines = None
90
-
91
- def send_messages(self, msgs: list[message.Message]):
 
 
 
 
 
 
92
  if self._writelines is None:
93
  return
94
 
95
- try:
96
- packets = [(PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString()) for msg in msgs]
97
- packet_bytes = make_plain_text_packets(packets)
98
- self._writelines(packet_bytes)
99
- except (IndexError, OSError, BrokenPipeError, ConnectionResetError) as e:
100
- _LOGGER.warning(
101
- "Error sending message (%s): %s - connection may be lost",
102
- msgs[0].__class__.__name__ if msgs else "unknown",
103
- e,
104
- )
105
- # Mark transport as invalid to prevent further writes
106
- self._writelines = None
107
- if self._transport:
108
- self._transport.close()
109
- self._transport = None
110
 
111
  def connection_made(self, transport) -> None:
112
  self._transport = transport
113
  self._writelines = transport.writelines
114
- _LOGGER.info("ESPHome client connected from %s", transport.get_extra_info("peername"))
115
 
116
  def data_received(self, data: bytes):
117
  if self._buffer is None:
@@ -165,13 +140,8 @@ class APIServer(asyncio.Protocol):
165
  return cstr[original_pos:new_pos]
166
 
167
  def connection_lost(self, exc):
168
- _LOGGER.info("ESPHome client disconnected: %s", exc)
169
  self._transport = None
170
  self._writelines = None
171
- # Clear buffer to prevent memory leak
172
- self._buffer = None
173
- self._buffer_len = 0
174
- self._pos = 0
175
 
176
  def _read_varuint(self) -> int:
177
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
 
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, List, Optional
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
 
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
+ self._buffer: Optional[bytes] = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
 
41
  def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
42
  pass
43
 
 
 
 
 
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
+ msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
+ msg_inst = msg_class.FromString(packet_data)
47
+
48
+ if isinstance(msg_inst, HelloRequest):
49
+ self.send_messages(
50
+ [
51
+ HelloResponse(
52
+ api_version_major=1,
53
+ api_version_minor=10,
54
+ name=self.name,
55
+ )
56
+ ]
57
+ )
58
+ return
 
 
 
59
 
60
+ if isinstance(msg_inst, AuthenticationRequest):
61
+ self.send_messages([AuthenticationResponse()])
62
+ elif isinstance(msg_inst, DisconnectRequest):
63
+ self.send_messages([DisconnectResponse()])
64
+ _LOGGER.debug("Disconnect requested")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  if self._transport:
66
  self._transport.close()
67
  self._transport = None
68
  self._writelines = None
69
+ elif isinstance(msg_inst, PingRequest):
70
+ self.send_messages([PingResponse()])
71
+ elif msgs := self.handle_message(msg_inst):
72
+ if isinstance(msgs, message.Message):
73
+ msgs = [msgs]
74
+ self.send_messages(msgs)
75
+
76
+ def send_messages(self, msgs: List[message.Message]):
77
  if self._writelines is None:
78
  return
79
 
80
+ packets = [
81
+ (PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString())
82
+ for msg in msgs
83
+ ]
84
+ packet_bytes = make_plain_text_packets(packets)
85
+ self._writelines(packet_bytes)
 
 
 
 
 
 
 
 
 
86
 
87
  def connection_made(self, transport) -> None:
88
  self._transport = transport
89
  self._writelines = transport.writelines
 
90
 
91
  def data_received(self, data: bytes):
92
  if self._buffer is None:
 
140
  return cstr[original_pos:new_pos]
141
 
142
  def connection_lost(self, exc):
 
143
  self._transport = None
144
  self._writelines = None
 
 
 
 
145
 
146
  def _read_varuint(self) -> int:
147
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
reachy_mini_ha_voice/audio_player.py ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio player using Reachy Mini's media system with automatic Sendspin support.
2
+
3
+ Sendspin integration allows synchronized multi-room audio playback through
4
+ a Sendspin server. Reachy Mini connects as a PLAYER to receive audio streams
5
+ from Home Assistant or other Sendspin controllers.
6
+
7
+ Sendspin is automatically enabled by default - no user configuration needed.
8
+ The system uses mDNS to discover Sendspin servers on the local network.
9
+ """
10
+
11
+ import hashlib
12
+ import logging
13
+ import socket
14
+ import threading
15
+ import time
16
+ from collections.abc import Callable
17
+ from typing import List, Optional, TYPE_CHECKING, Union
18
+
19
+ if TYPE_CHECKING:
20
+ from .zeroconf import SendspinDiscovery
21
+
22
+ _LOGGER = logging.getLogger(__name__)
23
+
24
+ # Movement latency to sync head motion with audio playback
25
+ # Audio playback has hardware buffer latency, so we delay head motion to match
26
+ # Same as reachy_mini_conversation_app's HeadWobbler.MOVEMENT_LATENCY_S
27
+ MOVEMENT_LATENCY_S = 0.2 # 200ms latency between audio start and head movement
28
+
29
+ # Check if aiosendspin is available
30
+ try:
31
+ from aiosendspin.client import SendspinClient, PCMFormat
32
+ from aiosendspin.models.types import Roles, AudioCodec, PlayerCommand
33
+ from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
34
+ from aiosendspin.models.core import StreamStartMessage
35
+ SENDSPIN_AVAILABLE = True
36
+ except ImportError:
37
+ SENDSPIN_AVAILABLE = False
38
+ _LOGGER.debug("aiosendspin not installed, Sendspin support disabled")
39
+
40
+
41
+ def _get_stable_client_id() -> str:
42
+ """Generate a stable client ID based on machine identity.
43
+
44
+ Uses a hash of the hostname to create a consistent ID across restarts.
45
+ """
46
+ try:
47
+ hostname = socket.gethostname()
48
+ # Create a hash of hostname for stability
49
+ hash_input = f"reachy-mini-{hostname}"
50
+ return hashlib.sha256(hash_input.encode()).hexdigest()[:16]
51
+ except Exception:
52
+ return "reachy-mini-default"
53
+
54
+
55
+ class AudioPlayer:
56
+ """Audio player using Reachy Mini's media system with automatic Sendspin support.
57
+
58
+ Supports audio playback modes:
59
+ 1. Reachy Mini's built-in media system (default)
60
+ 2. Sendspin synchronized multi-room playback (as PLAYER - receives audio)
61
+ 3. Sounddevice fallback (when Reachy Mini not available)
62
+
63
+ When connected to Sendspin as a PLAYER, Reachy Mini receives audio streams
64
+ from Home Assistant or other controllers for synchronized playback.
65
+ """
66
+
67
+ def __init__(self, reachy_mini=None) -> None:
68
+ """Initialize audio player.
69
+
70
+ Args:
71
+ reachy_mini: Reachy Mini SDK instance.
72
+ """
73
+ self.reachy_mini = reachy_mini
74
+ self.is_playing = False
75
+ self._playlist: List[str] = []
76
+ self._done_callback: Optional[Callable[[], None]] = None
77
+ self._done_callback_lock = threading.Lock()
78
+ self._duck_volume: float = 0.5
79
+ self._unduck_volume: float = 1.0
80
+ self._current_volume: float = 1.0
81
+ self._stop_flag = threading.Event()
82
+
83
+ # Speech sway callback for audio-driven head motion
84
+ self._sway_callback: Optional[Callable[[dict], None]] = None
85
+
86
+ # Sendspin support (auto-enabled via mDNS discovery)
87
+ # Uses stable client_id so HA recognizes the same device after restart
88
+ self._sendspin_client_id = _get_stable_client_id()
89
+ self._sendspin_client: Optional["SendspinClient"] = None
90
+ self._sendspin_enabled = False
91
+ self._sendspin_url: Optional[str] = None
92
+ self._sendspin_discovery: Optional["SendspinDiscovery"] = None
93
+ self._sendspin_unsubscribers: List[Callable] = []
94
+
95
+ # Sendspin playback state (stream format, started flag, paused flag)
96
+ self._sendspin_audio_format: Optional["PCMFormat"] = None
97
+ self._sendspin_playback_started = False
98
+ self._sendspin_paused = False # Pause Sendspin when voice assistant is active
99
+
100
+ def set_sway_callback(self, callback: Optional[Callable[[dict], None]]) -> None:
101
+ """Set callback for speech-driven sway animation.
102
+
103
+ Args:
104
+ callback: Function called with sway dict containing
105
+ pitch_rad, yaw_rad, roll_rad, x_m, y_m, z_m
106
+ """
107
+ self._sway_callback = callback
108
+
109
+ def set_reachy_mini(self, reachy_mini) -> None:
110
+ """Set the Reachy Mini instance."""
111
+ self.reachy_mini = reachy_mini
112
+
113
+ # ========== Sendspin Integration (Auto-enabled via mDNS) ==========
114
+
115
+ @property
116
+ def sendspin_available(self) -> bool:
117
+ """Check if Sendspin library is available."""
118
+ return SENDSPIN_AVAILABLE
119
+
120
+ @property
121
+ def sendspin_enabled(self) -> bool:
122
+ """Check if Sendspin output is enabled and connected."""
123
+ return self._sendspin_enabled and self._sendspin_client is not None
124
+
125
+ @property
126
+ def sendspin_url(self) -> Optional[str]:
127
+ """Get current Sendspin server URL."""
128
+ return self._sendspin_url
129
+
130
+ def pause_sendspin(self) -> None:
131
+ """Pause Sendspin audio playback.
132
+
133
+ Called when voice assistant is activated to prevent audio conflicts.
134
+ Incoming Sendspin audio chunks will be dropped until resumed.
135
+ """
136
+ if self._sendspin_paused:
137
+ return
138
+ self._sendspin_paused = True
139
+ _LOGGER.debug("Sendspin audio paused (voice assistant active)")
140
+
141
+ def resume_sendspin(self) -> None:
142
+ """Resume Sendspin audio playback.
143
+
144
+ Called when voice assistant returns to idle state.
145
+ """
146
+ if not self._sendspin_paused:
147
+ return
148
+ self._sendspin_paused = False
149
+ self._logged_resample = False # Reset resample log flag for new stream
150
+ _LOGGER.debug("Sendspin audio resumed")
151
+
152
+ async def start_sendspin_discovery(self) -> None:
153
+ """Start mDNS discovery for Sendspin servers.
154
+
155
+ This runs in the background and automatically connects when a server is found.
156
+ Called automatically during voice assistant startup.
157
+ """
158
+ if not SENDSPIN_AVAILABLE:
159
+ _LOGGER.debug("aiosendspin not installed, skipping Sendspin discovery")
160
+ return
161
+
162
+ if self._sendspin_discovery is not None and self._sendspin_discovery.is_running:
163
+ _LOGGER.debug("Sendspin discovery already running")
164
+ return
165
+
166
+ # Import here to avoid circular imports
167
+ from .zeroconf import SendspinDiscovery
168
+
169
+ _LOGGER.info("Starting Sendspin server discovery...")
170
+ self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
171
+ await self._sendspin_discovery.start()
172
+
173
+ async def _on_sendspin_server_found(self, server_url: str) -> None:
174
+ """Callback when a Sendspin server is discovered via mDNS.
175
+
176
+ Args:
177
+ server_url: WebSocket URL of the discovered server.
178
+ """
179
+ await self._connect_to_server(server_url)
180
+
181
async def _connect_to_server(self, server_url: str) -> bool:
    """Connect to a Sendspin server in the PLAYER role.

    Any existing client is disconnected first. On failure the partially
    set-up client and any listeners registered so far are torn down via
    ``_disconnect_sendspin`` so nothing is leaked.

    Args:
        server_url: WebSocket URL of the Sendspin server.

    Returns:
        True when connected (or already connected to ``server_url``);
        False when aiosendspin is unavailable or the attempt failed.
    """
    if not SENDSPIN_AVAILABLE:
        return False

    # Already connected to this server
    if self._sendspin_enabled and self._sendspin_url == server_url:
        return True

    # Disconnect from previous server if any
    if self._sendspin_client is not None:
        await self._disconnect_sendspin()

    try:
        # Advertised PCM layouts as (channels, sample_rate), in preference
        # order. 16 kHz comes first: per the original author's note the
        # ReSpeaker output runs at 16 kHz, so those need no resampling.
        pcm_layouts = [
            (2, 16000),
            (1, 16000),
            (2, 48000),
            (2, 44100),
            (1, 48000),
            (1, 44100),
        ]
        player_support = ClientHelloPlayerSupport(
            supported_formats=[
                SupportedAudioFormat(
                    codec=AudioCodec.PCM, channels=channels, sample_rate=rate, bit_depth=16
                )
                for channels, rate in pcm_layouts
            ],
            buffer_capacity=32_000_000,
            supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
        )

        # Stable client_id so the server recognizes the device after restart.
        self._sendspin_client = SendspinClient(
            client_id=self._sendspin_client_id,
            client_name="Reachy Mini",
            roles=[Roles.PLAYER],  # PLAYER role to receive audio
            player_support=player_support,
        )

        await self._sendspin_client.connect(server_url)

        # Register listeners one at a time so that, if a later registration
        # fails, the earlier unsubscribe handles are still tracked.
        client = self._sendspin_client
        self._sendspin_unsubscribers = []
        for register, handler in (
            (client.add_audio_chunk_listener, self._on_sendspin_audio_chunk),
            (client.add_stream_start_listener, self._on_sendspin_stream_start),
            (client.add_stream_end_listener, self._on_sendspin_stream_end),
            (client.add_stream_clear_listener, self._on_sendspin_stream_clear),
        ):
            self._sendspin_unsubscribers.append(register(handler))

        self._sendspin_url = server_url
        self._sendspin_enabled = True

        _LOGGER.info("Sendspin connected as PLAYER: %s (client_id=%s)",
                     server_url, self._sendspin_client_id)
        return True

    except Exception as e:
        _LOGGER.warning("Failed to connect to Sendspin server %s: %s", server_url, e)
        # Tear down whatever was set up (listeners, half-open client) and
        # reset connection state; the helper tolerates a missing client.
        await self._disconnect_sendspin()
        return False
262
+
263
def _on_sendspin_audio_chunk(self, server_timestamp_us: int, audio_data: bytes, fmt: "PCMFormat") -> None:
    """Decode one Sendspin PCM chunk and push it to the robot's speaker.

    The chunk is dropped when no robot is attached or while Sendspin is
    paused. Otherwise the bytes are decoded to float32, shaped as
    (samples, channels), resampled to the output rate reported by the
    media backend when it differs, scaled by the current volume and handed
    to ``push_audio_sample``. Any error is logged at debug level and the
    chunk is discarded.

    Args:
        server_timestamp_us: Timestamp supplied by the server; not used.
        audio_data: Raw PCM bytes.
        fmt: Format descriptor; ``bit_depth``, ``channels`` and
            ``sample_rate`` are read from it.
    """
    if self.reachy_mini is None or self._sendspin_paused:
        return

    try:
        # Remember the most recent stream format.
        self._sendspin_audio_format = fmt

        import numpy as np

        # 32-bit samples get their own scale; every other depth is treated
        # as signed 16-bit.
        if fmt.bit_depth == 32:
            sample_dtype, full_scale = np.int32, 2147483648.0
        else:
            sample_dtype, full_scale = np.int16, 32768.0

        samples = np.frombuffer(audio_data, dtype=sample_dtype).astype(np.float32) / full_scale

        # One column per channel; mono becomes a single column.
        column_count = fmt.channels if fmt.channels > 1 else 1
        samples = samples.reshape(-1, column_count)

        output_rate = self.reachy_mini.media.get_output_audio_samplerate()
        if fmt.sample_rate != output_rate and output_rate > 0:
            import scipy.signal

            resampled_len = int(len(samples) * output_rate / fmt.sample_rate)
            if resampled_len > 0:
                samples = scipy.signal.resample(samples, resampled_len, axis=0)
                # Emit the resampling notice only once until it is re-armed.
                if not getattr(self, '_logged_resample', False):
                    _LOGGER.debug("Resampling Sendspin audio: %d Hz -> %d Hz",
                                  fmt.sample_rate, output_rate)
                    self._logged_resample = True

        samples = samples * self._current_volume

        # Start the media pipeline lazily on the first chunk.
        if not self._sendspin_playback_started:
            try:
                self.reachy_mini.media.start_playing()
                self._sendspin_playback_started = True
                _LOGGER.info("Started media playback for Sendspin audio (target: %d Hz)", output_rate)
            except Exception as e:
                _LOGGER.warning("Failed to start media playback: %s", e)

        self.reachy_mini.media.push_audio_sample(samples)

    except Exception as e:
        _LOGGER.debug("Error playing Sendspin audio: %s", e)
339
+
340
def _on_sendspin_stream_start(self, message: "StreamStartMessage") -> None:
    """Log that the Sendspin server started a stream.

    The message payload is not inspected and no buffer is cleared; the
    new stream simply starts fresh.
    """
    _LOGGER.debug("Sendspin stream started")
344
+
345
def _on_sendspin_stream_end(self, roles: Optional[List[Roles]]) -> None:
    """Log a stream-end notification that applies to the PLAYER role.

    Args:
        roles: Roles the notification targets; ``None`` is treated as
            applying to every role.
    """
    if roles is not None and Roles.PLAYER not in roles:
        return
    _LOGGER.debug("Sendspin stream ended")
349
+
350
def _on_sendspin_stream_clear(self, roles: Optional[List[Roles]]) -> None:
    """Stop robot playback when the server clears the PLAYER stream.

    Args:
        roles: Roles the notification targets; ``None`` is treated as
            applying to every role.
    """
    if roles is not None and Roles.PLAYER not in roles:
        return

    _LOGGER.debug("Sendspin stream cleared")
    if self.reachy_mini is None:
        return

    try:
        self.reachy_mini.media.stop_playing()
        # Only mark playback as stopped once the backend call succeeded, so
        # the next chunk restarts the pipeline.
        self._sendspin_playback_started = False
    except Exception as e:
        # Best effort: a failed stop must not break the listener, but it
        # should be visible when debugging.
        _LOGGER.debug("Failed to stop media playback on stream clear: %s", e)
360
+
361
async def _disconnect_sendspin(self) -> None:
    """Disconnect from the current Sendspin server and reset state.

    Every registered listener is unsubscribed, the client (if any) is
    disconnected, and the enabled flag, URL and last audio format are
    cleared. Failures while unsubscribing or disconnecting are logged at
    debug level and otherwise ignored so teardown always completes.
    """
    # Unsubscribe from listeners
    for unsub in self._sendspin_unsubscribers:
        try:
            unsub()
        except Exception as e:
            _LOGGER.debug("Error unsubscribing Sendspin listener: %s", e)
    self._sendspin_unsubscribers.clear()

    if self._sendspin_client is not None:
        try:
            await self._sendspin_client.disconnect()
        except Exception as e:
            _LOGGER.debug("Error disconnecting from Sendspin: %s", e)
        self._sendspin_client = None

    self._sendspin_enabled = False
    self._sendspin_url = None
    self._sendspin_audio_format = None
381
+
382
async def stop_sendspin(self) -> None:
    """Stop Sendspin discovery and disconnect from the server.

    The server connection is torn down even if stopping discovery raises;
    in that case the exception propagates after cleanup.
    """
    discovery = self._sendspin_discovery
    try:
        if discovery is not None:
            await discovery.stop()
    finally:
        # Always drop the discovery reference and close the connection so
        # a failing discovery shutdown cannot leak the client.
        self._sendspin_discovery = None
        await self._disconnect_sendspin()

    _LOGGER.info("Sendspin stopped")
393
+
394
+ # ========== Core Playback Methods ==========
395
+
396
def play(
    self,
    url: Union[str, List[str]],
    done_callback: Optional[Callable[[], None]] = None,
    stop_first: bool = True,
) -> None:
    """Queue one or more URLs and start playing the first of them.

    Args:
        url: A single URL, or an iterable of URLs played in order.
        done_callback: Stored and invoked when playback finishes.
        stop_first: When true, ``stop()`` is called before queuing.
    """
    if stop_first:
        self.stop()

    # A lone string becomes a one-item playlist; anything else is copied.
    self._playlist = [url] if isinstance(url, str) else list(url)
    self._done_callback = done_callback

    self._stop_flag.clear()
    self._play_next()
420
+
421
def _play_next(self) -> None:
    """Pop the next playlist entry and play it on a daemon thread.

    When the playlist is empty or the stop flag is set, the
    playback-finished handler runs instead.
    """
    nothing_to_play = not self._playlist or self._stop_flag.is_set()
    if nothing_to_play:
        self._on_playback_finished()
        return

    upcoming = self._playlist.pop(0)
    _LOGGER.debug("Playing %s", upcoming)
    self.is_playing = True

    # Playback blocks, so it runs off the calling thread.
    threading.Thread(target=self._play_file, args=(upcoming,), daemon=True).start()
434
+
435
def _play_file(self, file_path: str) -> None:
    """Play one audio file, optionally driving speech-sway animation.

    HTTP(S) sources are first downloaded to a temporary ``.wav`` file,
    which is removed again once playback has finished. With a robot
    attached the file is played through its media backend while sway
    frames (if a sway callback is set) are emitted in step with the
    audio; on any error there, or without a robot, playback falls back to
    ``_play_file_fallback``. When done, the next playlist item is started
    or the playback-finished handler is invoked.

    Args:
        file_path: Local path or http(s) URL of the audio to play.
    """
    import os

    downloaded_path = None  # temp file created here that must be removed
    try:
        # Handle URLs - download first
        if file_path.startswith(("http://", "https://")):
            import tempfile
            import urllib.request

            # Create and close the temp file before downloading so the
            # download is not written to a path that is still open.
            fd, downloaded_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            urllib.request.urlretrieve(file_path, downloaded_path)
            file_path = downloaded_path

        if self._stop_flag.is_set():
            return

        # Play locally using Reachy Mini's media system
        if self.reachy_mini is not None:
            try:
                # Read audio data for duration calculation and sway analysis
                import soundfile as sf
                data, sample_rate = sf.read(file_path)
                duration = len(data) / sample_rate

                # Pre-analyze audio for speech sway if callback is set
                sway_frames = []
                if self._sway_callback is not None:
                    from .speech_sway import SpeechSwayRT
                    sway = SpeechSwayRT()
                    sway_frames = sway.feed(data, sample_rate)
                    _LOGGER.debug("Generated %d sway frames for %.2fs audio",
                                  len(sway_frames), duration)

                # Start playback
                self.reachy_mini.media.play_sound(file_path)

                # Wait out the clip's duration, emitting sway frames as
                # we go. Head motion is delayed by MOVEMENT_LATENCY_S
                # relative to the audio start.
                start_time = time.time()
                frame_duration = 0.05  # 50ms per sway frame (HOP_MS)
                frame_idx = 0

                while time.time() - start_time < duration:
                    if self._stop_flag.is_set():
                        self.reachy_mini.media.stop_playing()
                        break

                    if self._sway_callback and frame_idx < len(sway_frames):
                        elapsed = time.time() - start_time
                        effective_elapsed = max(0, elapsed - MOVEMENT_LATENCY_S)
                        target_frame = int(effective_elapsed / frame_duration)

                        # Catch up: emit every frame up to the target index
                        while frame_idx <= target_frame and frame_idx < len(sway_frames):
                            self._sway_callback(sway_frames[frame_idx])
                            frame_idx += 1

                    time.sleep(0.02)  # 20ms sleep for responsive sway

                # Reset sway to zero when done
                if self._sway_callback:
                    self._sway_callback({
                        "pitch_rad": 0.0, "yaw_rad": 0.0, "roll_rad": 0.0,
                        "x_m": 0.0, "y_m": 0.0, "z_m": 0.0,
                    })

            except Exception as e:
                _LOGGER.warning("Reachy Mini audio failed, falling back: %s", e)
                self._play_file_fallback(file_path)
        else:
            self._play_file_fallback(file_path)

    except Exception as e:
        _LOGGER.error("Error playing audio: %s", e)
    finally:
        # Remove the download now that the wait loop / fallback returned.
        if downloaded_path is not None:
            try:
                os.remove(downloaded_path)
            except OSError as e:
                _LOGGER.debug("Could not remove temporary audio file %s: %s", downloaded_path, e)

        self.is_playing = False
        if self._playlist and not self._stop_flag.is_set():
            self._play_next()
        else:
            self._on_playback_finished()
517
+
518
def _play_file_fallback(self, file_path: str) -> None:
    """Play a file through sounddevice instead of the robot's media system.

    The samples are scaled by the current volume. Playback blocks until
    finished and is skipped when the stop flag is already set.

    Args:
        file_path: Path of the audio file to read with soundfile.
    """
    import sounddevice as sd
    import soundfile as sf

    samples, rate = sf.read(file_path)
    scaled = samples * self._current_volume

    if self._stop_flag.is_set():
        return

    sd.play(scaled, rate)
    sd.wait()
529
+
530
def _on_playback_finished(self) -> None:
    """Mark playback as finished and fire the pending done-callback once.

    The callback is taken and cleared under the callback lock, then
    invoked outside it. Exceptions raised by the callback are logged and
    not propagated.
    """
    self.is_playing = False

    with self._done_callback_lock:
        pending = self._done_callback
        if pending:
            self._done_callback = None

    if not pending:
        return

    try:
        pending()
    except Exception:
        _LOGGER.exception("Unexpected error running done callback")
545
+
546
def pause(self) -> None:
    """Halt audio output without discarding the queued playlist.

    Sets the stop flag, asks the robot's media backend (if any) to stop
    playing, and clears ``is_playing``.
    """
    self._stop_flag.set()

    robot = self.reachy_mini
    if robot is not None:
        try:
            robot.media.stop_playing()
        except Exception:
            # Best effort: a failing backend must not prevent pausing.
            pass

    self.is_playing = False
558
+
559
def resume(self) -> None:
    """Clear the stop flag and continue with the next queued item, if any."""
    self._stop_flag.clear()
    if not self._playlist:
        return
    self._play_next()
564
+
565
def stop(self) -> None:
    """Stop audio output and drop everything still queued.

    Sets the stop flag, asks the robot's media backend (if any) to stop
    playing, empties the playlist in place and clears ``is_playing``.
    """
    self._stop_flag.set()

    robot = self.reachy_mini
    if robot is not None:
        try:
            robot.media.stop_playing()
        except Exception:
            # Best effort: a failing backend must not prevent stopping.
            pass

    self._playlist.clear()
    self.is_playing = False
575
+
576
def duck(self) -> None:
    """Switch the active volume to the reduced (ducked) level."""
    lowered = self._duck_volume
    self._current_volume = lowered
579
+
580
def unduck(self) -> None:
    """Switch the active volume back to the normal (unducked) level."""
    normal = self._unduck_volume
    self._current_volume = normal
583
+
584
def set_volume(self, volume: int) -> None:
    """Set the playback volume from a 0-100 percentage.

    Values outside 0-100 are clamped. The normal level becomes
    ``volume / 100``, the ducked level half of that, and the normal level
    is made active immediately.
    """
    clamped = min(max(volume, 0), 100)
    normal_level = clamped / 100.0

    self._unduck_volume = normal_level
    self._duck_volume = normal_level / 2
    self._current_volume = normal_level
reachy_mini_ha_voice/camera_server.py ADDED
@@ -0,0 +1,877 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MJPEG Camera Server for Reachy Mini with Face Tracking.
3
+
4
+ This module provides an HTTP server that streams camera frames from Reachy Mini
5
+ as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
+ Also provides face tracking for head movement control.
7
+
8
+ Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import threading
14
+ import time
15
+ from typing import Optional, Tuple, List, TYPE_CHECKING
16
+
17
+ import cv2
18
+ import numpy as np
19
+ from scipy.spatial.transform import Rotation as R
20
+
21
+ # Import SDK interpolation utilities (same as conversation_app)
22
+ try:
23
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
24
+ SDK_INTERPOLATION_AVAILABLE = True
25
+ except ImportError:
26
+ SDK_INTERPOLATION_AVAILABLE = False
27
+
28
+ if TYPE_CHECKING:
29
+ from reachy_mini import ReachyMini
30
+
31
+ _LOGGER = logging.getLogger(__name__)
32
+
33
+ # MJPEG boundary string
34
+ MJPEG_BOUNDARY = "frame"
35
+
36
+
37
+ class MJPEGCameraServer:
38
+ """
39
+ MJPEG streaming server for Reachy Mini camera with face tracking.
40
+
41
+ Provides HTTP endpoints:
42
+ - /stream - MJPEG video stream
43
+ - /snapshot - Single JPEG image
44
+ - / - Simple status page
45
+
46
+ Also provides face tracking offsets for head movement control.
47
+
48
+ Resource Optimization:
49
+ - Adaptive frame rate: high (15fps) when face detected or in conversation,
50
+ low (2fps) after a few seconds without a face, ultra-low (0.5fps) after an extended absence
51
+ - Face detection pauses after prolonged absence to save CPU
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ reachy_mini: Optional["ReachyMini"] = None,
57
+ host: str = "0.0.0.0",
58
+ port: int = 8081,
59
+ fps: int = 15, # 15fps for smooth face tracking
60
+ quality: int = 80,
61
+ enable_face_tracking: bool = True,
62
+ ):
63
+ """
64
+ Initialize the MJPEG camera server.
65
+
66
+ Args:
67
+ reachy_mini: Reachy Mini robot instance (can be None for testing)
68
+ host: Host address to bind to
69
+ port: Port number for the HTTP server
70
+ fps: Target frames per second for the stream
71
+ quality: JPEG quality (1-100)
72
+ enable_face_tracking: Enable face tracking for head movement
73
+ """
74
+ self.reachy_mini = reachy_mini
75
+ self.host = host
76
+ self.port = port
77
+ self.fps = fps
78
+ self.quality = quality
79
+ self.enable_face_tracking = enable_face_tracking
80
+
81
+ self._server: Optional[asyncio.Server] = None
82
+ self._running = False
83
+ self._frame_interval = 1.0 / fps
84
+ self._last_frame: Optional[bytes] = None
85
+ self._last_frame_time: float = 0
86
+ self._frame_lock = threading.Lock()
87
+
88
+ # Frame capture thread
89
+ self._capture_thread: Optional[threading.Thread] = None
90
+
91
+ # Face tracking state
92
+ self._head_tracker = None
93
+ self._face_tracking_enabled = True # Enabled by default for always-on face tracking
94
+ self._face_tracking_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
95
+ self._face_tracking_lock = threading.Lock()
96
+
97
+ # Gesture detection state
98
+ self._gesture_detector = None
99
+ self._gesture_detection_enabled = True
100
+ self._current_gesture = "none"
101
+ self._gesture_confidence = 0.0
102
+ self._gesture_lock = threading.Lock()
103
+ self._gesture_frame_counter = 0
104
+ self._gesture_detection_interval = 3 # Run gesture detection every N frames
105
+ self._gesture_state_callback = None # Callback to notify entity registry
106
+
107
+ # Face detection state callback (similar to gesture)
108
+ self._face_state_callback = None # Callback to notify entity registry
109
+ self._last_face_detected_state = False # Track previous state for change detection
110
+
111
+ # Face tracking timing (smooth interpolation when face lost)
112
+ self._last_face_detected_time: Optional[float] = None
113
+ self._interpolation_start_time: Optional[float] = None
114
+ self._interpolation_start_pose: Optional[np.ndarray] = None
115
+ self._face_lost_delay = 1.0 # Reduced from 2.0s to 1.0s for faster response
116
+ self._interpolation_duration = 0.8 # Reduced from 1.0s to 0.8s for faster return
117
+
118
+ # Offset scaling (same as conversation_app)
119
+ self._offset_scale = 0.6
120
+
121
+ # =====================================================================
122
+ # Resource optimization: Adaptive frame rate for face tracking
123
+ # =====================================================================
124
+ # High frequency when: face detected, in conversation, or recently active
125
+ # Low frequency when: idle and no face for extended period
126
+ # Ultra-low when: idle for very long time (just MJPEG stream, minimal AI)
127
+ self._fps_high = fps # Normal tracking rate (15fps)
128
+ self._fps_low = 2 # Low power rate (2fps) - periodic face check
129
+ self._fps_idle = 0.5 # Ultra-low power (0.5fps) - minimal CPU usage
130
+ self._current_fps = fps
131
+
132
+ # Conversation state (set by voice assistant)
133
+ self._in_conversation = False
134
+ self._conversation_lock = threading.Lock()
135
+
136
+ # Adaptive tracking timing
137
+ self._no_face_duration = 0.0 # How long since last face detection
138
+ self._low_power_threshold = 5.0 # Switch to low power after 5s without face
139
+ self._idle_threshold = 30.0 # Switch to idle mode after 30s without face
140
+ self._last_face_check_time = 0.0
141
+
142
+ # Skip AI inference in idle mode (only stream MJPEG)
143
+ self._ai_enabled = True
144
+
145
async def start(self) -> None:
    """Start the capture thread and the MJPEG HTTP server.

    Optional head-tracker and gesture-detector backends are initialized
    first; failures there are logged and the corresponding feature is
    left disabled. If the HTTP server cannot be started, the capture
    thread is stopped and the running flag reset before the error is
    re-raised, so ``start()`` can be retried.

    Raises:
        Exception: whatever ``asyncio.start_server`` raises (for example
            ``OSError`` when the port is unavailable).
    """
    if self._running:
        _LOGGER.warning("Camera server already running")
        return

    self._running = True

    # Initialize head tracker if face tracking enabled
    if self.enable_face_tracking:
        try:
            from .head_tracker import HeadTracker
            self._head_tracker = HeadTracker()
            _LOGGER.info("Face tracking enabled with YOLO head tracker")
        except ImportError as e:
            _LOGGER.error("Failed to import head tracker: %s", e)
            self._head_tracker = None
        except Exception as e:
            _LOGGER.warning("Failed to initialize head tracker: %s", e)
            self._head_tracker = None
    else:
        _LOGGER.info("Face tracking disabled by configuration")

    # Initialize gesture detector
    if self._gesture_detection_enabled:
        try:
            from .gesture_detector import GestureDetector
            self._gesture_detector = GestureDetector()
            if self._gesture_detector.is_available:
                _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
            else:
                _LOGGER.warning("Gesture detection not available")
                self._gesture_detector = None
        except ImportError as e:
            _LOGGER.warning("Failed to import gesture detector: %s", e)
            self._gesture_detector = None
        except Exception as e:
            _LOGGER.warning("Failed to initialize gesture detector: %s", e)
            self._gesture_detector = None

    # Start frame capture thread
    self._capture_thread = threading.Thread(
        target=self._capture_frames,
        daemon=True,
        name="camera-capture"
    )
    self._capture_thread.start()

    # Start HTTP server; undo the partial start if binding fails so the
    # capture loop does not keep running behind a server that never came up.
    try:
        self._server = await asyncio.start_server(
            self._handle_client,
            self.host,
            self.port,
        )
    except Exception:
        self._running = False
        # Same bounded wait that stop() uses for the capture thread.
        self._capture_thread.join(timeout=0.5)
        self._capture_thread = None
        raise

    _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
    _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
    _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
203
+
204
async def stop(self) -> None:
    """Shut down the capture thread and the HTTP server.

    Clears the running flag, waits up to half a second for the capture
    thread, then closes the server and waits for it to finish closing.
    """
    self._running = False

    worker = self._capture_thread
    if worker:
        worker.join(timeout=0.5)
        self._capture_thread = None

    server = self._server
    if server:
        server.close()
        await server.wait_closed()
        self._server = None

    _LOGGER.info("MJPEG Camera server stopped")
218
+
219
def _capture_frames(self) -> None:
    """Capture loop run on the background thread.

    While ``_running`` is set, each iteration grabs a camera frame, stores
    its JPEG encoding as the latest frame for streaming and, when
    ``_should_run_ai_inference`` allows it, runs face tracking,
    face-lost interpolation and (periodically) gesture detection. The
    loop then sleeps for ``1 / _current_fps`` seconds.

    Frame-rate adaptation, driven by how long no face has been seen:
    - ``_fps_high`` as soon as a face is detected
    - ``_fps_low`` after ``_low_power_threshold`` seconds without a face
    - ``_fps_idle`` after ``_idle_threshold`` seconds without a face

    Any exception inside an iteration is logged and followed by a
    one-second pause before the loop continues.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-stripped view of the file - confirm against the repository.
    """
    _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)

    # Counters for the periodic statistics log
    frame_count = 0
    face_detect_count = 0
    last_log_time = time.time()

    while self._running:
        try:
            current_time = time.time()

            # Determine if we should run AI inference this frame
            should_run_ai = self._should_run_ai_inference(current_time)

            # Only get frame if needed (AI inference or MJPEG streaming)
            frame = self._get_camera_frame() if should_run_ai or self._has_stream_clients() else None

            if frame is not None:
                frame_count += 1

                # Encode frame as JPEG for streaming
                encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
                success, jpeg_data = cv2.imencode('.jpg', frame, encode_params)

                if success:
                    # Publish the newest frame under the frame lock
                    with self._frame_lock:
                        self._last_frame = jpeg_data.tobytes()
                        self._last_frame_time = time.time()

                # Only run AI inference when enabled
                if should_run_ai:
                    # Face tracking
                    if self._face_tracking_enabled and self._head_tracker is not None:
                        face_detect_count += 1
                        face_detected = self._process_face_tracking(frame, current_time)

                        # Update adaptive timing based on detection result
                        if face_detected:
                            self._no_face_duration = 0.0
                            self._current_fps = self._fps_high
                            self._ai_enabled = True
                        else:
                            # Accumulate no-face duration: measured from the
                            # last detection when one is recorded, otherwise
                            # estimated from the current frame period
                            if self._last_face_detected_time is not None:
                                self._no_face_duration = current_time - self._last_face_detected_time
                            else:
                                self._no_face_duration += 1.0 / self._current_fps

                            # Adaptive power mode
                            if self._no_face_duration > self._idle_threshold:
                                self._current_fps = self._fps_idle
                            elif self._no_face_duration > self._low_power_threshold:
                                self._current_fps = self._fps_low

                        # Timestamp read by _should_run_ai_inference
                        self._last_face_check_time = current_time

                        # Check for face detection state change and notify callback
                        # Use is_face_detected() which considers face_lost_delay
                        current_face_state = self.is_face_detected()
                        if current_face_state != self._last_face_detected_state:
                            self._last_face_detected_state = current_face_state
                            if self._face_state_callback:
                                try:
                                    self._face_state_callback()
                                except Exception as e:
                                    _LOGGER.debug("Face state callback error: %s", e)

                    # Handle smooth interpolation when face lost
                    self._process_face_lost_interpolation(current_time)

                    # Gesture detection (only when face detected recently)
                    if (self._gesture_detection_enabled and
                            self._gesture_detector is not None and
                            self._no_face_duration < 5.0):  # Only detect gestures when someone is present
                        # Run the detector on every Nth eligible frame only
                        self._gesture_frame_counter += 1
                        if self._gesture_frame_counter >= self._gesture_detection_interval:
                            self._gesture_frame_counter = 0
                            self._process_gesture_detection(frame)

                # Log stats every 30 seconds
                if current_time - last_log_time >= 30.0:
                    fps = frame_count / (current_time - last_log_time)
                    detect_fps = face_detect_count / (current_time - last_log_time)
                    mode = "HIGH" if self._current_fps == self._fps_high else ("LOW" if self._current_fps == self._fps_low else "IDLE")
                    _LOGGER.debug("Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs",
                                  fps, detect_fps, mode, self._no_face_duration)
                    frame_count = 0
                    face_detect_count = 0
                    last_log_time = current_time

            # Sleep to maintain target FPS (use current adaptive rate)
            sleep_time = 1.0 / self._current_fps
            time.sleep(sleep_time)

        except Exception as e:
            _LOGGER.error("Error capturing frame: %s", e)
            # Back off before the next attempt
            time.sleep(1.0)

    _LOGGER.info("Camera capture thread stopped")
325
+
326
+ def _should_run_ai_inference(self, current_time: float) -> bool:
327
+ """Determine if AI inference (face/gesture detection) should run.
328
+
329
+ Returns True if:
330
+ - In conversation mode (always run)
331
+ - Face was recently detected
332
+ - Periodic check in low power mode
333
+ """
334
+ # Always run during conversation
335
+ with self._conversation_lock:
336
+ if self._in_conversation:
337
+ return True
338
+
339
+ # High frequency mode: run every frame
340
+ if self._current_fps == self._fps_high:
341
+ return True
342
+
343
+ # Low/idle power mode: run periodically
344
+ time_since_last = current_time - self._last_face_check_time
345
+ return time_since_last >= (1.0 / self._current_fps)
346
+
347
+ def _has_stream_clients(self) -> bool:
348
+ """Check if there are active MJPEG stream clients."""
349
+ # For now, always return True to keep stream available
350
+ # Could be optimized to track actual client connections
351
+ return True
352
+
353
+ def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
354
+ """Process face tracking on a frame.
355
+
356
+ Returns:
357
+ True if face was detected, False otherwise
358
+ """
359
+ if self._head_tracker is None or self.reachy_mini is None:
360
+ return False
361
+
362
+ try:
363
+ face_center, confidence = self._head_tracker.get_head_position(frame)
364
+
365
+ if face_center is not None:
366
+ # Face detected - update tracking
367
+ self._last_face_detected_time = current_time
368
+ self._interpolation_start_time = None # Stop any interpolation
369
+
370
+ # Convert normalized coordinates to pixel coordinates
371
+ h, w = frame.shape[:2]
372
+ eye_center_norm = (face_center + 1) / 2
373
+
374
+ eye_center_pixels = [
375
+ eye_center_norm[0] * w,
376
+ eye_center_norm[1] * h,
377
+ ]
378
+
379
+ # Get the head pose needed to look at the target
380
+ target_pose = self.reachy_mini.look_at_image(
381
+ eye_center_pixels[0],
382
+ eye_center_pixels[1],
383
+ duration=0.0,
384
+ perform_movement=False,
385
+ )
386
+
387
+ # Extract translation and rotation from target pose
388
+ translation = target_pose[:3, 3]
389
+ rotation = R.from_matrix(target_pose[:3, :3]).as_euler("xyz", degrees=False)
390
+
391
+ # Scale down for smoother tracking (same as conversation_app)
392
+ translation = translation * self._offset_scale
393
+ rotation = rotation * self._offset_scale
394
+
395
+ # Apply pitch offset compensation (robot tends to look up)
396
+ # rotation[1] is pitch in xyz euler order
397
+ # Positive pitch = look down in robot coordinate system
398
+ pitch_offset_rad = np.radians(9.0) # Look down 9 degrees
399
+ rotation[1] += pitch_offset_rad
400
+
401
+ # Apply yaw offset compensation (robot tends to look to user's right)
402
+ # rotation[2] is yaw in xyz euler order
403
+ # Negative yaw = turn right (towards user's left from robot's perspective)
404
+ yaw_offset_rad = np.radians(-7.0) # Turn right 7 degrees
405
+ rotation[2] += yaw_offset_rad
406
+
407
+ # Update face tracking offsets
408
+ with self._face_tracking_lock:
409
+ self._face_tracking_offsets = [
410
+ float(translation[0]),
411
+ float(translation[1]),
412
+ float(translation[2]),
413
+ float(rotation[0]),
414
+ float(rotation[1]),
415
+ float(rotation[2]),
416
+ ]
417
+
418
+ return True
419
+
420
+ return False
421
+
422
+ except Exception as e:
423
+ _LOGGER.debug("Face tracking error: %s", e)
424
+ return False
425
+
426
def _process_face_lost_interpolation(self, current_time: float) -> None:
    """Ease the face-tracking offsets back to neutral once the face is lost.

    Nothing happens while no "last seen" timestamp is recorded or while the
    time since that timestamp is still below ``self._face_lost_delay``.
    After the delay, the offsets that were current at that moment are
    captured as a 4x4 pose and blended towards the identity pose over
    ``self._interpolation_duration``. When the blend completes, the
    timestamp and interpolation state are cleared.

    Args:
        current_time: Timestamp of the current processing tick, on the same
            clock as ``self._last_face_detected_time``.
    """
    last_seen = self._last_face_detected_time
    if last_seen is None:
        return

    if current_time - last_seen < self._face_lost_delay:
        return  # Still within the grace period: keep the current offsets.

    # First tick after the delay: remember where the blend starts from.
    if self._interpolation_start_time is None:
        self._interpolation_start_time = current_time
        with self._face_tracking_lock:
            current_offsets = self._face_tracking_offsets.copy()

        # Offsets are [x, y, z, roll, pitch, yaw]; pack them into a 4x4 pose.
        pose_matrix = np.eye(4, dtype=np.float32)
        pose_matrix[:3, 3] = current_offsets[:3]
        pose_matrix[:3, :3] = R.from_euler("xyz", current_offsets[3:]).as_matrix()
        self._interpolation_start_pose = pose_matrix

    # Progress in [0, 1]. A non-positive duration means "snap to neutral"
    # instead of dividing by zero, and the lower clamp protects the
    # interpolation from a clock that stepped backwards.
    elapsed = current_time - self._interpolation_start_time
    duration = self._interpolation_duration
    if duration <= 0:
        t = 1.0
    else:
        t = min(1.0, max(0.0, elapsed / duration))

    if self._interpolation_start_pose is not None:
        neutral_pose = np.eye(4, dtype=np.float32)
        interpolated_pose = self._linear_pose_interpolation(
            self._interpolation_start_pose, neutral_pose, t
        )

        translation = interpolated_pose[:3, 3]
        rotation = R.from_matrix(interpolated_pose[:3, :3]).as_euler("xyz", degrees=False)

        with self._face_tracking_lock:
            self._face_tracking_offsets = [
                float(translation[0]),
                float(translation[1]),
                float(translation[2]),
                float(rotation[0]),
                float(rotation[1]),
                float(rotation[2]),
            ]

    # Blend finished: clear the state so the next detection starts fresh.
    if t >= 1.0:
        self._last_face_detected_time = None
        self._interpolation_start_time = None
        self._interpolation_start_pose = None
479
+
480
def _linear_pose_interpolation(
    self, start: np.ndarray, end: np.ndarray, t: float
) -> np.ndarray:
    """Blend two 4x4 pose matrices at fraction ``t``.

    Delegates to the SDK's ``linear_pose_interpolation`` when
    ``SDK_INTERPOLATION_AVAILABLE`` is truthy. Otherwise the translation
    column is blended linearly and the rotation block is blended with
    SciPy's ``Slerp``.

    Args:
        start: Pose used at ``t == 0``.
        end: Pose used at ``t == 1``.
        t: Interpolation fraction between the two poses.

    Returns:
        The interpolated pose. The fallback path returns a float32 4x4 matrix.
    """
    if SDK_INTERPOLATION_AVAILABLE:
        return linear_pose_interpolation(start, end, t)

    # Fallback path: only needed when the SDK helper could not be imported.
    from scipy.spatial.transform import Slerp

    blended = np.eye(4, dtype=np.float32)

    # Translation: straight linear mix of the two position columns.
    blended[:3, 3] = start[:3, 3] * (1 - t) + end[:3, 3] * t

    # Rotation: spherical interpolation between the two endpoint rotations,
    # keyed at 0 (start) and 1 (end).
    endpoint_quats = np.array(
        [
            R.from_matrix(start[:3, :3]).as_quat(),
            R.from_matrix(end[:3, :3]).as_quat(),
        ]
    )
    rotation_at = Slerp([0, 1], R.from_quat(endpoint_quats))
    blended[:3, :3] = rotation_at(t).as_matrix()

    return blended
513
+
514
+ # =========================================================================
515
+ # Public API for face tracking
516
+ # =========================================================================
517
+
518
def get_face_tracking_offsets(self) -> Tuple[float, float, float, float, float, float]:
    """Return a snapshot of the current face-tracking offsets.

    The stored offsets are read while holding ``self._face_tracking_lock``.

    Returns:
        The first six stored values as a tuple, ordered
        (x, y, z, roll, pitch, yaw).
    """
    with self._face_tracking_lock:
        stored = self._face_tracking_offsets
        x, y, z, roll, pitch, yaw = (stored[index] for index in range(6))
    return (x, y, z, roll, pitch, yaw)
527
+
528
def is_face_detected(self) -> bool:
    """Report whether a face was seen within the face-lost delay.

    Returns:
        True when a "last seen" timestamp is recorded and less than
        ``self._face_lost_delay`` seconds have passed since it (measured
        with ``time.time()``); False otherwise.
    """
    # Read the timestamp exactly once. The tracking loop resets it to None
    # when the return-to-neutral blend finishes, so a second read after the
    # None check could observe None and make the subtraction raise.
    last_seen = self._last_face_detected_time
    if last_seen is None:
        return False

    return (time.time() - last_seen) < self._face_lost_delay
543
+
544
def set_face_tracking_enabled(self, enabled: bool) -> None:
    """Switch face tracking on or off.

    A call that does not change the current setting is a no-op and logs
    nothing. When tracking is switched off, the "last seen" timestamp is
    set to now and the interpolation start is cleared, which arms the
    return-to-neutral interpolation.

    Args:
        enabled: New face-tracking setting.
    """
    if self._face_tracking_enabled == enabled:
        return

    self._face_tracking_enabled = enabled

    if enabled:
        state_label = "enabled"
    else:
        # Arm the return-to-neutral interpolation from the current offsets.
        self._last_face_detected_time = time.time()
        self._interpolation_start_time = None
        state_label = "disabled"

    _LOGGER.info("Face tracking %s", state_label)
554
+
555
def set_conversation_mode(self, in_conversation: bool) -> None:
    """Record whether the voice assistant is in a conversation.

    Entering conversation mode immediately selects the high frame rate,
    enables the AI processing flag and resets the no-face timer. Leaving
    it only records the flag and logs the change.

    Args:
        in_conversation: True when voice assistant is actively conversing.
    """
    with self._conversation_lock:
        self._in_conversation = in_conversation

        if not in_conversation:
            _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
            return

        # Jump straight to high-frequency tracking.
        self._current_fps = self._fps_high
        self._ai_enabled = True
        self._no_face_duration = 0.0  # Restart the no-face timer
        _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
575
+
576
+ # =========================================================================
577
+ # Gesture detection
578
+ # =========================================================================
579
+
580
def _process_gesture_detection(self, frame: np.ndarray) -> None:
    """Run gesture detection on a frame and publish state changes.

    Does nothing when no gesture detector is configured. Otherwise the
    detector result updates ``self._current_gesture`` and
    ``self._gesture_confidence`` under ``self._gesture_lock``; a detector
    value of ``"no_gesture"`` is stored as ``"none"`` with confidence 0.0.
    When the stored gesture name changes, the registered state callback
    (if any) is invoked outside the lock. Errors are logged, never raised.

    Args:
        frame: Frame handed to ``self._gesture_detector.detect``.
    """
    detector = self._gesture_detector
    if detector is None:
        return

    try:
        detected_gesture, confidence = detector.detect(frame)
        gesture_name = detected_gesture.value

        with self._gesture_lock:
            previous_gesture = self._current_gesture
            if gesture_name != "no_gesture":
                self._current_gesture = gesture_name
                self._gesture_confidence = confidence
            else:
                self._current_gesture = "none"
                self._gesture_confidence = 0.0
            state_changed = self._current_gesture != previous_gesture

        if state_changed and gesture_name != "no_gesture":
            _LOGGER.debug("Gesture: %s (%.0f%%)", gesture_name, confidence * 100)

        # Notify outside the lock so the callback can call the gesture
        # getters without deadlocking.
        callback = self._gesture_state_callback
        if state_changed and callback:
            try:
                callback()
            except Exception as e:
                # Best effort: a failing listener must not stop detection,
                # but the failure should be visible in the logs.
                _LOGGER.warning("Gesture state callback failed: %s", e)

    except Exception as e:
        _LOGGER.warning("Gesture detection error: %s", e)
615
+
616
def get_current_gesture(self) -> str:
    """Return the name of the currently stored gesture.

    The value is read while holding ``self._gesture_lock``.

    Returns:
        Gesture name string (e.g., "like", "peace", "none").
    """
    with self._gesture_lock:
        gesture_name = self._current_gesture
    return gesture_name
624
+
625
def get_gesture_confidence(self) -> float:
    """Return the stored gesture confidence scaled for percentage display.

    The value is read while holding ``self._gesture_lock``.

    Returns:
        The stored confidence multiplied by 100, i.e. a percentage when the
        stored value is a 0.0-1.0 fraction.
    """
    with self._gesture_lock:
        stored_confidence = self._gesture_confidence
    return stored_confidence * 100.0
633
+
634
def set_gesture_detection_enabled(self, enabled: bool) -> None:
    """Switch gesture detection on or off.

    Switching it off also resets the stored gesture to ``"none"`` with
    confidence 0.0, under ``self._gesture_lock``.

    Args:
        enabled: New gesture-detection setting.
    """
    self._gesture_detection_enabled = enabled

    if enabled:
        _LOGGER.info("Gesture detection %s", "enabled")
        return

    # Drop any gesture still on record so readers do not see a stale value.
    with self._gesture_lock:
        self._current_gesture = "none"
        self._gesture_confidence = 0.0
    _LOGGER.info("Gesture detection %s", "disabled")
642
+
643
def set_gesture_state_callback(self, callback) -> None:
    """Register the listener invoked when the stored gesture changes.

    Args:
        callback: Object stored as the gesture-state listener; gesture
            processing calls it with no arguments.
    """
    self._gesture_state_callback = callback
646
+
647
def set_face_state_callback(self, callback) -> None:
    """Register the listener for face-detection state changes.

    Args:
        callback: Object stored as the face-state listener.
    """
    self._face_state_callback = callback
650
+
651
def _get_camera_frame(self) -> Optional[np.ndarray]:
    """Fetch one frame from the robot camera.

    Returns:
        The generated test pattern when no robot is attached, the frame
        returned by ``reachy_mini.media.get_frame()`` otherwise, or None
        when fetching the frame raises.
    """
    if self.reachy_mini is None:
        # No robot connected: serve the placeholder image instead.
        return self._generate_test_frame()

    try:
        return self.reachy_mini.media.get_frame()
    except Exception as e:
        _LOGGER.debug("Failed to get camera frame: %s", e)

    return None
663
+
664
def _generate_test_frame(self) -> np.ndarray:
    """Build the placeholder frame shown when no camera is available.

    Returns:
        A black 480x640 three-channel uint8 image with a title, a
        "No camera connected" notice and the current local timestamp drawn
        onto it.
    """
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)

    # (text, origin, font scale, colour, thickness) for each caption.
    captions = (
        ("Reachy Mini Camera", (150, 200), 1.2, (255, 255, 255), 2),
        ("No camera connected", (180, 280), 0.8, (128, 128, 128), 1),
        (time.strftime("%Y-%m-%d %H:%M:%S"), (220, 350), 0.6, (0, 255, 0), 1),
    )
    for text, origin, scale, colour, thickness in captions:
        cv2.putText(
            canvas,
            text,
            origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            scale,
            colour,
            thickness,
        )

    return canvas
702
+
703
def get_snapshot(self) -> Optional[bytes]:
    """Return the most recently stored frame.

    The value is read while holding ``self._frame_lock``.

    Returns:
        The stored JPEG bytes, or None when no frame has been stored yet.
    """
    with self._frame_lock:
        latest_jpeg = self._last_frame
    return latest_jpeg
707
+
708
async def _handle_client(
    self,
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
) -> None:
    """Serve one HTTP client connection.

    Reads the request line, consumes the headers, then dispatches on the
    request path: ``/stream`` to the MJPEG stream, ``/snapshot`` to a
    single JPEG, and anything else to the index page. A query string on
    the path is ignored for routing. The connection is always closed
    before returning.

    Args:
        reader: Stream the request is read from.
        writer: Stream the response is written to.
    """
    try:
        request_line = await asyncio.wait_for(
            reader.readline(),
            timeout=10.0,
        )
        if not request_line:
            # The peer closed the connection without sending a request.
            return
        request = request_line.decode('utf-8', errors='ignore').strip()

        # Headers are not used, but they must be consumed up to the blank
        # line (or EOF) that terminates them.
        while True:
            line = await asyncio.wait_for(reader.readline(), timeout=5.0)
            if line in (b'\r\n', b'\n', b''):
                break

        # Request line is "<method> <target> <version>"; fall back to the
        # index when no target is present.
        parts = request.split(' ')
        target = parts[1] if len(parts) >= 2 else '/'
        # Route on the path only, so cache-busting URLs such as
        # "/snapshot?t=123" still reach the right handler.
        path = target.split('?', 1)[0]

        _LOGGER.debug("HTTP request: %s", request)

        if path == '/stream':
            await self._handle_stream(writer)
        elif path == '/snapshot':
            await self._handle_snapshot(writer)
        else:
            await self._handle_index(writer)

    except asyncio.TimeoutError:
        _LOGGER.debug("Client connection timeout")
    except ConnectionError:
        # Reset, aborted and broken-pipe all mean the client went away.
        _LOGGER.debug("Client connection reset")
    except Exception as e:
        _LOGGER.error("Error handling client: %s", e)
    finally:
        try:
            writer.close()
            await writer.wait_closed()
        except Exception:
            pass  # The connection is already gone; nothing left to clean up.
756
+
757
async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
    """Send the HTML landing page.

    The page embeds the live stream, lists the available endpoints and
    shows the stream URL (using ``self.port``) for Home Assistant.

    Args:
        writer: Stream the HTTP response is written to.
    """
    html = f"""<!DOCTYPE html>
<html>
<head>
<title>Reachy Mini Camera</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
h1 {{ color: #00d4ff; }}
.container {{ max-width: 800px; margin: 0 auto; }}
.stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
a {{ color: #00d4ff; }}
.info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
</style>
</head>
<body>
<div class="container">
<h1>Reachy Mini Camera</h1>
<img class="stream" src="/stream" alt="Camera Stream">
<div class="info">
<h3>Endpoints:</h3>
<ul>
<li><a href="/stream">/stream</a> - MJPEG video stream</li>
<li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
</ul>
<h3>Home Assistant Integration:</h3>
<p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
</div>
</div>
</body>
</html>"""

    # Content-Length counts bytes on the wire, not characters, so encode
    # first and measure the encoded body.
    body = html.encode('utf-8')

    response = (
        "HTTP/1.1 200 OK\r\n"
        "Content-Type: text/html; charset=utf-8\r\n"
        f"Content-Length: {len(body)}\r\n"
        "Connection: close\r\n"
        "\r\n"
    )

    writer.write(response.encode('utf-8'))
    writer.write(body)
    await writer.drain()
800
+
801
async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
    """Send the latest frame as a single JPEG response.

    Replies ``503 Service Unavailable`` with a plain-text body when no
    frame is available yet.

    Args:
        writer: Stream the HTTP response is written to.
    """
    jpeg_data = self.get_snapshot()

    if jpeg_data is not None:
        header_lines = [
            "HTTP/1.1 200 OK",
            "Content-Type: image/jpeg",
            f"Content-Length: {len(jpeg_data)}",
            "Cache-Control: no-cache, no-store, must-revalidate",
            "Connection: close",
        ]
        header = "\r\n".join(header_lines) + "\r\n\r\n"
        writer.write(header.encode('utf-8'))
        writer.write(jpeg_data)
    else:
        unavailable_lines = [
            "HTTP/1.1 503 Service Unavailable",
            "Content-Type: text/plain",
            "Connection: close",
        ]
        unavailable = "\r\n".join(unavailable_lines) + "\r\n\r\n" + "No frame available"
        writer.write(unavailable.encode('utf-8'))

    await writer.drain()
827
+
828
async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
    """Serve an MJPEG stream until the server stops or the client leaves.

    Sends the ``multipart/x-mixed-replace`` response headers, then polls
    the latest stored frame and writes it as a new part whenever its
    timestamp is newer than the last one sent.

    Args:
        writer: Stream the HTTP response is written to.
    """
    response = (
        "HTTP/1.1 200 OK\r\n"
        f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
        "Cache-Control: no-cache, no-store, must-revalidate\r\n"
        "Connection: keep-alive\r\n"
        "\r\n"
    )
    writer.write(response.encode('utf-8'))
    await writer.drain()

    _LOGGER.debug("Started MJPEG stream")

    last_sent_time = 0

    try:
        while self._running:
            # Snapshot the frame and its timestamp together under the lock.
            with self._frame_lock:
                jpeg_data = self._last_frame
                frame_time = self._last_frame_time

            # Only send frames the client has not received yet.
            if jpeg_data is not None and frame_time > last_sent_time:
                frame_header = (
                    f"--{MJPEG_BOUNDARY}\r\n"
                    "Content-Type: image/jpeg\r\n"
                    f"Content-Length: {len(jpeg_data)}\r\n"
                    "\r\n"
                )

                writer.write(frame_header.encode('utf-8'))
                writer.write(jpeg_data)
                writer.write(b"\r\n")
                await writer.drain()

                last_sent_time = frame_time

            # Short pause so the loop does not spin while waiting for a
            # new frame.
            await asyncio.sleep(0.01)

    except ConnectionError:
        # Covers reset, aborted and broken-pipe: all are an ordinary client
        # disconnect and not worth an error-level log entry.
        _LOGGER.debug("Client disconnected from stream")
    except Exception as e:
        _LOGGER.error("Error in MJPEG stream: %s", e)

    _LOGGER.debug("Ended MJPEG stream")
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py RENAMED
@@ -1,56 +1,56 @@
1
  """ESPHome entity definitions."""
2
 
3
- import logging
4
  from abc import abstractmethod
5
- from collections.abc import Callable, Iterable
6
- from typing import TYPE_CHECKING
 
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
10
- BinarySensorStateResponse,
11
- CameraImageRequest,
12
- CameraImageResponse,
13
  ListEntitiesBinarySensorResponse,
 
14
  ListEntitiesCameraResponse,
15
  ListEntitiesMediaPlayerResponse,
16
  ListEntitiesNumberResponse,
17
  ListEntitiesRequest,
 
 
 
18
  ListEntitiesTextSensorResponse,
 
 
 
 
19
  MediaPlayerCommandRequest,
20
  MediaPlayerStateResponse,
21
  NumberCommandRequest,
22
  NumberStateResponse,
 
 
 
23
  SubscribeHomeAssistantStatesRequest,
24
  SubscribeStatesRequest,
 
 
25
  TextSensorStateResponse,
26
  )
27
- from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerEntityFeature, MediaPlayerState
28
  from google.protobuf import message
29
 
30
- from ..audio.audio_player import AudioPlayer
31
- from ..core.util import call_all
 
32
 
33
  if TYPE_CHECKING:
34
- from ..protocol.api_server import APIServer
35
 
36
  logger = logging.getLogger(__name__)
37
 
38
 
39
- def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
40
- """Read an entity value without letting getter failures break the ESPHome session."""
41
- if getter is None:
42
- return current_value
43
- try:
44
- return getter()
45
- except Exception as e:
46
- logger.error("Entity getter failed for %s: %s", entity_name, e)
47
- return current_value
48
-
49
-
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
- def __init__(self, server: "APIServer") -> None:
54
  self.server = server
55
 
56
  @abstractmethod
@@ -63,7 +63,7 @@ class MediaPlayerEntity(ESPHomeEntity):
63
 
64
  def __init__(
65
  self,
66
- server: "APIServer",
67
  key: int,
68
  name: str,
69
  object_id: str,
@@ -82,9 +82,9 @@ class MediaPlayerEntity(ESPHomeEntity):
82
 
83
  def play(
84
  self,
85
- url: str | list[str],
86
  announcement: bool = False,
87
- done_callback: Callable[[], None] | None = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
@@ -92,14 +92,18 @@ class MediaPlayerEntity(ESPHomeEntity):
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
- done_callback=lambda: call_all(self.music_player.resume, done_callback),
 
 
96
  )
97
  else:
98
  # Announce, idle
99
  self.announce_player.play(
100
  url,
101
  done_callback=lambda: call_all(
102
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
103
  done_callback,
104
  ),
105
  )
@@ -108,7 +112,9 @@ class MediaPlayerEntity(ESPHomeEntity):
108
  self.music_player.play(
109
  url,
110
  done_callback=lambda: call_all(
111
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
112
  done_callback,
113
  ),
114
  )
@@ -127,9 +133,6 @@ class MediaPlayerEntity(ESPHomeEntity):
127
  elif msg.command == MediaPlayerCommand.PLAY:
128
  self.music_player.resume()
129
  yield self._update_state(MediaPlayerState.PLAYING)
130
- elif msg.command == MediaPlayerCommand.STOP:
131
- self.music_player.stop()
132
- yield self._update_state(MediaPlayerState.IDLE)
133
  elif msg.has_volume:
134
  volume = int(msg.volume * 100)
135
  self.music_player.set_volume(volume)
@@ -173,13 +176,13 @@ class TextSensorEntity(ESPHomeEntity):
173
 
174
  def __init__(
175
  self,
176
- server: "APIServer",
177
  key: int,
178
  name: str,
179
  object_id: str,
180
  icon: str = "",
181
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
182
- value_getter: Callable[[], str] | None = None,
183
  ) -> None:
184
  ESPHomeEntity.__init__(self, server)
185
  self.key = key
@@ -192,7 +195,9 @@ class TextSensorEntity(ESPHomeEntity):
192
 
193
  @property
194
  def value(self) -> str:
195
- return str(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
196
 
197
  @value.setter
198
  def value(self, new_value: str) -> None:
@@ -227,14 +232,14 @@ class BinarySensorEntity(ESPHomeEntity):
227
 
228
  def __init__(
229
  self,
230
- server: "APIServer",
231
  key: int,
232
  name: str,
233
  object_id: str,
234
  icon: str = "",
235
  device_class: str = "",
236
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
237
- value_getter: Callable[[], bool] | None = None,
238
  ) -> None:
239
  ESPHomeEntity.__init__(self, server)
240
  self.key = key
@@ -248,7 +253,9 @@ class BinarySensorEntity(ESPHomeEntity):
248
 
249
  @property
250
  def value(self) -> bool:
251
- return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
252
 
253
  @value.setter
254
  def value(self, new_value: bool) -> None:
@@ -284,7 +291,7 @@ class NumberEntity(ESPHomeEntity):
284
 
285
  def __init__(
286
  self,
287
- server: "APIServer",
288
  key: int,
289
  name: str,
290
  object_id: str,
@@ -295,8 +302,8 @@ class NumberEntity(ESPHomeEntity):
295
  unit_of_measurement: str = "",
296
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
297
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
298
- value_getter: Callable[[], float] | None = None,
299
- value_setter: Callable[[float], None] | None = None,
300
  ) -> None:
301
  ESPHomeEntity.__init__(self, server)
302
  self.key = key
@@ -315,7 +322,9 @@ class NumberEntity(ESPHomeEntity):
315
 
316
  @property
317
  def value(self) -> float:
318
- return float(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
319
 
320
  @value.setter
321
  def value(self, new_value: float) -> None:
@@ -362,12 +371,12 @@ class CameraEntity(ESPHomeEntity):
362
 
363
  def __init__(
364
  self,
365
- server: "APIServer",
366
  key: int,
367
  name: str,
368
  object_id: str,
369
  icon: str = "mdi:camera",
370
- image_getter: Callable[[], bytes | None] | None = None,
371
  ) -> None:
372
  ESPHomeEntity.__init__(self, server)
373
  self.key = key
@@ -376,7 +385,7 @@ class CameraEntity(ESPHomeEntity):
376
  self.icon = icon
377
  self._image_getter = image_getter
378
 
379
- def get_image(self) -> bytes | None:
380
  """Get the current camera image as JPEG bytes."""
381
  if self._image_getter:
382
  return self._image_getter()
@@ -407,3 +416,4 @@ class CameraEntity(ESPHomeEntity):
407
  data=b"",
408
  done=True,
409
  )
 
 
1
  """ESPHome entity definitions."""
2
 
 
3
  from abc import abstractmethod
4
+ from collections.abc import Iterable
5
+ from typing import Callable, List, Optional, Union, TYPE_CHECKING
6
+ import logging
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
 
 
10
  ListEntitiesBinarySensorResponse,
11
+ ListEntitiesButtonResponse,
12
  ListEntitiesCameraResponse,
13
  ListEntitiesMediaPlayerResponse,
14
  ListEntitiesNumberResponse,
15
  ListEntitiesRequest,
16
+ ListEntitiesSelectResponse,
17
+ ListEntitiesSensorResponse,
18
+ ListEntitiesSwitchResponse,
19
  ListEntitiesTextSensorResponse,
20
+ BinarySensorStateResponse,
21
+ ButtonCommandRequest,
22
+ CameraImageRequest,
23
+ CameraImageResponse,
24
  MediaPlayerCommandRequest,
25
  MediaPlayerStateResponse,
26
  NumberCommandRequest,
27
  NumberStateResponse,
28
+ SelectCommandRequest,
29
+ SelectStateResponse,
30
+ SensorStateResponse,
31
  SubscribeHomeAssistantStatesRequest,
32
  SubscribeStatesRequest,
33
+ SwitchCommandRequest,
34
+ SwitchStateResponse,
35
  TextSensorStateResponse,
36
  )
37
+ from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerState, MediaPlayerEntityFeature
38
  from google.protobuf import message
39
 
40
+ from .api_server import APIServer
41
+ from .audio_player import AudioPlayer
42
+ from .util import call_all
43
 
44
  if TYPE_CHECKING:
45
+ from reachy_mini import ReachyMini
46
 
47
  logger = logging.getLogger(__name__)
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
+ def __init__(self, server: APIServer) -> None:
54
  self.server = server
55
 
56
  @abstractmethod
 
63
 
64
  def __init__(
65
  self,
66
+ server: APIServer,
67
  key: int,
68
  name: str,
69
  object_id: str,
 
82
 
83
  def play(
84
  self,
85
+ url: Union[str, List[str]],
86
  announcement: bool = False,
87
+ done_callback: Optional[Callable[[], None]] = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
 
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
+ done_callback=lambda: call_all(
96
+ self.music_player.resume, done_callback
97
+ ),
98
  )
99
  else:
100
  # Announce, idle
101
  self.announce_player.play(
102
  url,
103
  done_callback=lambda: call_all(
104
+ lambda: self.server.send_messages(
105
+ [self._update_state(MediaPlayerState.IDLE)]
106
+ ),
107
  done_callback,
108
  ),
109
  )
 
112
  self.music_player.play(
113
  url,
114
  done_callback=lambda: call_all(
115
+ lambda: self.server.send_messages(
116
+ [self._update_state(MediaPlayerState.IDLE)]
117
+ ),
118
  done_callback,
119
  ),
120
  )
 
133
  elif msg.command == MediaPlayerCommand.PLAY:
134
  self.music_player.resume()
135
  yield self._update_state(MediaPlayerState.PLAYING)
 
 
 
136
  elif msg.has_volume:
137
  volume = int(msg.volume * 100)
138
  self.music_player.set_volume(volume)
 
176
 
177
  def __init__(
178
  self,
179
+ server: APIServer,
180
  key: int,
181
  name: str,
182
  object_id: str,
183
  icon: str = "",
184
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
185
+ value_getter: Optional[Callable[[], str]] = None,
186
  ) -> None:
187
  ESPHomeEntity.__init__(self, server)
188
  self.key = key
 
195
 
196
  @property
197
  def value(self) -> str:
198
+ if self._value_getter:
199
+ return self._value_getter()
200
+ return self._value
201
 
202
  @value.setter
203
  def value(self, new_value: str) -> None:
 
232
 
233
  def __init__(
234
  self,
235
+ server: APIServer,
236
  key: int,
237
  name: str,
238
  object_id: str,
239
  icon: str = "",
240
  device_class: str = "",
241
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
242
+ value_getter: Optional[Callable[[], bool]] = None,
243
  ) -> None:
244
  ESPHomeEntity.__init__(self, server)
245
  self.key = key
 
253
 
254
  @property
255
  def value(self) -> bool:
256
+ if self._value_getter:
257
+ return self._value_getter()
258
+ return self._value
259
 
260
  @value.setter
261
  def value(self, new_value: bool) -> None:
 
291
 
292
  def __init__(
293
  self,
294
+ server: APIServer,
295
  key: int,
296
  name: str,
297
  object_id: str,
 
302
  unit_of_measurement: str = "",
303
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
304
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
305
+ value_getter: Optional[Callable[[], float]] = None,
306
+ value_setter: Optional[Callable[[float], None]] = None,
307
  ) -> None:
308
  ESPHomeEntity.__init__(self, server)
309
  self.key = key
 
322
 
323
  @property
324
  def value(self) -> float:
325
+ if self._value_getter:
326
+ return self._value_getter()
327
+ return self._value
328
 
329
  @value.setter
330
  def value(self, new_value: float) -> None:
 
371
 
372
  def __init__(
373
  self,
374
+ server: APIServer,
375
  key: int,
376
  name: str,
377
  object_id: str,
378
  icon: str = "mdi:camera",
379
+ image_getter: Optional[Callable[[], Optional[bytes]]] = None,
380
  ) -> None:
381
  ESPHomeEntity.__init__(self, server)
382
  self.key = key
 
385
  self.icon = icon
386
  self._image_getter = image_getter
387
 
388
+ def get_image(self) -> Optional[bytes]:
389
  """Get the current camera image as JPEG bytes."""
390
  if self._image_getter:
391
  return self._image_getter()
 
416
  data=b"",
417
  done=True,
418
  )
419
+
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py RENAMED
@@ -1,16 +1,16 @@
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
 
 
3
  import logging
4
- from collections.abc import Callable, Iterable
5
- from typing import TYPE_CHECKING
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
8
- ButtonCommandRequest,
9
  ListEntitiesButtonResponse,
10
  ListEntitiesRequest,
11
  ListEntitiesSelectResponse,
12
  ListEntitiesSensorResponse,
13
  ListEntitiesSwitchResponse,
 
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
@@ -21,28 +21,14 @@ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
21
  )
22
  from google.protobuf import message
23
 
 
24
  from .entity import ESPHomeEntity
25
 
26
- if TYPE_CHECKING:
27
- from ..protocol.api_server import APIServer
28
-
29
  logger = logging.getLogger(__name__)
30
 
31
 
32
- def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
33
- """Read an entity value without letting getter failures break the ESPHome session."""
34
- if getter is None:
35
- return current_value
36
- try:
37
- return getter()
38
- except Exception as e:
39
- logger.error("Entity getter failed for %s: %s", entity_name, e)
40
- return current_value
41
-
42
-
43
  class SensorStateClass:
44
  """ESPHome SensorStateClass enum values."""
45
-
46
  NONE = 0
47
  MEASUREMENT = 1
48
  TOTAL_INCREASING = 2
@@ -54,7 +40,7 @@ class SensorEntity(ESPHomeEntity):
54
 
55
  def __init__(
56
  self,
57
- server: "APIServer",
58
  key: int,
59
  name: str,
60
  object_id: str,
@@ -64,7 +50,7 @@ class SensorEntity(ESPHomeEntity):
64
  device_class: str = "",
65
  state_class: int = SensorStateClass.NONE,
66
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
67
- value_getter: Callable[[], float] | None = None,
68
  ) -> None:
69
  ESPHomeEntity.__init__(self, server)
70
  self.key = key
@@ -75,7 +61,7 @@ class SensorEntity(ESPHomeEntity):
75
  self.accuracy_decimals = accuracy_decimals
76
  self.device_class = device_class
77
  self.entity_category = entity_category
78
- # Convert string state_class to enum
79
  if isinstance(state_class, str):
80
  state_class_map = {
81
  "": SensorStateClass.NONE,
@@ -91,7 +77,9 @@ class SensorEntity(ESPHomeEntity):
91
 
92
  @property
93
  def value(self) -> float:
94
- return float(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
95
 
96
  @value.setter
97
  def value(self, new_value: float) -> None:
@@ -130,15 +118,15 @@ class SwitchEntity(ESPHomeEntity):
130
 
131
  def __init__(
132
  self,
133
- server: "APIServer",
134
  key: int,
135
  name: str,
136
  object_id: str,
137
  icon: str = "",
138
  device_class: str = "",
139
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
140
- value_getter: Callable[[], bool] | None = None,
141
- value_setter: Callable[[bool], None] | None = None,
142
  ) -> None:
143
  ESPHomeEntity.__init__(self, server)
144
  self.key = key
@@ -153,7 +141,9 @@ class SwitchEntity(ESPHomeEntity):
153
 
154
  @property
155
  def value(self) -> bool:
156
- return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
157
 
158
  @value.setter
159
  def value(self, new_value: bool) -> None:
@@ -193,15 +183,15 @@ class SelectEntity(ESPHomeEntity):
193
 
194
  def __init__(
195
  self,
196
- server: "APIServer",
197
  key: int,
198
  name: str,
199
  object_id: str,
200
- options: list[str],
201
  icon: str = "",
202
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
203
- value_getter: Callable[[], str] | None = None,
204
- value_setter: Callable[[str], None] | None = None,
205
  ) -> None:
206
  ESPHomeEntity.__init__(self, server)
207
  self.key = key
@@ -216,7 +206,9 @@ class SelectEntity(ESPHomeEntity):
216
 
217
  @property
218
  def value(self) -> str:
219
- return str(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
220
 
221
  @value.setter
222
  def value(self, new_value: str) -> None:
@@ -260,14 +252,14 @@ class ButtonEntity(ESPHomeEntity):
260
 
261
  def __init__(
262
  self,
263
- server: "APIServer",
264
  key: int,
265
  name: str,
266
  object_id: str,
267
  icon: str = "",
268
  device_class: str = "",
269
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
270
- on_press: Callable[[], None] | None = None,
271
  ) -> None:
272
  ESPHomeEntity.__init__(self, server)
273
  self.key = key
 
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
3
+ from collections.abc import Iterable
4
+ from typing import Callable, List, Optional
5
  import logging
 
 
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
8
  ListEntitiesButtonResponse,
9
  ListEntitiesRequest,
10
  ListEntitiesSelectResponse,
11
  ListEntitiesSensorResponse,
12
  ListEntitiesSwitchResponse,
13
+ ButtonCommandRequest,
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
 
21
  )
22
  from google.protobuf import message
23
 
24
+ from .api_server import APIServer
25
  from .entity import ESPHomeEntity
26
 
 
 
 
27
  logger = logging.getLogger(__name__)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  class SensorStateClass:
31
  """ESPHome SensorStateClass enum values."""
 
32
  NONE = 0
33
  MEASUREMENT = 1
34
  TOTAL_INCREASING = 2
 
40
 
41
  def __init__(
42
  self,
43
+ server: APIServer,
44
  key: int,
45
  name: str,
46
  object_id: str,
 
50
  device_class: str = "",
51
  state_class: int = SensorStateClass.NONE,
52
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
53
+ value_getter: Optional[Callable[[], float]] = None,
54
  ) -> None:
55
  ESPHomeEntity.__init__(self, server)
56
  self.key = key
 
61
  self.accuracy_decimals = accuracy_decimals
62
  self.device_class = device_class
63
  self.entity_category = entity_category
64
+ # Convert string state_class to int if needed (for backward compatibility)
65
  if isinstance(state_class, str):
66
  state_class_map = {
67
  "": SensorStateClass.NONE,
 
77
 
78
  @property
79
  def value(self) -> float:
80
+ if self._value_getter:
81
+ return self._value_getter()
82
+ return self._value
83
 
84
  @value.setter
85
  def value(self, new_value: float) -> None:
 
118
 
119
  def __init__(
120
  self,
121
+ server: APIServer,
122
  key: int,
123
  name: str,
124
  object_id: str,
125
  icon: str = "",
126
  device_class: str = "",
127
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
128
+ value_getter: Optional[Callable[[], bool]] = None,
129
+ value_setter: Optional[Callable[[bool], None]] = None,
130
  ) -> None:
131
  ESPHomeEntity.__init__(self, server)
132
  self.key = key
 
141
 
142
  @property
143
  def value(self) -> bool:
144
+ if self._value_getter:
145
+ return self._value_getter()
146
+ return self._value
147
 
148
  @value.setter
149
  def value(self, new_value: bool) -> None:
 
183
 
184
  def __init__(
185
  self,
186
+ server: APIServer,
187
  key: int,
188
  name: str,
189
  object_id: str,
190
+ options: List[str],
191
  icon: str = "",
192
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
193
+ value_getter: Optional[Callable[[], str]] = None,
194
+ value_setter: Optional[Callable[[str], None]] = None,
195
  ) -> None:
196
  ESPHomeEntity.__init__(self, server)
197
  self.key = key
 
206
 
207
  @property
208
  def value(self) -> str:
209
+ if self._value_getter:
210
+ return self._value_getter()
211
+ return self._value
212
 
213
  @value.setter
214
  def value(self, new_value: str) -> None:
 
252
 
253
  def __init__(
254
  self,
255
+ server: APIServer,
256
  key: int,
257
  name: str,
258
  object_id: str,
259
  icon: str = "",
260
  device_class: str = "",
261
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
262
+ on_press: Optional[Callable[[], None]] = None,
263
  ) -> None:
264
  ESPHomeEntity.__init__(self, server)
265
  self.key = key
reachy_mini_ha_voice/entity_registry.py ADDED
@@ -0,0 +1,976 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entity registry for ESPHome entities.
2
+
3
+ This module handles the registration and management of all ESPHome entities
4
+ for the Reachy Mini voice assistant.
5
+ """
6
+
7
+ import logging
8
+ from typing import TYPE_CHECKING, Callable, Dict, List, Optional
9
+
10
+ from .entity import BinarySensorEntity, CameraEntity, NumberEntity, TextSensorEntity
11
+ from .entity_extensions import SensorEntity, SwitchEntity, SelectEntity, ButtonEntity
12
+
13
+ if TYPE_CHECKING:
14
+ from .reachy_controller import ReachyController
15
+ from .camera_server import MJPEGCameraServer
16
+
17
_LOGGER = logging.getLogger(__name__)


# Fixed entity key mapping - ensures consistent keys across restarts.
# Keys are hand-assigned and grouped by feature phase; they are NOT derived
# from a hash. Only unregistered object_ids fall back to a checksum-based key
# (see get_entity_key).
ENTITY_KEYS: Dict[str, int] = {
    # Media player (key 0 reserved)
    "reachy_mini_media_player": 0,
    # Phase 1: Basic status and volume
    "daemon_state": 100,
    "backend_ready": 101,
    "speaker_volume": 103,
    # Phase 2: Motor control
    "motors_enabled": 200,
    "motor_mode": 201,
    "wake_up": 202,
    "go_to_sleep": 203,
    # Phase 3: Pose control
    "head_x": 300,
    "head_y": 301,
    "head_z": 302,
    "head_roll": 303,
    "head_pitch": 304,
    "head_yaw": 305,
    "body_yaw": 306,
    "antenna_left": 307,
    "antenna_right": 308,
    # Phase 4: Look at control
    "look_at_x": 400,
    "look_at_y": 401,
    "look_at_z": 402,
    # Phase 5: DOA (Direction of Arrival) - re-added for wakeup turn-to-sound
    "doa_angle": 500,
    "speech_detected": 501,
    # Phase 6: Diagnostic information
    "control_loop_frequency": 600,
    "sdk_version": 601,
    "robot_name": 602,
    "wireless_version": 603,
    "simulation_mode": 604,
    "wlan_ip": 605,
    "error_message": 606,  # Moved to diagnostic
    # Phase 7: IMU sensors
    "imu_accel_x": 700,
    "imu_accel_y": 701,
    "imu_accel_z": 702,
    "imu_gyro_x": 703,
    "imu_gyro_y": 704,
    "imu_gyro_z": 705,
    "imu_temperature": 706,
    # Phase 8: Emotion selector
    "emotion": 800,
    # Phase 9: Audio controls
    "microphone_volume": 900,
    # Phase 10: Camera
    "camera_url": 1000,  # Keep for backward compatibility
    "camera": 1001,  # New camera entity
    # Phase 11: LED control (disabled - not visible)
    # "led_brightness": 1100,
    # "led_effect": 1101,
    # "led_color_r": 1102,
    # "led_color_g": 1103,
    # "led_color_b": 1104,
    # Phase 12: Audio processing
    "agc_enabled": 1200,
    "agc_max_gain": 1201,
    "noise_suppression": 1202,
    "echo_cancellation_converged": 1203,
    # Phase 13: Sendspin - auto-enabled via mDNS, no user entities needed
    # Phase 21: Continuous conversation
    "continuous_conversation": 1500,
    # Phase 22: Gesture detection
    "gesture_detected": 1600,
    "gesture_confidence": 1601,
    # Phase 23: Face detection status
    "face_detected": 1700,
}


def get_entity_key(object_id: str) -> int:
    """Get a consistent entity key for the given object_id.

    Args:
        object_id: The object_id of the entity being registered.

    Returns:
        The fixed key from ENTITY_KEYS when the object_id is registered.
        Otherwise a fallback key in the range 2000-11999 derived from a
        CRC32 checksum of the object_id, so the same object_id always maps
        to the same key, even across process restarts.
    """
    registered_key = ENTITY_KEYS.get(object_id)
    if registered_key is not None:
        return registered_key

    # Local import keeps this block self-contained.
    import zlib

    # Fallback: should not happen if all entities are registered.
    # The built-in hash() is salted per process for strings, so it would
    # yield a different key after every restart; CRC32 is deterministic.
    _LOGGER.warning("Entity key not found for %s, generating from hash", object_id)
    return zlib.crc32(object_id.encode("utf-8")) % 10000 + 2000
103
+
104
+
105
+ class EntityRegistry:
106
+ """Registry for managing ESPHome entities."""
107
+
108
+ def __init__(
109
+ self,
110
+ server,
111
+ reachy_controller: "ReachyController",
112
+ camera_server: Optional["MJPEGCameraServer"] = None,
113
+ play_emotion_callback: Optional[Callable[[str], None]] = None,
114
+ ):
115
+ """Initialize the entity registry.
116
+
117
+ Args:
118
+ server: The VoiceSatelliteProtocol server instance
119
+ reachy_controller: The ReachyController instance
120
+ camera_server: Optional camera server for camera entity
121
+ play_emotion_callback: Optional callback for playing emotions
122
+ """
123
+ self.server = server
124
+ self.reachy_controller = reachy_controller
125
+ self.camera_server = camera_server
126
+ self._play_emotion_callback = play_emotion_callback
127
+
128
+ # Gesture detection state
129
+ self._current_gesture = "none"
130
+ self._gesture_confidence = 0.0
131
+
132
+ # Emotion state
133
+ self._current_emotion = "None"
134
+ # Map emotion names to available robot emotions
135
+ # Full list of available emotions from robot
136
+ self._emotion_map = {
137
+ "None": None,
138
+ # Basic emotions
139
+ "Happy": "cheerful1",
140
+ "Sad": "sad1",
141
+ "Angry": "rage1",
142
+ "Fear": "fear1",
143
+ "Surprise": "surprised1",
144
+ "Disgust": "disgusted1",
145
+ # Extended emotions
146
+ "Laughing": "laughing1",
147
+ "Loving": "loving1",
148
+ "Proud": "proud1",
149
+ "Grateful": "grateful1",
150
+ "Enthusiastic": "enthusiastic1",
151
+ "Curious": "curious1",
152
+ "Amazed": "amazed1",
153
+ "Shy": "shy1",
154
+ "Confused": "confused1",
155
+ "Thoughtful": "thoughtful1",
156
+ "Anxious": "anxiety1",
157
+ "Scared": "scared1",
158
+ "Frustrated": "frustrated1",
159
+ "Irritated": "irritated1",
160
+ "Furious": "furious1",
161
+ "Contempt": "contempt1",
162
+ "Bored": "boredom1",
163
+ "Tired": "tired1",
164
+ "Exhausted": "exhausted1",
165
+ "Lonely": "lonely1",
166
+ "Downcast": "downcast1",
167
+ "Resigned": "resigned1",
168
+ "Uncertain": "uncertain1",
169
+ "Uncomfortable": "uncomfortable1",
170
+ "Lost": "lost1",
171
+ "Indifferent": "indifferent1",
172
+ # Positive actions
173
+ "Yes": "yes1",
174
+ "No": "no1",
175
+ "Welcoming": "welcoming1",
176
+ "Helpful": "helpful1",
177
+ "Attentive": "attentive1",
178
+ "Understanding": "understanding1",
179
+ "Calming": "calming1",
180
+ "Relief": "relief1",
181
+ "Success": "success1",
182
+ "Serenity": "serenity1",
183
+ # Negative actions
184
+ "Oops": "oops1",
185
+ "Displeased": "displeased1",
186
+ "Impatient": "impatient1",
187
+ "Reprimand": "reprimand1",
188
+ "GoAway": "go_away1",
189
+ # Special
190
+ "Come": "come1",
191
+ "Inquiring": "inquiring1",
192
+ "Sleep": "sleep1",
193
+ "Dance": "dance1",
194
+ "Electric": "electric1",
195
+ "Dying": "dying1",
196
+ }
197
+
198
+ def setup_all_entities(self, entities: List) -> None:
199
+ """Setup all entity phases.
200
+
201
+ Args:
202
+ entities: The list to append entities to
203
+ """
204
+ self._setup_phase1_entities(entities)
205
+ self._setup_phase2_entities(entities)
206
+ self._setup_phase3_entities(entities)
207
+ self._setup_phase4_entities(entities)
208
+ self._setup_phase5_entities(entities) # DOA for wakeup turn-to-sound
209
+ self._setup_phase6_entities(entities)
210
+ self._setup_phase7_entities(entities)
211
+ self._setup_phase8_entities(entities)
212
+ self._setup_phase9_entities(entities)
213
+ self._setup_phase10_entities(entities)
214
+ # Phase 11 (LED control) disabled - LEDs are inside the robot and not visible
215
+ self._setup_phase12_entities(entities)
216
+ # Phase 13 (Sendspin) - auto-enabled via mDNS discovery, no user entities
217
+ # Phase 14 (head_joints, passive_joints) removed - not needed
218
+ # Phase 20 (Tap detection) disabled - too many false triggers
219
+ self._setup_phase21_entities(entities)
220
+ self._setup_phase22_entities(entities)
221
+ self._setup_phase23_entities(entities)
222
+
223
+ _LOGGER.info("All entities registered: %d total", len(entities))
224
+
225
+ def _setup_phase1_entities(self, entities: List) -> None:
226
+ """Setup Phase 1 entities: Basic status and volume control."""
227
+ rc = self.reachy_controller
228
+
229
+ entities.append(TextSensorEntity(
230
+ server=self.server,
231
+ key=get_entity_key("daemon_state"),
232
+ name="Daemon State",
233
+ object_id="daemon_state",
234
+ icon="mdi:robot",
235
+ value_getter=rc.get_daemon_state,
236
+ ))
237
+
238
+ entities.append(BinarySensorEntity(
239
+ server=self.server,
240
+ key=get_entity_key("backend_ready"),
241
+ name="Backend Ready",
242
+ object_id="backend_ready",
243
+ icon="mdi:check-circle",
244
+ device_class="connectivity",
245
+ value_getter=rc.get_backend_ready,
246
+ ))
247
+
248
+ entities.append(NumberEntity(
249
+ server=self.server,
250
+ key=get_entity_key("speaker_volume"),
251
+ name="Speaker Volume",
252
+ object_id="speaker_volume",
253
+ min_value=0.0,
254
+ max_value=100.0,
255
+ step=1.0,
256
+ icon="mdi:volume-high",
257
+ unit_of_measurement="%",
258
+ mode=2, # Slider mode
259
+ entity_category=1, # config
260
+ value_getter=rc.get_speaker_volume,
261
+ value_setter=rc.set_speaker_volume,
262
+ ))
263
+
264
+ _LOGGER.debug("Phase 1 entities registered: daemon_state, backend_ready, speaker_volume")
265
+
266
+ def _setup_phase2_entities(self, entities: List) -> None:
267
+ """Setup Phase 2 entities: Motor control."""
268
+ rc = self.reachy_controller
269
+
270
+ entities.append(SwitchEntity(
271
+ server=self.server,
272
+ key=get_entity_key("motors_enabled"),
273
+ name="Motors Enabled",
274
+ object_id="motors_enabled",
275
+ icon="mdi:engine",
276
+ device_class="switch",
277
+ value_getter=rc.get_motors_enabled,
278
+ value_setter=rc.set_motors_enabled,
279
+ ))
280
+
281
+ entities.append(ButtonEntity(
282
+ server=self.server,
283
+ key=get_entity_key("wake_up"),
284
+ name="Wake Up",
285
+ object_id="wake_up",
286
+ icon="mdi:alarm",
287
+ device_class="restart",
288
+ on_press=rc.wake_up,
289
+ ))
290
+
291
+ entities.append(ButtonEntity(
292
+ server=self.server,
293
+ key=get_entity_key("go_to_sleep"),
294
+ name="Go to Sleep",
295
+ object_id="go_to_sleep",
296
+ icon="mdi:sleep",
297
+ device_class="restart",
298
+ on_press=rc.go_to_sleep,
299
+ ))
300
+
301
+ _LOGGER.debug("Phase 2 entities registered: motors_enabled, wake_up, go_to_sleep")
302
+
303
+ def _setup_phase3_entities(self, entities: List) -> None:
304
+ """Setup Phase 3 entities: Pose control."""
305
+ rc = self.reachy_controller
306
+
307
+ # Head position controls (X, Y, Z in mm)
308
+ entities.append(NumberEntity(
309
+ server=self.server,
310
+ key=get_entity_key("head_x"),
311
+ name="Head X Position",
312
+ object_id="head_x",
313
+ min_value=-50.0,
314
+ max_value=50.0,
315
+ step=1.0,
316
+ icon="mdi:axis-x-arrow",
317
+ unit_of_measurement="mm",
318
+ mode=2,
319
+ value_getter=rc.get_head_x,
320
+ value_setter=rc.set_head_x,
321
+ ))
322
+
323
+ entities.append(NumberEntity(
324
+ server=self.server,
325
+ key=get_entity_key("head_y"),
326
+ name="Head Y Position",
327
+ object_id="head_y",
328
+ min_value=-50.0,
329
+ max_value=50.0,
330
+ step=1.0,
331
+ icon="mdi:axis-y-arrow",
332
+ unit_of_measurement="mm",
333
+ mode=2,
334
+ value_getter=rc.get_head_y,
335
+ value_setter=rc.set_head_y,
336
+ ))
337
+
338
+ entities.append(NumberEntity(
339
+ server=self.server,
340
+ key=get_entity_key("head_z"),
341
+ name="Head Z Position",
342
+ object_id="head_z",
343
+ min_value=-50.0,
344
+ max_value=50.0,
345
+ step=1.0,
346
+ icon="mdi:axis-z-arrow",
347
+ unit_of_measurement="mm",
348
+ mode=2,
349
+ value_getter=rc.get_head_z,
350
+ value_setter=rc.set_head_z,
351
+ ))
352
+
353
+ # Head orientation controls (Roll, Pitch, Yaw in degrees)
354
+ entities.append(NumberEntity(
355
+ server=self.server,
356
+ key=get_entity_key("head_roll"),
357
+ name="Head Roll",
358
+ object_id="head_roll",
359
+ min_value=-40.0,
360
+ max_value=40.0,
361
+ step=1.0,
362
+ icon="mdi:rotate-3d-variant",
363
+ unit_of_measurement="°",
364
+ mode=2,
365
+ value_getter=rc.get_head_roll,
366
+ value_setter=rc.set_head_roll,
367
+ ))
368
+
369
+ entities.append(NumberEntity(
370
+ server=self.server,
371
+ key=get_entity_key("head_pitch"),
372
+ name="Head Pitch",
373
+ object_id="head_pitch",
374
+ min_value=-40.0,
375
+ max_value=40.0,
376
+ step=1.0,
377
+ icon="mdi:rotate-3d-variant",
378
+ unit_of_measurement="°",
379
+ mode=2,
380
+ value_getter=rc.get_head_pitch,
381
+ value_setter=rc.set_head_pitch,
382
+ ))
383
+
384
+ entities.append(NumberEntity(
385
+ server=self.server,
386
+ key=get_entity_key("head_yaw"),
387
+ name="Head Yaw",
388
+ object_id="head_yaw",
389
+ min_value=-180.0,
390
+ max_value=180.0,
391
+ step=1.0,
392
+ icon="mdi:rotate-3d-variant",
393
+ unit_of_measurement="°",
394
+ mode=2,
395
+ value_getter=rc.get_head_yaw,
396
+ value_setter=rc.set_head_yaw,
397
+ ))
398
+
399
+ # Body yaw control
400
+ entities.append(NumberEntity(
401
+ server=self.server,
402
+ key=get_entity_key("body_yaw"),
403
+ name="Body Yaw",
404
+ object_id="body_yaw",
405
+ min_value=-160.0,
406
+ max_value=160.0,
407
+ step=1.0,
408
+ icon="mdi:rotate-3d-variant",
409
+ unit_of_measurement="°",
410
+ mode=2,
411
+ value_getter=rc.get_body_yaw,
412
+ value_setter=rc.set_body_yaw,
413
+ ))
414
+
415
+ # Antenna controls
416
+ entities.append(NumberEntity(
417
+ server=self.server,
418
+ key=get_entity_key("antenna_left"),
419
+ name="Antenna(L)",
420
+ object_id="antenna_left",
421
+ min_value=-90.0,
422
+ max_value=90.0,
423
+ step=1.0,
424
+ icon="mdi:antenna",
425
+ unit_of_measurement="°",
426
+ mode=2,
427
+ value_getter=rc.get_antenna_left,
428
+ value_setter=rc.set_antenna_left,
429
+ ))
430
+
431
+ entities.append(NumberEntity(
432
+ server=self.server,
433
+ key=get_entity_key("antenna_right"),
434
+ name="Antenna(R)",
435
+ object_id="antenna_right",
436
+ min_value=-90.0,
437
+ max_value=90.0,
438
+ step=1.0,
439
+ icon="mdi:antenna",
440
+ unit_of_measurement="°",
441
+ mode=2,
442
+ value_getter=rc.get_antenna_right,
443
+ value_setter=rc.set_antenna_right,
444
+ ))
445
+
446
+ _LOGGER.debug("Phase 3 entities registered: head position/orientation, body_yaw, antennas")
447
+
448
+ def _setup_phase4_entities(self, entities: List) -> None:
449
+ """Setup Phase 4 entities: Look at control."""
450
+ rc = self.reachy_controller
451
+
452
+ entities.append(NumberEntity(
453
+ server=self.server,
454
+ key=get_entity_key("look_at_x"),
455
+ name="Look At X",
456
+ object_id="look_at_x",
457
+ min_value=-2.0,
458
+ max_value=2.0,
459
+ step=0.1,
460
+ icon="mdi:crosshairs-gps",
461
+ unit_of_measurement="m",
462
+ mode=1, # Box mode for precise input
463
+ value_getter=rc.get_look_at_x,
464
+ value_setter=rc.set_look_at_x,
465
+ ))
466
+
467
+ entities.append(NumberEntity(
468
+ server=self.server,
469
+ key=get_entity_key("look_at_y"),
470
+ name="Look At Y",
471
+ object_id="look_at_y",
472
+ min_value=-2.0,
473
+ max_value=2.0,
474
+ step=0.1,
475
+ icon="mdi:crosshairs-gps",
476
+ unit_of_measurement="m",
477
+ mode=1,
478
+ value_getter=rc.get_look_at_y,
479
+ value_setter=rc.set_look_at_y,
480
+ ))
481
+
482
+ entities.append(NumberEntity(
483
+ server=self.server,
484
+ key=get_entity_key("look_at_z"),
485
+ name="Look At Z",
486
+ object_id="look_at_z",
487
+ min_value=-2.0,
488
+ max_value=2.0,
489
+ step=0.1,
490
+ icon="mdi:crosshairs-gps",
491
+ unit_of_measurement="m",
492
+ mode=1,
493
+ value_getter=rc.get_look_at_z,
494
+ value_setter=rc.set_look_at_z,
495
+ ))
496
+
497
+ _LOGGER.debug("Phase 4 entities registered: look_at_x/y/z")
498
+
499
+ def _setup_phase5_entities(self, entities: List) -> None:
500
+ """Setup Phase 5 entities: DOA (Direction of Arrival) for wakeup turn-to-sound."""
501
+ rc = self.reachy_controller
502
+
503
+ entities.append(SensorEntity(
504
+ server=self.server,
505
+ key=get_entity_key("doa_angle"),
506
+ name="DOA Angle",
507
+ object_id="doa_angle",
508
+ icon="mdi:surround-sound",
509
+ unit_of_measurement="°",
510
+ accuracy_decimals=1,
511
+ state_class="measurement",
512
+ value_getter=rc.get_doa_angle_degrees,
513
+ ))
514
+
515
+ entities.append(BinarySensorEntity(
516
+ server=self.server,
517
+ key=get_entity_key("speech_detected"),
518
+ name="Speech Detected",
519
+ object_id="speech_detected",
520
+ icon="mdi:account-voice",
521
+ device_class="sound",
522
+ value_getter=rc.get_speech_detected,
523
+ ))
524
+
525
+ _LOGGER.debug("Phase 5 entities registered: doa_angle, speech_detected")
526
+
527
+ def _setup_phase6_entities(self, entities: List) -> None:
528
+ """Setup Phase 6 entities: Diagnostic information."""
529
+ rc = self.reachy_controller
530
+
531
+ entities.append(SensorEntity(
532
+ server=self.server,
533
+ key=get_entity_key("control_loop_frequency"),
534
+ name="Control Loop Frequency",
535
+ object_id="control_loop_frequency",
536
+ icon="mdi:speedometer",
537
+ unit_of_measurement="Hz",
538
+ accuracy_decimals=1,
539
+ state_class="measurement",
540
+ entity_category=2, # diagnostic
541
+ value_getter=rc.get_control_loop_frequency,
542
+ ))
543
+
544
+ entities.append(TextSensorEntity(
545
+ server=self.server,
546
+ key=get_entity_key("sdk_version"),
547
+ name="SDK Version",
548
+ object_id="sdk_version",
549
+ icon="mdi:information",
550
+ entity_category=2, # diagnostic
551
+ value_getter=rc.get_sdk_version,
552
+ ))
553
+
554
+ entities.append(TextSensorEntity(
555
+ server=self.server,
556
+ key=get_entity_key("robot_name"),
557
+ name="Robot Name",
558
+ object_id="robot_name",
559
+ icon="mdi:robot",
560
+ entity_category=2, # diagnostic
561
+ value_getter=rc.get_robot_name,
562
+ ))
563
+
564
+ entities.append(BinarySensorEntity(
565
+ server=self.server,
566
+ key=get_entity_key("wireless_version"),
567
+ name="Wireless Version",
568
+ object_id="wireless_version",
569
+ icon="mdi:wifi",
570
+ device_class="connectivity",
571
+ entity_category=2, # diagnostic
572
+ value_getter=rc.get_wireless_version,
573
+ ))
574
+
575
+ entities.append(BinarySensorEntity(
576
+ server=self.server,
577
+ key=get_entity_key("simulation_mode"),
578
+ name="Simulation Mode",
579
+ object_id="simulation_mode",
580
+ icon="mdi:virtual-reality",
581
+ entity_category=2, # diagnostic
582
+ value_getter=rc.get_simulation_mode,
583
+ ))
584
+
585
+ entities.append(TextSensorEntity(
586
+ server=self.server,
587
+ key=get_entity_key("wlan_ip"),
588
+ name="WLAN IP",
589
+ object_id="wlan_ip",
590
+ icon="mdi:ip-network",
591
+ entity_category=2, # diagnostic
592
+ value_getter=rc.get_wlan_ip,
593
+ ))
594
+
595
+ entities.append(TextSensorEntity(
596
+ server=self.server,
597
+ key=get_entity_key("error_message"),
598
+ name="Error Message",
599
+ object_id="error_message",
600
+ icon="mdi:alert-circle",
601
+ entity_category=2, # diagnostic
602
+ value_getter=rc.get_error_message,
603
+ ))
604
+
605
+ _LOGGER.debug(
606
+ "Phase 6 entities registered: control_loop_frequency, sdk_version, "
607
+ "robot_name, wireless_version, simulation_mode, wlan_ip, error_message"
608
+ )
609
+
610
+ def _setup_phase7_entities(self, entities: List) -> None:
611
+ """Setup Phase 7 entities: IMU sensors (wireless only)."""
612
+ rc = self.reachy_controller
613
+
614
+ # IMU Accelerometer
615
+ entities.append(SensorEntity(
616
+ server=self.server,
617
+ key=get_entity_key("imu_accel_x"),
618
+ name="IMU Accel X",
619
+ object_id="imu_accel_x",
620
+ icon="mdi:axis-x-arrow",
621
+ unit_of_measurement="m/s²",
622
+ accuracy_decimals=3,
623
+ state_class="measurement",
624
+ value_getter=rc.get_imu_accel_x,
625
+ ))
626
+
627
+ entities.append(SensorEntity(
628
+ server=self.server,
629
+ key=get_entity_key("imu_accel_y"),
630
+ name="IMU Accel Y",
631
+ object_id="imu_accel_y",
632
+ icon="mdi:axis-y-arrow",
633
+ unit_of_measurement="m/s²",
634
+ accuracy_decimals=3,
635
+ state_class="measurement",
636
+ value_getter=rc.get_imu_accel_y,
637
+ ))
638
+
639
+ entities.append(SensorEntity(
640
+ server=self.server,
641
+ key=get_entity_key("imu_accel_z"),
642
+ name="IMU Accel Z",
643
+ object_id="imu_accel_z",
644
+ icon="mdi:axis-z-arrow",
645
+ unit_of_measurement="m/s²",
646
+ accuracy_decimals=3,
647
+ state_class="measurement",
648
+ value_getter=rc.get_imu_accel_z,
649
+ ))
650
+
651
+ # IMU Gyroscope
652
+ entities.append(SensorEntity(
653
+ server=self.server,
654
+ key=get_entity_key("imu_gyro_x"),
655
+ name="IMU Gyro X",
656
+ object_id="imu_gyro_x",
657
+ icon="mdi:rotate-3d-variant",
658
+ unit_of_measurement="rad/s",
659
+ accuracy_decimals=3,
660
+ state_class="measurement",
661
+ value_getter=rc.get_imu_gyro_x,
662
+ ))
663
+
664
+ entities.append(SensorEntity(
665
+ server=self.server,
666
+ key=get_entity_key("imu_gyro_y"),
667
+ name="IMU Gyro Y",
668
+ object_id="imu_gyro_y",
669
+ icon="mdi:rotate-3d-variant",
670
+ unit_of_measurement="rad/s",
671
+ accuracy_decimals=3,
672
+ state_class="measurement",
673
+ value_getter=rc.get_imu_gyro_y,
674
+ ))
675
+
676
+ entities.append(SensorEntity(
677
+ server=self.server,
678
+ key=get_entity_key("imu_gyro_z"),
679
+ name="IMU Gyro Z",
680
+ object_id="imu_gyro_z",
681
+ icon="mdi:rotate-3d-variant",
682
+ unit_of_measurement="rad/s",
683
+ accuracy_decimals=3,
684
+ state_class="measurement",
685
+ value_getter=rc.get_imu_gyro_z,
686
+ ))
687
+
688
+ # IMU Temperature
689
+ entities.append(SensorEntity(
690
+ server=self.server,
691
+ key=get_entity_key("imu_temperature"),
692
+ name="IMU Temperature",
693
+ object_id="imu_temperature",
694
+ icon="mdi:thermometer",
695
+ unit_of_measurement="°C",
696
+ accuracy_decimals=1,
697
+ device_class="temperature",
698
+ state_class="measurement",
699
+ value_getter=rc.get_imu_temperature,
700
+ ))
701
+
702
+ _LOGGER.debug("Phase 7 entities registered: IMU accelerometer, gyroscope, temperature")
703
+
704
+ def _setup_phase8_entities(self, entities: List) -> None:
705
+ """Setup Phase 8 entities: Emotion selector."""
706
+
707
+ def get_emotion() -> str:
708
+ return self._current_emotion
709
+
710
+ def set_emotion(emotion: str) -> None:
711
+ self._current_emotion = emotion
712
+ emotion_name = self._emotion_map.get(emotion)
713
+ if emotion_name and self._play_emotion_callback:
714
+ self._play_emotion_callback(emotion_name)
715
+ # Reset to None after playing
716
+ self._current_emotion = "None"
717
+
718
+ entities.append(SelectEntity(
719
+ server=self.server,
720
+ key=get_entity_key("emotion"),
721
+ name="Emotion",
722
+ object_id="emotion",
723
+ options=list(self._emotion_map.keys()),
724
+ icon="mdi:emoticon",
725
+ value_getter=get_emotion,
726
+ value_setter=set_emotion,
727
+ ))
728
+
729
+ _LOGGER.debug("Phase 8 entities registered: emotion selector")
730
+
731
+ def _setup_phase9_entities(self, entities: List) -> None:
732
+ """Setup Phase 9 entities: Audio controls."""
733
+ rc = self.reachy_controller
734
+
735
+ entities.append(NumberEntity(
736
+ server=self.server,
737
+ key=get_entity_key("microphone_volume"),
738
+ name="Microphone Volume",
739
+ object_id="microphone_volume",
740
+ min_value=0.0,
741
+ max_value=100.0,
742
+ step=1.0,
743
+ icon="mdi:microphone",
744
+ unit_of_measurement="%",
745
+ mode=2, # Slider mode
746
+ entity_category=1, # config
747
+ value_getter=rc.get_microphone_volume,
748
+ value_setter=rc.set_microphone_volume,
749
+ ))
750
+
751
+ _LOGGER.debug("Phase 9 entities registered: microphone_volume")
752
+
753
+ def _setup_phase10_entities(self, entities: List) -> None:
754
+ """Setup Phase 10 entities: Camera for Home Assistant integration."""
755
+
756
+ def get_camera_image() -> Optional[bytes]:
757
+ """Get camera snapshot as JPEG bytes."""
758
+ if self.camera_server:
759
+ return self.camera_server.get_snapshot()
760
+ return None
761
+
762
+ entities.append(CameraEntity(
763
+ server=self.server,
764
+ key=get_entity_key("camera"),
765
+ name="Camera",
766
+ object_id="camera",
767
+ icon="mdi:camera",
768
+ image_getter=get_camera_image,
769
+ ))
770
+
771
+ _LOGGER.debug("Phase 10 entities registered: camera (ESPHome Camera entity)")
772
+
773
+ def _setup_phase12_entities(self, entities: List) -> None:
774
+ """Setup Phase 12 entities: Audio processing parameters (via local SDK)."""
775
+ rc = self.reachy_controller
776
+
777
+ def set_agc_enabled_with_save(enabled: bool) -> None:
778
+ """Set AGC enabled and save to preferences."""
779
+ rc.set_agc_enabled(enabled)
780
+ if hasattr(self.server, 'state') and self.server.state:
781
+ self.server.state.preferences.agc_enabled = enabled
782
+ self.server.state.save_preferences()
783
+ _LOGGER.debug("AGC enabled saved to preferences: %s", enabled)
784
+
785
+ def set_agc_max_gain_with_save(gain: float) -> None:
786
+ """Set AGC max gain and save to preferences."""
787
+ rc.set_agc_max_gain(gain)
788
+ if hasattr(self.server, 'state') and self.server.state:
789
+ self.server.state.preferences.agc_max_gain = gain
790
+ self.server.state.save_preferences()
791
+ _LOGGER.debug("AGC max gain saved to preferences: %.1f dB", gain)
792
+
793
+ def set_noise_suppression_with_save(level: float) -> None:
794
+ """Set noise suppression and save to preferences."""
795
+ rc.set_noise_suppression(level)
796
+ if hasattr(self.server, 'state') and self.server.state:
797
+ self.server.state.preferences.noise_suppression = level
798
+ self.server.state.save_preferences()
799
+ _LOGGER.debug("Noise suppression saved to preferences: %.1f%%", level)
800
+
801
+ entities.append(SwitchEntity(
802
+ server=self.server,
803
+ key=get_entity_key("agc_enabled"),
804
+ name="AGC Enabled",
805
+ object_id="agc_enabled",
806
+ icon="mdi:tune-vertical",
807
+ device_class="switch",
808
+ entity_category=1, # config
809
+ value_getter=rc.get_agc_enabled,
810
+ value_setter=set_agc_enabled_with_save,
811
+ ))
812
+
813
+ entities.append(NumberEntity(
814
+ server=self.server,
815
+ key=get_entity_key("agc_max_gain"),
816
+ name="AGC Max Gain",
817
+ object_id="agc_max_gain",
818
+ min_value=0.0,
819
+ max_value=40.0, # XVF3800 supports up to 40dB
820
+ step=1.0,
821
+ icon="mdi:volume-plus",
822
+ unit_of_measurement="dB",
823
+ mode=2,
824
+ entity_category=1, # config
825
+ value_getter=rc.get_agc_max_gain,
826
+ value_setter=set_agc_max_gain_with_save,
827
+ ))
828
+
829
+ entities.append(NumberEntity(
830
+ server=self.server,
831
+ key=get_entity_key("noise_suppression"),
832
+ name="Noise Suppression",
833
+ object_id="noise_suppression",
834
+ min_value=0.0,
835
+ max_value=100.0,
836
+ step=1.0,
837
+ icon="mdi:volume-off",
838
+ unit_of_measurement="%",
839
+ mode=2,
840
+ entity_category=1, # config
841
+ value_getter=rc.get_noise_suppression,
842
+ value_setter=set_noise_suppression_with_save,
843
+ ))
844
+
845
+ entities.append(BinarySensorEntity(
846
+ server=self.server,
847
+ key=get_entity_key("echo_cancellation_converged"),
848
+ name="Echo Cancellation Converged",
849
+ object_id="echo_cancellation_converged",
850
+ icon="mdi:waveform",
851
+ device_class="running",
852
+ entity_category=2, # diagnostic
853
+ value_getter=rc.get_echo_cancellation_converged,
854
+ ))
855
+
856
+ _LOGGER.debug(
857
+ "Phase 12 entities registered: agc_enabled, agc_max_gain, "
858
+ "noise_suppression, echo_cancellation_converged"
859
+ )
860
+
861
def _setup_phase21_entities(self, entities: List) -> None:
    """Register the Phase 21 entity: the continuous conversation switch.

    Args:
        entities: Entity list that the new switch is appended to.
    """

    def get_continuous_conversation() -> bool:
        """Return the stored flag, or False when no server state exists."""
        state = getattr(self.server, 'state', None)
        if not state:
            return False
        return getattr(state.preferences, 'continuous_conversation', False)

    def set_continuous_conversation(enabled: bool) -> None:
        """Store the flag in the preferences and persist them."""
        state = getattr(self.server, 'state', None)
        if not state:
            # Nothing to persist to; leave everything untouched.
            return
        state.preferences.continuous_conversation = enabled
        state.save_preferences()
        mode = "enabled" if enabled else "disabled"
        _LOGGER.info("Continuous conversation mode %s", mode)

    conversation_switch = SwitchEntity(
        server=self.server,
        key=get_entity_key("continuous_conversation"),
        name="Continuous Conversation",
        object_id="continuous_conversation",
        icon="mdi:message-reply-text",
        device_class="switch",
        entity_category=1,  # config
        value_getter=get_continuous_conversation,
        value_setter=set_continuous_conversation,
    )
    entities.append(conversation_switch)

    _LOGGER.debug("Phase 21 entities registered: continuous_conversation")
891
+
892
def _setup_phase22_entities(self, entities: List) -> None:
    """Register the Phase 22 gesture entities.

    Appends a text sensor for the detected gesture and a numeric sensor for
    its confidence to ``entities``, and keeps a reference to each on ``self``
    (``_gesture_entity`` and ``_gesture_confidence_entity``).

    Args:
        entities: Entity list that the new sensors are appended to.
    """

    def get_gesture() -> str:
        """Return the camera server's current gesture, or "none" without one."""
        camera = self.camera_server
        if not camera:
            return "none"
        return camera.get_current_gesture()

    def get_gesture_confidence() -> float:
        """Return the camera server's gesture confidence, or 0.0 without one."""
        camera = self.camera_server
        if not camera:
            return 0.0
        return camera.get_gesture_confidence()

    self._gesture_entity = TextSensorEntity(
        server=self.server,
        key=get_entity_key("gesture_detected"),
        name="Gesture Detected",
        object_id="gesture_detected",
        icon="mdi:hand-wave",
        value_getter=get_gesture,
    )
    entities.append(self._gesture_entity)

    self._gesture_confidence_entity = SensorEntity(
        server=self.server,
        key=get_entity_key("gesture_confidence"),
        name="Gesture Confidence",
        object_id="gesture_confidence",
        icon="mdi:percent",
        unit_of_measurement="%",
        accuracy_decimals=1,
        state_class="measurement",
        value_getter=get_gesture_confidence,
    )
    entities.append(self._gesture_confidence_entity)

    _LOGGER.debug("Phase 22 entities registered: gesture_detected, gesture_confidence")
933
+
934
def _setup_phase23_entities(self, entities: List) -> None:
    """Register the Phase 23 entity: the face detection binary sensor.

    The sensor is appended to ``entities`` and also stored on ``self`` as
    ``_face_detected_entity``.

    Args:
        entities: Entity list that the new sensor is appended to.
    """

    def get_face_detected() -> bool:
        """Ask the camera server whether a face is detected; False without one."""
        camera = self.camera_server
        if not camera:
            return False
        return camera.is_face_detected()

    self._face_detected_entity = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("face_detected"),
        name="Face Detected",
        object_id="face_detected",
        icon="mdi:face-recognition",
        device_class="occupancy",
        value_getter=get_face_detected,
    )
    entities.append(self._face_detected_entity)

    _LOGGER.debug("Phase 23 entities registered: face_detected")
956
+
957
def update_face_detected_state(self) -> None:
    """Push the face_detected state update to Home Assistant.

    Does nothing when ``_face_detected_entity`` is missing or falsy.
    """
    entity = getattr(self, '_face_detected_entity', None)
    if entity:
        entity.update_state()
961
+
962
def update_gesture_state(self) -> None:
    """Push gesture state updates to Home Assistant.

    Updates the gesture entity first and the confidence entity second,
    skipping whichever of them is missing or falsy.
    """
    for attr_name in ('_gesture_entity', '_gesture_confidence_entity'):
        entity = getattr(self, attr_name, None)
        if entity:
            entity.update_state()
968
+
969
def find_entity_references(self, entities: List) -> None:
    """Find and store references to special entities (currently a no-op).

    Args:
        entities: The list of existing entities to search.
    """
    # DOA entities are read-only sensors, so there is nothing to look up.
    return None
reachy_mini_ha_voice/gesture_detector.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gesture detection using HaGRID ONNX models."""
2
+
3
+ from __future__ import annotations
4
+ import logging
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional, Tuple
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from numpy.typing import NDArray
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class Gesture(Enum):
    """Gestures that can be reported; each value is the gesture's label string."""

    # Reported when no gesture is recognised.
    NONE = "no_gesture"

    # Single-hand gestures, in alphabetical order of their labels.
    CALL = "call"
    DISLIKE = "dislike"
    FIST = "fist"
    FOUR = "four"
    LIKE = "like"
    MUTE = "mute"
    OK = "ok"
    ONE = "one"
    PALM = "palm"
    PEACE = "peace"
    PEACE_INVERTED = "peace_inverted"
    ROCK = "rock"
    STOP = "stop"
    STOP_INVERTED = "stop_inverted"
    THREE = "three"
    THREE2 = "three2"
    TWO_UP = "two_up"
    TWO_UP_INVERTED = "two_up_inverted"
36
+
37
+
38
# Classifier labels in output order: the list position is the class index
# that the classifier's argmax is looked up with.
_GESTURE_CLASSES = [
    "hand_down", "hand_right", "hand_left", "thumb_index", "thumb_left",
    "thumb_right", "thumb_down", "half_up", "half_left", "half_right",
    "half_down", "part_hand_heart", "part_hand_heart2", "fist_inverted",
    "two_left", "two_right", "two_down", "grabbing", "grip", "point",
    "call", "three3", "little_finger", "middle_finger", "dislike", "fist",
    "four", "like", "mute", "ok", "one", "palm", "peace", "peace_inverted",
    "rock", "stop", "stop_inverted", "three", "three2", "two_up",
    "two_up_inverted", "three_gun", "one_left", "one_right", "one_down",
]

# Labels that map onto a Gesture member. Every other label in
# _GESTURE_CLASSES has no entry here.
_NAME_TO_GESTURE = {
    label: Gesture(label)
    for label in (
        "call", "dislike", "fist",
        "four", "like", "mute",
        "ok", "one", "palm",
        "peace", "peace_inverted",
        "rock", "stop",
        "stop_inverted", "three",
        "three2", "two_up",
        "two_up_inverted",
    )
}
59
+
60
+
61
class GestureDetector:
    """Two-stage hand gesture recognizer backed by two ONNX models.

    A hand detector picks the highest-scoring hand in a frame, the hand box
    is expanded towards a square and cropped, and a classifier maps the crop
    to one of ``_GESTURE_CLASSES``. Both models are loaded from the ``models``
    directory next to this module. When onnxruntime or a model file is
    missing, the detector stays unavailable and ``detect`` always returns
    ``(Gesture.NONE, 0.0)``.
    """

    def __init__(self, confidence_threshold: float = 0.3, detection_threshold: float = 0.3):
        """Store the thresholds and load both models.

        Args:
            confidence_threshold: Minimum classifier softmax probability for
                a gesture to be reported.
            detection_threshold: Score a hand detection must exceed to be used.
        """
        self._confidence_threshold = confidence_threshold
        self._detection_threshold = detection_threshold
        models_dir = Path(__file__).parent / "models"
        self._detector_path = models_dir / "hand_detector.onnx"
        self._classifier_path = models_dir / "crops_classifier.onnx"
        self._detector = None
        self._classifier = None
        # Tensor names are filled in by _load_models(); define them here so
        # the attributes exist even when loading fails.
        self._det_input = None
        self._det_outputs = []
        self._cls_input = None
        self._available = False
        self._mean = np.array([127, 127, 127], dtype=np.float32)
        self._std = np.array([128, 128, 128], dtype=np.float32)
        self._detector_size = (320, 240)
        self._classifier_size = (128, 128)
        self._load_models()

    def _load_models(self) -> None:
        """Create both inference sessions; on any failure stay unavailable."""
        try:
            import onnxruntime as ort
        except ImportError:
            logger.warning("onnxruntime not installed")
            return
        if not self._detector_path.exists() or not self._classifier_path.exists():
            logger.warning("Model files not found")
            return
        try:
            providers = ['CPUExecutionProvider']
            logger.info("Loading gesture models...")
            detector = ort.InferenceSession(str(self._detector_path), providers=providers)
            classifier = ort.InferenceSession(str(self._classifier_path), providers=providers)
            det_input = detector.get_inputs()[0].name
            det_outputs = [o.name for o in detector.get_outputs()]
            cls_input = classifier.get_inputs()[0].name
        except Exception as e:
            logger.error("Failed to load models: %s", e)
            return
        # Publish everything only after both sessions loaded, so a failure
        # never leaves a half-initialised detector behind.
        self._detector = detector
        self._classifier = classifier
        self._det_input = det_input
        self._det_outputs = det_outputs
        self._cls_input = cls_input
        self._available = True
        logger.info("Gesture detection ready")

    @property
    def is_available(self) -> bool:
        """Whether both models loaded successfully."""
        return self._available

    def _preprocess(self, frame: NDArray, size: Tuple[int, int]) -> NDArray:
        """Turn an image into a normalised model input tensor.

        The frame is converted with ``cv2.COLOR_BGR2RGB``, resized to
        ``size``, normalised with the stored mean/std, moved from
        height-width-channel to channel-height-width order and given a
        leading batch axis.
        """
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, size)
        img = (img.astype(np.float32) - self._mean) / self._std
        img = np.transpose(img, [2, 0, 1])
        return np.expand_dims(img, axis=0)

    def _detect_hand(self, frame: NDArray) -> Optional[Tuple[int, int, int, int, float]]:
        """Locate the best-scoring hand in ``frame``.

        Returns:
            ``(x1, y1, x2, y2, score)`` in frame pixels, where ``x2``/``y2``
            are exclusive slice bounds, or None when no detection exceeds
            the detection threshold or the box is empty after clipping.
        """
        if self._detector is None:
            return None
        h, w = frame.shape[:2]
        inp = self._preprocess(frame, self._detector_size)
        outs = self._detector.run(self._det_outputs, {self._det_input: inp})
        # Output 0 is read as the boxes and output 2 as the matching scores.
        boxes = outs[0]
        scores = outs[2]
        if len(boxes) == 0:
            return None
        best_i, best_c = -1, self._detection_threshold
        for i, c in enumerate(scores):
            if c > best_c:
                best_c, best_i = float(c), i
        if best_i < 0:
            return None
        b = boxes[best_i]
        # Model outputs normalized coordinates (0-1), scale to original frame size
        x1, y1 = int(b[0] * w), int(b[1] * h)
        x2, y2 = int(b[2] * w), int(b[3] * h)
        x1, y1 = max(0, x1), max(0, y1)
        # The box is later used as slice bounds, whose end is exclusive, so
        # clip to w/h (not w-1/h-1) to keep the last column and row.
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return (x1, y1, x2, y2, best_c)

    def _get_square_crop(self, frame: NDArray, box: Tuple[int, int, int, int]) -> NDArray:
        """Crop ``box`` from ``frame`` after growing its shorter side.

        The shorter side is extended to match the longer one, then the box
        is clipped to the frame, so the crop can end up non-square (or
        empty) near the frame border.
        """
        h, w = frame.shape[:2]
        x1, y1, x2, y2 = box
        bw, bh = x2 - x1, y2 - y1
        if bh < bw:
            y1 = y1 - (bw - bh) // 2
            y2 = y1 + bw
        elif bh > bw:
            x1 = x1 - (bh - bw) // 2
            x2 = x1 + bh
        x1, y1 = max(0, x1), max(0, y1)
        # Slice ends are exclusive: clipping to w/h keeps the edge pixels.
        x2, y2 = min(w, x2), min(h, y2)
        return frame[y1:y2, x1:x2]

    def _classify(self, crop: NDArray) -> Tuple[Gesture, float]:
        """Classify a hand crop.

        Returns:
            ``(gesture, probability)`` where ``probability`` is the softmax
            value of the top class. The gesture is ``Gesture.NONE`` when the
            probability is below the confidence threshold, the class index
            is out of range, or the label has no ``Gesture`` member.
        """
        if self._classifier is None or crop.size == 0:
            return Gesture.NONE, 0.0
        inp = self._preprocess(crop, self._classifier_size)
        logits = self._classifier.run(None, {self._cls_input: inp})[0][0]
        idx = int(np.argmax(logits))
        # Subtract the maximum before exponentiating for numerical stability.
        exp_l = np.exp(logits - np.max(logits))
        conf = float(exp_l[idx] / np.sum(exp_l))
        if idx >= len(_GESTURE_CLASSES) or conf < self._confidence_threshold:
            return Gesture.NONE, conf
        name = _GESTURE_CLASSES[idx]
        return _NAME_TO_GESTURE.get(name, Gesture.NONE), conf

    def detect(self, frame: NDArray) -> Tuple[Gesture, float]:
        """Detect the gesture shown in ``frame``.

        Returns:
            ``(gesture, confidence)`` where ``confidence`` is the detector
            score multiplied by the classifier probability. Returns
            ``(Gesture.NONE, 0.0)`` when the detector is unavailable, the
            frame is missing or empty, no hand is found, or an error occurs.
        """
        if not self._available:
            return Gesture.NONE, 0.0
        if frame is None or frame.size == 0:
            # Nothing to analyse; avoid logging a warning for every empty frame.
            return Gesture.NONE, 0.0
        try:
            det = self._detect_hand(frame)
            if det is None:
                return Gesture.NONE, 0.0
            x1, y1, x2, y2, det_c = det
            logger.debug("Hand: box=(%d,%d,%d,%d) conf=%.2f", x1, y1, x2, y2, det_c)
            crop = self._get_square_crop(frame, (x1, y1, x2, y2))
            if crop.size == 0:
                return Gesture.NONE, 0.0
            gest, cls_c = self._classify(crop)
            if gest != Gesture.NONE:
                logger.debug("Gesture: %s (det=%.2f cls=%.2f)", gest.value, det_c, cls_c)
            return gest, det_c * cls_c
        except Exception as e:
            # Best effort: a failing frame must not break the caller's loop.
            logger.warning("Gesture error: %s", e)
            return Gesture.NONE, 0.0

    def close(self) -> None:
        """Drop both model sessions and mark the detector unavailable."""
        self._detector = self._classifier = None
        self._available = False
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py RENAMED
@@ -3,35 +3,24 @@
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
6
-
7
- Performance Optimizations:
8
- - Optional frame downscaling for faster inference on low-power devices
9
- - Frame skip support for reduced CPU usage when tracking is stable
10
- - Configurable inference resolution (default: native resolution)
11
  """
12
 
13
  from __future__ import annotations
14
-
15
  import logging
16
- from typing import TYPE_CHECKING
17
 
18
  import numpy as np
 
19
 
20
- if TYPE_CHECKING:
21
- from numpy.typing import NDArray
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
  class HeadTracker:
27
  """Lightweight head tracker using YOLO for face detection.
28
-
29
  Model is loaded at initialization time to ensure face tracking
30
  is ready immediately (matching conversation_app behavior).
31
-
32
- Performance Features:
33
- - Frame downscaling: Reduces inference resolution for ~4x speedup
34
- - Frame skipping: Reuses last detection result for stable tracking
35
  """
36
 
37
  def __init__(
@@ -40,7 +29,6 @@ class HeadTracker:
40
  model_filename: str = "model.pt",
41
  confidence_threshold: float = 0.3,
42
  device: str = "cpu",
43
- inference_scale: float = 1.0, # Scale factor for inference (0.5 = half resolution)
44
  ) -> None:
45
  """Initialize YOLO-based head tracker.
46
 
@@ -49,7 +37,6 @@ class HeadTracker:
49
  model_filename: Model file name
50
  confidence_threshold: Minimum confidence for face detection
51
  device: Device to run inference on ('cpu' or 'cuda')
52
- inference_scale: Scale factor for inference (0.5 = half res for ~4x speedup)
53
  """
54
  self.confidence_threshold = confidence_threshold
55
  self.model = None
@@ -58,57 +45,57 @@ class HeadTracker:
58
  self._device = device
59
  self._detections_class = None
60
  self._model_load_attempted = False
61
- self._model_load_error: str | None = None
62
-
63
- # Performance optimization settings
64
- self._inference_scale = min(1.0, max(0.25, inference_scale))
65
-
66
- # Frame skip support for stable tracking
67
- self._last_detection: tuple[NDArray, float] | None = None
68
- self._frames_since_detection = 0
69
- self._max_skip_frames = 0 # 0 = no skipping (can be set externally)
70
-
71
  # Load model immediately at init (not lazy)
72
  self._load_model()
73
 
74
  def _load_model(self) -> None:
75
- """Load YOLO model for face detection."""
76
  if self._model_load_attempted:
77
  return
78
-
79
  self._model_load_attempted = True
80
-
81
  try:
82
- from pathlib import Path
83
-
84
- from supervision import Detections
85
  from ultralytics import YOLO
86
-
 
 
 
87
  self._detections_class = Detections
88
-
89
- # Load local model from models directory
90
- models_dir = Path(__file__).resolve().parents[1] / "models"
91
- local_model_path = models_dir / self._model_filename
92
-
93
- if not local_model_path.exists():
94
- raise FileNotFoundError(
95
- f"Model file not found: {local_model_path}. "
96
- f"Please place {self._model_filename} in the models directory."
97
- )
98
-
99
- model_path = str(local_model_path)
100
- logger.info("Loading local YOLO model: %s", model_path)
101
-
 
 
 
 
 
 
 
 
 
 
 
 
102
  self.model = YOLO(model_path).to(self._device)
103
- logger.info("YOLO face detection model loaded successfully")
104
  except ImportError as e:
105
  self._model_load_error = f"Missing dependencies: {e}"
106
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
107
  self.model = None
108
- except FileNotFoundError as e:
109
- self._model_load_error = str(e)
110
- logger.error("Failed to load YOLO model: %s", e)
111
- self.model = None
112
  except Exception as e:
113
  self._model_load_error = str(e)
114
  logger.error("Failed to load YOLO model: %s", e)
@@ -119,7 +106,7 @@ class HeadTracker:
119
  """Check if the head tracker is available and ready."""
120
  return self.model is not None and self._detections_class is not None
121
 
122
- def _select_best_face(self, detections) -> int | None:
123
  """Select the best face based on confidence and area.
124
 
125
  Args:
@@ -152,7 +139,9 @@ class HeadTracker:
152
  best_idx = valid_indices[np.argmax(scores)]
153
  return int(best_idx)
154
 
155
- def _bbox_to_normalized_coords(self, bbox: NDArray[np.float32], w: int, h: int) -> NDArray[np.float32]:
 
 
156
  """Convert bounding box center to normalized coordinates [-1, 1].
157
 
158
  Args:
@@ -172,7 +161,9 @@ class HeadTracker:
172
 
173
  return np.array([norm_x, norm_y], dtype=np.float32)
174
 
175
- def get_head_position(self, img: NDArray[np.uint8]) -> tuple[NDArray[np.float32] | None, float | None]:
 
 
176
  """Get head position from face detection.
177
 
178
  Args:
@@ -186,36 +177,14 @@ class HeadTracker:
186
 
187
  h, w = img.shape[:2]
188
 
189
- # Frame skip optimization: return last detection if within skip limit
190
- if (
191
- self._max_skip_frames > 0
192
- and self._last_detection is not None
193
- and self._frames_since_detection < self._max_skip_frames
194
- ):
195
- self._frames_since_detection += 1
196
- return self._last_detection
197
-
198
  try:
199
- # Downscale image for faster inference if scale < 1.0
200
- if self._inference_scale < 1.0:
201
- import cv2
202
-
203
- new_w = int(w * self._inference_scale)
204
- new_h = int(h * self._inference_scale)
205
- inference_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
206
- else:
207
- inference_img = img
208
- new_w, new_h = w, h
209
-
210
  # Run YOLO inference
211
- results = self.model(inference_img, verbose=False)
212
  detections = self._detections_class.from_ultralytics(results[0])
213
 
214
  # Select best face
215
  face_idx = self._select_best_face(detections)
216
  if face_idx is None:
217
- self._last_detection = None
218
- self._frames_since_detection = 0
219
  return None, None
220
 
221
  bbox = detections.xyxy[face_idx]
@@ -223,90 +192,11 @@ class HeadTracker:
223
  if detections.confidence is not None:
224
  confidence = float(detections.confidence[face_idx])
225
 
226
- # Scale bbox back to original resolution if downscaled
227
- if self._inference_scale < 1.0:
228
- scale_factor = 1.0 / self._inference_scale
229
- bbox = bbox * scale_factor
230
-
231
- # Get face center in [-1, 1] coordinates (using original dimensions)
232
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
233
 
234
- # Cache result for frame skipping
235
- self._last_detection = (face_center, confidence)
236
- self._frames_since_detection = 0
237
-
238
  return face_center, confidence
239
 
240
  except Exception as e:
241
  logger.debug("Error in head position detection: %s", e)
242
  return None, None
243
-
244
- def set_inference_scale(self, scale: float) -> None:
245
- """Set the inference resolution scale factor.
246
-
247
- Args:
248
- scale: Scale factor (0.25 to 1.0). Lower = faster but less accurate.
249
- """
250
- self._inference_scale = min(1.0, max(0.25, scale))
251
- logger.debug("Inference scale set to %.2f", self._inference_scale)
252
-
253
- def set_max_skip_frames(self, skip: int) -> None:
254
- """Set maximum frames to skip between detections.
255
-
256
- Args:
257
- skip: Number of frames to skip (0 = no skipping).
258
- Higher values reduce CPU but may cause tracking lag.
259
- """
260
- self._max_skip_frames = max(0, skip)
261
- logger.debug("Max skip frames set to %d", self._max_skip_frames)
262
-
263
- def clear_detection_cache(self) -> None:
264
- """Clear cached detection result."""
265
- self._last_detection = None
266
- self._frames_since_detection = 0
267
-
268
- def suspend(self) -> None:
269
- """Suspend the head tracker to release YOLO model from memory.
270
-
271
- Call resume() to reload the model.
272
- """
273
- if self.model is None:
274
- logger.debug("HeadTracker model not loaded, nothing to suspend")
275
- return
276
-
277
- logger.info("Suspending HeadTracker - releasing YOLO model...")
278
-
279
- try:
280
- # Release YOLO model from memory
281
- del self.model
282
- self.model = None
283
-
284
- # Also clear the detections class reference
285
- self._detections_class = None
286
-
287
- # Reset load state so resume can reload
288
- self._model_load_attempted = False
289
- self._model_load_error = None
290
-
291
- # Clear detection cache
292
- self.clear_detection_cache()
293
-
294
- logger.info("HeadTracker suspended - YOLO model released")
295
- except Exception as e:
296
- logger.warning("Error suspending HeadTracker: %s", e)
297
-
298
- def resume(self) -> None:
299
- """Resume the head tracker by reloading the YOLO model."""
300
- if self.model is not None:
301
- logger.debug("HeadTracker model already loaded")
302
- return
303
-
304
- logger.info("Resuming HeadTracker - reloading YOLO model...")
305
-
306
- # Reload the model
307
- self._load_model()
308
-
309
- if self.is_available:
310
- logger.info("HeadTracker resumed - YOLO model loaded")
311
- else:
312
- logger.warning("HeadTracker resume failed - model not available")
 
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
 
9
  import logging
10
+ from typing import Tuple, Optional
11
 
12
  import numpy as np
13
+ from numpy.typing import NDArray
14
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
 
19
  class HeadTracker:
20
  """Lightweight head tracker using YOLO for face detection.
21
+
22
  Model is loaded at initialization time to ensure face tracking
23
  is ready immediately (matching conversation_app behavior).
 
 
 
 
24
  """
25
 
26
  def __init__(
 
29
  model_filename: str = "model.pt",
30
  confidence_threshold: float = 0.3,
31
  device: str = "cpu",
 
32
  ) -> None:
33
  """Initialize YOLO-based head tracker.
34
 
 
37
  model_filename: Model file name
38
  confidence_threshold: Minimum confidence for face detection
39
  device: Device to run inference on ('cpu' or 'cuda')
 
40
  """
41
  self.confidence_threshold = confidence_threshold
42
  self.model = None
 
45
  self._device = device
46
  self._detections_class = None
47
  self._model_load_attempted = False
48
+ self._model_load_error: Optional[str] = None
49
+
 
 
 
 
 
 
 
 
50
  # Load model immediately at init (not lazy)
51
  self._load_model()
52
 
53
  def _load_model(self) -> None:
54
+ """Load YOLO model with retry logic."""
55
  if self._model_load_attempted:
56
  return
57
+
58
  self._model_load_attempted = True
59
+
60
  try:
 
 
 
61
  from ultralytics import YOLO
62
+ from supervision import Detections
63
+ from huggingface_hub import hf_hub_download
64
+ import time
65
+
66
  self._detections_class = Detections
67
+
68
+ # Download with retries
69
+ max_retries = 3
70
+ retry_delay = 5
71
+ model_path = None
72
+ last_error = None
73
+
74
+ for attempt in range(max_retries):
75
+ try:
76
+ model_path = hf_hub_download(
77
+ repo_id=self._model_repo,
78
+ filename=self._model_filename,
79
+ )
80
+ break
81
+ except Exception as e:
82
+ last_error = e
83
+ if attempt < max_retries - 1:
84
+ logger.warning(
85
+ "Model download failed (attempt %d/%d): %s. Retrying in %ds...",
86
+ attempt + 1, max_retries, e, retry_delay
87
+ )
88
+ time.sleep(retry_delay)
89
+
90
+ if model_path is None:
91
+ raise last_error
92
+
93
  self.model = YOLO(model_path).to(self._device)
94
+ logger.info("YOLO face detection model loaded")
95
  except ImportError as e:
96
  self._model_load_error = f"Missing dependencies: {e}"
97
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
98
  self.model = None
 
 
 
 
99
  except Exception as e:
100
  self._model_load_error = str(e)
101
  logger.error("Failed to load YOLO model: %s", e)
 
106
  """Check if the head tracker is available and ready."""
107
  return self.model is not None and self._detections_class is not None
108
 
109
+ def _select_best_face(self, detections) -> Optional[int]:
110
  """Select the best face based on confidence and area.
111
 
112
  Args:
 
139
  best_idx = valid_indices[np.argmax(scores)]
140
  return int(best_idx)
141
 
142
+ def _bbox_to_normalized_coords(
143
+ self, bbox: NDArray[np.float32], w: int, h: int
144
+ ) -> NDArray[np.float32]:
145
  """Convert bounding box center to normalized coordinates [-1, 1].
146
 
147
  Args:
 
161
 
162
  return np.array([norm_x, norm_y], dtype=np.float32)
163
 
164
+ def get_head_position(
165
+ self, img: NDArray[np.uint8]
166
+ ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
167
  """Get head position from face detection.
168
 
169
  Args:
 
177
 
178
  h, w = img.shape[:2]
179
 
 
 
 
 
 
 
 
 
 
180
  try:
 
 
 
 
 
 
 
 
 
 
 
181
  # Run YOLO inference
182
+ results = self.model(img, verbose=False)
183
  detections = self._detections_class.from_ultralytics(results[0])
184
 
185
  # Select best face
186
  face_idx = self._select_best_face(detections)
187
  if face_idx is None:
 
 
188
  return None, None
189
 
190
  bbox = detections.xyxy[face_idx]
 
192
  if detections.confidence is not None:
193
  confidence = float(detections.confidence[face_idx])
194
 
195
+ # Get face center in [-1, 1] coordinates
 
 
 
 
 
196
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
197
 
 
 
 
 
198
  return face_center, confidence
199
 
200
  except Exception as e:
201
  logger.debug("Error in head position detection: %s", e)
202
  return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py RENAMED
@@ -7,14 +7,48 @@ with Home Assistant via ESPHome protocol for voice control.
7
 
8
  import asyncio
9
  import logging
10
- import sys
11
  import threading
 
 
12
 
13
- from reachy_mini import ReachyMiniApp
14
 
15
- from .voice_assistant import VoiceAssistantService
16
 
17
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  class ReachyMiniHaVoice(ReachyMiniApp):
@@ -27,40 +61,60 @@ class ReachyMiniHaVoice(ReachyMiniApp):
27
  """
28
 
29
  # No custom web UI needed - configuration is automatic via Home Assistant
30
- custom_app_url: str | None = None
31
 
32
  def __init__(self, *args, **kwargs):
33
  """Initialize the app."""
34
  super().__init__(*args, **kwargs)
35
- self.stop_event = threading.Event()
 
36
 
37
  def wrapped_run(self, *args, **kwargs) -> None:
38
  """
39
- Override wrapped_run to handle Reachy Mini connection failures.
 
 
40
  """
41
  logger.info("Starting Reachy Mini HA Voice App...")
42
 
43
- # Connect to ReachyMini
44
- try:
45
- logger.info("Attempting to connect to Reachy Mini...")
46
- super().wrapped_run(*args, **kwargs)
47
- except TimeoutError as e:
48
- logger.error(f"Timeout connecting to Reachy Mini: {e}")
49
- sys.exit(1)
50
- except Exception as e:
51
- error_str = str(e)
52
- if "Unable to connect" in error_str or "Timeout" in error_str:
53
- logger.error(f"Failed to connect to Reachy Mini: {e}")
54
- sys.exit(1)
55
- else:
56
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
59
  """
60
  Main application entry point.
61
 
62
  Args:
63
- reachy_mini: The Reachy Mini robot instance (required, cannot be None)
64
  stop_event: Event to signal graceful shutdown
65
  """
66
  logger.info("Starting Reachy Mini for Home Assistant...")
@@ -82,8 +136,12 @@ class ReachyMiniHaVoice(ReachyMiniApp):
82
  logger.info("ESPHome Server: 0.0.0.0:6053")
83
  logger.info("Camera Server: 0.0.0.0:8081")
84
  logger.info("Wake word: Okay Nabu")
85
- logger.info("Motion control: enabled")
86
- logger.info("Camera: enabled (Reachy Mini)")
 
 
 
 
87
  logger.info("=" * 50)
88
  logger.info("To connect from Home Assistant:")
89
  logger.info(" Settings -> Devices & Services -> Add Integration")
@@ -120,19 +178,13 @@ class ReachyMiniHaVoice(ReachyMiniApp):
120
  logger.info("Reachy Mini HA stopped.")
121
 
122
 
123
- # This is called when running as: python -m reachy_mini_home_assistant.main
124
  if __name__ == "__main__":
125
  logging.basicConfig(
126
  level=logging.INFO,
127
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
128
  )
129
 
130
- # Reduce verbosity for some noisy modules
131
- logging.getLogger("reachy_mini.media.media_manager").setLevel(logging.WARNING)
132
- logging.getLogger("reachy_mini.media.camera_base").setLevel(logging.WARNING)
133
- logging.getLogger("reachy_mini.media.audio_base").setLevel(logging.WARNING)
134
- logging.getLogger("matplotlib").setLevel(logging.WARNING)
135
-
136
  app = ReachyMiniHaVoice()
137
  try:
138
  app.wrapped_run()
 
7
 
8
  import asyncio
9
  import logging
10
+ import socket
11
  import threading
12
+ import time
13
+ from typing import Optional
14
 
15
+ logger = logging.getLogger(__name__)
16
 
 
17
 
18
def _check_zenoh_available(
    timeout: float = 1.0,
    host: str = "127.0.0.1",
    port: int = 7447,
) -> bool:
    """Check if the Zenoh service accepts TCP connections.

    Args:
        timeout: Seconds to wait for the connection attempt.
        host: Address to probe; defaults to the local loopback address.
        port: TCP port to probe; defaults to 7447.

    Returns:
        True when a connection could be opened, False on any socket error.
    """
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        # Timeouts and refused connections are both OSError subclasses.
        return False
25
+
26
+
27
# Use the Reachy Mini SDK when it is installed; otherwise fall back to a
# minimal stand-in so this module can still be imported and run without it.
try:
    from reachy_mini import ReachyMini, ReachyMiniApp
    REACHY_MINI_AVAILABLE = True
except ImportError:
    REACHY_MINI_AVAILABLE = False

    class ReachyMiniApp:
        """Dummy base class used when the reachy_mini SDK is not installed."""

        custom_app_url = None

        def __init__(self, *args, **kwargs):
            # Accept and ignore any arguments: ReachyMiniHaVoice.__init__
            # forwards *args/**kwargs to its base class.
            self.stop_event = threading.Event()

        def wrapped_run(self, *args, **kwargs):
            """Do nothing; there is no SDK to start."""

        def stop(self):
            """Signal shutdown through the stop event."""
            self.stop_event.set()

    ReachyMini = None
48
+
49
+
50
+ from .voice_assistant import VoiceAssistantService
51
+ from .motion import ReachyMiniMotion
52
 
53
 
54
  class ReachyMiniHaVoice(ReachyMiniApp):
 
61
  """
62
 
63
  # No custom web UI needed - configuration is automatic via Home Assistant
64
+ custom_app_url: Optional[str] = None
65
 
66
  def __init__(self, *args, **kwargs):
67
  """Initialize the app."""
68
  super().__init__(*args, **kwargs)
69
+ if not hasattr(self, 'stop_event'):
70
+ self.stop_event = threading.Event()
71
 
72
  def wrapped_run(self, *args, **kwargs) -> None:
73
  """
74
+ Override wrapped_run to handle Zenoh connection failures gracefully.
75
+
76
+ If Zenoh is not available, run in standalone mode without robot control.
77
  """
78
  logger.info("Starting Reachy Mini HA Voice App...")
79
 
80
+ # Check if Zenoh is available before trying to connect
81
+ if not _check_zenoh_available():
82
+ logger.warning("Zenoh service not available (port 7447)")
83
+ logger.info("Running in standalone mode without robot control")
84
+ self._run_standalone()
85
+ return
86
+
87
+ # Zenoh is available, try normal startup with ReachyMini
88
+ if REACHY_MINI_AVAILABLE:
89
+ try:
90
+ logger.info("Attempting to connect to Reachy Mini...")
91
+ super().wrapped_run(*args, **kwargs)
92
+ except TimeoutError as e:
93
+ logger.warning(f"Timeout connecting to Reachy Mini: {e}")
94
+ logger.info("Falling back to standalone mode")
95
+ self._run_standalone()
96
+ except Exception as e:
97
+ error_str = str(e)
98
+ if "Unable to connect" in error_str or "ZError" in error_str or "Timeout" in error_str:
99
+ logger.warning(f"Failed to connect to Reachy Mini: {e}")
100
+ logger.info("Falling back to standalone mode")
101
+ self._run_standalone()
102
+ else:
103
+ raise
104
+ else:
105
+ logger.info("Reachy Mini SDK not available, running standalone")
106
+ self._run_standalone()
107
+
108
def _run_standalone(self) -> None:
    """Call ``run`` with no robot instance and this app's stop event."""
    robot = None  # standalone mode: there is no Reachy Mini to pass along
    self.run(robot, self.stop_event)
111
 
112
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
113
  """
114
  Main application entry point.
115
 
116
  Args:
117
+ reachy_mini: The Reachy Mini robot instance (can be None)
118
  stop_event: Event to signal graceful shutdown
119
  """
120
  logger.info("Starting Reachy Mini for Home Assistant...")
 
136
  logger.info("ESPHome Server: 0.0.0.0:6053")
137
  logger.info("Camera Server: 0.0.0.0:8081")
138
  logger.info("Wake word: Okay Nabu")
139
+ if reachy_mini:
140
+ logger.info("Motion control: enabled")
141
+ logger.info("Camera: enabled (Reachy Mini)")
142
+ else:
143
+ logger.info("Motion control: disabled (no robot)")
144
+ logger.info("Camera: test pattern (no robot)")
145
  logger.info("=" * 50)
146
  logger.info("To connect from Home Assistant:")
147
  logger.info(" Settings -> Devices & Services -> Add Integration")
 
178
  logger.info("Reachy Mini HA stopped.")
179
 
180
 
181
+ # This is called when running as: python -m reachy_mini_ha_voice.main
182
  if __name__ == "__main__":
183
  logging.basicConfig(
184
  level=logging.INFO,
185
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
186
  )
187
 
 
 
 
 
 
 
188
  app = ReachyMiniHaVoice()
189
  try:
190
  app.wrapped_run()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py RENAMED
@@ -5,18 +5,15 @@ import logging
5
  from dataclasses import asdict, dataclass, field
6
  from enum import Enum
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING
 
9
 
10
  if TYPE_CHECKING:
11
- import threading
12
- from queue import Queue
13
-
14
  from pymicro_wakeword import MicroWakeWord
15
  from pyopen_wakeword import OpenWakeWord
16
-
17
- from .audio.audio_player import AudioPlayer
18
- from .entities.entity import ESPHomeEntity, MediaPlayerEntity
19
- from .protocol.satellite import VoiceSatelliteProtocol
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
@@ -31,21 +28,18 @@ class AvailableWakeWord:
31
  id: str
32
  type: WakeWordType
33
  wake_word: str
34
- trained_languages: list[str]
35
  wake_word_path: Path
36
- probability_cutoff: float = 0.7
37
 
38
- def load(self) -> "MicroWakeWord | OpenWakeWord":
39
  if self.type == WakeWordType.MICRO_WAKE_WORD:
40
  from pymicro_wakeword import MicroWakeWord
41
-
42
  return MicroWakeWord.from_config(config_path=self.wake_word_path)
43
 
44
  if self.type == WakeWordType.OPEN_WAKE_WORD:
45
  from pyopen_wakeword import OpenWakeWord
46
-
47
  oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
48
- oww_model.wake_word = self.wake_word
49
  return oww_model
50
 
51
  raise ValueError(f"Unexpected wake word type: {self.type}")
@@ -53,34 +47,25 @@ class AvailableWakeWord:
53
 
54
  @dataclass
55
  class Preferences:
56
- active_wake_words: list[str] = field(default_factory=list)
 
 
 
 
57
  # Continuous conversation mode (controlled from Home Assistant)
58
  continuous_conversation: bool = False
59
- # Unified idle behavior toggle (controlled from Home Assistant)
60
- idle_behavior_enabled: bool = False
61
- # Sendspin discovery and playback toggle (controlled from Home Assistant)
62
- sendspin_enabled: bool = False
63
- # Vision toggles and parameters (controlled from Home Assistant)
64
- face_tracking_enabled: bool = False
65
- gesture_detection_enabled: bool = False
66
- face_confidence_threshold: float = 0.5
67
-
68
- def set_idle_behavior_enabled(self, enabled: bool) -> None:
69
- """Update the unified idle behavior toggle."""
70
- self.idle_behavior_enabled = enabled
71
 
72
 
73
  @dataclass
74
  class ServerState:
75
  """Global server state."""
76
-
77
  name: str
78
  mac_address: str
79
- audio_queue: "Queue[bytes | None]"
80
- entities: "list[ESPHomeEntity]"
81
- available_wake_words: "dict[str, AvailableWakeWord]"
82
- wake_words: "dict[str, MicroWakeWord | OpenWakeWord]"
83
- active_wake_words: set[str]
84
  stop_word: "MicroWakeWord"
85
  music_player: "AudioPlayer"
86
  tts_player: "AudioPlayer"
@@ -91,88 +76,20 @@ class ServerState:
91
  download_dir: Path
92
 
93
  # Reachy Mini specific
94
- reachy_mini: object
95
  motion_enabled: bool = True
96
- motion: object | None = None # ReachyMiniMotion instance
97
 
98
- media_player_entity: "MediaPlayerEntity | None" = None
99
- satellite: "VoiceSatelliteProtocol | None" = None
100
  wake_words_changed: bool = False
101
  refractory_seconds: float = 2.0
102
- timer_max_ring_seconds: float = 900.0
103
- _entities_initialized: bool = False
104
-
105
- _services_suspended: bool = False
106
-
107
- # Mute state (controlled from Home Assistant) - thread-safe via properties
108
- _is_muted: bool = False
109
-
110
- # Camera state (controlled from Home Assistant) - thread-safe via properties
111
- _camera_enabled: bool = True
112
-
113
- # Thread safety
114
- _state_lock: "threading.Lock | None" = None
115
-
116
- def __post_init__(self):
117
- """Initialize state lock after dataclass creation."""
118
- import threading
119
-
120
- object.__setattr__(self, "_state_lock", threading.Lock())
121
-
122
- @property
123
- def services_suspended(self) -> bool:
124
- """Thread-safe getter for services_suspended."""
125
- if self._state_lock is None:
126
- return self._services_suspended
127
- with self._state_lock:
128
- return self._services_suspended
129
-
130
- @services_suspended.setter
131
- def services_suspended(self, value: bool) -> None:
132
- """Thread-safe setter for services_suspended."""
133
- if self._state_lock is None:
134
- object.__setattr__(self, "_services_suspended", value)
135
- else:
136
- with self._state_lock:
137
- object.__setattr__(self, "_services_suspended", value)
138
-
139
- @property
140
- def is_muted(self) -> bool:
141
- """Thread-safe getter for is_muted."""
142
- if self._state_lock is None:
143
- return self._is_muted
144
- with self._state_lock:
145
- return self._is_muted
146
-
147
- @is_muted.setter
148
- def is_muted(self, value: bool) -> None:
149
- """Thread-safe setter for is_muted."""
150
- if self._state_lock is None:
151
- object.__setattr__(self, "_is_muted", value)
152
- else:
153
- with self._state_lock:
154
- object.__setattr__(self, "_is_muted", value)
155
-
156
- @property
157
- def camera_enabled(self) -> bool:
158
- """Thread-safe getter for camera_enabled."""
159
- if self._state_lock is None:
160
- return self._camera_enabled
161
- with self._state_lock:
162
- return self._camera_enabled
163
-
164
- @camera_enabled.setter
165
- def camera_enabled(self, value: bool) -> None:
166
- """Thread-safe setter for camera_enabled."""
167
- if self._state_lock is None:
168
- object.__setattr__(self, "_camera_enabled", value)
169
- else:
170
- with self._state_lock:
171
- object.__setattr__(self, "_camera_enabled", value)
172
 
173
  def save_preferences(self) -> None:
174
  """Save preferences as JSON."""
175
  _LOGGER.debug("Saving preferences: %s", self.preferences_path)
176
  self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
177
  with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
178
- json.dump(asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4)
 
 
 
5
  from dataclasses import asdict, dataclass, field
6
  from enum import Enum
7
  from pathlib import Path
8
+ from queue import Queue
9
+ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
10
 
11
  if TYPE_CHECKING:
 
 
 
12
  from pymicro_wakeword import MicroWakeWord
13
  from pyopen_wakeword import OpenWakeWord
14
+ from .entity import ESPHomeEntity, MediaPlayerEntity
15
+ from .audio_player import AudioPlayer
16
+ from .satellite import VoiceSatelliteProtocol
 
17
 
18
  _LOGGER = logging.getLogger(__name__)
19
 
 
28
  id: str
29
  type: WakeWordType
30
  wake_word: str
31
+ trained_languages: List[str]
32
  wake_word_path: Path
 
33
 
34
+ def load(self) -> "Union[MicroWakeWord, OpenWakeWord]":
35
  if self.type == WakeWordType.MICRO_WAKE_WORD:
36
  from pymicro_wakeword import MicroWakeWord
 
37
  return MicroWakeWord.from_config(config_path=self.wake_word_path)
38
 
39
  if self.type == WakeWordType.OPEN_WAKE_WORD:
40
  from pyopen_wakeword import OpenWakeWord
 
41
  oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
42
+ setattr(oww_model, "wake_word", self.wake_word)
43
  return oww_model
44
 
45
  raise ValueError(f"Unexpected wake word type: {self.type}")
 
47
 
48
  @dataclass
49
  class Preferences:
50
+ active_wake_words: List[str] = field(default_factory=list)
51
+ # Audio processing settings (persisted from Home Assistant)
52
+ agc_enabled: Optional[bool] = None # None = use hardware default
53
+ agc_max_gain: Optional[float] = None # None = use hardware default
54
+ noise_suppression: Optional[float] = None # None = use hardware default
55
  # Continuous conversation mode (controlled from Home Assistant)
56
  continuous_conversation: bool = False
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  @dataclass
60
  class ServerState:
61
  """Global server state."""
 
62
  name: str
63
  mac_address: str
64
+ audio_queue: "Queue[Optional[bytes]]"
65
+ entities: "List[ESPHomeEntity]"
66
+ available_wake_words: "Dict[str, AvailableWakeWord]"
67
+ wake_words: "Dict[str, Union[MicroWakeWord, OpenWakeWord]]"
68
+ active_wake_words: Set[str]
69
  stop_word: "MicroWakeWord"
70
  music_player: "AudioPlayer"
71
  tts_player: "AudioPlayer"
 
76
  download_dir: Path
77
 
78
  # Reachy Mini specific
79
+ reachy_mini: Optional[object] = None
80
  motion_enabled: bool = True
81
+ motion: Optional[object] = None # ReachyMiniMotion instance
82
 
83
+ media_player_entity: "Optional[MediaPlayerEntity]" = None
84
+ satellite: "Optional[VoiceSatelliteProtocol]" = None
85
  wake_words_changed: bool = False
86
  refractory_seconds: float = 2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  def save_preferences(self) -> None:
89
  """Save preferences as JSON."""
90
  _LOGGER.debug("Saving preferences: %s", self.preferences_path)
91
  self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
92
  with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
93
+ json.dump(
94
+ asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4
95
+ )
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx RENAMED
File without changes
reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py RENAMED
@@ -5,6 +5,7 @@ MovementManager for unified 5Hz control with face tracking.
5
  """
6
 
7
  import logging
 
8
 
9
  from .movement_manager import MovementManager, RobotState
10
 
@@ -18,28 +19,31 @@ class ReachyMiniMotion:
18
  to the MovementManager which handles them in its 5Hz control loop.
19
  """
20
 
21
- def __init__(self, reachy_mini):
22
  self.reachy_mini = reachy_mini
23
- self._movement_manager: MovementManager | None = None
24
  self._camera_server = None # Reference to camera server for face tracking control
25
  self._is_speaking = False
26
 
27
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
28
 
29
- # Initialize movement manager
30
- try:
31
- self._movement_manager = MovementManager(reachy_mini)
32
- _LOGGER.debug("MovementManager created successfully")
33
- except Exception as e:
34
- _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
35
- self._movement_manager = None
 
 
 
36
 
37
  def set_reachy_mini(self, reachy_mini):
38
  """Set the Reachy Mini instance."""
39
  self.reachy_mini = reachy_mini
40
- if self._movement_manager is None:
41
  self._movement_manager = MovementManager(reachy_mini)
42
- else:
43
  self._movement_manager.robot = reachy_mini
44
 
45
  def set_camera_server(self, camera_server):
@@ -68,7 +72,7 @@ class ReachyMiniMotion:
68
  _LOGGER.info("Motion control stopped")
69
 
70
  @property
71
- def movement_manager(self) -> MovementManager | None:
72
  """Get the movement manager instance."""
73
  return self._movement_manager
74
 
@@ -164,31 +168,13 @@ class ReachyMiniMotion:
164
 
165
  self._is_speaking = False
166
  self._movement_manager.set_state(RobotState.IDLE)
167
- if self._movement_manager.get_idle_behavior_enabled():
168
- self._movement_manager.reset_to_neutral(duration=2.0)
169
- else:
170
- self._movement_manager.transition_to_idle_rest(duration=2.0)
171
 
172
  # Note: Face tracking remains enabled for continuous tracking
173
  # This allows the robot to always look at the user when they approach
174
 
175
  _LOGGER.debug("Reachy Mini: Idle pose")
176
 
177
- def on_pause_motion(self):
178
- """Called when motion should settle immediately.
179
-
180
- Used for zero-config gesture reactions such as the palm gesture.
181
- The robot smoothly returns to a neutral pose and then resumes its
182
- normal idle behavior.
183
- """
184
- if self._movement_manager is None:
185
- return
186
-
187
- self._is_speaking = False
188
- self._movement_manager.reset_to_neutral(duration=0.6)
189
- self._movement_manager.set_state(RobotState.IDLE)
190
- _LOGGER.debug("Reachy Mini: Motion paused to neutral idle")
191
-
192
  def on_timer_finished(self):
193
  """Called when a timer finishes - alert animation.
194
 
 
5
  """
6
 
7
  import logging
8
+ from typing import Optional
9
 
10
  from .movement_manager import MovementManager, RobotState
11
 
 
19
  to the MovementManager which handles them in its 5Hz control loop.
20
  """
21
 
22
+ def __init__(self, reachy_mini=None):
23
  self.reachy_mini = reachy_mini
24
+ self._movement_manager: Optional[MovementManager] = None
25
  self._camera_server = None # Reference to camera server for face tracking control
26
  self._is_speaking = False
27
 
28
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
29
 
30
+ # Initialize movement manager if robot is available
31
+ if reachy_mini is not None:
32
+ try:
33
+ self._movement_manager = MovementManager(reachy_mini)
34
+ _LOGGER.debug("MovementManager created successfully")
35
+ except Exception as e:
36
+ _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
37
+ self._movement_manager = None
38
+ else:
39
+ _LOGGER.debug("reachy_mini is None, MovementManager not created")
40
 
41
  def set_reachy_mini(self, reachy_mini):
42
  """Set the Reachy Mini instance."""
43
  self.reachy_mini = reachy_mini
44
+ if reachy_mini is not None and self._movement_manager is None:
45
  self._movement_manager = MovementManager(reachy_mini)
46
+ elif reachy_mini is not None and self._movement_manager is not None:
47
  self._movement_manager.robot = reachy_mini
48
 
49
  def set_camera_server(self, camera_server):
 
72
  _LOGGER.info("Motion control stopped")
73
 
74
  @property
75
+ def movement_manager(self) -> Optional[MovementManager]:
76
  """Get the movement manager instance."""
77
  return self._movement_manager
78
 
 
168
 
169
  self._is_speaking = False
170
  self._movement_manager.set_state(RobotState.IDLE)
171
+ self._movement_manager.reset_to_neutral(duration=0.5)
 
 
 
172
 
173
  # Note: Face tracking remains enabled for continuous tracking
174
  # This allows the robot to always look at the user when they approach
175
 
176
  _LOGGER.debug("Reachy Mini: Idle pose")
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  def on_timer_finished(self):
179
  """Called when a timer finishes - alert animation.
180
 
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/movement_manager.py RENAMED
@@ -5,7 +5,7 @@ This module provides a centralized control system for robot movements,
5
  inspired by the reachy_mini_conversation_app architecture.
6
 
7
  Key features:
8
- - Configurable control loop frequency (default 50Hz)
9
  - Command queue pattern (thread-safe external API)
10
  - Error throttling (prevents log explosion)
11
  - JSON-driven animation system (conversation state animations)
@@ -18,158 +18,159 @@ Key features:
18
 
19
  import logging
20
  import math
 
21
  import threading
22
  import time
23
- from collections import deque
24
- from pathlib import Path
25
- from queue import Queue
26
- from typing import TYPE_CHECKING, Any
27
 
28
  import numpy as np
29
-
30
- from ..audio.doa_tracker import DOAConfig, DOATracker
31
- from ..core.config import Config
32
- from .animation_player import AnimationPlayer
33
- from .antenna import AntennaController
34
- from .command_runtime import handle_command, poll_commands, start_action
35
- from .control_runtime import (
36
- compose_final_pose,
37
- issue_control_command,
38
- run_control_loop,
39
- update_emotion_move,
40
- update_face_tracking,
41
- )
42
- from .emotion_moves import EmotionMove, is_emotion_available
43
- from .idle_runtime import (
44
- apply_idle_behavior_enabled,
45
- apply_idle_rest_pose,
46
- clear_idle_activity,
47
- clear_idle_animation,
48
- schedule_next_idle_action_time,
49
- transition_or_apply_idle_rest_pose,
50
- update_idle_look_around,
51
- )
52
- from .state_machine import (
53
- build_idle_pending_action,
54
- load_idle_behavior_config,
55
- MovementState,
56
- PendingAction,
57
- pick_idle_random_action,
58
- RobotState,
59
- )
60
 
61
  if TYPE_CHECKING:
62
  from reachy_mini import ReachyMini
63
 
64
  logger = logging.getLogger(__name__)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  # =============================================================================
68
  # Constants
69
  # =============================================================================
70
 
71
- # Control loop defaults (actual values come from Config.motion)
72
- DEFAULT_CONTROL_LOOP_FREQUENCY_HZ = 100
73
- MAX_CONTROL_DT_S = 0.05
 
 
 
74
 
75
  # Animation suppression when face detected
76
  FACE_DETECTED_THRESHOLD = 0.001 # Minimum offset magnitude to consider face detected
77
- ANIMATION_BLEND_DURATION = 0.18 # Seconds to blend animation back when face lost
78
- FACE_TRACKING_ANIMATION_BLEND = 0.35
79
- IDLE_ACTION_ANIMATION_BLEND_DURATION = 0.4 # Slightly longer fade avoids visible idle/action handoff steps
80
- IDLE_ACTION_ANTENNA_SUPPRESSION = 0.25 # Keep idle antenna motion mostly continuous during idle actions
81
-
82
-
83
- def _smoothstep(value: float) -> float:
84
- """Return a smooth ease-in-out factor in the 0..1 range."""
85
- clamped = max(0.0, min(1.0, value))
86
- return clamped * clamped * (3.0 - 2.0 * clamped)
87
-
88
-
89
- def _smootherstep(value: float) -> float:
90
- """Return a softer ease-in-out factor with flatter endpoints."""
91
- clamped = max(0.0, min(1.0, value))
92
- return clamped * clamped * clamped * (clamped * (clamped * 6.0 - 15.0) + 10.0)
93
-
94
-
95
- # Pose epsilon constants are kept for compatibility with existing motion logic.
96
- POSE_EPS = 1e-3 # Max element delta in 4x4 pose matrix
97
- ANTENNA_EPS = 0.005 # Radians (~0.29 deg)
98
- BODY_YAW_EPS = 0.005 # Radians (~0.29 deg)
99
- IDLE_POSE_EPS = 0.0018 # Slightly relaxed pose deadband in quiet idle
100
- IDLE_BODY_YAW_EPS = 0.01 # Slightly relaxed body yaw deadband in quiet idle
101
- IDLE_ANTENNA_EPS = 0.012 # Larger idle antenna deadband to reduce tiny updates
102
 
103
  # Idle look-around behavior parameters
104
- IDLE_LOOK_AROUND_MIN_INTERVAL = 6.0 # Minimum seconds between look-arounds
105
- IDLE_LOOK_AROUND_MAX_INTERVAL = 14.0 # Maximum seconds between look-arounds
106
- IDLE_LOOK_AROUND_YAW_RANGE = 15.0 # Maximum yaw angle in degrees
107
- IDLE_LOOK_AROUND_PITCH_RANGE = 6.0 # Maximum pitch angle in degrees
108
- IDLE_LOOK_AROUND_DURATION = 2.0 # Duration of look-around action in seconds
109
- IDLE_INACTIVITY_THRESHOLD = 6.0 # Seconds of inactivity before look-around starts
110
- IDLE_LOOK_AROUND_PROBABILITY = 0.8 # Otherwise keep breathing-only cycle
111
- DEFAULT_IDLE_REST_POSE = {
112
- "pitch_deg": 0.0,
113
- "yaw_deg": 0.0,
114
- "roll_deg": 0.0,
115
- "x_m": 0.0,
116
- "y_m": 0.0,
117
- "z_m": 0.0,
118
- "antenna_left_rad": 0.0,
119
- "antenna_right_rad": 0.0,
120
  }
121
 
122
- _ANIMATION_CONFIG_FILE = Path(__file__).resolve().parent.parent / "animations" / "conversation_animations.json"
123
- _DEFAULT_IDLE_RANDOM_ACTIONS: list[dict[str, Any]] = [
124
- {
125
- "name": "curious_left",
126
- "weight": 1.0,
127
- "duration_s": 1.8,
128
- "yaw_range_deg": [-16.0, -6.0],
129
- "pitch_range_deg": [-3.0, 4.0],
130
- "roll_range_deg": [-4.0, 2.0],
131
- },
132
- {
133
- "name": "curious_right",
134
- "weight": 1.0,
135
- "duration_s": 1.8,
136
- "yaw_range_deg": [6.0, 16.0],
137
- "pitch_range_deg": [-3.0, 4.0],
138
- "roll_range_deg": [-2.0, 4.0],
139
- },
140
- {
141
- "name": "micro_nod",
142
- "weight": 0.9,
143
- "duration_s": 1.3,
144
- "yaw_range_deg": [-3.0, 3.0],
145
- "pitch_range_deg": [-10.0, -4.0],
146
- "roll_range_deg": [-2.0, 2.0],
147
- },
148
- {
149
- "name": "micro_tilt",
150
- "weight": 0.8,
151
- "duration_s": 1.6,
152
- "yaw_range_deg": [-6.0, 6.0],
153
- "pitch_range_deg": [-2.0, 4.0],
154
- "roll_range_deg": [-7.0, 7.0],
155
- },
156
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
 
159
  class MovementManager:
160
  """
161
- Unified movement manager with configurable control loop.
162
 
163
  All external interactions go through the command queue,
164
  ensuring thread safety and preventing race conditions.
165
  """
166
 
167
- def __init__(self, reachy_mini: "ReachyMini"):
168
  self.robot = reachy_mini
169
  self._now = time.monotonic
170
 
171
- # Command queue - all external threads communicate through this (size limit 100)
172
- self._command_queue: Queue[tuple[str, Any]] = Queue(maxsize=100)
173
 
174
  # Internal state (only modified by control loop)
175
  self.state = MovementState()
@@ -181,288 +182,69 @@ class MovementManager:
181
 
182
  # Thread control
183
  self._stop_event = threading.Event()
184
- self._draining_event = threading.Event() # Thread-safe graceful shutdown flag
185
- self._emotion_playing_event = threading.Event() # Pause when emotion animation playing
186
- self._robot_paused_event = threading.Event() # Pause when robot disconnected/sleeping
187
- self._robot_resumed_event = threading.Event() # Signal when robot resumes (for event-driven wait)
188
- self._robot_resumed_event.set() # Start in resumed state
189
- self._thread: threading.Thread | None = None
190
 
191
  # Error throttling
192
  self._last_error_time = 0.0
193
- self._error_interval = 2.0 # Log at most once per 2 seconds in error mode
194
  self._suppressed_errors = 0
195
 
196
  # Connection health tracking
197
  self._connection_lost = False
198
  self._last_successful_command = self._now()
199
  self._connection_timeout = 3.0
200
- self._reconnect_backoff_initial = max(0.2, float(Config.motion.reconnect_backoff_initial_s))
201
- self._reconnect_backoff_max = max(self._reconnect_backoff_initial, float(Config.motion.reconnect_backoff_max_s))
202
- self._reconnect_backoff_multiplier = max(1.0, float(Config.motion.reconnect_backoff_multiplier))
203
- self._reconnect_attempt_interval = self._reconnect_backoff_initial
204
  self._last_reconnect_attempt = 0.0
205
  self._consecutive_errors = 0
206
  self._max_consecutive_errors = 5
207
 
208
  # Pending action
209
- self._pending_action: PendingAction | None = None
210
  self._action_start_time: float = 0.0
211
- self._action_start_pose: dict[str, float] = {}
212
- self._idle_action_queue: deque[PendingAction] = deque()
213
- self._idle_action_animation_suppression = 0.0
214
 
215
  # Face tracking offsets (from camera worker)
216
- self._face_tracking_offsets: tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
217
  self._face_tracking_lock = threading.Lock()
218
-
219
- # Last sent pose for change detection (reduce daemon load)
220
- self._last_sent_head_pose: np.ndarray | None = None
221
- self._last_sent_antennas: tuple[float, float] | None = None
222
- self._last_sent_body_yaw: float | None = None
223
- self._last_send_time = 0.0
224
-
225
- # Idle antenna smoothing state
226
- self._idle_antenna_smoothed: tuple[float, float] | None = None
227
- self._last_idle_antenna_update = 0.0
228
-
229
- # Command send pacing (separate from control loop frequency)
230
- control_rate = max(1.0, float(Config.motion.control_rate_hz or DEFAULT_CONTROL_LOOP_FREQUENCY_HZ))
231
- self._control_loop_hz = control_rate
232
- self._target_period = 1.0 / control_rate
233
- # Body yaw smoothing state (rate-limited)
234
- self._body_yaw_smoothed: float | None = None
235
- self._last_body_yaw_update = 0.0
236
-
237
  # Camera server reference for face tracking
238
  self._camera_server = None
 
 
 
 
239
 
240
- # Face tracking smoothing - DISABLED to match reference project
241
- # Reference project applies face tracking offsets directly without smoothing
242
- # Smoothing causes "lag" and "trailing" that looks unnatural
243
- # Only smooth interpolation when face is lost (handled in camera_server.py)
244
- self._smoothed_face_offsets: list[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
245
- # self._face_smoothing_factor = 0.3 # DISABLED - direct application instead
246
-
247
- # Emotion move playback state
248
- self._emotion_move: EmotionMove | None = None
249
- self._emotion_start_time: float = 0.0
250
- self._emotion_move_lock = threading.Lock()
251
-
252
- # DOA (Direction of Arrival) sound tracking
253
- self._doa_tracker = DOATracker(
254
- movement_callback=self._on_doa_turn,
255
- config=DOAConfig(),
256
- )
257
- self._doa_enabled = True # Can be disabled via entity
258
-
259
- # Idle look-around behavior toggle (exposed via ESPHome switch)
260
- # Default OFF to prioritize long-running stability.
261
- self._idle_motion_enabled = False
262
- # Idle antenna animation toggle (exposed via ESPHome switch)
263
- self._idle_antenna_enabled = False
264
- # Idle random actions toggle (pure movement, no audio)
265
- self._idle_random_actions_enabled = False
266
- self._idle_rest_head_pitch_rad = math.radians(float(DEFAULT_IDLE_REST_POSE["pitch_deg"]))
267
- self._idle_rest_head_yaw_rad = math.radians(float(DEFAULT_IDLE_REST_POSE["yaw_deg"]))
268
- self._idle_rest_head_roll_rad = math.radians(float(DEFAULT_IDLE_REST_POSE["roll_deg"]))
269
- self._idle_rest_x_m = float(DEFAULT_IDLE_REST_POSE["x_m"])
270
- self._idle_rest_y_m = float(DEFAULT_IDLE_REST_POSE["y_m"])
271
- self._idle_rest_z_m = float(DEFAULT_IDLE_REST_POSE["z_m"])
272
- self._idle_rest_antenna_left_rad = float(DEFAULT_IDLE_REST_POSE["antenna_left_rad"])
273
- self._idle_rest_antenna_right_rad = float(DEFAULT_IDLE_REST_POSE["antenna_right_rad"])
274
- self._idle_random_actions_probability = IDLE_LOOK_AROUND_PROBABILITY
275
- self._idle_random_actions_min_interval = IDLE_LOOK_AROUND_MIN_INTERVAL
276
- self._idle_random_actions_max_interval = IDLE_LOOK_AROUND_MAX_INTERVAL
277
- self._idle_random_actions: list[dict[str, Any]] = []
278
- self._load_idle_random_actions_config()
279
-
280
- # Antenna controller (handles freeze/unfreeze for listening mode)
281
- self._antenna_controller = AntennaController(time_func=self._now)
282
-
283
- logger.info("MovementManager initialized with AnimationPlayer and DOA tracking")
284
-
285
- @staticmethod
286
- def _is_connection_error(exc: Exception) -> bool:
287
- """Best-effort connection error detection without relying on private SDK state."""
288
- if isinstance(exc, (ConnectionError, TimeoutError, OSError)):
289
- return True
290
-
291
- error_msg = str(exc).lower()
292
- connection_markers = (
293
- "lost connection",
294
- "connection lost",
295
- "connection refused",
296
- "connection reset",
297
- "not connected",
298
- "timed out",
299
- "timeout",
300
- "broken pipe",
301
- "unavailable",
302
- )
303
- return any(marker in error_msg for marker in connection_markers)
304
 
305
  # =========================================================================
306
  # Thread-safe public API (called from any thread)
307
  # =========================================================================
308
 
309
- def _enqueue_command(self, command: str, payload: Any, warning_label: str, timeout: float = 0.1) -> bool:
310
- """Queue a command for the control loop."""
311
- try:
312
- self._command_queue.put((command, payload), timeout=timeout)
313
- return True
314
- except Exception:
315
- logger.warning("Command queue full, dropping %s command", warning_label)
316
- return False
317
-
318
  def set_state(self, new_state: RobotState) -> None:
319
  """Thread-safe: Set robot state."""
320
- self._enqueue_command("set_state", new_state, "set_state")
321
 
322
  def set_listening(self, listening: bool) -> None:
323
  """Thread-safe: Set listening state."""
324
  state = RobotState.LISTENING if listening else RobotState.IDLE
325
- self._enqueue_command("set_state", state, "set_listening")
326
 
327
  def set_thinking(self) -> None:
328
  """Thread-safe: Set thinking state."""
329
- self._enqueue_command("set_state", RobotState.THINKING, "set_thinking")
330
 
331
  def set_speaking(self, speaking: bool) -> None:
332
  """Thread-safe: Set speaking state."""
333
  state = RobotState.SPEAKING if speaking else RobotState.IDLE
334
- self._enqueue_command("set_state", state, "set_speaking")
335
 
336
  def set_idle(self) -> None:
337
  """Thread-safe: Return to idle state."""
338
- self._enqueue_command("set_state", RobotState.IDLE, "set_idle", timeout=0)
339
-
340
- def pause_for_emotion(self) -> None:
341
- """Thread-safe: Pause control loop while emotion animation is playing.
342
-
343
- DEPRECATED: Use queue_emotion_move() instead, which integrates emotion
344
- playback into the control loop without needing to pause.
345
- """
346
- self._emotion_playing_event.set()
347
- logger.debug("MovementManager paused for emotion animation")
348
-
349
- def resume_after_emotion(self) -> None:
350
- """Thread-safe: Resume control loop after emotion animation completes.
351
-
352
- DEPRECATED: Use queue_emotion_move() instead.
353
- """
354
- self._emotion_playing_event.clear()
355
- logger.debug("MovementManager resumed after emotion animation")
356
-
357
- def pause_for_robot_disconnect(self) -> None:
358
- """Thread-safe: Pause control loop when robot is disconnected.
359
-
360
- Called by robot state monitor when connection is lost (e.g., sleep mode).
361
- The control loop will skip sending commands while paused.
362
- """
363
- if not self._robot_paused_event.is_set():
364
- self._robot_paused_event.set()
365
- self._robot_resumed_event.clear() # Clear resume signal
366
- # Reset connection tracking state
367
- self._connection_lost = False
368
- self._consecutive_errors = 0
369
- self._suppressed_errors = 0
370
- logger.info("MovementManager paused - robot disconnected")
371
-
372
- def resume_after_robot_connect(self) -> None:
373
- """Thread-safe: Resume control loop when robot reconnects.
374
-
375
- Called by robot state monitor when connection is restored.
376
- """
377
- if self._robot_paused_event.is_set():
378
- self._robot_paused_event.clear()
379
- self._robot_resumed_event.set() # Signal resume to wake waiting threads
380
- self._last_successful_command = self._now()
381
- logger.info("MovementManager resumed - robot reconnected")
382
-
383
- def suspend(self) -> None:
384
- """Suspend the movement manager runtime resources.
385
-
386
- This stops the control loop thread to release CPU resources.
387
- The service can be resumed later with resume().
388
- """
389
- if not self.is_running:
390
- logger.debug("MovementManager not running, nothing to suspend")
391
- return
392
-
393
- logger.info("Suspending MovementManager resources...")
394
-
395
- # First pause the robot operations
396
- self.pause_for_robot_disconnect()
397
-
398
- # Then stop the control loop thread to release CPU
399
- self._draining_event.set()
400
- time.sleep(0.05) # Wait for in-flight commands
401
- self._stop_event.set()
402
-
403
- # Wait for thread to finish
404
- if self._thread is not None:
405
- self._thread.join(timeout=1.0)
406
- if self._thread.is_alive():
407
- logger.warning("MovementManager thread did not stop cleanly during suspend")
408
-
409
- # Clear events for next start
410
- self._draining_event.clear()
411
- self._stop_event.clear()
412
-
413
- logger.info("MovementManager suspended - CPU released")
414
-
415
- def resume_from_suspend(self) -> None:
416
- """Resume the movement manager runtime resources.
417
-
418
- This restarts the control loop thread.
419
- """
420
- if self.is_running:
421
- logger.debug("MovementManager already running")
422
- return
423
-
424
- logger.info("Resuming MovementManager resources...")
425
-
426
- # Resume robot operations
427
- self.resume_after_robot_connect()
428
-
429
- # Restart the control loop thread
430
- self._stop_event.clear()
431
- self._draining_event.clear()
432
-
433
- # Reset idle animation state
434
- self._animation_player.set_animation("idle")
435
- self.state.robot_state = RobotState.IDLE
436
- self.state.idle_start_time = self._now()
437
-
438
- # Start thread
439
- self._thread = threading.Thread(
440
- target=self._control_loop,
441
- daemon=True,
442
- name="MovementManager",
443
- )
444
- self._thread.start()
445
-
446
- logger.info("MovementManager resumed")
447
-
448
- def queue_emotion_move(self, emotion_name: str) -> bool:
449
- """Thread-safe: Queue an emotion move to be played by the control loop.
450
-
451
- This method uses the SDK's RecordedMoves.evaluate(t) API to sample
452
- emotion poses in the control loop, which avoids conflicts with
453
- set_target() calls that would cause "a move is currently running" warnings.
454
-
455
- Args:
456
- emotion_name: Name of the emotion (e.g., "happy1", "sad1")
457
-
458
- Returns:
459
- True if emotion was queued successfully, False otherwise
460
- """
461
- return self._enqueue_command("emotion_move", emotion_name, "emotion_move")
462
 
463
  def queue_action(self, action: PendingAction) -> None:
464
  """Thread-safe: Queue a motion action."""
465
- self._enqueue_command("action", action, "action")
466
 
467
  def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
468
  """Thread-safe: Turn head to face a direction."""
@@ -471,17 +253,20 @@ class MovementManager:
471
  target_yaw=math.radians(yaw_deg),
472
  duration=duration,
473
  )
474
- self._enqueue_command("action", action, "turn_to")
475
 
476
  def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
477
  """Thread-safe: Perform a nod gesture."""
478
- self._enqueue_command("nod", (amplitude_deg, duration), "nod")
479
 
480
  def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
481
  """Thread-safe: Perform a head shake gesture."""
482
- self._enqueue_command("shake", (amplitude_deg, duration), "shake")
483
 
484
- def set_speech_sway(self, x: float, y: float, z: float, roll: float, pitch: float, yaw: float) -> None:
 
 
 
485
  """Thread-safe: Set speech-driven sway offsets.
486
 
487
  These offsets are applied on top of the current animation
@@ -491,7 +276,7 @@ class MovementManager:
491
  x, y, z: Position offsets in meters
492
  roll, pitch, yaw: Orientation offsets in radians
493
  """
494
- self._enqueue_command("speech_sway", (x, y, z, roll, pitch, yaw), "speech_sway")
495
 
496
  def reset_to_neutral(self, duration: float = 0.5) -> None:
497
  """Thread-safe: Reset to neutral position."""
@@ -503,27 +288,9 @@ class MovementManager:
503
  target_x=0.0,
504
  target_y=0.0,
505
  target_z=0.0,
506
- target_antenna_left=0.0,
507
- target_antenna_right=0.0,
508
  duration=duration,
509
  )
510
- self._enqueue_command("action", action, "neutral", timeout=0)
511
-
512
- def transition_to_idle_rest(self, duration: float = 2.0) -> None:
513
- """Thread-safe: Smoothly move into the configured idle rest pose."""
514
- action = PendingAction(
515
- name="idle_rest",
516
- target_pitch=self._idle_rest_head_pitch_rad,
517
- target_yaw=self._idle_rest_head_yaw_rad,
518
- target_roll=self._idle_rest_head_roll_rad,
519
- target_x=self._idle_rest_x_m,
520
- target_y=self._idle_rest_y_m,
521
- target_z=self._idle_rest_z_m,
522
- target_antenna_left=self._idle_rest_antenna_left_rad,
523
- target_antenna_right=self._idle_rest_antenna_right_rad,
524
- duration=duration,
525
- )
526
- self._enqueue_command("action", action, "idle_rest", timeout=0)
527
 
528
  def set_camera_server(self, camera_server) -> None:
529
  """Set the camera server for face tracking offsets.
@@ -534,103 +301,9 @@ class MovementManager:
534
  self._camera_server = camera_server
535
  logger.info("Camera server set for face tracking")
536
 
537
- # =========================================================================
538
- # DOA (Direction of Arrival) Sound Tracking API
539
- # =========================================================================
540
-
541
- def set_doa_enabled(self, enabled: bool) -> None:
542
- """Enable or disable DOA sound tracking.
543
-
544
- Args:
545
- enabled: True to enable, False to disable
546
- """
547
- self._doa_enabled = enabled
548
- self._doa_tracker.enabled = enabled
549
- logger.info("DOA tracking %s", "enabled" if enabled else "disabled")
550
-
551
- def get_doa_enabled(self) -> bool:
552
- """Get whether DOA sound tracking is enabled."""
553
- return self._doa_enabled
554
-
555
- def get_idle_behavior_enabled(self) -> bool:
556
- """Get whether any idle behavior subsystem is enabled."""
557
- return self._idle_behavior_enabled()
558
-
559
- def set_idle_behavior_enabled(self, enabled: bool) -> None:
560
- """Thread-safe: Enable or disable all idle behavior subsystems together."""
561
- self._enqueue_command("set_idle_behavior", enabled, "set_idle_behavior")
562
-
563
- def _idle_behavior_enabled(self) -> bool:
564
- """Whether any idle behavior subsystem is currently enabled."""
565
- return self._idle_motion_enabled or self._idle_antenna_enabled or self._idle_random_actions_enabled
566
-
567
- def _apply_idle_behavior_enabled(self, enabled: bool) -> None:
568
- apply_idle_behavior_enabled(self, enabled)
569
-
570
- def _apply_idle_rest_pose(self) -> None:
571
- apply_idle_rest_pose(self)
572
-
573
- def _transition_or_apply_idle_rest_pose(self, duration: float = 2.0) -> None:
574
- transition_or_apply_idle_rest_pose(self, duration=duration)
575
-
576
- def _clear_idle_activity(self) -> None:
577
- clear_idle_activity(self)
578
-
579
- def _clear_idle_animation(self) -> None:
580
- clear_idle_animation(self)
581
-
582
- def update_doa(self, angle_deg: float, energy: float) -> bool:
583
- """Update DOA tracker with new sound direction data.
584
-
585
- This should be called from the audio processing loop with data
586
- from the ReSpeaker microphone array.
587
-
588
- Args:
589
- angle_deg: Direction of arrival in degrees (-180 to 180)
590
- energy: Sound energy level (0 to 1)
591
-
592
- Returns:
593
- True if a turn was triggered, False otherwise
594
- """
595
- if not self._doa_enabled:
596
- return False
597
-
598
- # Update face detection state for DOA tracker
599
- self._doa_tracker.set_face_detected(self.state.face_detected)
600
-
601
- # Update conversation state
602
- in_conversation = self.state.robot_state in (
603
- RobotState.LISTENING,
604
- RobotState.THINKING,
605
- RobotState.SPEAKING,
606
- )
607
- self._doa_tracker.set_conversation_mode(in_conversation)
608
-
609
- return self._doa_tracker.update(angle_deg, energy)
610
-
611
- def _on_doa_turn(self, yaw_degrees: float, duration: float) -> None:
612
- """Callback from DOATracker when a turn should be executed.
613
-
614
- Args:
615
- yaw_degrees: Target yaw angle in degrees
616
- duration: Duration of the turn in seconds
617
- """
618
- # Create a look action similar to idle look-around
619
- action = PendingAction(
620
- name="doa_turn",
621
- target_yaw=math.radians(yaw_degrees),
622
- target_pitch=0.0, # Keep pitch neutral for DOA turns
623
- duration=duration,
624
- )
625
- try:
626
- self._command_queue.put(("action", action), timeout=0.1)
627
- logger.debug("DOA turn queued: %.1f° over %.1fs", yaw_degrees, duration)
628
- except Exception:
629
- logger.warning("Command queue full, dropping doa_turn command")
630
-
631
- def set_face_tracking_offsets(self, offsets: tuple[float, float, float, float, float, float]) -> None:
632
  """Thread-safe: Update face tracking offsets manually.
633
-
634
  Args:
635
  offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
636
  """
@@ -639,14 +312,14 @@ class MovementManager:
639
 
640
  def set_target_pose(
641
  self,
642
- x: float | None = None,
643
- y: float | None = None,
644
- z: float | None = None,
645
- roll: float | None = None,
646
- pitch: float | None = None,
647
- yaw: float | None = None,
648
- antenna_left: float | None = None,
649
- antenna_right: float | None = None,
650
  ) -> None:
651
  """Thread-safe: Set target pose components.
652
 
@@ -660,91 +333,113 @@ class MovementManager:
660
  roll, pitch, yaw: Head orientation in radians
661
  antenna_left, antenna_right: Antenna angles in radians
662
  """
663
- try:
664
- self._command_queue.put(
665
- (
666
- "set_pose",
667
- {
668
- "x": x,
669
- "y": y,
670
- "z": z,
671
- "roll": roll,
672
- "pitch": pitch,
673
- "yaw": yaw,
674
- "antenna_left": antenna_left,
675
- "antenna_right": antenna_right,
676
- },
677
- ),
678
- timeout=0.1,
679
- )
680
- except Exception:
681
- logger.warning("Command queue full, dropping set_pose command")
682
 
683
  # =========================================================================
684
  # Internal: Command processing (runs in control loop)
685
  # =========================================================================
686
 
687
- def _schedule_next_idle_action_time(self, now: float) -> None:
688
- schedule_next_idle_action_time(self, now)
689
-
690
- def _load_idle_random_actions_config(self) -> None:
691
- """Load idle random action definitions from animation config."""
692
- config = load_idle_behavior_config(
693
- config_path=_ANIMATION_CONFIG_FILE,
694
- default_rest_pose=DEFAULT_IDLE_REST_POSE,
695
- default_actions=_DEFAULT_IDLE_RANDOM_ACTIONS,
696
- default_min_interval_s=IDLE_LOOK_AROUND_MIN_INTERVAL,
697
- default_max_interval_s=IDLE_LOOK_AROUND_MAX_INTERVAL,
698
- default_probability=IDLE_LOOK_AROUND_PROBABILITY,
699
- default_yaw_range_deg=IDLE_LOOK_AROUND_YAW_RANGE,
700
- default_pitch_range_deg=IDLE_LOOK_AROUND_PITCH_RANGE,
701
- default_duration_s=IDLE_LOOK_AROUND_DURATION,
702
- )
703
-
704
- self._idle_random_actions = config.actions
705
- self._idle_rest_head_pitch_rad = config.rest_pose.pitch_rad
706
- self._idle_rest_head_yaw_rad = config.rest_pose.yaw_rad
707
- self._idle_rest_head_roll_rad = config.rest_pose.roll_rad
708
- self._idle_rest_x_m = config.rest_pose.x_m
709
- self._idle_rest_y_m = config.rest_pose.y_m
710
- self._idle_rest_z_m = config.rest_pose.z_m
711
- self._idle_rest_antenna_left_rad = config.rest_pose.antenna_left_rad
712
- self._idle_rest_antenna_right_rad = config.rest_pose.antenna_right_rad
713
- self._idle_random_actions_min_interval = config.min_interval_s
714
- self._idle_random_actions_max_interval = config.max_interval_s
715
- self._idle_random_actions_probability = config.trigger_probability
716
-
717
- def _pick_idle_random_action(self) -> dict[str, Any]:
718
- """Pick one idle random action from weighted definitions."""
719
- return pick_idle_random_action(self._idle_random_actions, _DEFAULT_IDLE_RANDOM_ACTIONS)
720
-
721
  def _poll_commands(self) -> None:
722
- poll_commands(self)
723
-
724
- def _handle_command(self, cmd: str, payload: Any) -> None:
725
- handle_command(self, cmd, payload)
726
-
727
- def _start_emotion_move(self, emotion_name: str) -> None:
728
- """Start playing an emotion move.
729
 
730
- Creates an EmotionMove and sets it as the active emotion, which will
731
- be sampled in the control loop via _update_emotion_move().
732
- """
733
- if not is_emotion_available():
734
- logger.warning("Cannot play emotion '%s': emotion library not available", emotion_name)
735
- return
736
 
737
- try:
738
- emotion_move = EmotionMove(emotion_name)
739
- with self._emotion_move_lock:
740
- self._emotion_move = emotion_move
741
- self._emotion_start_time = self._now()
742
- logger.info("Started emotion move: %s (duration=%.2fs)", emotion_name, emotion_move.duration)
743
- except Exception as e:
744
- logger.error("Failed to start emotion '%s': %s", emotion_name, e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745
 
746
  def _start_action(self, action: PendingAction) -> None:
747
- start_action(self, action)
 
 
 
 
 
 
 
 
 
 
 
748
 
749
  def _do_nod(self, amplitude_deg: float, duration: float) -> None:
750
  """Execute nod gesture (blocking in control loop context)."""
@@ -780,17 +475,13 @@ class MovementManager:
780
  def _update_action(self, dt: float) -> None:
781
  """Update pending action interpolation."""
782
  if self._pending_action is None:
783
- if self._idle_action_queue:
784
- self._start_action(self._idle_action_queue.popleft())
785
- else:
786
- self.state.look_around_in_progress = False
787
  return
788
 
789
  elapsed = self._now() - self._action_start_time
790
  progress = min(1.0, elapsed / self._pending_action.duration)
791
 
792
- # Use a softer easing curve so idle actions and micro gestures start/stop less abruptly.
793
- t = _smootherstep(progress)
794
 
795
  # Interpolate pose
796
  start = self._action_start_pose
@@ -802,149 +493,391 @@ class MovementManager:
802
  self.state.target_x = start["x"] + t * (action.target_x - start["x"])
803
  self.state.target_y = start["y"] + t * (action.target_y - start["y"])
804
  self.state.target_z = start["z"] + t * (action.target_z - start["z"])
805
- self.state.target_antenna_left = start["antenna_left"] + t * (
806
- action.target_antenna_left - start["antenna_left"]
807
- )
808
- self.state.target_antenna_right = start["antenna_right"] + t * (
809
- action.target_antenna_right - start["antenna_right"]
810
- )
811
 
812
  # Action complete
813
  if progress >= 1.0:
814
- completed_action = self._pending_action
815
-
816
- if completed_action.callback:
817
  try:
818
- completed_action.callback()
819
  except Exception as e:
820
  logger.error("Action callback error: %s", e)
821
-
822
- self._pending_action = None
823
-
824
- # Keep idle action state active until the full idle action queue is drained
825
- if completed_action.name.startswith("idle_action") and self._idle_action_queue:
826
- self._start_action(self._idle_action_queue.popleft())
827
- elif completed_action.name.startswith("idle_action") or completed_action.name == "look_around":
828
  self.state.look_around_in_progress = False
 
829
 
830
  def _update_animation(self, dt: float) -> None:
831
  """Update animation offsets from AnimationPlayer."""
832
- dt_safe = max(0.0, min(dt, MAX_CONTROL_DT_S))
833
- idle_queue_action_active = (
834
- self.state.robot_state == RobotState.IDLE
835
- and self.state.look_around_in_progress
836
- and (
837
- (self._pending_action is not None and self._pending_action.name.startswith("idle_action"))
838
- or len(self._idle_action_queue) > 0
839
- )
840
- )
841
-
842
- fade_duration = max(1e-3, IDLE_ACTION_ANIMATION_BLEND_DURATION)
843
- fade_step = dt_safe / fade_duration
844
- target_suppression = 1.0 if idle_queue_action_active else 0.0
845
- if target_suppression > self._idle_action_animation_suppression:
846
- self._idle_action_animation_suppression = min(
847
- target_suppression,
848
- self._idle_action_animation_suppression + fade_step,
849
- )
850
- else:
851
- self._idle_action_animation_suppression = max(
852
- target_suppression,
853
- self._idle_action_animation_suppression - fade_step,
854
- )
855
-
856
- if self.state.robot_state == RobotState.IDLE and not self._idle_motion_enabled:
857
- self.state.anim_pitch = 0.0
858
- self.state.anim_yaw = 0.0
859
- self.state.anim_roll = 0.0
860
- self.state.anim_x = 0.0
861
- self.state.anim_y = 0.0
862
- self.state.anim_z = 0.0
863
- self.state.anim_antenna_left = 0.0
864
- self.state.anim_antenna_right = 0.0
865
- self._idle_action_animation_suppression = 0.0
866
- return
867
-
868
  offsets = self._animation_player.get_offsets(dt)
869
- suppression = _smoothstep(self._idle_action_animation_suppression)
870
- idle_animation_scale = 1.0 - suppression
871
- antenna_animation_scale = 1.0 - suppression * IDLE_ACTION_ANTENNA_SUPPRESSION
872
-
873
- self.state.anim_pitch = offsets["pitch"] * idle_animation_scale
874
- self.state.anim_yaw = offsets["yaw"] * idle_animation_scale
875
- self.state.anim_roll = offsets["roll"] * idle_animation_scale
876
- self.state.anim_x = offsets["x"] * idle_animation_scale
877
- self.state.anim_y = offsets["y"] * idle_animation_scale
878
- self.state.anim_z = offsets["z"] * idle_animation_scale
879
- if self.state.robot_state != RobotState.IDLE or self._idle_antenna_enabled:
880
- self.state.anim_antenna_left = offsets["antenna_left"] * antenna_animation_scale
881
- self.state.anim_antenna_right = offsets["antenna_right"] * antenna_animation_scale
882
- else:
883
- self.state.anim_antenna_left = 0.0
884
- self.state.anim_antenna_right = 0.0
885
 
886
  def _freeze_antennas(self) -> None:
887
  """Freeze antennas at current position (for listening mode)."""
 
888
  current_left = self.state.target_antenna_left + self.state.anim_antenna_left
889
  current_right = self.state.target_antenna_right + self.state.anim_antenna_right
890
- self._antenna_controller.freeze(current_left, current_right)
 
 
 
 
 
 
891
 
892
  def _start_antenna_unfreeze(self) -> None:
893
  """Start unfreezing antennas (smooth blend back to normal)."""
894
- self._antenna_controller.start_unfreeze()
 
 
 
 
895
 
896
  def _update_antenna_blend(self, dt: float) -> None:
897
  """Update antenna blend state for smooth unfreezing."""
898
- self._antenna_controller.update(dt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899
 
900
  def _update_animation_blend(self) -> None:
901
  """Update animation blend factor when face is lost.
902
 
903
- Keep existing idle/speaking features active, but reduce idle animation
904
- weight while face tracking is actively steering the head.
905
  """
906
- target_blend = FACE_TRACKING_ANIMATION_BLEND if self.state.face_detected else 1.0
907
- current_blend = self.state.animation_blend
908
- if abs(target_blend - current_blend) < 1e-3:
909
- self.state.animation_blend = target_blend
910
  return
911
 
912
- step = self._target_period / max(1e-3, ANIMATION_BLEND_DURATION)
913
- if target_blend > current_blend:
914
- self.state.animation_blend = min(target_blend, current_blend + step)
915
- else:
916
- self.state.animation_blend = max(target_blend, current_blend - step)
 
 
 
 
 
 
917
 
918
  def _update_face_tracking(self) -> None:
919
- update_face_tracking(self, FACE_DETECTED_THRESHOLD)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
 
921
  def _update_idle_look_around(self) -> None:
922
- update_idle_look_around(
923
- self,
924
- inactivity_threshold_s=IDLE_INACTIVITY_THRESHOLD,
925
- legacy_probability=IDLE_LOOK_AROUND_PROBABILITY,
926
- yaw_range_deg=IDLE_LOOK_AROUND_YAW_RANGE,
927
- pitch_range_deg=IDLE_LOOK_AROUND_PITCH_RANGE,
928
- duration_s=IDLE_LOOK_AROUND_DURATION,
929
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
930
 
931
- def _update_emotion_move(self) -> tuple[np.ndarray, tuple[float, float], float] | None:
932
- return update_emotion_move(self)
933
 
934
- def is_emotion_playing(self) -> bool:
935
- """Check if an emotion move is currently playing."""
936
- with self._emotion_move_lock:
937
- return self._emotion_move is not None
938
 
939
- def _compose_final_pose(self) -> tuple[np.ndarray, tuple[float, float], float]:
940
- return compose_final_pose(self)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
941
 
942
  # =========================================================================
943
  # Internal: Robot control (runs in control loop)
944
  # =========================================================================
945
 
946
- def _issue_control_command(self, head_pose: np.ndarray, antennas: tuple[float, float], body_yaw: float) -> None:
947
- issue_control_command(self, head_pose, antennas, body_yaw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
948
 
949
  def _log_error_throttled(self, message: str) -> None:
950
  """Log error with throttling to prevent log explosion."""
@@ -963,7 +896,54 @@ class MovementManager:
963
  # =========================================================================
964
 
965
  def _control_loop(self) -> None:
966
- run_control_loop(self, max_control_dt_s=MAX_CONTROL_DT_S, face_detected_threshold=FACE_DETECTED_THRESHOLD)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
 
968
  # =========================================================================
969
  # Lifecycle
@@ -977,11 +957,6 @@ class MovementManager:
977
 
978
  self._stop_event.clear()
979
 
980
- # Reset to neutral position first (handles restart after crash/disconnect)
981
- # This ensures head returns to center on app startup
982
- self.reset_to_neutral(duration=0.5)
983
- logger.info("Reset to neutral position on startup")
984
-
985
  # Initialize idle animation immediately so breathing starts on launch
986
  # This matches the reference project's behavior where BreathingMove
987
  # starts after idle_inactivity_delay (0.3s)
@@ -999,44 +974,29 @@ class MovementManager:
999
  logger.info("Movement manager started")
1000
 
1001
  def stop(self) -> None:
1002
- """Stop the control loop and reset robot.
1003
-
1004
- Implements graceful shutdown to prevent daemon crashes:
1005
- 1. Stop sending new commands to robot (drain mode)
1006
- 2. Wait for current command cycle to complete
1007
- 3. Signal control loop to stop
1008
- 4. Wait for thread to finish cleanly
1009
- """
1010
  if self._thread is None or not self._thread.is_alive():
1011
  return
1012
 
1013
  logger.info("Stopping movement manager...")
1014
 
1015
- # Phase 1: Enter drain mode - stop sending commands to robot
1016
- # This prevents partial command transmission that can crash daemon
1017
- self._draining_event.set()
1018
-
1019
- # Give the control loop time to finish any in-flight command
1020
- # 50ms is enough for multiple control cycles at default rates
1021
- time.sleep(0.05)
1022
-
1023
- # Phase 2: Signal stop
1024
  self._stop_event.set()
1025
 
1026
- # Phase 3: Wait for thread with reasonable timeout
1027
  self._thread.join(timeout=0.5)
1028
  if self._thread.is_alive():
1029
  logger.warning("Movement manager thread did not stop in time")
1030
 
1031
- # Reset drain flag for potential restart
1032
- self._draining_event.clear()
1033
-
1034
  # Skip reset to neutral - let the app manager handle it
1035
  # This speeds up shutdown significantly
1036
  logger.info("Movement manager stopped")
1037
 
1038
  def _reset_to_neutral_blocking(self) -> None:
1039
  """Reset robot to neutral position (blocking)."""
 
 
 
1040
  try:
1041
  neutral_pose = np.eye(4)
1042
  self.robot.goto_target(
 
5
  inspired by the reachy_mini_conversation_app architecture.
6
 
7
  Key features:
8
+ - Single 100Hz control loop (same as reachy_mini_conversation_app)
9
  - Command queue pattern (thread-safe external API)
10
  - Error throttling (prevents log explosion)
11
  - JSON-driven animation system (conversation state animations)
 
18
 
19
  import logging
20
  import math
21
+ import random
22
  import threading
23
  import time
24
+ from dataclasses import dataclass, field
25
+ from enum import Enum
26
+ from queue import Queue, Empty
27
+ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
28
 
29
  import numpy as np
30
+ from scipy.spatial.transform import Rotation as R
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  if TYPE_CHECKING:
33
  from reachy_mini import ReachyMini
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
+ # Import SDK utilities for pose composition (same as conversation_app)
38
+ try:
39
+ from reachy_mini.utils import create_head_pose
40
+ from reachy_mini.utils.interpolation import compose_world_offset
41
+ SDK_UTILS_AVAILABLE = True
42
+ except ImportError:
43
+ SDK_UTILS_AVAILABLE = False
44
+ logger.warning("SDK utils not available, using fallback pose composition")
45
+
46
+ # Import animation player
47
+ from .animation_player import AnimationPlayer
48
+
49
 
50
  # =============================================================================
51
  # Constants
52
  # =============================================================================
53
 
54
+ # Control loop frequency - daemon now supports higher rates
55
+ CONTROL_LOOP_FREQUENCY_HZ = 100 # 100Hz control loop (same as conversation_app)
56
+ TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ
57
+
58
+ # Antenna freeze parameters (listening mode)
59
+ ANTENNA_BLEND_DURATION = 0.5 # Seconds to blend back from frozen state
60
 
61
  # Animation suppression when face detected
62
  FACE_DETECTED_THRESHOLD = 0.001 # Minimum offset magnitude to consider face detected
63
+ ANIMATION_BLEND_DURATION = 0.5 # Seconds to blend animation back when face lost
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # Idle look-around behavior parameters
66
+ IDLE_LOOK_AROUND_MIN_INTERVAL = 8.0 # Minimum seconds between look-arounds
67
+ IDLE_LOOK_AROUND_MAX_INTERVAL = 20.0 # Maximum seconds between look-arounds
68
+ IDLE_LOOK_AROUND_YAW_RANGE = 25.0 # Maximum yaw angle in degrees
69
+ IDLE_LOOK_AROUND_PITCH_RANGE = 10.0 # Maximum pitch angle in degrees
70
+ IDLE_LOOK_AROUND_DURATION = 1.2 # Duration of look-around action in seconds
71
+ IDLE_INACTIVITY_THRESHOLD = 5.0 # Seconds of inactivity before look-around starts
72
+
73
+ # State to animation mapping
74
+ # Note: SPEAKING uses idle animation as base, with speech_sway offsets layered on top
75
+ STATE_ANIMATION_MAP = {
76
+ "idle": "idle",
77
+ "listening": "listening",
78
+ "thinking": "thinking",
79
+ "speaking": "idle", # Base animation only; actual motion from speech_sway
 
 
80
  }
81
 
82
+
83
+ class RobotState(Enum):
84
+ """Robot state machine states."""
85
+ IDLE = "idle"
86
+ LISTENING = "listening"
87
+ THINKING = "thinking"
88
+ SPEAKING = "speaking"
89
+
90
+
91
+ @dataclass
92
+ class MovementState:
93
+ """Internal movement state (only modified by control loop)."""
94
+ # Current robot state
95
+ robot_state: RobotState = RobotState.IDLE
96
+
97
+ # Animation offsets (from AnimationPlayer)
98
+ anim_pitch: float = 0.0
99
+ anim_yaw: float = 0.0
100
+ anim_roll: float = 0.0
101
+ anim_x: float = 0.0
102
+ anim_y: float = 0.0
103
+ anim_z: float = 0.0
104
+ anim_antenna_left: float = 0.0
105
+ anim_antenna_right: float = 0.0
106
+
107
+ # Speech sway offsets (from audio analysis)
108
+ sway_pitch: float = 0.0
109
+ sway_yaw: float = 0.0
110
+ sway_roll: float = 0.0
111
+ sway_x: float = 0.0
112
+ sway_y: float = 0.0
113
+ sway_z: float = 0.0
114
+
115
+ # Target pose (from actions)
116
+ target_pitch: float = 0.0
117
+ target_yaw: float = 0.0
118
+ target_roll: float = 0.0
119
+ target_x: float = 0.0
120
+ target_y: float = 0.0
121
+ target_z: float = 0.0
122
+ target_antenna_left: float = 0.0
123
+ target_antenna_right: float = 0.0
124
+
125
+ # Timing
126
+ last_activity_time: float = 0.0
127
+ idle_start_time: float = 0.0
128
+
129
+ # Antenna freeze state (listening mode)
130
+ antenna_frozen: bool = False
131
+ frozen_antenna_left: float = 0.0
132
+ frozen_antenna_right: float = 0.0
133
+ antenna_blend: float = 1.0 # 0=frozen, 1=normal
134
+ antenna_blend_start_time: float = 0.0
135
+
136
+ # Idle look-around behavior
137
+ next_look_around_time: float = 0.0
138
+ look_around_in_progress: bool = False
139
+
140
+ # Face tracking animation suppression
141
+ face_detected: bool = False
142
+ face_lost_time: float = 0.0
143
+ animation_blend: float = 1.0 # 0=suppressed (face tracking), 1=full animation
144
+
145
+
146
+ @dataclass
147
+ class PendingAction:
148
+ """A pending motion action."""
149
+ name: str
150
+ target_pitch: float = 0.0
151
+ target_yaw: float = 0.0
152
+ target_roll: float = 0.0
153
+ target_x: float = 0.0
154
+ target_y: float = 0.0
155
+ target_z: float = 0.0
156
+ duration: float = 0.5
157
+ callback: Optional[Callable] = None
158
 
159
 
160
  class MovementManager:
161
  """
162
+ Unified movement manager with 100Hz control loop.
163
 
164
  All external interactions go through the command queue,
165
  ensuring thread safety and preventing race conditions.
166
  """
167
 
168
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
169
  self.robot = reachy_mini
170
  self._now = time.monotonic
171
 
172
+ # Command queue - all external threads communicate through this
173
+ self._command_queue: Queue[Tuple[str, Any]] = Queue()
174
 
175
  # Internal state (only modified by control loop)
176
  self.state = MovementState()
 
182
 
183
  # Thread control
184
  self._stop_event = threading.Event()
185
+ self._thread: Optional[threading.Thread] = None
 
 
 
 
 
186
 
187
  # Error throttling
188
  self._last_error_time = 0.0
189
+ self._error_interval = 1.0 # Log at most once per second
190
  self._suppressed_errors = 0
191
 
192
  # Connection health tracking
193
  self._connection_lost = False
194
  self._last_successful_command = self._now()
195
  self._connection_timeout = 3.0
196
+ self._reconnect_attempt_interval = 2.0
 
 
 
197
  self._last_reconnect_attempt = 0.0
198
  self._consecutive_errors = 0
199
  self._max_consecutive_errors = 5
200
 
201
  # Pending action
202
+ self._pending_action: Optional[PendingAction] = None
203
  self._action_start_time: float = 0.0
204
+ self._action_start_pose: Dict[str, float] = {}
 
 
205
 
206
  # Face tracking offsets (from camera worker)
207
+ self._face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
208
  self._face_tracking_lock = threading.Lock()
209
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  # Camera server reference for face tracking
211
  self._camera_server = None
212
+
213
+ # Face tracking smoothing (exponential moving average)
214
+ self._smoothed_face_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
215
+ self._face_smoothing_factor = 0.3
216
 
217
+ logger.info("MovementManager initialized with AnimationPlayer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  # =========================================================================
220
  # Thread-safe public API (called from any thread)
221
  # =========================================================================
222
 
 
 
 
 
 
 
 
 
 
223
  def set_state(self, new_state: RobotState) -> None:
224
  """Thread-safe: Set robot state."""
225
+ self._command_queue.put(("set_state", new_state))
226
 
227
  def set_listening(self, listening: bool) -> None:
228
  """Thread-safe: Set listening state."""
229
  state = RobotState.LISTENING if listening else RobotState.IDLE
230
+ self._command_queue.put(("set_state", state))
231
 
232
  def set_thinking(self) -> None:
233
  """Thread-safe: Set thinking state."""
234
+ self._command_queue.put(("set_state", RobotState.THINKING))
235
 
236
  def set_speaking(self, speaking: bool) -> None:
237
  """Thread-safe: Set speaking state."""
238
  state = RobotState.SPEAKING if speaking else RobotState.IDLE
239
+ self._command_queue.put(("set_state", state))
240
 
241
  def set_idle(self) -> None:
242
  """Thread-safe: Return to idle state."""
243
+ self._command_queue.put(("set_state", RobotState.IDLE))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
  def queue_action(self, action: PendingAction) -> None:
246
  """Thread-safe: Queue a motion action."""
247
+ self._command_queue.put(("action", action))
248
 
249
  def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
250
  """Thread-safe: Turn head to face a direction."""
 
253
  target_yaw=math.radians(yaw_deg),
254
  duration=duration,
255
  )
256
+ self._command_queue.put(("action", action))
257
 
258
  def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
259
  """Thread-safe: Perform a nod gesture."""
260
+ self._command_queue.put(("nod", (amplitude_deg, duration)))
261
 
262
  def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
263
  """Thread-safe: Perform a head shake gesture."""
264
+ self._command_queue.put(("shake", (amplitude_deg, duration)))
265
 
266
+ def set_speech_sway(
267
+ self, x: float, y: float, z: float,
268
+ roll: float, pitch: float, yaw: float
269
+ ) -> None:
270
  """Thread-safe: Set speech-driven sway offsets.
271
 
272
  These offsets are applied on top of the current animation
 
276
  x, y, z: Position offsets in meters
277
  roll, pitch, yaw: Orientation offsets in radians
278
  """
279
+ self._command_queue.put(("speech_sway", (x, y, z, roll, pitch, yaw)))
280
 
281
  def reset_to_neutral(self, duration: float = 0.5) -> None:
282
  """Thread-safe: Reset to neutral position."""
 
288
  target_x=0.0,
289
  target_y=0.0,
290
  target_z=0.0,
 
 
291
  duration=duration,
292
  )
293
+ self._command_queue.put(("action", action))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
  def set_camera_server(self, camera_server) -> None:
296
  """Set the camera server for face tracking offsets.
 
301
  self._camera_server = camera_server
302
  logger.info("Camera server set for face tracking")
303
 
304
+ def set_face_tracking_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  """Thread-safe: Update face tracking offsets manually.
306
+
307
  Args:
308
  offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
309
  """
 
312
 
313
  def set_target_pose(
314
  self,
315
+ x: Optional[float] = None,
316
+ y: Optional[float] = None,
317
+ z: Optional[float] = None,
318
+ roll: Optional[float] = None,
319
+ pitch: Optional[float] = None,
320
+ yaw: Optional[float] = None,
321
+ antenna_left: Optional[float] = None,
322
+ antenna_right: Optional[float] = None,
323
  ) -> None:
324
  """Thread-safe: Set target pose components.
325
 
 
333
  roll, pitch, yaw: Head orientation in radians
334
  antenna_left, antenna_right: Antenna angles in radians
335
  """
336
+ self._command_queue.put(("set_pose", {
337
+ "x": x,
338
+ "y": y,
339
+ "z": z,
340
+ "roll": roll,
341
+ "pitch": pitch,
342
+ "yaw": yaw,
343
+ "antenna_left": antenna_left,
344
+ "antenna_right": antenna_right,
345
+ }))
 
 
 
 
 
 
 
 
 
346
 
347
  # =========================================================================
348
  # Internal: Command processing (runs in control loop)
349
  # =========================================================================
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  def _poll_commands(self) -> None:
352
+ """Process all pending commands from the queue."""
353
+ while True:
354
+ try:
355
+ cmd, payload = self._command_queue.get_nowait()
356
+ except Empty:
357
+ break
 
358
 
359
+ self._handle_command(cmd, payload)
 
 
 
 
 
360
 
361
+ def _handle_command(self, cmd: str, payload: Any) -> None:
362
+ """Handle a single command."""
363
+ if cmd == "set_state":
364
+ old_state = self.state.robot_state
365
+ self.state.robot_state = payload
366
+ self.state.last_activity_time = self._now()
367
+
368
+ # Update animation based on state
369
+ animation_name = STATE_ANIMATION_MAP.get(payload.value, "idle")
370
+ self._animation_player.set_animation(animation_name)
371
+
372
+ # State transition logic
373
+ if payload == RobotState.IDLE and old_state != RobotState.IDLE:
374
+ self.state.idle_start_time = self._now()
375
+ # Unfreeze antennas when returning to idle
376
+ self._start_antenna_unfreeze()
377
+
378
+ # Freeze antennas when entering listening mode
379
+ if payload == RobotState.LISTENING:
380
+ self._freeze_antennas()
381
+ elif old_state == RobotState.LISTENING and payload != RobotState.LISTENING:
382
+ # Start unfreezing when leaving listening mode
383
+ self._start_antenna_unfreeze()
384
+
385
+ logger.debug("State changed: %s -> %s, animation: %s",
386
+ old_state.value, payload.value, animation_name)
387
+
388
+ elif cmd == "action":
389
+ self._start_action(payload)
390
+
391
+ elif cmd == "nod":
392
+ amplitude_deg, duration = payload
393
+ self._do_nod(amplitude_deg, duration)
394
+
395
+ elif cmd == "shake":
396
+ amplitude_deg, duration = payload
397
+ self._do_shake(amplitude_deg, duration)
398
+
399
+ elif cmd == "set_pose":
400
+ # Update target pose from external control (e.g., Home Assistant)
401
+ if payload.get("x") is not None:
402
+ self.state.target_x = payload["x"]
403
+ if payload.get("y") is not None:
404
+ self.state.target_y = payload["y"]
405
+ if payload.get("z") is not None:
406
+ self.state.target_z = payload["z"]
407
+ if payload.get("roll") is not None:
408
+ self.state.target_roll = payload["roll"]
409
+ if payload.get("pitch") is not None:
410
+ self.state.target_pitch = payload["pitch"]
411
+ if payload.get("yaw") is not None:
412
+ self.state.target_yaw = payload["yaw"]
413
+ # Note: body_yaw is calculated in _compose_final_pose based on head yaw
414
+ if payload.get("antenna_left") is not None:
415
+ self.state.target_antenna_left = payload["antenna_left"]
416
+ if payload.get("antenna_right") is not None:
417
+ self.state.target_antenna_right = payload["antenna_right"]
418
+ logger.debug("External pose update: %s", payload)
419
+
420
+ elif cmd == "speech_sway":
421
+ # Update speech-driven sway offsets
422
+ x, y, z, roll, pitch, yaw = payload
423
+ self.state.sway_x = x
424
+ self.state.sway_y = y
425
+ self.state.sway_z = z
426
+ self.state.sway_roll = roll
427
+ self.state.sway_pitch = pitch
428
+ self.state.sway_yaw = yaw
429
 
430
  def _start_action(self, action: PendingAction) -> None:
431
+ """Start a new motion action."""
432
+ self._pending_action = action
433
+ self._action_start_time = self._now()
434
+ self._action_start_pose = {
435
+ "pitch": self.state.target_pitch,
436
+ "yaw": self.state.target_yaw,
437
+ "roll": self.state.target_roll,
438
+ "x": self.state.target_x,
439
+ "y": self.state.target_y,
440
+ "z": self.state.target_z,
441
+ }
442
+ logger.debug("Starting action: %s", action.name)
443
 
444
  def _do_nod(self, amplitude_deg: float, duration: float) -> None:
445
  """Execute nod gesture (blocking in control loop context)."""
 
475
  def _update_action(self, dt: float) -> None:
476
  """Update pending action interpolation."""
477
  if self._pending_action is None:
 
 
 
 
478
  return
479
 
480
  elapsed = self._now() - self._action_start_time
481
  progress = min(1.0, elapsed / self._pending_action.duration)
482
 
483
+ # Smooth interpolation (ease in-out)
484
+ t = progress * progress * (3 - 2 * progress)
485
 
486
  # Interpolate pose
487
  start = self._action_start_pose
 
493
  self.state.target_x = start["x"] + t * (action.target_x - start["x"])
494
  self.state.target_y = start["y"] + t * (action.target_y - start["y"])
495
  self.state.target_z = start["z"] + t * (action.target_z - start["z"])
 
 
 
 
 
 
496
 
497
  # Action complete
498
  if progress >= 1.0:
499
+ if self._pending_action.callback:
 
 
500
  try:
501
+ self._pending_action.callback()
502
  except Exception as e:
503
  logger.error("Action callback error: %s", e)
504
+ # Reset look-around state if this was a look-around action
505
+ if self._pending_action.name == "look_around":
 
 
 
 
 
506
  self.state.look_around_in_progress = False
507
+ self._pending_action = None
508
 
509
  def _update_animation(self, dt: float) -> None:
510
  """Update animation offsets from AnimationPlayer."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  offsets = self._animation_player.get_offsets(dt)
512
+
513
+ self.state.anim_pitch = offsets["pitch"]
514
+ self.state.anim_yaw = offsets["yaw"]
515
+ self.state.anim_roll = offsets["roll"]
516
+ self.state.anim_x = offsets["x"]
517
+ self.state.anim_y = offsets["y"]
518
+ self.state.anim_z = offsets["z"]
519
+ self.state.anim_antenna_left = offsets["antenna_left"]
520
+ self.state.anim_antenna_right = offsets["antenna_right"]
 
 
 
 
 
 
 
521
 
522
  def _freeze_antennas(self) -> None:
523
  """Freeze antennas at current position (for listening mode)."""
524
+ # Capture current antenna positions
525
  current_left = self.state.target_antenna_left + self.state.anim_antenna_left
526
  current_right = self.state.target_antenna_right + self.state.anim_antenna_right
527
+
528
+ self.state.antenna_frozen = True
529
+ self.state.frozen_antenna_left = current_left
530
+ self.state.frozen_antenna_right = current_right
531
+ self.state.antenna_blend = 0.0 # Fully frozen
532
+ logger.debug("Antennas frozen at left=%.2f, right=%.2f",
533
+ math.degrees(current_left), math.degrees(current_right))
534
 
535
  def _start_antenna_unfreeze(self) -> None:
536
  """Start unfreezing antennas (smooth blend back to normal)."""
537
+ if not self.state.antenna_frozen:
538
+ return
539
+
540
+ self.state.antenna_blend_start_time = self._now()
541
+ logger.debug("Starting antenna unfreeze")
542
 
543
  def _update_antenna_blend(self, dt: float) -> None:
544
  """Update antenna blend state for smooth unfreezing."""
545
+ if not self.state.antenna_frozen:
546
+ return
547
+
548
+ if self.state.antenna_blend >= 1.0:
549
+ # Fully unfrozen
550
+ self.state.antenna_frozen = False
551
+ return
552
+
553
+ # Calculate blend progress
554
+ elapsed = self._now() - self.state.antenna_blend_start_time
555
+ if elapsed > 0:
556
+ self.state.antenna_blend = min(1.0, elapsed / ANTENNA_BLEND_DURATION)
557
+
558
+ if self.state.antenna_blend >= 1.0:
559
+ self.state.antenna_frozen = False
560
+ logger.debug("Antennas unfrozen")
561
 
562
  def _update_animation_blend(self) -> None:
563
  """Update animation blend factor when face is lost.
564
 
565
+ When face is detected, animation_blend is set to 0 immediately.
566
+ When face is lost, we smoothly blend animation back to 1.0.
567
  """
568
+ if self.state.face_detected:
569
+ # Face is detected, keep animation suppressed
 
 
570
  return
571
 
572
+ if self.state.animation_blend >= 1.0:
573
+ # Already fully blended, nothing to do
574
+ return
575
+
576
+ # Calculate blend progress since face was lost
577
+ elapsed = self._now() - self.state.face_lost_time
578
+ if elapsed > 0:
579
+ self.state.animation_blend = min(1.0, elapsed / ANIMATION_BLEND_DURATION)
580
+
581
+ if self.state.animation_blend >= 1.0:
582
+ logger.debug("Animation fully restored")
583
 
584
  def _update_face_tracking(self) -> None:
585
+ """Get face tracking offsets from camera server with smoothing.
586
+
587
+ Also updates face detection state for animation suppression.
588
+ """
589
+ if self._camera_server is not None:
590
+ try:
591
+ raw_offsets = self._camera_server.get_face_tracking_offsets()
592
+
593
+ # Apply exponential moving average smoothing
594
+ alpha = self._face_smoothing_factor
595
+ for i in range(6):
596
+ self._smoothed_face_offsets[i] = (
597
+ alpha * raw_offsets[i] +
598
+ (1 - alpha) * self._smoothed_face_offsets[i]
599
+ )
600
+
601
+ with self._face_tracking_lock:
602
+ self._face_tracking_offsets = tuple(self._smoothed_face_offsets)
603
+
604
+ # Check if face is detected (any offset is non-zero)
605
+ offset_magnitude = sum(abs(o) for o in self._smoothed_face_offsets)
606
+ face_now_detected = offset_magnitude > FACE_DETECTED_THRESHOLD
607
+
608
+ # Update face detection state
609
+ if face_now_detected:
610
+ if not self.state.face_detected:
611
+ logger.debug("Face detected - suppressing breathing animation")
612
+ self.state.face_detected = True
613
+ self.state.animation_blend = 0.0 # Immediately suppress animation
614
+ else:
615
+ if self.state.face_detected:
616
+ # Face just lost - start blend timer
617
+ self.state.face_lost_time = self._now()
618
+ logger.debug("Face lost - will restore animation after blend")
619
+ self.state.face_detected = False
620
+
621
+ except Exception as e:
622
+ logger.debug("Error getting face tracking offsets: %s", e)
623
 
624
  def _update_idle_look_around(self) -> None:
625
+ """Trigger random look-around behavior when idle for a while.
626
+
627
+ This adds life-like behavior to the robot by occasionally looking around
628
+ when not engaged in conversation. Similar to conversation_app's idle behaviors.
629
+ """
630
+ # Only trigger when in IDLE state
631
+ if self.state.robot_state != RobotState.IDLE:
632
+ # Reset timing when not idle
633
+ self.state.next_look_around_time = 0.0
634
+ self.state.look_around_in_progress = False
635
+ return
636
+
637
+ # Check if we have an action in progress
638
+ if self._pending_action is not None:
639
+ return
640
+
641
+ now = self._now()
642
+ idle_duration = now - self.state.idle_start_time
643
+
644
+ # Only start look-around after sufficient inactivity
645
+ if idle_duration < IDLE_INACTIVITY_THRESHOLD:
646
+ return
647
+
648
+ # Schedule next look-around if not scheduled
649
+ if self.state.next_look_around_time == 0.0:
650
+ interval = random.uniform(
651
+ IDLE_LOOK_AROUND_MIN_INTERVAL,
652
+ IDLE_LOOK_AROUND_MAX_INTERVAL
653
+ )
654
+ self.state.next_look_around_time = now + interval
655
+ logger.debug("Scheduled next look-around in %.1fs", interval)
656
+ return
657
+
658
+ # Check if it's time for look-around
659
+ if now >= self.state.next_look_around_time and not self.state.look_around_in_progress:
660
+ # Generate random look direction
661
+ target_yaw = random.uniform(
662
+ -IDLE_LOOK_AROUND_YAW_RANGE,
663
+ IDLE_LOOK_AROUND_YAW_RANGE
664
+ )
665
+ target_pitch = random.uniform(
666
+ -IDLE_LOOK_AROUND_PITCH_RANGE,
667
+ IDLE_LOOK_AROUND_PITCH_RANGE
668
+ )
669
+
670
+ # Create look-around action
671
+ action = PendingAction(
672
+ name="look_around",
673
+ target_yaw=math.radians(target_yaw),
674
+ target_pitch=math.radians(target_pitch),
675
+ duration=IDLE_LOOK_AROUND_DURATION,
676
+ )
677
+
678
+ # Start the action
679
+ self._start_action(action)
680
+ self.state.look_around_in_progress = True
681
+
682
+ # Schedule return to center and next look-around
683
+ interval = random.uniform(
684
+ IDLE_LOOK_AROUND_MIN_INTERVAL,
685
+ IDLE_LOOK_AROUND_MAX_INTERVAL
686
+ )
687
+ self.state.next_look_around_time = now + IDLE_LOOK_AROUND_DURATION * 2 + interval
688
+
689
+ logger.debug("Starting look-around: yaw=%.1f°, pitch=%.1f°",
690
+ target_yaw, target_pitch)
691
 
 
 
692
 
693
+ def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
694
+ """Compose final pose from all sources using SDK's compose_world_offset.
 
 
695
 
696
+ Body yaw follows head yaw to enable natural head tracking with body rotation.
697
+ When head turns beyond a threshold, body rotates to follow it, similar to
698
+ how the reference project's sweep_look tool synchronizes body_yaw with head_yaw.
699
+
700
+ Returns:
701
+ Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
702
+ """
703
+ # Build primary head pose from target state
704
+ if SDK_UTILS_AVAILABLE:
705
+ primary_head = create_head_pose(
706
+ x=self.state.target_x,
707
+ y=self.state.target_y,
708
+ z=self.state.target_z,
709
+ roll=self.state.target_roll,
710
+ pitch=self.state.target_pitch,
711
+ yaw=self.state.target_yaw,
712
+ degrees=False,
713
+ mm=False,
714
+ )
715
+ else:
716
+ # Fallback: build matrix manually
717
+ rotation = R.from_euler('xyz', [
718
+ self.state.target_roll,
719
+ self.state.target_pitch,
720
+ self.state.target_yaw,
721
+ ])
722
+ primary_head = np.eye(4)
723
+ primary_head[:3, :3] = rotation.as_matrix()
724
+ primary_head[0, 3] = self.state.target_x
725
+ primary_head[1, 3] = self.state.target_y
726
+ primary_head[2, 3] = self.state.target_z
727
+
728
+ # Build secondary pose from animation + face tracking + speech sway
729
+ with self._face_tracking_lock:
730
+ face_offsets = self._face_tracking_offsets
731
+
732
+ # Apply animation blend factor (0 when face detected, 1 when no face)
733
+ # This suppresses breathing animation during face tracking
734
+ anim_blend = self.state.animation_blend
735
+ anim_x = self.state.anim_x * anim_blend
736
+ anim_y = self.state.anim_y * anim_blend
737
+ anim_z = self.state.anim_z * anim_blend
738
+ anim_roll = self.state.anim_roll * anim_blend
739
+ anim_pitch = self.state.anim_pitch * anim_blend
740
+ anim_yaw = self.state.anim_yaw * anim_blend
741
+
742
+ secondary_x = anim_x + self.state.sway_x + face_offsets[0]
743
+ secondary_y = anim_y + self.state.sway_y + face_offsets[1]
744
+ secondary_z = anim_z + self.state.sway_z + face_offsets[2]
745
+ secondary_roll = anim_roll + self.state.sway_roll + face_offsets[3]
746
+ secondary_pitch = anim_pitch + self.state.sway_pitch + face_offsets[4]
747
+ secondary_yaw = anim_yaw + self.state.sway_yaw + face_offsets[5]
748
+
749
+ if SDK_UTILS_AVAILABLE:
750
+ secondary_head = create_head_pose(
751
+ x=secondary_x,
752
+ y=secondary_y,
753
+ z=secondary_z,
754
+ roll=secondary_roll,
755
+ pitch=secondary_pitch,
756
+ yaw=secondary_yaw,
757
+ degrees=False,
758
+ mm=False,
759
+ )
760
+ # Compose using SDK's compose_world_offset (same as conversation_app)
761
+ final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
762
+ else:
763
+ # Fallback: simple addition (less accurate but works)
764
+ secondary_rotation = R.from_euler('xyz', [secondary_roll, secondary_pitch, secondary_yaw])
765
+ secondary_head = np.eye(4)
766
+ secondary_head[:3, :3] = secondary_rotation.as_matrix()
767
+ secondary_head[0, 3] = secondary_x
768
+ secondary_head[1, 3] = secondary_y
769
+ secondary_head[2, 3] = secondary_z
770
+
771
+ # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
772
+ final_head = np.eye(4)
773
+ final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
774
+ final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]
775
+
776
+ # Antenna pose with freeze blending
777
+ # Apply animation blend to antenna as well (suppress when face detected)
778
+ anim_antenna_left = self.state.anim_antenna_left * anim_blend
779
+ anim_antenna_right = self.state.anim_antenna_right * anim_blend
780
+
781
+ target_antenna_left = self.state.target_antenna_left + anim_antenna_left
782
+ target_antenna_right = self.state.target_antenna_right + anim_antenna_right
783
+
784
+ # Apply antenna freeze blending (listening mode)
785
+ blend = self.state.antenna_blend
786
+ if blend < 1.0:
787
+ # Blend between frozen position and target position
788
+ antenna_left = (self.state.frozen_antenna_left * (1.0 - blend) +
789
+ target_antenna_left * blend)
790
+ antenna_right = (self.state.frozen_antenna_right * (1.0 - blend) +
791
+ target_antenna_right * blend)
792
+ else:
793
+ antenna_left = target_antenna_left
794
+ antenna_right = target_antenna_right
795
+
796
+ # Calculate body_yaw to follow head yaw
797
+ # Extract yaw from the final head pose rotation matrix
798
+ # The rotation matrix uses xyz euler convention
799
+ final_rotation = R.from_matrix(final_head[:3, :3])
800
+ _, _, final_head_yaw = final_rotation.as_euler('xyz')
801
+
802
+ # Body follows head yaw directly
803
+ # The SDK's automatic_body_yaw mechanism (inverse_kinematics_safe) will
804
+ # clamp the relative angle between head and body to max 65 degrees
805
+ # and limit body_yaw to ±160 degrees for collision prevention
806
+ body_yaw = final_head_yaw
807
+
808
+ return final_head, (antenna_right, antenna_left), body_yaw
809
 
810
  # =========================================================================
811
  # Internal: Robot control (runs in control loop)
812
  # =========================================================================
813
 
814
+ def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
815
+ """Send control command to robot with error throttling and connection health tracking.
816
+
817
+ Body yaw follows head yaw for natural tracking. The SDK's automatic_body_yaw
818
+ mechanism (inverse_kinematics_safe) handles collision prevention.
819
+
820
+ Args:
821
+ head_pose: 4x4 head pose matrix
822
+ antennas: Tuple of (right_angle, left_angle) in radians
823
+ body_yaw: Body yaw angle (follows head yaw for natural tracking)
824
+ """
825
+ if self.robot is None:
826
+ return
827
+
828
+ now = self._now()
829
+
830
+ # Check if we should skip due to connection loss (but always try periodically)
831
+ if self._connection_lost:
832
+ if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
833
+ # Skip sending commands to reduce error spam
834
+ return
835
+ # Time to try reconnecting
836
+ self._last_reconnect_attempt = now
837
+ logger.debug("Attempting to send command after connection loss...")
838
+
839
+ try:
840
+ # Pass calculated body_yaw to set_target
841
+ # Body yaw is calculated in _compose_final_pose based on head yaw
842
+ self.robot.set_target(
843
+ head=head_pose,
844
+ antennas=list(antennas),
845
+ body_yaw=body_yaw,
846
+ )
847
+
848
+ # Command succeeded - update connection health
849
+ self._last_successful_command = now
850
+ self._consecutive_errors = 0 # Reset error counter
851
+
852
+ if self._connection_lost:
853
+ logger.info("✓ Connection to robot restored")
854
+ self._connection_lost = False
855
+ self._suppressed_errors = 0
856
+
857
+ except Exception as e:
858
+ error_msg = str(e)
859
+ self._consecutive_errors += 1
860
+
861
+ # Check if this is a connection error
862
+ is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg
863
+
864
+ if is_connection_error:
865
+ if not self._connection_lost:
866
+ # First time detecting connection loss
867
+ if self._consecutive_errors >= self._max_consecutive_errors:
868
+ logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
869
+ logger.warning(" Will retry connection every %.1fs...", self._reconnect_attempt_interval)
870
+ self._connection_lost = True
871
+ self._last_reconnect_attempt = now
872
+ else:
873
+ # Transient error, log but don't mark as lost yet
874
+ self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
875
+ else:
876
+ # Already in lost state, use throttled logging
877
+ self._log_error_throttled(f"Connection still lost: {error_msg}")
878
+ else:
879
+ # Non-connection error - log but don't affect connection state
880
+ self._log_error_throttled(f"Failed to set robot target: {error_msg}")
881
 
882
  def _log_error_throttled(self, message: str) -> None:
883
  """Log error with throttling to prevent log explosion."""
 
896
  # =========================================================================
897
 
898
  def _control_loop(self) -> None:
899
+ """Main 100Hz control loop."""
900
+ logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)
901
+
902
+ last_time = self._now()
903
+
904
+ while not self._stop_event.is_set():
905
+ loop_start = self._now()
906
+ dt = loop_start - last_time
907
+ last_time = loop_start
908
+
909
+ try:
910
+ # 1. Process commands from queue
911
+ self._poll_commands()
912
+
913
+ # 2. Update action interpolation
914
+ self._update_action(dt)
915
+
916
+ # 3. Update animation offsets (JSON-driven)
917
+ self._update_animation(dt)
918
+
919
+ # 4. Update antenna blend (listening mode freeze/unfreeze)
920
+ self._update_antenna_blend(dt)
921
+
922
+ # 5. Update face tracking offsets from camera server
923
+ self._update_face_tracking()
924
+
925
+ # 6. Update animation blend (suppress when face detected)
926
+ self._update_animation_blend()
927
+
928
+ # 7. Update idle look-around behavior
929
+ self._update_idle_look_around()
930
+
931
+ # 8. Compose final pose (returns head_pose matrix, antennas tuple, body_yaw)
932
+ head_pose, antennas, body_yaw = self._compose_final_pose()
933
+
934
+ # 9. Send to robot with body_yaw for automatic adjustment
935
+ self._issue_control_command(head_pose, antennas, body_yaw)
936
+
937
+ except Exception as e:
938
+ self._log_error_throttled(f"Control loop error: {e}")
939
+
940
+ # Adaptive sleep
941
+ elapsed = self._now() - loop_start
942
+ sleep_time = max(0.0, TARGET_PERIOD - elapsed)
943
+ if sleep_time > 0:
944
+ time.sleep(sleep_time)
945
+
946
+ logger.info("Movement manager control loop stopped")
947
 
948
  # =========================================================================
949
  # Lifecycle
 
957
 
958
  self._stop_event.clear()
959
 
 
 
 
 
 
960
  # Initialize idle animation immediately so breathing starts on launch
961
  # This matches the reference project's behavior where BreathingMove
962
  # starts after idle_inactivity_delay (0.3s)
 
974
  logger.info("Movement manager started")
975
 
976
  def stop(self) -> None:
977
+ """Stop the control loop and reset robot."""
 
 
 
 
 
 
 
978
  if self._thread is None or not self._thread.is_alive():
979
  return
980
 
981
  logger.info("Stopping movement manager...")
982
 
983
+ # Signal stop
 
 
 
 
 
 
 
 
984
  self._stop_event.set()
985
 
986
+ # Wait for thread with shorter timeout
987
  self._thread.join(timeout=0.5)
988
  if self._thread.is_alive():
989
  logger.warning("Movement manager thread did not stop in time")
990
 
 
 
 
991
  # Skip reset to neutral - let the app manager handle it
992
  # This speeds up shutdown significantly
993
  logger.info("Movement manager stopped")
994
 
995
  def _reset_to_neutral_blocking(self) -> None:
996
  """Reset robot to neutral position (blocking)."""
997
+ if self.robot is None:
998
+ return
999
+
1000
  try:
1001
  neutral_pose = np.eye(4)
1002
  self.robot.goto_target(
{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py RENAMED
@@ -1,735 +1,878 @@
1
- """Reachy Mini controller wrapper for ESPHome entities."""
2
-
3
- import logging
4
- import math
5
- import time
6
- from typing import TYPE_CHECKING, Any
7
-
8
- import numpy as np
9
- import requests
10
- from scipy.spatial.transform import Rotation as R
11
-
12
- from .core.config import Config
13
-
14
- if TYPE_CHECKING:
15
- from reachy_mini import ReachyMini
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- class ReachyController:
21
- """
22
- Wrapper class for Reachy Mini control operations.
23
-
24
- Provides safe access to Reachy Mini SDK functions with error handling.
25
- """
26
-
27
- def __init__(self, reachy_mini: "ReachyMini"):
28
- """
29
- Initialize the controller.
30
-
31
- Args:
32
- reachy_mini: ReachyMini instance (required)
33
- """
34
- self.reachy = reachy_mini
35
- self._speaker_volume = 100 # Default volume
36
- self._movement_manager = None # Set later via set_movement_manager()
37
-
38
- # Shared session to reduce per-request overhead
39
- self._http_session = requests.Session()
40
- self._http_timeout = 5.0 # seconds
41
- self._cache_ttl = Config.daemon.status_cache_ttl
42
- self._daemon_base_url = Config.daemon.url.rstrip("/")
43
-
44
- # Status caching - only for get_status() which may trigger I/O
45
- # Note: get_current_head_pose() and get_current_joint_positions() are
46
- # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
47
- self._state_cache: dict[str, Any] = {}
48
- self._last_status_query = 0.0
49
-
50
- self._look_at_x = 0.0
51
- self._look_at_y = 0.0
52
- self._look_at_z = 0.0
53
-
54
- def set_movement_manager(self, movement_manager) -> None:
55
- """Set the MovementManager instance for pose control.
56
-
57
- Args:
58
- movement_manager: MovementManager instance
59
- """
60
- self._movement_manager = movement_manager
61
- logger.info("MovementManager set for ReachyController")
62
-
63
- @property
64
- def is_available(self) -> bool:
65
- """Check if robot is available."""
66
- return self.reachy is not None
67
-
68
- def _with_movement_manager(self, caller: str):
69
- if self._movement_manager is None:
70
- logger.warning("%s failed - MovementManager not set", caller)
71
- return None
72
- return self._movement_manager
73
-
74
- def _get_movement_bool(self, getter_name: str, log_label: str) -> bool:
75
- movement_manager = self._movement_manager
76
- if movement_manager is None:
77
- return False
78
- try:
79
- return bool(getattr(movement_manager, getter_name)())
80
- except Exception as e:
81
- logger.debug("Error getting %s state: %s", log_label, e)
82
- return False
83
-
84
- def get_idle_behavior_enabled(self) -> bool:
85
- """Get whether any idle behavior subsystem is enabled."""
86
- return self._get_movement_bool("get_idle_behavior_enabled", "idle behavior")
87
-
88
- def set_idle_behavior_enabled(self, enabled: bool) -> None:
89
- """Enable or disable all idle behavior subsystems together."""
90
- movement_manager = self._with_movement_manager("set_idle_behavior_enabled")
91
- if movement_manager is not None:
92
- movement_manager.set_idle_behavior_enabled(enabled)
93
-
94
- # ========== Phase 1: Basic Status & Volume ==========
95
-
96
- @staticmethod
97
- def _status_value(status: Any, key: str, default: Any = None) -> Any:
98
- if status is None:
99
- return default
100
- if isinstance(status, dict):
101
- return status.get(key, default)
102
- return getattr(status, key, default)
103
-
104
- @classmethod
105
- def _nested_status_value(cls, status: Any, parent_key: str, child_key: str, default: Any = None) -> Any:
106
- parent = cls._status_value(status, parent_key, None)
107
- if parent is None:
108
- return default
109
- if isinstance(parent, dict):
110
- return parent.get(child_key, default)
111
- return getattr(parent, child_key, default)
112
-
113
- def _get_cached_status(self) -> Any:
114
- """Get cached daemon status to reduce query frequency.
115
-
116
- Note: get_status() may trigger I/O, so we cache it.
117
- Unlike get_current_head_pose() and get_current_joint_positions()
118
- which are non-blocking in the SDK.
119
- """
120
- now = time.time()
121
- if now - self._last_status_query < self._cache_ttl:
122
- return self._state_cache.get("status")
123
-
124
- if not self.is_available:
125
- return None
126
-
127
- try:
128
- status = self.reachy.client.get_status(wait=False)
129
- self._state_cache["status"] = status
130
- self._last_status_query = now
131
- return status
132
- except Exception as e:
133
- logger.error(f"Error getting status: {e}")
134
- return None
135
-
136
- def get_daemon_state(self) -> str:
137
- """Get daemon state with caching."""
138
- status = self._get_cached_status()
139
- if status is None:
140
- return "not_available"
141
- return str(self._status_value(status, "state", "unknown"))
142
-
143
- def get_backend_ready(self) -> bool:
144
- """Check if backend is ready with caching."""
145
- status = self._get_cached_status()
146
- if status is None:
147
- return False
148
- return self._status_value(status, "state") == "running"
149
-
150
- def get_error_message(self) -> str:
151
- """Get current error message with caching."""
152
- status = self._get_cached_status()
153
- if status is None:
154
- return "Robot not available"
155
- return str(self._status_value(status, "error", "") or "")
156
-
157
- def _get_volume_via_api(self, path: str, cached_value: float, label: str) -> float:
158
- """Fetch a volume value from the daemon API, falling back to the cached value."""
159
- try:
160
- resp = self._http_session.get(
161
- f"{self._daemon_base_url}{path}",
162
- timeout=self._http_timeout,
163
- )
164
- resp.raise_for_status()
165
- data = resp.json()
166
- if isinstance(data, dict) and "volume" in data:
167
- return float(data["volume"])
168
- except Exception as e:
169
- logger.warning("Failed to get %s volume via daemon API: %s", label, e)
170
-
171
- return cached_value
172
-
173
- def _set_volume_via_api(self, path: str, volume: float, label: str) -> float:
174
- """Write a volume value through the daemon API and return the confirmed level."""
175
- try:
176
- resp = self._http_session.post(
177
- f"{self._daemon_base_url}{path}",
178
- json={"volume": int(volume)},
179
- timeout=self._http_timeout,
180
- )
181
- resp.raise_for_status()
182
- data = resp.json()
183
- if isinstance(data, dict) and "volume" in data:
184
- return float(data["volume"])
185
- return volume
186
- except Exception as e:
187
- logger.error("Failed to set %s volume via daemon API: %s", label, e)
188
- return volume
189
-
190
- def _motor_mode_from_status(self, status: Any) -> str | None:
191
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
192
- if motor_mode is not None:
193
- return str(motor_mode)
194
- return None
195
-
196
- def get_speaker_volume(self) -> float:
197
- """Get speaker volume (0-100) from the daemon volume API."""
198
- self._speaker_volume = self._get_volume_via_api("/api/volume/current", self._speaker_volume, "speaker")
199
- return self._speaker_volume
200
-
201
- def set_speaker_volume(self, volume: float) -> None:
202
- """Set speaker volume (0-100) through the daemon volume API."""
203
- volume = max(0.0, min(100.0, volume))
204
- self._speaker_volume = self._set_volume_via_api("/api/volume/set", volume, "speaker")
205
- logger.info("Speaker volume set to %.1f%% via daemon API", self._speaker_volume)
206
-
207
- # ========== Phase 2: Motor Control ==========
208
-
209
- def get_motors_enabled(self) -> bool:
210
- """Check if motors are enabled with caching."""
211
- status = self._get_cached_status()
212
- if status is None:
213
- return False
214
- try:
215
- motor_mode = self._motor_mode_from_status(status)
216
- return motor_mode == "enabled"
217
- except Exception as e:
218
- logger.error(f"Error getting motor state: {e}")
219
- return False
220
-
221
- def set_motors_enabled(self, enabled: bool) -> None:
222
- """
223
- Enable or disable motors.
224
-
225
- Args:
226
- enabled: True to enable, False to disable
227
- """
228
- if not self.is_available:
229
- logger.warning("Cannot control motors: robot not available")
230
- return
231
-
232
- try:
233
- if enabled:
234
- self.reachy.enable_motors()
235
- logger.info("Motors enabled")
236
- else:
237
- self.reachy.disable_motors()
238
- logger.info("Motors disabled")
239
- except Exception as e:
240
- logger.error(f"Error setting motor state: {e}")
241
-
242
- def get_motor_mode(self) -> str:
243
- """Get current motor control mode with caching."""
244
- status = self._get_cached_status()
245
- if status is None:
246
- return "disabled"
247
- try:
248
- return self._motor_mode_from_status(status) or "disabled"
249
- except Exception as e:
250
- logger.error(f"Error getting motor mode: {e}")
251
- return "error"
252
-
253
- def set_motor_mode(self, mode: str) -> None:
254
- """
255
- Set motor control mode.
256
-
257
- Args:
258
- mode: One of "enabled", "disabled", "gravity_compensation"
259
- """
260
- if not self.is_available:
261
- logger.warning("Cannot set motor mode: robot not available")
262
- return
263
-
264
- try:
265
- if mode == "enabled":
266
- self.reachy.enable_motors()
267
- elif mode == "disabled":
268
- self.reachy.disable_motors()
269
- elif mode == "gravity_compensation":
270
- self.reachy.enable_gravity_compensation()
271
- else:
272
- logger.warning(f"Invalid motor mode: {mode}")
273
- return
274
- logger.info(f"Motor mode set to {mode}")
275
- except Exception as e:
276
- logger.error(f"Error setting motor mode: {e}")
277
-
278
- def get_doa_enabled(self) -> bool:
279
- """Get whether DOA sound tracking is enabled."""
280
- return self._get_movement_bool("get_doa_enabled", "DOA tracking")
281
-
282
- def set_doa_enabled(self, enabled: bool) -> None:
283
- """Enable or disable DOA sound tracking."""
284
- movement_manager = self._with_movement_manager("set_doa_enabled")
285
- if movement_manager is not None:
286
- movement_manager.set_doa_enabled(enabled)
287
-
288
- def _daemon_command(self, path: str, params: dict[str, str] | None = None) -> None:
289
- """Send a daemon command request and wait for the daemon state to settle."""
290
- url = f"{self._daemon_base_url}{path}"
291
- resp = self._http_session.post(url, params=params or {}, timeout=self._http_timeout)
292
- resp.raise_for_status()
293
-
294
- desired_state = None
295
- if path.endswith("/start"):
296
- desired_state = "running"
297
- elif path.endswith("/stop"):
298
- desired_state = "stopped"
299
-
300
- if desired_state is not None:
301
- self._wait_for_daemon_state(desired_state)
302
-
303
- def _wait_for_daemon_state(self, desired_state: str, timeout: float = 10.0) -> None:
304
- """Poll daemon status until the requested state is reached."""
305
- deadline = time.time() + timeout
306
- while time.time() < deadline:
307
- try:
308
- resp = self._http_session.get(
309
- f"{self._daemon_base_url}/api/daemon/status",
310
- timeout=self._http_timeout,
311
- )
312
- resp.raise_for_status()
313
- data = resp.json()
314
- current_state = str(data.get("state", "")).lower()
315
- if current_state == desired_state:
316
- self._last_status_query = 0.0
317
- return
318
- except Exception as e:
319
- logger.debug("Waiting for daemon state %s failed: %s", desired_state, e)
320
- time.sleep(0.2)
321
-
322
- logger.warning("Timed out waiting for daemon state '%s'", desired_state)
323
-
324
- # ========== Phase 3: Pose Control ==========
325
-
326
- def _get_head_pose(self) -> np.ndarray | None:
327
- """Get current head pose from SDK.
328
-
329
- Note: SDK's get_current_head_pose() is non-blocking - it returns
330
- cached data from Zenoh subscriptions, so no throttling needed.
331
- """
332
- if not self.is_available:
333
- return None
334
-
335
- try:
336
- return self.reachy.get_current_head_pose()
337
- except Exception as e:
338
- logger.error(f"Error getting head pose: {e}")
339
- return None
340
-
341
- def _get_joint_positions(self) -> tuple | None:
342
- """Get current joint positions from SDK.
343
-
344
- Note: SDK's get_current_joint_positions() is non-blocking - it returns
345
- cached data from Zenoh subscriptions, so no throttling needed.
346
- """
347
- if not self.is_available:
348
- return None
349
-
350
- try:
351
- return self.reachy.get_current_joint_positions()
352
- except Exception as e:
353
- logger.error(f"Error getting joint positions: {e}")
354
- return None
355
-
356
- def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
357
- """
358
- Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
359
-
360
- Args:
361
- pose_matrix: 4x4 homogeneous transformation matrix
362
-
363
- Returns:
364
- tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
365
- """
366
- # Extract position from the last column
367
- x = pose_matrix[0, 3]
368
- y = pose_matrix[1, 3]
369
- z = pose_matrix[2, 3]
370
-
371
- # Extract rotation matrix and convert to euler angles
372
- rotation_matrix = pose_matrix[:3, :3]
373
- rotation = R.from_matrix(rotation_matrix)
374
- # Use 'xyz' convention for roll, pitch, yaw
375
- roll, pitch, yaw = rotation.as_euler("xyz")
376
-
377
- return x, y, z, roll, pitch, yaw
378
-
379
- def _get_head_pose_component(self, component: str) -> float:
380
- """Get a specific component from head pose.
381
-
382
- Args:
383
- component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
384
-
385
- Returns:
386
- The component value, or 0.0 on error
387
- """
388
- pose = self._get_head_pose()
389
- if pose is None:
390
- return 0.0
391
- try:
392
- x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
393
- components = {
394
- "x": x * 1000, # m to mm
395
- "y": y * 1000,
396
- "z": z * 1000,
397
- "roll": math.degrees(roll),
398
- "pitch": math.degrees(pitch),
399
- "yaw": math.degrees(yaw),
400
- }
401
- return components.get(component, 0.0)
402
- except Exception as e:
403
- logger.error(f"Error getting head {component}: {e}")
404
- return 0.0
405
-
406
- def _disabled_pose_setter(self, name: str) -> None:
407
- """Log warning when MovementManager is not available."""
408
- logger.warning(f"set_{name} failed - MovementManager not set")
409
-
410
- def _set_pose_via_manager(self, **kwargs) -> bool:
411
- """Set pose via MovementManager if available.
412
-
413
- Returns True if successful, False if MovementManager not available.
414
- """
415
- if self._movement_manager is None:
416
- return False
417
- self._movement_manager.set_target_pose(**kwargs)
418
- return True
419
-
420
- # Head position getters and setters
421
- def get_head_x(self) -> float:
422
- """Get head X position in mm."""
423
- return self._get_head_pose_component("x")
424
-
425
- def set_head_x(self, x_mm: float) -> None:
426
- """Set head X position in mm via MovementManager."""
427
- if not self._set_pose_via_manager(x=x_mm / 1000.0): # mm to m
428
- self._disabled_pose_setter("head_x")
429
-
430
- def get_head_y(self) -> float:
431
- """Get head Y position in mm."""
432
- return self._get_head_pose_component("y")
433
-
434
- def set_head_y(self, y_mm: float) -> None:
435
- """Set head Y position in mm via MovementManager."""
436
- if not self._set_pose_via_manager(y=y_mm / 1000.0): # mm to m
437
- self._disabled_pose_setter("head_y")
438
-
439
- def get_head_z(self) -> float:
440
- """Get head Z position in mm."""
441
- return self._get_head_pose_component("z")
442
-
443
- def set_head_z(self, z_mm: float) -> None:
444
- """Set head Z position in mm via MovementManager."""
445
- if not self._set_pose_via_manager(z=z_mm / 1000.0): # mm to m
446
- self._disabled_pose_setter("head_z")
447
-
448
- # Head orientation getters and setters
449
- def get_head_roll(self) -> float:
450
- """Get head roll angle in degrees."""
451
- return self._get_head_pose_component("roll")
452
-
453
- def set_head_roll(self, roll_deg: float) -> None:
454
- """Set head roll angle in degrees via MovementManager."""
455
- if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
456
- self._disabled_pose_setter("head_roll")
457
-
458
- def get_head_pitch(self) -> float:
459
- """Get head pitch angle in degrees."""
460
- return self._get_head_pose_component("pitch")
461
-
462
- def set_head_pitch(self, pitch_deg: float) -> None:
463
- """Set head pitch angle in degrees via MovementManager."""
464
- if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
465
- self._disabled_pose_setter("head_pitch")
466
-
467
- def get_head_yaw(self) -> float:
468
- """Get head yaw angle in degrees."""
469
- return self._get_head_pose_component("yaw")
470
-
471
- def set_head_yaw(self, yaw_deg: float) -> None:
472
- """Set head yaw angle in degrees via MovementManager."""
473
- if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
474
- self._disabled_pose_setter("head_yaw")
475
-
476
- def get_body_yaw(self) -> float:
477
- """Get body yaw angle in degrees."""
478
- joints = self._get_joint_positions()
479
- if joints is None:
480
- return 0.0
481
- try:
482
- head_joints, _ = joints
483
- return math.degrees(head_joints[0])
484
- except Exception as e:
485
- logger.error(f"Error getting body yaw: {e}")
486
- return 0.0
487
-
488
- def set_body_yaw(self, yaw_deg: float) -> None:
489
- """Set body yaw angle in degrees.
490
-
491
- Note: This directly calls SDK's set_target_body_yaw since automatic body yaw
492
- is enabled. Manual control will temporarily override automatic mode.
493
- """
494
- if self.reachy is None:
495
- self._disabled_pose_setter("body_yaw")
496
- return
497
- try:
498
- self.reachy.set_target_body_yaw(math.radians(yaw_deg))
499
- except Exception as e:
500
- logger.error(f"Error setting body yaw: {e}")
501
-
502
- def get_antenna_left(self) -> float:
503
- """Get left antenna angle in degrees."""
504
- joints = self._get_joint_positions()
505
- if joints is None:
506
- return 0.0
507
- try:
508
- _, antennas = joints
509
- return math.degrees(antennas[1]) # left is index 1
510
- except Exception as e:
511
- logger.error(f"Error getting left antenna: {e}")
512
- return 0.0
513
-
514
- def set_antenna_left(self, angle_deg: float) -> None:
515
- """Set left antenna angle in degrees via MovementManager."""
516
- if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
517
- self._disabled_pose_setter("antenna_left")
518
-
519
- def get_antenna_right(self) -> float:
520
- """Get right antenna angle in degrees."""
521
- joints = self._get_joint_positions()
522
- if joints is None:
523
- return 0.0
524
- try:
525
- _, antennas = joints
526
- return math.degrees(antennas[0]) # right is index 0
527
- except Exception as e:
528
- logger.error(f"Error getting right antenna: {e}")
529
- return 0.0
530
-
531
- def set_antenna_right(self, angle_deg: float) -> None:
532
- """Set right antenna angle in degrees via MovementManager."""
533
- if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
534
- self._disabled_pose_setter("antenna_right")
535
-
536
- # ========== Phase 4: Look At Control ==========
537
-
538
- def get_look_at_x(self) -> float:
539
- """Get look at target X coordinate in world frame (meters)."""
540
- return self._look_at_x
541
-
542
- def set_look_at_x(self, x: float) -> None:
543
- """Set look at target X coordinate."""
544
- self._look_at_x = x
545
- self._update_look_at()
546
-
547
- def get_look_at_y(self) -> float:
548
- """Get look at target Y coordinate in world frame (meters)."""
549
- return self._look_at_y
550
-
551
- def set_look_at_y(self, y: float) -> None:
552
- """Set look at target Y coordinate."""
553
- self._look_at_y = y
554
- self._update_look_at()
555
-
556
- def get_look_at_z(self) -> float:
557
- """Get look at target Z coordinate in world frame (meters)."""
558
- return self._look_at_z
559
-
560
- def set_look_at_z(self, z: float) -> None:
561
- """Set look at target Z coordinate."""
562
- self._look_at_z = z
563
- self._update_look_at()
564
-
565
- def _update_look_at(self) -> None:
566
- """Update robot to look at the target coordinates.
567
-
568
- NOTE: Disabled to prevent conflict with MovementManager's control loop.
569
- """
570
- logger.warning("_update_look_at is disabled - MovementManager controls head pose")
571
- # if not self.is_available:
572
- # return
573
- # try:
574
- # x = getattr(self, '_look_at_x', 0.0)
575
- # y = getattr(self, '_look_at_y', 0.0)
576
- # z = getattr(self, '_look_at_z', 0.0)
577
- # self.reachy.look_at_world(x, y, z)
578
- # logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
579
- # except Exception as e:
580
- # logger.error(f"Error updating look at: {e}")
581
-
582
- # ========== Phase 6: Diagnostic Information ==========
583
-
584
- def get_control_loop_frequency(self) -> float:
585
- """Get control loop frequency in Hz with caching."""
586
- status = self._get_cached_status()
587
- if status is None:
588
- return 0.0
589
- try:
590
- control_loop_stats = self._nested_status_value(status, "backend_status", "control_loop_stats", None)
591
- if isinstance(control_loop_stats, dict):
592
- return float(control_loop_stats.get("mean_control_loop_frequency", 0.0))
593
- if control_loop_stats is not None:
594
- return float(getattr(control_loop_stats, "mean_control_loop_frequency", 0.0))
595
- return 0.0
596
- except Exception as e:
597
- logger.error(f"Error getting control loop frequency: {e}")
598
- return 0.0
599
-
600
- def get_sdk_version(self) -> str:
601
- """Get SDK version with caching."""
602
- status = self._get_cached_status()
603
- if status is None:
604
- return "N/A"
605
- return str(self._status_value(status, "version", "unknown") or "unknown")
606
-
607
- def get_robot_name(self) -> str:
608
- """Get robot name with caching."""
609
- status = self._get_cached_status()
610
- if status is None:
611
- return "N/A"
612
- return str(self._status_value(status, "robot_name", "unknown") or "unknown")
613
-
614
- def get_wireless_version(self) -> bool:
615
- """Check if this is a wireless version with caching."""
616
- status = self._get_cached_status()
617
- if status is None:
618
- return False
619
- return bool(self._status_value(status, "wireless_version", False))
620
-
621
- def get_simulation_mode(self) -> bool:
622
- """Check if simulation mode is enabled with caching."""
623
- status = self._get_cached_status()
624
- if status is None:
625
- return False
626
- return bool(self._status_value(status, "simulation_enabled", False))
627
-
628
- def get_wlan_ip(self) -> str:
629
- """Get WLAN IP address with caching."""
630
- status = self._get_cached_status()
631
- if status is None:
632
- return "N/A"
633
- return str(self._status_value(status, "wlan_ip", "N/A") or "N/A")
634
-
635
- # ========== Phase 7: IMU Sensors (Wireless only) ==========
636
-
637
- def _get_imu_value(self, sensor_type: str, index: int) -> float:
638
- """Get a specific IMU sensor value.
639
-
640
- Args:
641
- sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
642
- index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
643
-
644
- Returns:
645
- The sensor value, or 0.0 on error
646
- """
647
- if not self.is_available:
648
- return 0.0
649
- try:
650
- imu_data = self.reachy.imu
651
- if imu_data is None or sensor_type not in imu_data:
652
- return 0.0
653
- value = imu_data[sensor_type]
654
- return float(value[index]) if index >= 0 else float(value)
655
- except Exception as e:
656
- logger.debug(f"Error getting IMU {sensor_type}: {e}")
657
- return 0.0
658
-
659
- def get_imu_accel_x(self) -> float:
660
- """Get IMU X-axis acceleration in m/s²."""
661
- return self._get_imu_value("accelerometer", 0)
662
-
663
- def get_imu_accel_y(self) -> float:
664
- """Get IMU Y-axis acceleration in m/s²."""
665
- return self._get_imu_value("accelerometer", 1)
666
-
667
- def get_imu_accel_z(self) -> float:
668
- """Get IMU Z-axis acceleration in m/s²."""
669
- return self._get_imu_value("accelerometer", 2)
670
-
671
- def get_imu_gyro_x(self) -> float:
672
- """Get IMU X-axis angular velocity in rad/s."""
673
- return self._get_imu_value("gyroscope", 0)
674
-
675
- def get_imu_gyro_y(self) -> float:
676
- """Get IMU Y-axis angular velocity in rad/s."""
677
- return self._get_imu_value("gyroscope", 1)
678
-
679
- def get_imu_gyro_z(self) -> float:
680
- """Get IMU Z-axis angular velocity in rad/s."""
681
- return self._get_imu_value("gyroscope", 2)
682
-
683
- def get_imu_temperature(self) -> float:
684
- """Get IMU temperature in °C."""
685
- return self._get_imu_value("temperature", -1)
686
-
687
- # ========== Phase 11: LED Control (DISABLED) ==========
688
- # LED control is disabled because LEDs are hidden inside the robot.
689
- # See PROJECT_PLAN.md principle 8.
690
-
691
- # ========== DOA (Direction of Arrival) ==========
692
-
693
- def get_doa_angle(self) -> tuple[float, bool] | None:
694
- """Get Direction of Arrival angle from microphone array.
695
-
696
- The DOA angle indicates the direction of the sound source relative to the robot.
697
- Angle is in radians: 0 = left, π/2 = front/back, π = right.
698
-
699
- Returns:
700
- Tuple of (angle_radians, speech_detected), or None if unavailable.
701
- - angle_radians: Sound source direction in radians
702
- - speech_detected: Whether speech is currently detected
703
- """
704
- if not self.is_available:
705
- return None
706
- try:
707
- return self.reachy.media.get_DoA()
708
- except Exception as e:
709
- logger.debug(f"Error getting DOA: {e}")
710
- return None
711
-
712
- def get_doa_angle_degrees(self) -> float:
713
- """Get DOA angle in degrees for Home Assistant entity.
714
-
715
- Returns the raw DOA angle in degrees (0-180°).
716
- SDK convention: 0° = left, 90° = front, 180° = right
717
- """
718
- doa = self.get_doa_angle()
719
- if doa is None:
720
- return 0.0
721
- angle_rad, _ = doa
722
- # Return raw angle in degrees (0-180°)
723
- angle_deg = math.degrees(angle_rad)
724
- return angle_deg
725
-
726
- def get_speech_detected(self) -> bool:
727
- """Get speech detection status from DOA.
728
-
729
- Returns True if speech is currently detected.
730
- """
731
- doa = self.get_doa_angle()
732
- if doa is None:
733
- return False
734
- _, speech_detected = doa
735
- return speech_detected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reachy Mini controller wrapper for ESPHome entities."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any, Dict, Optional, TYPE_CHECKING
6
+ import math
7
+ import numpy as np
8
+ from scipy.spatial.transform import Rotation as R
9
+ import requests
10
+
11
+ if TYPE_CHECKING:
12
+ from reachy_mini import ReachyMini
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class _ReSpeakerContext:
18
+ """Context manager for thread-safe ReSpeaker access."""
19
+
20
+ def __init__(self, respeaker, lock):
21
+ self._respeaker = respeaker
22
+ self._lock = lock
23
+
24
+ def __enter__(self):
25
+ self._lock.acquire()
26
+ return self._respeaker
27
+
28
+ def __exit__(self, exc_type, exc_val, exc_tb):
29
+ self._lock.release()
30
+ return False
31
+
32
+
33
+ class ReachyController:
34
+ """
35
+ Wrapper class for Reachy Mini control operations.
36
+
37
+ Provides safe access to Reachy Mini SDK functions with error handling
38
+ and fallback for standalone mode (when robot is not available).
39
+ """
40
+
41
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
42
+ """
43
+ Initialize the controller.
44
+
45
+ Args:
46
+ reachy_mini: ReachyMini instance, or None for standalone mode
47
+ """
48
+ self.reachy = reachy_mini
49
+ self._speaker_volume = 100 # Default volume
50
+ self._movement_manager = None # Set later via set_movement_manager()
51
+
52
+ # Status caching - only for get_status() which may trigger I/O
53
+ # Note: get_current_head_pose() and get_current_joint_positions() are
54
+ # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
55
+ self._state_cache: Dict[str, Any] = {}
56
+ self._cache_ttl = 2.0 # 2 second cache TTL for status queries (increased from 1s)
57
+ self._last_status_query = 0.0
58
+
59
+ # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
60
+ self._respeaker_lock = __import__('threading').Lock()
61
+
62
def set_movement_manager(self, movement_manager) -> None:
    """Attach the MovementManager used for pose control.

    Args:
        movement_manager: MovementManager instance
    """
    self._movement_manager = movement_manager
    logger.info("MovementManager set for ReachyController")
70
+
71
+ @property
72
+ def is_available(self) -> bool:
73
+ """Check if robot is available."""
74
+ return self.reachy is not None
75
+
76
+ # ========== Phase 1: Basic Status & Volume ==========
77
+
78
+ def _get_cached_status(self) -> Optional[Dict]:
79
+ """Get cached daemon status to reduce query frequency.
80
+
81
+ Note: get_status() may trigger I/O, so we cache it.
82
+ Unlike get_current_head_pose() and get_current_joint_positions()
83
+ which are non-blocking in the SDK.
84
+ """
85
+ now = time.time()
86
+ if now - self._last_status_query < self._cache_ttl:
87
+ return self._state_cache.get('status')
88
+
89
+ if not self.is_available:
90
+ return None
91
+
92
+ try:
93
+ status = self.reachy.client.get_status(wait=False)
94
+ self._state_cache['status'] = status
95
+ self._last_status_query = now
96
+ return status
97
+ except Exception as e:
98
+ logger.error(f"Error getting status: {e}")
99
+ return self._state_cache.get('status') # Return stale cache on error
100
+
101
+ def get_daemon_state(self) -> str:
102
+ """Get daemon state with caching."""
103
+ status = self._get_cached_status()
104
+ if status is None:
105
+ return "not_available"
106
+ return status.get('state', 'unknown')
107
+
108
+ def get_backend_ready(self) -> bool:
109
+ """Check if backend is ready with caching."""
110
+ status = self._get_cached_status()
111
+ if status is None:
112
+ return False
113
+ return status.get('state') == 'running'
114
+
115
+ def get_error_message(self) -> str:
116
+ """Get current error message with caching."""
117
+ status = self._get_cached_status()
118
+ if status is None:
119
+ return "Robot not available"
120
+ return status.get('error') or ""
121
+
122
+ def get_speaker_volume(self) -> float:
123
+ """Get speaker volume (0-100) with caching."""
124
+ if not self.is_available:
125
+ return self._speaker_volume
126
+ try:
127
+ # Get volume from daemon API (use cached status for IP)
128
+ status = self._get_cached_status()
129
+ if status is None:
130
+ return self._speaker_volume
131
+ wlan_ip = status.get('wlan_ip', 'localhost')
132
+ response = requests.get(f"http://{wlan_ip}:8000/api/volume/current", timeout=2)
133
+ if response.status_code == 200:
134
+ data = response.json()
135
+ self._speaker_volume = float(data.get('volume', self._speaker_volume))
136
+ except Exception as e:
137
+ logger.debug(f"Could not get volume from API: {e}")
138
+ return self._speaker_volume
139
+
140
def set_speaker_volume(self, volume: float) -> None:
    """
    Set speaker volume (0-100) through the daemon HTTP API.

    The value is clamped to [0, 100] and remembered locally even when the
    robot is unavailable or the request fails. The daemon host comes from
    the cached status ('wlan_ip'), falling back to 'localhost'.

    Args:
        volume: Volume level 0-100
    """
    volume = max(0.0, min(100.0, volume))
    self._speaker_volume = volume

    if not self.is_available:
        logger.warning("Cannot set volume: robot not available")
        return

    try:
        # Set volume via daemon API (use cached status for IP)
        status = self._get_cached_status()
        if status is None:
            logger.error("Cannot get daemon status for volume control")
            return
        # `or` rather than a .get() default: a present-but-empty wlan_ip
        # (None / "") must also fall back, otherwise the URL host is "None".
        host = status.get('wlan_ip') or 'localhost'
        response = requests.post(
            f"http://{host}:8000/api/volume/set",
            json={"volume": int(volume)},  # the request carries an integer level
            timeout=5,
        )
        if response.status_code == 200:
            logger.info(f"Speaker volume set to {volume}%")
        else:
            logger.error(f"Failed to set volume: {response.status_code} {response.text}")
    except Exception as e:
        logger.error(f"Error setting speaker volume: {e}")
172
+
173
def get_microphone_volume(self) -> float:
    """Return the microphone volume (0-100), refreshed from the daemon API.

    When the robot is unavailable, no cached status exists, or the HTTP
    request fails, the last known value is returned (50.0 if none has
    been stored yet).

    Returns:
        The microphone volume as a float.
    """
    fallback = getattr(self, '_microphone_volume', 50.0)
    if not self.is_available:
        return fallback

    try:
        # The daemon host is taken from the cached status.
        status = self._get_cached_status()
        if status is None:
            return fallback
        # ``wlan_ip`` may be present but None/empty (get_wlan_ip guards the
        # same way); fall back with ``or`` so the URL never contains "None".
        host = status.get('wlan_ip') or 'localhost'

        response = requests.get(
            f"http://{host}:8000/api/volume/microphone/current",
            timeout=2,
        )
        if response.status_code == 200:
            data = response.json()
            # Keep the last known value when the reply has no 'volume'
            # entry, matching get_speaker_volume, rather than resetting to 50.
            self._microphone_volume = float(data.get('volume', fallback))
            return self._microphone_volume
    except Exception as e:
        # Best effort: keep serving the last known value.
        logger.debug(f"Could not get microphone volume from API: {e}")

    return getattr(self, '_microphone_volume', 50.0)
198
+
199
def set_microphone_volume(self, volume: float) -> None:
    """Set the microphone volume through the daemon HTTP API.

    The value is clamped to 0-100 and stored in
    ``self._microphone_volume`` before any network access, so the stored
    value is updated even when the robot cannot be reached.

    Args:
        volume: Requested volume level, 0-100. It is sent to the daemon
            truncated to an integer.
    """
    volume = max(0.0, min(100.0, volume))
    self._microphone_volume = volume

    if not self.is_available:
        logger.warning("Cannot set microphone volume: robot not available")
        return

    try:
        # The daemon host is taken from the cached status.
        status = self._get_cached_status()
        if status is None:
            logger.error("Cannot get daemon status for microphone volume control")
            return
        # ``wlan_ip`` may be present but None/empty (get_wlan_ip guards the
        # same way); fall back with ``or`` so the URL never contains "None".
        host = status.get('wlan_ip') or 'localhost'

        response = requests.post(
            f"http://{host}:8000/api/volume/microphone/set",
            json={"volume": int(volume)},
            timeout=5,
        )
        if response.status_code == 200:
            logger.info(f"Microphone volume set to {volume}%")
        else:
            logger.error(f"Failed to set microphone volume: {response.status_code} {response.text}")
    except Exception as e:
        logger.error(f"Error setting microphone volume: {e}")
233
+
234
+ # ========== Phase 2: Motor Control ==========
235
+
236
def get_motors_enabled(self) -> bool:
    """Report whether the motors are enabled, based on the cached status.

    Returns:
        False when no status is cached or an error occurs. When the status
        carries a non-empty ``backend_status`` dict, True only if its
        ``motor_control_mode`` is ``'enabled'``. Otherwise True when the
        daemon ``state`` is ``'running'``.
    """
    cached = self._get_cached_status()
    if cached is None:
        return False
    try:
        backend = cached.get('backend_status')
        if not (backend and isinstance(backend, dict)):
            # No backend details available: fall back to the daemon state.
            return cached.get('state') == 'running'
        return backend.get('motor_control_mode', 'disabled') == 'enabled'
    except Exception as e:
        logger.error(f"Error getting motor state: {e}")
        return False
250
+
251
def set_motors_enabled(self, enabled: bool) -> None:
    """Switch the robot motors on or off through the SDK handle.

    Logs a warning and does nothing when the robot is unavailable; SDK
    errors are logged rather than raised.

    Args:
        enabled: True to enable the motors, False to disable them.
    """
    if not self.is_available:
        logger.warning("Cannot control motors: robot not available")
        return

    try:
        if not enabled:
            self.reachy.disable_motors()
            logger.info("Motors disabled")
        else:
            self.reachy.enable_motors()
            logger.info("Motors enabled")
    except Exception as e:
        logger.error(f"Error setting motor state: {e}")
271
+
272
def get_motor_mode(self) -> str:
    """Return the motor control mode derived from the cached status.

    Returns:
        ``"disabled"`` when no status is cached. When the status carries a
        non-empty ``backend_status`` dict, its ``motor_control_mode``
        (default ``'disabled'``). Otherwise ``"enabled"`` if the daemon
        ``state`` is ``'running'``, else ``"disabled"``. ``"error"`` if
        reading the status raises.
    """
    cached = self._get_cached_status()
    if cached is None:
        return "disabled"
    try:
        backend = cached.get('backend_status')
        if backend and isinstance(backend, dict):
            return backend.get('motor_control_mode', 'disabled')
        # No backend details available: infer the mode from the daemon state.
        return "enabled" if cached.get('state') == 'running' else "disabled"
    except Exception as e:
        logger.error(f"Error getting motor mode: {e}")
        return "error"
288
+
289
def set_motor_mode(self, mode: str) -> None:
    """Apply a motor control mode through the SDK handle.

    Logs a warning and does nothing when the robot is unavailable or the
    mode is not recognised; SDK errors are logged rather than raised.

    Args:
        mode: One of "enabled", "disabled", "gravity_compensation".
    """
    if not self.is_available:
        logger.warning("Cannot set motor mode: robot not available")
        return

    # Mode name -> SDK method name. Methods are resolved lazily so only the
    # selected one is looked up on the robot handle.
    actions = (
        ("enabled", "enable_motors"),
        ("disabled", "disable_motors"),
        ("gravity_compensation", "enable_gravity_compensation"),
    )
    try:
        for candidate, method_name in actions:
            if mode == candidate:
                getattr(self.reachy, method_name)()
                break
        else:
            logger.warning(f"Invalid motor mode: {mode}")
            return
        logger.info(f"Motor mode set to {mode}")
    except Exception as e:
        logger.error(f"Error setting motor mode: {e}")
313
+
314
def wake_up(self) -> None:
    """Run the SDK wake-up animation.

    Logs a warning when the robot is unavailable; SDK errors are logged
    rather than raised.
    """
    if self.is_available:
        try:
            robot = self.reachy
            robot.wake_up()
            logger.info("Wake up animation executed")
        except Exception as e:
            logger.error(f"Error executing wake up: {e}")
    else:
        logger.warning("Cannot wake up: robot not available")
325
+
326
def go_to_sleep(self) -> None:
    """Run the SDK sleep animation.

    Logs a warning when the robot is unavailable; SDK errors are logged
    rather than raised.
    """
    if self.is_available:
        try:
            robot = self.reachy
            robot.goto_sleep()
            logger.info("Sleep animation executed")
        except Exception as e:
            logger.error(f"Error executing sleep: {e}")
    else:
        logger.warning("Cannot sleep: robot not available")
337
+
338
+ # ========== Phase 3: Pose Control ==========
339
+
340
+ def _get_head_pose(self) -> Optional[np.ndarray]:
341
+ """Get current head pose from SDK.
342
+
343
+ Note: SDK's get_current_head_pose() is non-blocking - it returns
344
+ cached data from Zenoh subscriptions, so no throttling needed.
345
+ """
346
+ if not self.is_available:
347
+ return None
348
+
349
+ try:
350
+ return self.reachy.get_current_head_pose()
351
+ except Exception as e:
352
+ logger.error(f"Error getting head pose: {e}")
353
+ return None
354
+
355
+ def _get_joint_positions(self) -> Optional[tuple]:
356
+ """Get current joint positions from SDK.
357
+
358
+ Note: SDK's get_current_joint_positions() is non-blocking - it returns
359
+ cached data from Zenoh subscriptions, so no throttling needed.
360
+ """
361
+ if not self.is_available:
362
+ return None
363
+
364
+ try:
365
+ return self.reachy.get_current_joint_positions()
366
+ except Exception as e:
367
+ logger.error(f"Error getting joint positions: {e}")
368
+ return None
369
+
370
+ def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
371
+ """
372
+ Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
373
+
374
+ Args:
375
+ pose_matrix: 4x4 homogeneous transformation matrix
376
+
377
+ Returns:
378
+ tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
379
+ """
380
+ # Extract position from the last column
381
+ x = pose_matrix[0, 3]
382
+ y = pose_matrix[1, 3]
383
+ z = pose_matrix[2, 3]
384
+
385
+ # Extract rotation matrix and convert to euler angles
386
+ rotation_matrix = pose_matrix[:3, :3]
387
+ rotation = R.from_matrix(rotation_matrix)
388
+ # Use 'xyz' convention for roll, pitch, yaw
389
+ roll, pitch, yaw = rotation.as_euler('xyz')
390
+
391
+ return x, y, z, roll, pitch, yaw
392
+
393
+ def _get_head_pose_component(self, component: str) -> float:
394
+ """Get a specific component from head pose.
395
+
396
+ Args:
397
+ component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
398
+
399
+ Returns:
400
+ The component value, or 0.0 on error
401
+ """
402
+ pose = self._get_head_pose()
403
+ if pose is None:
404
+ return 0.0
405
+ try:
406
+ x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
407
+ components = {
408
+ 'x': x * 1000, # m to mm
409
+ 'y': y * 1000,
410
+ 'z': z * 1000,
411
+ 'roll': math.degrees(roll),
412
+ 'pitch': math.degrees(pitch),
413
+ 'yaw': math.degrees(yaw),
414
+ }
415
+ return components.get(component, 0.0)
416
+ except Exception as e:
417
+ logger.error(f"Error getting head {component}: {e}")
418
+ return 0.0
419
+
420
def _disabled_pose_setter(self, name: str) -> None:
    """Warn that a pose setter could not run without a MovementManager.

    Args:
        name: Setter suffix inserted into the message as ``set_<name>``.
    """
    message = f"set_{name} failed - MovementManager not set"
    logger.warning(message)
423
+
424
+ def _set_pose_via_manager(self, **kwargs) -> bool:
425
+ """Set pose via MovementManager if available.
426
+
427
+ Returns True if successful, False if MovementManager not available.
428
+ """
429
+ if self._movement_manager is None:
430
+ return False
431
+ self._movement_manager.set_target_pose(**kwargs)
432
+ return True
433
+
434
+ # Head position getters and setters
435
def get_head_x(self) -> float:
    """Return the head X position in millimetres."""
    x_mm = self._get_head_pose_component('x')
    return x_mm
438
+
439
def set_head_x(self, x_mm: float) -> None:
    """Request a head X position (given in mm) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    metres = x_mm / 1000.0  # mm to m
    applied = self._set_pose_via_manager(x=metres)
    if not applied:
        self._disabled_pose_setter('head_x')
443
+
444
def get_head_y(self) -> float:
    """Return the head Y position in millimetres."""
    y_mm = self._get_head_pose_component('y')
    return y_mm
447
+
448
def set_head_y(self, y_mm: float) -> None:
    """Request a head Y position (given in mm) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    metres = y_mm / 1000.0  # mm to m
    applied = self._set_pose_via_manager(y=metres)
    if not applied:
        self._disabled_pose_setter('head_y')
452
+
453
def get_head_z(self) -> float:
    """Return the head Z position in millimetres."""
    z_mm = self._get_head_pose_component('z')
    return z_mm
456
+
457
def set_head_z(self, z_mm: float) -> None:
    """Request a head Z position (given in mm) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    metres = z_mm / 1000.0  # mm to m
    applied = self._set_pose_via_manager(z=metres)
    if not applied:
        self._disabled_pose_setter('head_z')
461
+
462
+ # Head orientation getters and setters
463
def get_head_roll(self) -> float:
    """Return the head roll angle in degrees."""
    roll_deg = self._get_head_pose_component('roll')
    return roll_deg
466
+
467
def set_head_roll(self, roll_deg: float) -> None:
    """Request a head roll angle (given in degrees) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    roll_rad = math.radians(roll_deg)
    applied = self._set_pose_via_manager(roll=roll_rad)
    if not applied:
        self._disabled_pose_setter('head_roll')
471
+
472
def get_head_pitch(self) -> float:
    """Return the head pitch angle in degrees."""
    pitch_deg = self._get_head_pose_component('pitch')
    return pitch_deg
475
+
476
def set_head_pitch(self, pitch_deg: float) -> None:
    """Request a head pitch angle (given in degrees) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    pitch_rad = math.radians(pitch_deg)
    applied = self._set_pose_via_manager(pitch=pitch_rad)
    if not applied:
        self._disabled_pose_setter('head_pitch')
480
+
481
def get_head_yaw(self) -> float:
    """Return the head yaw angle in degrees."""
    yaw_deg = self._get_head_pose_component('yaw')
    return yaw_deg
484
+
485
def set_head_yaw(self, yaw_deg: float) -> None:
    """Request a head yaw angle (given in degrees) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    yaw_rad = math.radians(yaw_deg)
    applied = self._set_pose_via_manager(yaw=yaw_rad)
    if not applied:
        self._disabled_pose_setter('head_yaw')
489
+
490
def get_body_yaw(self) -> float:
    """Return the body yaw angle in degrees.

    The value is read from index 0 of the first element of the joint
    positions pair.

    Returns:
        The angle in degrees, or 0.0 when joint positions are unavailable
        or cannot be unpacked.
    """
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        head_joints, _antennas = positions
        yaw_rad = head_joints[0]
        return math.degrees(yaw_rad)
    except Exception as e:
        logger.error(f"Error getting body yaw: {e}")
        return 0.0
501
+
502
def set_body_yaw(self, yaw_deg: float) -> None:
    """Set the body yaw angle (given in degrees) directly through the SDK.

    Per the original author's note, automatic body yaw is enabled and this
    manual call temporarily overrides it. When no robot handle exists the
    disabled-setter warning is emitted instead; SDK errors are logged.
    """
    robot = self.reachy
    if robot is None:
        self._disabled_pose_setter('body_yaw')
        return
    try:
        yaw_rad = math.radians(yaw_deg)
        self.reachy.set_target_body_yaw(yaw_rad)
    except Exception as e:
        logger.error(f"Error setting body yaw: {e}")
515
+
516
def get_antenna_left(self) -> float:
    """Return the left antenna angle in degrees.

    Returns:
        The angle in degrees, or 0.0 when joint positions are unavailable
        or cannot be unpacked.
    """
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        left_rad = antenna_angles[1]  # left is index 1
        return math.degrees(left_rad)
    except Exception as e:
        logger.error(f"Error getting left antenna: {e}")
        return 0.0
527
+
528
def set_antenna_left(self, angle_deg: float) -> None:
    """Request a left antenna angle (given in degrees) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    angle_rad = math.radians(angle_deg)
    applied = self._set_pose_via_manager(antenna_left=angle_rad)
    if not applied:
        self._disabled_pose_setter('antenna_left')
532
+
533
def get_antenna_right(self) -> float:
    """Return the right antenna angle in degrees.

    Returns:
        The angle in degrees, or 0.0 when joint positions are unavailable
        or cannot be unpacked.
    """
    positions = self._get_joint_positions()
    if positions is None:
        return 0.0
    try:
        _head, antenna_angles = positions
        right_rad = antenna_angles[0]  # right is index 0
        return math.degrees(right_rad)
    except Exception as e:
        logger.error(f"Error getting right antenna: {e}")
        return 0.0
544
+
545
def set_antenna_right(self, angle_deg: float) -> None:
    """Request a right antenna angle (given in degrees) through the MovementManager.

    Falls back to the disabled-setter warning when no manager is attached.
    """
    angle_rad = math.radians(angle_deg)
    applied = self._set_pose_via_manager(antenna_right=angle_rad)
    if not applied:
        self._disabled_pose_setter('antenna_right')
549
+
550
+ # ========== Phase 4: Look At Control ==========
551
+
552
def get_look_at_x(self) -> float:
    """Return the stored look-at target X coordinate (world frame, metres).

    This is a remembered target rather than a measured state; 0.0 is
    returned until a value has been set.
    """
    target_x = getattr(self, '_look_at_x', 0.0)
    return target_x
557
+
558
def set_look_at_x(self, x: float) -> None:
    """Store the look-at target X coordinate and trigger a look-at update."""
    setattr(self, '_look_at_x', x)
    self._update_look_at()
562
+
563
def get_look_at_y(self) -> float:
    """Return the stored look-at target Y coordinate (world frame, metres).

    0.0 is returned until a value has been set.
    """
    target_y = getattr(self, '_look_at_y', 0.0)
    return target_y
566
+
567
def set_look_at_y(self, y: float) -> None:
    """Store the look-at target Y coordinate and trigger a look-at update."""
    setattr(self, '_look_at_y', y)
    self._update_look_at()
571
+
572
def get_look_at_z(self) -> float:
    """Return the stored look-at target Z coordinate (world frame, metres).

    0.0 is returned until a value has been set.
    """
    target_z = getattr(self, '_look_at_z', 0.0)
    return target_z
575
+
576
def set_look_at_z(self, z: float) -> None:
    """Store the look-at target Z coordinate and trigger a look-at update."""
    setattr(self, '_look_at_z', z)
    self._update_look_at()
580
+
581
def _update_look_at(self) -> None:
    """Placeholder for steering the robot toward the stored look-at target.

    NOTE: Intentionally disabled so it does not conflict with the
    MovementManager control loop; it only logs a warning. The earlier
    implementation read ``_look_at_x/_y/_z`` (defaulting to 0.0) and
    called ``self.reachy.look_at_world(x, y, z)`` when the robot was
    available.
    """
    logger.warning("_update_look_at is disabled - MovementManager controls head pose")
597
+
598
+ # ========== Phase 6: Diagnostic Information ==========
599
+
600
def get_control_loop_frequency(self) -> float:
    """Return the mean control loop frequency in Hz from the cached status.

    The value is read from
    ``status['backend_status']['control_loop_stats']['mean_control_loop_frequency']``.
    Missing or ``None`` levels are treated as "no data" instead of being
    reported as errors, so polling an idle backend does not flood the log.

    Returns:
        The frequency as a float, or 0.0 when it is unavailable or not
        numeric.
    """
    status = self._get_cached_status()
    if status is None:
        return 0.0
    try:
        backend_status = status.get('backend_status')
        if not isinstance(backend_status, dict):
            return 0.0
        # The stats entry can exist with a None value; previously that
        # raised AttributeError on ``.get`` and was logged as an error.
        stats = backend_status.get('control_loop_stats')
        if not isinstance(stats, dict):
            return 0.0
        frequency = stats.get('mean_control_loop_frequency')
        # Honour the float return contract even when the daemon sends None.
        return 0.0 if frequency is None else float(frequency)
    except Exception as e:
        logger.error(f"Error getting control loop frequency: {e}")
        return 0.0
614
+
615
def get_sdk_version(self) -> str:
    """Return the SDK version from the cached status.

    Returns:
        ``"N/A"`` when no status is cached, otherwise the ``version``
        entry, or ``"unknown"`` if it is missing or falsy.
    """
    cached = self._get_cached_status()
    if cached is not None:
        return cached.get('version') or "unknown"
    return "N/A"
621
+
622
def get_robot_name(self) -> str:
    """Return the robot name from the cached status.

    Returns:
        ``"N/A"`` when no status is cached, otherwise the ``robot_name``
        entry, or ``"unknown"`` if it is missing or falsy.
    """
    cached = self._get_cached_status()
    if cached is not None:
        return cached.get('robot_name') or "unknown"
    return "N/A"
628
+
629
def get_wireless_version(self) -> bool:
    """Report the ``wireless_version`` flag from the cached status.

    Returns:
        False when no status is cached or the key is absent; otherwise the
        stored value as-is.
    """
    cached = self._get_cached_status()
    return False if cached is None else cached.get('wireless_version', False)
635
+
636
def get_simulation_mode(self) -> bool:
    """Report the ``simulation_enabled`` flag from the cached status.

    Returns:
        False when no status is cached or the key is absent; otherwise the
        stored value as-is.
    """
    cached = self._get_cached_status()
    return False if cached is None else cached.get('simulation_enabled', False)
642
+
643
def get_wlan_ip(self) -> str:
    """Return the WLAN IP address from the cached status.

    Returns:
        ``"N/A"`` when no status is cached or the ``wlan_ip`` entry is
        missing or falsy; otherwise the stored address.
    """
    cached = self._get_cached_status()
    if cached is not None:
        return cached.get('wlan_ip') or "N/A"
    return "N/A"
649
+
650
+ # ========== Phase 7: IMU Sensors (Wireless only) ==========
651
+
652
+ def _get_imu_value(self, sensor_type: str, index: int) -> float:
653
+ """Get a specific IMU sensor value.
654
+
655
+ Args:
656
+ sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
657
+ index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
658
+
659
+ Returns:
660
+ The sensor value, or 0.0 on error
661
+ """
662
+ if not self.is_available:
663
+ return 0.0
664
+ try:
665
+ imu_data = self.reachy.imu
666
+ if imu_data is None or sensor_type not in imu_data:
667
+ return 0.0
668
+ value = imu_data[sensor_type]
669
+ return float(value[index]) if index >= 0 else float(value)
670
+ except Exception as e:
671
+ logger.debug(f"Error getting IMU {sensor_type}: {e}")
672
+ return 0.0
673
+
674
def get_imu_accel_x(self) -> float:
    """Return the IMU X-axis acceleration in m/s²."""
    reading = self._get_imu_value('accelerometer', 0)
    return reading
677
+
678
def get_imu_accel_y(self) -> float:
    """Return the IMU Y-axis acceleration in m/s²."""
    reading = self._get_imu_value('accelerometer', 1)
    return reading
681
+
682
def get_imu_accel_z(self) -> float:
    """Return the IMU Z-axis acceleration in m/s²."""
    reading = self._get_imu_value('accelerometer', 2)
    return reading
685
+
686
def get_imu_gyro_x(self) -> float:
    """Return the IMU X-axis angular velocity in rad/s."""
    reading = self._get_imu_value('gyroscope', 0)
    return reading
689
+
690
def get_imu_gyro_y(self) -> float:
    """Return the IMU Y-axis angular velocity in rad/s."""
    reading = self._get_imu_value('gyroscope', 1)
    return reading
693
+
694
def get_imu_gyro_z(self) -> float:
    """Return the IMU Z-axis angular velocity in rad/s."""
    reading = self._get_imu_value('gyroscope', 2)
    return reading
697
+
698
def get_imu_temperature(self) -> float:
    """Return the IMU temperature in °C (read as a scalar, index -1)."""
    reading = self._get_imu_value('temperature', -1)
    return reading
701
+
702
+ # ========== Phase 11: LED Control (DISABLED) ==========
703
+ # LED control is disabled because LEDs are hidden inside the robot.
704
+ # See PROJECT_PLAN.md principle 8.
705
+
706
def _get_respeaker(self):
    """Wrap the ReSpeaker device and its lock in a ``_ReSpeakerContext``.

    The wrapped device is ``self.reachy.media.audio._respeaker``, or None
    when the robot is unavailable, media/audio is missing, or looking it
    up raises. Callers are expected to use the result as a context
    manager:

        with self._get_respeaker() as respeaker:
            if respeaker:
                respeaker.read("...")
    """
    if self.is_available:
        try:
            media = self.reachy.media
            if media and media.audio:
                return _ReSpeakerContext(media.audio._respeaker, self._respeaker_lock)
        except Exception:
            # Best effort: fall through to the empty context below.
            pass
    return _ReSpeakerContext(None, self._respeaker_lock)
724
+
725
+ # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
726
+
727
def get_agc_enabled(self) -> bool:
    """Return whether AGC (Automatic Gain Control) is enabled.

    Reads ``PP_AGCONOFF`` from the ReSpeaker and remembers the result in
    ``self._agc_enabled``. When the device is missing, returns no data, or
    raises, the remembered value is returned (True if none is stored).
    """
    with self._get_respeaker() as device:
        if device is not None:
            try:
                reply = device.read("PP_AGCONOFF")
                if reply is not None:
                    # The flag is taken from the second element of the reply.
                    self._agc_enabled = bool(reply[1])
                    return self._agc_enabled
            except Exception as e:
                logger.debug(f"Error getting AGC status: {e}")
        return getattr(self, '_agc_enabled', True)
740
+
741
def set_agc_enabled(self, enabled: bool) -> None:
    """Enable or disable AGC (Automatic Gain Control).

    The requested state is stored in ``self._agc_enabled`` first; the
    ``PP_AGCONOFF`` write is skipped when no ReSpeaker device is
    available, and write errors are logged rather than raised.
    """
    self._agc_enabled = enabled
    with self._get_respeaker() as device:
        if device is not None:
            try:
                flag = 1 if enabled else 0
                device.write("PP_AGCONOFF", [flag])
                logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
            except Exception as e:
                logger.error(f"Error setting AGC status: {e}")
752
+
753
def get_agc_max_gain(self) -> float:
    """Return the AGC maximum gain in dB (0-40 dB range).

    Reads ``PP_AGCMAXGAIN`` from the ReSpeaker and remembers the result in
    ``self._agc_max_gain``. When the device is missing, returns no data,
    or raises, the remembered value is returned (30.0 if none is stored).
    """
    with self._get_respeaker() as device:
        if device is not None:
            try:
                reply = device.read("PP_AGCMAXGAIN")
                if reply is not None:
                    self._agc_max_gain = float(reply[0])
                    return self._agc_max_gain
            except Exception as e:
                logger.debug(f"Error getting AGC max gain: {e}")
        return getattr(self, '_agc_max_gain', 30.0)
766
+
767
def set_agc_max_gain(self, gain: float) -> None:
    """Set the AGC maximum gain in dB.

    The value is clamped to 0-40 dB (the original comment cites the
    XVF3800 limit) and stored in ``self._agc_max_gain``; the
    ``PP_AGCMAXGAIN`` write is skipped when no ReSpeaker device is
    available, and write errors are logged rather than raised.
    """
    gain = max(0.0, min(40.0, gain))
    self._agc_max_gain = gain
    with self._get_respeaker() as device:
        if device is not None:
            try:
                device.write("PP_AGCMAXGAIN", [gain])
                logger.info(f"AGC max gain set to {gain} dB")
            except Exception as e:
                logger.error(f"Error setting AGC max gain: {e}")
779
+
780
def get_noise_suppression(self) -> float:
    """Return the noise suppression strength as a percentage (0-100).

    Per the original author's note, ``PP_MIN_NS`` is the minimum share of
    signal that is preserved, so the strength shown to the user is the
    inverse:

        suppression_percent = (1.0 - PP_MIN_NS) * 100

    e.g. PP_MIN_NS = 0.85 -> 15% suppression, PP_MIN_NS = 0.15 -> 85%.

    The result is clamped to 0-100 and remembered in
    ``self._noise_suppression``. When the device is missing, returns no
    data, or raises, the remembered value is returned (15.0 if none).
    """
    with self._get_respeaker() as device:
        if device is not None:
            try:
                reply = device.read("PP_MIN_NS")
                if reply is not None:
                    raw_value = reply[0]
                    percent = (1.0 - raw_value) * 100.0
                    self._noise_suppression = max(0.0, min(100.0, percent))
                    logger.debug(f"Noise suppression: PP_MIN_NS={raw_value:.2f} -> {self._noise_suppression:.1f}%")
                    return self._noise_suppression
            except Exception as e:
                logger.debug(f"Error getting noise suppression: {e}")
        return getattr(self, '_noise_suppression', 15.0)
804
+
805
def set_noise_suppression(self, level: float) -> None:
    """Set the noise suppression strength as a percentage.

    The level is clamped to 0-100 and stored in
    ``self._noise_suppression``; it is written to ``PP_MIN_NS`` inverted
    (``1.0 - level / 100``). The write is skipped when no ReSpeaker device
    is available, and write errors are logged rather than raised.
    """
    level = max(0.0, min(100.0, level))
    self._noise_suppression = level
    with self._get_respeaker() as device:
        if device is not None:
            try:
                # Percentage -> PP_MIN_NS (inverted scale).
                min_ns = 1.0 - (level / 100.0)
                device.write("PP_MIN_NS", [min_ns])
                logger.info(f"Noise suppression set to {level}%")
            except Exception as e:
                logger.error(f"Error setting noise suppression: {e}")
819
+
820
def get_echo_cancellation_converged(self) -> bool:
    """Report whether echo cancellation has converged.

    Reads ``AEC_AECCONVERGED`` from the ReSpeaker and returns the
    truthiness of the second element of the reply. Returns False when the
    device is missing, returns no data, or raises.
    """
    with self._get_respeaker() as device:
        if device is not None:
            try:
                reply = device.read("AEC_AECCONVERGED")
                if reply is not None:
                    return bool(reply[1])
            except Exception as e:
                logger.debug(f"Error getting AEC converged status: {e}")
        return False
832
+
833
+ # ========== DOA (Direction of Arrival) ==========
834
+
835
+ def get_doa_angle(self) -> tuple[float, bool] | None:
836
+ """Get Direction of Arrival angle from microphone array.
837
+
838
+ The DOA angle indicates the direction of the sound source relative to the robot.
839
+ Angle is in radians: 0 = left, π/2 = front/back, π = right.
840
+
841
+ Returns:
842
+ Tuple of (angle_radians, speech_detected), or None if unavailable.
843
+ - angle_radians: Sound source direction in radians
844
+ - speech_detected: Whether speech is currently detected
845
+ """
846
+ if not self.is_available:
847
+ return None
848
+ try:
849
+ if self.reachy.media and self.reachy.media.audio:
850
+ return self.reachy.media.audio.get_DoA()
851
+ except Exception as e:
852
+ logger.debug(f"Error getting DOA: {e}")
853
+ return None
854
+
855
def get_doa_angle_degrees(self) -> float:
    """Return the DOA angle in degrees for the Home Assistant entity.

    The raw angle from ``get_doa_angle`` is converted from radians without
    remapping. Per the original author's note the SDK convention is
    0° = left, 90° = front, 180° = right.

    Returns:
        The angle in degrees, or 0.0 when no DOA reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return 0.0
    angle_rad, _speech = reading
    return math.degrees(angle_rad)
868
+
869
def get_speech_detected(self) -> bool:
    """Return the speech-detected flag from the DOA reading.

    Returns:
        The second element of the ``get_doa_angle`` result, or False when
        no DOA reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return False
    _angle, is_speech = reading
    return is_speech
reachy_mini_ha_voice/satellite.py ADDED
@@ -0,0 +1,856 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Voice satellite protocol for Reachy Mini."""
2
+
3
+ import hashlib
4
+ import logging
5
+ import math
6
+ import posixpath
7
+ import shutil
8
+ import time
9
+ from collections.abc import Iterable
10
+ from typing import Dict, Optional, Set, Union, TYPE_CHECKING
11
+ from urllib.parse import urlparse, urlunparse
12
+ from urllib.request import urlopen
13
+
14
+ if TYPE_CHECKING:
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ # pylint: disable=no-name-in-module
18
+ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
+ ButtonCommandRequest,
20
+ CameraImageRequest,
21
+ DeviceInfoRequest,
22
+ DeviceInfoResponse,
23
+ ListEntitiesDoneResponse,
24
+ ListEntitiesRequest,
25
+ MediaPlayerCommandRequest,
26
+ NumberCommandRequest,
27
+ SelectCommandRequest,
28
+ SubscribeHomeAssistantStatesRequest,
29
+ SubscribeStatesRequest,
30
+ SwitchCommandRequest,
31
+ VoiceAssistantAnnounceFinished,
32
+ VoiceAssistantAnnounceRequest,
33
+ VoiceAssistantAudio,
34
+ VoiceAssistantConfigurationRequest,
35
+ VoiceAssistantConfigurationResponse,
36
+ VoiceAssistantEventResponse,
37
+ VoiceAssistantExternalWakeWord,
38
+ VoiceAssistantRequest,
39
+ VoiceAssistantSetConfiguration,
40
+ VoiceAssistantTimerEventResponse,
41
+ VoiceAssistantWakeWord,
42
+ )
43
+ from aioesphomeapi.model import (
44
+ VoiceAssistantEventType,
45
+ VoiceAssistantFeature,
46
+ VoiceAssistantTimerEventType,
47
+ )
48
+ from google.protobuf import message
49
+ from pymicro_wakeword import MicroWakeWord
50
+ from pyopen_wakeword import OpenWakeWord
51
+
52
+ from .api_server import APIServer
53
+ from .entity import MediaPlayerEntity
54
+ from .entity_registry import EntityRegistry, get_entity_key
55
+ from .models import AvailableWakeWord, ServerState, WakeWordType
56
+ from .util import call_all
57
+ from .reachy_controller import ReachyController
58
+
59
+ _LOGGER = logging.getLogger(__name__)
60
+
61
+
62
+ class VoiceSatelliteProtocol(APIServer):
63
+ """Voice satellite protocol handler for ESPHome."""
64
+
65
+ def __init__(self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None) -> None:
66
+ super().__init__(state.name)
67
+ self.state = state
68
+ self.state.satellite = self
69
+ self.camera_server = camera_server
70
+
71
+ # Initialize streaming state early (before entity setup)
72
+ self._is_streaming_audio = False
73
+ self._tts_url: Optional[str] = None
74
+ self._tts_played = False
75
+ self._continue_conversation = False
76
+ self._timer_finished = False
77
+ self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
78
+
79
+ # Conversation tracking for continuous conversation
80
+ self._conversation_id: Optional[str] = None
81
+ self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
82
+ self._last_conversation_time = 0.0
83
+
84
+ # Initialize Reachy controller
85
+ self.reachy_controller = ReachyController(state.reachy_mini)
86
+
87
+ # Connect MovementManager to ReachyController for pose control from HA
88
+ if state.motion is not None and state.motion.movement_manager is not None:
89
+ self.reachy_controller.set_movement_manager(state.motion.movement_manager)
90
+
91
+ # Setup speech sway callback for audio-driven head motion
92
+ def sway_callback(sway: dict) -> None:
93
+ mm = state.motion.movement_manager
94
+ if mm is not None:
95
+ mm.set_speech_sway(
96
+ sway.get("x_m", 0.0),
97
+ sway.get("y_m", 0.0),
98
+ sway.get("z_m", 0.0),
99
+ sway.get("roll_rad", 0.0),
100
+ sway.get("pitch_rad", 0.0),
101
+ sway.get("yaw_rad", 0.0),
102
+ )
103
+
104
+ state.tts_player.set_sway_callback(sway_callback)
105
+ _LOGGER.info("Speech sway callback configured for TTS player")
106
+
107
+ # Initialize entity registry
108
+ self._entity_registry = EntityRegistry(
109
+ server=self,
110
+ reachy_controller=self.reachy_controller,
111
+ camera_server=camera_server,
112
+ play_emotion_callback=self._play_emotion,
113
+ )
114
+
115
+ # Connect gesture state callback
116
+ if camera_server:
117
+ camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
118
+ camera_server.set_face_state_callback(self._entity_registry.update_face_detected_state)
119
+
120
+ # Only setup entities once (check if already initialized)
121
+ # This prevents duplicate entity registration on reconnection
122
+ if not getattr(self.state, '_entities_initialized', False):
123
+ if self.state.media_player_entity is None:
124
+ self.state.media_player_entity = MediaPlayerEntity(
125
+ server=self,
126
+ key=get_entity_key("reachy_mini_media_player"),
127
+ name="Media Player",
128
+ object_id="reachy_mini_media_player",
129
+ music_player=state.music_player,
130
+ announce_player=state.tts_player,
131
+ )
132
+ self.state.entities.append(self.state.media_player_entity)
133
+
134
+ # Setup all entities using the registry
135
+ self._entity_registry.setup_all_entities(self.state.entities)
136
+
137
+ # Mark entities as initialized
138
+ self.state._entities_initialized = True
139
+ _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
140
+ else:
141
+ _LOGGER.debug("Entities already initialized, skipping setup")
142
+ # Update server reference in existing entities
143
+ for entity in self.state.entities:
144
+ entity.server = self
145
+
146
+ # Load emotion keywords from JSON file for auto-triggering
147
+ self._emotion_keywords: Dict[str, str] = {}
148
+ self._emotion_detection_enabled = True
149
+ self._load_emotion_keywords()
150
+
151
def handle_voice_event(
    self, event_type: VoiceAssistantEventType, data: Dict[str, str]
) -> None:
    """React to a single voice pipeline event.

    Updates the TTS / streaming bookkeeping flags and triggers the matching
    robot behaviour (listening, thinking, speaking) for each pipeline stage.

    Args:
        event_type: Pipeline stage being reported.
        data: Name/value pairs attached to the event.
    """
    _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
    events = VoiceAssistantEventType

    if event_type == events.VOICE_ASSISTANT_RUN_START:
        # New pipeline run: reset per-run state and show the listening pose.
        # The TTS URL needs HA authentication, so it is only remembered here.
        self._tts_url = data.get("url")
        self._tts_played = False
        self._continue_conversation = False
        self._reachy_on_listening()
        return

    if event_type in (
        events.VOICE_ASSISTANT_STT_VAD_END,
        events.VOICE_ASSISTANT_STT_END,
    ):
        # The user stopped talking: stop forwarding audio, start "thinking".
        self._is_streaming_audio = False
        self._reachy_on_thinking()
        return

    if event_type == events.VOICE_ASSISTANT_INTENT_PROGRESS:
        # Begin playback early when the response is being streamed.
        if data.get("tts_start_streaming") == "1":
            self.play_tts()
        return

    if event_type == events.VOICE_ASSISTANT_INTENT_END:
        if data.get("continue_conversation") == "1":
            self._continue_conversation = True
        return

    if event_type == events.VOICE_ASSISTANT_TTS_START:
        _LOGGER.debug("TTS_START event received, triggering speaking animation")
        self._reachy_on_speaking()

        # The event may carry the response text; use it to pick an emotion.
        spoken_text = data.get("tts_output") or data.get("text")
        if spoken_text:
            self._detect_and_play_emotion(spoken_text)
        return

    if event_type == events.VOICE_ASSISTANT_TTS_END:
        self._tts_url = data.get("url")
        self.play_tts()
        return

    if event_type == events.VOICE_ASSISTANT_RUN_END:
        self._is_streaming_audio = False

        # No audio was played for this run, so finish the interaction now
        # instead of waiting for a playback-done callback.
        if not self._tts_played:
            self._tts_finished()

        self._tts_played = False
207
+
208
def handle_timer_event(
    self,
    event_type: VoiceAssistantTimerEventType,
    msg: VoiceAssistantTimerEventResponse,
) -> None:
    """Handle a timer notification; only "timer finished" has an effect.

    Args:
        event_type: Kind of timer event.
        msg: The raw timer message (not inspected here).
    """
    _LOGGER.debug("Timer event: type=%s", event_type.name)

    if event_type != VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
        return

    # Already ringing: do not restart the alarm sound.
    if self._timer_finished:
        return

    # Arm the stop word so the user can silence the alarm by voice.
    self.state.active_wake_words.add(self.state.stop_word.id)
    self._timer_finished = True
    self.duck()
    self._play_timer_finished()
    self._reachy_on_timer_finished()
223
+
224
def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
    """Dispatch one incoming API message and yield any responses.

    The message type is tested in a fixed order; the first match is handled
    and the generator then ends.

    Args:
        msg: Decoded protocol message.

    Yields:
        Response messages to send back to the peer.
    """
    if isinstance(msg, VoiceAssistantEventResponse):
        # Pipeline event: flatten the argument list into a dict.
        event_data: Dict[str, str] = {arg.name: arg.value for arg in msg.data}
        self.handle_voice_event(VoiceAssistantEventType(msg.event_type), event_data)
        return

    if isinstance(msg, VoiceAssistantAnnounceRequest):
        _LOGGER.debug("Announcing: %s", msg.text)
        assert self.state.media_player_entity is not None

        # Optional pre-announce chime first, then the announcement itself.
        media_urls = []
        if msg.preannounce_media_id:
            media_urls.append(msg.preannounce_media_id)
        media_urls.append(msg.media_id)

        self.state.active_wake_words.add(self.state.stop_word.id)
        self._continue_conversation = msg.start_conversation
        self.duck()

        yield from self.state.media_player_entity.play(
            media_urls, announcement=True, done_callback=self._tts_finished
        )
        return

    if isinstance(msg, VoiceAssistantTimerEventResponse):
        self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)
        return

    if isinstance(msg, DeviceInfoRequest):
        feature_flags = (
            VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.API_AUDIO
            | VoiceAssistantFeature.ANNOUNCE
            | VoiceAssistantFeature.START_CONVERSATION
            | VoiceAssistantFeature.TIMERS
        )
        yield DeviceInfoResponse(
            uses_password=False,
            name=self.state.name,
            mac_address=self.state.mac_address,
            voice_assistant_feature_flags=feature_flags,
        )
        return

    entity_message_types = (
        ListEntitiesRequest,
        SubscribeHomeAssistantStatesRequest,
        SubscribeStatesRequest,
        MediaPlayerCommandRequest,
        NumberCommandRequest,
        SwitchCommandRequest,
        SelectCommandRequest,
        ButtonCommandRequest,
        CameraImageRequest,
    )
    if isinstance(msg, entity_message_types):
        # Every entity gets a chance to answer entity-level requests.
        for entity in self.state.entities:
            yield from entity.handle_message(msg)

        if isinstance(msg, ListEntitiesRequest):
            yield ListEntitiesDoneResponse()
        return

    if isinstance(msg, VoiceAssistantConfigurationRequest):
        advertised = [
            VoiceAssistantWakeWord(
                id=known.id,
                wake_word=known.wake_word,
                trained_languages=known.trained_languages,
            )
            for known in self.state.available_wake_words.values()
        ]

        # Only "micro" external models are advertised and remembered.
        for external in msg.external_wake_words:
            if external.model_type != "micro":
                continue

            advertised.append(
                VoiceAssistantWakeWord(
                    id=external.id,
                    wake_word=external.wake_word,
                    trained_languages=external.trained_languages,
                )
            )
            self._external_wake_words[external.id] = external

        currently_active = [
            loaded.id
            for loaded in self.state.wake_words.values()
            if loaded.id in self.state.active_wake_words
        ]
        yield VoiceAssistantConfigurationResponse(
            available_wake_words=advertised,
            active_wake_words=currently_active,
            max_active_wake_words=2,
        )

        _LOGGER.info("Connected to Home Assistant")
        return

    if isinstance(msg, VoiceAssistantSetConfiguration):
        # Rebuild the active set from scratch, loading models on demand.
        requested: Set[str] = set()

        for ww_id in msg.active_wake_words:
            if ww_id in self.state.wake_words:
                # Model already loaded; just activate it.
                requested.add(ww_id)
                continue

            info = self.state.available_wake_words.get(ww_id)
            if not info:
                # Unknown locally: it may be an external model to download.
                external = self._external_wake_words.get(ww_id)
                if not external:
                    _LOGGER.warning("Wake word not found: %s", ww_id)
                    continue

                info = self._download_external_wake_word(external)
                if not info:
                    continue

                self.state.available_wake_words[ww_id] = info

            _LOGGER.debug("Loading wake word: %s", info.wake_word_path)
            model = info.load()
            # Tag the model with its id so it can be identified later.
            model.id = ww_id
            self.state.wake_words[ww_id] = model
            _LOGGER.info("Wake word loaded: %s", ww_id)
            requested.add(ww_id)

        self.state.active_wake_words = requested
        _LOGGER.debug("Active wake words: %s", requested)

        self.state.preferences.active_wake_words = list(requested)
        self.state.save_preferences()
        self.state.wake_words_changed = True
360
+
361
def handle_audio(self, audio_chunk: bytes) -> None:
    """Forward a microphone chunk to the peer while streaming is active.

    Args:
        audio_chunk: Raw audio bytes; dropped when not streaming.
    """
    if self._is_streaming_audio:
        self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
365
+
366
def _get_or_create_conversation_id(self) -> str:
    """Return the conversation id to use for the next pipeline request.

    The current id is reused while the previous request happened within
    ``self._conversation_timeout`` seconds; otherwise a fresh UUID4 string is
    generated. Every call refreshes the "last used" timestamp.

    Elapsed time is measured with ``time.monotonic()`` rather than the wall
    clock, so a system clock adjustment (for example an NTP step after boot)
    can neither expire an active conversation nor keep a stale one alive.

    Returns:
        The conversation id string.
    """
    import uuid

    now = time.monotonic()
    expired = now - self._last_conversation_time > self._conversation_timeout

    if self._conversation_id is None or expired:
        self._conversation_id = str(uuid.uuid4())
        _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)

    self._last_conversation_time = now
    return self._conversation_id
381
+
382
def _clear_conversation(self) -> None:
    """Forget the current conversation id and the continue-conversation flag."""
    self._conversation_id, self._continue_conversation = None, False
386
+
387
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
    """Start a voice pipeline run after a wake word was detected.

    While the timer alarm is ringing, the wake word only silences the alarm
    and no pipeline run is started.

    Args:
        wake_word: The detected wake word model; its phrase is sent along.
    """
    if self._timer_finished:
        # The wake word acts as "stop" for the timer alarm.
        self._timer_finished = False
        self.state.tts_player.stop()
        _LOGGER.debug("Stopping timer finished sound")
        return

    phrase = wake_word.wake_word
    _LOGGER.debug("Detected wake word: %s", phrase)

    # Orient the head toward the speaker before starting to listen.
    self._turn_to_sound_source()

    start_request = VoiceAssistantRequest(
        start=True,
        wake_word_phrase=phrase,
        # Reuse the id of a recent conversation to keep its context.
        conversation_id=self._get_or_create_conversation_id(),
    )
    self.send_messages([start_request])

    self.duck()
    self._is_streaming_audio = True
    self.state.tts_player.play(self.state.wakeup_sound)
415
+
416
def stop(self) -> None:
    """Stop whatever the TTS player is playing (e.g. on the stop word).

    If the timer alarm was ringing it is simply silenced; otherwise the
    interrupted response is treated as finished.
    """
    state = self.state
    state.active_wake_words.discard(state.stop_word.id)
    state.tts_player.stop()

    if not self._timer_finished:
        _LOGGER.debug("TTS response stopped manually")
        self._tts_finished()
        return

    self._timer_finished = False
    _LOGGER.debug("Stopping timer finished sound")
427
+
428
def play_tts(self) -> None:
    """Play the pending TTS response once per pipeline run.

    Does nothing when no URL is known yet or the response was already
    started. While playing, the stop word is armed so it can interrupt.
    """
    if self._tts_played or not self._tts_url:
        return

    self._tts_played = True
    _LOGGER.debug("Playing TTS response: %s", self._tts_url)

    state = self.state
    state.active_wake_words.add(state.stop_word.id)
    state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
437
+
438
def duck(self) -> None:
    """Lower the music volume and pause Sendspin for a voice interaction."""
    _LOGGER.debug("Ducking music")
    player = self.state.music_player
    player.duck()
    # Sendspin is paused so it does not compete with voice audio.
    player.pause_sendspin()
443
+
444
def unduck(self) -> None:
    """Restore the music volume and resume Sendspin after an interaction."""
    _LOGGER.debug("Unducking music")
    player = self.state.music_player
    player.unduck()
    player.resume_sendspin()
449
+
450
def _tts_finished(self) -> None:
    """Finish a spoken response and either keep listening or go idle.

    The conversation continues when the local "continuous conversation"
    preference is on, or when Home Assistant asked for it during this run.
    """
    state = self.state
    state.active_wake_words.discard(state.stop_word.id)
    self.send_messages([VoiceAssistantAnnounceFinished()])

    continuous_mode = state.preferences.continuous_conversation

    if not (continuous_mode or self._continue_conversation):
        # Nothing asks for a follow-up: end the conversation and go idle.
        self._clear_conversation()
        self.unduck()
        _LOGGER.debug("Conversation finished")
        self._reachy_on_idle()
        return

    _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
                  continuous_mode, self._continue_conversation)

    # Audible cue that the device is ready for the next utterance.
    state.tts_player.play(state.wakeup_sound)

    # Same conversation id, so the follow-up keeps its context.
    follow_up = VoiceAssistantRequest(
        start=True,
        conversation_id=self._get_or_create_conversation_id(),
    )
    self.send_messages([follow_up])
    self._is_streaming_audio = True

    self._reachy_on_listening()
488
+
489
def _play_timer_finished(self) -> None:
    """Play the timer alarm sound, repeating until the alarm is cleared.

    Each playback schedules the next one (after a one second pause) through
    the player's done callback. Once ``_timer_finished`` has been reset the
    loop ends and the music is un-ducked.
    """
    if self._timer_finished:
        def _pause_then_repeat() -> None:
            call_all(lambda: time.sleep(1.0), self._play_timer_finished)

        self.state.tts_player.play(
            self.state.timer_finished_sound,
            done_callback=_pause_then_repeat,
        )
    else:
        self.unduck()
500
+
501
def connection_lost(self, exc):
    """Reset per-connection voice state when the peer disconnects.

    Args:
        exc: Passed through unchanged to the parent implementation.
    """
    super().connection_lost(exc)
    _LOGGER.info("Disconnected from Home Assistant")

    # Nothing from the old session may leak into the next connection.
    self._tts_url = None
    self._is_streaming_audio = self._tts_played = self._continue_conversation = False
509
+
510
def _download_external_wake_word(
    self, external_wake_word: VoiceAssistantExternalWakeWord
) -> Optional[AvailableWakeWord]:
    """Make sure an external wake word's config and model are on disk.

    Files live in ``<download_dir>/external_wake_words`` as ``<id>.json``
    (config) and ``<id>.tflite`` (model). The model is re-downloaded unless an
    existing file matches the advertised size and SHA-256 hash; the config is
    downloaded when it is missing or whenever the model is downloaded. The
    model URL is the config URL with its last path component replaced by the
    model file name.

    Downloads use a socket timeout and are written to a temporary ``.part``
    file that is renamed into place, so an interrupted transfer never leaves
    a truncated file under the final name.

    Args:
        external_wake_word: Description (id, url, size, hash, ...) of the
            wake word.

    Returns:
        An ``AvailableWakeWord`` pointing at the config file, or ``None`` when
        a download failed (bad status, network error, timeout, disk error).
    """
    # Per socket operation, so a stalled server cannot block forever.
    download_timeout_s = 30.0

    eww_dir = self.state.download_dir / "external_wake_words"
    eww_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the id and url come from the peer and are used as-is for
    # the file name and the request; confirm they are trusted.
    config_path = eww_dir / f"{external_wake_word.id}.json"
    model_path = eww_dir / f"{external_wake_word.id}.tflite"

    should_download_config = not config_path.exists()
    should_download_model = True

    if model_path.exists():
        # Cheap size check first; hash only when the size already matches.
        if model_path.stat().st_size == external_wake_word.model_size:
            with open(model_path, "rb") as model_file:
                model_hash = hashlib.sha256(model_file.read()).hexdigest()

            if model_hash == external_wake_word.model_hash:
                should_download_model = False
                _LOGGER.debug(
                    "Model size and hash match for %s. Skipping download.",
                    external_wake_word.id,
                )

    downloads = []
    if should_download_config or should_download_model:
        downloads.append((external_wake_word.url, config_path))

    if should_download_model:
        parsed_url = urlparse(external_wake_word.url)
        parsed_url = parsed_url._replace(
            path=posixpath.join(posixpath.dirname(parsed_url.path), model_path.name)
        )
        downloads.append((urlunparse(parsed_url), model_path))

    for url, destination in downloads:
        _LOGGER.debug("Downloading %s to %s", url, destination)
        partial_path = destination.with_name(destination.name + ".part")
        try:
            with urlopen(url, timeout=download_timeout_s) as response:
                if response.status != 200:
                    _LOGGER.warning(
                        "Failed to download: %s, status=%s", url, response.status
                    )
                    return None

                with open(partial_path, "wb") as out_file:
                    shutil.copyfileobj(response, out_file)

            # Publish the file only after it was written completely.
            partial_path.replace(destination)
        except OSError as err:
            # HTTPError, URLError and socket timeouts are all OSError
            # subclasses, as are local disk errors.
            _LOGGER.warning("Failed to download: %s, error=%s", url, err)
            try:
                partial_path.unlink()
            except OSError:
                # Best effort: the partial file may never have been created.
                pass
            return None

    return AvailableWakeWord(
        id=external_wake_word.id,
        type=WakeWordType.MICRO_WAKE_WORD,
        wake_word=external_wake_word.wake_word,
        trained_languages=external_wake_word.trained_languages,
        wake_word_path=config_path,
    )
577
+
578
+ # -------------------------------------------------------------------------
579
+ # Reachy Mini Motion Control
580
+ # -------------------------------------------------------------------------
581
+
582
def _turn_to_sound_source(self) -> None:
    """Turn the head once toward the direction of arrival (DOA) of sound.

    Called at wake-up. The DOA reading is converted to a yaw angle, ignored
    when it is within 10 degrees of straight ahead, scaled to 80 % and sent
    to the movement manager as a 0.5 s turn. Any failure is logged and
    swallowed so a motion problem cannot break the wake-up flow.

    DOA convention as described by the original comments: 0 rad = left,
    pi/2 = front, pi = right, with head-frame direction
    ``[sin(doa), cos(doa), 0]`` (X+ front, Y+ left).

    The ``speech_detected`` part of the reading is only logged: by the time
    the wake word is recognised the user may already have stopped speaking.

    NOTE(review): the original comments state "positive yaw = turn left",
    yet ``yaw = doa - pi/2`` maps left (0) to -90 degrees. Either the comment
    or the sign looks inverted - confirm against ``turn_to_angle``.
    """
    if not self.state.motion_enabled or not self.state.reachy_mini:
        _LOGGER.info("DOA turn-to-sound: motion disabled or no robot")
        return

    min_turn_deg = 10.0   # smaller offsets are treated as noise
    doa_gain = 0.8        # conservative: DOA estimates may be inaccurate

    try:
        reading = self.reachy_controller.get_doa_angle()
        if reading is None:
            _LOGGER.info("DOA not available, skipping turn-to-sound")
            return

        angle_rad, speech_detected = reading
        _LOGGER.debug("DOA raw: angle=%.3f rad (%.1f°), speech=%s",
                      angle_rad, math.degrees(angle_rad), speech_detected)

        # Direction components, used for diagnostics only.
        front = math.sin(angle_rad)
        left = math.cos(angle_rad)

        # Offset from straight ahead (DOA pi/2 is the front).
        yaw_deg = math.degrees(angle_rad - math.pi / 2)

        _LOGGER.debug("DOA direction: x=%.2f, y=%.2f, yaw=%.1f°",
                      front, left, yaw_deg)

        if abs(yaw_deg) < min_turn_deg:
            _LOGGER.debug("DOA angle %.1f° below threshold (%.1f°), skipping turn",
                          yaw_deg, min_turn_deg)
            return

        target_yaw_deg = yaw_deg * doa_gain

        _LOGGER.info("Turning toward sound source: DOA=%.1f°, target=%.1f°",
                     yaw_deg, target_yaw_deg)

        motion = self.state.motion
        if motion and motion.movement_manager:
            motion.movement_manager.turn_to_angle(
                target_yaw_deg,
                duration=0.5,
            )
    except Exception as e:
        _LOGGER.error("Error in turn-to-sound: %s", e)
653
+
654
def _reachy_on_listening(self) -> None:
    """Switch the robot to its "listening" behaviour.

    Enables conversation mode and face tracking on the camera server, then
    starts the listening animation if motion is available. Errors are logged
    and never propagated.
    """
    self._set_conversation_mode(True)

    camera = self.camera_server
    if camera is not None:
        # Tracking may have been paused while the robot was speaking.
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as e:
            _LOGGER.debug("Failed to resume face tracking: %s", e)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Listening animation")
        if self.state.motion:
            self.state.motion.on_listening()
    except Exception as e:
        _LOGGER.error("Reachy Mini motion error: %s", e)
674
+
675
def _reachy_on_thinking(self) -> None:
    """Switch the robot to its "thinking" behaviour.

    Re-enables face tracking on the camera server, then starts the thinking
    animation if motion is available. Errors are logged and never propagated.
    """
    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as e:
            _LOGGER.debug("Failed to resume face tracking: %s", e)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Thinking animation")
        if self.state.motion:
            self.state.motion.on_thinking()
    except Exception as e:
        _LOGGER.error("Reachy Mini motion error: %s", e)
692
+
693
def _reachy_on_speaking(self) -> None:
    """Switch the robot to its "speaking" behaviour.

    Pauses face tracking (the speaking animation drives the head instead) and
    starts the speaking animation. Each missing prerequisite is reported with
    its own warning. Errors are logged and never propagated.
    """
    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(False)
            _LOGGER.debug("Face tracking paused during speaking")
        except Exception as e:
            _LOGGER.debug("Failed to pause face tracking: %s", e)

    state = self.state
    if not state.motion_enabled:
        _LOGGER.warning("Motion disabled, skipping speaking animation")
    elif not state.reachy_mini:
        _LOGGER.warning("No reachy_mini instance, skipping speaking animation")
    elif not state.motion:
        _LOGGER.warning("No motion controller, skipping speaking animation")
    else:
        try:
            _LOGGER.debug("Reachy Mini: Starting speaking animation")
            state.motion.on_speaking_start()
        except Exception as e:
            _LOGGER.error("Reachy Mini motion error: %s", e)
718
+
719
def _reachy_on_idle(self) -> None:
    """Return the robot to its idle behaviour.

    Leaves conversation mode, re-enables face tracking on the camera server,
    then starts the idle animation if motion is available. Errors are logged
    and never propagated.
    """
    self._set_conversation_mode(False)

    camera = self.camera_server
    if camera is not None:
        # Tracking may have been paused while the robot was speaking.
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as e:
            _LOGGER.debug("Failed to resume face tracking: %s", e)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Idle animation")
        if self.state.motion:
            self.state.motion.on_idle()
    except Exception as e:
        _LOGGER.error("Reachy Mini motion error: %s", e)
739
+
740
def _set_conversation_mode(self, in_conversation: bool) -> None:
    """Tell the camera server whether a conversation is in progress.

    Per the original notes, face tracking runs at high frequency during a
    conversation and at an adaptive rate otherwise. Does nothing without a
    camera server; failures are logged at debug level only.

    Args:
        in_conversation: True while a conversation is active.
    """
    camera = self.camera_server
    if camera is None:
        return

    try:
        camera.set_conversation_mode(in_conversation)
    except Exception as e:
        _LOGGER.debug("Failed to set conversation mode: %s", e)
751
+
752
def _reachy_on_timer_finished(self) -> None:
    """Play the "timer finished" animation if motion is available.

    Errors are logged and never propagated.
    """
    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Timer finished animation")
        if self.state.motion:
            self.state.motion.on_timer_finished()
    except Exception as e:
        _LOGGER.error("Reachy Mini motion error: %s", e)
762
+
763
def _load_emotion_keywords(self) -> None:
    """Load the keyword-to-emotion table used for automatic emotions.

    Reads ``animations/emotion_keywords.json`` next to this module. The
    ``"keywords"`` object becomes ``self._emotion_keywords`` and
    ``settings.enabled`` (default True) becomes
    ``self._emotion_detection_enabled``. A missing file only produces a
    warning; any other failure is logged as an error and leaves the current
    values untouched.
    """
    import json
    from pathlib import Path

    keywords_file = Path(__file__).parent / "animations" / "emotion_keywords.json"

    if not keywords_file.exists():
        _LOGGER.warning("Emotion keywords file not found: %s", keywords_file)
        return

    try:
        with keywords_file.open("r", encoding="utf-8") as handle:
            config = json.load(handle)

        self._emotion_keywords = config.get("keywords", {})
        self._emotion_detection_enabled = config.get("settings", {}).get("enabled", True)

        _LOGGER.info(
            "Loaded %d emotion keywords (enabled=%s)",
            len(self._emotion_keywords),
            self._emotion_detection_enabled
        )
    except Exception as e:
        _LOGGER.error("Failed to load emotion keywords: %s", e)
793
+
794
def _detect_and_play_emotion(self, text: str) -> None:
    """Play the emotion of the first configured keyword found in ``text``.

    Matching is a case-insensitive substring test, performed in the
    iteration order of the keyword table. At most one emotion is triggered
    per call. Nothing happens when the text is empty, detection is disabled,
    or no keywords are loaded.

    Args:
        text: Response text to scan.
    """
    if not (text and self._emotion_detection_enabled and self._emotion_keywords):
        return

    haystack = text.lower()
    first_hit = next(
        (
            (keyword, emotion)
            for keyword, emotion in self._emotion_keywords.items()
            if keyword.lower() in haystack
        ),
        None,
    )

    if first_hit is None:
        _LOGGER.debug("No emotion keywords detected in response text")
        return

    keyword, emotion_name = first_hit
    _LOGGER.info(
        "Auto-detected emotion '%s' from keyword '%s' in response",
        emotion_name, keyword
    )
    self._play_emotion(emotion_name)
822
+
823
def _play_emotion(self, emotion_name: str) -> None:
    """Ask the robot daemon to play a recorded emotion move.

    Sends ``POST http://<host>:8000/api/move/play/recorded-move-dataset/
    pollen-robotics/reachy-mini-emotions-library/<emotion_name>`` with a 5 s
    timeout. ``<host>`` is the ``wlan_ip`` from the daemon status when it can
    be read and is non-empty, otherwise ``localhost``. All failures are
    logged and never propagated.

    NOTE(review): the HTTP request is synchronous; confirm callers can
    tolerate blocking for up to the timeout.

    Args:
        emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
    """
    try:
        from urllib.parse import quote

        import requests

        host = "localhost"
        if self.state.reachy_mini is not None:
            try:
                status = self.state.reachy_mini.client.get_status(wait=False)
                # A present-but-empty/None value must not end up in the URL.
                host = status.get('wlan_ip') or "localhost"
            except Exception:
                host = "localhost"

        base_url = f"http://{host}:8000/api/move/play/recorded-move-dataset"
        dataset = "pollen-robotics/reachy-mini-emotions-library"
        # Escape the name so it stays a single path segment.
        url = f"{base_url}/{dataset}/{quote(str(emotion_name), safe='')}"

        response = requests.post(url, timeout=5)
        if response.status_code == 200:
            move_uuid = response.json().get('uuid')
            _LOGGER.info("Playing emotion: %s (uuid=%s)", emotion_name, move_uuid)
        else:
            _LOGGER.warning(
                "Failed to play emotion %s: HTTP %s",
                emotion_name, response.status_code,
            )

    except Exception as e:
        _LOGGER.error("Error playing emotion %s: %s", emotion_name, e)
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac RENAMED
File without changes
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py RENAMED
@@ -6,9 +6,8 @@ Analyzes audio loudness to drive natural head movements during TTS playback.
6
 
7
  import math
8
  from collections import deque
9
- from collections.abc import Callable
10
  from itertools import islice
11
- from typing import Any
12
 
13
  import numpy as np
14
  from numpy.typing import NDArray
@@ -65,7 +64,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
65
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
66
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
67
  t = max(0.0, min(1.0, t))
68
- return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
69
 
70
 
71
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
@@ -94,7 +93,7 @@ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray
94
  """Lightweight linear resampler for short buffers."""
95
  if sr_in == sr_out or x.size == 0:
96
  return x
97
- n_out = round(x.size * sr_out / sr_in)
98
  if n_out <= 1:
99
  return np.zeros(0, dtype=np.float32)
100
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
@@ -144,7 +143,7 @@ class SpeechSwayRT:
144
  self.sway_down = 0
145
  self.t = 0.0
146
 
147
- def feed(self, pcm: NDArray[Any], sr: int | None = None) -> list[dict[str, float]]:
148
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
149
 
150
  Args:
@@ -168,7 +167,7 @@ class SpeechSwayRT:
168
  else:
169
  self.carry = x
170
 
171
- out: list[dict[str, float]] = []
172
 
173
  while self.carry.size >= HOP:
174
  hop = self.carry[:HOP]
@@ -216,35 +215,27 @@ class SpeechSwayRT:
216
  self.t += HOP_MS / 1000.0
217
 
218
  # Oscillators
219
- pitch = (
220
- math.radians(SWAY_A_PITCH_DEG)
221
- * loud
222
- * env
223
- * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
224
- )
225
- yaw = (
226
- math.radians(SWAY_A_YAW_DEG) * loud * env * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
227
- )
228
- roll = (
229
- math.radians(SWAY_A_ROLL_DEG)
230
- * loud
231
- * env
232
- * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
233
- )
234
- x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
235
- y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
236
- z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
237
-
238
- out.append(
239
- {
240
- "pitch_rad": pitch,
241
- "yaw_rad": yaw,
242
- "roll_rad": roll,
243
- "x_m": x_m,
244
- "y_m": y_m,
245
- "z_m": z_m,
246
- }
247
- )
248
 
249
  return out
250
 
@@ -252,7 +243,7 @@ class SpeechSwayRT:
252
  def analyze_audio_for_sway(
253
  audio_data: NDArray[Any],
254
  sample_rate: int,
255
- callback: Callable[[dict[str, float]], None],
256
  ) -> None:
257
  """Analyze entire audio and call callback for each sway frame.
258
 
 
6
 
7
  import math
8
  from collections import deque
 
9
  from itertools import islice
10
+ from typing import Any, Callable, Dict, List, Optional
11
 
12
  import numpy as np
13
  from numpy.typing import NDArray
 
64
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
65
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
66
  t = max(0.0, min(1.0, t))
67
+ return t ** LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
68
 
69
 
70
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
 
93
  """Lightweight linear resampler for short buffers."""
94
  if sr_in == sr_out or x.size == 0:
95
  return x
96
+ n_out = int(round(x.size * sr_out / sr_in))
97
  if n_out <= 1:
98
  return np.zeros(0, dtype=np.float32)
99
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
 
143
  self.sway_down = 0
144
  self.t = 0.0
145
 
146
+ def feed(self, pcm: NDArray[Any], sr: Optional[int] = None) -> List[Dict[str, float]]:
147
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
148
 
149
  Args:
 
167
  else:
168
  self.carry = x
169
 
170
+ out: List[Dict[str, float]] = []
171
 
172
  while self.carry.size >= HOP:
173
  hop = self.carry[:HOP]
 
215
  self.t += HOP_MS / 1000.0
216
 
217
  # Oscillators
218
+ pitch = (math.radians(SWAY_A_PITCH_DEG) * loud * env *
219
+ math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch))
220
+ yaw = (math.radians(SWAY_A_YAW_DEG) * loud * env *
221
+ math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw))
222
+ roll = (math.radians(SWAY_A_ROLL_DEG) * loud * env *
223
+ math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll))
224
+ x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(
225
+ 2 * math.pi * SWAY_F_X * self.t + self.phase_x)
226
+ y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(
227
+ 2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
228
+ z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(
229
+ 2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
230
+
231
+ out.append({
232
+ "pitch_rad": pitch,
233
+ "yaw_rad": yaw,
234
+ "roll_rad": roll,
235
+ "x_m": x_m,
236
+ "y_m": y_m,
237
+ "z_m": z_m,
238
+ })
 
 
 
 
 
 
 
 
239
 
240
  return out
241
 
 
243
  def analyze_audio_for_sway(
244
  audio_data: NDArray[Any],
245
  sample_rate: int,
246
+ callback: Callable[[Dict[str, float]], None],
247
  ) -> None:
248
  """Analyze entire audio and call callback for each sway frame.
249
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css RENAMED
File without changes
reachy_mini_ha_voice/util.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions."""
2
+
3
+ import hashlib
4
+ import uuid
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
def call_all(*funcs: Optional[Callable[[], None]]) -> None:
    """Invoke every supplied callback in order, skipping ``None`` entries.

    Args:
        *funcs: Zero-argument callables; ``None`` placeholders are ignored.
    """
    for callback in (candidate for candidate in funcs if candidate is not None):
        callback()
15
+
16
+
17
def get_mac() -> str:
    """Return a stable device identifier used in place of a MAC address.

    The identifier is cached in ``local/.device_id`` next to the package
    directory so that the same value is reported across restarts; this keeps
    Home Assistant from treating the device as new each time.

    Returns:
        The cached identifier when a non-empty one can be read. Otherwise the
        first 12 hex digits of the MD5 digest of ``uuid.getnode()``, which is
        also written to the cache file on a best-effort basis.
    """
    device_id_file = Path(__file__).parent.parent / "local" / ".device_id"

    # Reuse a previously stored identifier, but never an empty one: a blank or
    # truncated cache file would otherwise be returned as "".
    try:
        cached = device_id_file.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeError):
        cached = ""
    if cached:
        return cached

    # uuid.getnode() may fall back to a random value when no hardware address
    # can be determined, which is why the result is cached below.
    machine_id = uuid.getnode()
    # MD5 only normalises the format here; it is not used for security.
    device_id = hashlib.md5(str(machine_id).encode()).hexdigest()[:12]

    # Persisting is best-effort: on a read-only install the identifier is
    # still returned, it just cannot be cached.
    try:
        device_id_file.parent.mkdir(parents=True, exist_ok=True)
        device_id_file.write_text(device_id, encoding="utf-8")
    except OSError:
        pass

    return device_id
reachy_mini_ha_voice/voice_assistant.py ADDED
@@ -0,0 +1,813 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Assistant Service for Reachy Mini.
3
+
4
+ This module provides the main voice assistant service that integrates
5
+ with Home Assistant via ESPHome protocol.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from queue import Queue
16
+ from typing import Dict, List, Optional, Set, Union
17
+
18
+ import numpy as np
19
+
20
+ from reachy_mini import ReachyMini
21
+
22
+ from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
23
+ from .audio_player import AudioPlayer
24
+ from .satellite import VoiceSatelliteProtocol
25
+ from .util import get_mac
26
+ from .zeroconf import HomeAssistantZeroconf
27
+ from .motion import ReachyMiniMotion
28
+ from .camera_server import MJPEGCameraServer
29
+
30
# Module-level logger shared by everything in this file.
_LOGGER = logging.getLogger(__name__)

# Directories resolved relative to this module's location.
_MODULE_DIR = Path(__file__).parent
_WAKEWORDS_DIR = _MODULE_DIR / "wakewords"  # wake word model configs (*.json)
_SOUNDS_DIR = _MODULE_DIR / "sounds"  # notification sounds (*.flac)
_LOCAL_DIR = _MODULE_DIR.parent / "local"  # preferences.json and downloads
36
+
37
+
38
@dataclass
class AudioProcessingContext:
    """Mutable scratch state passed between the audio-processing helpers."""

    # Wake word models currently being evaluated.
    wake_words: List = field(default_factory=list)
    # Feature extractor and per-chunk features for MicroWakeWord models.
    micro_features: Optional[object] = field(default=None)
    micro_inputs: List = field(default_factory=list)
    # Feature extractor and per-chunk features for OpenWakeWord models.
    oww_features: Optional[object] = field(default=None)
    oww_inputs: List = field(default_factory=list)
    # True when at least one evaluated model is an OpenWakeWord model.
    has_oww: bool = field(default=False)
    # time.monotonic() stamp that starts the refractory window.
    last_active: Optional[float] = field(default=None)
48
+
49
+
50
# Number of samples per chunk handed to the audio pipeline; chunks are kept at
# a fixed size for consistent streaming (matches reference project).
AUDIO_BLOCK_SIZE = 1024  # samples at 16kHz = 64ms
52
+
53
+
54
+ class VoiceAssistantService:
55
+ """Voice assistant service that runs ESPHome protocol server."""
56
+
57
+ def __init__(
58
+ self,
59
+ reachy_mini: Optional[ReachyMini] = None,
60
+ name: str = "Reachy Mini",
61
+ host: str = "0.0.0.0",
62
+ port: int = 6053,
63
+ wake_model: str = "okay_nabu",
64
+ camera_port: int = 8081,
65
+ camera_enabled: bool = True,
66
+ ):
67
+ self.reachy_mini = reachy_mini
68
+ self.name = name
69
+ self.host = host
70
+ self.port = port
71
+ self.wake_model = wake_model
72
+ self.camera_port = camera_port
73
+ self.camera_enabled = camera_enabled
74
+
75
+ self._server = None
76
+ self._discovery = None
77
+ self._audio_thread = None
78
+ self._running = False
79
+ self._state: Optional[ServerState] = None
80
+ self._motion = ReachyMiniMotion(reachy_mini)
81
+ self._camera_server: Optional[MJPEGCameraServer] = None
82
+
83
+ # Audio buffer for fixed-size chunk output
84
+ self._audio_buffer: np.ndarray = np.array([], dtype=np.float32)
85
+
86
+ async def start(self) -> None:
87
+ """Start the voice assistant service."""
88
+ _LOGGER.info("Initializing voice assistant service...")
89
+
90
+ # Ensure directories exist
91
+ _WAKEWORDS_DIR.mkdir(parents=True, exist_ok=True)
92
+ _SOUNDS_DIR.mkdir(parents=True, exist_ok=True)
93
+ _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
94
+
95
+ # Verify required files (bundled with package)
96
+ await self._verify_required_files()
97
+
98
+ # Load wake words
99
+ available_wake_words = self._load_available_wake_words()
100
+ _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))
101
+
102
+ # Load preferences
103
+ preferences_path = _LOCAL_DIR / "preferences.json"
104
+ preferences = self._load_preferences(preferences_path)
105
+
106
+ # Load wake word models
107
+ wake_models, active_wake_words = self._load_wake_models(
108
+ available_wake_words, preferences
109
+ )
110
+
111
+ # Load stop model
112
+ stop_model = self._load_stop_model()
113
+
114
+ # Create audio players with Reachy Mini reference
115
+ music_player = AudioPlayer(self.reachy_mini)
116
+ tts_player = AudioPlayer(self.reachy_mini)
117
+
118
+ # Create server state
119
+ self._state = ServerState(
120
+ name=self.name,
121
+ mac_address=get_mac(),
122
+ audio_queue=Queue(),
123
+ entities=[],
124
+ available_wake_words=available_wake_words,
125
+ wake_words=wake_models,
126
+ active_wake_words=active_wake_words,
127
+ stop_word=stop_model,
128
+ music_player=music_player,
129
+ tts_player=tts_player,
130
+ wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
131
+ timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
132
+ preferences=preferences,
133
+ preferences_path=preferences_path,
134
+ refractory_seconds=2.0,
135
+ download_dir=_LOCAL_DIR,
136
+ reachy_mini=self.reachy_mini,
137
+ motion_enabled=self.reachy_mini is not None,
138
+ )
139
+
140
+ # Set motion controller reference in state
141
+ self._state.motion = self._motion
142
+
143
+ # Start Reachy Mini media system if available
144
+ if self.reachy_mini is not None:
145
+ try:
146
+ # Check if media system is already running to avoid conflicts
147
+ media = self.reachy_mini.media
148
+ if media.audio is not None:
149
+ # Check recording state
150
+ is_recording = getattr(media, '_recording', False)
151
+ if not is_recording:
152
+ media.start_recording()
153
+ _LOGGER.info("Started Reachy Mini recording")
154
+ else:
155
+ _LOGGER.debug("Reachy Mini recording already active")
156
+
157
+ # Check playback state
158
+ is_playing = getattr(media, '_playing', False)
159
+ if not is_playing:
160
+ media.start_playing()
161
+ _LOGGER.info("Started Reachy Mini playback")
162
+ else:
163
+ _LOGGER.debug("Reachy Mini playback already active")
164
+
165
+ _LOGGER.info("Reachy Mini media system initialized")
166
+
167
+ # Body yaw now follows head yaw in movement_manager.py
168
+ # This enables natural body rotation when tracking faces
169
+
170
+ # Optimize microphone settings for voice recognition
171
+ self._optimize_microphone_settings()
172
+ else:
173
+ _LOGGER.warning("Reachy Mini audio system not available")
174
+ except Exception as e:
175
+ _LOGGER.warning("Failed to initialize Reachy Mini media: %s", e)
176
+
177
+ # Start motion controller (5Hz control loop)
178
+ if self._motion is not None:
179
+ self._motion.start()
180
+
181
+ # Start audio processing thread (non-daemon for proper cleanup)
182
+ self._running = True
183
+ self._audio_thread = threading.Thread(
184
+ target=self._process_audio,
185
+ daemon=False,
186
+ )
187
+ self._audio_thread.start()
188
+
189
+ # Start camera server if enabled (must be before ESPHome server)
190
+ if self.camera_enabled:
191
+ self._camera_server = MJPEGCameraServer(
192
+ reachy_mini=self.reachy_mini,
193
+ host=self.host,
194
+ port=self.camera_port,
195
+ fps=15,
196
+ quality=80,
197
+ enable_face_tracking=True,
198
+ )
199
+ await self._camera_server.start()
200
+
201
+ # Connect camera server to motion controller for face tracking
202
+ if self._motion is not None:
203
+ self._motion.set_camera_server(self._camera_server)
204
+
205
+ # Create ESPHome server (pass camera_server for camera entity)
206
+ loop = asyncio.get_running_loop()
207
+ camera_server = self._camera_server # Capture for lambda
208
+ self._server = await loop.create_server(
209
+ lambda: VoiceSatelliteProtocol(self._state, camera_server=camera_server),
210
+ host=self.host,
211
+ port=self.port,
212
+ )
213
+
214
+ # Start mDNS discovery
215
+ self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
216
+ await self._discovery.register_server()
217
+
218
+ # Start Sendspin auto-discovery (auto-enabled, no user config needed)
219
+ # Sendspin is for music playback, so connect to music_player
220
+ await music_player.start_sendspin_discovery()
221
+
222
+ _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
223
+
224
+ def _optimize_microphone_settings(self) -> None:
225
+ """Optimize ReSpeaker XVF3800 microphone settings for voice recognition.
226
+
227
+ This method configures the XMOS XVF3800 audio processor for optimal
228
+ voice command recognition at distances up to 2-3 meters.
229
+
230
+ If user has previously set values via Home Assistant, those values are
231
+ restored from preferences. Otherwise, default optimized values are used.
232
+
233
+ Key optimizations:
234
+ 1. Enable AGC with higher max gain for distant speech
235
+ 2. Reduce noise suppression to preserve quiet speech
236
+ 3. Increase base microphone gain
237
+ 4. Optimize AGC response times for voice commands
238
+
239
+ Reference: reachy_mini/src/reachy_mini/media/audio_control_utils.py
240
+ XMOS docs: https://www.xmos.com/documentation/XM-014888-PC/
241
+ """
242
+ if self.reachy_mini is None:
243
+ return
244
+
245
+ try:
246
+ # Access ReSpeaker through the media audio system
247
+ audio = self.reachy_mini.media.audio
248
+ if audio is None or not hasattr(audio, '_respeaker'):
249
+ _LOGGER.debug("ReSpeaker not available for optimization")
250
+ return
251
+
252
+ respeaker = audio._respeaker
253
+ if respeaker is None:
254
+ _LOGGER.debug("ReSpeaker device not found")
255
+ return
256
+
257
+ # Get saved preferences (if any)
258
+ prefs = self._state.preferences if self._state else None
259
+
260
+ # ========== 1. AGC (Automatic Gain Control) Settings ==========
261
+ # Use saved value if available, otherwise use default (enabled)
262
+ agc_enabled = prefs.agc_enabled if (prefs and prefs.agc_enabled is not None) else True
263
+ try:
264
+ respeaker.write("PP_AGCONOFF", [1 if agc_enabled else 0])
265
+ _LOGGER.info("AGC %s (PP_AGCONOFF=%d)%s",
266
+ "enabled" if agc_enabled else "disabled",
267
+ 1 if agc_enabled else 0,
268
+ " [from preferences]" if (prefs and prefs.agc_enabled is not None) else " [default]")
269
+ except Exception as e:
270
+ _LOGGER.debug("Could not set AGC: %s", e)
271
+
272
+ # Use saved value if available, otherwise use default (30dB)
273
+ agc_max_gain = prefs.agc_max_gain if (prefs and prefs.agc_max_gain is not None) else 30.0
274
+ try:
275
+ respeaker.write("PP_AGCMAXGAIN", [agc_max_gain])
276
+ _LOGGER.info("AGC max gain set (PP_AGCMAXGAIN=%.1fdB)%s",
277
+ agc_max_gain,
278
+ " [from preferences]" if (prefs and prefs.agc_max_gain is not None) else " [default]")
279
+ except Exception as e:
280
+ _LOGGER.debug("Could not set PP_AGCMAXGAIN: %s", e)
281
+
282
+ # Set AGC desired output level (target level after gain)
283
+ # More negative = quieter output, less negative = louder
284
+ # Default is around -25dB, set to -18dB for stronger output
285
+ try:
286
+ respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
287
+ _LOGGER.debug("AGC desired level set (PP_AGCDESIREDLEVEL=-18.0dB)")
288
+ except Exception as e:
289
+ _LOGGER.debug("Could not set PP_AGCDESIREDLEVEL: %s", e)
290
+
291
+ # Optimize AGC time constants for voice commands
292
+ # Faster attack time helps capture sudden speech onset
293
+ try:
294
+ respeaker.write("PP_AGCTIME", [0.5]) # Main time constant (seconds)
295
+ _LOGGER.debug("AGC time constant set (PP_AGCTIME=0.5s)")
296
+ except Exception as e:
297
+ _LOGGER.debug("Could not set PP_AGCTIME: %s", e)
298
+
299
+ # ========== 2. Base Microphone Gain ==========
300
+ # Increase base microphone gain for better sensitivity
301
+ # Default is 1.0, increase to 2.0 for distant speech
302
+ # Range: 0.0-4.0 (float, linear gain multiplier)
303
+ try:
304
+ respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
305
+ _LOGGER.info("Microphone gain increased (AUDIO_MGR_MIC_GAIN=2.0)")
306
+ except Exception as e:
307
+ _LOGGER.debug("Could not set AUDIO_MGR_MIC_GAIN: %s", e)
308
+
309
+ # ========== 3. Noise Suppression Settings ==========
310
+ # Use saved value if available, otherwise use default (15%)
311
+ # PP_MIN_NS: minimum noise suppression threshold
312
+ # Higher values = less aggressive suppression = better voice pickup
313
+ # PP_MIN_NS = 0.85 means "keep at least 85% of signal" = 15% max suppression
314
+ # UI shows "noise suppression strength" so 15% = PP_MIN_NS of 0.85
315
+ noise_suppression = prefs.noise_suppression if (prefs and prefs.noise_suppression is not None) else 15.0
316
+ pp_min_ns = 1.0 - (noise_suppression / 100.0) # Convert percentage to PP_MIN_NS value
317
+ try:
318
+ respeaker.write("PP_MIN_NS", [pp_min_ns])
319
+ _LOGGER.info("Noise suppression set to %.0f%% strength (PP_MIN_NS=%.2f)%s",
320
+ noise_suppression, pp_min_ns,
321
+ " [from preferences]" if (prefs and prefs.noise_suppression is not None) else " [default]")
322
+ except Exception as e:
323
+ _LOGGER.debug("Could not set PP_MIN_NS: %s", e)
324
+
325
+ # PP_MIN_NN: minimum noise floor estimation
326
+ # Higher values = less aggressive noise floor tracking
327
+ try:
328
+ respeaker.write("PP_MIN_NN", [pp_min_ns]) # Match PP_MIN_NS
329
+ _LOGGER.debug("Noise floor threshold set (PP_MIN_NN=%.2f)", pp_min_ns)
330
+ except Exception as e:
331
+ _LOGGER.debug("Could not set PP_MIN_NN: %s", e)
332
+
333
+ # ========== 4. Echo Cancellation Settings ==========
334
+ # Ensure echo cancellation is enabled (important for TTS playback)
335
+ try:
336
+ respeaker.write("PP_ECHOONOFF", [1])
337
+ _LOGGER.debug("Echo cancellation enabled (PP_ECHOONOFF=1)")
338
+ except Exception as e:
339
+ _LOGGER.debug("Could not set PP_ECHOONOFF: %s", e)
340
+
341
+ # ========== 5. High-pass filter (remove low frequency noise) ==========
342
+ try:
343
+ respeaker.write("AEC_HPFONOFF", [1])
344
+ _LOGGER.debug("High-pass filter enabled (AEC_HPFONOFF=1)")
345
+ except Exception as e:
346
+ _LOGGER.debug("Could not set AEC_HPFONOFF: %s", e)
347
+
348
+ _LOGGER.info("Microphone settings initialized (AGC=%s, MaxGain=%.0fdB, NoiseSuppression=%.0f%%)",
349
+ "ON" if agc_enabled else "OFF", agc_max_gain, noise_suppression)
350
+
351
+ except Exception as e:
352
+ _LOGGER.warning("Failed to optimize microphone settings: %s", e)
353
+
354
+ async def stop(self) -> None:
355
+ """Stop the voice assistant service."""
356
+ _LOGGER.info("Stopping voice assistant service...")
357
+
358
+ # 1. First stop audio recording to prevent new data from coming in
359
+ if self.reachy_mini is not None:
360
+ try:
361
+ self.reachy_mini.media.stop_recording()
362
+ _LOGGER.debug("Reachy Mini recording stopped")
363
+ except Exception as e:
364
+ _LOGGER.warning("Error stopping Reachy Mini recording: %s", e)
365
+
366
+ # 2. Set stop flag
367
+ self._running = False
368
+
369
+ # 3. Wait for audio thread to finish
370
+ if self._audio_thread:
371
+ self._audio_thread.join(timeout=1.0)
372
+ if self._audio_thread.is_alive():
373
+ _LOGGER.warning("Audio thread did not stop in time")
374
+
375
+ # 4. Stop playback
376
+ if self.reachy_mini is not None:
377
+ try:
378
+ self.reachy_mini.media.stop_playing()
379
+ _LOGGER.debug("Reachy Mini playback stopped")
380
+ except Exception as e:
381
+ _LOGGER.warning("Error stopping Reachy Mini playback: %s", e)
382
+
383
+ # 5. Stop ESPHome server
384
+ if self._server:
385
+ self._server.close()
386
+ await self._server.wait_closed()
387
+
388
+ # 6. Unregister mDNS
389
+ if self._discovery:
390
+ await self._discovery.unregister_server()
391
+
392
+ # 6.5. Stop Sendspin
393
+ if self._state and self._state.music_player:
394
+ await self._state.music_player.stop_sendspin()
395
+
396
+ # 7. Stop camera server
397
+ if self._camera_server:
398
+ await self._camera_server.stop()
399
+ self._camera_server = None
400
+
401
+ # 8. Shutdown motion executor
402
+ if self._motion:
403
+ self._motion.shutdown()
404
+
405
+ _LOGGER.info("Voice assistant service stopped.")
406
+
407
+ async def _verify_required_files(self) -> None:
408
+ """Verify required model and sound files exist (bundled with package)."""
409
+ # Required wake word files (bundled in wakewords/ directory)
410
+ required_wakewords = [
411
+ "okay_nabu.tflite",
412
+ "okay_nabu.json",
413
+ "hey_jarvis.tflite",
414
+ "hey_jarvis.json",
415
+ "stop.tflite",
416
+ "stop.json",
417
+ ]
418
+
419
+ # Required sound files (bundled in sounds/ directory)
420
+ required_sounds = [
421
+ "wake_word_triggered.flac",
422
+ "timer_finished.flac",
423
+ ]
424
+
425
+ # Verify wake word files
426
+ missing_wakewords = []
427
+ for filename in required_wakewords:
428
+ filepath = _WAKEWORDS_DIR / filename
429
+ if not filepath.exists():
430
+ missing_wakewords.append(filename)
431
+
432
+ if missing_wakewords:
433
+ _LOGGER.warning(
434
+ "Missing wake word files: %s. These should be bundled with the package.",
435
+ missing_wakewords
436
+ )
437
+
438
+ # Verify sound files
439
+ missing_sounds = []
440
+ for filename in required_sounds:
441
+ filepath = _SOUNDS_DIR / filename
442
+ if not filepath.exists():
443
+ missing_sounds.append(filename)
444
+
445
+ if missing_sounds:
446
+ _LOGGER.warning(
447
+ "Missing sound files: %s. These should be bundled with the package.",
448
+ missing_sounds
449
+ )
450
+
451
+ if not missing_wakewords and not missing_sounds:
452
+ _LOGGER.info("All required files verified successfully.")
453
+
454
+ def _load_available_wake_words(self) -> Dict[str, AvailableWakeWord]:
455
+ """Load available wake word configurations."""
456
+ available_wake_words: Dict[str, AvailableWakeWord] = {}
457
+
458
+ # Load order: OpenWakeWord first, then MicroWakeWord, then external
459
+ # Later entries override earlier ones, so MicroWakeWord takes priority
460
+ wake_word_dirs = [
461
+ _WAKEWORDS_DIR / "openWakeWord", # OpenWakeWord (lowest priority)
462
+ _LOCAL_DIR / "external_wake_words", # External wake words
463
+ _WAKEWORDS_DIR, # MicroWakeWord (highest priority)
464
+ ]
465
+
466
+ for wake_word_dir in wake_word_dirs:
467
+ if not wake_word_dir.exists():
468
+ continue
469
+
470
+ for config_path in wake_word_dir.glob("*.json"):
471
+ model_id = config_path.stem
472
+ if model_id == "stop":
473
+ continue
474
+
475
+ try:
476
+ with open(config_path, "r", encoding="utf-8") as f:
477
+ config = json.load(f)
478
+
479
+ model_type = WakeWordType(config.get("type", "micro"))
480
+
481
+ if model_type == WakeWordType.OPEN_WAKE_WORD:
482
+ wake_word_path = config_path.parent / config["model"]
483
+ else:
484
+ wake_word_path = config_path
485
+
486
+ available_wake_words[model_id] = AvailableWakeWord(
487
+ id=model_id,
488
+ type=model_type,
489
+ wake_word=config.get("wake_word", model_id),
490
+ trained_languages=config.get("trained_languages", []),
491
+ wake_word_path=wake_word_path,
492
+ )
493
+ except Exception as e:
494
+ _LOGGER.warning("Failed to load wake word %s: %s", config_path, e)
495
+
496
+ return available_wake_words
497
+
498
+ def _load_preferences(self, preferences_path: Path) -> Preferences:
499
+ """Load user preferences."""
500
+ if preferences_path.exists():
501
+ try:
502
+ with open(preferences_path, "r", encoding="utf-8") as f:
503
+ data = json.load(f)
504
+ return Preferences(**data)
505
+ except Exception as e:
506
+ _LOGGER.warning("Failed to load preferences: %s", e)
507
+
508
+ return Preferences()
509
+
510
+ def _load_wake_models(
511
+ self,
512
+ available_wake_words: Dict[str, AvailableWakeWord],
513
+ preferences: Preferences,
514
+ ):
515
+ """Load wake word models."""
516
+ from pymicro_wakeword import MicroWakeWord
517
+ from pyopen_wakeword import OpenWakeWord
518
+
519
+ wake_models: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}
520
+ active_wake_words: Set[str] = set()
521
+
522
+ # Try to load preferred models
523
+ if preferences.active_wake_words:
524
+ for wake_word_id in preferences.active_wake_words:
525
+ wake_word = available_wake_words.get(wake_word_id)
526
+ if wake_word is None:
527
+ _LOGGER.warning("Unknown wake word: %s", wake_word_id)
528
+ continue
529
+
530
+ try:
531
+ _LOGGER.debug("Loading wake model: %s", wake_word_id)
532
+ loaded_model = wake_word.load()
533
+ # Set id attribute on the model for later identification
534
+ setattr(loaded_model, 'id', wake_word_id)
535
+ wake_models[wake_word_id] = loaded_model
536
+ active_wake_words.add(wake_word_id)
537
+ except Exception as e:
538
+ _LOGGER.warning("Failed to load wake model %s: %s", wake_word_id, e)
539
+
540
+ # Load default model if none loaded
541
+ if not wake_models:
542
+ wake_word = available_wake_words.get(self.wake_model)
543
+ if wake_word:
544
+ try:
545
+ _LOGGER.debug("Loading default wake model: %s", self.wake_model)
546
+ loaded_model = wake_word.load()
547
+ # Set id attribute on the model for later identification
548
+ setattr(loaded_model, 'id', self.wake_model)
549
+ wake_models[self.wake_model] = loaded_model
550
+ active_wake_words.add(self.wake_model)
551
+ except Exception as e:
552
+ _LOGGER.error("Failed to load default wake model: %s", e)
553
+
554
+ return wake_models, active_wake_words
555
+
556
+ def _load_stop_model(self):
557
+ """Load the stop word model."""
558
+ from pymicro_wakeword import MicroWakeWord
559
+
560
+ stop_config = _WAKEWORDS_DIR / "stop.json"
561
+ if stop_config.exists():
562
+ try:
563
+ model = MicroWakeWord.from_config(stop_config)
564
+ setattr(model, 'id', 'stop')
565
+ return model
566
+ except Exception as e:
567
+ _LOGGER.warning("Failed to load stop model: %s", e)
568
+
569
+ # Return a dummy model if stop model not available
570
+ _LOGGER.warning("Stop model not available, using fallback")
571
+ okay_nabu_config = _WAKEWORDS_DIR / "okay_nabu.json"
572
+ if okay_nabu_config.exists():
573
+ model = MicroWakeWord.from_config(okay_nabu_config)
574
+ setattr(model, 'id', 'stop')
575
+ return model
576
+
577
+ return None
578
+
579
+ def _process_audio(self) -> None:
580
+ """Process audio from microphone (Reachy Mini or system fallback)."""
581
+ from pymicro_wakeword import MicroWakeWordFeatures
582
+
583
+ ctx = AudioProcessingContext()
584
+ ctx.micro_features = MicroWakeWordFeatures()
585
+
586
+ try:
587
+ _LOGGER.info("Starting audio processing...")
588
+
589
+ if self.reachy_mini is not None:
590
+ _LOGGER.info("Using Reachy Mini's microphone")
591
+ self._audio_loop_reachy(ctx)
592
+ else:
593
+ _LOGGER.info("Using system microphone (fallback)")
594
+ self._audio_loop_fallback(ctx)
595
+
596
+ except Exception:
597
+ _LOGGER.exception("Error processing audio")
598
+
599
+ def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
600
+ """Audio loop using Reachy Mini's microphone."""
601
+ while self._running:
602
+ try:
603
+ if not self._wait_for_satellite():
604
+ continue
605
+
606
+ self._update_wake_words_list(ctx)
607
+
608
+ # Get audio from Reachy Mini
609
+ audio_chunk = self._get_reachy_audio_chunk()
610
+ if audio_chunk is None:
611
+ time.sleep(0.01)
612
+ continue
613
+
614
+ self._process_audio_chunk(ctx, audio_chunk)
615
+
616
+ except Exception as e:
617
+ _LOGGER.error("Error in Reachy audio processing: %s", e)
618
+ time.sleep(0.1)
619
+
620
+ def _audio_loop_fallback(self, ctx: AudioProcessingContext) -> None:
621
+ """Audio loop using system microphone (fallback)."""
622
+ import sounddevice as sd
623
+
624
+ block_size = 1024
625
+
626
+ with sd.InputStream(
627
+ samplerate=16000,
628
+ channels=1,
629
+ blocksize=block_size,
630
+ dtype="float32",
631
+ ) as stream:
632
+ while self._running:
633
+ if not self._wait_for_satellite():
634
+ continue
635
+
636
+ self._update_wake_words_list(ctx)
637
+
638
+ # Get audio from system microphone
639
+ audio_chunk_array, overflowed = stream.read(block_size)
640
+ if overflowed:
641
+ _LOGGER.warning("Audio buffer overflow")
642
+
643
+ audio_chunk_array = audio_chunk_array.reshape(-1)
644
+ audio_chunk = self._convert_to_pcm(audio_chunk_array)
645
+
646
+ self._process_audio_chunk(ctx, audio_chunk)
647
+
648
+ def _wait_for_satellite(self) -> bool:
649
+ """Wait for satellite connection. Returns True if connected."""
650
+ if self._state is None or self._state.satellite is None:
651
+ time.sleep(0.1)
652
+ return False
653
+ return True
654
+
655
+ def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
656
+ """Update wake words list if changed."""
657
+ from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
658
+ from pymicro_wakeword import MicroWakeWordFeatures
659
+
660
+ if (not ctx.wake_words) or (self._state.wake_words_changed and self._state.wake_words):
661
+ self._state.wake_words_changed = False
662
+ ctx.wake_words.clear()
663
+
664
+ # Reset feature extractors to clear any residual audio data
665
+ # This prevents false triggers when switching wake words
666
+ ctx.micro_features = MicroWakeWordFeatures()
667
+ ctx.micro_inputs.clear()
668
+ if ctx.oww_features is not None:
669
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
670
+ ctx.oww_inputs.clear()
671
+
672
+ # Also reset the refractory period to prevent immediate trigger
673
+ ctx.last_active = time.monotonic()
674
+
675
+ # state.wake_words is Dict[str, MicroWakeWord/OpenWakeWord]
676
+ # We need to filter by active_wake_words (which contains the IDs/keys)
677
+ for ww_id, ww_model in self._state.wake_words.items():
678
+ if ww_id in self._state.active_wake_words:
679
+ # Ensure the model has an 'id' attribute for later use
680
+ if not hasattr(ww_model, 'id'):
681
+ setattr(ww_model, 'id', ww_id)
682
+ ctx.wake_words.append(ww_model)
683
+
684
+ ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
685
+ if ctx.has_oww and ctx.oww_features is None:
686
+ ctx.oww_features = OpenWakeWordFeatures.from_builtin()
687
+
688
+ _LOGGER.info("Active wake words updated: %s (features reset)", list(self._state.active_wake_words))
689
+
690
+ def _get_reachy_audio_chunk(self) -> Optional[bytes]:
691
+ """Get fixed-size audio chunk from Reachy Mini's microphone.
692
+
693
+ Returns exactly AUDIO_BLOCK_SIZE samples each time, buffering
694
+ internally to ensure consistent chunk sizes for streaming.
695
+
696
+ Returns:
697
+ PCM audio bytes of fixed size, or None if not enough data.
698
+ """
699
+ # Get new audio data from SDK
700
+ audio_data = self.reachy_mini.media.get_audio_sample()
701
+
702
+ # Append new data to buffer if valid
703
+ if audio_data is not None and isinstance(audio_data, np.ndarray) and audio_data.size > 0:
704
+ try:
705
+ if audio_data.dtype.kind not in ('S', 'U', 'O', 'V', 'b'):
706
+ if audio_data.dtype != np.float32:
707
+ audio_data = np.asarray(audio_data, dtype=np.float32)
708
+
709
+ # Convert stereo to mono
710
+ if audio_data.ndim == 2 and audio_data.shape[1] == 2:
711
+ audio_data = audio_data.mean(axis=1)
712
+ elif audio_data.ndim == 2:
713
+ audio_data = audio_data[:, 0].copy()
714
+
715
+ if audio_data.ndim == 1:
716
+ self._audio_buffer = np.concatenate([self._audio_buffer, audio_data])
717
+ except (TypeError, ValueError):
718
+ pass
719
+
720
+ # Return fixed-size chunk if we have enough data
721
+ if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
722
+ chunk = self._audio_buffer[:AUDIO_BLOCK_SIZE]
723
+ self._audio_buffer = self._audio_buffer[AUDIO_BLOCK_SIZE:]
724
+ return self._convert_to_pcm(chunk)
725
+
726
+ return None
727
+
728
+ def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
729
+ """Convert float32 audio array to 16-bit PCM bytes."""
730
+ return (
731
+ (np.clip(audio_chunk_array, -1.0, 1.0) * 32767.0)
732
+ .astype("<i2")
733
+ .tobytes()
734
+ )
735
+
736
+ def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
737
+ """Process an audio chunk for wake word detection.
738
+
739
+ Following reference project pattern: always process wake words.
740
+ Refractory period prevents duplicate triggers.
741
+
742
+ Args:
743
+ ctx: Audio processing context
744
+ audio_chunk: PCM audio bytes
745
+ """
746
+ # Stream audio to Home Assistant
747
+ self._state.satellite.handle_audio(audio_chunk)
748
+
749
+ # Process wake word features
750
+ self._process_features(ctx, audio_chunk)
751
+
752
+ # Detect wake words
753
+ self._detect_wake_words(ctx)
754
+
755
+ # Detect stop word
756
+ self._detect_stop_word(ctx)
757
+
758
+ def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
759
+ """Process audio features for wake word detection."""
760
+ ctx.micro_inputs.clear()
761
+ ctx.micro_inputs.extend(ctx.micro_features.process_streaming(audio_chunk))
762
+
763
+ if ctx.has_oww and ctx.oww_features is not None:
764
+ ctx.oww_inputs.clear()
765
+ ctx.oww_inputs.extend(ctx.oww_features.process_streaming(audio_chunk))
766
+
767
+ def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
768
+ """Detect wake words in the processed audio features.
769
+
770
+ Uses refractory period to prevent duplicate triggers.
771
+ Following reference project pattern.
772
+ """
773
+ from pymicro_wakeword import MicroWakeWord
774
+ from pyopen_wakeword import OpenWakeWord
775
+
776
+ for wake_word in ctx.wake_words:
777
+ activated = False
778
+
779
+ if isinstance(wake_word, MicroWakeWord):
780
+ for micro_input in ctx.micro_inputs:
781
+ if wake_word.process_streaming(micro_input):
782
+ activated = True
783
+ elif isinstance(wake_word, OpenWakeWord):
784
+ for oww_input in ctx.oww_inputs:
785
+ for prob in wake_word.process_streaming(oww_input):
786
+ if prob > 0.5:
787
+ activated = True
788
+
789
+ if activated:
790
+ # Check refractory period to prevent duplicate triggers
791
+ now = time.monotonic()
792
+ if (ctx.last_active is None) or (
793
+ (now - ctx.last_active) > self._state.refractory_seconds
794
+ ):
795
+ _LOGGER.info("Wake word detected: %s", wake_word.id)
796
+ self._state.satellite.wakeup(wake_word)
797
+ # Face tracking will handle looking at user automatically
798
+ self._motion.on_wakeup()
799
+ ctx.last_active = now
800
+
801
+ def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
802
+ """Detect stop word in the processed audio features."""
803
+ if not self._state.stop_word:
804
+ return
805
+
806
+ stopped = False
807
+ for micro_input in ctx.micro_inputs:
808
+ if self._state.stop_word.process_streaming(micro_input):
809
+ stopped = True
810
+
811
+ if stopped and (self._state.stop_word.id in self._state.active_wake_words):
812
+ _LOGGER.info("Stop word detected")
813
+ self._state.satellite.stop()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md RENAMED
File without changes