fix: use right name for store search

#5
This view is limited to 50 files because it contains too many changes. See the raw diff here.
Files changed (50) hide show
  1. .claude/settings.local.json +9 -38
  2. .gitattributes +6 -3
  3. .github/dependabot.yml +0 -13
  4. .github/workflows/sync_develop_to_hf_edge.yml +0 -86
  5. .github/workflows/sync_to_hf.yml +0 -36
  6. .gitignore +5 -12
  7. .pre-commit-config.yaml +0 -20
  8. CHANGELOG.md +0 -713
  9. PROJECT_PLAN.md +1279 -0
  10. Project_Summary.md +0 -1439
  11. README.md +1 -0
  12. changelog.json +1 -272
  13. docs/USER_MANUAL_CN.md +0 -244
  14. docs/USER_MANUAL_EN.md +0 -244
  15. home_assistant_blueprints/reachy_mini_presence_companion.yaml +0 -246
  16. index.html +36 -98
  17. pyproject.toml +22 -141
  18. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py +24 -29
  19. {reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py +63 -52
  20. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py +44 -123
  21. reachy_mini_ha_voice/animations/conversation_animations.json +87 -0
  22. {reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py +35 -65
  23. reachy_mini_ha_voice/audio_player.py +578 -0
  24. reachy_mini_ha_voice/camera_server.py +842 -0
  25. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py +54 -44
  26. {reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py +25 -33
  27. reachy_mini_ha_voice/entity_registry.py +945 -0
  28. reachy_mini_ha_voice/gesture_detector.py +183 -0
  29. {reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py +48 -158
  30. {reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py +83 -31
  31. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py +25 -108
  32. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx +0 -0
  33. {reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx +0 -0
  34. reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py +17 -31
  35. reachy_mini_ha_voice/movement_manager.py +861 -0
  36. {reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py +869 -735
  37. reachy_mini_ha_voice/satellite.py +784 -0
  38. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep +0 -0
  39. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md +0 -0
  40. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md +0 -0
  41. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac +0 -0
  42. {reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac +0 -0
  43. {reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py +27 -36
  44. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html +0 -0
  45. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js +0 -0
  46. {reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css +0 -0
  47. reachy_mini_ha_voice/util.py +45 -0
  48. reachy_mini_ha_voice/voice_assistant.py +810 -0
  49. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep +0 -0
  50. {reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md +0 -0
.claude/settings.local.json CHANGED
@@ -3,53 +3,24 @@
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
- "Bash",
7
- "BashOutput",
8
  "Edit",
9
- "Glob",
10
- "Grep",
11
- "KillShell",
12
- "NotebookEdit",
13
- "Read",
14
- "SlashCommand",
15
- "Task",
16
- "TodoWrite",
17
- "WebFetch",
18
- "WebSearch",
19
- "Write",
20
- "mcp__ide",
21
- "mcp__exa",
22
- "mcp__context7",
23
- "mcp__mcp-deepwiki",
24
- "mcp__Playwright",
25
- "mcp__spec-workflow",
26
- "mcp__open-websearch",
27
- "mcp__serena",
28
- "All",
29
- "Bash(copy:*)",
30
- "mcp__zread__search_doc",
31
- "mcp__zread__read_file",
32
  "Bash(cd:*)",
33
- "Bash(ls:*)",
34
- "Bash(find:*)",
35
- "mcp__acp__Bash",
36
- "Skill(commit-commands:commit)",
37
- "Skill(commit-commands:commit:*)"
38
  ],
39
  "deny": [],
40
  "ask": []
41
  },
42
- "model": "opus",
43
  "hooks": {},
 
 
44
  "statusLine": {
45
  "type": "command",
46
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
47
  "padding": 0
48
  },
49
- "enabledPlugins": {
50
- "glm-plan-usage@zai-coding-plugins": true,
51
- "glm-plan-bug@zai-coding-plugins": true
52
- },
53
- "outputStyle": "Explanatory",
54
- "alwaysThinkingEnabled": true
55
- }
 
3
  "includeCoAuthoredBy": false,
4
  "permissions": {
5
  "allow": [
6
+ "SlashCommand(/zcf:git-commit)",
 
7
  "Edit",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "Bash(cd:*)",
9
+ "SlashCommand(/zcf:git-commit --emoji)",
10
+ "SlashCommand(/zcf:git-commit:*)",
11
+ "Bash(git:*)",
12
+ "Bash(ls:*)"
 
13
  ],
14
  "deny": [],
15
  "ask": []
16
  },
 
17
  "hooks": {},
18
+ "alwaysThinkingEnabled": true,
19
+ "outputStyle": "default",
20
  "statusLine": {
21
  "type": "command",
22
  "command": "%USERPROFILE%\\.claude\\ccline\\ccline.exe",
23
  "padding": 0
24
  },
25
+ "model": "opus"
26
+ }
 
 
 
 
 
.gitattributes CHANGED
@@ -1,5 +1,8 @@
1
- # LFS tracking for large binary files
 
 
 
 
 
2
  *.tflite filter=lfs diff=lfs merge=lfs -text
3
  *.onnx filter=lfs diff=lfs merge=lfs -text
4
- *.pt filter=lfs diff=lfs merge=lfs -text
5
- *.flac filter=lfs diff=lfs merge=lfs -text
 
1
+ reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
2
+ reachy_mini_ha_voice/sounds/**/*.flac filter=lfs diff=lfs merge=lfs -text
3
+ "reachy_mini_ha_voice/wakewords/**/*.tflite filter=lfs diff=lfs merge=lfs -text
4
+ reachy_mini_ha_voice/sounds/**/*.flac" filter=lfs diff=lfs merge=lfs -text
5
+ "ha/assets/meshes/*.stl" filter=lfs diff=lfs merge=lfs -text
6
+ "ha/assets/*.urdf" filter=lfs diff=lfs merge=lfs -text
7
  *.tflite filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
.github/dependabot.yml DELETED
@@ -1,13 +0,0 @@
1
- version: 2
2
- updates:
3
- # Enable version updates for pip
4
- - package-ecosystem: "pip"
5
- directory: "/"
6
- schedule:
7
- interval: "weekly"
8
- # Ignore PyTorch updates - locked version required for compatibility
9
- ignore:
10
- - dependency-name: "torch"
11
- versions: [">2.5.1"]
12
- - dependency-name: "torchvision"
13
- versions: [">0.20.1"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_develop_to_hf_edge.yml DELETED
@@ -1,86 +0,0 @@
1
- name: Sync Develop to Hugging Face Edge
2
-
3
- on:
4
- push:
5
- branches: [develop]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync-edge:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Transform project name for edge channel
18
- run: |
19
- python - <<'PY'
20
- from pathlib import Path
21
-
22
- # Keep runtime module path unchanged, only rewrite package/app naming metadata.
23
- pyproject = Path('pyproject.toml')
24
- text = pyproject.read_text(encoding='utf-8')
25
- text = text.replace(
26
- 'name = "reachy_mini_home_assistant"',
27
- 'name = "reachy_mini_home_assistant_edge"',
28
- 1,
29
- )
30
- text = text.replace(
31
- 'reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
32
- 'reachy_mini_home_assistant_edge = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"',
33
- 1,
34
- )
35
- pyproject.write_text(text, encoding='utf-8')
36
-
37
- init_file = Path('reachy_mini_home_assistant/__init__.py')
38
- init_text = init_file.read_text(encoding='utf-8')
39
- init_text = init_text.replace(
40
- 'version("reachy_mini_home_assistant")',
41
- 'version("reachy_mini_home_assistant_edge")',
42
- 1,
43
- )
44
- init_file.write_text(init_text, encoding='utf-8')
45
-
46
- readme = Path('README.md')
47
- if readme.exists():
48
- readme_text = readme.read_text(encoding='utf-8')
49
- readme_text = readme_text.replace(
50
- 'title: Reachy Mini for Home Assistant',
51
- 'title: Reachy Mini for Home Assistant (Edge)',
52
- 1,
53
- )
54
- readme_text = readme_text.replace(
55
- 'short_description: Deep integration of Reachy Mini robot with Home Assistant',
56
- 'short_description: Edge channel for Reachy Mini Home Assistant integration',
57
- 1,
58
- )
59
- readme_text = readme_text.replace(
60
- ' - reachy_mini_home_assistant',
61
- ' - reachy_mini_home_assistant_edge',
62
- 1,
63
- )
64
- readme.write_text(readme_text, encoding='utf-8')
65
- PY
66
-
67
- - name: Create fresh commit and push to Hugging Face edge space
68
- env:
69
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
70
- run: |
71
- git config --global user.email "action@github.com"
72
- git config --global user.name "GitHub Action"
73
-
74
- # Create a new orphan branch with no history
75
- git checkout --orphan hf-edge-sync
76
- git add -A
77
- git commit -m "Fresh edge sync: $(date +%Y-%m-%d_%H:%M:%S)"
78
-
79
- # Add Hugging Face edge remote
80
- git remote add hf-edge https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant_edge
81
-
82
- # Push LFS objects first
83
- git lfs push hf-edge hf-edge-sync --all
84
-
85
- # Force push as main to HF edge space
86
- git push hf-edge hf-edge-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/sync_to_hf.yml DELETED
@@ -1,36 +0,0 @@
1
- name: Sync to Hugging Face
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- workflow_dispatch:
7
-
8
- jobs:
9
- sync:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - name: Checkout GitHub repo
13
- uses: actions/checkout@v4
14
- with:
15
- lfs: true
16
-
17
- - name: Create fresh commit and push to Hugging Face
18
- env:
19
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: |
21
- git config --global user.email "action@github.com"
22
- git config --global user.name "GitHub Action"
23
-
24
- # Create a new orphan branch with no history
25
- git checkout --orphan hf-sync
26
- git add -A
27
- git commit -m "Fresh sync: $(date +%Y-%m-%d\ %H:%M:%S)"
28
-
29
- # Add Hugging Face remote
30
- git remote add hf https://djhui5710:$HF_TOKEN@huggingface.co/spaces/djhui5710/reachy_mini_home_assistant
31
-
32
- # Push LFS objects first
33
- git lfs push hf hf-sync --all
34
-
35
- # Force push as main to HF (overwrites all history)
36
- git push hf hf-sync:main --force
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -39,8 +39,6 @@ env/
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
42
- CLAUDE.md
43
- commit_msg.txt
44
 
45
  # Configuration
46
  config.json
@@ -65,19 +63,14 @@ htmlcov/
65
  !reachy_mini_ha_voice/sounds/*.flac
66
 
67
  # Models (exclude package bundled files)
68
- # models/ - ignore external models directory
69
  models/
70
- # Package bundled models
71
- !reachy_mini_ha_voice/models/
72
- reachy_mini_ha_voice/models/*.tflite
73
- reachy_mini_ha_voice/models/*.onnx
74
- reachy_mini_ha_voice/models/*.pt
75
 
76
  # SDK Reference (local development only)
77
  reference/
78
- local/
79
  # ha/ - temporarily commented out for path fixes
80
  # ha/ will be moved to separate repository soon
81
-
82
- # Temporary check scripts
83
- temp_check_scripts/
 
39
  .spec-workflow/
40
  .playwright-mcp/
41
  *~
 
 
42
 
43
  # Configuration
44
  config.json
 
63
  !reachy_mini_ha_voice/sounds/*.flac
64
 
65
  # Models (exclude package bundled files)
 
66
  models/
67
+ # *.tflite - bundled in package
68
+ !reachy_mini_ha_voice/wakewords/*.tflite
69
+ !reachy_mini_ha_voice/wakewords/**/*.tflite
70
+ *.onnx
71
+ !reachy_mini_ha_voice/models/*.onnx
72
 
73
  # SDK Reference (local development only)
74
  reference/
 
75
  # ha/ - temporarily commented out for path fixes
76
  # ha/ will be moved to separate repository soon
 
 
 
.pre-commit-config.yaml DELETED
@@ -1,20 +0,0 @@
1
- # Pre-commit hooks for code quality
2
- # Install: pip install pre-commit && pre-commit install
3
- # Run manually: pre-commit run --all-files
4
-
5
- repos:
6
- - repo: https://github.com/astral-sh/ruff-pre-commit
7
- rev: v0.8.6
8
- hooks:
9
- - id: ruff
10
- args: [--fix]
11
- - id: ruff-format
12
-
13
- - repo: https://github.com/pre-commit/mirrors-mypy
14
- rev: v1.14.1
15
- hooks:
16
- - id: mypy
17
- additional_dependencies: []
18
- args: [--ignore-missing-imports]
19
- # Only check changed files for speed
20
- pass_filenames: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CHANGELOG.md DELETED
@@ -1,713 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to the Reachy Mini HA Voice project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [Unreleased]
9
-
10
- ### Fixed
11
- - **NameError** - Add missing deque import in gesture smoother
12
- - **Syntax Error** - Add missing class indentation for volume methods in audio_player.py
13
- - **Audio Card Name Detection** - Use SDK's detection logic instead of hardcoded values
14
- - **SDK Port 8000 Blocking** - Use amixer directly for volume control to avoid SDK HTTP API blocking
15
- - **Memory Leak Root Cause** - Audio buffer array creation in loop causing unbounded memory growth
16
- - **Indentation Error** - Fix indentation in audio_player.py stop_sendspin method
17
-
18
- ## [1.0.7] - 2026-05-05
19
-
20
- ### Changed
21
- - Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers
22
- - Raise the Reachy Mini SDK baseline to `reachy-mini>=1.7.1`
23
-
24
- ### Fixed
25
- - Keep wakeup/TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers
26
- - Synchronize `Idle Behavior` shutdown with ESPHome face/gesture switches and runtime state updates
27
- - Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior
28
-
29
- ### Optimized
30
- - Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling
31
-
32
- ## [1.0.6] - 2026-05-01
33
-
34
- ### Changed
35
- - Align `pyproject.toml` with the current Reachy Mini SDK baseline by requiring `reachy-mini>=1.7.0`, `Python>=3.12`, `zeroconf>=0.131,<1`, `aiohttp`, `websockets>=12,<16`, and `gstreamer-bundle==1.28.1` on non-Linux platforms
36
- - Align Sendspin client dependency with the current upstream line via `aiosendspin>=5.1,<6.0`
37
-
38
- ### Fixed
39
- - Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model
40
-
41
- ### Optimized
42
- - Stop the camera server entirely when `Idle Behavior` is disabled instead of only unloading vision models, so idle-without-animation behaves more like a low-resource sleep state
43
-
44
- ## [1.0.5] - 2026-04-12
45
-
46
- ### Changed
47
- - Remove app-managed robot sleep/wake handling because current Reachy Mini SDK no longer supports mini apps remaining active while the robot enters sleep
48
- - Keep resource suspend/resume limited to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery
49
- - Align `pyproject.toml` runtime constraints with the current Reachy Mini reference SDK package (`reachy-mini>=1.6.3`, `websockets>=12,<16`, Python baseline `>=3.10`, and uv gstreamer metadata)
50
-
51
- ### Removed
52
- - Remove `SleepManager` integration and app-side sleep/wake callback flow from the voice assistant runtime
53
- - Remove Home Assistant sleep control entities and internal robot sleep state tracking from the mini app
54
-
55
- ## [1.0.4] - 2026-03-19
56
-
57
- ### Fixed
58
- - Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks
59
- - Replace direct SDK private `_respeaker` access with `audio_control_utils`-based ReSpeaker initialization
60
- - Tighten camera and pose composition to require current SDK media/utils APIs and valid `look_at_image` inputs
61
-
62
- ### Improved
63
- - Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases
64
- - Replace separate wake/sleep buttons with a single sleep control entity
65
- - Update Sendspin integration for current `aiosendspin` lifecycle, stream handling, listener cleanup, and synchronized buffering
66
- - Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor
67
-
68
- ## [1.0.3] - 2026-03-07
69
-
70
- ### Added
71
- - Idle Random Actions switch in Home Assistant with preferences persistence and startup restore
72
- - Configurable `idle_random_actions` presets in `conversation_animations.json` for centralized idle motion tuning
73
-
74
- ### Fixed
75
- - Remove duplicate `idle_random_actions` fields/methods and complete runtime control wiring in controller/entity registry/movement manager
76
-
77
- ### Optimized
78
- - Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion
79
- - Remove `set_target` global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick
80
- - Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness
81
-
82
- ## [1.0.2] - 2026-03-06
83
-
84
- ### Fixed
85
- - Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness
86
- - Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise
87
- - Switch sleep/wake control to daemon API (`start` with `wake_up=true`, `stop` with `goto_sleep=true`) so `/api/daemon/status` reflects real sleep state on SDK 1.5
88
- - Normalize daemon status parsing for SDK 1.5 object-based status responses
89
- - Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts
90
- - Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome
91
- - Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection
92
- - Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)
93
-
94
- ### Changed
95
- - Keep idle antenna behavior as animation-only control (no torque coupling)
96
- - Tighten preference loading to current schema (no legacy config fallback filtering)
97
-
98
- ### Added
99
- - Home Assistant blueprint for Reachy presence companion automation
100
- - GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag
101
-
102
- ### Improved
103
- - Blueprint supports device-first auto-binding and richer usage instructions
104
- - Refresh landing page (`index.html`) with current version, GitHub source link, and new Blueprint/Auto Release capability cards
105
-
106
- ## [1.0.1] - 2026-03-05
107
-
108
- ### Changed
109
- - Update runtime dependency baseline to `reachy-mini>=1.5.0`
110
-
111
- ### Fixed
112
- - Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility
113
- - Remove legacy ZError string matching from connection error handling
114
- - Adapt daemon status handling to SDK v1.5 `DaemonStatus` object (prevents `AttributeError` on `status.get`)
115
- - Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating
116
- - Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)
117
- - Improve TTS streaming robustness and reduce cutoffs with retry-based audio push
118
-
119
- ### Optimized
120
- - Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)
121
- - Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio
122
-
123
- ## [1.0.0] - 2026-03-04
124
-
125
- ### Changed
126
- - Require `reachy-mini[gstreamer]>=1.4.1`
127
-
128
- ### Added
129
- - Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)
130
- - Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)
131
- - Face Confidence number entity (0.0-1.0, persistent)
132
-
133
- ### Fixed
134
- - Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)
135
- - Auto-match ONNX gesture input size from model shape to prevent `INVALID_ARGUMENT` dimension errors
136
- - Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise
137
- - Enforce deterministic audio startup path and fail fast when microphone capture is not ready
138
- - Add on-demand `/snapshot` JPEG generation when no cached stream frame is available
139
-
140
- ### Optimized
141
- - Unload/reload face and gesture models when toggled off/on to save resources
142
- - Update idle behavior to breathing + look-around alternation, idle antenna sway disabled
143
- - Adjust idle breathing to human-like cadence
144
- - Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)
145
- - Keep face/gesture AI processing active even when stream viewers are absent
146
-
147
- ### Changed
148
- - Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps
149
-
150
- ## [0.9.9] - 2026-01-28
151
-
152
- ### Fixed
153
- - **SDK Buffer Overflow During Idle**
154
- - Add SDK buffer flush on GStreamer lock timeout
155
- - Prevents buffer overflow during long idle periods when lock contention prevents buffer drainage
156
- - Audio thread flushes SDK audio buffer when lock acquisition times out
157
- - Camera thread flushes SDK video buffer when lock acquisition times out
158
- - Audio playback flushes SDK playback buffer when lock acquisition times out
159
- - Resolves SDK crashes during extended wake-up idle periods without conversation
160
- - Requires Reachy Mini hardware (not applicable to simulation mode)
161
-
162
- ### Fixed
163
- - **Memory Leaks**
164
- - Audio buffer memory leak - added size limit to prevent unbounded growth
165
- - Temp file leak - downloaded audio files now cleaned up after playback
166
- - Multiple memory leak and resource leak issues fixed
167
- - Thread-safe draining flag using threading.Event
168
- - Silent failures now logged for debugging
169
-
170
- ### Optimized
171
- - **Gesture Recognition Sensitivity**
172
- - Simplify GestureSmoother to frequency-based confirmation (1 frame)
173
- - Remove all confidence filtering - return all detections to Home Assistant
174
- - Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)
175
- - Remove duplicate empty check in gesture detection
176
- - Add GestureSmoother class with history tracking for stable output
177
- - Reduce gesture detection interval from 3 frames to 1 frame for higher frequency
178
- - Fix: Gesture detection now returns all detected hands instead of only the highest confidence one
179
- - Matches reference implementation behavior for improved detection rate
180
- - No conflicts with face tracking (shared frame, independent processing)
181
-
182
- ### Code Quality
183
- - Fix Ruff linter issues (import ordering, missing newlines, __all__ sorting)
184
- - Format code with Ruff formatter (5 files reformatted)
185
- - Fix slice index error in gesture detection (convert coordinates to integers)
186
- - Fix Python 3.12 type annotation compatibility
187
-
188
- ## [0.9.8] - 2026-01-27
189
-
190
- ### New
191
- - Mute switch entity - suspends voice services only (not camera/motion)
192
- - Disable Camera switch entity - suspends camera and AI processing
193
- - Home Assistant connection-driven feature loading
194
- - Automatic suspend/resume on HA disconnect/reconnect
195
-
196
- ### Fixed
197
- - Camera disable logic - corrected inverted conditions for proper operation
198
- - Prevent daemon crash when entering idle state
199
- - Camera preview in Home Assistant
200
- - SDK crash during idle - optimized audio processing to skip get_frame() when not streaming to Home Assistant, reducing GStreamer resource competition
201
- - Add GStreamer threading lock to prevent pipeline competition between audio, playback, and camera threads
202
- - Audio thread gets priority during conversations - bypasses lock when conversation is active
203
- - Remove GStreamer lock to fix wake word detection in idle state (lock was preventing wake word detection)
204
-
205
- ### Optimized
206
- - Reduce log output by 30-40%
207
- - Bundle face tracking model with package - eliminated HuggingFace download dependency, removed huggingface_hub from requirements, models now load from local package directory for offline operation
208
- - Replace HTTP API polling with SDK Zenoh for daemon status monitoring to reduce uvicorn blocking and improve stability
209
- - Device ID now reads /etc/machine-id directly - removed uuid.getnode() and file persistence
210
- - Implement high-priority SDK improvements
211
- - Remove aiohttp dependency from daemon_monitor - fully migrated to SDK Zenoh
212
-
213
- ### Removed
214
- - Temporarily disable emotion playback during TTS
215
- - Unused config items (connection_timeout)
216
-
217
- ### Code Quality
218
- - Code quality improvements
219
-
220
- ## [0.9.7] - 2026-01-20
221
-
222
- ### Fixed
223
- - Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)
224
- - Animation file path corrected (was looking in wrong directory)
225
- - Remove hey_jarvis from required wake words (it's optional in openWakeWord/)
226
-
227
- ## [0.9.6] - 2026-01-20
228
-
229
- ### New
230
- - Add ruff linter/formatter and mypy type checker configuration
231
- - Add pre-commit hooks for automated code quality checks
232
-
233
- ### Fixed
234
- - Remove duplicate resume() method in audio_player.py
235
- - Remove duplicate connection_lost() method in satellite.py
236
- - Store asyncio task reference in sleep_manager.py to prevent garbage collection
237
-
238
- ### Optimized
239
- - Use dict.items() for efficient iteration in smoothing.py
240
-
241
- ## [0.9.5] - 2026-01-19
242
-
243
- ### Refactored
244
- - Modularize codebase - new core/motion/vision/audio/entities module structure
245
- - Remove legacy/compatibility code
246
- - Remove audio diagnostics debug code
247
-
248
- ### New
249
- - Direct callbacks for HA sleep/wake buttons to suspend/resume services
250
-
251
- ### Optimized
252
- - Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)
253
- - Audio loop delay reduced from 10ms to 1ms for faster VAD response
254
- - Stereo to mono conversion uses first channel instead of mean for cleaner signal
255
-
256
- ### Improved
257
- - Camera resume_from_suspend now synchronous for reliable wake from sleep
258
- - Rotation clamping in face tracking to prevent IK collisions
259
- - Audio gain boosted for faster VAD detection
260
- - Audio NaN/Inf values causing STT issues fixed
261
-
262
- ## [0.9.0] - 2026-01-18
263
-
264
- ### New
265
- - Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect
266
- - System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors
267
- - Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)
268
-
269
- ### Fixed
270
- - Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam
271
-
272
- ### Improved
273
- - Graceful service lifecycle management with RobotStateMonitor callbacks
274
-
275
- ## [0.8.7] - 2026-01-18
276
-
277
- ### Fixed
278
- - Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback
279
- - Emotion moves and face tracking now respect SDK safety limits
280
-
281
- ### Improved
282
- - Face tracking smoothness - removed EMA smoothing (matches reference project)
283
- - Face tracking timing updated to match reference (2s delay, 1s interpolation)
284
-
285
- ## [0.8.6] - 2026-01-18
286
-
287
- ### Fixed
288
- - Audio buffer memory leak - added size limit to prevent unbounded growth
289
- - Temp file leak - downloaded audio files now cleaned up after playback
290
- - Camera thread termination timeout increased for clean shutdown
291
- - Thread-safe draining flag using threading.Event
292
- - Silent failures now logged for debugging
293
-
294
- ## [0.8.5] - 2026-01-18
295
-
296
- ### Fixed
297
- - DOA turn-to-sound direction inverted - now turns correctly toward sound source
298
- - Graceful shutdown prevents daemon crash on app stop
299
-
300
- ## [0.8.4] - 2026-01-18
301
-
302
- ### Improved
303
- - Smooth idle animation with interpolation phase (matches reference BreathingMove)
304
- - Two-phase animation - interpolates to neutral before oscillation
305
- - Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway
306
-
307
- ## [0.8.3] - 2026-01-18
308
-
309
- ### Fixed
310
- - Body now properly follows head rotation during face tracking
311
- - body_yaw extracted from final head pose matrix and synced with head_yaw
312
- - Matches reference project sweep_look behavior for natural body movement
313
-
314
- ## [0.8.2] - 2026-01-18
315
-
316
- ### Fixed
317
- - Body follows head rotation during face tracking - body_yaw syncs with head_yaw
318
- - Matches reference project sweep_look behavior for natural body movement
319
-
320
- ## [0.8.1] - 2026-01-18
321
-
322
- ### Fixed
323
- - face_detected entity now pushes state updates to Home Assistant in real-time
324
- - Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention
325
- - Idle animation now starts immediately on app launch
326
- - Smooth antenna animation - removed pose change threshold for continuous motion
327
-
328
- ## [0.8.0] - 2026-01-17
329
-
330
- ### New
331
- - Comprehensive emotion keyword mapping with 280+ Chinese and English keywords
332
- - 35 emotion categories mapped to robot expressions
333
- - Auto-trigger expressions from conversation text patterns
334
-
335
- ## [0.7.3] - 2026-01-12
336
-
337
- ### Fixed
338
- - Revert to reference project pattern - use refractory period instead of state flags
339
- - Remove broken _in_pipeline and _tts_playing state management
340
- - Restore correct RUN_END event handling from linux-voice-assistant
341
-
342
- ## [0.7.2] - 2026-01-12
343
-
344
- ### Fixed
345
- - Remove premature _tts_played reset in RUN_END event
346
- - Ensure _in_pipeline stays True until TTS playback completes
347
-
348
- ## [0.7.1] - 2026-01-12
349
-
350
- ### Fixed
351
- - Prevent wake word detection during TTS playback
352
- - Add _tts_playing flag to track TTS audio state precisely
353
-
354
- ## [0.7.0] - 2026-01-12
355
-
356
- ### New
357
- - Gesture detection using HaGRID ONNX models (18 gesture classes)
358
- - gesture_detected and gesture_confidence entities in Home Assistant
359
-
360
- ### Fixed
361
- - Gesture state now properly pushed to Home Assistant in real-time
362
-
363
- ### Optimized
364
- - Aggressive power saving - 0.5fps idle mode after 30s without face
365
- - Gesture detection only runs when face detected (saves CPU)
366
-
367
- ## [0.6.1] - 2026-01-12
368
-
369
- ### Fixed
370
- - Prioritize MicroWakeWord over OpenWakeWord for same-name wake words
371
- - OpenWakeWord wake words now visible in Home Assistant selection
372
- - Stop word detection now works correctly
373
- - STT/LLM response time improved with fixed audio chunk size
374
-
375
- ## [0.6.0] - 2026-01-11
376
-
377
- ### New
378
- - Real-time audio-driven speech animation (SwayRollRT algorithm)
379
- - JSON-driven animation system - all animations configurable
380
-
381
- ### Refactored
382
- - Remove hardcoded actions, use animation offsets only
383
-
384
- ### Fixed
385
- - TTS audio analysis now works with local playback
386
-
387
- ## [0.5.16] - 2026-01-11
388
-
389
- ### Removed
390
- - Tap-to-wake feature (too many false triggers)
391
-
392
- ### New
393
- - Continuous Conversation switch in Home Assistant
394
-
395
- ### Refactored
396
- - Simplified satellite.py and voice_assistant.py
397
-
398
- ## [0.5.15] - 2026-01-11
399
-
400
- ### New
401
- - Audio settings persistence (AGC, Noise Suppression, Tap Sensitivity)
402
-
403
- ### Refactored
404
- - Move Sendspin mDNS discovery to zeroconf.py
405
-
406
- ### Fixed
407
- - Tap detection not re-enabled during emotion playback in conversation
408
-
409
- ## [0.5.14] - 2026-01-11
410
-
411
- ### Fixed
412
- - Skip ALL wake word processing when pipeline is active
413
- - Eliminate race condition in pipeline state during continuous conversation
414
-
415
- ### Improved
416
- - Control loop increased to 100Hz (daemon updated)
417
-
418
- ## [0.5.13] - 2026-01-10
419
-
420
- ### New
421
- - JSON-driven animation system for conversation states
422
- - AnimationPlayer class inspired by SimpleDances project
423
-
424
- ### Refactored
425
- - Replace SpeechSwayGenerator and BreathingAnimation with unified animation system
426
-
427
- ## [0.5.12] - 2026-01-10
428
-
429
- ### Removed
430
- - Deleted broken hey_reachy wake word model
431
-
432
- ### Revert
433
- - Default wake word back to "Okay Nabu"
434
-
435
- ## [0.5.11] - 2026-01-10
436
-
437
- ### Fixed
438
- - Reset feature extractors when switching wake words
439
- - Add refractory period after wake word switch
440
-
441
- ## [0.5.10] - 2026-01-10
442
-
443
- ### Fixed
444
- - Wake word models now have 'id' attribute set correctly
445
- - Wake word switching from Home Assistant now works
446
-
447
- ## [0.5.9] - 2026-01-10
448
-
449
- ### New
450
- - Default wake word changed to hey_reachy
451
-
452
- ### Fixed
453
- - Wake word switching bug
454
-
455
- ## [0.5.8] - 2026-01-09
456
-
457
- ### Fixed
458
- - Tap detection waits for emotion playback to complete
459
- - Poll daemon API for move completion
460
-
461
- ## [0.5.7] - 2026-01-09
462
-
463
- ### New
464
- - DOA turn-to-sound at wakeup
465
-
466
- ### Fixed
467
- - Show raw DOA angle in Home Assistant (0-180)
468
- - Invert DOA yaw direction
469
-
470
- ## [0.5.6] - 2026-01-08
471
-
472
- ### Fixed
473
- - Better pipeline state tracking to prevent duplicate audio
474
-
475
- ## [0.5.5] - 2026-01-08
476
-
477
- ### New
478
- - Prevent concurrent pipelines
479
- - Add prompt sound for continuous conversation
480
-
481
- ## [0.5.4] - 2026-01-08
482
-
483
- ### Fixed
484
- - Wait for RUN_END before starting new conversation
485
-
486
- ## [0.5.3] - 2026-01-08
487
-
488
- ### Fixed
489
- - Improve continuous conversation with conversation_id tracking
490
-
491
- ## [0.5.2] - 2026-01-08
492
-
493
- ### Fixed
494
- - Enable HA control of robot pose
495
- - Continuous conversation improvements
496
-
497
- ## [0.5.1] - 2026-01-08
498
-
499
- ### Fixed
500
- - Sendspin connects to music_player instead of tts_player
501
- - Persist tap_sensitivity settings
502
- - Pause Sendspin during voice assistant wakeup
503
- - Sendspin prioritize 16kHz sample rate
504
-
505
- ## [0.5.0] - 2026-01-07
506
-
507
- ### New
508
- - Face tracking with adaptive frequency
509
- - Sendspin multi-room audio integration
510
-
511
- ### Optimized
512
- - Shutdown mechanism improvements
513
-
514
- ## [0.4.0] - 2026-01-07
515
-
516
- ### Fixed
517
- - Daemon stability fixes
518
-
519
- ### New
520
- - Face tracking enabled by default
521
-
522
- ### Optimized
523
- - Microphone settings for better sensitivity
524
-
525
- ## [0.3.0] - 2026-01-06
526
-
527
- ### New
528
- - Tap sensitivity slider entity
529
-
530
- ### Fixed
531
- - Music Assistant compatibility
532
-
533
- ### Optimized
534
- - Face tracking and tap detection
535
-
536
- ## [0.2.21] - 2026-01-06
537
-
538
- ### Fixed
539
- - Daemon crash - reduce control loop to 2Hz
540
- - Pause control loop during audio playback
541
-
542
- ## [0.2.20] - 2026-01-06
543
-
544
- ### Revert
545
- - Audio/satellite/voice_assistant to v0.2.9 working state
546
-
547
- ## [0.2.19] - 2026-01-06
548
-
549
- ### Fixed
550
- - Force localhost connection mode to prevent WebRTC errors
551
-
552
- ## [0.2.18] - 2026-01-06
553
-
554
- ### Fixed
555
- - Audio playback - restore wakeup sound
556
- - Use push_audio_sample for TTS
557
-
558
- ## [0.2.17] - 2026-01-06
559
-
560
- ### Removed
561
- - head_joints/passive_joints entities
562
- - error_message to diagnostic category
563
-
564
- ## [0.2.16] - 2026-01-06
565
-
566
- ### Fixed
567
- - TTS playback - pause recording during playback
568
-
569
- ## [0.2.15] - 2026-01-06
570
-
571
- ### Fixed
572
- - Use play_sound() instead of push_audio_sample() for TTS
573
-
574
- ## [0.2.14] - 2026-01-06
575
-
576
- ### Fixed
577
- - Pause audio recording during TTS playback
578
-
579
- ## [0.2.13] - 2026-01-06
580
-
581
- ### Fixed
582
- - Don't manually start/stop media - let SDK/daemon manage it
583
-
584
- ## [0.2.12] - 2026-01-05
585
-
586
- ### Fixed
587
- - Disable breathing animation to prevent serial port overflow
588
-
589
- ## [0.2.11] - 2026-01-05
590
-
591
- ### Fixed
592
- - Disable wakeup sound to prevent daemon crash
593
- - Add debug logging for troubleshooting
594
-
595
- ## [0.2.10] - 2026-01-05
596
-
597
- ### Added
598
- - Debug logging for motion init
599
-
600
- ### Fixed
601
- - Audio fallback samplerate
602
-
603
- ## [0.2.9] - 2026-01-05
604
-
605
- ### Removed
606
- - DOA/speech detection - replaced by face tracking
607
-
608
- ## [0.2.8] - 2026-01-05
609
-
610
- ### New
611
- - Replace DOA with YOLO face tracking
612
-
613
- ## [0.2.7] - 2026-01-05
614
-
615
- ### Fixed
616
- - Add DOA caching to prevent ReSpeaker query overload
617
-
618
- ## [0.2.6] - 2026-01-05
619
-
620
- ### New
621
- - Thread-safe ReSpeaker USB access to prevent daemon deadlock
622
-
623
- ## [0.2.4] - 2026-01-05
624
-
625
- ### Fixed
626
- - Microphone volume control via daemon HTTP API
627
-
628
- ## [0.2.3] - 2026-01-05
629
-
630
- ### Fixed
631
- - Daemon crash caused by conflicting pose commands
632
- - Disable: Pose setter methods in ReachyController
633
-
634
- ## [0.2.2] - 2026-01-05
635
-
636
- ### Fixed
637
- - Second conversation motion failure
638
- - Reduce: Control loop from 20Hz to 10Hz
639
- - Improve: Connection recovery (faster reconnect)
640
-
641
- ## [0.2.1] - 2026-01-05
642
-
643
- ### Fixed
644
- - Daemon crash issue
645
- - Optimize: Code structure
646
-
647
- ## [0.2.0] - 2026-01-05
648
-
649
- ### New
650
- - Automatic facial expressions during conversation
651
- - New: Emotion playback integration
652
-
653
- ### Refactored
654
- - Integrate emotion playback into MovementManager
655
-
656
- ## [0.1.5] - 2026-01-04
657
-
658
- ### Optimized
659
- - Code splitting and organization
660
-
661
- ### Fixed
662
- - Program crash issues
663
-
664
- ## [0.1.0] - 2026-01-01
665
-
666
- ### New
667
- - Initial release
668
- - ESPHome protocol server implementation
669
- - mDNS auto-discovery for Home Assistant
670
- - Local wake word detection (microWakeWord)
671
- - Voice assistant pipeline integration
672
- - Basic motion feedback (nod, shake)
673
-
674
- ---
675
-
676
- ## Version History Summary
677
-
678
- | Version | Date | Major Changes |
679
- |---------|------|--------------|
680
- | 0.9.9 | 2026-01-28 | SDK buffer overflow fixes, memory leak fixes, gesture detection optimization |
681
- | 0.9.8 | 2026-01-27 | Mute/Disable entities, HA connection-driven features, log reduction |
682
- | 0.9.7 | 2026-01-20 | Device ID path fix, animation path fix |
683
- | 0.9.6 | 2026-01-20 | Code quality tools (ruff, mypy, pre-commit) |
684
- | 0.9.5 | 2026-01-19 | Modular architecture refactoring, audio latency optimization |
685
- | 0.9.0 | 2026-01-18 | Robot state monitor, system diagnostics entities |
686
- | 0.8.7 | 2026-01-18 | Body yaw clamping, face tracking smoothness |
687
- | 0.8.0 | 2026-01-17 | Emotion keyword mapping (280+ keywords, 35 categories) |
688
- | 0.7.0 | 2026-01-12 | Gesture detection with HaGRID ONNX models (18 gestures) |
689
- | 0.6.0 | 2026-01-11 | Real-time audio-driven speech animation, JSON animation system |
690
- | 0.5.0 | 2026-01-07 | Face tracking, Sendspin multi-room audio |
691
- | 0.4.0 | 2026-01-07 | Daemon stability, microphone optimization |
692
- | 0.3.0 | 2026-01-06 | Tap sensitivity slider |
693
- | 0.2.0 | 2026-01-05 | Emotion playback integration |
694
- | 0.1.0 | 2026-01-01 | Initial release |
695
-
696
- ## Project Statistics
697
-
698
- - **Total Versions**: 29 (from 0.1.0 to 0.9.9)
699
- - **Development Period**: ~30 days (2026-01-01 to 2026-01-28)
700
- - **Average Release Rate**: ~1 version per day
701
- - **Lines of Code**: ~18,000 lines across 52 Python files
702
- - **ESPHome Entities**: 54 entities implemented
703
- - **Supported Features**:
704
- - Voice assistant pipeline integration
705
- - Local wake word detection (multiple models)
706
- - Face tracking with YOLO
707
- - Gesture detection (18 classes)
708
- - Multi-room audio (Sendspin)
709
- - Real-time speech animation
710
- - Emotion keyword detection (280+ keywords)
711
- - System diagnostics
712
-
713
- For detailed implementation notes, see [PROJECT_PLAN.md](./PROJECT_PLAN.md).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
PROJECT_PLAN.md ADDED
@@ -0,0 +1,1279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reachy Mini for Home Assistant - Project Plan
2
+
3
+ ## Project Overview
4
+
5
+ Integrate Home Assistant voice assistant functionality into the Reachy Mini Wi-Fi robot, which communicates with Home Assistant via the ESPHome protocol.
6
+
7
+ ## Local Reference Directories (DO NOT modify any files in reference directories)
8
+ 1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
9
+ 2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
10
+ 3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
11
+ 4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
12
+ 5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
13
+
14
+ ## Core Design Principles
15
+
16
+ 1. **Zero Configuration** - Users only need to install the app, no manual configuration required
17
+ 2. **Native Hardware** - Use robot's built-in microphone and speaker
18
+ 3. **Home Assistant Centralized Management** - All configuration done on Home Assistant side
19
+ 4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
20
+ 5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reference/reachy_mini) architecture design and constraints
21
+ 6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
22
+ 7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
23
+ 8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
24
+ 9. **Preserve Functionality** - Any code modifications should optimize while preserving completed features; do not remove features to solve problems. When issues occur, first consult the reference examples to find a fix instead of adding more log output
25
+
26
+ ## Technical Architecture
27
+
28
+ ```
29
+ ┌─────────────────────────────────────────────────────────────────────────────┐
30
+ │ Reachy Mini (ARM64) │
31
+ │ │
32
+ │ ┌─────────────────────────────── AUDIO INPUT ───────────────────────────┐ │
33
+ │ │ ReSpeaker XVF3800 (16kHz) │ │
34
+ │ │ ┌──────────────┐ ┌──────────────────────────────────────────────┐ │ │
35
+ │ │ │ 4-Mic Array │ → │ XVF3800 DSP │ │ │
36
+ │ │ └──────────────┘ │ • Echo Cancellation (AEC) │ │ │
37
+ │ │ │ • Noise Suppression (NS) │ │ │
38
+ │ │ │ • Auto Gain Control (AGC, max 30dB) │ │ │
39
+ │ │ │ • Direction of Arrival (DOA) │ │ │
40
+ │ │ │ • Voice Activity Detection (VAD) │ │ │
41
+ │ │ └──────────────────────────────────────────────┘ │ │
42
+ │ │ │ │ │
43
+ │ │ ▼ │ │
44
+ │ │ ┌──────────────────────────────────────────────┐ │ │
45
+ │ │ │ Wake Word Detection (microWakeWord) │ │ │
46
+ │ │ │ • "Okay Nabu" / "Hey Jarvis" │ │ │
47
+ │ │ │ • Stop word detection │ │ │
48
+ │ │ └──────────────────────────────────────────────┘ │ │
49
+ │ └───────────────────────────────────────────────────────────────────────┘ │
50
+ │ │
51
+ │ ┌─────────────────────────────── AUDIO OUTPUT ──────────────────────────┐ │
52
+ │ │ ┌──────────────────────────┐ ┌──────────────────────────────────┐ │ │
53
+ │ │ │ TTS Player │ │ Music Player (Sendspin) │ │ │
54
+ │ │ │ • Voice assistant speech │ │ • Multi-room audio streaming │ │ │
55
+ │ │ │ • Sound effects │ │ • Auto-discovery via mDNS │ │ │
56
+ │ │ │ • Priority over music │ │ • Auto-pause during conversation │ │ │
57
+ │ │ └──────────────────────────┘ └──────────────────────────────────┘ │ │
58
+ │ │ │ │ │ │
59
+ │ │ └──────────────┬───────────────┘ │ │
60
+ │ │ ▼ │ │
61
+ │ │ ┌──────────────────────────────────────────────────┐ │ │
62
+ │ │ │ ReSpeaker Speaker (16kHz) │ │ │
63
+ │ │ └──────────────────────────────────────────────────┘ │ │
64
+ │ └───────────────────────────────────────────────────────────────────────┘ │
65
+ │ │
66
+ │ ┌─────────────────────────── VISION & TRACKING ─────────────────────────┐ │
67
+ │ │ ┌──────────────────────────┐ ┌──────────────────────────────────┐ │ │
68
+ │ │ │ Camera (VPU accelerated) │ → │ YOLO Face Detection │ │ │
69
+ │ │ │ • MJPEG stream server │ │ • AdamCodd/YOLOv11n-face │ │ │
70
+ │ │ │ • ESPHome Camera entity │ │ • Adaptive frame rate: │ │ │
71
+ │ │ └──────────────────────────┘ │ - 15fps: conversation/face │ │ │
72
+ │ │ │ - 2fps: idle (power saving) │ │ │
73
+ │ │ │ • look_at_image() pose calc │ │ │
74
+ │ │ │ • Smooth return after face lost │ │ │
75
+ │ │ └──────────────────────────────────┘ │ │
76
+ │ └───────────────────────────────────────────────────────────────────────┘ │
77
+ │ │
78
+ │ ┌─────────────────────────── MOTION CONTROL ────────────────────────────┐ │
79
+ │ │ MovementManager (100Hz Control Loop) │ │
80
+ │ │ ┌────────────────────────────────────────────────────────────────┐ │ │
81
+ │ │ │ Motion Layers (Priority: Move > Action > SpeechSway > Breath) │ │ │
82
+ │ │ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌──────────────┐ │ │ │
83
+ │ │ │ │ Move Queue │ │ Actions │ │ SpeechSway │ │ Breathing │ │ │ │
84
+ │ │ │ │ (Emotions) │ │ (Nod/Shake)│ │ (Voice VAD)│ │ (Idle anim) │ │ │ │
85
+ │ │ │ └────────────┘ └────────────┘ └────────────┘ └──────────────┘ │ │ │
86
+ │ │ └────────────────────────────────────────────────────────────────┘ │ │
87
+ │ │ │ │
88
+ │ │ ┌────────────────────────────────────────────────────────────────┐ │ │
89
+ │ │ │ Face Tracking Offsets (Secondary Pose Overlay) │ │ │
90
+ │ │ │ • Pitch offset: +9° (down compensation) │ │ │
91
+ │ │ │ • Yaw offset: -7° (right compensation) │ │ │
92
+ │ │ └────────────────────────────────────────────────────────────────┘ │ │
93
+ │ │ │ │
94
+ │ │ State Machine: on_wakeup → on_listening → on_speaking → on_idle │ │
95
+ │ └───────────────────────────────────────────────────────────────────────┘ │
96
+ │ │
97
+ │ ┌─────────────────────────── TAP DETECTION ─────────────────────────────┐ │
98
+ │ │ IMU Accelerometer (Wireless version only) - DISABLED │ │
99
+ │ │ • Tap-to-wake: REMOVED (too many false triggers) │ │
100
+ │ │ • Continuous conversation now controlled via Home Assistant switch │ │
101
+ │ └───────────────────────────────────────────────────────────────────────┘ │
102
+ │ │
103
+ │ ┌─────────────────────────── ESPHOME SERVER ────────────────────────────┐ │
104
+ │ │ Port 6053 (mDNS auto-discovery) │ │
105
+ │ │ • 43+ entities (sensors, controls, media player, camera) │ │
106
+ │ │ • Voice Assistant pipeline integration │ │
107
+ │ │ • Real-time state synchronization │ │
108
+ │ └───────────────────────────────────────────────────────────────────────┘ │
109
+ └─────────────────────────────────────────────────────────────────────────────┘
110
+
111
+ │ ESPHome Protocol (protobuf)
112
+
113
+ ┌─────────────────────────────────────────────────────────────────────────────┐
114
+ │ Home Assistant │
115
+ │ ┌──────────────────┐ ┌──────────────────┐ ┌────────────────────────────┐ │
116
+ │ │ STT Engine │ │ Intent Processing│ │ TTS Engine │ │
117
+ │ │ (User configured)│ │ (Conversation) │ │ (User configured) │ │
118
+ │ └──────────────────┘ └──────────────────┘ └────────────────────────────┘ │
119
+ └─────────────────────────────────────────────────────────────────────────────┘
120
+ ```
121
+
122
+ ## Completed Features
123
+
124
+ ### Core Features
125
+ - [x] ESPHome protocol server implementation
126
+ - [x] mDNS service discovery (auto-discovered by Home Assistant)
127
+ - [x] Local wake word detection (microWakeWord)
128
+ - [x] Continuous conversation mode (controlled via Home Assistant switch)
129
+ - [x] Audio stream transmission to Home Assistant
130
+ - [x] TTS audio playback
131
+ - [x] Stop word detection
132
+
133
+ ### Reachy Mini Integration
134
+ - [x] Use Reachy Mini SDK microphone input
135
+ - [x] Use Reachy Mini SDK speaker output
136
+ - [x] Head motion control (nod, shake, gaze)
137
+ - [x] Antenna animation control
138
+ - [x] Voice state feedback actions
139
+ - [x] YOLO face tracking (replaces DOA sound source localization)
140
+ - [x] 100Hz unified motion control loop
141
+
142
+ ### Application Architecture
143
+ - [x] Compliant with Reachy Mini App architecture
144
+
145
+
146
+
147
+ ## File List
148
+
149
+ ```
150
+ reachy_mini_ha_voice/
151
+ ├── reachy_mini_ha_voice/
152
+ │ ├── __init__.py # Package initialization
153
+ │ ├── __main__.py # Command line entry
154
+ │ ├── main.py # ReachyMiniApp entry
155
+ │ ├── voice_assistant.py # Voice assistant service
156
+ │ ├── satellite.py # ESPHome protocol handling
157
+ │ ├── audio_player.py # Audio player
158
+ │ ├── camera_server.py # MJPEG camera stream server + face tracking
159
+ │ ├── head_tracker.py # YOLO face detector
160
+ │ ├── motion.py # Motion control (high-level API)
161
+ │ ├── movement_manager.py # Unified movement manager (100Hz control loop)
162
+ │ ├── animation_player.py # JSON-driven animation system
163
+ │ ├── models.py # Data models
164
+ │ ├── entity.py # ESPHome base entity
165
+ │ ├── entity_extensions.py # Extended entity types
166
+ │ ├── entity_registry.py # Entity registry
167
+ │ ├── reachy_controller.py # Reachy Mini controller wrapper
168
+ │ ├── gesture_detector.py # Gesture detection
169
+ │ ├── api_server.py # API server
170
+ │ ├── zeroconf.py # mDNS discovery
171
+ │ └── util.py # Utility functions
172
+ ├── animations/ # Animation definitions
173
+ │ └── conversation_animations.json # Conversation state animations
174
+ ├── wakewords/ # Wake word models (auto-download)
175
+ │ ├── okay_nabu.json
176
+ │ ├── okay_nabu.tflite
177
+ │ ├── hey_jarvis.json
178
+ │ ├── hey_jarvis.tflite
179
+ │ ├── stop.json
180
+ │ └── stop.tflite
181
+ ├── sounds/ # Sound effect files (auto-download)
182
+ │ ├── wake_word_triggered.flac
183
+ │ └── timer_finished.flac
184
+ ├── pyproject.toml # Project configuration
185
+ ├── README.md # Documentation
186
+ └── PROJECT_PLAN.md # Project plan
187
+ ```
188
+
189
+ ## Dependencies
190
+
191
+ ```toml
192
+ dependencies = [
193
+ "reachy-mini", # Reachy Mini SDK
194
+ "sounddevice>=0.4.6", # Audio processing (backup)
195
+ "soundfile>=0.12.0", # Audio file reading
196
+ "numpy>=1.24.0", # Numerical computation
197
+ "pymicro-wakeword>=2.0.0,<3.0.0", # Wake word detection
198
+ "pyopen-wakeword>=1.0.0,<2.0.0", # Backup wake word
199
+ "aioesphomeapi>=42.0.0", # ESPHome protocol
200
+ "zeroconf>=0.100.0", # mDNS discovery
201
+ "scipy>=1.10.0", # Motion control
202
+ "pydantic>=2.0.0", # Data validation
203
+ ]
204
+ ```
205
+
206
+ ## Usage Flow
207
+
208
+ 1. **Install App**
209
+ - Install `reachy_mini_ha_voice` from Reachy Mini App Store
210
+
211
+ 2. **Start App**
212
+ - App auto-starts ESPHome server (port 6053)
213
+ - Auto-downloads required models and sounds
214
+
215
+ 3. **Connect Home Assistant**
216
+ - Home Assistant auto-discovers device (mDNS)
217
+ - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
218
+
219
+ 4. **Use Voice Assistant**
220
+ - Say "Okay Nabu" to wake
221
+ - Speak command
222
+ - Reachy Mini provides motion feedback
223
+
224
+ ## ESPHome Entity Planning
225
+
226
+ Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
227
+
228
+ ### Implemented Entities
229
+
230
+ | Entity Type | Name | Description |
231
+ |-------------|------|-------------|
232
+ | Media Player | `media_player` | Audio playback control |
233
+ | Voice Assistant | `voice_assistant` | Voice assistant pipeline |
234
+
235
+ ### Implemented Control Entities (Read/Write)
236
+
237
+ #### Phase 1-3: Basic Controls and Pose
238
+
239
+ | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
240
+ |---------------------|------|---------|---------------|-------------|
241
+ | `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
242
+ | `Select` | `motor_mode` | `set_motor_control_mode()` | enabled/disabled/gravity_compensation | Motor mode selection |
243
+ | `Switch` | `motors_enabled` | `enable_motors()` / `disable_motors()` | on/off | Motor torque switch |
244
+ | `Button` | `wake_up` | `mini.wake_up()` | - | Wake robot action |
245
+ | `Button` | `go_to_sleep` | `mini.goto_sleep()` | - | Sleep robot action |
246
+ | `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
247
+ | `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
248
+ | `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
249
+ | `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
250
+ | `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
251
+ | `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
252
+ | `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
253
+ | `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
254
+ | `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
255
+
256
+ #### Phase 4: Gaze Control
257
+
258
+ | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
259
+ |---------------------|------|---------|---------------|-------------|
260
+ | `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
261
+ | `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
262
+ | `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
263
+
264
+
265
+ ### Implemented Sensor Entities (Read-only)
266
+
267
+ #### Phase 1 & 5: Basic Status and Audio Sensors
268
+
269
+ | ESPHome Entity Type | Name | SDK API | Description |
270
+ |---------------------|------|---------|-------------|
271
+ | `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
272
+ | `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
273
+ | `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
274
+ | `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
275
+ | `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
276
+
277
+ #### Phase 6: Diagnostic Information
278
+
279
+ | ESPHome Entity Type | Name | SDK API | Description |
280
+ |---------------------|------|---------|-------------|
281
+ | `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
282
+ | `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
283
+ | `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
284
+ | `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
285
+ | `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
286
+ | `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
287
+
288
+ #### Phase 7: IMU Sensors (Wireless version only)
289
+
290
+ | ESPHome Entity Type | Name | SDK API | Description |
291
+ |---------------------|------|---------|-------------|
292
+ | `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
293
+ | `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
294
+ | `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
295
+ | `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
296
+ | `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
297
+ | `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
298
+ | `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
299
+
300
+ #### Phase 8-12: Extended Features
301
+
302
+ | ESPHome Entity Type | Name | Description |
303
+ |---------------------|------|-------------|
304
+ | `Select` | `emotion` | Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust) |
305
+ | `Number` | `microphone_volume` | Microphone volume (0-100%) |
306
+ | `Camera` | `camera` | ESPHome Camera entity (live preview) |
307
+ | `Number` | `led_brightness` | LED brightness (0-100%) - disabled, see Phase 11 (LEDs are hidden inside the robot) |
308
+ | `Select` | `led_effect` | LED effect (off/solid/breathing/rainbow/doa) - disabled, see Phase 11 |
309
+ | `Number` | `led_color_r` | LED red component (0-255) - disabled, see Phase 11 |
310
+ | `Number` | `led_color_g` | LED green component (0-255) - disabled, see Phase 11 |
311
+ | `Number` | `led_color_b` | LED blue component (0-255) - disabled, see Phase 11 |
312
+ | `Switch` | `agc_enabled` | Auto gain control switch |
313
+ | `Number` | `agc_max_gain` | AGC max gain (0-30 dB) |
314
+ | `Number` | `noise_suppression` | Noise suppression level (0-100%) |
315
+ | `Binary Sensor` | `echo_cancellation_converged` | Echo cancellation convergence status |
316
+
317
+ > **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
318
+ > using the `Number` type for bidirectional control. Call `goto_target()` when setting new values, and call `get_current_head_pose()` etc. when reading current values.
319
+
320
+ ### Implementation Priority
321
+
322
+ 1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
323
+ - [x] `daemon_state` - Daemon status sensor
324
+ - [x] `backend_ready` - Backend ready status
325
+ - [x] `error_message` - Error message
326
+ - [x] `speaker_volume` - Speaker volume control
327
+
328
+ 2. **Phase 2 - Motor Control** (High Priority) ✅ **Completed**
329
+ - [x] `motors_enabled` - Motor switch
330
+ - [x] `motor_mode` - Motor mode selection (enabled/disabled/gravity_compensation)
331
+ - [x] `wake_up` / `go_to_sleep` - Wake/sleep buttons
332
+
333
+ 3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
334
+ - [x] `head_x/y/z` - Head position control
335
+ - [x] `head_roll/pitch/yaw` - Head angle control
336
+ - [x] `body_yaw` - Body yaw angle control
337
+ - [x] `antenna_left/right` - Antenna angle control
338
+
339
+ 4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
340
+ - [x] `look_at_x/y/z` - Gaze point coordinate control
341
+
342
+ 5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
343
+ - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
344
+ - [x] `speech_detected` - Speech detection status
345
+ - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
346
+ - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
347
+ - Note: DOA only read once at wakeup to avoid daemon pressure; face tracking takes over after
348
+
349
+ 6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
350
+ - [x] `control_loop_frequency` - Control loop frequency
351
+ - [x] `sdk_version` - SDK version
352
+ - [x] `robot_name` - Robot name
353
+ - [x] `wireless_version` - Wireless version flag
354
+ - [x] `simulation_mode` - Simulation mode flag
355
+ - [x] `wlan_ip` - Wireless IP address
356
+
357
+ 7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
358
+ - [x] `imu_accel_x/y/z` - Accelerometer
359
+ - [x] `imu_gyro_x/y/z` - Gyroscope
360
+ - [x] `imu_temperature` - IMU temperature
361
+
362
+ 8. **Phase 8 - Emotion Control** ✅ **Completed**
363
+ - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
364
+
365
+ 9. **Phase 9 - Audio Control** ✅ **Completed**
366
+ - [x] `microphone_volume` - Microphone volume control (0-100%)
367
+
368
+ 10. **Phase 10 - Camera Integration** ✅ **Completed**
369
+ - [x] `camera` - ESPHome Camera entity (live preview)
370
+
371
+ 11. **Phase 11 - LED Control** ❌ **Disabled (LEDs hidden inside robot)**
372
+ - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
373
+ - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
374
+ - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
375
+
376
+ 12. **Phase 12 - Audio Processing Parameters** ✅ **Completed**
377
+ - [x] `agc_enabled` - Auto gain control switch
378
+ - [x] `agc_max_gain` - AGC max gain (0-30 dB)
379
+ - [x] `noise_suppression` - Noise suppression level (0-100%)
380
+ - [x] `echo_cancellation_converged` - Echo cancellation convergence status (read-only)
381
+
382
+ 13. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
383
+ - [x] `sendspin_enabled` - Sendspin switch (Switch)
384
+ - [x] `sendspin_url` - Sendspin server URL (Text Sensor)
385
+ - [x] `sendspin_connected` - Sendspin connection status (Binary Sensor)
386
+ - [x] AudioPlayer integrates aiosendspin library
387
+ - [x] TTS audio sent to both local speaker and Sendspin server
388
+
389
+ 14. **Phase 22 - Gesture Detection** ✅ **Completed**
390
+ - [x] `gesture_detected` - Detected gesture name (Text Sensor)
391
+ - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
392
+ - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
393
+ - [x] Real-time state push to Home Assistant
394
+ - [x] 18 supported gestures:
395
+ | Gesture | Emoji | Gesture | Emoji |
396
+ |---------|-------|---------|-------|
397
+ | call | 🤙 | like | 👍 |
398
+ | dislike | 👎 | mute | 🤫 |
399
+ | fist | ✊ | ok | 👌 |
400
+ | four | 🖐️ | one | ☝️ |
401
+ | palm | ✋ | peace | ✌️ |
402
+ | peace_inverted | 🔻✌️ | rock | 🤘 |
403
+ | stop | 🛑 | stop_inverted | 🔻🛑 |
404
+ | three | 3️⃣ | three2 | 🤟 |
405
+ | two_up | ✌️☝️ | two_up_inverted | 🔻✌️☝️ |
406
+
407
+ ---
408
+
409
+ ## 🎉 Phase 1-13 + Phase 22 Entities Completed!
410
+
411
+ **Total Completed: 47 entities**
412
+ - Phase 1: 4 entities (Basic status and volume)
413
+ - Phase 2: 4 entities (Motor control)
414
+ - Phase 3: 9 entities (Pose control)
415
+ - Phase 4: 3 entities (Gaze control)
416
+ - Phase 5: 2 entities (Audio sensors)
417
+ - Phase 6: 6 entities (Diagnostic information)
418
+ - Phase 7: 7 entities (IMU sensors)
419
+ - Phase 8: 1 entity (Emotion control)
420
+ - Phase 9: 1 entity (Microphone volume)
421
+ - Phase 10: 1 entity (Camera)
422
+ - Phase 11: 0 entities (LED control - Disabled)
423
+ - Phase 12: 4 entities (Audio processing parameters)
424
+ - Phase 13: 3 entities (Sendspin audio output)
425
+ - Phase 22: 2 entities (Gesture detection)
426
+
427
+
428
+ ---
429
+
430
+ ## 🚀 Voice Assistant Enhancement Features Implementation Status
431
+
432
+ ### Phase 14 - Emotion Action Feedback System (Partial) 🟡
433
+
434
+ **Implementation Status**: Basic infrastructure ready, supports manual trigger, uses voice-driven natural micro-movements during conversation
435
+
436
+ **Implemented Features**:
437
+ - ✅ Phase 8 Emotion Selector entity (`emotion`)
438
+ - ✅ Basic emotion action playback API (`_play_emotion`)
439
+ - ✅ Emotion mapping: Happy/Sad/Angry/Fear/Surprise/Disgust
440
+ - ✅ Integration with HuggingFace action library (`pollen-robotics/reachy-mini-emotions-library`)
441
+ - ✅ SpeechSway system for natural head micro-movements during conversation (non-blocking)
442
+ - ✅ Tap detection disabled during emotion playback (polls daemon API for completion)
443
+
444
+ **Design Decisions**:
445
+ - 🎯 No auto-play of full emotion actions during conversation to avoid blocking
446
+ - 🎯 Use voice-driven head sway (SpeechSway) for natural motion feedback
447
+ - 🎯 Emotion actions retained as manual trigger feature via ESPHome entity
448
+ - 🎯 Tap detection waits for actual move completion via `/api/move/running` polling
449
+
450
+ **Not Implemented**:
451
+ - ❌ Auto-trigger emotion actions based on voice assistant response (decided not to implement to avoid blocking)
452
+ - ❌ Intent recognition and emotion matching
453
+ - ❌ Dance action library integration
454
+ - ❌ Context awareness (e.g., weather query - sunny plays happy, rainy plays sad)
455
+
456
+ **Code Locations**:
457
+ - `entity_registry.py:633-658` - Emotion Selector entity
458
+ - `satellite.py:_play_emotion()` - Emotion playback with move UUID tracking
459
+ - `satellite.py:_wait_for_move_completion()` - Polls daemon API for move completion
460
+ - `motion.py:132-156` - Conversation start motion control (uses SpeechSway)
461
+ - `movement_manager.py:541-595` - Move queue management (allows SpeechSway overlay)
462
+
463
+ **Actual Behavior**:
464
+
465
+ | Voice Assistant Event | Actual Action | Implementation Status |
466
+ |----------------------|---------------|----------------------|
467
+ | Wake word detected | Turn toward sound source + nod confirmation | ✅ Implemented |
468
+ | Conversation start | Voice-driven head micro-movements (SpeechSway) | ✅ Implemented |
469
+ | During conversation | Continuous voice-driven micro-movements + breathing animation | ✅ Implemented |
470
+ | Conversation end | Return to neutral position + breathing animation | ✅ Implemented |
471
+ | Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
472
+
473
+ **Technical Details**:
474
+ ```python
475
+ # motion.py - Use SpeechSway instead of full emotion actions during conversation
476
+ def on_speaking_start(self):
477
+ self._is_speaking = True
478
+ self._movement_manager.set_state(RobotState.SPEAKING)
479
+ # SpeechSway automatically generates natural head micro-movements based on audio loudness
480
+ # No full emotion actions played to avoid blocking conversation experience
481
+
482
+ # movement_manager.py - Motion layering system
483
+ # 1. Move queue (emotion actions) - Sets base pose
484
+ # 2. Action (nod/shake etc.) - Overlays on base pose
485
+ # 3. SpeechSway - Voice-driven micro-movements, can coexist with Move
486
+ # 4. Breathing - Idle breathing animation
487
+ ```
488
+
489
+ **Original Plan** (Decided not to implement to avoid blocking conversation):
490
+
491
+ | Voice Assistant Event | Original Planned Action | Reason Not Implemented |
492
+ |----------------------|------------------------|------------------------|
493
+ | Positive response received | Play "happy" action | Full action would block conversation fluency |
494
+ | Negative response received | Play "sad" action | Full action would block conversation fluency |
495
+ | Play music/entertainment | Play "dance" action | Full action would block conversation fluency |
496
+ | Timer completed | Play "alert" action | Full action would block conversation fluency |
497
+ | Error/cannot understand | Play "confused" action | Full action would block conversation fluency |
498
+
499
+ **Manual Emotion Trigger Example**:
500
+ ```yaml
501
+ # Home Assistant automation example - Manual emotion trigger
502
+ automation:
503
+ - alias: "Reachy Good Morning Greeting"
504
+ trigger:
505
+ - platform: time
506
+ at: "07:00:00"
507
+ action:
508
+ - service: select.select_option
509
+ target:
510
+ entity_id: select.reachy_mini_emotion
511
+ data:
512
+ option: "Happy"
513
+ ```
514
+
515
+ ### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) ✅ **Completed**
516
+
517
+ **Goal**: Implement natural face tracking so the robot looks at the speaker during conversation.
518
+
519
+ **Design Decision**:
520
+ - ✅ DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
521
+ - ✅ YOLO face detection: Takes over after initial turn for continuous tracking
522
+ - Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking
523
+
524
+ **Wakeup Turn-to-Sound Flow**:
525
+ 1. Wake word detected → Read DOA angle once (avoid daemon pressure)
526
+ 2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
527
+ 3. Face tracking takes over for continuous tracking during conversation
528
+
529
+ **Implemented Features**:
530
+
531
+ | Feature | Description | Implementation Location | Status |
532
+ |---------|-------------|------------------------|--------|
533
+ | DOA turn-to-sound | Turn toward speaker at wakeup | `satellite.py:_turn_to_sound_source()` | ✅ Implemented |
534
+ | YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `head_tracker.py` | ✅ Implemented |
535
+ | Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
536
+ | look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
537
+ | Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
538
+ | face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
539
+ | DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
540
+ | Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
541
+ | Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
542
+
543
+ **Resource Optimization (v0.5.1, updated v0.6.2)**:
544
+ - During conversation (listening/thinking/speaking): High-frequency tracking 15fps
545
+ - Idle with face detected: High-frequency tracking 15fps
546
+ - Idle without face for 5s: Low-power mode 2fps
547
+ - Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
548
+ - Gesture detection only runs when face detected recently (within 5s)
549
+ - Immediately restore high-frequency tracking when face detected
550
+
551
+ **Code Locations**:
552
+ - `satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
553
+ - `head_tracker.py` - YOLO face detector (`HeadTracker` class)
554
+ - `camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
555
+ - `camera_server.py:set_conversation_mode()` - Conversation mode switch API
556
+ - `satellite.py:_set_conversation_mode()` - Voice assistant state integration
557
+ - `movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
558
+
559
+ **Technical Details**:
560
+ ```python
561
+ # camera_server.py - Adaptive frame rate face tracking
562
+ class MJPEGCameraServer:
563
+ def __init__(self):
564
+ self._fps_high = 15 # During conversation/face detected
565
+ self._fps_low = 2 # Idle without face (5-30s)
566
+ self._fps_idle = 0.5 # Ultra-low power (>30s without face)
567
+ self._low_power_threshold = 5.0 # 5s without face switches to low power
568
+ self._idle_threshold = 30.0 # 30s without face switches to idle mode
569
+
570
+ def _should_run_ai_inference(self, current_time):
571
+ # Conversation mode: Always high-frequency tracking
572
+ if self._in_conversation:
573
+ return True
574
+ # High-frequency mode: Track every frame
575
+ if self._current_fps == self._fps_high:
576
+ return True
577
+ # Low/idle power mode: Periodic detection
578
+ return time.since_last_check >= 1/self._current_fps
579
+
580
+ # satellite.py - Voice assistant state integration
581
+ def _reachy_on_listening(self):
582
+ self._set_conversation_mode(True) # Start conversation, high-frequency tracking
583
+
584
+ def _reachy_on_idle(self):
585
+ self._set_conversation_mode(False) # End conversation, adaptive tracking
586
+ ```
587
+
588
+
589
+ ### Phase 16 - Cartoon Style Motion Mode (Partial) 🟡
590
+
591
+ **Goal**: Use SDK interpolation techniques for more expressive robot movements.
592
+
593
+ **SDK Support**: `InterpolationTechnique` enum
594
+ - `LINEAR` - Linear, mechanical feel
595
+ - `MIN_JERK` - Minimum jerk, natural and smooth (default)
596
+ - `EASE_IN_OUT` - Ease in-out, elegant
597
+ - `CARTOON` - Cartoon style, with bounce effect, lively and cute
598
+
599
+ **Implemented Features**:
600
+ - ✅ 100Hz unified control loop (`movement_manager.py`) - Restored to 100Hz after daemon update
601
+ - ✅ JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
602
+ - ✅ Conversation state animations (idle/listening/thinking/speaking)
603
+ - ✅ Pose change detection - Only send commands on significant changes (threshold 0.005)
604
+ - ✅ State query caching - 2s TTL, reduces daemon load
605
+ - ✅ Smooth interpolation (ease in-out curve)
606
+ - ✅ Command queue mode - Thread-safe external API
607
+ - ✅ Error throttling - Prevents log explosion
608
+ - ✅ Connection health monitoring - Auto-detect and recover from connection loss
609
+
610
+ **Animation System (v0.5.13)**:
611
+ - `AnimationPlayer` class loads animations from `conversation_animations.json`
612
+ - Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
613
+ - Smooth transitions between animations (configurable duration)
614
+ - State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
615
+
616
+ **Not Implemented**:
617
+ - ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
618
+ - ❌ Exaggerated cartoon bounce effects
619
+
620
+ **Code Locations**:
621
+ - `animation_player.py` - AnimationPlayer class
622
+ - `animations/conversation_animations.json` - Animation definitions
623
+ - `movement_manager.py` - 100Hz control loop with animation integration
624
+
625
+ **Scene Implementation Status**:
626
+
627
+ | Scene | Recommended Interpolation | Effect | Status |
628
+ |-------|--------------------------|--------|--------|
629
+ | Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
630
+ | Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
631
+ | Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
632
+ | Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
633
+ | Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
634
+ | Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
635
+
636
+ ### Phase 17 - Antenna Sync Animation During Speech (Completed) ✅
637
+
638
+ **Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
639
+
640
+ **Implemented Features**:
641
+ - ✅ JSON-driven animation system with antenna movements
642
+ - ✅ Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
643
+ - ✅ State-specific antenna animations (listening/thinking/speaking)
644
+ - ✅ Smooth transitions between animation states
645
+
646
+ **Code Locations**:
647
+ - `animation_player.py` - AnimationPlayer with antenna offset calculation
648
+ - `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
649
+ - `movement_manager.py` - Antenna offset composition in final pose
650
+
651
+ ### Phase 18 - Visual Gaze Interaction (Not Implemented) ❌
652
+
653
+ **Goal**: Use camera to detect faces for eye contact.
654
+
655
+ **SDK Support**:
656
+ - `look_at_image(u, v)` - Look at point in image
657
+ - `look_at_world(x, y, z)` - Look at world coordinate point
658
+ - `media.get_frame()` - Get camera frame (✅ Already implemented in `camera_server.py:146`)
659
+
660
+ **Not Implemented Features**:
661
+
662
+ | Feature | Description | Status |
663
+ |---------|-------------|--------|
664
+ | Face detection | Use OpenCV/MediaPipe to detect faces | ❌ Not implemented |
665
+ | Eye tracking | Look at speaker's face during conversation | ❌ Not implemented |
666
+ | Multi-person switching | When multiple people detected, look at current speaker | ❌ Not implemented |
667
+ | Idle scanning | Randomly look around when idle | ❌ Not implemented |
668
+
669
+ ### Phase 19 - Gravity Compensation Interactive Mode (Partial) 🟡
670
+
671
+ **Goal**: Allow users to physically touch and guide robot head for "teaching" style interaction.
672
+
673
+ **SDK Support**: `enable_gravity_compensation()` - Motors enter gravity compensation mode, can be manually moved
674
+
675
+ **Implemented Features**:
676
+ - ✅ Gravity compensation mode switch (`motor_mode` Select entity, option "gravity_compensation")
677
+ - ✅ `reachy_controller.py:236-237` - Gravity compensation API call
678
+
679
+ **Not Implemented**:
680
+ - ❌ Teaching mode - Record motion trajectory
681
+ - ❌ Save/playback custom actions
682
+ - ❌ Voice command triggered teaching flow
683
+
684
+ **Application Scenarios**:
685
+ - ❌ User says "Let me teach you a move" → Enter gravity compensation mode
686
+ - ❌ User manually moves head → Record motion trajectory
687
+ - ❌ User says "Remember this" → Save action
688
+ - ❌ User says "Do that action again" → Playback recorded action
689
+
690
+ ### Phase 20 - Environment Awareness Response (Partial) 🟡
691
+
692
+ **Goal**: Use IMU sensors to sense environment changes and respond.
693
+
694
+ **SDK Support**:
695
+ - ✅ `mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
696
+ - ✅ `mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
697
+
698
+ **Implemented Features**:
699
+
700
+ | Detection Event | Response Action | Status |
701
+ |-----------------|-----------------|--------|
702
+ | Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
703
+
704
+ **Tap-to-wake REMOVED** (v0.5.16):
705
+ - Too many false triggers from robot movement and vibrations
706
+ - Continuous conversation mode now controlled via "Continuous Conversation" switch in Home Assistant
707
+ - Users can enable/disable continuous conversation from HA dashboard
708
+
709
+ **Technical Implementation**:
710
+ - `models.py` - `Preferences.continuous_conversation` field
711
+ - `entity_registry.py` - `continuous_conversation` Switch entity (Phase 21)
712
+ - `satellite.py` - `_handle_run_end()` checks `preferences.continuous_conversation`
713
+
714
+ **Not Implemented**:
715
+
716
+ | Detection Event | Response Action | Status |
717
+ |-----------------|-----------------|--------|
718
+ | Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
719
+ | Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
720
+ | Long idle | Enter sleep animation | ❌ Not implemented |
721
+
722
+ ### Phase 21 - Home Assistant Scene Integration (Not Implemented) ❌
723
+
724
+ **Goal**: Trigger robot actions based on Home Assistant scenes/automations.
725
+
726
+ **Implementation**: Via ESPHome service calls
727
+
728
+ **Not Implemented Scenes**:
729
+
730
+ | HA Scene | Robot Response | Status |
731
+ |----------|----------------|--------|
732
+ | Good morning scene | Play wake action + "Good morning!" | ❌ Not implemented |
733
+ | Good night scene | Play sleep action + "Good night~" | ❌ Not implemented |
734
+ | Someone home | Turn toward door + wave + "Welcome home!" | ❌ Not implemented |
735
+ | Doorbell rings | Turn toward door + alert action | ❌ Not implemented |
736
+ | Play music | Sway with music rhythm | ❌ Not implemented |
737
+
738
+
739
+ ---
740
+
741
+ ## 📊 Feature Implementation Summary
742
+
743
+ ### ✅ Completed Features
744
+
745
+ #### Core Voice Assistant (Phase 1-12)
746
+ - **45+ ESPHome entities** - All implemented
747
+ - **Basic voice interaction** - Wake word detection, STT/TTS integration
748
+ - **Motion feedback** - Nod, shake, gaze and other basic actions
749
+ - **Audio processing** - AGC, noise suppression, echo cancellation
750
+ - **Camera stream** - MJPEG live preview
751
+
752
+ #### Partially Implemented Features (Phase 14-21)
753
+ - **Phase 14** - Emotion action API infrastructure (manual trigger available)
754
+ - **Phase 19** - Gravity compensation mode switch (teaching flow not implemented)
755
+
756
+ ### ❌ Not Implemented Features
757
+
758
+ #### High Priority
759
+ - ~~**Phase 13** - Sendspin audio playback support~~ ✅ **Completed**
760
+ - **Phase 14** - Auto emotion action feedback (needs voice assistant event association)
761
+ - **Phase 15** - Continuous sound source tracking (only turn toward at wakeup)
762
+
763
+ #### Medium Priority
764
+ - **Phase 16** - Cartoon style motion mode (needs dynamic interpolation switching)
765
+ - ~~**Phase 17** - Antenna sync animation~~ ✅ **Completed**
766
+ - **Phase 18** - Face tracking and eye contact interaction
767
+
768
+ #### Low Priority
769
+ - **Phase 19** - Teaching mode record/playback functionality
770
+ - **Phase 20** - IMU environment awareness response
771
+ - **Phase 21** - Home Assistant scene integration
772
+
773
+ ---
774
+
775
+ ## Feature Priority Summary (Updated)
776
+
777
+ ### High Priority (Completed ✅)
778
+ - ✅ **Phase 1-12**: Basic ESPHome entities (45+)
779
+ - ✅ Core voice assistant functionality
780
+ - ✅ Basic motion feedback (nod, shake, gaze)
781
+
782
+ ### High Priority (Partial 🟡)
783
+ - 🟡 **Phase 14**: Emotion action feedback system
784
+ - ✅ Emotion Selector entity and API infrastructure
785
+ - ❌ Auto-trigger emotion actions based on voice assistant response
786
+ - ❌ Intent recognition and emotion matching
787
+ - ❌ Dance action library integration
788
+
789
+ ### High Priority (Not Implemented ❌)
790
+ - ❌ **Phase 15**: Smart sound source tracking enhancement
791
+ - ✅ Turn toward sound source at wakeup
792
+ - ❌ Continuous sound source tracking
793
+ - ❌ Multi-person conversation switching
794
+ - ❌ Sound source visualization
795
+
796
+ ### Medium Priority (Completed ✅)
797
+ - ✅ **Phase 16**: Cartoon style motion mode
798
+ - ✅ 100Hz unified control loop architecture (restored after daemon update)
799
+ - ✅ JSON-driven animation system (AnimationPlayer)
800
+ - ✅ Conversation state animations (idle/listening/thinking/speaking)
801
+ - ✅ Pose change detection + state query caching (reduces daemon load)
802
+ - ❌ Dynamic interpolation technique switching (CARTOON etc.)
803
+ - ✅ **Phase 17**: Antenna sync during speech
804
+ - ✅ JSON-driven antenna animations with different patterns (both/wiggle)
805
+ - ✅ State-specific antenna movements
806
+
807
+ ### Medium Priority (Not Implemented ❌)
808
+ - ❌ **Phase 18**: Visual gaze interaction - Eye contact
809
+
810
+ ### Low Priority (Partial 🟡)
811
+ - 🟡 **Phase 19**: Gravity compensation interactive mode
812
+ - ✅ Gravity compensation mode switch
813
+ - ❌ Teaching style interaction (record/playback functionality)
814
+
815
+ ### Low Priority (Not Implemented ❌)
816
+ - ❌ **Phase 20**: Environment awareness response - IMU triggered actions
817
+ - ❌ **Phase 21**: Home Assistant scene integration - Smart home integration
818
+
819
+ ---
820
+
821
+ ## 📈 Completion Statistics
822
+
823
+ | Phase | Status | Completion | Notes |
824
+ |-------|--------|------------|-------|
825
+ | Phase 1-12 | ✅ Complete | 100% | 40 ESPHome entities implemented (Phase 11 LED disabled) |
826
+ | Phase 14 | 🟡 Partial | 30% | API infrastructure ready, missing auto-trigger |
827
+ | Phase 15 | ❌ Not done | 20% | Only turn toward at wakeup implemented |
828
+ | Phase 16 | 🟡 Partial | 80% | 100Hz control loop + JSON animation system + pose change detection + state cache implemented |
829
+ | Phase 17 | ✅ Complete | 100% | JSON-driven animation with antenna movements |
830
+ | Phase 18 | ❌ Not done | 10% | Camera implemented, missing face detection |
831
+ | Phase 19 | 🟡 Partial | 40% | Mode switch implemented, missing teaching flow |
832
+ | Phase 20 | ❌ Not done | 10% | IMU data exposed, missing trigger logic |
833
+ | Phase 21 | ❌ Not done | 0% | Not implemented |
834
+
835
+ **Overall Completion**: **Phase 1-12: 100%** | **Phase 14-21: ~35%**
836
+
837
+
838
+ ---
839
+
840
+ ## 🔧 Daemon Crash Fix (2026-01-05)
841
+
842
+ ### Problem Description
843
+ During long-term operation, the `reachy_mini daemon` would crash, causing the robot to become unresponsive.
844
+
845
+ ### Root Cause
846
+ 1. **100Hz control loop too frequent** - Calling `robot.set_target()` every 10ms, even when pose hasn't changed
847
+ 2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
848
+ 3. **Missing change detection** - The control loop kept sending the same commands even when the pose had not changed
849
+ 4. **Zenoh message queue blocking** - Accumulated 150+ messages per second, daemon cannot process in time
850
+
851
+ ### Fix Solution
852
+
853
+ #### 1. Reduce control loop frequency (movement_manager.py)
854
+ ```python
855
+ # Reduced from 100Hz to 20Hz
856
+ CONTROL_LOOP_FREQUENCY_HZ = 20 # 80% reduction in messages
857
+ ```
858
+
859
+ #### 2. Add pose change detection (movement_manager.py)
860
+ ```python
861
+ # Only send commands on significant pose changes
862
+ if self._last_sent_pose is not None:
863
+ max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
864
+ if max_diff < 0.001: # Threshold: 0.001 rad or 0.001 m
865
+ return # Skip sending
866
+ ```
867
+
868
+ #### 3. State query caching (reachy_controller.py)
869
+ ```python
870
+ # Cache daemon status query results
871
+ self._cache_ttl = 0.1 # 100ms TTL
872
+ self._last_status_query = 0.0
873
+
874
+ def _get_cached_status(self):
875
+ now = time.time()
876
+ if now - self._last_status_query < self._cache_ttl:
877
+ return self._state_cache.get('status') # Use cache
878
+ # ... query and update cache
879
+ ```
880
+
881
+ #### 4. Head pose query caching (reachy_controller.py)
882
+ ```python
883
+ # Cache get_current_head_pose() and get_current_joint_positions() results
884
+ def _get_cached_head_pose(self):
885
+ # Reuse cached results within 100ms
886
+ ```
887
+
888
+ ### Fix Results
889
+
890
+ | Metric | Before Fix | After Fix | Improvement |
891
+ |--------|------------|-----------|-------------|
892
+ | Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
893
+ | State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
894
+ | Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
895
+ | Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
896
+ | Expected stability | Crash within hours | Stable for days | Major improvement |
897
+
898
+ ### Related Files
899
+ - `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
900
+ - `movement_manager.py` - Control loop optimization
901
+ - `reachy_controller.py` - State query caching
902
+
903
+ ### Future Optimization Suggestions
904
+ 1. ⏳ Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
905
+ 2. ⏳ Batch state queries - Get all states at once
906
+ 3. ⏳ Performance monitoring and alerts - Real-time daemon health monitoring
907
+
908
+ ---
909
+
910
+ ## 🔧 Daemon Crash Deep Fix (2026-01-07)
911
+
912
+ > **Update (2026-01-12)**: After daemon updates and further testing, control loop frequency has been restored to 100Hz (same as `reachy_mini_conversation_app`). The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
913
+
914
+ ### Problem Description
915
+ During long-term operation, the `reachy_mini daemon` still crashed; the previous fix was not thorough enough.
916
+
917
+ ### Root Cause Analysis
918
+
919
+ Through deep analysis of SDK source code:
920
+
921
+ 1. **Each `set_target()` sends 3 Zenoh messages**
922
+ - `set_target_head_pose()` - 1 message
923
+ - `set_target_antenna_joint_positions()` - 1 message
924
+ - `set_target_body_yaw()` - 1 message
925
+
926
+ 2. **Daemon control loop is 50Hz**
927
+ - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
928
+ - If message send frequency exceeds 50Hz, daemon may not process in time
929
+
930
+ 3. **Previous 20Hz control loop still too high**
931
+ - 20Hz × 3 messages = 60 messages/second
932
+ - Already exceeds daemon's 50Hz processing capacity
933
+
934
+ 4. **Pose change threshold too small (0.002)**
935
+ - Breathing animation, speech sway, face tracking continuously produce tiny changes
936
+ - Almost every loop triggers `set_target()`
937
+
938
+ ### Fix Solution
939
+
940
+ #### 1. Further reduce control loop frequency (movement_manager.py)
941
+ ```python
942
+ # Reduced from 20Hz to 10Hz
943
+ # 10Hz × 3 messages = 30 messages/second, safely below daemon's 50Hz capacity
944
+ CONTROL_LOOP_FREQUENCY_HZ = 10
945
+ ```
946
+
947
+ #### 2. Increase pose change threshold (movement_manager.py)
948
+ ```python
949
+ # Increased from 0.002 to 0.005
950
+ # 0.005 rad ≈ 0.29 degrees, still smooth enough
951
+ self._pose_change_threshold = 0.005
952
+ ```
953
+
954
+ #### 3. Reduce camera/face tracking frequency (camera_server.py)
955
+ ```python
956
+ # Reduced from 15fps to 10fps
957
+ fps: int = 10
958
+ ```
959
+
960
+ #### 4. Increase state cache TTL (reachy_controller.py)
961
+ ```python
962
+ # Increased from 1 second to 2 seconds
963
+ self._cache_ttl = 2.0
964
+ ```
965
+
966
+ ### Fix Results
967
+
968
+ | Metric | Before (20Hz) | After (10Hz) | Improvement |
969
+ |--------|---------------|--------------|-------------|
970
+ | Control loop frequency | 20 Hz | 10 Hz | ↓ 50% |
971
+ | Max Zenoh messages | 60 msg/s | 30 msg/s | ↓ 50% |
972
+ | Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ↓ 62% |
973
+ | Face tracking frequency | 15 Hz | 10 Hz | ↓ 33% |
974
+ | State cache TTL | 1 second | 2 seconds | ↑ 100% |
975
+ | Expected stability | Crash within hours | Stable operation | Major improvement |
976
+
977
+ ### Key Finding
978
+
979
+ The reference app `reachy_mini_conversation_app` uses a 100Hz control loop, but it is an official app that may have special optimizations or run on more powerful hardware. Our app needs more conservative settings.
980
+
981
+ ### Related Files
982
+ - `movement_manager.py` - Control loop frequency and pose threshold
983
+ - `camera_server.py` - Face tracking frequency
984
+ - `reachy_controller.py` - State cache TTL
985
+
986
+
987
+ ---
988
+
989
+ ## 🔧 Tap-to-Wake and Microphone Sensitivity Fix (2026-01-07)
990
+
991
+ ### Problem Description
992
+ 1. **Tap-to-wake blocking** - After a tap wake-up, the conversation did not work properly because audio stream processing was blocked
993
+ 2. **Low microphone sensitivity** - The user had to be very close to the robot for voice recognition to work
994
+
995
+ ### Root Cause
996
+ 1. **Audio playback blocking** - `_tap_continue_feedback()` plays sound in continuous conversation mode, blocking audio stream processing
997
+ 2. **AGC settings not optimized** - ReSpeaker XVF3800 default settings not suitable for distant voice recognition
998
+
999
+ ### Fix Solution
1000
+
1001
+ #### 1. Remove audio playback in continuous conversation feedback (satellite.py)
1002
+ ```python
1003
+ def _tap_continue_feedback(self) -> None:
1004
+ """Provide feedback when continuing conversation in tap mode.
1005
+
1006
+ Triggers a nod to indicate ready for next input.
1007
+ Sound is NOT played here to avoid blocking audio streaming.
1008
+ """
1009
+ # NOTE: Do NOT play sound here - it blocks audio streaming
1010
+ if self.state.motion_enabled and self.state.motion:
1011
+ self.state.motion.on_continue_listening()
1012
+ ```
1013
+
1014
+ #### 2. Add exception handling to tap callback (voice_assistant.py)
1015
+ ```python
1016
+ def _on_tap_detected(self) -> None:
1017
+ """Callback when tap is detected on the robot.
1018
+
1019
+ NOTE: This is called from the tap_detector background thread.
1020
+ """
1021
+ try:
1022
+ self._state.satellite.wakeup_from_tap()
1023
+ # ... motion feedback
1024
+ except Exception as e:
1025
+ _LOGGER.error("Error in tap detection callback: %s", e)
1026
+ ```
1027
+
1028
+ #### 3. Comprehensive microphone optimization (voice_assistant.py) - Updated 2026-01-07
1029
+ ```python
1030
+ def _optimize_microphone_settings(self) -> None:
1031
+ """Optimize ReSpeaker XVF3800 microphone settings for voice recognition."""
1032
+
1033
+ # ========== 1. AGC (Automatic Gain Control) Settings ==========
1034
+ # Enable AGC for automatic volume normalization
1035
+ respeaker.write("PP_AGCONOFF", [1])
1036
+
1037
+ # Increase AGC max gain for better distant speech pickup (default ~15dB -> 30dB)
1038
+ respeaker.write("PP_AGCMAXGAIN", [30.0])
1039
+
1040
+ # Set AGC desired output level (default ~-25dB -> -18dB for stronger output)
1041
+ respeaker.write("PP_AGCDESIREDLEVEL", [-18.0])
1042
+
1043
+ # Optimize AGC time constant for voice commands
1044
+ respeaker.write("PP_AGCTIME", [0.5])
1045
+
1046
+ # ========== 2. Base Microphone Gain ==========
1047
+ # Increase base microphone gain (default 1.0 -> 2.0)
1048
+ respeaker.write("AUDIO_MGR_MIC_GAIN", [2.0])
1049
+
1050
+ # ========== 3. Noise Suppression Settings ==========
1051
+ # Reduce noise suppression to preserve quiet speech (default ~0.5 -> 0.15)
1052
+ respeaker.write("PP_MIN_NS", [0.15])
1053
+ respeaker.write("PP_MIN_NN", [0.15])
1054
+
1055
+ # ========== 4. Echo Cancellation & High-pass Filter ==========
1056
+ respeaker.write("PP_ECHOONOFF", [1])
1057
+ respeaker.write("AEC_HPFONOFF", [1])
1058
+ ```
1059
+
1060
+ ### Fix Results
1061
+
1062
+ | Parameter | Before | After | Notes |
1063
+ |-----------|--------|-------|-------|
1064
+ | Tap continuous conversation | Blocking | Working | Removed blocking audio playback |
1065
+ | Microphone sensitivity | ~30cm | ~2-3m | Comprehensive AGC and gain optimization |
1066
+ | AGC switch | Off | On | Auto volume normalization |
1067
+ | AGC max gain | ~15dB | 30dB | Better distant speech pickup |
1068
+ | AGC target level | -25dB | -18dB | Stronger output signal |
1069
+ | Microphone gain | 1.0x | 2.0x | Base gain doubled |
1070
+ | Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
1071
+ | Echo cancellation | On | On | Maintain clarity during TTS playback |
1072
+ | High-pass filter | Off | On | Remove low-frequency noise |
1073
+
1074
+ ### XVF3800 Parameter Reference
1075
+
1076
+ | Parameter Name | Type | Range | Description |
1077
+ |----------------|------|-------|-------------|
1078
+ | `PP_AGCONOFF` | int32 | 0/1 | AGC switch |
1079
+ | `PP_AGCMAXGAIN` | float | 0-40 dB | AGC max gain |
1080
+ | `PP_AGCDESIREDLEVEL` | float | dB | AGC target output level |
1081
+ | `PP_AGCTIME` | float | seconds | AGC time constant |
1082
+ | `AUDIO_MGR_MIC_GAIN` | float | 0-4.0 | Microphone gain multiplier |
1083
+ | `PP_MIN_NS` | float | 0-1.0 | Minimum noise suppression (lower = less suppression) |
1084
+ | `PP_MIN_NN` | float | 0-1.0 | Minimum noise estimation |
1085
+ | `PP_ECHOONOFF` | int32 | 0/1 | Echo cancellation switch |
1086
+ | `AEC_HPFONOFF` | int32 | 0/1 | High-pass filter switch |
1087
+
1088
+ ### Related Files
1089
+ - `satellite.py` - Removed blocking audio playback
1090
+ - `voice_assistant.py` - Comprehensive microphone optimization
1091
+ - `reachy_controller.py` - AGC entity default value updates
1092
+ - `entity_registry.py` - AGC max gain range update (0-40dB)
1093
+ - `reachy_mini/src/reachy_mini/media/audio_control_utils.py` - SDK reference
1094
+
1095
+ ---
1096
+
1097
+ ## 🔧 v0.5.1 Bug Fixes (2026-01-08)
1098
+
1099
+ ### Issue 1: Music Not Resuming After Voice Conversation
1100
+
1101
+ **Problem**: Music doesn't resume after voice conversation ends.
1102
+
1103
+ **Root Cause**: Sendspin was incorrectly connected to `tts_player` instead of `music_player`.
1104
+
1105
+ **Fix**:
1106
+ - `voice_assistant.py`: Sendspin discovery now connects to `music_player`
1107
+ - `satellite.py`: `duck()`/`unduck()` now call `music_player.pause_sendspin()`/`resume_sendspin()`
1108
+
1109
+ ### Issue 2: tap_sensitivity Not Persisted
1110
+
1111
+ **Problem**: The `tap_sensitivity` value set in ESPHome is lost after restart.
1112
+
1113
+ **Fix**:
1114
+ - `models.py`: Added `tap_sensitivity` field to `Preferences` dataclass
1115
+ - `entity_registry.py`: Entity setter now saves to `preferences.json`
1116
+ - Load saved value on startup
1117
+
1118
+ ### Issue 3: Audio Conflict During Voice Assistant Wakeup
1119
+
1120
+ **Problem**: Audio streaming (Sendspin or ESPHome audio) conflicts when voice assistant wakes up.
1121
+
1122
+ **Fix**:
1123
+ - `audio_player.py`: Added `pause_sendspin()` and `resume_sendspin()` methods
1124
+ - `satellite.py`: `duck()` now pauses Sendspin, `unduck()` resumes it
1125
+ - Improved `pause()` method to actually stop audio output
1126
+
1127
+ ### Issue 4: AttributeError for _camera_server
1128
+
1129
+ **Problem**: `_set_conversation_mode()` referenced non-existent `_camera_server` attribute.
1130
+
1131
+ **Fix**: Changed `self._camera_server` to `self.camera_server` (removed underscore prefix)
1132
+
1133
+ ### Issue 5: tap_sensitivity Default Value Wrong
1134
+
1135
+ **Problem**: tap_sensitivity default was still 2.0g instead of expected 0.5g.
1136
+
1137
+ **Fix**: Use `TAP_THRESHOLD_G_DEFAULT` constant as default value
1138
+
1139
+ ### Issue 6: Sendspin Sample Rate Optimization
1140
+
1141
+ **Problem**: ReSpeaker hardware I/O is 16kHz (hardware limitation), but Sendspin might try higher sample rates.
1142
+
1143
+ **Fix**: Prioritize 16kHz in Sendspin supported formats list to avoid unnecessary resampling
1144
+
1145
+ ---
1146
+
1147
+ ## 🔧 v0.5.15 Updates (2026-01-11)
1148
+
1149
+ ### Feature 1: Audio Settings Persistence
1150
+
1151
+ **Problem**: The AGC Enabled, AGC Max Gain, and Noise Suppression settings are lost after restart.
1152
+
1153
+ **Solution**:
1154
+ - `models.py`: Added `agc_enabled`, `agc_max_gain`, `noise_suppression` fields to `Preferences` dataclass (Optional, None = use default)
1155
+ - `entity_registry.py`: Entity setters now save to `preferences.json`
1156
+ - `voice_assistant.py`: `_optimize_microphone_settings()` now restores saved values from preferences on startup
1157
+
1158
+ **Behavior**:
1159
+ - First startup: Use optimized defaults (AGC=ON, MaxGain=30dB, NoiseSuppression=15%)
1160
+ - After user changes via Home Assistant: Values persisted and restored on restart
1161
+
1162
+ ### Feature 2: Sendspin Discovery Refactoring
1163
+
1164
+ **Problem**: Sendspin mDNS discovery code was in `audio_player.py`, mixing concerns.
1165
+
1166
+ **Solution**:
1167
+ - `zeroconf.py`: Added `SendspinDiscovery` class for mDNS service discovery
1168
+ - `audio_player.py`: Simplified to use `SendspinDiscovery` via callback pattern
1169
+ - Better separation of concerns: zeroconf.py handles all mDNS, audio_player.py handles audio
1170
+
1171
+ ### Fix 1: Tap Detection During Emotion Playback
1172
+
1173
+ **Problem**: Tap detection was re-enabled after emotion playback completed, even during an active conversation.
1174
+
1175
+ **Root Cause**: `_play_emotion()` and `_wait_for_move_completion()` always re-enabled tap detection without checking pipeline state.
1176
+
1177
+ **Fix**:
1178
+ - `satellite.py`: Check `_pipeline_active` before re-enabling tap detection
1179
+ - Only re-enable tap detection if conversation has ended (pipeline not active)
1180
+
1181
+ **Related Files**:
1182
+ - `models.py` - Preferences fields
1183
+ - `entity_registry.py` - Entity setters with persistence
1184
+ - `voice_assistant.py` - Settings restoration on startup
1185
+ - `zeroconf.py` - SendspinDiscovery class
1186
+ - `audio_player.py` - Simplified Sendspin integration
1187
+ - `satellite.py` - Tap detection fix
1188
+
1189
+
1190
+ ---
1191
+
1192
+ ### SDK Data Structure Reference
1193
+
1194
+ ```python
1195
+ # Motor control mode
1196
+ class MotorControlMode(str, Enum):
1197
+ Enabled = "enabled" # Torque on, position control
1198
+ Disabled = "disabled" # Torque off
1199
+ GravityCompensation = "gravity_compensation" # Gravity compensation mode
1200
+
1201
+ # Daemon state
1202
+ class DaemonState(Enum):
1203
+ NOT_INITIALIZED = "not_initialized"
1204
+ STARTING = "starting"
1205
+ RUNNING = "running"
1206
+ STOPPING = "stopping"
1207
+ STOPPED = "stopped"
1208
+ ERROR = "error"
1209
+
1210
+ # Full state
1211
+ class FullState:
1212
+ control_mode: MotorControlMode
1213
+ head_pose: XYZRPYPose # x, y, z (m), roll, pitch, yaw (rad)
1214
+ head_joints: list[float] # 7 joint angles
1215
+ body_yaw: float
1216
+ antennas_position: list[float] # [right, left]
1217
+ doa: DoAInfo # angle (rad), speech_detected (bool)
1218
+
1219
+ # IMU data (wireless version only)
1220
+ imu_data = {
1221
+ "accelerometer": [x, y, z], # m/s²
1222
+ "gyroscope": [x, y, z], # rad/s
1223
+ "quaternion": [w, x, y, z], # Attitude quaternion
1224
+ "temperature": float # °C
1225
+ }
1226
+
1227
+ # Safety limits
1228
+ HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
1229
+ HEAD_YAW_LIMIT = [-180°, +180°]
1230
+ BODY_YAW_LIMIT = [-160°, +160°]
1231
+ YAW_DELTA_MAX = 65° # Max difference between head and body yaw
1232
+ ```
1233
+
1234
+ ### ESPHome Protocol Implementation Notes
1235
+
1236
+ ESPHome protocol communicates with Home Assistant via protobuf messages. The following message types need to be implemented:
1237
+
1238
+ ```python
1239
+ from aioesphomeapi.api_pb2 import (
1240
+ # Number entity (volume/angle control)
1241
+ ListEntitiesNumberResponse,
1242
+ NumberStateResponse,
1243
+ NumberCommandRequest,
1244
+
1245
+ # Select entity (motor mode)
1246
+ ListEntitiesSelectResponse,
1247
+ SelectStateResponse,
1248
+ SelectCommandRequest,
1249
+
1250
+ # Button entity (wake/sleep)
1251
+ ListEntitiesButtonResponse,
1252
+ ButtonCommandRequest,
1253
+
1254
+ # Switch entity (motor switch)
1255
+ ListEntitiesSwitchResponse,
1256
+ SwitchStateResponse,
1257
+ SwitchCommandRequest,
1258
+
1259
+ # Sensor entity (numeric sensors)
1260
+ ListEntitiesSensorResponse,
1261
+ SensorStateResponse,
1262
+
1263
+ # Binary Sensor entity (boolean sensors)
1264
+ ListEntitiesBinarySensorResponse,
1265
+ BinarySensorStateResponse,
1266
+
1267
+ # Text Sensor entity (text sensors)
1268
+ ListEntitiesTextSensorResponse,
1269
+ TextSensorStateResponse,
1270
+ )
1271
+ ```
1272
+
1273
+ ## Reference Projects
1274
+
1275
+ - [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
1276
+ - [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
1277
+ - [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
1278
+ - [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
1279
+ - [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)
Project_Summary.md DELETED
@@ -1,1439 +0,0 @@
1
- # Reachy Mini for Home Assistant - Project Plan (Current snapshot: v1.0.6)
2
-
3
- ## Project Overview
4
-
5
- Integrate Home Assistant voice assistant functionality into Reachy Mini Wi-Fi robot, communicating with Home Assistant via ESPHome protocol.
6
-
7
- ## Local Reference Directories (DO NOT modify any files in reference directories)
8
- 1. [linux-voice-assistant](reference/linux-voice-assistant) - Linux-based Home Assistant voice assistant app for reference
9
- 2. [Reachy Mini SDK](reference/reachy_mini) - Reachy Mini SDK local directory for reference
10
- 3. [reachy_mini_conversation_app](reference/reachy_mini_conversation_app) - Reachy Mini conversation app for reference
11
- 4. [reachy-mini-desktop-app](reference/reachy-mini-desktop-app) - Reachy Mini desktop app for reference
12
- 5. [sendspin](reference/sendspin-cli/) - Sendspin client for reference
13
- 6. [aiosendspin](reference/aiosendspin/) - Sendspin protocol client library reference
14
- 7. [dynamic_gestures](reference/dynamic_gestures/) - Dynamic gesture reference
15
- 8. [SimpleDances](reference/SimpleDances/) - Local reference snapshot
16
-
17
- ## Core Design Principles
18
-
19
- 1. **Zero Configuration** - Users only need to install the app, no manual configuration required
20
- 2. **Native Hardware** - Use robot's built-in microphone and speaker
21
- 3. **Home Assistant Centralized Management** - STT/TTS/intent configuration stays on Home Assistant side
22
- 4. **Motion Feedback** - Provide head movement and antenna animation feedback during voice interaction
23
- 5. **Project Constraints** - Strictly follow [Reachy Mini SDK](reachy_mini) architecture design and constraints
24
- 6. **Code Quality** - Follow Python development standards with consistent code style, clear structure, complete comments, comprehensive documentation, high test coverage, high code quality, readability, maintainability, extensibility, and reusability
25
- 7. **Feature Priority** - Voice conversation with Home Assistant is highest priority; other features are auxiliary and must not affect voice conversation functionality or response speed
26
- 8. **No LED Functions** - LEDs are hidden inside the robot; all LED control is ignored
27
- 9. **Preserve Functionality** - Any code modifications should optimize while preserving completed features; do not remove features to solve problems. When issues occur, consult the reference examples and fix the underlying problem first, rather than adding assorted log output
28
- 10. **No App-Managed Sleep/Wake** - The app no longer manages robot sleep/wake transitions; current SDK behavior is treated as source of truth
29
-
30
- ## Technical Architecture
31
-
32
- ```
33
- ┌──────────────────────────────────────────────────────────────────────────────┐
34
- │                             Reachy Mini (ARM64)                              │
35
- │                                                                              │
36
- │  ┌───────────────────────────── AUDIO INPUT ──────────────────────────────┐  │
37
- │  │  ReSpeaker XVF3800 (16kHz)                                             │  │
38
- │  │  ┌──────────────┐   ┌───────────────────────────────────────────────┐  │  │
39
- │  │  │ 4-Mic Array  │ → │ XVF3800 DSP                                   │  │  │
40
- │  │  └──────────────┘   │ • Hardware DSP path available                 │  │  │
41
- │  │                     │ • App currently relies on HA STT/TTS          │  │  │
42
- │  │                     │ • DOA/VAD used by the current runtime         │  │  │
43
- │  │                     │ • Direction of Arrival (DOA)                  │  │  │
44
- │  │                     │ • Voice Activity Detection (VAD)              │  │  │
45
- │  │                     └───────────────────────────────────────────────┘  │  │
46
- │  │                                             │                          │  │
47
- │  │                                             ▼                          │  │
48
- │  │                     ┌───────────────────────────────────────────────┐  │  │
49
- │  │                     │ Wake Word Detection (microWakeWord)           │  │  │
50
- │  │                     │ • "Okay Nabu" / "Hey Jarvis"                  │  │  │
51
- │  │                     │ • Stop word detection                         │  │  │
52
- │  │                     └───────────────────────────────────────────────┘  │  │
53
- │  └────────────────────────────────────────────────────────────────────────┘  │
54
- │                                                                              │
55
- │  ┌───────────────────────────── AUDIO OUTPUT ─────────────────────────────┐  │
56
- │  │  ┌────────────────────────────┐  ┌──────────────────────────────────┐  │  │
57
- │  │  │ TTS Player                 │  │ Music Player (Sendspin)          │  │  │
58
- │  │  │ • Voice assistant speech   │  │ • Multi-room audio streaming     │  │  │
59
- │  │  │ • Sound effects            │  │ • Auto-discovery via mDNS        │  │  │
60
- │  │  │ • Priority over music      │  │ • Auto-pause during conversation │  │  │
61
- │  │  └────────────────────────────┘  └──────────────────────────────────┘  │  │
62
- │  │                │                                   │                   │  │
63
- │  │                └─────────────────┬─────────────────┘                   │  │
64
- │  │                                  ▼                                     │  │
65
- │  │  ┌──────────────────────────────────────────────────────────────────┐  │  │
66
- │  │  │ ReSpeaker Speaker (16kHz)                                        │  │  │
67
- │  │  └──────────────────────────────────────────────────────────────────┘  │  │
68
- │  └────────────────────────────────────────────────────────────────────────┘  │
69
- │                                                                              │
70
- │  ┌────────────────────────── VISION & TRACKING ───────────────────────────┐  │
71
- │  │  ┌────────────────────────────┐   ┌─────────────────────────────────┐  │  │
72
- │  │  │ Camera (VPU accelerated)   │ → │ YOLO Face Detection             │  │  │
73
- │  │  │ • MJPEG stream server      │   │ • AdamCodd/YOLOv11n-face        │  │  │
74
- │  │  │ • ESPHome Camera entity    │   │ • Adaptive frame rate:          │  │  │
75
- │  │  └────────────────────────────┘   │   - 15fps: conversation/face    │  │  │
76
- │  │                                   │   - 2fps: idle (power saving)   │  │  │
77
- │  │                                   │ • look_at_image() pose calc     │  │  │
78
- │  │                                   │ • Smooth return after face lost │  │  │
79
- │  │                                   └─────────────────────────────────┘  │  │
80
- │  └────────────────────────────────────────────────────────────────────────┘  │
81
- │                                                                              │
82
- │  ┌──────────────────────────── MOTION CONTROL ────────────────────────────┐  │
83
- │  │  MovementManager (50Hz Control Loop)                                   │  │
84
- │  │  ┌──────────────────────────────────────────────────────────────────┐  │  │
85
- │  │  │ Motion Layers (Priority: Move > Action > SpeechSway > Breath)    │  │  │
86
- │  │  │ ┌────────────┐  ┌────────────┐  ┌────────────┐  ┌──────────────┐ │  │  │
87
- │  │  │ │ Move Queue │  │ Actions    │  │ SpeechSway │  │ Breathing    │ │  │  │
88
- │  │  │ │ (Emotions) │  │ (Nod/Shake)│  │ (Voice VAD)│  │ (Idle anim)  │ │  │  │
89
- │  │  │ └────────────┘  └────────────┘  └────────────┘  └──────────────┘ │  │  │
90
- │  │  └──────────────────────────────────────────────────────────────────┘  │  │
91
- │  │                                                                        │  │
92
- │  │  ┌──────────────────────────────────────────────────────────────────┐  │  │
93
- │  │  │ Face Tracking Offsets (Secondary Pose Overlay)                   │  │  │
94
- │  │  │ • Pitch offset: +9° (down compensation)                          │  │  │
95
- │  │  │ • Yaw offset: -7° (right compensation)                           │  │  │
96
- │  │  └──────────────────────────────────────────────────────────────────┘  │  │
97
- │  │                                                                        │  │
98
- │  │  State Machine: on_wakeup → on_listening → on_speaking → on_idle       │  │
99
- │  │                                                                        │  │
100
- │  │  ┌──────────────────────────────────────────────────────────────────┐  │  │
101
- │  │  │ Body Following                                                   │  │  │
102
- │  │  │ • Body yaw syncs with head yaw for natural tracking              │  │  │
103
- │  │  │ • Extracted from final head pose matrix                          │  │  │
104
- │  │  └──────────────────────────────────────────────────────────────────┘  │  │
105
- │  └────────────────────────────────────────────────────────────────────────┘  │
106
- │                                                                              │
107
- │  ┌────────────────────────── GESTURE DETECTION ───────────────────────────┐  │
108
- │  │  HaGRID ONNX Models                                                    │  │
109
- │  │  • 18 gesture classes (call, like, dislike, fist, ok, palm, etc.)      │  │
110
- │  │  • Runtime result publishing only                                      │  │
111
- │  │  • Batch detection: all hands (not just highest confidence)            │  │
112
- │  │  • Detection cadence: adaptive scheduler + minimum processing FPS      │  │
113
- │  │  • No confidence filtering - all detections passed to Home Assistant   │  │
114
- │  │  • Runtime switchable (default OFF, model unloaded when disabled)      │  │
115
- │  │  • Real-time state push to Home Assistant                              │  │
116
- │  │  • No conflicts with face tracking (shared frame, independent)         │  │
117
- │  │  • SDK integration: MediaBackend detection, proper resource cleanup    │  │
118
- │  └────────────────────────────────────────────────────────────────────────┘  │
119
- │                                                                              │
120
- │  ┌──────────────────────────── ESPHOME SERVER ────────────────────────────┐  │
121
- │  │  Port 6053 (mDNS auto-discovery)                                       │  │
122
- │  │  • Entity count evolves by release (sensors, controls, media, camera)  │  │
123
- │  │  • Voice Assistant pipeline integration                                │  │
124
- │  │  • Real-time state synchronization                                     │  │
125
- │  └────────────────────────────────────────────────────────────────────────┘  │
126
- └──────────────────────────────────────────────────────────────────────────────┘
127
-                                        │
128
-                                        │ ESPHome Protocol (protobuf)
129
-                                        ▼
130
- ┌──────────────────────────────────────────────────────────────────────────────┐
131
- │                                Home Assistant                                │
132
- │  ┌──────────────────┐    ┌──────────────────┐    ┌────────────────────────┐  │
133
- │  │ STT Engine       │    │ Intent Processing│    │ TTS Engine             │  │
134
- │  │ (User configured)│    │ (Conversation)   │    │ (User configured)      │  │
135
- │  └──────────────────┘    └──────────────────┘    └────────────────────────┘  │
136
- └──────────────────────────────────────────────────────────────────────────────┘
137
- ```
138
-
139
- ### Software Module Architecture (v1.0.6)
140
-
141
- ```
142
- reachy_mini_home_assistant/
143
- │
144
- ├── main.py  # ReachyMiniApp entry point
145
- ├── __main__.py  # Standalone CLI entry point
146
- ├── voice_assistant.py  # Voice assistant service orchestrator
147
- ├── reachy_controller.py  # Reachy Mini SDK wrapper
148
- ├── models.py  # Data models / preferences / server state
149
- │
150
- ├── core/  # Core Infrastructure
151
- │   ├── config.py  # Centralized nested configuration
152
- │   ├── service_base.py  # Suspend/resume-aware service helpers
153
- │   ├── system_diagnostics.py  # System diagnostics
154
- │   ├── exceptions.py  # Custom exception classes
155
- │   └── util.py  # Utility functions
156
- │
157
- ├── motion/  # Motion Control
158
- │   ├── movement_manager.py  # 50Hz unified motion control loop
159
- │   ├── command_runtime.py  # Command queue handling / state transitions
160
- │   ├── control_runtime.py  # Control-loop runtime helpers
161
- │   ├── idle_runtime.py  # Idle behavior / idle rest handling
162
- │   ├── antenna.py  # Antenna control / freeze logic
163
- │   ├── pose_composer.py  # Pose composition from multiple sources
164
- │   ├── smoothing.py  # Motion smoothing algorithms
165
- │   ├── state_machine.py  # Robot state definitions / idle config parsing
166
- │   ├── animation_player.py  # Animation player
167
- │   ├── emotion_moves.py  # Emotion moves
168
- │   ├── speech_sway.py  # Speech-driven head micro-movements
169
- │   └── reachy_motion.py  # Reachy motion API
170
- │
171
- ├── vision/  # Vision Processing
172
- │   ├── camera_server.py  # MJPEG camera stream server facade
173
- │   ├── camera_runtime.py  # Camera lifecycle helpers
174
- │   ├── camera_processing.py  # Frame capture / AI processing helpers
175
- │   ├── camera_http.py  # HTTP handlers for stream/snapshot
176
- │   ├── head_tracker.py  # YOLO face detector
177
- │   ├── gesture_detector.py  # HaGRID gesture detection
178
- │   ├── face_tracking_interpolator.py  # Smooth face tracking
179
- │   └── frame_processor.py  # Adaptive frame rate management
180
- │
181
- ├── audio/  # Audio runtime support
182
- │   ├── audio_player.py  # AudioPlayer facade
183
- │   ├── audio_player_shared.py  # Shared audio/sendspin constants + helpers
184
- │   ├── audio_player_playback.py  # Playback orchestration / lifecycle
185
- │   ├── audio_player_local.py  # Local file + fallback playback
186
- │   ├── audio_player_stream_pcm.py  # PCM streaming playback
187
- │   ├── audio_player_stream_decoded.py  # Decoded/GStreamer streaming playback
188
- │   ├── audio_player_sendspin.py  # Sendspin runtime integration
189
- │   ├── microphone.py  # Hardware audio helper / legacy tuning code
190
- │   └── doa_tracker.py  # Direction of Arrival tracking
191
- │
192
- ├── entities/  # Home Assistant Entities
193
- │   ├── entity.py  # ESPHome base entity
194
- │   ├── entity_registry.py  # ESPHome entity registry
195
- │   ├── entity_factory.py  # Entity creation factory
196
- │   ├── entity_keys.py  # Entity key constants
197
- │   ├── entity_extensions.py  # Extended entity types
198
- │   ├── runtime_entity_setup.py  # Runtime/control entity wiring
199
- │   ├── sensor_entity_setup.py  # Sensor/diagnostic entity wiring
200
- │   ├── event_emotion_mapper.py  # HA event → Emotion mapping
201
- │   └── emotion_detector.py  # Disabled runtime path for text emotion detection
202
- │
203
- ├── protocol/  # Protocol Handling
204
- │   ├── satellite.py  # ESPHome protocol handler facade
205
- │   ├── api_server.py  # HTTP API server
206
- │   ├── zeroconf.py  # mDNS discovery
207
- │   ├── entity_bridge.py  # Protocol/entity bridge helpers
208
- │   ├── message_dispatch.py  # ESPHome message dispatch
209
- │   ├── motion_bridge.py  # Voice → motion bridge
210
- │   ├── session_flow.py  # Conversation lifecycle helpers
211
- │   ├── voice_pipeline.py  # Voice event handling / TTS / stop / ducking
212
- │   └── wakeword_assets.py  # Wake word asset helpers
213
- │
214
- ├── animations/  # Animation definitions
215
- │   └── conversation_animations.json  # Unified built-in behavior resource file
216
- │
217
- └── wakewords/  # Wake word models
218
-     ├── okay_nabu.json/.tflite
219
-     ├── hey_jarvis.json/.tflite
220
-     ├── alexa.json/.tflite
221
-     ├── hey_luna.json/.tflite
222
-     └── stop.json/.tflite
223
- ```
224
-
225
-
226
- ### Current Runtime Defaults (v1.0.6)
227
-
228
- - `idle_behavior_enabled`: user-controlled
229
- - `sendspin_enabled`: OFF
230
- - `face_tracking_enabled`: OFF
231
- - `gesture_detection_enabled`: OFF
232
- - `face_confidence_threshold`: 0.5 (persistent)
233
- - `continuous_conversation`: user-controlled
234
- - `Idle Behavior = OFF` means a parked no-animation state aligned to configured idle rest pose
235
- - When `Idle Behavior = OFF`, camera server is stopped entirely to save resources
236
- - When `Idle Behavior = ON`, camera server can run and `/snapshot` supports on-demand frame capture when cache is empty
237
- - Idle antenna behavior: torque disabled in `IDLE`, re-enabled when leaving `IDLE`
238
- - Voice phases and HA-triggered emotions are routed through one built-in zero-config behavior layer
239
-
240
- When face/gesture switches are OFF, their models are unloaded to save resources.
241
-
242
- ### Current Audio Startup Note (SDK 1.7.0)
243
-
244
- - The app now aligns to the current Reachy Mini SDK media model instead of carrying older compatibility paths.
245
- - Camera snapshots can be fetched on demand when the MJPEG cache is empty and the camera server is still running.
246
- - Audio block size is currently `512` samples to reduce CPU overhead versus the earlier `256`-sample path.
247
-
248
- ### Latest Incremental Update (2026-03-04) - Viewer-Aware Camera Streaming
249
-
250
- - MJPEG encoding/push is now viewer-aware: when no `/stream` client is connected, continuous MJPEG encoding is skipped to reduce CPU usage.
251
- - Face tracking and gesture detection still run without active stream viewers, so AI behavior remains available.
252
- - `/snapshot` now supports on-demand frame encode when no cached stream frame exists.
253
- - Stream output no longer forces fixed 1080p/25fps; it follows camera backend defaults (resolution/FPS) and only falls back when backend FPS is unavailable.
254
- - Transition from "watching" to "not watching" returns to adaptive idle pacing for resource saving.
255
-
256
- ## Completed Features
257
-
258
- ### Core Features
259
- - [x] ESPHome protocol server implementation
260
- - [x] mDNS service discovery (auto-discovered by Home Assistant)
261
- - [x] Local wake word detection (microWakeWord)
262
- - [x] Continuous conversation mode (controlled via Home Assistant switch)
263
- - [x] Audio stream transmission to Home Assistant
264
- - [x] TTS audio playback
265
- - [x] Stop word detection
266
-
267
- ### Reachy Mini Integration
268
- - [x] Use Reachy Mini SDK microphone input
269
- - [x] Use Reachy Mini SDK speaker output
270
- - [x] Head motion control (nod, shake, gaze)
271
- - [x] Antenna animation control
272
- - [x] Voice state feedback actions
273
- - [x] YOLO face tracking (complements DOA wakeup orientation)
274
- - [x] 50Hz unified motion control loop
275
-
276
- ### Application Architecture
277
- - [x] Compliant with Reachy Mini App architecture
278
-
279
-
280
-
281
- ## File List
282
-
283
- ```
284
- reachy_mini_ha_voice/
285
- ├── reachy_mini_ha_voice/
286
- │   ├── __init__.py # Package initialization (v0.9.9)
287
- │   ├── __main__.py # Command line entry
288
- │   ├── main.py # ReachyMiniApp entry
289
- │   ├── voice_assistant.py # Voice assistant service (1270 lines)
290
- │   ├── protocol/ # ESPHome protocol handling
291
- │   │   ├── __init__.py # Module exports (13 lines)
292
- │   │   ├── satellite.py # ESPHome protocol handler facade
293
- │   │   ├── api_server.py # HTTP API server
294
- │   │   ├── zeroconf.py # mDNS discovery
295
- │   │   ├── entity_bridge.py # Protocol/entity bridge helpers
296
- │   │   ├── message_dispatch.py # ESPHome message dispatch
297
- │   │   ├── motion_bridge.py # Voice → motion bridge
298
- │   │   ├── session_flow.py # Conversation lifecycle helpers
299
- │   │   ├── voice_pipeline.py # Voice event handling / TTS / stop / ducking
300
- │   │   └── wakeword_assets.py # Wake word asset helpers
301
- │   ├── models.py # Data models
302
- │   ├── reachy_controller.py # Reachy Mini controller wrapper (961 lines)
303
- │   │
304
- │   ├── core/ # Core infrastructure modules
305
- │   │   ├── __init__.py # Module exports
306
- │   │   ├── config.py # Centralized configuration (368 lines)
307
- │   │   ├── service_base.py # Suspend/resume-aware service helpers
308
- │   │   ├── system_diagnostics.py # System diagnostics (250 lines)
309
- │   │   ├── exceptions.py # Custom exception classes (68 lines)
310
- │   │   └── util.py # Utility functions (28 lines)
311
- │   │
312
- │   ├── motion/ # Motion control modules
313
- │   │   ├── __init__.py # Module exports
314
- │   │   ├── antenna.py # Antenna freeze/unfreeze control
315
- │   │   ├── pose_composer.py # Pose composition utilities
316
- │   │   ├── command_runtime.py # Command queue handling / state transitions
317
- │   │   ├── control_runtime.py # Control-loop runtime helpers
318
- │   │   ├── idle_runtime.py # Idle behavior / idle rest handling
319
- │   │   ├── smoothing.py # Smoothing/transition algorithms
320
- │   │   ├── state_machine.py # State machine definitions
321
- │   │   ├── animation_player.py # Animation player
322
- │   │   ├── emotion_moves.py # Emotion moves
323
- │   │   ├── speech_sway.py # Speech-driven head micro-movements (338 lines)
324
- │   │   └── reachy_motion.py # Reachy motion API
325
- │   │
326
- │   ├── vision/ # Vision processing modules
327
- │   │   ├── __init__.py # Module exports (30 lines)
328
- │   │   ├── frame_processor.py # Adaptive frame rate management (227 lines)
329
- │   │   ├── face_tracking_interpolator.py # Face lost interpolation (253 lines)
330
- │   │   ├── gesture_detector.py # HaGRID gesture detection
331
- │   │   ├── head_tracker.py # YOLO face detector
332
- │   │   ├── camera_runtime.py # Camera lifecycle helpers
333
- │   │   ├── camera_processing.py # Frame capture / AI processing helpers
334
- │   │   ├── camera_http.py # HTTP handlers for stream/snapshot
335
- │   │   └── camera_server.py # MJPEG camera stream server facade
336
- │   │
337
- │   ├── audio/ # Audio runtime modules
338
- │   │   ├── __init__.py # Module exports (21 lines)
339
- │   │   ├── microphone.py # Hardware audio helper / legacy tuning code
340
- │   │   ├── doa_tracker.py # Direction of Arrival tracking
341
- │   │   ├── audio_player.py # AudioPlayer facade
342
- │   │   ├── audio_player_shared.py # Shared audio/sendspin constants + helpers
343
- │   │   ├── audio_player_playback.py # Playback orchestration / lifecycle
344
- │   │   ├── audio_player_local.py # Local file + fallback playback
345
- │   │   ├── audio_player_stream_pcm.py # PCM streaming playback
346
- │   │   ├── audio_player_stream_decoded.py # Decoded/GStreamer streaming playback
347
- │   │   └── audio_player_sendspin.py # Sendspin runtime integration
348
- │   │
349
- │   ├── entities/ # Home Assistant entity modules
350
- │   │   ├── __init__.py # Module exports (38 lines)
351
- │   │   ├── entity.py # ESPHome base entity (402 lines)
352
- │   │   ├── entity_factory.py # Entity factory pattern (440 lines)
353
- │   │   ├── entity_keys.py # Entity key constants (155 lines)
354
- │   │   ├── entity_extensions.py # Extended entity types (258 lines)
355
- │   │   ├── entity_registry.py # ESPHome entity registry
356
- │   │   ├── runtime_entity_setup.py # Runtime/control entity wiring
357
- │   │   ├── sensor_entity_setup.py # Sensor/diagnostic entity wiring
358
- │   │   ├── event_emotion_mapper.py # HA event to emotion mapping
359
- │   │   └── emotion_detector.py # Disabled runtime path for text emotion detection
360
- │   │
361
- │   ├── animations/ # Animation definitions
362
- │   │   └── conversation_animations.json # Unified animations / gestures / HA events / keyword resources
363
- │   │
364
- │   └── wakewords/ # Wake word models
365
- │       ├── okay_nabu.json/.tflite
366
- │       ├── hey_jarvis.json/.tflite (openWakeWord)
367
- │       ├── alexa.json/.tflite
368
- │       ├── hey_luna.json/.tflite
369
- │       └── stop.json/.tflite # Stop word detection
370
- │
371
- ├── sounds/ # Sound effect files (auto-download)
372
- │   ├── wake_word_triggered.flac
373
- │   └── timer_finished.flac
374
- ├── pyproject.toml # Project configuration
375
- ├── README.md # Documentation
376
- ├── changelog.json # Version changelog
377
- └── PROJECT_PLAN.md # Project plan
378
- ```
379
-
380
- ## Dependencies
381
-
382
- ```toml
383
- dependencies = [
384
- "reachy-mini>=1.7.0",
385
- "soundfile>=0.13.0",
386
- "numpy>=2.2.5,<=2.2.5",
387
- "opencv-python>=4.12.0.88",
388
- "pymicro-wakeword>=2.0.0,<3.0.0",
389
- "pyopen-wakeword>=1.0.0,<2.0.0",
390
- "aioesphomeapi>=43.10.1",
391
- "zeroconf>=0.131,<1",
392
- "websockets>=12,<16",
393
- "aiohttp",
394
- "scipy>=1.15.3,<2.0.0",
395
- "ultralytics",
396
- "supervision",
397
- "aiosendspin>=5.1,<6.0",
398
- "onnxruntime>=1.18.0",
399
- "torch==2.5.1",
400
- "torchvision==0.20.1",
401
- "pillow<12.0",
402
- "pydantic<=2.12.5",
403
- "requests>=2.33.0",
404
- "gstreamer-bundle==1.28.1; sys_platform != 'linux'",
405
- ]
406
- ```
407
-
408
- ## Usage Flow
409
-
410
- 1. **Install App**
411
- - Install `reachy_mini_ha_voice` from Reachy Mini App Store
412
-
413
- 2. **Start App**
414
- - App auto-starts ESPHome server (port 6053)
415
- - Auto-downloads required models and sounds
416
-
417
- 3. **Connect Home Assistant**
418
- - Home Assistant auto-discovers device (mDNS)
419
- - Or manually add: Settings → Devices & Services → Add Integration → ESPHome
420
-
421
- 4. **Use Voice Assistant**
422
- - Say "Okay Nabu" to wake
423
- - Speak command
424
- - Reachy Mini provides motion feedback
425
-
426
- ## ESPHome Entity Planning
427
-
428
- Based on deep analysis of Reachy Mini SDK, the following entities are exposed to Home Assistant:
429
-
430
- ### Implemented Entities
431
-
432
- | Entity Type | Name | Description |
433
- |-------------|------|-------------|
434
- | Media Player | `media_player` | Audio playback control |
435
- | Voice Assistant | `voice_assistant` | Voice assistant pipeline |
436
-
437
- ### Implemented Control Entities (Read/Write)
438
-
439
- #### Phase 1-3: Basic Controls and Pose
440
-
441
- | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
442
- |---------------------|------|---------|---------------|-------------|
443
- | `Number` | `speaker_volume` | `AudioPlayer.set_volume()` | 0-100 | Speaker volume |
444
- | `Switch` | `idle_behavior_enabled` | `set_idle_behavior_enabled()` | off=parked/on=idle runtime enabled | Unified idle behavior toggle |
445
- | `Number` | `head_x` | `goto_target(head=...)` | ±50mm | Head X position control |
446
- | `Number` | `head_y` | `goto_target(head=...)` | ±50mm | Head Y position control |
447
- | `Number` | `head_z` | `goto_target(head=...)` | ±50mm | Head Z position control |
448
- | `Number` | `head_roll` | `goto_target(head=...)` | -40° ~ +40° | Head roll angle control |
449
- | `Number` | `head_pitch` | `goto_target(head=...)` | -40° ~ +40° | Head pitch angle control |
450
- | `Number` | `head_yaw` | `goto_target(head=...)` | -180° ~ +180° | Head yaw angle control |
451
- | `Number` | `body_yaw` | `goto_target(body_yaw=...)` | -160° ~ +160° | Body yaw angle control |
452
- | `Number` | `antenna_left` | `goto_target(antennas=...)` | -90° ~ +90° | Left antenna angle control |
453
- | `Number` | `antenna_right` | `goto_target(antennas=...)` | -90° ~ +90° | Right antenna angle control |
454
-
455
- #### Phase 4: Gaze Control
456
-
457
- | ESPHome Entity Type | Name | SDK API | Range/Options | Description |
458
- |---------------------|------|---------|---------------|-------------|
459
- | `Number` | `look_at_x` | `look_at_world(x, y, z)` | World coordinates | Gaze point X coordinate |
460
- | `Number` | `look_at_y` | `look_at_world(x, y, z)` | World coordinates | Gaze point Y coordinate |
461
- | `Number` | `look_at_z` | `look_at_world(x, y, z)` | World coordinates | Gaze point Z coordinate |
462
-
463
-
464
- ### Implemented Sensor Entities (Read-only)
465
-
466
- #### Phase 1 & 5: Basic Status and Audio Sensors
467
-
468
- | ESPHome Entity Type | Name | SDK API | Description |
469
- |---------------------|------|---------|-------------|
470
- | `Text Sensor` | `daemon_state` | `DaemonStatus.state` | Daemon status |
471
- | `Binary Sensor` | `backend_ready` | `backend_status.ready` | Backend ready status |
472
- | `Text Sensor` | `error_message` | `DaemonStatus.error` | Current error message |
473
- | `Sensor` | `doa_angle` | `DoAInfo.angle` | Sound source direction angle (°) |
474
- | `Binary Sensor` | `speech_detected` | `DoAInfo.speech_detected` | Speech detection status |
475
-
476
- #### Phase 6: Diagnostic Information
477
-
478
- | ESPHome Entity Type | Name | SDK API | Description |
479
- |---------------------|------|---------|-------------|
480
- | `Sensor` | `control_loop_frequency` | `control_loop_stats` | Control loop frequency (Hz) |
481
- | `Text Sensor` | `sdk_version` | `DaemonStatus.version` | SDK version |
482
- | `Text Sensor` | `robot_name` | `DaemonStatus.robot_name` | Robot name |
483
- | `Binary Sensor` | `wireless_version` | `DaemonStatus.wireless_version` | Wireless version flag |
484
- | `Binary Sensor` | `simulation_mode` | `DaemonStatus.simulation_enabled` | Simulation mode flag |
485
- | `Text Sensor` | `wlan_ip` | `DaemonStatus.wlan_ip` | Wireless IP address |
486
-
487
- #### Phase 7: IMU Sensors (Wireless version only)
488
-
489
- | ESPHome Entity Type | Name | SDK API | Description |
490
- |---------------------|------|---------|-------------|
491
- | `Sensor` | `imu_accel_x` | `mini.imu["accelerometer"][0]` | X-axis acceleration (m/s²) |
492
- | `Sensor` | `imu_accel_y` | `mini.imu["accelerometer"][1]` | Y-axis acceleration (m/s²) |
493
- | `Sensor` | `imu_accel_z` | `mini.imu["accelerometer"][2]` | Z-axis acceleration (m/s²) |
494
- | `Sensor` | `imu_gyro_x` | `mini.imu["gyroscope"][0]` | X-axis angular velocity (rad/s) |
495
- | `Sensor` | `imu_gyro_y` | `mini.imu["gyroscope"][1]` | Y-axis angular velocity (rad/s) |
496
- | `Sensor` | `imu_gyro_z` | `mini.imu["gyroscope"][2]` | Z-axis angular velocity (rad/s) |
497
- | `Sensor` | `imu_temperature` | `mini.imu["temperature"]` | IMU temperature (°C) |
498
-
499
- #### Current Runtime Control and Sensor Entities
500
-
501
- | Phase | ESPHome Entity Type | Name | Description |
502
- |------|---------------------|------|-------------|
503
- | 1 | `Switch` | `mute` | Suspend/resume the voice pipeline |
504
- | 1 | `Switch` | `camera_disabled` | Disable/enable camera runtime |
505
- | 1 | `Switch` | `idle_behavior_enabled` | Unified idle motion / antenna / micro-actions toggle |
506
- | 1 | `Switch` | `sendspin_enabled` | Enable/disable Sendspin playback integration |
507
- | 1 | `Switch` | `face_tracking_enabled` | Enable/disable face tracking models |
508
- | 1 | `Switch` | `gesture_detection_enabled` | Enable/disable gesture detection models |
509
- | 1 | `Number` | `face_confidence_threshold` | Face tracking confidence threshold (0-1) |
510
- | 2 | `Binary Sensor` | `services_suspended` | Runtime suspension state |
511
- | 8 | `Select` | `emotion` | Manual emotion trigger |
512
- | 10 | `Camera` | `camera` | ESPHome camera entity / live preview |
513
- | 21 | `Switch` | `continuous_conversation` | Multi-turn conversation mode |
514
- | 22 | `Text Sensor` | `gesture_detected` | Current detected gesture |
515
- | 22 | `Sensor` | `gesture_confidence` | Current gesture confidence |
516
- | 23 | `Binary Sensor` | `face_detected` | Face currently visible |
517
-
518
- > **Note**: Head position (x/y/z) and angles (roll/pitch/yaw), body yaw, antenna angles are all **controllable** entities,
519
- > using `Number` type for bidirectional control. Call `goto_target()` when setting new values, call `get_current_head_pose()` etc. when reading current values.
520
-
521
- ### Implementation Priority
522
-
523
- 1. **Phase 1 - Basic Status and Volume** (High Priority) ✅ **Completed**
524
- - [x] `daemon_state` - Daemon status sensor
525
- - [x] `backend_ready` - Backend ready status
526
- - [x] `error_message` - Error message
527
- - [x] `speaker_volume` - Speaker volume control
528
-
529
- 2. **Phase 2 - Runtime State** (High Priority) ✅ **Completed**
530
- - [x] `services_suspended` - Service suspension state sensor
531
- - [x] App-managed sleep/wake entities removed from the current runtime
532
-
533
- 3. **Phase 3 - Pose Control** (Medium Priority) ✅ **Completed**
534
- - [x] `head_x/y/z` - Head position control
535
- - [x] `head_roll/pitch/yaw` - Head angle control
536
- - [x] `body_yaw` - Body yaw angle control
537
- - [x] `antenna_left/right` - Antenna angle control
538
-
539
- 4. **Phase 4 - Gaze Control** (Medium Priority) ✅ **Completed**
540
- - [x] `look_at_x/y/z` - Gaze point coordinate control
541
-
542
- 5. **Phase 5 - DOA (Direction of Arrival)** ✅ **Re-added for wakeup turn-to-sound**
543
- - [x] `doa_angle` - Sound source direction (degrees, 0-180°, where 0°=left, 90°=front, 180°=right)
544
- - [x] `speech_detected` - Speech detection status
545
- - [x] Turn-to-sound at wakeup (robot turns toward speaker when wake word detected)
546
- - [x] Direction correction: `yaw = π/2 - doa` (fixed left/right inversion)
547
- - Note: DOA only read once at wakeup to avoid daemon pressure; face tracking takes over after
548
-
549
- 6. **Phase 6 - Diagnostic Information** (Low Priority) ✅ **Completed**
550
- - [x] `control_loop_frequency` - Control loop frequency
551
- - [x] `sdk_version` - SDK version
552
- - [x] `robot_name` - Robot name
553
- - [x] `wireless_version` - Wireless version flag
554
- - [x] `simulation_mode` - Simulation mode flag
555
- - [x] `wlan_ip` - Wireless IP address
556
-
557
- 7. **Phase 7 - IMU Sensors** (Optional, wireless version only) ✅ **Completed**
558
- - [x] `imu_accel_x/y/z` - Accelerometer
559
- - [x] `imu_gyro_x/y/z` - Gyroscope
560
- - [x] `imu_temperature` - IMU temperature
561
-
562
- 8. **Phase 8 - Emotion Control** ✅ **Completed**
563
- - [x] `emotion` - Emotion selector (Happy/Sad/Angry/Fear/Surprise/Disgust)
564
-
565
- 9. **Phase 10 - Camera Integration** ✅ **Completed**
566
- - [x] `camera` - ESPHome Camera entity (live preview)
567
-
568
- 10. **Phase 11 - LED Control** 閴?**Disabled (LEDs hidden inside robot)**
569
- - [ ] `led_brightness` - LED brightness (0-100%) - Commented out
570
- - [ ] `led_effect` - LED effect (off/solid/breathing/rainbow/doa) - Commented out
571
- - [ ] `led_color_r/g/b` - LED RGB color (0-255) - Commented out
572
-
573
- 11. **Phase 13 - Sendspin Audio Playback Support** ✅ **Completed**
574
- - [x] `sendspin_enabled` - Sendspin switch (Switch)
575
- - [x] AudioPlayer integrates aiosendspin library
576
- - [x] Local music/sendspin path coexists with voice playback and is auto-paused during conversation
577
-
578
- 12. **Phase 21 - Continuous Conversation** ✅ **Completed**
579
- - [x] `continuous_conversation` - Conversation continuation switch
580
-
581
- 13. **Phase 22 - Gesture Detection** ✅ **Completed (current runtime behavior)**
582
- - [x] `gesture_detected` - Detected gesture name (Text Sensor)
583
- - [x] `gesture_confidence` - Gesture detection confidence % (Sensor)
584
- - [x] HaGRID ONNX models: hand_detector.onnx + crops_classifier.onnx
585
- - [x] Real-time state push to Home Assistant
586
- - [x] Runtime gesture result publishing only (no gesture-driven robot actions)
587
- - [x] Runtime toggle supported (default OFF, model unload on disable)
588
- - [x] Batch detection: returns all detected hands (not just highest confidence)
589
- - [x] Minimum processing cadence preserved for responsiveness
590
- - [x] No conflicts with face tracking (shared frame, independent processing)
591
- - [x] SDK integration: MediaBackend detection, proper resource cleanup on shutdown
592
- - [x] 18 supported gestures:
593
- | Gesture | Emoji | Gesture | Emoji |
594
- |---------|-------|---------|-------|
595
- | call | 棣冾樉 | like | 棣冩啢 |
596
- | dislike | 棣冩啣 | mute | 棣冦亱 |
597
- | fist | 閴?| ok | 棣冩啠 |
598
- | four | 棣冩瀾閿?| one | 閳芥繐绗?|
599
- | palm | 閴?| peace | 閴佸矉绗?|
600
- | peace_inverted | 棣冩暰閴佸矉绗?| rock | 棣冾樈 |
601
- | stop | 棣冩磧 | stop_inverted | 棣冩暰棣冩磧 |
602
- | three | 3閿斿繆鍎?| three2 | 棣冾檮 |
603
- | two_up | 閴佸矉绗嶉埥婵撶瑣 | two_up_inverted | 棣冩暰閴佸矉绗嶉埥婵撶瑣 |
604
-
605
- 14. **Phase 23 - Face Detection** ✅ **Completed**
606
- - [x] `face_detected` - Face visibility sensor
607
-
608
- 15. **Phase 24 - System Diagnostics** ✅ **Completed**
609
- - [x] `sys_cpu_percent` - CPU usage percentage (Sensor, diagnostic)
610
- - [x] `sys_cpu_temperature` - CPU temperature in Celsius (Sensor, diagnostic)
611
- - [x] `sys_memory_percent` - Memory usage percentage (Sensor, diagnostic)
612
- - [x] `sys_memory_used` - Used memory in GB (Sensor, diagnostic)
613
- - [x] `sys_disk_percent` - Disk usage percentage (Sensor, diagnostic)
614
- - [x] `sys_disk_free` - Free disk space in GB (Sensor, diagnostic)
615
- - [x] `sys_uptime` - System uptime in hours (Sensor, diagnostic)
616
- - [x] `sys_process_cpu` - This process CPU usage (Sensor, diagnostic)
617
- - [x] `sys_process_memory` - This process memory in MB (Sensor, diagnostic)
618
-
619
- ---
620
-
621
- ## 棣冨竴 Current Runtime Entity Coverage
622
-
623
- **Total Completed: See runtime registry (count evolves with releases)**
624
- - Phase 1: 10 entities (status, zero-config runtime switches, volume)
625
- - Phase 2: runtime state entities only (`services_suspended`; sleep entities removed)
626
- - Phase 3: 9 entities (Pose control)
627
- - Phase 4: 3 entities (Gaze control)
628
- - Phase 5: 3 entities (DOA sensors and tracking switch)
629
- - Phase 6: 7 entities (Diagnostic information)
630
- - Phase 7: 7 entities (IMU sensors)
631
- - Phase 8: 1 entity (Emotion control)
632
- - Phase 10: 1 entity (Camera)
633
- - Phase 11: 0 entities (LED control - Disabled)
634
- - Phase 13: 1 entity (Sendspin toggle)
635
- - Phase 21: 1 entity (Continuous conversation)
636
- - Phase 22: 2 entities (Gesture detection)
637
- - Phase 23: 1 entity (Face detection)
638
- - Phase 24: 9 entities (System diagnostics)
639
-
640
-
641
- ---
642
-
643
- ## 棣冩畬 Voice Assistant Enhancement Features Implementation Status
644
-
645
- ### Phase 14 - Emotion and Motion Feedback 閴?
646
- **Current Status**: Manual emotion playback and non-blocking motion feedback are implemented. Automatic keyword-based emotion triggering is currently disabled in the runtime.
647
-
648
- **Implemented Features**:
649
- - 閴?Phase 8 Emotion Selector entity (`emotion`)
650
- - 閴?`_play_emotion()` queues emotion moves through `MovementManager`
651
- - 閴?Wake/listen/think/speak/idle motion transitions are non-blocking
652
- - 閴?Timer-finished motion feedback is implemented
653
- - 閴?Gesture detection publishes recognized gesture label and confidence to Home Assistant entities
654
- - 閴?Voice phases and HA state reactions share one built-in behavior dispatcher
655
-
656
- **Current Behavior**:
657
-
658
- | Voice Assistant Event | Actual Action | Implementation Status |
659
- |----------------------|---------------|----------------------|
660
- | Wake word detected | Turn toward sound source + listening pose | ✅ Implemented |
661
- | Listening | Attentive listening state | ✅ Implemented |
662
- | Thinking | Thinking state animation | ✅ Implemented |
663
- | Speaking | Speech-reactive motion | ✅ Implemented |
664
- | Timer completed | Alert shake motion | ✅ Implemented |
665
- | Manual emotion trigger | Play via ESPHome `emotion` entity | ✅ Implemented |
666
-
667
- **Deliberately Not Active In Runtime**:
668
- - Automatic emotion keyword detection from assistant text
669
- - Blocking full-action choreography during conversation
670
- - Dance/personalization layers that require user configuration
671
-
672
- **Manual Emotion Trigger Example**:
673
- ```yaml
674
- # Home Assistant automation example - Manual emotion trigger
675
- automation:
676
- - alias: "Reachy Good Morning Greeting"
677
- trigger:
678
- - platform: time
679
- at: "07:00:00"
680
- action:
681
- - service: select.select_option
682
- target:
683
- entity_id: select.reachy_mini_emotion
684
- data:
685
- option: "Happy"
686
- ```
687
-
688
- ### Phase 15 - Face Tracking (Complements DOA Turn-to-Sound) 閴?**Completed**
689
-
690
- **Goal**: Implement natural face tracking so robot looks at speaker during conversation.
691
-
692
- **Design Decision**:
693
- - 閴?DOA (Direction of Arrival): Used once at wakeup to turn toward sound source
694
- - 閴?YOLO face detection: Takes over after initial turn for continuous tracking
695
- - 閴?Body follows head rotation: Body yaw automatically syncs with head yaw for natural tracking
696
- - Reason: DOA provides quick initial orientation, face tracking provides accurate continuous tracking, body following enables natural whole-body tracking similar to human behavior
697
-
698
- **Wakeup Turn-to-Sound Flow**:
699
- 1. Wake word detected → Read DOA angle once (avoid daemon pressure)
700
- 2. If DOA angle > 10°: Turn head toward sound source (80% of angle, conservative)
701
- 3. Face tracking takes over for continuous tracking during conversation
702
-
703
- **Implemented Features**:
704
-
705
- | Feature | Description | Implementation Location | Status |
706
- |---------|-------------|------------------------|--------|
707
- | DOA turn-to-sound | Turn toward speaker at wakeup | `protocol/satellite.py:_turn_to_sound_source()` | ✅ Implemented |
708
- | YOLO face detection | Uses `AdamCodd/YOLOv11n-face-detection` model | `vision/head_tracker.py` | ✅ Implemented |
709
- | Adaptive frame rate tracking | 15fps during conversation, 2fps when idle without face | `camera_server.py` | ✅ Implemented |
710
- | look_at_image() | Calculate target pose from face position | `camera_server.py` | ✅ Implemented |
711
- | Smooth return to neutral | Smooth return within 1 second after face lost | `camera_server.py` | ✅ Implemented |
712
- | face_tracking_offsets | As secondary pose overlay to motion control | `movement_manager.py` | ✅ Implemented |
713
- | Body follows head rotation | Body yaw syncs with head yaw extracted from final pose matrix | `motion/movement_manager.py:_compose_final_pose()` | ✅ Implemented (v0.8.3) |
714
- | DOA entities | `doa_angle` and `speech_detected` exposed to Home Assistant | `entity_registry.py` | ✅ Implemented |
715
- | face_detected entity | Binary sensor for face detection state | `entity_registry.py` | ✅ Implemented |
716
- | Model download retry | 3 retries, 5 second interval | `head_tracker.py` | ✅ Implemented |
717
- | Conversation mode integration | Auto-switch tracking frequency on voice assistant state change | `satellite.py` | ✅ Implemented |
718
-
719
- **Resource Optimization (v0.5.1, updated v0.6.2)**:
720
- - During conversation (listening/thinking/speaking): High-frequency tracking 15fps
721
- - Idle with face detected: High-frequency tracking 15fps
722
- - Idle without face for 5s: Low-power mode 2fps
723
- - Idle without face for 30s: Ultra-low power mode 0.5fps (every 2 seconds)
724
- - Gesture detection is switch-controlled and can run independently of face tracking
725
- - Immediately restore high-frequency tracking when face detected
726
-
727
- **Code Locations**:
728
- - `protocol/satellite.py:_turn_to_sound_source()` - DOA turn-to-sound at wakeup
729
- - `vision/head_tracker.py` - YOLO face detector (`HeadTracker` class)
730
- - `vision/camera_server.py:_capture_frames()` - Adaptive frame rate face tracking
731
- - `vision/camera_server.py:set_conversation_mode()` - Conversation mode switch API
732
- - `protocol/satellite.py:_set_conversation_mode()` - Voice assistant state integration
733
- - `motion/movement_manager.py:set_face_tracking_offsets()` - Face tracking offset API
734
- - `motion/movement_manager.py:_compose_final_pose()` - Body yaw follows head yaw (v0.8.3)
735
-
736
- **Technical Details**:
737
- ```python
738
- # vision/camera_server.py - Adaptive frame rate face tracking
739
- class MJPEGCameraServer:
740
- def __init__(self):
741
- self._fps_high = 15 # During conversation/face detected
742
- self._fps_low = 2 # Idle without face (5-30s)
743
- self._fps_idle = 0.5 # Ultra-low power (>30s without face)
744
- self._low_power_threshold = 5.0 # 5s without face switches to low power
745
- self._idle_threshold = 30.0 # 30s without face switches to idle mode
746
-
747
- def _should_run_ai_inference(self, current_time):
748
- # Conversation mode: Always high-frequency tracking
749
- if self._in_conversation:
750
- return True
751
- # High-frequency mode: Track every frame
752
- if self._current_fps == self._fps_high:
753
- return True
754
- # Low/idle power mode: Periodic detection
755
- return time.since_last_check >= 1/self._current_fps
756
-
757
- # protocol/satellite.py - Voice assistant state integration
758
- def _reachy_on_listening(self):
759
- self._set_conversation_mode(True) # Start conversation, high-frequency tracking
760
-
761
- def _reachy_on_idle(self):
762
- self._set_conversation_mode(False) # End conversation, adaptive tracking
763
-
764
- # motion/movement_manager.py - Body follows head rotation (v0.8.3)
765
- # This enables natural body rotation when tracking faces, similar to how
766
- # the reference project's sweep_look tool synchronizes body_yaw with head_yaw.
767
- def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
768
- # ... compose head pose from all motion sources ...
769
-
770
- # Extract yaw from final head pose rotation matrix
771
- # The rotation matrix uses xyz euler convention
772
- final_rotation = R.from_matrix(final_head[:3, :3])
773
- _, _, final_head_yaw = final_rotation.as_euler('xyz')
774
-
775
- # Body follows head yaw directly
776
- # SDK's automatic_body_yaw (inverse_kinematics_safe) only handles collision
777
- # prevention by clamping relative angle to max 65°, not active following
778
- body_yaw = final_head_yaw
779
-
780
- return final_head, (antenna_right, antenna_left), body_yaw
781
- ```
782
-
783
- **Body Following Head Rotation (v0.8.3)**:
784
- - SDK's `automatic_body_yaw` is only **collision protection**, not active body following
785
- - The `inverse_kinematics_safe` function with `max_relative_yaw=65°` only prevents head-body collision
786
- - To enable natural body following, `body_yaw` must be explicitly set to match `head_yaw`
787
- - Body yaw is extracted from final head pose matrix using scipy's `R.from_matrix().as_euler('xyz')`
788
- - This matches the reference project's `sweep_look.py` behavior where `target_body_yaw = head_yaw`
789
-
790
-
791
- ### Phase 16 - Cartoon Style Motion Mode (Partial) 棣冪厸
792
-
793
- **Goal**: Use SDK interpolation techniques for more expressive robot movements.
794
-
795
- **SDK Support**: `InterpolationTechnique` enum
796
- - `LINEAR` - Linear, mechanical feel
797
- - `MIN_JERK` - Minimum jerk, natural and smooth (default)
798
- - `EASE_IN_OUT` - Ease in-out, elegant
799
- - `CARTOON` - Cartoon style, with bounce effect, lively and cute
800
-
801
- **Implemented Features**:
802
- - 閴?50Hz unified control loop (`motion/movement_manager.py`) - Current stable frequency
803
- - 閴?JSON-driven animation system (`AnimationPlayer`) - Inspired by SimpleDances project
804
- - 閴?Conversation state animations (idle/listening/thinking/speaking)
805
- - 閴?Pose change detection - Only send commands on significant changes (threshold 0.005)
806
- - 閴?State query caching - 2s TTL, reduces daemon load
807
- - 閴?Smooth interpolation (ease in-out curve)
808
- - 閴?Command queue mode - Thread-safe external API
809
- - 閴?Error throttling - Prevents log explosion
810
- - 閴?Connection health monitoring - Auto-detect and recover from connection loss
811
-
812
- **Animation System (v0.5.13)**:
813
- - `AnimationPlayer` class loads animations from `conversation_animations.json`
814
- - Each animation defines: pitch/yaw/roll amplitudes, position offsets, antenna movements, frequency
815
- - Smooth transitions between animations (configurable duration)
816
- - State-to-animation mapping: idle→idle, listening→listening, thinking→thinking, speaking→speaking
817
-
818
- **Not Implemented**:
819
- - ❌ Dynamic interpolation technique switching (CARTOON/EASE_IN_OUT etc.)
820
- - ❌ Exaggerated cartoon bounce effects
821
-
822
- **Code Locations**:
823
- - `motion/animation_player.py` - AnimationPlayer class
824
- - `animations/conversation_animations.json` - Animation definitions
825
- - `motion/movement_manager.py` - 50Hz control loop with animation integration
826
-
827
- **Scene Implementation Status**:
828
-
829
- | Scene | Recommended Interpolation | Effect | Status |
830
- |-------|--------------------------|--------|--------|
831
- | Wake nod | `CARTOON` | Lively bounce effect | ❌ Not implemented |
832
- | Thinking head up | `EASE_IN_OUT` | Elegant transition | ✅ Implemented (smooth interpolation) |
833
- | Speaking micro-movements | `MIN_JERK` | Natural and fluid | ✅ Implemented (SpeechSway) |
834
- | Error head shake | `CARTOON` | Exaggerated denial | ❌ Not implemented |
835
- | Return to neutral | `MIN_JERK` | Smooth return | ✅ Implemented |
836
- | Idle breathing | - | Subtle sense of life | ✅ Implemented (BreathingAnimation) |
837
-
838
- ### Phase 17 - Antenna Sync Animation During Speech (Completed) 閴?
839
- **Goal**: Antennas sway with audio rhythm during TTS playback, simulating "speaking" effect.
840
-
841
- **Implemented Features**:
842
- - 閴?JSON-driven animation system with antenna movements
843
- - 閴?Different antenna patterns: "both" (sync), "wiggle" (opposite phase)
844
- - 閴?State-specific antenna animations (listening/thinking/speaking)
845
- - 閴?Smooth transitions between animation states
846
- - 閴?v1.0.0 idle refinement: idle antenna sway disabled while conversation-state antenna behaviors are retained
847
- - 閴?v1.0.0 hardware refinement: antenna torque disabled in `IDLE` to reduce idle chatter/noise
848
-
849
- **Code Locations**:
850
- - `motion/animation_player.py` - AnimationPlayer with antenna offset calculation
851
- - `animations/conversation_animations.json` - Antenna amplitude and pattern definitions
852
- - `motion/movement_manager.py` - Antenna offset composition in final pose
853
-
854
- ### Phase 18 - Visual Gaze Interaction (Single-face only) 閴?
855
- **Goal**: Use camera to detect faces for eye contact.
856
-
857
- **SDK Support**:
858
- - `look_at_image(u, v)` - Look at point in image
859
- - `look_at_world(x, y, z)` - Look at world coordinate point
860
- - `media.get_frame()` - Get camera frame (閴?Already implemented in `vision/camera_server.py:146`)
861
-
862
- **Current Status**:
863
-
864
- | Feature | Description | Status |
865
- |---------|-------------|--------|
866
- | Face detection | YOLO-based face detection (`AdamCodd/YOLOv11n-face-detection`) | ✅ Implemented |
867
- | Eye tracking | Robot tracks detected face during conversation/active mode | ✅ Implemented |
868
- | Idle scanning | Random look-around in idle cycles (switch-controlled) | ✅ Implemented |
869
-
870
- > Scope note: Current implementation is intentionally single-face tracking for stability and device performance.
871
-
872
- ### Phase 19 - Gravity Compensation Interactive Mode (Historical / Not Current Target)
873
-
874
- This was an exploration direction for manual teaching workflows.
875
-
876
- **Current Runtime Position**:
877
- - The zero-config runtime does not depend on a teaching flow
878
- - No user-facing teaching interaction is exposed as a core feature
879
- - If gravity-compensation support is revisited, it should remain optional and not become a required setup path
880
-
881
- ### Phase 20 - Environment Awareness Response (Partial) 棣冪厸
882
-
883
- **Goal**: Use IMU sensors to sense environment changes and respond.
884
-
885
- **SDK Support**:
886
- - 閴?`mini.imu["accelerometer"]` - Accelerometer (Phase 7 implemented as entity)
887
- - 閴?`mini.imu["gyroscope"]` - Gyroscope (Phase 7 implemented as entity)
888
-
889
- **Implemented Features**:
890
-
891
- | Feature | Description | Status |
892
- |---------|-------------|--------|
893
- | Continuous conversation | Controlled via Home Assistant switch | ✅ Implemented |
894
- | IMU sensor entities | Accelerometer and gyroscope exposed to HA | ✅ Implemented |
895
-
896
- > **Note**: Tap-to-wake feature was removed in v0.5.16 due to false triggers from robot movement. Continuous conversation is now controlled via Home Assistant switch.
897
-
898
- **Not Implemented**:
899
-
900
- | Detection Event | Response Action | Status |
901
- |-----------------|-----------------|--------|
902
- | Being shaken | Play dizzy action + voice "Don't shake me~" | ❌ Not implemented |
903
- | Tilted/fallen | Play help action + voice "I fell, help me" | ❌ Not implemented |
904
- | Long idle | Enter sleep animation | ❌ Not implemented |
905
-
906
- ### Phase 21 - Home Assistant Orchestration Scope
907
-
908
- The current runtime already exposes the main zero-config controls needed by Home Assistant:
909
-
910
- - `services_suspended`
911
- - `idle_behavior_enabled`
912
- - `continuous_conversation`
913
- - `emotion`
914
- - gesture / face / diagnostic sensors
915
-
916
- More elaborate scene orchestration remains intentionally outside the core runtime scope unless it can be delivered without introducing user configuration burden.
917
-
918
-
919
- ---
920
-
921
- ## 📊 Feature Implementation Summary
922
-
923
- ### ✅ Completed Features
924
-
925
- #### Core Voice Assistant (Phase 1-12)
926
- - **ESPHome entities** - Core phases implemented (Phase 11 LED intentionally disabled); exact count evolves by release
927
- - **Basic voice interaction** - Wake word detection (microWakeWord/openWakeWord), STT/TTS integration
928
- - **Motion feedback** - Nod, shake, gaze and other basic actions
929
- - **Audio path** - local wake word / stop word detection plus HA-managed STT/TTS
930
- - **Camera stream** - MJPEG live preview with ESPHome Camera entity
931
-
932
- #### Extended Features (Phase 13-22)
933
- - **Phase 13** ✅ - Sendspin multi-room audio support
934
- - **Phase 14** ✅ - Manual emotion playback + non-blocking motion feedback
935
- - **Phase 15** ✅ - Face tracking with body following (DOA + YOLO + body_yaw sync)
936
- - **Phase 16** ✅ - JSON-driven animation system (50Hz control loop)
937
- - **Phase 17** ✅ - Antenna sync animation during speech
938
- - **Phase 22** ✅ - Gesture detection (HaGRID ONNX, 18 gestures)
939
-
940
- ### 🟡 Partially Implemented Features
941
-
942
- - **Phase 20** - IMU sensor entities are exposed; higher-level trigger logic is intentionally minimal
943
-
944
- ### ❌ Not Implemented Features
945
-
946
- - Zero-config scene orchestration beyond the provided runtime switches and blueprint defaults
947
-
948
- ---
949
-
950
- ## Feature Priority Summary (Updated v1.0.6)
951
-
952
- ### Completed ✅
953
- - ✅ **Phase 1-12**: Core ESPHome entities and voice assistant
954
- - ✅ **Phase 13**: Sendspin audio playback
955
- - ✅ **Phase 14**: Emotion playback and motion feedback
956
- - ✅ **Phase 15**: Face tracking with body following
957
- - ✅ **Phase 16**: JSON-driven animation system
958
- - ✅ **Phase 17**: Antenna sync animation + v1.0.0 idle antenna behavior refinements
959
- - ✅ **Phase 21**: Continuous conversation switch
960
- - ✅ **Phase 22**: Gesture detection
961
- - ✅ **Phase 23**: Face detection sensor
962
- - ✅ **Phase 24**: System diagnostics entities
963
-
964
- ### Partial 🟡
965
- - 🟡 **Phase 20**: Environment awareness (IMU entities done, triggers pending)
966
-
967
- ### Not Implemented ❌
- - ❌ Zero-config scene orchestration layer beyond current runtime behavior
968
-
969
- ---
970
-
971
- ## 📈 Completion Statistics
972
-
973
- | Phase | Status | Completion | Notes |
974
- |-------|--------|------------|-------|
975
- | Phase 1-12 | ✅ Complete | 100% | Core ESPHome entities implemented (Phase 11 LED intentionally disabled) |
976
- | Phase 13 | ✅ Complete | 100% | Sendspin audio playback support |
977
- | Phase 14 | ✅ Complete | 100% | Manual emotion playback and non-blocking motion feedback |
978
- | Phase 15 | ✅ Complete | 100% | Face tracking with DOA, YOLO detection, body follows head |
979
- | Phase 16 | ✅ Complete | 100% | JSON-driven animation system (50Hz control loop) |
980
- | Phase 17 | ✅ Complete | 100% | Antenna sync animation during speech |
981
- | Phase 18 | ✅ Complete | 100% | Single-face visual gaze interaction with idle scanning |
982
- | Phase 19 | Not a current runtime target | - | Historical planning item, not part of the zero-config runtime model |
983
- | Phase 20 | 🟡 Partial | 30% | IMU sensors exposed, missing trigger logic |
984
- | Phase 21 | ✅ Complete | 100% | Continuous conversation switch implemented |
985
- | Phase 22 | ✅ Complete | 100% | Gesture detection with HaGRID ONNX models |
986
- | Phase 23 | ✅ Complete | 100% | Face detection sensor exposed |
987
- | Phase 24 | ✅ Complete | 100% | System diagnostics entities (9 sensors) |
988
- | **v0.9.5** | ✅ Complete | 100% | Modular architecture refactoring |
989
- | **v1.0.0** | ✅ Complete | 100% | Runtime toggles/persistence (Sendspin, face, gesture, confidence) + idle and gesture stability updates |
990
-
991
- **Overall Completion**: current zero-config runtime path is functionally complete; remaining gaps are optional orchestration ideas rather than missing core runtime features.
992
-
993
-
994
- ---
995
-
996
- ## 🔧 Daemon Crash Fix (2025-01-05)
997
-
998
- ### Problem Description
999
- During long-term operation, `reachy_mini daemon` would crash, causing robot to become unresponsive.
1000
-
1001
- ### Root Cause
1002
- 1. **50Hz control loop** - Current stable frequency for motion control
1003
- 2. **Frequent state queries** - Every entity state read calls `get_status()`, `get_current_head_pose()` etc.
1004
- 3. **Missing change detection** - Even when pose hasn't changed, continues sending same commands
1005
- 4. **Zenoh message queue blocking** - Accumulated 150+ messages per second, daemon cannot process in time
1006
-
1007
- ### Fix Solution
1008
-
1009
- #### 1. Control loop frequency (motion/movement_manager.py)
1010
- ```python
1011
- # Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
1012
- # Current stable frequency for production use
1013
- CONTROL_LOOP_FREQUENCY_HZ = 50 # Current stable frequency
1014
- ```
1015
-
1016
- #### 2. Add pose change detection (movement_manager.py)
1017
- ```python
1018
- # Only send commands on significant pose changes
1019
- if self._last_sent_pose is not None:
1020
- max_diff = max(abs(pose[k] - self._last_sent_pose.get(k, 0.0)) for k in pose.keys())
1021
- if max_diff < 0.001: # Threshold: 0.001 rad or 0.001 m
1022
- return # Skip sending
1023
- ```
1024
-
1025
- #### 3. State query caching (reachy_controller.py)
1026
- ```python
1027
- # Cache daemon status query results
1028
- self._cache_ttl = 0.1 # 100ms TTL
1029
- self._last_status_query = 0.0
1030
-
1031
- def _get_cached_status(self):
1032
- now = time.time()
1033
- if now - self._last_status_query < self._cache_ttl:
1034
- return self._state_cache.get('status') # Use cache
1035
- # ... query and update cache
1036
- ```
1037
-
1038
- #### 4. Head pose query caching (reachy_controller.py)
1039
- ```python
1040
- # Cache get_current_head_pose() and get_current_joint_positions() results
1041
- def _get_cached_head_pose(self):
1042
- # Reuse cached results within 100ms
1043
- ```
1044
-
1045
- ### Fix Results
1046
-
1047
- | Metric | Before Fix | After Fix | Improvement |
1048
- |--------|------------|-----------|-------------|
1049
- | Control message frequency | ~100 msg/s | ~20 msg/s | ↓ 80% |
1050
- | State query frequency | ~50 msg/s | ~5 msg/s | ↓ 90% |
1051
- | Total Zenoh messages | ~150 msg/s | ~25 msg/s | ↓ 83% |
1052
- | Daemon CPU load | Sustained high load | Normal load | Significantly reduced |
1053
- | Expected stability | Crash within hours | Stable for days | Major improvement |
1054
-
1055
- ### Related Files
1056
- - `DAEMON_CRASH_FIX_PLAN.md` - Detailed fix plan and test plan
1057
- - `movement_manager.py` - Control loop optimization
1058
- - `reachy_controller.py` - State query caching
1059
-
1060
- ### Future Optimization Suggestions
1061
- 1. 鈴?Dynamic frequency adjustment - 50Hz during motion, 5Hz when idle
1062
- 2. 鈴?Batch state queries - Get all states at once
1063
- 3. 鈴?Further runtime efficiency tuning after real usage profiling
1064
-
1065
- ---
1066
-
1067
- ## 🔧 Daemon Crash Deep Fix (2026-01-07)
1068
-
1069
- > **Update (2026-01-30)**: Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity. The pose change threshold (0.005) and state cache TTL (2s) optimizations remain in place to reduce unnecessary Zenoh messages.
1070
-
1071
- ### Problem Description
1072
- During long-term operation, `reachy_mini daemon` still crashes, previous fix not thorough enough.
1073
-
1074
- ### Root Cause Analysis
1075
-
1076
- Through deep analysis of SDK source code:
1077
-
1078
- 1. **Each `set_target()` sends 3 Zenoh messages**
1079
- - `set_target_head_pose()` - 1 message
1080
- - `set_target_antenna_joint_positions()` - 1 message
1081
- - `set_target_body_yaw()` - 1 message
1082
-
1083
- 2. **Daemon control loop is 50Hz**
1084
- - See `reachy_mini/daemon/backend/robot/backend.py`: `control_loop_frequency = 50.0`
1085
- - If message send frequency exceeds 50Hz, daemon may not process in time
1086
-
1087
- 3. **Previous 20Hz control loop still too high**
1088
- - 20Hz × 3 messages = 60 messages/second
1089
- - Already exceeds daemon's 50Hz processing capacity
1090
-
1091
- 4. **Pose change threshold too small (0.002)**
1092
- - Breathing animation, speech sway, face tracking continuously produce tiny changes
1093
- - Almost every loop triggers `set_target()`
1094
-
1095
- ### Fix Solution
1096
-
1097
- #### 1. Control loop frequency history (motion/movement_manager.py)
1098
- ```python
1099
- # Evolution: 100Hz -> 20Hz -> 10Hz -> 50Hz (current)
1100
- # Current stable frequency for production use
1101
- CONTROL_LOOP_FREQUENCY_HZ = 50 # Current (2026-01-30)
1102
- ```
1103
-
1104
- #### 2. Increase pose change threshold (movement_manager.py)
1105
- ```python
1106
- # Increased from 0.002 to 0.005
1107
- # 0.005 rad ≈ 0.29 degrees, still smooth enough
1108
- self._pose_change_threshold = 0.005
1109
- ```
1110
-
1111
- #### 3. Reduce camera/face tracking frequency (camera_server.py)
1112
- ```python
1113
- # Reduced from 15fps to 10fps
1114
- fps: int = 10
1115
- ```
1116
-
1117
- #### 4. Increase state cache TTL (reachy_controller.py)
1118
- ```python
1119
- # Increased from 1 second to 2 seconds
1120
- self._cache_ttl = 2.0
1121
- ```
1122
-
1123
- ### Fix Results
1124
-
1125
- > **Note**: Current implementation uses 50Hz control loop as of 2026-01-30. The table below shows historical evolution.
1126
-
1127
- | Metric | Before (20Hz) | After (10Hz) | Current (50Hz) |
1128
- |--------|---------------|--------------|-----------------|
1129
- | Control loop frequency | 20 Hz | 10 Hz | 50 Hz (current) |
1130
- | Max Zenoh messages | 60 msg/s | 30 msg/s | ~50 msg/s (optimized) |
1131
- | Actual messages (with change detection) | ~40 msg/s | ~15 msg/s | ~30 msg/s |
1132
- | Face tracking frequency | 15 Hz | 10 Hz | Adaptive (2-15 Hz) |
1133
- | State cache TTL | 1 second | 2 seconds | 2 seconds |
1134
- | Expected stability | Crash within hours | Stable operation | Stable (daemon updated) |
1135
-
1136
- ### Key Finding
1137
-
1138
- Current implementation uses 50Hz control loop for stability and performance. The control loop frequency aligns with daemon backend processing capacity.
1139
-
1140
- ### Related Files
1141
- - `motion/movement_manager.py` - Control loop frequency and pose threshold
1142
- - `vision/camera_server.py` - Face tracking frequency
1143
- - `reachy_controller.py` - State cache TTL
1144
-
1145
-
1146
- ---
1147
-
1148
- ## 🔧 Microphone Sensitivity Optimization (2026-01-07)
1149
-
1150
- > Historical background only. These notes describe earlier low-level microphone tuning experiments and should not be read as current Home Assistant entity capabilities.
1151
-
1152
- ### Problem
1153
- Low microphone sensitivity - Need to be very close for voice recognition.
1154
-
1155
- ### Solution
1156
- Comprehensive ReSpeaker XVF3800 microphone optimization:
1157
-
1158
- | Parameter | Default | Optimized | Notes |
1159
- |-----------|---------|-----------|-------|
1160
- | AGC | Off | On | Auto volume normalization |
1161
- | AGC max gain | ~15dB | 30dB | Better distant speech pickup |
1162
- | AGC target level | -25dB | -18dB | Stronger output signal |
1163
- | Microphone gain | 1.0x | 2.0x | Base gain doubled |
1164
- | Noise suppression | ~0.5 | 0.15 | Reduced speech mis-suppression |
1165
-
1166
- ### Result
1167
- Microphone sensitivity improved from ~30cm to ~2-3m effective range.
1168
-
1169
- ---
1170
-
1171
- ## 🔧 v0.5.1 Bug Fixes (2026-01-08)
1172
-
1173
- ### Issue 1: Music Not Resuming After Voice Conversation
1174
-
1175
- **Fix**: Sendspin now connects to `music_player` instead of `tts_player`
1176
-
1177
- ### Issue 2: Audio Conflict During Voice Assistant Wakeup
1178
-
1179
- **Fix**: Added `pause_sendspin()` and `resume_sendspin()` methods to `audio/audio_player.py`
1180
-
1181
- ### Issue 3: Sendspin Sample Rate Optimization
1182
-
1183
- **Fix**: Prioritize 16kHz in Sendspin supported formats (hardware limitation)
1184
-
1185
- ---
1186
-
1187
- ## 🔧 v0.5.15 Updates (2026-01-11)
1188
-
1189
- ### Feature 1: Audio Settings Persistence
1190
-
1191
- Historical note: older audio processing preferences were once persisted here. The current app no longer exposes AGC or noise suppression entities.
1192
-
1193
- ### Feature 2: Sendspin Discovery Refactoring
1194
-
1195
- Moved mDNS discovery to `zeroconf.py` for better separation of concerns.
1196
-
1197
-
1198
- ---
1199
-
1200
- ### SDK Data Structure Reference
1201
-
1202
- ```python
1203
- # Motor control mode
1204
- class MotorControlMode(str, Enum):
1205
- Enabled = "enabled" # Torque on, position control
1206
- Disabled = "disabled" # Torque off
1207
- GravityCompensation = "gravity_compensation" # Gravity compensation mode
1208
-
1209
- # Daemon state
1210
- class DaemonState(Enum):
1211
- NOT_INITIALIZED = "not_initialized"
1212
- STARTING = "starting"
1213
- RUNNING = "running"
1214
- STOPPING = "stopping"
1215
- STOPPED = "stopped"
1216
- ERROR = "error"
1217
-
1218
- # Full state
1219
- class FullState:
1220
- control_mode: MotorControlMode
1221
- head_pose: XYZRPYPose # x, y, z (m), roll, pitch, yaw (rad)
1222
- head_joints: list[float] # 7 joint angles
1223
- body_yaw: float
1224
- antennas_position: list[float] # [right, left]
1225
- doa: DoAInfo # angle (rad), speech_detected (bool)
1226
-
1227
- # IMU data (wireless version only)
1228
- imu_data = {
1229
- "accelerometer": [x, y, z], # m/s²
1230
- "gyroscope": [x, y, z], # rad/s
1231
- "quaternion": [w, x, y, z], # Attitude quaternion
1232
- "temperature": float # °C
1233
- }
1234
-
1235
- # Safety limits
1236
- HEAD_PITCH_ROLL_LIMIT = [-40°, +40°]
1237
- HEAD_YAW_LIMIT = [-180°, +180°]
1238
- BODY_YAW_LIMIT = [-160°, +160°]
1239
- YAW_DELTA_MAX = 65° # Max difference between head and body yaw
1240
- ```
1241
-
1242
- ### ESPHome Protocol Implementation Notes
1243
-
1244
- ESPHome protocol communicates with Home Assistant via protobuf messages. The runtime primarily uses switch/number/select/sensor/binary_sensor/text_sensor/camera entities; button-only wake/sleep flows are historical and no longer the main control model.
1245
-
1246
- ```python
1247
- from aioesphomeapi.api_pb2 import (
1248
- # Number entity (volume/angle/confidence control)
1249
- ListEntitiesNumberResponse,
1250
- NumberStateResponse,
1251
- NumberCommandRequest,
1252
-
1253
- # Select entity (emotion)
1254
- ListEntitiesSelectResponse,
1255
- SelectStateResponse,
1256
- SelectCommandRequest,
1257
-
1258
- # Switch entity (sleep/runtime toggles)
1259
- ListEntitiesSwitchResponse,
1260
- SwitchStateResponse,
1261
- SwitchCommandRequest,
1262
-
1263
- # Sensor entity (numeric sensors)
1264
- ListEntitiesSensorResponse,
1265
- SensorStateResponse,
1266
-
1267
- # Binary Sensor entity (boolean sensors)
1268
- ListEntitiesBinarySensorResponse,
1269
- BinarySensorStateResponse,
1270
-
1271
- # Text Sensor entity (text sensors)
1272
- ListEntitiesTextSensorResponse,
1273
- TextSensorStateResponse,
1274
- )
1275
- ```
1276
-
1277
- ## Reference Projects
1278
-
1279
- - [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant)
1280
- - [pollen-robotics/reachy_mini](https://github.com/pollen-robotics/reachy_mini)
1281
- - [reachy_mini_conversation_app](https://github.com/pollen-robotics/reachy_mini_conversation_app)
1282
- - [sendspin-cli](https://github.com/Sendspin/sendspin-cli)
1283
- - [home-assistant-voice](https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml)
1284
-
1285
- ---
1286
-
1287
- ## 🔧 Code Refactoring & Improvement Plan (v0.9.5)
1288
-
1289
- > Comprehensive improvement plan based on code analysis
1290
- > Target Platform: Raspberry Pi CM4 (4GB RAM, 4-core CPU)
1291
-
1292
- ### Code Size Statistics (Updated 2026-01-19)
1293
-
1294
- | File | Original | Current | Status |
1295
- |------|----------|---------|--------|
1296
- | `movement_manager.py` | 1205 | 1260 | ⚠️ Modularized but still large |
1297
- | `voice_assistant.py` | 1097 | 1270 | ✅ Enhanced with new features |
1298
- | `satellite.py` | 1003 | 1022 | ✅ Optimized (-2%) |
1299
- | `camera_server.py` | 1070 | 1009 | ✅ Optimized (-6%) |
1300
- | `reachy_controller.py` | 878 | 961 | ✅ Enhanced |
1301
- | `entity_registry.py` | 1129 | 844 | ✅ Optimized (-25%) |
1302
- | `audio_player.py` | 599 | 679 | ✅ Acceptable |
1303
- | `core/service_base.py` | - | 552 | 🆕 New module |
1304
- | `entities/entity_factory.py` | - | 440 | 🆕 New module |
1305
-
1306
- > **Optimization Notes**:
1307
- > - `entity_registry.py`: Factory pattern refactoring reduced 285 lines
1308
- > - `camera_server.py`: Using `FaceTrackingInterpolator` module reduced 61 lines
1309
- > - `protocol/satellite.py`: Runtime paths are now centered on voice state handling and HA event reactions
1310
- > - New modular architecture with 6 sub-packages: `core/`, `motion/`, `vision/`, `audio/`, `entities/`, `protocol/`
1311
-
1312
- ### New Module List (Updated 2026-01-19)
1313
-
1314
- | Directory | Module | Lines | Description |
1315
- |-----------|--------|-------|-------------|
1316
- | `core/` | `config.py` | 454 | Centralized nested configuration |
1317
- | `core/` | `service_base.py` | 552 | Suspend/resume service helpers + RobustOperationMixin |
1318
- | `core/` | `system_diagnostics.py` | 250 | System diagnostics |
1319
- | `core/` | `exceptions.py` | 68 | Custom exception classes |
1320
- | `core/` | `util.py` | 28 | Utility functions |
1321
- | `motion/` | `antenna.py` | - | Antenna freeze/unfreeze control |
1322
- | `motion/` | `pose_composer.py` | - | Pose composition utilities |
1323
- | `motion/` | `command_runtime.py` | - | Command queue handling / state transitions |
1324
- | `motion/` | `control_runtime.py` | - | Control-loop runtime helpers |
1325
- | `motion/` | `idle_runtime.py` | - | Idle behavior / idle rest handling |
1326
- | `motion/` | `state_machine.py` | - | State machine definitions |
1327
- | `motion/` | `smoothing.py` | - | Smoothing/transition algorithms |
1328
- | `motion/` | `animation_player.py` | - | Animation player |
1329
- | `motion/` | `emotion_moves.py` | - | Emotion moves |
1330
- | `motion/` | `speech_sway.py` | 338 | Speech-driven head micro-movements |
1331
- | `motion/` | `reachy_motion.py` | - | Reachy motion API |
1332
- | `vision/` | `frame_processor.py` | 227 | Adaptive frame rate management |
1333
- | `vision/` | `face_tracking_interpolator.py` | 253 | Face lost interpolation |
1334
- | `vision/` | `gesture_smoother.py` | 80 | Historical gesture smoothing module; current runtime no longer depends on it |
1335
- | `vision/` | `gesture_detector.py` | 285 | HaGRID gesture detection |
1336
- | `vision/` | `head_tracker.py` | 367 | YOLO face detector |
1337
- | `vision/` | `camera_server.py` | 1009 | MJPEG camera stream server facade |
1338
- | `audio/` | `doa_tracker.py` | 206 | Direction of Arrival tracking |
1339
- | `audio/` | `microphone.py` | 219 | Hardware audio helper / legacy tuning code |
1340
- | `audio/` | `audio_player.py` | facade | AudioPlayer facade (split into playback/sendspin/local streaming modules) |
1341
- | `entities/` | `entity.py` | 402 | ESPHome base entity |
1342
- | `entities/` | `entity_factory.py` | 440 | Entity factory pattern |
1343
- | `entities/` | `entity_keys.py` | 155 | Entity key constants |
1344
- | `entities/` | `entity_extensions.py` | 258 | Extended entity types |
1345
- | `entities/` | `event_emotion_mapper.py` | 351 | HA event to emotion mapping |
1346
- | `protocol/` | `satellite.py` | 1022 | ESPHome protocol handler |
1347
- | `protocol/` | `api_server.py` | 172 | HTTP API server |
1348
- | `protocol/` | `zeroconf.py` | - | mDNS discovery |
1349
-
1350
- ### Improvement Plan Status
1351
-
1352
- #### Phase 1: Runtime Suspend/Resume Foundation ✅ Complete
1353
-
1354
- - [x] Create `core/service_base.py` - runtime suspend/resume service helpers
1355
- - [x] All required services implement `suspend()` / `resume()` methods where needed
1356
- - [x] Historical app-managed sleep/wake flow was later removed to align with the current SDK
1357
-
1358
- #### Phase 2: Code Modularization ✅ Complete
1359
-
1360
- - [x] Create new directory structure (`core/`, `motion/`, `audio/`, `vision/`, `entities/`)
1361
- - [x] Extract from `movement_manager.py` → `motion/antenna.py`, `motion/pose_composer.py`
1362
- - [x] Extract from `camera_server.py` → `vision/frame_processor.py`, `vision/face_tracking_interpolator.py`
1363
- - [x] Extract from `entity_registry.py` → `entities/entity_factory.py`, `entities/entity_keys.py`
1364
- - [x] Create `core/config.py` for centralized configuration
1365
- - [x] Ensure no circular dependencies
1366
-
1367
- #### Phase 3: Stability & Performance ✅ Complete
1368
-
1369
- - [x] Create `core/exceptions.py` - Custom exception classes
1370
- - [x] Implement `RobustOperationMixin` - Unified error handling
1371
- - [x] `CameraServer` implements Context Manager pattern
1372
- - [x] Improve `CameraServer` resource cleanup
1373
- - [x] Fix MJPEG client tracking (proper register/unregister)
1374
- - [x] Historical health/memory monitor modules were added during earlier SDK instability periods
1375
- - [x] Health/memory monitor modules were later removed after runtime simplification
1376
- - [ ] Long-running stability test (24h+)
1377
-
1378
- #### Phase 4: Feature Enhancements ✅ Complete
1379
-
1380
- - [x] Historical gesture-action runtime path explored
1381
- - [x] Gesture runtime later simplified to publish recognition results only
1382
- - [x] Create `audio/doa_tracker.py` - DOATracker
1383
- - [x] Implement sound source tracking with motion control integration
1384
- - [x] Create `entities/event_emotion_mapper.py` - EventEmotionMapper
1385
- - [x] Fold HA event behavior config into `animations/conversation_animations.json`
1386
- - [x] Add DOA tracking toggle HA entity
1387
-
1388
- ### SDK Compatibility Verification ✅ Passed
1389
-
1390
- | API Call | Status | Notes |
1391
- |----------|--------|-------|
1392
- | `set_target(head, antennas, body_yaw)` | ✅ | Correct usage |
1393
- | `goto_target()` | ✅ | Correct usage |
1394
- | `look_at_image(u: int, v: int)` | ✅ | Fixed float→int |
1395
- | `create_head_pose(degrees=False)` | ✅ | Using radians |
1396
- | `compose_world_offset()` | ✅ | SDK function correctly called |
1397
- | `linear_pose_interpolation()` | ✅ | Has fallback implementation |
1398
- | Body yaw range | ✅ | Clamped to ±160° |
1399
-
1400
- ---
1401
-
1402
- ## 🔧 v0.9.5 Updates (2026-01-19)
1403
-
1404
- ### Major Changes: Modular Architecture Refactoring
1405
-
1406
- The codebase has been restructured into a modular architecture with 5 sub-packages:
1407
-
1408
- | Package | Purpose | Key Modules |
1409
- |---------|---------|-------------|
1410
- | `core/` | Core infrastructure | `config.py`, `service_base.py`, `system_diagnostics.py` |
1411
- | `motion/` | Motion control | `antenna.py`, `pose_composer.py`, `command_runtime.py`, `control_runtime.py`, `idle_runtime.py`, `smoothing.py` |
1412
- | `vision/` | Vision processing | `frame_processor.py`, `face_tracking_interpolator.py` |
1413
- | `audio/` | Audio processing | `microphone.py`, `doa_tracker.py` |
1414
- | `entities/` | HA entity management | `entity_factory.py`, `entity_keys.py`, `event_emotion_mapper.py` |
1415
-
1416
- ### New Features
1417
-
1418
- 1. **Historical note**
1419
- - Earlier versions explored direct sleep/wake callbacks and polling-based state handling
1420
- - Current runtime no longer uses app-managed sleep/wake callbacks
1421
-
1422
- 2. **Camera runtime evolution**
1423
- - Camera lifecycle was later split into dedicated runtime/processing/http helpers
1424
- - Current runtime can fully stop camera service when `Idle Behavior` is disabled
1425
-
1426
- ### Audio Optimizations
1427
-
1428
- | Parameter | Before | After | Improvement |
1429
- |-----------|--------|-------|-------------|
1430
- | Audio chunk size | 1024 samples | 512 samples | 64ms → 32ms latency with lower CPU load |
1431
- | Audio loop delay | 10ms | 1ms | Faster VAD response |
1432
- | Stereo→Mono | Mean of channels | First channel | Cleaner signal |
1433
-
1434
- ### Code Quality Improvements
1435
-
1436
- - Removed all legacy/compatibility code
1437
- - Centralized configuration in nested dataclasses
1438
- - NaN/Inf cleaning in audio pipeline
1439
- - Rotation clamping in face tracking to prevent IK collisions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -9,6 +9,7 @@ short_description: Deep integration of Reachy Mini robot with Home Assistant
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
 
12
  - reachy_mini_home_assistant
13
  - home_assistant
14
  - homeassistant
 
9
  tags:
10
  - reachy_mini
11
  - reachy_mini_python_app
12
+ - reachy_mini_ha_voice
13
  - reachy_mini_home_assistant
14
  - home_assistant
15
  - homeassistant
changelog.json CHANGED
@@ -1,274 +1,4 @@
1
- [ {
2
- "version": "1.0.7",
3
- "date": "2026-05-05",
4
- "changes": [
5
- "Build: Bump package version to 1.0.7",
6
- "Change: Align audio runtime with current SDK patterns by splitting local TTS playback from Sendspin-capable music playback and moving wakeword/stopword loading into shared helpers",
7
- "Change: Raise the Reachy Mini SDK baseline to reachy-mini>=1.7.1",
8
- "Fix: Keep wakeup and TTS playback on the local player path while binding both local and Sendspin players to shared speech sway helpers",
9
- "Fix: Synchronize Idle Behavior shutdown with ESPHome face and gesture switches plus runtime state updates",
10
- "Fix: Remove obsolete runtime monitor modules that are no longer needed with the current SDK behavior",
11
- "Optimize: Tighten Sendspin buffering with proactive backpressure and cleaner local queue handling"
12
- ]
13
- },
14
- {
15
- "version": "1.0.6",
16
- "date": "2026-05-01",
17
- "changes": [
18
- "Build: Bump package version to 1.0.6",
19
- "Change: Align pyproject.toml with the current Reachy Mini SDK baseline (reachy-mini>=1.7.0, Python>=3.12, zeroconf>=0.131,<1, aiohttp, websockets>=12,<16, and gstreamer-bundle==1.28.1 on non-Linux)",
20
- "Change: Align Sendspin dependency with the current upstream client line via aiosendspin>=5.1,<6.0",
21
- "Fix: Fetch camera snapshot frames on demand when the MJPEG cache is empty so Home Assistant camera proxy requests keep working with the Reachy Mini SDK 1.7.0 media pull model",
22
- "Optimize: Stop the camera server entirely when Idle Behavior is disabled instead of only unloading vision models"
23
- ]
24
- },
25
- {
26
- "version": "1.0.5",
27
- "date": "2026-04-12",
28
- "changes": [
29
- "Build: Bump package version to 1.0.5",
30
- "Change: Remove app-managed robot sleep/wake handling because the current Reachy Mini SDK no longer allows mini apps to stay active while the robot enters sleep",
31
- "Change: Limit resource suspend/resume to ESPHome-driven runtime toggles such as Home Assistant disconnect, mute, camera disable, and service recovery",
32
- "Change: Align pyproject.toml runtime constraints with the current Reachy Mini reference SDK package (reachy-mini>=1.6.3, websockets>=12,<16, Python baseline >=3.10, and uv gstreamer metadata)",
33
- "Remove: Delete SleepManager integration and app-side sleep/wake callback flow from the voice assistant runtime",
34
- "Remove: Delete Home Assistant sleep control entities and internal robot sleep state tracking from the mini app"
35
- ]
36
- },
37
- {
38
- "version": "1.0.4",
39
- "date": "2026-03-19",
40
- "changes": [
41
- "Build: Bump package version to 1.0.4",
42
- "Fix: Align Reachy Mini integration with current SDK assumptions by removing legacy compatibility paths and private client health checks",
43
- "Fix: Replace direct SDK private _respeaker access with audio_control_utils-based ReSpeaker initialization",
44
- "Fix: Tighten camera and pose composition to require current SDK media/utils APIs and valid look_at_image inputs",
45
- "Improve: Unify idle behavior into a single persisted Home Assistant entity and remove old idle compatibility aliases",
46
- "Improve: Replace separate wake/sleep buttons with a single sleep control entity",
47
- "Improve: Update Sendspin integration for current aiosendspin lifecycle, stream handling, listener cleanup, and synchronized buffering",
48
- "Improve: Standardize daemon URL usage on shared config across controller, sleep manager, and daemon monitor"
49
- ]
50
- },
51
- {
52
- "version": "1.0.3",
53
- "date": "2026-03-07",
54
- "changes": [
55
- "Build: Bump package version to 1.0.3",
56
- "New: Add Idle Random Actions switch in Home Assistant with preferences persistence and startup restore",
57
- "New: Add configurable idle_random_actions action presets in conversation_animations.json for centralized idle motion tuning",
58
- "Fix: Remove duplicate idle_random_actions fields/methods and complete runtime control wiring in controller/entity registry/movement manager",
59
- "Improve: Increase idle breathing and antenna sway cadence to 0.24Hz with wiggle antenna profile for more natural standby motion",
60
- "Optimize: Remove set_target global rate limiting and unchanged-pose skip gating to continuously stream motion commands each control tick",
61
- "Optimize: Remove idle antenna slew-rate limiter so antenna motion follows animation waveforms directly for reference-like smoothness"
62
- ]
63
- },
64
- {
65
- "version": "1.0.2",
66
- "date": "2026-03-06",
67
- "changes": [
68
- "Build: Bump package version to 1.0.2",
69
- "Fix: Restore idle antenna sway animation and tune idle breathing parameters to reduce perceived stiffness",
70
- "Fix: Reintroduce idle anti-chatter smoothing/deadband for antenna and body updates to reduce mechanical jitter/noise",
71
- "Fix: Switch sleep/wake control to daemon API (start/stop with wake_up/goto_sleep) so /api/daemon/status reflects real sleep state on SDK 1.5",
72
- "Fix: Normalize daemon status parsing for SDK 1.5 object-based status responses",
73
- "Fix: Remove all app-side antenna power on/off operations to avoid SDK instability and external-control conflicts",
74
- "Change: Keep idle antenna behavior as animation-only control (no torque coupling)",
75
- "Change: Tighten preference loading to current schema (no legacy config fallback filtering)",
76
- "Fix: Sync Idle Motion toggle with Idle Antenna Motion toggle for expected behavior in ESPHome",
77
- "Fix: Remove legacy app-managed audio routing hooks and rely on native SDK/system audio selection",
78
- "New: Add Home Assistant blueprint for Reachy presence companion automation",
79
- "Improve: Blueprint supports device-first auto-binding and richer usage instructions",
80
- "Docs: Refresh landing page (index.html) with current version, GitHub source link, and new Blueprint/Auto Release capability cards",
81
- "New: Add GitHub workflow to auto-create releases when pyproject/changelog version updates produce a new tag",
82
- "Chore: Ignore local wiki workspace artifacts (local/) from repository tracking"
83
- ]
84
- },
85
- {
86
- "version": "1.0.1",
87
- "date": "2026-03-05",
88
- "changes": [
89
- "Build: Bump package version to 1.0.1",
90
- "Deps: Update runtime dependency baseline to reachy-mini>=1.5.0",
91
- "Fix: Remove legacy Zenoh 7447 startup precheck for SDK v1.5 compatibility",
92
- "Fix: Remove legacy ZError string matching from connection error handling",
93
- "Fix: Adapt daemon status handling to SDK v1.5 DaemonStatus object (prevents AttributeError on status.get)",
94
- "Fix: Harden stop-word handling with runtime activation/deactivation and mute-aware trigger gating",
95
- "Fix: Align wakeup stream start timing with reference behavior (start microphone stream after wakeup sound)",
96
- "Fix: Improve TTS streaming robustness and reduce cutoffs with retry-based audio push",
97
- "Optimize: Support single-request streaming with in-memory fallback cache for one-time TTS URLs (no temp file dependency)",
98
- "Optimize: Lower streaming fetch chunk size and apply unthrottled preroll for faster first audio"
99
- ]
100
- },
101
- {
102
- "version": "1.0.0",
103
- "date": "2026-03-04",
104
- "changes": [
105
- "Build: Bump package version to 1.0.0",
106
- "Deps: Require reachy-mini[gstreamer]>=1.4.1",
107
- "Fix: Improve gesture responsiveness and stability (faster smoothing, min processing cadence, no-gesture alignment)",
108
- "Fix: Auto-match ONNX gesture input size from model shape to prevent INVALID_ARGUMENT dimension errors",
109
- "New: Add Sendspin switch in ESPHome (default OFF, persistent, runtime enable/disable)",
110
- "New: Add Face Tracking and Gesture Detection switches in ESPHome (both default OFF, persistent)",
111
- "New: Add Face Confidence number entity (0.0-1.0, persistent)",
112
- "Optimize: Unload/reload face and gesture models when toggled off/on to save resources",
113
- "Optimize: Idle behavior updated to breathing + look-around alternation, idle antenna sway disabled",
114
- "Optimize: Adjust idle breathing to human-like cadence",
115
- "Fix: Disable antenna torque in idle mode and re-enable outside idle to reduce chatter/noise",
116
- "Fix: Harden startup against import-time failures (lazy emotion library loading and graceful Sendspin disable)",
117
- "Fix: Enforce deterministic audio startup path and fail fast when microphone capture is not ready",
118
- "Optimize: Make MJPEG streaming viewer-aware (skip continuous JPEG encode/push when no stream clients)",
119
- "Optimize: Keep face/gesture AI processing active even when stream viewers are absent",
120
- "Fix: Add on-demand /snapshot JPEG generation when no cached stream frame is available",
121
- "Change: Use camera backend default FPS/resolution for stream path instead of forcing fixed 1080p/25fps"
122
- ]
123
- },
124
- {
125
- "version": "0.9.9",
126
- "date": "2026-01-28",
127
- "changes": [
128
- "Fix: Audio buffer overflow - require Reachy Mini hardware, use only Reachy microphone with 50ms sleep",
129
- "Optimize: Gesture detection sensitivity - remove all confidence filtering, return all detections to Home Assistant",
130
- "Optimize: Gesture detection now runs at 1 frame interval for maximum responsiveness",
131
- "Refactor: Simplify GestureSmoother to frequency-based confirmation (1 frame)",
132
- "Refactor: Remove unused parameters (confidence_threshold, detection_threshold, GestureConfig)",
133
- "Fix: Remove duplicate empty check in gesture detection",
134
- "Optimize: SDK integration - add MediaBackend detection and proper resource cleanup",
135
- "Document: ReSpeaker private attribute access risk with TODO comments"
136
- ]
137
- },
138
- {
139
- "version": "0.9.8",
140
- "date": "2026-01-27",
141
- "changes": [
142
- "New: Mute switch and Disable Camera entities for granular control",
143
- "Fix: Camera disable logic and daemon crash prevention",
144
- "New: Home Assistant connection-driven feature loading with auto suspend/resume",
145
- "Optimize: Reduce log output by 30-40%",
146
- "Fix: Code quality improvements",
147
- "Fix: SDK crash during idle - optimize audio processing and add GStreamer threading lock",
148
- "Optimize: Bundle face tracking model, use SDK Zenoh for daemon monitoring",
149
- "Simplify: Device ID reads /etc/machine-id directly",
150
- "Clean up: Remove unused config items"
151
- ]
152
- },
153
- {
154
- "version": "0.9.7",
155
- "date": "2026-01-20",
156
- "changes": [
157
- "Fix: Device ID file path corrected after util.py moved to core/ subdirectory (prevents HA seeing device as new)",
158
- "Fix: Animation file path corrected (was looking in wrong directory)",
159
- "Fix: Remove hey_jarvis from required wake words (it's optional in openWakeWord/)"
160
- ]
161
- },
162
- {
163
- "version": "0.9.6",
164
- "date": "2026-01-20",
165
- "changes": [
166
- "New: Add ruff linter/formatter and mypy type checker configuration",
167
- "New: Add pre-commit hooks for automated code quality checks",
168
- "Fix: Remove duplicate resume() method in audio_player.py",
169
- "Fix: Remove duplicate connection_lost() method in satellite.py",
170
- "Fix: Store asyncio task reference in sleep_manager.py to prevent garbage collection",
171
- "Optimize: Use dict.items() for efficient iteration in smoothing.py"
172
- ]
173
- },
174
- {
175
- "version": "0.9.5",
176
- "date": "2026-01-19",
177
- "changes": [
178
- "Refactor: Modularize codebase - new core/motion/vision/audio/entities module structure",
179
- "New: Direct callbacks for HA sleep/wake buttons to suspend/resume services",
180
- "Optimize: Audio processing latency - reduced chunk size from 1024 to 256 samples (64ms -> 16ms)",
181
- "Optimize: Audio loop delay reduced from 10ms to 1ms for faster VAD response",
182
- "Optimize: Stereo to mono conversion uses first channel instead of mean for cleaner signal",
183
- "Improve: Camera resume_from_suspend now synchronous for reliable wake from sleep",
184
- "Improve: Rotation clamping in face tracking to prevent IK collisions"
185
- ]
186
- },
187
- {
188
- "version": "0.9.0",
189
- "date": "2026-01-18",
190
- "changes": [
191
- "New: Robot state monitor for proper sleep mode handling - services pause when robot disconnects and resume on reconnect",
192
- "New: System diagnostics entities (CPU, memory, disk, uptime) exposed as Home Assistant diagnostic sensors",
193
- "New: Phase 24 with 9 diagnostic sensors (cpu_percent, cpu_temperature, memory_percent, memory_used_gb, disk_percent, disk_free_gb, uptime_hours, process_cpu_percent, process_memory_mb)",
194
- "Fix: Voice assistant and movement manager now properly pause during robot sleep mode instead of generating error spam",
195
- "Improve: Graceful service lifecycle management with RobotStateMonitor callbacks"
196
- ]
197
- },
198
- {
199
- "version": "0.8.7",
200
- "date": "2026-01-18",
201
- "changes": [
202
- "Fix: Clamp body_yaw to safe range to prevent IK collision warnings during emotion playback",
203
- "Fix: Emotion moves and face tracking now respect SDK safety limits",
204
- "Improve: Face tracking smoothness - removed EMA smoothing (matches reference project)",
205
- "Improve: Face tracking timing updated to match reference (2s delay, 1s interpolation)"
206
- ]
207
- },
208
- {
209
- "version": "0.8.6",
210
- "date": "2026-01-18",
211
- "changes": [
212
- "Fix: Audio buffer memory leak - added size limit to prevent unbounded growth",
213
- "Fix: Temp file leak - downloaded audio files now cleaned up after playback",
214
- "Fix: Camera thread termination timeout increased for clean shutdown",
215
- "Fix: Thread-safe draining flag using threading.Event",
216
- "Fix: Silent failures now logged for debugging"
217
- ]
218
- },
219
- {
220
- "version": "0.8.5",
221
- "date": "2026-01-18",
222
- "changes": [
223
- "Fix: DOA turn-to-sound direction inverted - now turns correctly toward sound source",
224
- "Fix: Graceful shutdown prevents daemon crash on app stop"
225
- ]
226
- },
227
- {
228
- "version": "0.8.4",
229
- "date": "2026-01-18",
230
- "changes": [
231
- "Improve: Smooth idle animation with interpolation phase (matches reference BreathingMove)",
232
- "Improve: Two-phase animation - interpolates to neutral before oscillation",
233
- "Fix: Antenna frequency updated to 0.5Hz (was 0.15Hz) for more natural sway"
234
- ]
235
- },
236
- {
237
- "version": "0.8.3",
238
- "date": "2026-01-18",
239
- "changes": [
240
- "Fix: Body now properly follows head rotation during face tracking",
241
- "Fix: body_yaw extracted from final head pose matrix and synced with head_yaw",
242
- "Fix: Matches reference project sweep_look behavior for natural body movement"
243
- ]
244
- },
245
- {
246
- "version": "0.8.2",
247
- "date": "2026-01-18",
248
- "changes": [
249
- "Fix: Body now follows head rotation during face tracking - body_yaw syncs with head_yaw",
250
- "Fix: Matches reference project sweep_look behavior for natural body movement"
251
- ]
252
- },
253
- {
254
- "version": "0.8.1",
255
- "date": "2026-01-18",
256
- "changes": [
257
- "Fix: face_detected entity now pushes state updates to Home Assistant in real-time",
258
- "Fix: Body yaw simplified to match reference project - SDK automatic_body_yaw handles collision prevention",
259
- "Fix: Idle animation now starts immediately on app launch",
260
- "Fix: Smooth antenna animation - removed pose change threshold for continuous motion"
261
- ]
262
- },
263
- {
264
- "version": "0.8.0",
265
- "date": "2026-01-17",
266
- "changes": [
267
- "New: Comprehensive emotion keyword mapping with 280+ Chinese and English keywords",
268
- "New: 35 emotion categories mapped to robot expressions",
269
- "New: Auto-trigger expressions from conversation text patterns"
270
- ]
271
- },
272
  {
273
  "version": "0.7.3",
274
  "date": "2026-01-12",
@@ -663,4 +393,3 @@
663
  ]
664
  }
665
  ]
666
-
 
1
+ [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {
3
  "version": "0.7.3",
4
  "date": "2026-01-12",
 
393
  ]
394
  }
395
  ]
 
docs/USER_MANUAL_CN.md DELETED
@@ -1,244 +0,0 @@
1
- # Reachy Mini 语音助手 - 用户手册
2
-
3
- ## 系统要求
4
-
5
- ### 硬件
6
- - Reachy Mini 机器人(带 ReSpeaker XVF3800 麦克风)
7
- - WiFi 网络连接
8
-
9
- ### 软件
10
- - Home Assistant(2024.1 或更高版本)
11
- - Home Assistant 中已启用 ESPHome 集成
12
-
13
- ---
14
-
15
- ## 安装步骤
16
-
17
- ### 第一步:安装应用
18
- 从 Reachy Mini 应用商店安装 `reachy_mini_home_assistant`。
19
-
20
- ### 第二步:启动应用
21
- 应用将自动:
22
- - 在端口 6053 启动 ESPHome 服务器
23
- - 加载预打包的唤醒词模型
24
- - 通过 mDNS 注册以便自动发现
25
- - 如果网络上有 Sendspin 服务器则自动连接
26
-
27
- ### 第三步:连接 Home Assistant
28
- **自动连接(推荐):**
29
- Home Assistant 会通过 mDNS 自动发现 Reachy Mini。
30
-
31
- **手动连接:**
32
- 1. 进入 设置 → 设备与服务
33
- 2. 点击"添加集成"
34
- 3. 选择"ESPHome"
35
- 4. 输入机器人的 IP 地址和端口 6053
36
-
37
- ---
38
-
39
- ## 功能介绍
40
-
41
- ### 语音助手
42
- - **唤醒词检测**:说 "Okay Nabu" 激活(本地处理)
43
- - **停止词**:说 "Stop" 结束对话
44
- - **连续对话模式**:无需重复唤醒词即可持续对话
45
- - **语音识别/合成**:使用 Home Assistant 配置的语音引擎
46
-
47
- **支持的唤醒词:**
48
- - Okay Nabu(默认)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### 人脸追踪
54
- - 基于 YOLO 的人脸检测
55
- - 头部跟随检测到的人脸
56
- - 头部转动时身体随之旋转
57
- - 自适应帧率:活跃时 15fps,空闲时 2fps
58
- - 可在 Home Assistant 中运行时开关
59
-
60
- ### 手势检测
61
- 检测到的手势会作为实体状态同步到 Home Assistant。
62
- 当前默认运行时不会直接用手势触发机器人动作。
63
-
64
- | 输出 | 说明 |
65
- |------|------|
66
- | `gesture_detected` | 当前识别到的手势标签 |
67
- | `gesture_confidence` | 手势识别置信度 |
68
-
69
- ### 情绪响应
70
- 机器人可播放 35 种不同情绪:
71
- - 基础:开心、难过、愤怒、恐惧、惊讶、厌恶
72
- - 扩展:大笑、爱慕、骄傲、感激、热情、好奇、惊叹、害羞、困惑、沉思、焦虑、害怕、挫败、烦躁、狂怒、轻蔑、无聊、疲倦、精疲力竭、孤独、沮丧、顺从、不确定、不舒服
73
-
74
- ### 音频功能
75
- - 扬声器音量控制(0-100%)
76
- - 静音开关,可暂停/恢复语音链路
77
- - 支持唤醒提示音与计时器完成提示音
78
- - STT/TTS 由 Home Assistant 负责
79
-
80
- ### Sendspin 多房间音频
81
- - 通过 mDNS 自动发现 Sendspin 服务器
82
- - 同步多房间音频播放
83
- - Reachy Mini 作为 PLAYER 接收音频流
84
- - 语音对话时自动暂停
85
- - 无需用户配置
86
-
87
- ### DOA 声源追踪
88
- - 声源方向检测
89
- - 唤醒时机器人转向声源
90
- - 可通过开关启用/禁用
91
-
92
- ---
93
-
94
- ## Home Assistant 实体
95
-
96
- ### 阶段 1:基础状态
97
- | 实体 | 类型 | 说明 |
98
- |------|------|------|
99
- | Daemon State | 文本传感器 | 机器人守护进程状态 |
100
- | Backend Ready | 二进制传感器 | 后端连接状态 |
101
- | Mute | 开关 | 暂停/恢复语音链路 |
102
- | Speaker Volume | 数值 (0-100%) | 扬声器音量控制 |
103
- | Disable Camera | 开关 | 暂停/恢复摄像头服务 |
104
- | Idle Behavior | 开关 | 统一空闲行为:头部、天线、微动作 |
105
- | Sendspin | 开关 | 启用/禁用 Sendspin 发现与播放 |
106
- | Face Tracking | 开关 | 启用/禁用人脸跟踪 |
107
- | Gesture Detection | 开关 | 启用/禁用手势检测 |
108
- | Face Confidence | 数值 (0-1) | 人脸跟踪置信度阈值 |
109
-
110
- ### 阶段 2:睡眠与运行状态
111
- | 实体 | 类型 | 说明 |
112
- |------|------|------|
113
- | Sleep Control | 开关 | 打开表示进入睡眠,关闭表示唤醒 |
114
- | Sleep Mode | 二进制传感器 | 运行中表示唤醒,非运行表示睡眠 |
115
- | Services Suspended | 二进制传感器 | 运行中表示服务活跃 |
116
-
117
- ### 阶段 3:姿态控制
118
- | 实体 | 类型 | 范围 |
119
- |------|------|------|
120
- | Head X/Y/Z | 数值 | ±50mm |
121
- | Head Roll/Pitch/Yaw | 数值 | ±40° |
122
- | Body Yaw | 数值 | ±160° |
123
- | Antenna Left/Right | 数值 | ±90° |
124
-
125
- ### 阶段 4:注视控制
126
- | 实体 | 类型 | 说明 |
127
- |------|------|------|
128
- | Look At X/Y/Z | 数值 | 注视目标的世界坐标 |
129
-
130
- ### 阶段 5:DOA(声源定位)
131
- | 实体 | 类型 | 说明 |
132
- |------|------|------|
133
- | DOA Angle | 传感器 (°) | 声源方向 |
134
- | Speech Detected | 二进制传感器 | 语音活动检测 |
135
- | DOA Sound Tracking | 开关 | 启用/禁用 DOA 追踪 |
136
-
137
- ### 阶段 6:诊断信息
138
- | 实体 | 类型 | 说明 |
139
- |------|------|------|
140
- | Control Loop Frequency | 传感器 (Hz) | 运动控制循环频率 |
141
- | SDK Version | 文本传感器 | Reachy Mini SDK 版本 |
142
- | Robot Name | 文本传感器 | 设备名称 |
143
- | Wireless Version | 二进制传感器 | 无线版本标志 |
144
- | Simulation Mode | 二进制传感器 | 仿真模式标志 |
145
- | WLAN IP | 文本传感器 | WiFi IP 地址 |
146
- | Error Message | 文本传感器 | 当前错误 |
147
-
148
- ### 阶段 7:IMU 传感器(仅无线版本)
149
- | 实体 | 类型 | 说明 |
150
- |------|------|------|
151
- | IMU Accel X/Y/Z | 传感器 (m/s²) | 加速度计 |
152
- | IMU Gyro X/Y/Z | 传感器 (rad/s) | 陀螺仪 |
153
- | IMU Temperature | 传感器 (°C) | IMU 温度 |
154
-
155
- ### 阶段 8:情绪控制
156
- | 实体 | 类型 | 说明 |
157
- |------|------|------|
158
- | Emotion | 选择器 | 选择要播放的情绪(35 个选项)|
159
-
160
- ### 阶段 10:摄像头
161
- | 实体 | 类型 | 说明 |
162
- |------|------|------|
163
- | Camera | 摄像头 | 实时 MJPEG 流 |
164
-
165
- ### 3D 可视化卡片
166
- 可在 Home Assistant 中安装自定义 Lovelace 卡片,实时 3D 可视化 Reachy Mini 机器人。
167
-
168
- 安装地址:[ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
169
-
170
- 功能:
171
- - 实时 3D 机器人可视化
172
- - 交互式机器人状态视图
173
- - 连接机器人守护进程获取实时更新
174
-
175
- ### 阶段 21:对话
176
- | 实体 | 类型 | 说明 |
177
- |------|------|------|
178
- | Continuous Conversation | 开关 | 多轮对话模式 |
179
-
180
- ### 阶段 22:手势检测
181
- | 实体 | 类型 | 说明 |
182
- |------|------|------|
183
- | Gesture Detected | 文本传感器 | 当前手势名称 |
184
- | Gesture Confidence | 传感器 (%) | 检测置信度 |
185
-
186
- ### 阶段 23:人脸检测
187
- | 实体 | 类型 | 说明 |
188
- |------|------|------|
189
- | Face Detected | 二进制传感器 | 视野中是否有人脸 |
190
-
191
- ### 阶段 24:系统诊断
192
- | 实体 | 类型 | 说明 |
193
- |------|------|------|
194
- | CPU Percent | 传感器 (%) | CPU 使用率 |
195
- | CPU Temperature | 传感器 (°C) | CPU 温度 |
196
- | Memory Percent | 传感器 (%) | 内存使用率 |
197
- | Memory Used | 传感器 (GB) | 已用内存 |
198
- | Disk Percent | 传感器 (%) | 磁盘使用率 |
199
- | Disk Free | 传感器 (GB) | 磁盘可用空间 |
200
- | Uptime | 传感器 (hours) | 系统运行时间 |
201
- | Process CPU | 传感器 (%) | 应用 CPU 使用率 |
202
- | Process Memory | 传感器 (MB) | 应用内存使用 |
203
-
204
- ---
205
-
206
- ## 睡眠模式
207
-
208
- 运行时反应是零配置的:语音阶段、计时器提醒和 HA 状态触发情绪,共用同一套内建行为模型。
209
-
210
- ### 进入睡眠
211
- - 在 Home Assistant 中打开 `Sleep Control` 开关
212
- - 机器人放松电机、停止摄像头、暂停语音检测
213
-
214
- ### 唤醒
215
- - 在 Home Assistant 中关闭 `Sleep Control` 开关
216
- - 或说唤醒词
217
- - 机器人恢复所有功能
218
-
219
- ---
220
-
221
- ## 故障排除
222
-
223
- | 问题 | 解决方案 |
224
- |------|----------|
225
- | 不响应唤醒词 | 检查 Mute 是否关闭,减少背景噪音,并确认已连接 Home Assistant |
226
- | 人脸追踪不工作 | 确保光线充足,检查 Face Detected 传感器 |
227
- | 没有音频输出 | 检查 Speaker Volume,验证 HA 中的 TTS 引擎 |
228
- | 无法连接 HA | 确认在同一网络,检查端口 6053 |
229
- | 手势检测不到 | 确保光线充足,正对摄像头 |
230
-
231
- ---
232
-
233
- ## 快速参考
234
-
235
- ```
236
- 唤醒词: "Okay Nabu"
237
- 停止词: "Stop"
238
- ESPHome 端口: 6053
239
- 摄像头端口: 8081 (MJPEG)
240
- ```
241
-
242
- ---
243
-
244
- *Reachy Mini 语音助手 v1.0.4*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docs/USER_MANUAL_EN.md DELETED
@@ -1,244 +0,0 @@
1
- # Reachy Mini Voice Assistant - User Manual
2
-
3
- ## Requirements
4
-
5
- ### Hardware
6
- - Reachy Mini robot (with ReSpeaker XVF3800 microphone)
7
- - WiFi network connection
8
-
9
- ### Software
10
- - Home Assistant (2024.1 or later)
11
- - ESPHome integration enabled in Home Assistant
12
-
13
- ---
14
-
15
- ## Installation
16
-
17
- ### Step 1: Install the App
18
- Install `reachy_mini_home_assistant` from the Reachy Mini App Store.
19
-
20
- ### Step 2: Start the App
21
- The app will automatically:
22
- - Start the ESPHome server on port 6053
23
- - Load pre-packaged wake word models
24
- - Register with mDNS for auto-discovery
25
- - Connect to Sendspin server if available on network
26
-
27
- ### Step 3: Connect to Home Assistant
28
- **Automatic (Recommended):**
29
- Home Assistant will auto-discover Reachy Mini via mDNS.
30
-
31
- **Manual:**
32
- 1. Go to Settings → Devices & Services
33
- 2. Click "Add Integration"
34
- 3. Select "ESPHome"
35
- 4. Enter the robot's IP address and port 6053
36
-
37
- ---
38
-
39
- ## Features
40
-
41
- ### Voice Assistant
42
- - **Wake Word Detection**: Say "Okay Nabu" to activate (local processing)
43
- - **Stop Word**: Say "Stop" to end conversation
44
- - **Continuous Conversation Mode**: Keep talking without repeating wake word
45
- - **STT/TTS**: Uses Home Assistant's configured speech engines
46
-
47
- **Supported Wake Words:**
48
- - Okay Nabu (default)
49
- - Hey Jarvis
50
- - Alexa
51
- - Hey Luna
52
-
53
- ### Face Tracking
54
- - YOLO-based face detection
55
- - Head follows detected face
56
- - Body follows head when turned far
57
- - Adaptive frame rate: 15fps active, 2fps idle
58
- - Runtime switchable from Home Assistant
59
-
60
- ### Gesture Detection
61
- Detected gestures are published to Home Assistant as entity state updates.
62
- The default runtime does not trigger built-in robot actions from gestures.
63
-
64
- | Output | Description |
65
- |--------|-------------|
66
- | `gesture_detected` | Current gesture label |
67
- | `gesture_confidence` | Detection confidence |
68
-
69
- ### Emotion Responses
70
- The robot can play 35 different emotions:
71
- - Basic: Happy, Sad, Angry, Fear, Surprise, Disgust
72
- - Extended: Laughing, Loving, Proud, Grateful, Enthusiastic, Curious, Amazed, Shy, Confused, Thoughtful, Anxious, Scared, Frustrated, Irritated, Furious, Contempt, Bored, Tired, Exhausted, Lonely, Downcast, Resigned, Uncertain, Uncomfortable
73
-
74
- ### Audio Features
75
- - Speaker volume control (0-100%)
76
- - Mute switch for voice pipeline pause/resume
77
- - Wake sound and timer-finished sound playback
78
- - Home Assistant handles STT/TTS engines
79
-
80
- ### Sendspin Multi-Room Audio
81
- - Automatic discovery of Sendspin servers via mDNS
82
- - Synchronized multi-room audio playback
83
- - Reachy Mini acts as a PLAYER to receive audio streams
84
- - Auto-pause during voice conversations
85
- - No user configuration required
86
-
87
- ### DOA Sound Tracking
88
- - Direction of Arrival detection
89
- - Robot turns toward sound source on wake word
90
- - Can be enabled/disabled via switch
91
-
92
- ---
93
-
94
- ## Home Assistant Entities
95
-
96
- ### Phase 1: Basic Status
97
- | Entity | Type | Description |
98
- |--------|------|-------------|
99
- | Daemon State | Text Sensor | Robot daemon status |
100
- | Backend Ready | Binary Sensor | Backend connection status |
101
- | Mute | Switch | Suspend/resume voice pipeline |
102
- | Speaker Volume | Number (0-100%) | Speaker volume control |
103
- | Disable Camera | Switch | Suspend/resume camera service |
104
- | Idle Behavior | Switch | Unified idle motion + idle antenna + idle micro-actions |
105
- | Sendspin | Switch | Enable/disable Sendspin discovery and playback |
106
- | Face Tracking | Switch | Enable/disable face tracking |
107
- | Gesture Detection | Switch | Enable/disable gesture detection |
108
- | Face Confidence | Number (0-1) | Face tracking confidence threshold |
109
-
110
- ### Phase 2: Sleep and Runtime State
111
- | Entity | Type | Description |
112
- |--------|------|-------------|
113
- | Sleep Control | Switch | Turn on to sleep, turn off to wake |
114
- | Sleep Mode | Binary Sensor | Running when awake, not running when sleeping |
115
- | Services Suspended | Binary Sensor | Running when services are active |
116
-
117
- ### Phase 3: Pose Control
118
- | Entity | Type | Range |
119
- |--------|------|-------|
120
- | Head X/Y/Z | Number | ±50mm |
121
- | Head Roll/Pitch/Yaw | Number | ±40° |
122
- | Body Yaw | Number | ±160° |
123
- | Antenna Left/Right | Number | ±90° |
124
-
125
- ### Phase 4: Look At Control
126
- | Entity | Type | Description |
127
- |--------|------|-------------|
128
- | Look At X/Y/Z | Number | World coordinates for gaze target |
129
-
130
- ### Phase 5: DOA (Direction of Arrival)
131
- | Entity | Type | Description |
132
- |--------|------|-------------|
133
- | DOA Angle | Sensor (°) | Sound source direction |
134
- | Speech Detected | Binary Sensor | Voice activity detection |
135
- | DOA Sound Tracking | Switch | Enable/disable DOA tracking |
136
-
137
- ### Phase 6: Diagnostics
138
- | Entity | Type | Description |
139
- |--------|------|-------------|
140
- | Control Loop Frequency | Sensor (Hz) | Motion control loop rate |
141
- | SDK Version | Text Sensor | Reachy Mini SDK version |
142
- | Robot Name | Text Sensor | Device name |
143
- | Wireless Version | Binary Sensor | Wireless model flag |
144
- | Simulation Mode | Binary Sensor | Simulation flag |
145
- | WLAN IP | Text Sensor | WiFi IP address |
146
- | Error Message | Text Sensor | Current error |
147
-
148
- ### Phase 7: IMU Sensors (Wireless version only)
149
- | Entity | Type | Description |
150
- |--------|------|-------------|
151
- | IMU Accel X/Y/Z | Sensor (m/s²) | Accelerometer |
152
- | IMU Gyro X/Y/Z | Sensor (rad/s) | Gyroscope |
153
- | IMU Temperature | Sensor (°C) | IMU temperature |
154
-
155
- ### Phase 8: Emotion Control
156
- | Entity | Type | Description |
157
- |--------|------|-------------|
158
- | Emotion | Select | Choose emotion to play (35 options) |
159
-
160
- ### Phase 10: Camera
161
- | Entity | Type | Description |
162
- |--------|------|-------------|
163
- | Camera | Camera | Live MJPEG stream |
164
-
165
- ### 3D Visualization Card
166
- A custom Lovelace card is available for real-time 3D visualization of the Reachy Mini robot in Home Assistant.
167
-
168
- Install from: [ha-reachy-mini](https://github.com/Desmond-Dong/ha-reachy-mini)
169
-
170
- Features:
171
- - Real-time 3D robot visualization
172
- - Interactive view of robot state
173
- - Connects to robot daemon for live updates
174
-
175
- ### Phase 21: Conversation
176
- | Entity | Type | Description |
177
- |--------|------|-------------|
178
- | Continuous Conversation | Switch | Multi-turn conversation mode |
179
-
180
- ### Phase 22: Gesture Detection
181
- | Entity | Type | Description |
182
- |--------|------|-------------|
183
- | Gesture Detected | Text Sensor | Current gesture name |
184
- | Gesture Confidence | Sensor (%) | Detection confidence |
185
-
186
- ### Phase 23: Face Detection
187
- | Entity | Type | Description |
188
- |--------|------|-------------|
189
- | Face Detected | Binary Sensor | Face in view |
190
-
191
- ### Phase 24: System Diagnostics
192
- | Entity | Type | Description |
193
- |--------|------|-------------|
194
- | CPU Percent | Sensor (%) | CPU usage |
195
- | CPU Temperature | Sensor (°C) | CPU temperature |
196
- | Memory Percent | Sensor (%) | RAM usage |
197
- | Memory Used | Sensor (GB) | RAM used |
198
- | Disk Percent | Sensor (%) | Disk usage |
199
- | Disk Free | Sensor (GB) | Disk free space |
200
- | Uptime | Sensor (hours) | System uptime |
201
- | Process CPU | Sensor (%) | App CPU usage |
202
- | Process Memory | Sensor (MB) | App memory usage |
203
-
204
- ---
205
-
206
- ## Sleep Mode
207
-
208
- Runtime reactions are zero-config: voice phases, timer alerts, and HA state-triggered emotions use the same built-in behavior model.
209
-
210
- ### Enter Sleep
211
- - Turn on the `Sleep Control` switch in Home Assistant
212
- - Robot relaxes motors, stops camera, pauses voice detection
213
-
214
- ### Wake Up
215
- - Turn off the `Sleep Control` switch in Home Assistant
216
- - Or say the wake word
217
- - Robot resumes all functions
218
-
219
- ---
220
-
221
- ## Troubleshooting
222
-
223
- | Problem | Solution |
224
- |---------|----------|
225
- | Not responding to wake word | Check Mute is off, reduce background noise, verify Home Assistant is connected |
226
- | Face tracking not working | Ensure adequate lighting, check Face Detected sensor |
227
- | No audio output | Check Speaker Volume, verify TTS engine in HA |
228
- | Can't connect to HA | Verify same network, check port 6053 |
229
- | Gestures not detected | Ensure good lighting, face the camera directly |
230
-
231
- ---
232
-
233
- ## Quick Reference
234
-
235
- ```
236
- Wake Word: "Okay Nabu"
237
- Stop Word: "Stop"
238
- ESPHome Port: 6053
239
- Camera Port: 8081 (MJPEG)
240
- ```
241
-
242
- ---
243
-
244
- *Reachy Mini Voice Assistant v1.0.4*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
home_assistant_blueprints/reachy_mini_presence_companion.yaml DELETED
@@ -1,246 +0,0 @@
1
- blueprint:
2
- name: Reachy Mini Presence Companion
3
- description: >-
4
- Presence-driven automation for Reachy Mini in Home Assistant.
5
-
6
- How to use:
7
- 1) Select Home occupancy entity (person/group/binary_sensor).
8
- 2) Select Reachy ESPHome device (recommended).
9
- 3) Leave optional fallback entity inputs empty unless auto-binding fails.
10
- 4) Set away delay and day/night volume.
11
-
12
- What this automation does:
13
- - Occupied: Wake Reachy, enable unified idle behavior, set day volume.
14
- - Empty (after delay): Disable unified idle behavior, send Reachy to sleep.
15
- - Quiet hours start/end: Apply night/day volume while occupied.
16
-
17
- Auto-binding rules (when Reachy device is selected):
18
- - Sleep switch suffix: sleep_control
19
- - Idle behavior switch suffix: idle_behavior_enabled
20
- - Volume number suffix: speaker_volume
21
-
22
- If your entities use different names, fill optional fallback entity inputs manually.
23
- domain: automation
24
- input:
25
- occupancy_entity:
26
- name: Home occupancy entity
27
- description: Person, group, or binary sensor representing home presence.
28
- selector:
29
- entity: {}
30
-
31
- reachy_device:
32
- name: Reachy device (recommended)
33
- description: Select your Reachy ESPHome device for automatic entity binding.
34
- default: ""
35
- selector:
36
- device:
37
- filter:
38
- - integration: esphome
39
-
40
- reachy_sleep_switch:
41
- name: Sleep Control switch (optional fallback)
42
- description: Leave empty to auto-bind from Reachy device.
43
- default: ""
44
- selector:
45
- entity:
46
- domain: switch
47
-
48
- idle_behavior_switch:
49
- name: Idle Behavior switch (optional fallback)
50
- description: Leave empty to auto-bind from Reachy device.
51
- default: ""
52
- selector:
53
- entity:
54
- domain: switch
55
-
56
- reachy_volume_number:
57
- name: Speaker Volume number (optional fallback)
58
- description: Leave empty to auto-bind from Reachy device.
59
- default: ""
60
- selector:
61
- entity:
62
- domain: number
63
-
64
- away_delay_minutes:
65
- name: Away delay (minutes)
66
- description: Wait before sleeping after everyone leaves.
67
- default: 20
68
- selector:
69
- number:
70
- min: 1
71
- max: 180
72
- mode: box
73
- unit_of_measurement: min
74
-
75
- day_volume:
76
- name: Day volume
77
- default: 80
78
- selector:
79
- number:
80
- min: 0
81
- max: 100
82
- step: 1
83
- mode: slider
84
-
85
- night_volume:
86
- name: Night volume
87
- default: 35
88
- selector:
89
- number:
90
- min: 0
91
- max: 100
92
- step: 1
93
- mode: slider
94
-
95
- quiet_start:
96
- name: Quiet hours start
97
- default: "22:30:00"
98
- selector:
99
- time: {}
100
-
101
- quiet_end:
102
- name: Quiet hours end
103
- default: "07:30:00"
104
- selector:
105
- time: {}
106
-
107
- mode: restart
108
-
109
- variables:
110
- occupancy_entity: !input occupancy_entity
111
- reachy_device: !input reachy_device
112
- manual_sleep_switch: !input reachy_sleep_switch
113
- manual_idle_behavior_switch: !input idle_behavior_switch
114
- manual_volume_number: !input reachy_volume_number
115
- day_volume: !input day_volume
116
- night_volume: !input night_volume
117
-
118
- device_entities_list: >-
119
- {{ device_entities(reachy_device) if reachy_device else [] }}
120
-
121
- sleep_switch_auto: >-
122
- {{ (device_entities_list | select('match', '^switch\..*sleep_control$') | list | first) or '' }}
123
- idle_behavior_switch_auto: >-
124
- {{ (device_entities_list | select('match', '^switch\..*idle_behavior_enabled$') | list | first) or '' }}
125
- volume_number_auto: >-
126
- {{ (device_entities_list | select('match', '^number\..*speaker_volume$') | list | first) or '' }}
127
-
128
- sleep_switch: >-
129
- {{ manual_sleep_switch if manual_sleep_switch else sleep_switch_auto }}
130
- idle_behavior_switch: >-
131
- {{ manual_idle_behavior_switch if manual_idle_behavior_switch else idle_behavior_switch_auto }}
132
- volume_number: >-
133
- {{ manual_volume_number if manual_volume_number else volume_number_auto }}
134
-
135
- is_occupied: >-
136
- {{ states(occupancy_entity) in ['home', 'on'] }}
137
-
138
- trigger:
139
- - platform: state
140
- id: occupied_home
141
- entity_id: !input occupancy_entity
142
- to: "home"
143
-
144
- - platform: state
145
- id: occupied_on
146
- entity_id: !input occupancy_entity
147
- to: "on"
148
-
149
- - platform: state
150
- id: empty_not_home
151
- entity_id: !input occupancy_entity
152
- to: "not_home"
153
- for:
154
- minutes: !input away_delay_minutes
155
-
156
- - platform: state
157
- id: empty_off
158
- entity_id: !input occupancy_entity
159
- to: "off"
160
- for:
161
- minutes: !input away_delay_minutes
162
-
163
- - platform: time
164
- id: quiet_start
165
- at: !input quiet_start
166
-
167
- - platform: time
168
- id: quiet_end
169
- at: !input quiet_end
170
-
171
- action:
172
- - choose:
173
- - conditions:
174
- - condition: template
175
- value_template: "{{ trigger.id in ['occupied_home', 'occupied_on'] }}"
176
- sequence:
177
- - if:
178
- - condition: template
179
- value_template: "{{ sleep_switch != '' }}"
180
- then:
181
- - service: switch.turn_off
182
- target:
183
- entity_id: "{{ sleep_switch }}"
184
- - if:
185
- - condition: template
186
- value_template: "{{ idle_behavior_switch != '' }}"
187
- then:
188
- - service: switch.turn_on
189
- target:
190
- entity_id: "{{ idle_behavior_switch }}"
191
- - if:
192
- - condition: template
193
- value_template: "{{ volume_number != '' }}"
194
- then:
195
- - service: number.set_value
196
- target:
197
- entity_id: "{{ volume_number }}"
198
- data:
199
- value: "{{ day_volume }}"
200
-
201
- - conditions:
202
- - condition: template
203
- value_template: "{{ trigger.id in ['empty_not_home', 'empty_off'] }}"
204
- sequence:
205
- - if:
206
- - condition: template
207
- value_template: "{{ idle_behavior_switch != '' }}"
208
- then:
209
- - service: switch.turn_off
210
- target:
211
- entity_id: "{{ idle_behavior_switch }}"
212
- - if:
213
- - condition: template
214
- value_template: "{{ sleep_switch != '' }}"
215
- then:
216
- - service: switch.turn_on
217
- target:
218
- entity_id: "{{ sleep_switch }}"
219
-
220
- - conditions:
221
- - condition: template
222
- value_template: "{{ trigger.id == 'quiet_start' and is_occupied }}"
223
- sequence:
224
- - if:
225
- - condition: template
226
- value_template: "{{ volume_number != '' }}"
227
- then:
228
- - service: number.set_value
229
- target:
230
- entity_id: "{{ volume_number }}"
231
- data:
232
- value: "{{ night_volume }}"
233
-
234
- - conditions:
235
- - condition: template
236
- value_template: "{{ trigger.id == 'quiet_end' and is_occupied }}"
237
- sequence:
238
- - if:
239
- - condition: template
240
- value_template: "{{ volume_number != '' }}"
241
- then:
242
- - service: number.set_value
243
- target:
244
- entity_id: "{{ volume_number }}"
245
- data:
246
- value: "{{ day_volume }}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html CHANGED
@@ -18,24 +18,21 @@
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
21
- <div class="version-pill" id="version-pill">v1.0.4</div>
22
  </div>
23
  <div class="hero-grid">
24
  <div class="hero-copy">
25
  <p class="eyebrow">Reachy Mini App</p>
26
  <h1>Your robot meets your Home Assistant.</h1>
27
  <p class="lede">
28
- Transform Reachy Mini Wi-Fi into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
29
  </p>
30
  <div class="hero-actions">
31
- <a class="btn primary" href="#requirements">Requirements</a>
32
  <a class="btn ghost" href="#install">Quick Start</a>
33
- <a class="btn ghost" href="#features">Features</a>
34
  </div>
35
  <div class="hero-badges">
36
  <span>🎤 Wake Word</span>
37
  <span>👀 Face Tracking</span>
38
- <span>🔄 Body Following</span>
39
  <span>🤚 18 Gestures</span>
40
  <span>🔊 Multi-room Audio</span>
41
  <span>⚡ Zero Config</span>
@@ -50,74 +47,11 @@
50
  </div>
51
  </header>
52
 
53
- <section id="requirements" class="section">
54
- <div class="section-header">
55
- <p class="eyebrow">Before You Start</p>
56
- <h2>Requirements</h2>
57
- <p class="intro">Make sure you have everything ready for a smooth setup.</p>
58
- </div>
59
- <div class="requirements-grid">
60
- <div class="requirement-card">
61
- <span class="icon">🤖</span>
62
- <h3>Reachy Mini Wi-Fi</h3>
63
- <p>This app requires the <strong>Wi-Fi version</strong> of Reachy Mini. The USB version has not been validated</p>
64
- </div>
65
- <div class="requirement-card">
66
- <span class="icon">🏠</span>
67
- <h3>Home Assistant</h3>
68
- <p>A running Home Assistant instance </p>
69
- </div>
70
- <div class="requirement-card">
71
- <span class="icon">📶</span>
72
- <h3>Same Network</h3>
73
- <p>Both Reachy Mini and Home Assistant must be on the <strong>same local network</strong>.</p>
74
- </div>
75
- <div class="requirement-card">
76
- <span class="icon">🎙️</span>
77
- <h3>Voice Pipeline</h3>
78
- <p>Configure a <strong>Voice Assistant pipeline</strong> in Home Assistant (STT + TTS + LLM).</p>
79
- </div>
80
- </div>
81
- </section>
82
-
83
- <section id="install" class="section story">
84
- <div class="section-header">
85
- <p class="eyebrow">Getting Started</p>
86
- <h2>Quick Start</h2>
87
- <p class="intro">Install and connect in under a minute. No configuration needed.</p>
88
- </div>
89
- <div class="story-grid">
90
- <div class="story-card">
91
- <p class="eyebrow">Installation</p>
92
- <h3>Up and running in 1 minute</h3>
93
- <ul class="story-list">
94
- <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
95
- <li><span>2️⃣</span> Enable "Show community apps"</li>
96
- <li><span>3️⃣</span> Install "Reachy Mini for Home Assistant"</li>
97
- <li><span>4️⃣</span> Home Assistant discovers automatically</li>
98
- </ul>
99
- </div>
100
- <div class="story-card secondary">
101
- <p class="eyebrow">How it works</p>
102
- <h3>Seamless integration</h3>
103
- <p class="story-text">
104
- This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds the robot entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
105
- </p>
106
- <div class="chips">
107
- <span class="chip">ESPHome Protocol</span>
108
- <span class="chip">mDNS Discovery</span>
109
- <span class="chip">Robot Entities</span>
110
- <span class="chip">Zero Config</span>
111
- </div>
112
- </div>
113
- </div>
114
- </section>
115
-
116
  <section id="features" class="section features">
117
  <div class="section-header">
118
  <p class="eyebrow">Capabilities</p>
119
  <h2>Everything you need for smart home control</h2>
120
- <p class="intro">Zero-configuration robot entities, built-in reactions, and auto-discovery via mDNS.</p>
121
  </div>
122
  <div class="feature-grid">
123
  <div class="feature-card">
@@ -133,17 +67,17 @@
133
  <div class="feature-card">
134
  <span class="icon">👀</span>
135
  <h3>Face Tracking</h3>
136
- <p>YOLO-based face detection with body following. Head and body move together naturally to track you during conversations.</p>
137
  </div>
138
  <div class="feature-card">
139
  <span class="icon">🤚</span>
140
  <h3>Gesture Detection</h3>
141
- <p>HaGRID ONNX models recognize hand gestures and publish the detected gesture label and confidence to Home Assistant entities.</p>
142
  </div>
143
  <div class="feature-card">
144
  <span class="icon">😊</span>
145
  <h3>Expressive Motion</h3>
146
- <p>Built-in listening, thinking, speaking, timer, and emotion reactions with natural head sway and non-blocking motion during conversations.</p>
147
  </div>
148
  <div class="feature-card">
149
  <span class="icon">📹</span>
@@ -158,22 +92,40 @@
158
  <div class="feature-card">
159
  <span class="icon">⚡</span>
160
  <h3>Zero Configuration</h3>
161
- <p>Install and go. mDNS auto-discovery and built-in HA reactions mean the default experience works without extra setup.</p>
162
  </div>
163
  <div class="feature-card">
164
  <span class="icon">🃏</span>
165
  <h3>Dashboard Card</h3>
166
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
167
  </div>
168
- <div class="feature-card">
169
- <span class="icon">🧩</span>
170
- <h3>HA Blueprint</h3>
171
- <p>Device-first Home Assistant blueprint for presence automations using the current zero-config model: sleep control, idle behavior, and speaker volume.</p>
 
 
 
 
 
 
 
 
 
 
172
  </div>
173
- <div class="feature-card">
174
- <span class="icon">🚀</span>
175
- <h3>Auto Release</h3>
176
- <p>Version-driven GitHub release workflow. Update pyproject/changelog, then release is created automatically.</p>
 
 
 
 
 
 
 
 
177
  </div>
178
  </div>
179
  </section>
@@ -197,15 +149,6 @@
197
  fetch('changelog.json')
198
  .then(res => res.json())
199
  .then(data => {
200
- // Update version pill with latest version
201
- if (data.length > 0) {
202
- const versionPill = document.getElementById('version-pill');
203
- if (versionPill) {
204
- versionPill.textContent = `v${data[0].version}`;
205
- }
206
- }
207
-
208
- // Populate changelog grid
209
  const mainGrid = document.getElementById('changelog-grid');
210
  const olderGrid = document.getElementById('changelog-older');
211
  data.forEach((item, index) => {
@@ -236,15 +179,10 @@
236
  <h3>HA Dashboard Card</h3>
237
  <p>Lovelace Card for HA</p>
238
  </a>
239
- <a href="https://github.com/ha-china/Reachy_Mini_For_Home_Assistant" target="_blank" class="link-card">
240
  <span class="icon">📦</span>
241
  <h3>Source Code</h3>
242
- <p>GitHub Repository</p>
243
- </a>
244
- <a href="home_assistant_blueprints/reachy_mini_presence_companion.yaml" target="_blank" class="link-card">
245
- <span class="icon">🧩</span>
246
- <h3>HA Blueprint</h3>
247
- <p>Presence Companion YAML</p>
248
  </a>
249
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
250
  <span class="icon">🤖</span>
@@ -279,7 +217,7 @@
279
  <a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
280
  <span class="icon">✋</span>
281
  <h3>Dynamic Gestures</h3>
282
- <p>Reference Project</p>
283
  </a>
284
  <a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
285
  <span class="icon">🔊</span>
 
18
  <span class="brand-name">Reachy Mini for Home Assistant</span>
19
  </div>
20
  <div class="pill">Voice · Gestures · Smart Home</div>
 
21
  </div>
22
  <div class="hero-grid">
23
  <div class="hero-copy">
24
  <p class="eyebrow">Reachy Mini App</p>
25
  <h1>Your robot meets your Home Assistant.</h1>
26
  <p class="lede">
27
+ Transform Reachy Mini into a voice-controlled smart home hub. Natural conversations, expressive movements, gesture recognition — all seamlessly connected to Home Assistant.
28
  </p>
29
  <div class="hero-actions">
30
+ <a class="btn primary" href="#features">Explore Features</a>
31
  <a class="btn ghost" href="#install">Quick Start</a>
 
32
  </div>
33
  <div class="hero-badges">
34
  <span>🎤 Wake Word</span>
35
  <span>👀 Face Tracking</span>
 
36
  <span>🤚 18 Gestures</span>
37
  <span>🔊 Multi-room Audio</span>
38
  <span>⚡ Zero Config</span>
 
47
  </div>
48
  </header>
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  <section id="features" class="section features">
51
  <div class="section-header">
52
  <p class="eyebrow">Capabilities</p>
53
  <h2>Everything you need for smart home control</h2>
54
+ <p class="intro">45+ Home Assistant entities. Zero configuration. Auto-discovery via mDNS.</p>
55
  </div>
56
  <div class="feature-grid">
57
  <div class="feature-card">
 
67
  <div class="feature-card">
68
  <span class="icon">👀</span>
69
  <h3>Face Tracking</h3>
70
+ <p>YOLO-based face detection. Reachy looks at you during conversations with adaptive frame rate optimization.</p>
71
  </div>
72
  <div class="feature-card">
73
  <span class="icon">🤚</span>
74
  <h3>Gesture Detection</h3>
75
+ <p>HaGRID ONNX models recognize 18 hand gestures: 👍👎✌️🤘👌✊🤙🤫 and more.</p>
76
  </div>
77
  <div class="feature-card">
78
  <span class="icon">😊</span>
79
  <h3>Expressive Motion</h3>
80
+ <p>Real-time audio-driven animations. Natural head sway and antenna movements during conversations.</p>
81
  </div>
82
  <div class="feature-card">
83
  <span class="icon">📹</span>
 
92
  <div class="feature-card">
93
  <span class="icon">⚡</span>
94
  <h3>Zero Configuration</h3>
95
+ <p>Install and go. mDNS auto-discovery means Home Assistant finds your robot automatically.</p>
96
  </div>
97
  <div class="feature-card">
98
  <span class="icon">🃏</span>
99
  <h3>Dashboard Card</h3>
100
  <p>Custom Lovelace card for Home Assistant. Real-time 3D visualization of robot pose and status.</p>
101
  </div>
102
+ </div>
103
+ </section>
104
+
105
+ <section id="install" class="section story">
106
+ <div class="story-grid">
107
+ <div class="story-card">
108
+ <p class="eyebrow">Installation</p>
109
+ <h3>Up and running in 1 minute</h3>
110
+ <ul class="story-list">
111
+ <li><span>1️⃣</span> Open Reachy Mini Dashboard → Applications</li>
112
+ <li><span>2️⃣</span> Enable "Show community apps"</li>
113
+ <li><span>3️⃣</span> Install "reachy_mini_ha_voice"</li>
114
+ <li><span>4️⃣</span> Home Assistant discovers automatically</li>
115
+ </ul>
116
  </div>
117
+ <div class="story-card secondary">
118
+ <p class="eyebrow">How it works</p>
119
+ <h3>Seamless integration</h3>
120
+ <p class="story-text">
121
+ This Reachy Mini app uses ESPHome protocol to communicate with Home Assistant — no ESPHome device needed. Home Assistant discovers it via mDNS and adds all 45+ entities automatically. Voice commands are processed by your Home Assistant instance — STT, intent recognition, and TTS all happen there.
122
+ </p>
123
+ <div class="chips">
124
+ <span class="chip">ESPHome Protocol</span>
125
+ <span class="chip">mDNS Discovery</span>
126
+ <span class="chip">45+ Entities</span>
127
+ <span class="chip">Zero Config</span>
128
+ </div>
129
  </div>
130
  </div>
131
  </section>
 
149
  fetch('changelog.json')
150
  .then(res => res.json())
151
  .then(data => {
 
 
 
 
 
 
 
 
 
152
  const mainGrid = document.getElementById('changelog-grid');
153
  const olderGrid = document.getElementById('changelog-older');
154
  data.forEach((item, index) => {
 
179
  <h3>HA Dashboard Card</h3>
180
  <p>Lovelace Card for HA</p>
181
  </a>
182
+ <a href="https://huggingface.co/spaces/djhui5710/reachy_mini_ha_voice/tree/main" target="_blank" class="link-card">
183
  <span class="icon">📦</span>
184
  <h3>Source Code</h3>
185
+ <p>HuggingFace Spaces</p>
 
 
 
 
 
186
  </a>
187
  <a href="https://www.pollen-robotics.com/" target="_blank" class="link-card">
188
  <span class="icon">🤖</span>
 
217
  <a href="https://github.com/ai-forever/dynamic_gestures" target="_blank" class="link-card">
218
  <span class="icon">✋</span>
219
  <h3>Dynamic Gestures</h3>
220
+ <p>ONNX Models</p>
221
  </a>
222
  <a href="https://github.com/Sendspin/sendspin-cli" target="_blank" class="link-card">
223
  <span class="icon">🔊</span>
pyproject.toml CHANGED
@@ -1,24 +1,25 @@
1
  [build-system]
2
- requires = ["setuptools"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
- name = "reachy_mini_home_assistant"
7
- version = "1.0.7"
8
- description = "Deep integration of Reachy Mini robot with Home Assistant"
9
  readme = "README.md"
10
- requires-python = ">=3.12"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
- # Reachy Mini SDK with gstreamer support (for camera streaming)
14
- "reachy-mini>=1.7.1",
15
 
16
- # Audio processing (for audio file analysis)
 
17
  "soundfile>=0.13.0",
18
- "numpy>=2.2.5,<=2.2.5",
19
 
20
  # Camera streaming
21
- "opencv-python>=4.12.0.88",
22
 
23
  # Wake word detection (local)
24
  # STT/TTS is handled by Home Assistant, not locally
@@ -27,36 +28,26 @@ dependencies = [
27
 
28
  # ESPHome protocol (communication with Home Assistant)
29
  "aioesphomeapi>=43.10.1",
30
- "zeroconf>=0.131,<1",
31
- "websockets>=12,<16",
32
- "aiohttp",
33
 
34
  # Motion control (head movements)
35
- "scipy>=1.15.3,<2.0.0",
36
-
37
  # Face tracking (YOLO-based head detection)
38
- "ultralytics",
39
- "supervision",
40
-
 
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
- "aiosendspin>=5.1,<6.0",
43
-
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
46
-
47
- # PyTorch (for vision models)
48
- "torch==2.5.1",
49
- "torchvision==0.20.1",
50
-
51
- # Compatibility with system packages (gradio, etc.)
52
- "pillow<12.0",
53
- "pydantic<=2.12.5",
54
- "requests>=2.33.0",
55
  ]
56
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
57
 
58
  [project.entry-points."reachy_mini_apps"]
59
- reachy_mini_home_assistant = "reachy_mini_home_assistant.main:ReachyMiniHaVoice"
60
 
61
  [tool.setuptools]
62
  package-dir = { "" = "." }
@@ -66,114 +57,4 @@ include-package-data = true
66
  where = ["."]
67
 
68
  [tool.setuptools.package-data]
69
- "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx", "*.pt"]
70
-
71
- # ============================================================================
72
- # Ruff - Fast Python linter and formatter
73
- # ============================================================================
74
- [tool.ruff]
75
- target-version = "py312"
76
- line-length = 120
77
- src = ["reachy_mini_home_assistant"]
78
-
79
- # Exclude reference code and generated files
80
- exclude = [
81
- "reference/",
82
- "__pycache__",
83
- ".git",
84
- "*.egg-info",
85
- ]
86
-
87
- [dependency-groups]
88
- dev = [
89
- "ruff==0.15.4",
90
- "mypy==1.20.0",
91
- ]
92
-
93
- [tool.uv]
94
- dependency-metadata = [
95
- { name = "gstreamer-libs", version = "1.28.1", requires-dist = ["gstreamer-msvc-runtime; sys_platform == 'win32'", "setuptools"] },
96
- ]
97
-
98
- [tool.ruff.lint]
99
- select = [
100
- "E", # pycodestyle errors
101
- "W", # pycodestyle warnings
102
- "F", # Pyflakes
103
- "I", # isort (import sorting)
104
- "B", # flake8-bugbear (common bugs)
105
- "C4", # flake8-comprehensions
106
- "UP", # pyupgrade (modern Python syntax)
107
- "SIM", # flake8-simplify
108
- "TCH", # flake8-type-checking (TYPE_CHECKING optimization)
109
- "RUF", # Ruff-specific rules
110
- "PTH", # flake8-use-pathlib
111
- "PL", # Pylint
112
- ]
113
- ignore = [
114
- "E501", # line too long (handled by formatter)
115
- "PLR0913", # too many arguments (common in robot control)
116
- "PLR2004", # magic value comparison (many thresholds in motion code)
117
- "PLR0912", # too many branches
118
- "PLR0915", # too many statements
119
- "PLR0911", # too many return statements
120
- "SIM108", # use ternary operator (sometimes less readable)
121
- "B008", # function call in default argument (used for field factories)
122
- # The following are intentional patterns in this codebase:
123
- "PLC0415", # import-outside-top-level (lazy imports for optional deps)
124
- "PLW0603", # global-statement (used for singletons)
125
- "SIM102", # collapsible-if (sometimes more readable expanded)
126
- "SIM105", # suppressible-exception (explicit try/except is clearer)
127
- "PTH123", # builtin-open (pathlib not always better)
128
- "PTH108", # os-unlink (pathlib not always better)
129
- "RUF013", # implicit-optional (legacy code)
130
- "TC002", # third-party import (numpy is required at runtime)
131
- ]
132
-
133
- [tool.ruff.lint.per-file-ignores]
134
- "__init__.py" = ["F401"] # unused imports in __init__ are intentional
135
-
136
- [tool.ruff.lint.isort]
137
- known-first-party = ["reachy_mini_home_assistant"]
138
-
139
- # ============================================================================
140
- # Mypy - Static type checker
141
- # ============================================================================
142
- [tool.mypy]
143
- python_version = "3.12"
144
- warn_return_any = false # Too noisy for mixed typed/untyped codebase
145
- warn_unused_ignores = true
146
- disallow_untyped_defs = false # Start lenient, can tighten later
147
- check_untyped_defs = false # Too strict for initial setup
148
- ignore_missing_imports = true # Many robot SDK libs lack type stubs
149
- no_implicit_optional = false # Allow implicit Optional for now
150
- # Disable some checks that are too strict for this codebase
151
- disable_error_code = [
152
- "union-attr", # Too many Optional accesses without None checks
153
- "no-redef", # Class redefinitions for SDK compatibility
154
- "attr-defined", # Some dynamic attributes from SDK
155
- "assignment", # Variable type changes (common in Python)
156
- "arg-type", # Argument type mismatches (often SDK issues)
157
- "unused-ignore", # Type ignore comments from before config
158
- "return-value", # Return type mismatches (often fine)
159
- "no-untyped-def", # Missing type annotations (too strict initially)
160
- "valid-type", # Type validity (some edge cases)
161
- "has-type", # Cannot determine type
162
- "call-arg", # Too few/many arguments
163
- "import-untyped", # Missing stubs for third-party libs
164
- "misc", # Miscellaneous errors
165
- ]
166
- exclude = [
167
- "reference/",
168
- "tests/",
169
- ]
170
-
171
- # Stricter checking for core modules (can enable gradually)
172
- [[tool.mypy.overrides]]
173
- module = [
174
- "reachy_mini_home_assistant.core.*",
175
- "reachy_mini_home_assistant.motion.smoothing",
176
- "reachy_mini_home_assistant.motion.pose_composer",
177
- ]
178
- disallow_untyped_defs = true
179
- warn_unreachable = true
 
1
  [build-system]
2
+ requires = ["setuptools>=61.0"]
3
  build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
+ name = "reachy_mini_ha_voice"
7
+ version = "0.7.3"
8
+ description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
+ requires-python = ">=3.10"
11
  license = {text = "Apache-2.0"}
12
  dependencies = [
13
+ # Reachy Mini SDK (provides audio via media system)
14
+ "reachy-mini",
15
 
16
+ # Audio processing (fallback when not on Reachy Mini)
17
+ "sounddevice>=0.5.0",
18
  "soundfile>=0.13.0",
19
+ "numpy>=2.0.0",
20
 
21
  # Camera streaming
22
+ "opencv-python>=4.10.0",
23
 
24
  # Wake word detection (local)
25
  # STT/TTS is handled by Home Assistant, not locally
 
28
 
29
  # ESPHome protocol (communication with Home Assistant)
30
  "aioesphomeapi>=43.10.1",
31
+ "zeroconf>=0.140.0",
 
 
32
 
33
  # Motion control (head movements)
34
+ "scipy>=1.14.0",
35
+
36
  # Face tracking (YOLO-based head detection)
37
+ "ultralytics>=8.3.0",
38
+ "supervision>=0.25.0",
39
+ "huggingface_hub>=0.27.0",
40
+
41
  # Sendspin synchronized audio (optional, for multi-room playback)
42
+ "aiosendspin>=2.0.1",
43
+
44
  # Gesture detection (ONNX runtime for HaGRID models)
45
  "onnxruntime>=1.18.0",
 
 
 
 
 
 
 
 
 
46
  ]
47
  keywords = ["reachy-mini-app", "reachy-mini", "home-assistant", "voice-assistant"]
48
 
49
  [project.entry-points."reachy_mini_apps"]
50
+ reachy_mini_ha_voice = "reachy_mini_ha_voice.main:ReachyMiniHaVoice"
51
 
52
  [tool.setuptools]
53
  package-dir = { "" = "." }
 
57
  where = ["."]
58
 
59
  [tool.setuptools.package-data]
60
+ "*" = ["*.json", "*.flac", "*.md", "*.tflite", "*.onnx"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__init__.py RENAMED
@@ -1,29 +1,24 @@
1
- """
2
- Reachy Mini for Home Assistant
3
-
4
- A deep integration app combining Reachy Mini robot with Home Assistant,
5
- enabling voice control, smart home automation, and expressive robot interactions.
6
-
7
- Key features:
8
- - Local wake word detection (microWakeWord/openWakeWord)
9
- - ESPHome protocol for seamless Home Assistant communication
10
- - STT/TTS powered by Home Assistant voice pipeline
11
- - Reachy Mini motion control with expressive animations
12
- - Camera streaming and gesture detection
13
- - Smart home entity control through natural voice commands
14
- """
15
-
16
- try:
17
- from importlib.metadata import version
18
-
19
- __version__ = version("reachy_mini_home_assistant")
20
- except Exception:
21
- __version__ = "0.0.0" # Fallback for development
22
- __author__ = "Desmond Dong"
23
-
24
- # Don't import main module here to avoid runpy warning
25
- # The app is loaded via entry point: reachy_mini_home_assistant.main:ReachyMiniHaVoiceApp
26
-
27
- __all__ = [
28
- "__version__",
29
- ]
 
1
+ """
2
+ Reachy Mini for Home Assistant
3
+
4
+ A deep integration app combining Reachy Mini robot with Home Assistant,
5
+ enabling voice control, smart home automation, and expressive robot interactions.
6
+
7
+ Key features:
8
+ - Local wake word detection (microWakeWord/openWakeWord)
9
+ - ESPHome protocol for seamless Home Assistant communication
10
+ - STT/TTS powered by Home Assistant voice pipeline
11
+ - Reachy Mini motion control with expressive animations
12
+ - Camera streaming and gesture detection
13
+ - Smart home entity control through natural voice commands
14
+ """
15
+
16
+ __version__ = "0.7.3"
17
+ __author__ = "Desmond Dong"
18
+
19
+ # Don't import main module here to avoid runpy warning
20
+ # The app is loaded via entry point: reachy_mini_ha_voice.main:ReachyMiniHaVoice
21
+
22
+ __all__ = [
23
+ "__version__",
24
+ ]
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/__main__.py RENAMED
@@ -2,7 +2,7 @@
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
- without the ReachyMini App framework.
6
  """
7
 
8
  import argparse
@@ -10,17 +10,17 @@ import asyncio
10
  import logging
11
  import threading
12
 
13
- from .protocol.zeroconf import get_default_friendly_name
14
-
15
  _LOGGER = logging.getLogger(__name__)
16
 
17
 
18
  async def main() -> None:
19
- parser = argparse.ArgumentParser(description="Reachy Mini for Home Assistant")
 
 
20
  parser.add_argument(
21
  "--name",
22
- default=get_default_friendly_name(),
23
- help="Name of the voice assistant (default: auto-generated from MAC)",
24
  )
25
  parser.add_argument(
26
  "--host",
@@ -49,6 +49,11 @@ async def main() -> None:
49
  action="store_true",
50
  help="Disable camera server",
51
  )
 
 
 
 
 
52
  parser.add_argument(
53
  "--debug",
54
  action="store_true",
@@ -63,53 +68,59 @@ async def main() -> None:
63
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
64
  )
65
 
66
- # Initialize Reachy Mini (required)
67
- from reachy_mini import ReachyMini
68
-
69
- with ReachyMini() as reachy_mini:
70
- _LOGGER.info("Reachy Mini connected")
71
-
72
- # Import and create VoiceAssistantService
73
- from .voice_assistant import VoiceAssistantService
74
-
75
- service = VoiceAssistantService(
76
- reachy_mini=reachy_mini,
77
- name=args.name,
78
- host=args.host,
79
- port=args.port,
80
- wake_model=args.wake_model,
81
- camera_port=args.camera_port,
82
- camera_enabled=not args.no_camera,
83
- )
84
-
85
- # Create stop event for graceful shutdown
86
- stop_event = threading.Event()
87
-
88
  try:
89
- await service.start()
90
-
91
- _LOGGER.info("=" * 50)
92
- _LOGGER.info("Reachy Mini Voice Assistant Started")
93
- _LOGGER.info("=" * 50)
94
- _LOGGER.info("Name: %s", args.name)
95
- _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
96
- _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
97
- _LOGGER.info("Motion control: enabled")
98
- _LOGGER.info("=" * 50)
99
- _LOGGER.info("Add this device in Home Assistant:")
100
- _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
101
- _LOGGER.info(" Enter: <this-device-ip>:%s", args.port)
102
- _LOGGER.info("=" * 50)
103
-
104
- # Wait for stop signal
105
- while not stop_event.is_set():
106
- await asyncio.sleep(0.5)
107
-
108
- except KeyboardInterrupt:
109
- _LOGGER.info("Shutting down...")
110
- finally:
111
- await service.stop()
112
- _LOGGER.info("Voice assistant stopped")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  def run():
 
2
  """Main entry point for Reachy Mini for Home Assistant.
3
 
4
  This module provides a command-line interface for running the voice assistant
5
+ in standalone mode (without the ReachyMini App framework).
6
  """
7
 
8
  import argparse
 
10
  import logging
11
  import threading
12
 
 
 
13
  _LOGGER = logging.getLogger(__name__)
14
 
15
 
16
  async def main() -> None:
17
+ parser = argparse.ArgumentParser(
18
+ description="Reachy Mini for Home Assistant"
19
+ )
20
  parser.add_argument(
21
  "--name",
22
+ default="Reachy Mini",
23
+ help="Name of the voice assistant (default: Reachy Mini)",
24
  )
25
  parser.add_argument(
26
  "--host",
 
49
  action="store_true",
50
  help="Disable camera server",
51
  )
52
+ parser.add_argument(
53
+ "--no-motion",
54
+ action="store_true",
55
+ help="Disable Reachy Mini motion control",
56
+ )
57
  parser.add_argument(
58
  "--debug",
59
  action="store_true",
 
68
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
69
  )
70
 
71
+ # Initialize Reachy Mini (if available)
72
+ reachy_mini = None
73
+ if not args.no_motion:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  try:
75
+ from reachy_mini import ReachyMini
76
+ reachy_mini = ReachyMini()
77
+ _LOGGER.info("Reachy Mini connected")
78
+ except ImportError:
79
+ _LOGGER.warning("reachy-mini not installed, motion control disabled")
80
+ except Exception as e:
81
+ _LOGGER.warning("Failed to connect to Reachy Mini: %s", e)
82
+
83
+ # Import and create VoiceAssistantService
84
+ from .voice_assistant import VoiceAssistantService
85
+
86
+ service = VoiceAssistantService(
87
+ reachy_mini=reachy_mini,
88
+ name=args.name,
89
+ host=args.host,
90
+ port=args.port,
91
+ wake_model=args.wake_model,
92
+ camera_port=args.camera_port,
93
+ camera_enabled=not args.no_camera,
94
+ )
95
+
96
+ # Create stop event for graceful shutdown
97
+ stop_event = threading.Event()
98
+
99
+ try:
100
+ await service.start()
101
+
102
+ _LOGGER.info("=" * 50)
103
+ _LOGGER.info("Reachy Mini Voice Assistant Started")
104
+ _LOGGER.info("=" * 50)
105
+ _LOGGER.info("Name: %s", args.name)
106
+ _LOGGER.info("ESPHome Server: %s:%s", args.host, args.port)
107
+ _LOGGER.info("Camera Server: %s:%s", args.host, args.camera_port)
108
+ _LOGGER.info("Motion control: %s", "enabled" if reachy_mini else "disabled")
109
+ _LOGGER.info("=" * 50)
110
+ _LOGGER.info("Add this device in Home Assistant:")
111
+ _LOGGER.info(" Settings -> Devices & Services -> Add Integration -> ESPHome")
112
+ _LOGGER.info(" Enter: <this-device-ip>:%s", args.port)
113
+ _LOGGER.info("=" * 50)
114
+
115
+ # Wait for stop signal
116
+ while not stop_event.is_set():
117
+ await asyncio.sleep(0.5)
118
+
119
+ except KeyboardInterrupt:
120
+ _LOGGER.info("Shutting down...")
121
+ finally:
122
+ await service.stop()
123
+ _LOGGER.info("Voice assistant stopped")
124
 
125
 
126
  def run():
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/animation_player.py RENAMED
@@ -8,6 +8,7 @@ on top of other movements. The speaking animation uses multi-frequency
8
  oscillators for more natural head sway.
9
  """
10
 
 
11
  import logging
12
  import math
13
  import random
@@ -15,20 +16,17 @@ import threading
15
  import time
16
  from dataclasses import dataclass
17
  from pathlib import Path
18
-
19
- from ..animations.animation_config import load_animation_config
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
- _PACKAGE_DIR = _MODULE_DIR.parent # reachy_mini_home_assistant/
25
- _ANIMATIONS_FILE = _PACKAGE_DIR / "animations" / "conversation_animations.json"
26
 
27
 
28
  @dataclass
29
  class AnimationParams:
30
  """Parameters for a single animation with per-axis frequencies."""
31
-
32
  name: str
33
  description: str
34
  # Position amplitudes (meters)
@@ -50,7 +48,6 @@ class AnimationParams:
50
  # Antenna
51
  antenna_amplitude_rad: float = 0.0
52
  antenna_move_name: str = "both"
53
- antenna_frequency_hz: float = 0.0 # If not specified, uses main frequency_hz
54
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
55
  frequency_hz: float = 0.5
56
  pitch_frequency_hz: float = 0.0
@@ -70,17 +67,14 @@ class AnimationPlayer:
70
  - Multi-frequency oscillators for natural motion
71
  - Random phase offsets per animation start for variation
72
  - Smooth transitions between animations
73
- - Interpolation phase: smooth transition from current pose to neutral before oscillation
74
- (same as BreathingMove in reference project)
75
  """
76
 
77
  def __init__(self):
78
- self._animations: dict[str, AnimationParams] = {}
79
  self._amplitude_scale: float = 1.0
80
  self._transition_duration: float = 0.3
81
- self._interpolation_duration: float = 0.2
82
- self._current_animation: str | None = None
83
- self._target_animation: str | None = None
84
  self._transition_start: float = 0.0
85
  self._phase_start: float = 0.0
86
  self._lock = threading.Lock()
@@ -91,29 +85,6 @@ class AnimationPlayer:
91
  self._phase_x: float = 0.0
92
  self._phase_y: float = 0.0
93
  self._phase_z: float = 0.0
94
- # Interpolation state (for smooth transition to neutral before oscillation)
95
- self._in_interpolation: bool = False
96
- self._interpolation_start_time: float = 0.0
97
- self._interpolation_start_offsets: dict[str, float] = {
98
- "pitch": 0.0,
99
- "yaw": 0.0,
100
- "roll": 0.0,
101
- "x": 0.0,
102
- "y": 0.0,
103
- "z": 0.0,
104
- "antenna_left": 0.0,
105
- "antenna_right": 0.0,
106
- }
107
- self._last_offsets: dict[str, float] = {
108
- "pitch": 0.0,
109
- "yaw": 0.0,
110
- "roll": 0.0,
111
- "x": 0.0,
112
- "y": 0.0,
113
- "z": 0.0,
114
- "antenna_left": 0.0,
115
- "antenna_right": 0.0,
116
- }
117
  self._load_config()
118
 
119
  def _load_config(self) -> None:
@@ -122,7 +93,8 @@ class AnimationPlayer:
122
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
123
  return
124
  try:
125
- data = load_animation_config(_ANIMATIONS_FILE)
 
126
 
127
  settings = data.get("settings", {})
128
  self._amplitude_scale = settings.get("amplitude_scale", 1.0)
@@ -148,7 +120,6 @@ class AnimationPlayer:
148
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
149
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
150
  antenna_move_name=params.get("antenna_move_name", "both"),
151
- antenna_frequency_hz=params.get("antenna_frequency_hz", 0.0),
152
  frequency_hz=params.get("frequency_hz", 0.5),
153
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
154
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
@@ -173,29 +144,18 @@ class AnimationPlayer:
173
  self._phase_z = random.random() * 2 * math.pi
174
 
175
  def set_animation(self, name: str) -> bool:
176
- """Set the current animation with smooth transition.
177
-
178
- Like BreathingMove in reference project, this starts an interpolation
179
- phase that smoothly transitions from the current pose to neutral before
180
- starting the oscillation animation.
181
- """
182
  with self._lock:
183
  if name not in self._animations and name is not None:
184
  _LOGGER.warning("Unknown animation: %s", name)
185
  return False
186
- if name == self._current_animation and not self._in_interpolation:
187
  return True
188
-
189
- # Capture current offsets for interpolation start
190
- self._interpolation_start_offsets = self._last_offsets.copy()
191
- self._interpolation_start_time = time.perf_counter()
192
- self._in_interpolation = True
193
-
194
  self._target_animation = name
195
  self._transition_start = time.perf_counter()
196
  # Randomize phases for new animation
197
  self._randomize_phases()
198
- _LOGGER.debug("Transitioning to animation: %s (interpolation phase)", name)
199
  return True
200
 
201
  def stop(self) -> None:
@@ -204,13 +164,10 @@ class AnimationPlayer:
204
  self._current_animation = None
205
  self._target_animation = None
206
 
207
- def get_offsets(self, dt: float = 0.0) -> dict[str, float]:
208
  """Calculate current animation offsets.
209
 
210
- Uses two-phase animation like BreathingMove in reference project:
211
- 1. Interpolation phase: smoothly transition from current pose to neutral
212
- 2. Oscillation phase: continuous sinusoidal breathing motion
213
-
214
  Each axis can have its own frequency for more organic movement.
215
 
216
  Args:
@@ -222,7 +179,7 @@ class AnimationPlayer:
222
  with self._lock:
223
  now = time.perf_counter()
224
 
225
- # Handle transition to new animation
226
  if self._target_animation != self._current_animation:
227
  elapsed = now - self._transition_start
228
  if elapsed >= self._transition_duration:
@@ -231,59 +188,20 @@ class AnimationPlayer:
231
 
232
  # No animation
233
  if self._current_animation is None:
234
- result = {
235
- "pitch": 0.0,
236
- "yaw": 0.0,
237
- "roll": 0.0,
238
- "x": 0.0,
239
- "y": 0.0,
240
- "z": 0.0,
241
- "antenna_left": 0.0,
242
- "antenna_right": 0.0,
243
  }
244
- self._last_offsets = result.copy()
245
- return result
246
 
247
  params = self._animations.get(self._current_animation)
248
  if params is None:
249
- result = {
250
- "pitch": 0.0,
251
- "yaw": 0.0,
252
- "roll": 0.0,
253
- "x": 0.0,
254
- "y": 0.0,
255
- "z": 0.0,
256
- "antenna_left": 0.0,
257
- "antenna_right": 0.0,
258
  }
259
- self._last_offsets = result.copy()
260
- return result
261
-
262
- # Check if in interpolation phase
263
- if self._in_interpolation:
264
- interp_elapsed = now - self._interpolation_start_time
265
- if interp_elapsed < self._interpolation_duration:
266
- # Phase 1: Linear interpolation from current pose to neutral (offset=0)
267
- # Use smooth ease-in-out for natural motion
268
- t = interp_elapsed / self._interpolation_duration
269
- # Smooth step: t * t * (3 - 2 * t)
270
- smooth_t = t * t * (3 - 2 * t)
271
-
272
- result = {}
273
- for key in self._interpolation_start_offsets:
274
- start_val = self._interpolation_start_offsets[key]
275
- # Interpolate toward 0 (neutral)
276
- result[key] = start_val * (1.0 - smooth_t)
277
-
278
- self._last_offsets = result.copy()
279
- return result
280
- else:
281
- # Interpolation complete, start oscillation phase
282
- self._in_interpolation = False
283
- self._phase_start = now
284
- _LOGGER.debug("Interpolation complete, starting oscillation phase")
285
 
286
- # Phase 2: Oscillation animation
287
  elapsed = now - self._phase_start
288
  base_freq = params.frequency_hz
289
 
@@ -301,27 +219,32 @@ class AnimationPlayer:
301
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
302
 
303
  # Calculate oscillations with per-axis frequencies and random phases
304
- pitch = params.pitch_offset_rad + params.pitch_amplitude_rad * math.sin(
305
- 2 * math.pi * pitch_freq * elapsed + self._phase_pitch
306
- )
307
 
308
- yaw = params.yaw_offset_rad + params.yaw_amplitude_rad * math.sin(
309
- 2 * math.pi * yaw_freq * elapsed + self._phase_yaw
310
- )
311
 
312
- roll = params.roll_offset_rad + params.roll_amplitude_rad * math.sin(
313
- 2 * math.pi * roll_freq * elapsed + self._phase_roll
314
- )
315
 
316
- x = params.x_offset_m + params.x_amplitude_m * math.sin(2 * math.pi * x_freq * elapsed + self._phase_x)
 
 
317
 
318
- y = params.y_offset_m + params.y_amplitude_m * math.sin(2 * math.pi * y_freq * elapsed + self._phase_y)
 
 
319
 
320
- z = params.z_offset_m + params.z_amplitude_m * math.sin(2 * math.pi * z_freq * elapsed + self._phase_z)
 
 
321
 
322
- # Antenna movement with its own frequency
323
- antenna_freq = params.antenna_frequency_hz if params.antenna_frequency_hz > 0 else base_freq
324
- antenna_phase = 2 * math.pi * antenna_freq * elapsed
325
  if params.antenna_move_name == "both":
326
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
327
  elif params.antenna_move_name == "wiggle":
@@ -333,7 +256,7 @@ class AnimationPlayer:
333
 
334
  # Apply scale and blend
335
  scale = self._amplitude_scale * blend
336
- result = {
337
  "pitch": pitch * scale,
338
  "yaw": yaw * scale,
339
  "roll": roll * scale,
@@ -343,11 +266,9 @@ class AnimationPlayer:
343
  "antenna_left": left * scale,
344
  "antenna_right": right * scale,
345
  }
346
- self._last_offsets = result.copy()
347
- return result
348
 
349
  @property
350
- def current_animation(self) -> str | None:
351
  """Get the current animation name."""
352
  with self._lock:
353
  return self._current_animation
 
8
  oscillators for more natural head sway.
9
  """
10
 
11
+ import json
12
  import logging
13
  import math
14
  import random
 
16
  import time
17
  from dataclasses import dataclass
18
  from pathlib import Path
19
+ from typing import Dict, Optional
 
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
23
  _MODULE_DIR = Path(__file__).parent
24
+ _ANIMATIONS_FILE = _MODULE_DIR / "animations" / "conversation_animations.json"
 
25
 
26
 
27
  @dataclass
28
  class AnimationParams:
29
  """Parameters for a single animation with per-axis frequencies."""
 
30
  name: str
31
  description: str
32
  # Position amplitudes (meters)
 
48
  # Antenna
49
  antenna_amplitude_rad: float = 0.0
50
  antenna_move_name: str = "both"
 
51
  # Per-axis frequencies (Hz) - if not specified, uses main frequency_hz
52
  frequency_hz: float = 0.5
53
  pitch_frequency_hz: float = 0.0
 
67
  - Multi-frequency oscillators for natural motion
68
  - Random phase offsets per animation start for variation
69
  - Smooth transitions between animations
 
 
70
  """
71
 
72
  def __init__(self):
73
+ self._animations: Dict[str, AnimationParams] = {}
74
  self._amplitude_scale: float = 1.0
75
  self._transition_duration: float = 0.3
76
+ self._current_animation: Optional[str] = None
77
+ self._target_animation: Optional[str] = None
 
78
  self._transition_start: float = 0.0
79
  self._phase_start: float = 0.0
80
  self._lock = threading.Lock()
 
85
  self._phase_x: float = 0.0
86
  self._phase_y: float = 0.0
87
  self._phase_z: float = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  self._load_config()
89
 
90
  def _load_config(self) -> None:
 
93
  _LOGGER.warning("Animations file not found: %s", _ANIMATIONS_FILE)
94
  return
95
  try:
96
+ with open(_ANIMATIONS_FILE, "r", encoding="utf-8") as f:
97
+ data = json.load(f)
98
 
99
  settings = data.get("settings", {})
100
  self._amplitude_scale = settings.get("amplitude_scale", 1.0)
 
120
  yaw_offset_rad=params.get("yaw_offset_rad", 0.0),
121
  antenna_amplitude_rad=params.get("antenna_amplitude_rad", 0.0),
122
  antenna_move_name=params.get("antenna_move_name", "both"),
 
123
  frequency_hz=params.get("frequency_hz", 0.5),
124
  pitch_frequency_hz=params.get("pitch_frequency_hz", 0.0),
125
  yaw_frequency_hz=params.get("yaw_frequency_hz", 0.0),
 
144
  self._phase_z = random.random() * 2 * math.pi
145
 
146
  def set_animation(self, name: str) -> bool:
147
+ """Set the current animation with smooth transition."""
 
 
 
 
 
148
  with self._lock:
149
  if name not in self._animations and name is not None:
150
  _LOGGER.warning("Unknown animation: %s", name)
151
  return False
152
+ if name == self._current_animation:
153
  return True
 
 
 
 
 
 
154
  self._target_animation = name
155
  self._transition_start = time.perf_counter()
156
  # Randomize phases for new animation
157
  self._randomize_phases()
158
+ _LOGGER.debug("Transitioning to animation: %s", name)
159
  return True
160
 
161
  def stop(self) -> None:
 
164
  self._current_animation = None
165
  self._target_animation = None
166
 
167
+ def get_offsets(self, dt: float = 0.0) -> Dict[str, float]:
168
  """Calculate current animation offsets.
169
 
170
+ Uses multi-frequency oscillators for natural motion.
 
 
 
171
  Each axis can have its own frequency for more organic movement.
172
 
173
  Args:
 
179
  with self._lock:
180
  now = time.perf_counter()
181
 
182
+ # Handle transition
183
  if self._target_animation != self._current_animation:
184
  elapsed = now - self._transition_start
185
  if elapsed >= self._transition_duration:
 
188
 
189
  # No animation
190
  if self._current_animation is None:
191
+ return {
192
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
193
+ "x": 0.0, "y": 0.0, "z": 0.0,
194
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
195
  }
 
 
196
 
197
  params = self._animations.get(self._current_animation)
198
  if params is None:
199
+ return {
200
+ "pitch": 0.0, "yaw": 0.0, "roll": 0.0,
201
+ "x": 0.0, "y": 0.0, "z": 0.0,
202
+ "antenna_left": 0.0, "antenna_right": 0.0,
 
 
 
 
 
203
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
 
205
  elapsed = now - self._phase_start
206
  base_freq = params.frequency_hz
207
 
 
219
  z_freq = params.z_frequency_hz if params.z_frequency_hz > 0 else base_freq
220
 
221
  # Calculate oscillations with per-axis frequencies and random phases
222
+ pitch = (params.pitch_offset_rad +
223
+ params.pitch_amplitude_rad *
224
+ math.sin(2 * math.pi * pitch_freq * elapsed + self._phase_pitch))
225
 
226
+ yaw = (params.yaw_offset_rad +
227
+ params.yaw_amplitude_rad *
228
+ math.sin(2 * math.pi * yaw_freq * elapsed + self._phase_yaw))
229
 
230
+ roll = (params.roll_offset_rad +
231
+ params.roll_amplitude_rad *
232
+ math.sin(2 * math.pi * roll_freq * elapsed + self._phase_roll))
233
 
234
+ x = (params.x_offset_m +
235
+ params.x_amplitude_m *
236
+ math.sin(2 * math.pi * x_freq * elapsed + self._phase_x))
237
 
238
+ y = (params.y_offset_m +
239
+ params.y_amplitude_m *
240
+ math.sin(2 * math.pi * y_freq * elapsed + self._phase_y))
241
 
242
+ z = (params.z_offset_m +
243
+ params.z_amplitude_m *
244
+ math.sin(2 * math.pi * z_freq * elapsed + self._phase_z))
245
 
246
+ # Antenna movement
247
+ antenna_phase = 2 * math.pi * base_freq * elapsed
 
248
  if params.antenna_move_name == "both":
249
  left = right = params.antenna_amplitude_rad * math.sin(antenna_phase)
250
  elif params.antenna_move_name == "wiggle":
 
256
 
257
  # Apply scale and blend
258
  scale = self._amplitude_scale * blend
259
+ return {
260
  "pitch": pitch * scale,
261
  "yaw": yaw * scale,
262
  "roll": roll * scale,
 
266
  "antenna_left": left * scale,
267
  "antenna_right": right * scale,
268
  }
 
 
269
 
270
  @property
271
+ def current_animation(self) -> Optional[str]:
272
  """Get the current animation name."""
273
  with self._lock:
274
  return self._current_animation
reachy_mini_ha_voice/animations/conversation_animations.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "animations": {
3
+ "idle": {
4
+ "description": "No movement when idle - robot stays at neutral position",
5
+ "z_amplitude_m": 0.0,
6
+ "antenna_amplitude_rad": 0.0,
7
+ "frequency_hz": 0.0
8
+ },
9
+ "listening": {
10
+ "description": "Attentive pose while listening to user - slight forward lean",
11
+ "pitch_offset_rad": -0.05,
12
+ "pitch_amplitude_rad": 0.03,
13
+ "z_amplitude_m": 0.003,
14
+ "antenna_amplitude_rad": 0.2,
15
+ "antenna_move_name": "both",
16
+ "frequency_hz": 0.6
17
+ },
18
+ "thinking": {
19
+ "description": "Processing/thinking animation - head tilted with gentle sway",
20
+ "roll_offset_rad": 0.08,
21
+ "pitch_amplitude_rad": 0.03,
22
+ "yaw_amplitude_rad": 0.05,
23
+ "roll_amplitude_rad": 0.04,
24
+ "z_amplitude_m": 0.003,
25
+ "antenna_amplitude_rad": 0.25,
26
+ "antenna_move_name": "wiggle",
27
+ "frequency_hz": 0.4
28
+ },
29
+ "speaking": {
30
+ "description": "Speaking animation - multi-frequency natural head sway",
31
+ "pitch_amplitude_rad": 0.08,
32
+ "pitch_frequency_hz": 2.2,
33
+ "yaw_amplitude_rad": 0.13,
34
+ "yaw_frequency_hz": 0.6,
35
+ "roll_amplitude_rad": 0.04,
36
+ "roll_frequency_hz": 1.3,
37
+ "x_amplitude_m": 0.0045,
38
+ "x_frequency_hz": 0.35,
39
+ "y_amplitude_m": 0.00375,
40
+ "y_frequency_hz": 0.45,
41
+ "z_amplitude_m": 0.00225,
42
+ "z_frequency_hz": 0.25,
43
+ "antenna_amplitude_rad": 0.5,
44
+ "antenna_move_name": "wiggle",
45
+ "frequency_hz": 1.0
46
+ },
47
+ "happy": {
48
+ "description": "Happy/positive response",
49
+ "pitch_amplitude_rad": 0.08,
50
+ "z_amplitude_m": 0.01,
51
+ "antenna_amplitude_rad": 0.5,
52
+ "antenna_move_name": "both",
53
+ "frequency_hz": 1.2
54
+ },
55
+ "sad": {
56
+ "description": "Sad/negative response - head droops",
57
+ "pitch_offset_rad": 0.1,
58
+ "pitch_amplitude_rad": 0.04,
59
+ "z_offset_m": -0.01,
60
+ "z_amplitude_m": 0.002,
61
+ "antenna_amplitude_rad": 0.1,
62
+ "antenna_move_name": "both",
63
+ "frequency_hz": 0.3
64
+ },
65
+ "confused": {
66
+ "description": "Confused/error state - head tilts",
67
+ "roll_amplitude_rad": 0.1,
68
+ "yaw_amplitude_rad": 0.12,
69
+ "pitch_amplitude_rad": 0.05,
70
+ "antenna_amplitude_rad": 0.4,
71
+ "antenna_move_name": "wiggle",
72
+ "frequency_hz": 0.7
73
+ },
74
+ "alert": {
75
+ "description": "Alert/timer finished - quick movements",
76
+ "pitch_amplitude_rad": 0.1,
77
+ "z_amplitude_m": 0.012,
78
+ "antenna_amplitude_rad": 0.6,
79
+ "antenna_move_name": "both",
80
+ "frequency_hz": 1.5
81
+ }
82
+ },
83
+ "settings": {
84
+ "amplitude_scale": 1.0,
85
+ "transition_duration_s": 0.3
86
+ }
87
+ }
{reachy_mini_home_assistant/protocol → reachy_mini_ha_voice}/api_server.py RENAMED
@@ -4,7 +4,7 @@ import asyncio
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
- from typing import TYPE_CHECKING
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
@@ -31,7 +31,7 @@ class APIServer(asyncio.Protocol):
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
- self._buffer: bytes | None = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
@@ -41,77 +41,52 @@ class APIServer(asyncio.Protocol):
41
  def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
42
  pass
43
 
44
- def on_authenticated(self) -> None:
45
- """Hook called after authentication succeeds."""
46
- return
47
-
48
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
49
- try:
50
- msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
51
- msg_inst = msg_class.FromString(packet_data)
52
- _LOGGER.debug("Received message: %s", msg_class.__name__)
53
-
54
- if isinstance(msg_inst, HelloRequest):
55
- _LOGGER.info("HelloRequest received, sending HelloResponse")
56
- self.send_messages(
57
- [
58
- HelloResponse(
59
- api_version_major=1,
60
- api_version_minor=10,
61
- name=self.name,
62
- )
63
- ]
64
- )
65
- return
66
 
67
- if isinstance(msg_inst, AuthenticationRequest):
68
- _LOGGER.info("AuthenticationRequest received, sending AuthenticationResponse")
69
- self.send_messages([AuthenticationResponse()])
70
- self.on_authenticated()
71
- elif isinstance(msg_inst, DisconnectRequest):
72
- self.send_messages([DisconnectResponse()])
73
- _LOGGER.debug("Disconnect requested")
74
- if self._transport:
75
- self._transport.close()
76
- self._transport = None
77
- self._writelines = None
78
- elif isinstance(msg_inst, PingRequest):
79
- self.send_messages([PingResponse()])
80
- elif msgs := self.handle_message(msg_inst):
81
- if isinstance(msgs, message.Message):
82
- msgs = [msgs]
83
- self.send_messages(msgs)
84
- except Exception:
85
- _LOGGER.exception("Unhandled ESPHome protocol error while processing message type %s", msg_type)
86
  if self._transport:
87
  self._transport.close()
88
  self._transport = None
89
  self._writelines = None
90
-
91
- def send_messages(self, msgs: list[message.Message]):
 
 
 
 
 
 
92
  if self._writelines is None:
93
  return
94
 
95
- try:
96
- packets = [(PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString()) for msg in msgs]
97
- packet_bytes = make_plain_text_packets(packets)
98
- self._writelines(packet_bytes)
99
- except (IndexError, OSError, BrokenPipeError, ConnectionResetError) as e:
100
- _LOGGER.warning(
101
- "Error sending message (%s): %s - connection may be lost",
102
- msgs[0].__class__.__name__ if msgs else "unknown",
103
- e,
104
- )
105
- # Mark transport as invalid to prevent further writes
106
- self._writelines = None
107
- if self._transport:
108
- self._transport.close()
109
- self._transport = None
110
 
111
  def connection_made(self, transport) -> None:
112
  self._transport = transport
113
  self._writelines = transport.writelines
114
- _LOGGER.info("ESPHome client connected from %s", transport.get_extra_info("peername"))
115
 
116
  def data_received(self, data: bytes):
117
  if self._buffer is None:
@@ -165,13 +140,8 @@ class APIServer(asyncio.Protocol):
165
  return cstr[original_pos:new_pos]
166
 
167
  def connection_lost(self, exc):
168
- _LOGGER.info("ESPHome client disconnected: %s", exc)
169
  self._transport = None
170
  self._writelines = None
171
- # Clear buffer to prevent memory leak
172
- self._buffer = None
173
- self._buffer_len = 0
174
- self._pos = 0
175
 
176
  def _read_varuint(self) -> int:
177
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
 
4
  import logging
5
  from abc import abstractmethod
6
  from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, List, Optional
8
 
9
  # pylint: disable=no-name-in-module
10
  from aioesphomeapi._frame_helper.packets import make_plain_text_packets
 
31
 
32
  def __init__(self, name: str) -> None:
33
  self.name = name
34
+ self._buffer: Optional[bytes] = None
35
  self._buffer_len: int = 0
36
  self._pos: int = 0
37
  self._transport = None
 
41
  def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
42
  pass
43
 
 
 
 
 
44
  def process_packet(self, msg_type: int, packet_data: bytes) -> None:
45
+ msg_class = MESSAGE_TYPE_TO_PROTO[msg_type]
46
+ msg_inst = msg_class.FromString(packet_data)
47
+
48
+ if isinstance(msg_inst, HelloRequest):
49
+ self.send_messages(
50
+ [
51
+ HelloResponse(
52
+ api_version_major=1,
53
+ api_version_minor=10,
54
+ name=self.name,
55
+ )
56
+ ]
57
+ )
58
+ return
 
 
 
59
 
60
+ if isinstance(msg_inst, AuthenticationRequest):
61
+ self.send_messages([AuthenticationResponse()])
62
+ elif isinstance(msg_inst, DisconnectRequest):
63
+ self.send_messages([DisconnectResponse()])
64
+ _LOGGER.debug("Disconnect requested")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  if self._transport:
66
  self._transport.close()
67
  self._transport = None
68
  self._writelines = None
69
+ elif isinstance(msg_inst, PingRequest):
70
+ self.send_messages([PingResponse()])
71
+ elif msgs := self.handle_message(msg_inst):
72
+ if isinstance(msgs, message.Message):
73
+ msgs = [msgs]
74
+ self.send_messages(msgs)
75
+
76
+ def send_messages(self, msgs: List[message.Message]):
77
  if self._writelines is None:
78
  return
79
 
80
+ packets = [
81
+ (PROTO_TO_MESSAGE_TYPE[msg.__class__], msg.SerializeToString())
82
+ for msg in msgs
83
+ ]
84
+ packet_bytes = make_plain_text_packets(packets)
85
+ self._writelines(packet_bytes)
 
 
 
 
 
 
 
 
 
86
 
87
  def connection_made(self, transport) -> None:
88
  self._transport = transport
89
  self._writelines = transport.writelines
 
90
 
91
  def data_received(self, data: bytes):
92
  if self._buffer is None:
 
140
  return cstr[original_pos:new_pos]
141
 
142
  def connection_lost(self, exc):
 
143
  self._transport = None
144
  self._writelines = None
 
 
 
 
145
 
146
  def _read_varuint(self) -> int:
147
  """Read a varuint from the buffer or -1 if the buffer runs out of bytes."""
reachy_mini_ha_voice/audio_player.py ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Audio player using Reachy Mini's media system with automatic Sendspin support.
2
+
3
+ Sendspin integration allows synchronized multi-room audio playback through
4
+ a Sendspin server. Reachy Mini connects as a PLAYER to receive audio streams
5
+ from Home Assistant or other Sendspin controllers.
6
+
7
+ Sendspin is automatically enabled by default - no user configuration needed.
8
+ The system uses mDNS to discover Sendspin servers on the local network.
9
+ """
10
+
11
+ import hashlib
12
+ import logging
13
+ import socket
14
+ import threading
15
+ import time
16
+ from collections.abc import Callable
17
+ from typing import List, Optional, TYPE_CHECKING, Union
18
+
19
+ if TYPE_CHECKING:
20
+ from .zeroconf import SendspinDiscovery
21
+
22
+ _LOGGER = logging.getLogger(__name__)
23
+
24
+ # Check if aiosendspin is available
25
+ try:
26
+ from aiosendspin.client import SendspinClient, PCMFormat
27
+ from aiosendspin.models.types import Roles, AudioCodec, PlayerCommand
28
+ from aiosendspin.models.player import ClientHelloPlayerSupport, SupportedAudioFormat
29
+ from aiosendspin.models.core import StreamStartMessage
30
+ SENDSPIN_AVAILABLE = True
31
+ except ImportError:
32
+ SENDSPIN_AVAILABLE = False
33
+ _LOGGER.debug("aiosendspin not installed, Sendspin support disabled")
34
+
35
+
36
+ def _get_stable_client_id() -> str:
37
+ """Generate a stable client ID based on machine identity.
38
+
39
+ Uses a SHA-256 hash of the hostname to create a consistent ID across restarts.
40
+ """
41
+ try:
42
+ hostname = socket.gethostname()
43
+ # Create a hash of hostname for stability
44
+ hash_input = f"reachy-mini-{hostname}"
45
+ return hashlib.sha256(hash_input.encode()).hexdigest()[:16]
46
+ except Exception:
47
+ return "reachy-mini-default"
48
+
49
+
50
class AudioPlayer:
    """Audio player built on Reachy Mini's media system, with Sendspin input.

    Playback paths:
        1. Reachy Mini's built-in media system (used when ``reachy_mini`` is set).
        2. Sendspin synchronized multi-room playback, where this object acts
           as a PLAYER and receives PCM chunks from a Sendspin server.
        3. ``sounddevice`` fallback when no Reachy Mini instance is set or
           the media system raises.

    Sendspin type names in annotations are quoted on purpose: the
    ``aiosendspin`` import is optional, and an unquoted name would raise
    ``NameError`` while the class body is executed.
    """

    def __init__(self, reachy_mini=None) -> None:
        """Initialize audio player.

        Args:
            reachy_mini: Reachy Mini SDK instance, or None to use the
                sounddevice fallback for file playback.
        """
        self.reachy_mini = reachy_mini
        self.is_playing = False
        self._playlist: List[str] = []
        self._done_callback: Optional[Callable[[], None]] = None
        self._done_callback_lock = threading.Lock()
        self._duck_volume: float = 0.5
        self._unduck_volume: float = 1.0
        self._current_volume: float = 1.0
        self._stop_flag = threading.Event()

        # Speech sway callback for audio-driven head motion
        self._sway_callback: Optional[Callable[[dict], None]] = None

        # Sendspin support (auto-enabled via mDNS discovery).
        # A stable client_id lets the server recognize the same device after restart.
        self._sendspin_client_id = _get_stable_client_id()
        self._sendspin_client: Optional["SendspinClient"] = None
        self._sendspin_enabled = False
        self._sendspin_url: Optional[str] = None
        self._sendspin_discovery: Optional["SendspinDiscovery"] = None
        self._sendspin_unsubscribers: List[Callable] = []

        # Sendspin playback state
        self._sendspin_audio_format: Optional["PCMFormat"] = None
        self._sendspin_playback_started = False
        self._sendspin_paused = False  # Pause Sendspin when voice assistant is active
        self._logged_resample = False  # Resampling is logged once per stream

    def set_sway_callback(self, callback: Optional[Callable[[dict], None]]) -> None:
        """Set callback for speech-driven sway animation.

        Args:
            callback: Function called with a sway dict containing
                pitch_rad, yaw_rad, roll_rad, x_m, y_m, z_m; None disables sway.
        """
        self._sway_callback = callback

    def set_reachy_mini(self, reachy_mini) -> None:
        """Set the Reachy Mini instance."""
        self.reachy_mini = reachy_mini

    # ========== Sendspin Integration (Auto-enabled via mDNS) ==========

    @property
    def sendspin_available(self) -> bool:
        """Check if Sendspin library is available."""
        return SENDSPIN_AVAILABLE

    @property
    def sendspin_enabled(self) -> bool:
        """Check if Sendspin output is enabled and connected."""
        return self._sendspin_enabled and self._sendspin_client is not None

    @property
    def sendspin_url(self) -> Optional[str]:
        """Get current Sendspin server URL."""
        return self._sendspin_url

    def pause_sendspin(self) -> None:
        """Pause Sendspin audio playback.

        Called when voice assistant is activated to prevent audio conflicts.
        Incoming Sendspin audio chunks are dropped until resumed.
        """
        if self._sendspin_paused:
            return
        self._sendspin_paused = True
        _LOGGER.debug("Sendspin audio paused (voice assistant active)")

    def resume_sendspin(self) -> None:
        """Resume Sendspin audio playback.

        Called when voice assistant returns to idle state.
        """
        if not self._sendspin_paused:
            return
        self._sendspin_paused = False
        self._logged_resample = False  # Reset resample log flag for new stream
        _LOGGER.debug("Sendspin audio resumed")

    async def start_sendspin_discovery(self) -> None:
        """Start mDNS discovery for Sendspin servers.

        Does nothing when ``aiosendspin`` is missing or discovery is already
        running. A discovered server is connected to via
        ``_on_sendspin_server_found``.
        """
        if not SENDSPIN_AVAILABLE:
            _LOGGER.debug("aiosendspin not installed, skipping Sendspin discovery")
            return

        if self._sendspin_discovery is not None and self._sendspin_discovery.is_running:
            _LOGGER.debug("Sendspin discovery already running")
            return

        # Import here to avoid circular imports
        from .zeroconf import SendspinDiscovery

        _LOGGER.info("Starting Sendspin server discovery...")
        self._sendspin_discovery = SendspinDiscovery(self._on_sendspin_server_found)
        await self._sendspin_discovery.start()

    async def _on_sendspin_server_found(self, server_url: str) -> None:
        """Callback when a Sendspin server is discovered via mDNS.

        Args:
            server_url: WebSocket URL of the discovered server.
        """
        await self._connect_to_server(server_url)

    async def _connect_to_server(self, server_url: str) -> bool:
        """Connect to a discovered Sendspin server as PLAYER.

        Any existing connection to a different server is closed first.

        Args:
            server_url: WebSocket URL of the Sendspin server.

        Returns:
            True if connected (or already connected to this URL), False if
            ``aiosendspin`` is missing or the connection attempt failed.
        """
        if not SENDSPIN_AVAILABLE:
            return False

        # Already connected to this server
        if self._sendspin_enabled and self._sendspin_url == server_url:
            return True

        # Disconnect from previous server if any
        if self._sendspin_client is not None:
            await self._disconnect_sendspin()

        # (channels, sample_rate) in preference order. 16 kHz comes first
        # because, per the original comments, it is the native ReSpeaker
        # output rate; higher rates are resampled in the chunk handler.
        preferred_formats = (
            (2, 16000),
            (1, 16000),
            (2, 48000),
            (2, 44100),
            (1, 48000),
            (1, 44100),
        )

        try:
            player_support = ClientHelloPlayerSupport(
                supported_formats=[
                    SupportedAudioFormat(
                        codec=AudioCodec.PCM,
                        channels=channels,
                        sample_rate=sample_rate,
                        bit_depth=16,
                    )
                    for channels, sample_rate in preferred_formats
                ],
                buffer_capacity=32_000_000,
                supported_commands=[PlayerCommand.VOLUME, PlayerCommand.MUTE],
            )

            self._sendspin_client = SendspinClient(
                client_id=self._sendspin_client_id,
                client_name="Reachy Mini",
                roles=[Roles.PLAYER],  # PLAYER role to receive audio
                player_support=player_support,
            )

            await self._sendspin_client.connect(server_url)

            # Register audio listeners; each add_* call returns an unsubscriber
            client = self._sendspin_client
            self._sendspin_unsubscribers = [
                client.add_audio_chunk_listener(self._on_sendspin_audio_chunk),
                client.add_stream_start_listener(self._on_sendspin_stream_start),
                client.add_stream_end_listener(self._on_sendspin_stream_end),
                client.add_stream_clear_listener(self._on_sendspin_stream_clear),
            ]

            self._sendspin_url = server_url
            self._sendspin_enabled = True

            _LOGGER.info(
                "Sendspin connected as PLAYER: %s (client_id=%s)",
                server_url,
                self._sendspin_client_id,
            )
            return True

        except Exception as e:
            _LOGGER.warning("Failed to connect to Sendspin server %s: %s", server_url, e)
            self._sendspin_client = None
            self._sendspin_enabled = False
            return False

    def _on_sendspin_audio_chunk(
        self, server_timestamp_us: int, audio_data: bytes, fmt: "PCMFormat"
    ) -> None:
        """Handle an incoming audio chunk from the Sendspin server.

        Converts the PCM bytes to float32, resamples to the media system's
        output rate when it differs, applies the current volume and pushes
        the samples with ``push_audio_sample()``. Chunks are dropped when no
        Reachy Mini is set or Sendspin is paused. Errors are logged at debug
        level and never propagate to the Sendspin client.

        Args:
            server_timestamp_us: Server timestamp of the chunk (unused here).
            audio_data: Raw interleaved PCM bytes.
            fmt: Format of ``audio_data`` (bit depth, channels, sample rate).
        """
        if self.reachy_mini is None:
            return

        # Drop audio when paused (voice assistant is active)
        if self._sendspin_paused:
            return

        try:
            # Store format for potential use
            self._sendspin_audio_format = fmt

            import numpy as np

            # Pick sample dtype and full-scale value; anything other than
            # 32-bit is treated as 16-bit (only 16-bit is advertised).
            if fmt.bit_depth == 32:
                dtype = np.int32
                max_val = 2147483648.0
            else:
                dtype = np.int16
                max_val = 32768.0

            audio_array = np.frombuffer(audio_data, dtype=dtype)

            # Convert to float32 for playback (SDK expects float32)
            audio_float = audio_array.astype(np.float32) / max_val

            # Shape to (samples, channels); mono becomes (samples, 1)
            channels = fmt.channels if fmt.channels > 1 else 1
            audio_float = audio_float.reshape(-1, channels)

            # Resample if the stream rate differs from the output rate
            target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
            if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
                import scipy.signal

                new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
                if new_length > 0:
                    audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
                    # Log resampling only once per stream
                    if not self._logged_resample:
                        _LOGGER.debug(
                            "Resampling Sendspin audio: %d Hz -> %d Hz",
                            fmt.sample_rate,
                            target_sample_rate,
                        )
                        self._logged_resample = True

            # Apply volume
            audio_float = audio_float * self._current_volume

            # Ensure media playback is started
            if not self._sendspin_playback_started:
                try:
                    self.reachy_mini.media.start_playing()
                    self._sendspin_playback_started = True
                    _LOGGER.info(
                        "Started media playback for Sendspin audio (target: %d Hz)",
                        target_sample_rate,
                    )
                except Exception as e:
                    _LOGGER.warning("Failed to start media playback: %s", e)

            # Play through Reachy Mini's media system using push_audio_sample
            self.reachy_mini.media.push_audio_sample(audio_float)

        except Exception as e:
            _LOGGER.debug("Error playing Sendspin audio: %s", e)

    def _on_sendspin_stream_start(self, message: "StreamStartMessage") -> None:
        """Handle stream start from Sendspin server."""
        _LOGGER.debug("Sendspin stream started")
        # No need to clear buffer - just start fresh

    def _on_sendspin_stream_end(self, roles: Optional[List["Roles"]]) -> None:
        """Handle stream end from Sendspin server.

        Args:
            roles: Roles the event applies to; None is treated as all roles.
        """
        if roles is None or Roles.PLAYER in roles:
            _LOGGER.debug("Sendspin stream ended")

    def _on_sendspin_stream_clear(self, roles: Optional[List["Roles"]]) -> None:
        """Handle stream clear from Sendspin server by stopping media playback.

        Args:
            roles: Roles the event applies to; None is treated as all roles.
        """
        if roles is None or Roles.PLAYER in roles:
            _LOGGER.debug("Sendspin stream cleared")
            # Only mark playback as stopped when the media system accepted it
            if self._stop_media_playback():
                self._sendspin_playback_started = False

    async def _disconnect_sendspin(self) -> None:
        """Disconnect from current Sendspin server and reset connection state."""
        # Unsubscribe from listeners
        for unsub in self._sendspin_unsubscribers:
            try:
                unsub()
            except Exception:
                _LOGGER.debug("Error removing Sendspin listener", exc_info=True)
        self._sendspin_unsubscribers.clear()

        if self._sendspin_client is not None:
            try:
                await self._sendspin_client.disconnect()
            except Exception as e:
                _LOGGER.debug("Error disconnecting from Sendspin: %s", e)
            self._sendspin_client = None

        self._sendspin_enabled = False
        self._sendspin_url = None
        self._sendspin_audio_format = None

    async def stop_sendspin(self) -> None:
        """Stop Sendspin discovery and disconnect from server."""
        # Stop discovery
        if self._sendspin_discovery is not None:
            await self._sendspin_discovery.stop()
            self._sendspin_discovery = None

        # Disconnect from server
        await self._disconnect_sendspin()

        _LOGGER.info("Sendspin stopped")

    # ========== Core Playback Methods ==========

    def play(
        self,
        url: Union[str, List[str]],
        done_callback: Optional[Callable[[], None]] = None,
        stop_first: bool = True,
    ) -> None:
        """Play audio from URL(s).

        Args:
            url: Single URL or list of URLs to play.
            done_callback: Called when playback finishes.
            stop_first: Stop current playback before starting new.
        """
        if stop_first:
            self.stop()

        self._playlist = [url] if isinstance(url, str) else list(url)
        self._done_callback = done_callback
        self._stop_flag.clear()
        self._play_next()

    def _play_next(self) -> None:
        """Play next item in playlist, or finish if it is empty or stopped."""
        if not self._playlist or self._stop_flag.is_set():
            self._on_playback_finished()
            return

        next_url = self._playlist.pop(0)
        _LOGGER.debug("Playing %s", next_url)
        self.is_playing = True

        # Start playback in a thread
        thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
        thread.start()

    def _play_file(self, file_path: str) -> None:
        """Play one audio file, with optional speech-driven sway animation.

        HTTP(S) URLs are downloaded to a temporary ``.wav`` file first, which
        is removed once playback of this item is over. Runs in a worker
        thread; when done it continues with the next playlist item or
        reports completion.

        Args:
            file_path: Local path or http(s) URL of the audio to play.
        """
        import os

        temp_path: Optional[str] = None
        try:
            # Handle URLs - download first
            if file_path.startswith(("http://", "https://")):
                import tempfile
                import urllib.request

                # Reserve a name, then close the handle before downloading so
                # the path can be rewritten on platforms that lock open files.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                    temp_path = tmp.name
                urllib.request.urlretrieve(file_path, temp_path)
                file_path = temp_path

            if self._stop_flag.is_set():
                return

            if self.reachy_mini is None:
                self._play_file_fallback(file_path)
                return

            # Play locally using Reachy Mini's media system
            try:
                # Read audio data for duration calculation and sway analysis
                import soundfile as sf

                data, sample_rate = sf.read(file_path)
                duration = len(data) / sample_rate

                # Pre-analyze audio for speech sway if callback is set
                sway_frames = []
                if self._sway_callback is not None:
                    from .speech_sway import SpeechSwayRT

                    sway = SpeechSwayRT()
                    sway_frames = sway.feed(data, sample_rate)
                    _LOGGER.debug(
                        "Generated %d sway frames for %.2fs audio",
                        len(sway_frames),
                        duration,
                    )

                # Start playback
                self.reachy_mini.media.play_sound(file_path)

                # Playback loop with sway animation
                start_time = time.time()
                frame_duration = 0.05  # 50ms per sway frame (HOP_MS)
                frame_idx = 0

                while time.time() - start_time < duration:
                    if self._stop_flag.is_set():
                        self.reachy_mini.media.stop_playing()
                        break

                    # Emit every sway frame that is due at the current time
                    if self._sway_callback and frame_idx < len(sway_frames):
                        elapsed = time.time() - start_time
                        target_frame = int(elapsed / frame_duration)
                        while frame_idx <= target_frame and frame_idx < len(sway_frames):
                            self._sway_callback(sway_frames[frame_idx])
                            frame_idx += 1

                    time.sleep(0.02)  # 20ms sleep for responsive sway

                # Reset sway to zero when done
                if self._sway_callback:
                    self._sway_callback({
                        "pitch_rad": 0.0, "yaw_rad": 0.0, "roll_rad": 0.0,
                        "x_m": 0.0, "y_m": 0.0, "z_m": 0.0,
                    })

            except Exception as e:
                _LOGGER.warning("Reachy Mini audio failed, falling back: %s", e)
                self._play_file_fallback(file_path)

        except Exception as e:
            _LOGGER.error("Error playing audio: %s", e)
        finally:
            # Remove the downloaded copy so repeated URL playback does not
            # accumulate files in the temp directory.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError as e:
                    _LOGGER.debug("Could not remove temp audio file %s: %s", temp_path, e)

            self.is_playing = False
            if self._playlist and not self._stop_flag.is_set():
                self._play_next()
            else:
                self._on_playback_finished()

    def _play_file_fallback(self, file_path: str) -> None:
        """Fallback to sounddevice for audio playback (blocks until done)."""
        import sounddevice as sd
        import soundfile as sf

        data, samplerate = sf.read(file_path)
        data = data * self._current_volume

        if not self._stop_flag.is_set():
            sd.play(data, samplerate)
            sd.wait()

    def _on_playback_finished(self) -> None:
        """Mark playback as finished and run the done callback at most once."""
        self.is_playing = False
        todo_callback: Optional[Callable[[], None]] = None

        # Take the callback under the lock so concurrent finishers cannot
        # both run it; call it outside the lock.
        with self._done_callback_lock:
            if self._done_callback:
                todo_callback = self._done_callback
                self._done_callback = None

        if todo_callback:
            try:
                todo_callback()
            except Exception:
                _LOGGER.exception("Unexpected error running done callback")

    def _stop_media_playback(self) -> bool:
        """Ask Reachy Mini's media system to stop playing.

        Returns:
            True if ``stop_playing()`` was called without raising; False if
            there is no Reachy Mini instance or the call failed (the failure
            is logged at debug level).
        """
        if self.reachy_mini is None:
            return False
        try:
            self.reachy_mini.media.stop_playing()
        except Exception:
            _LOGGER.debug("Error stopping media playback", exc_info=True)
            return False
        return True

    def pause(self) -> None:
        """Pause playback.

        Stops current audio output but preserves playlist for resume.
        """
        self._stop_flag.set()
        self._stop_media_playback()
        self.is_playing = False

    def resume(self) -> None:
        """Resume playback with the next remaining playlist item."""
        self._stop_flag.clear()
        if self._playlist:
            self._play_next()

    def stop(self) -> None:
        """Stop playback and clear playlist."""
        self._stop_flag.set()
        self._stop_media_playback()
        self._playlist.clear()
        self.is_playing = False

    def duck(self) -> None:
        """Reduce volume for announcements."""
        self._current_volume = self._duck_volume

    def unduck(self) -> None:
        """Restore normal volume."""
        self._current_volume = self._unduck_volume

    def set_volume(self, volume: int) -> None:
        """Set volume level (0-100); out-of-range values are clamped.

        The ducked volume is set to half of the new level.
        """
        volume = max(0, min(100, volume))
        self._unduck_volume = volume / 100.0
        self._duck_volume = self._unduck_volume / 2
        self._current_volume = self._unduck_volume
reachy_mini_ha_voice/camera_server.py ADDED
@@ -0,0 +1,842 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MJPEG Camera Server for Reachy Mini with Face Tracking.
3
+
4
+ This module provides an HTTP server that streams camera frames from Reachy Mini
5
+ as MJPEG, which can be integrated with Home Assistant via Generic Camera.
6
+ Also provides face tracking for head movement control.
7
+
8
+ Reference: reachy_mini_conversation_app/src/reachy_mini_conversation_app/camera_worker.py
9
+ """
10
+
11
+ import asyncio
12
+ import logging
13
+ import threading
14
+ import time
15
+ from typing import Optional, Tuple, List, TYPE_CHECKING
16
+
17
+ import cv2
18
+ import numpy as np
19
+ from scipy.spatial.transform import Rotation as R
20
+
21
+ # Import SDK interpolation utilities (same as conversation_app)
22
+ try:
23
+ from reachy_mini.utils.interpolation import linear_pose_interpolation
24
+ SDK_INTERPOLATION_AVAILABLE = True
25
+ except ImportError:
26
+ SDK_INTERPOLATION_AVAILABLE = False
27
+
28
+ if TYPE_CHECKING:
29
+ from reachy_mini import ReachyMini
30
+
31
+ _LOGGER = logging.getLogger(__name__)
32
+
33
+ # MJPEG boundary string
34
+ MJPEG_BOUNDARY = "frame"
35
+
36
+
37
+ class MJPEGCameraServer:
38
+ """
39
+ MJPEG streaming server for Reachy Mini camera with face tracking.
40
+
41
+ Provides HTTP endpoints:
42
+ - /stream - MJPEG video stream
43
+ - /snapshot - Single JPEG image
44
+ - / - Simple status page
45
+
46
+ Also provides face tracking offsets for head movement control.
47
+
48
+ Resource Optimization:
49
+ - Adaptive frame rate: high (15fps) when face detected or in conversation,
50
+ low (2fps) after a short absence, ultra-low (0.5fps) after a prolonged one
51
+ - Face detection pauses after prolonged absence to save CPU
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ reachy_mini: Optional["ReachyMini"] = None,
57
+ host: str = "0.0.0.0",
58
+ port: int = 8081,
59
+ fps: int = 15, # 15fps for smooth face tracking
60
+ quality: int = 80,
61
+ enable_face_tracking: bool = True,
62
+ ):
63
+ """
64
+ Initialize the MJPEG camera server.
65
+
66
+ Args:
67
+ reachy_mini: Reachy Mini robot instance (can be None for testing)
68
+ host: Host address to bind to
69
+ port: Port number for the HTTP server
70
+ fps: Target frames per second for the stream
71
+ quality: JPEG quality (1-100)
72
+ enable_face_tracking: Enable face tracking for head movement
73
+ """
74
+ self.reachy_mini = reachy_mini
75
+ self.host = host
76
+ self.port = port
77
+ self.fps = fps
78
+ self.quality = quality
79
+ self.enable_face_tracking = enable_face_tracking
80
+
81
+ self._server: Optional[asyncio.Server] = None
82
+ self._running = False
83
+ self._frame_interval = 1.0 / fps
84
+ self._last_frame: Optional[bytes] = None
85
+ self._last_frame_time: float = 0
86
+ self._frame_lock = threading.Lock()
87
+
88
+ # Frame capture thread
89
+ self._capture_thread: Optional[threading.Thread] = None
90
+
91
+ # Face tracking state
92
+ self._head_tracker = None
93
+ self._face_tracking_enabled = True # Enabled by default for always-on face tracking
94
+ self._face_tracking_offsets: List[float] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
95
+ self._face_tracking_lock = threading.Lock()
96
+
97
+ # Gesture detection state
98
+ self._gesture_detector = None
99
+ self._gesture_detection_enabled = True
100
+ self._current_gesture = "none"
101
+ self._gesture_confidence = 0.0
102
+ self._gesture_lock = threading.Lock()
103
+ self._gesture_frame_counter = 0
104
+ self._gesture_detection_interval = 3 # Run gesture detection every N frames
105
+ self._gesture_state_callback = None # Callback to notify entity registry
106
+
107
+ # Face tracking timing (smooth interpolation when face lost)
108
+ self._last_face_detected_time: Optional[float] = None
109
+ self._interpolation_start_time: Optional[float] = None
110
+ self._interpolation_start_pose: Optional[np.ndarray] = None
111
+ self._face_lost_delay = 1.0 # Reduced from 2.0s to 1.0s for faster response
112
+ self._interpolation_duration = 0.8 # Reduced from 1.0s to 0.8s for faster return
113
+
114
+ # Offset scaling (same as conversation_app)
115
+ self._offset_scale = 0.6
116
+
117
+ # =====================================================================
118
+ # Resource optimization: Adaptive frame rate for face tracking
119
+ # =====================================================================
120
+ # High frequency when: face detected, in conversation, or recently active
121
+ # Low frequency when: idle and no face for extended period
122
+ # Ultra-low when: idle for very long time (just MJPEG stream, minimal AI)
123
+ self._fps_high = fps # Normal tracking rate (15fps)
124
+ self._fps_low = 2 # Low power rate (2fps) - periodic face check
125
+ self._fps_idle = 0.5 # Ultra-low power (0.5fps) - minimal CPU usage
126
+ self._current_fps = fps
127
+
128
+ # Conversation state (set by voice assistant)
129
+ self._in_conversation = False
130
+ self._conversation_lock = threading.Lock()
131
+
132
+ # Adaptive tracking timing
133
+ self._no_face_duration = 0.0 # How long since last face detection
134
+ self._low_power_threshold = 5.0 # Switch to low power after 5s without face
135
+ self._idle_threshold = 30.0 # Switch to idle mode after 30s without face
136
+ self._last_face_check_time = 0.0
137
+
138
+ # Skip AI inference in idle mode (only stream MJPEG)
139
+ self._ai_enabled = True
140
+
141
async def start(self) -> None:
    """Start the capture thread and the MJPEG HTTP server.

    Creates the head tracker (when ``enable_face_tracking`` is set) and the
    gesture detector; failure of either is logged and the feature is left
    disabled. If the HTTP server cannot be started, the capture thread is
    stopped again and the server is left in the not-running state so that
    start() can be retried.

    Raises:
        Exception: Whatever ``asyncio.start_server`` raises (for example
            ``OSError`` when the port is already in use) is re-raised after
            the rollback.
    """
    if self._running:
        _LOGGER.warning("Camera server already running")
        return

    self._running = True

    # Initialize head tracker if face tracking enabled
    if self.enable_face_tracking:
        try:
            from .head_tracker import HeadTracker
            self._head_tracker = HeadTracker()
            _LOGGER.info("Face tracking enabled with YOLO head tracker")
        except ImportError as e:
            _LOGGER.error("Failed to import head tracker: %s", e)
            self._head_tracker = None
        except Exception as e:
            _LOGGER.warning("Failed to initialize head tracker: %s", e)
            self._head_tracker = None
    else:
        _LOGGER.info("Face tracking disabled by configuration")

    # Initialize gesture detector
    if self._gesture_detection_enabled:
        try:
            from .gesture_detector import GestureDetector
            self._gesture_detector = GestureDetector()
            if self._gesture_detector.is_available:
                _LOGGER.info("Gesture detection enabled (18 HaGRID classes)")
            else:
                _LOGGER.warning("Gesture detection not available")
                self._gesture_detector = None
        except ImportError as e:
            _LOGGER.warning("Failed to import gesture detector: %s", e)
            self._gesture_detector = None
        except Exception as e:
            _LOGGER.warning("Failed to initialize gesture detector: %s", e)
            self._gesture_detector = None

    # Start frame capture thread
    self._capture_thread = threading.Thread(
        target=self._capture_frames,
        daemon=True,
        name="camera-capture"
    )
    self._capture_thread.start()

    # Start HTTP server
    try:
        self._server = await asyncio.start_server(
            self._handle_client,
            self.host,
            self.port,
        )
    except Exception:
        # Roll back: without this, _running stays True, the capture thread
        # keeps running, and a later start() returns "already running".
        self._running = False
        self._capture_thread.join(timeout=0.5)
        self._capture_thread = None
        raise

    _LOGGER.info("MJPEG Camera server started on http://%s:%d", self.host, self.port)
    _LOGGER.info(" Stream URL: http://<ip>:%d/stream", self.port)
    _LOGGER.info(" Snapshot URL: http://<ip>:%d/snapshot", self.port)
199
+
200
async def stop(self) -> None:
    """Stop the capture thread and shut down the HTTP server.

    Clears the running flag, waits up to 0.5 s for the capture thread,
    then closes the server and waits for it to finish closing.
    """
    self._running = False

    capture_thread = self._capture_thread
    if capture_thread:
        # The thread is a daemon; a timed-out join does not block shutdown.
        capture_thread.join(timeout=0.5)
        self._capture_thread = None

    server = self._server
    if server:
        server.close()
        await server.wait_closed()
        self._server = None

    _LOGGER.info("MJPEG Camera server stopped")
214
+
215
def _capture_frames(self) -> None:
    """Background thread to capture frames from Reachy Mini and do face tracking.

    Runs until ``self._running`` is cleared. Each iteration grabs a frame,
    stores its JPEG encoding for the HTTP handlers, optionally runs face
    tracking and gesture detection, then sleeps for ``1 / _current_fps``.

    Resource optimization:
    - High frequency (15fps) when face detected or in conversation
    - Low frequency (2fps) when idle and no face for short period
    - Ultra-low (0.5fps) when idle for extended period - minimal AI inference

    Any exception inside an iteration is logged and followed by a 1 s pause,
    so one bad frame does not end the thread.
    """
    _LOGGER.info("Starting camera capture thread (face_tracking=%s)", self._face_tracking_enabled)

    # Counters for the periodic statistics log; reset every 30 seconds
    frame_count = 0
    face_detect_count = 0
    last_log_time = time.time()

    while self._running:
        try:
            current_time = time.time()

            # Determine if we should run AI inference this frame
            should_run_ai = self._should_run_ai_inference(current_time)

            # Only get frame if needed (AI inference or MJPEG streaming)
            frame = self._get_camera_frame() if should_run_ai or self._has_stream_clients() else None

            if frame is not None:
                frame_count += 1

                # Encode frame as JPEG for streaming
                encode_params = [cv2.IMWRITE_JPEG_QUALITY, self.quality]
                success, jpeg_data = cv2.imencode('.jpg', frame, encode_params)

                if success:
                    # Publish the latest frame under the lock shared with readers
                    with self._frame_lock:
                        self._last_frame = jpeg_data.tobytes()
                        self._last_frame_time = time.time()

                # Only run AI inference when enabled
                if should_run_ai:
                    # Face tracking
                    if self._face_tracking_enabled and self._head_tracker is not None:
                        face_detect_count += 1
                        face_detected = self._process_face_tracking(frame, current_time)

                        # Update adaptive timing based on detection result
                        if face_detected:
                            self._no_face_duration = 0.0
                            self._current_fps = self._fps_high
                            self._ai_enabled = True
                        else:
                            # Accumulate no-face duration
                            if self._last_face_detected_time is not None:
                                self._no_face_duration = current_time - self._last_face_detected_time
                            else:
                                # No face seen yet: estimate elapsed time from the frame period
                                self._no_face_duration += 1.0 / self._current_fps

                            # Adaptive power mode
                            if self._no_face_duration > self._idle_threshold:
                                self._current_fps = self._fps_idle
                            elif self._no_face_duration > self._low_power_threshold:
                                self._current_fps = self._fps_low

                        self._last_face_check_time = current_time

                    # Handle smooth interpolation when face lost
                    self._process_face_lost_interpolation(current_time)

                    # Gesture detection (only when face detected recently)
                    if (self._gesture_detection_enabled and
                            self._gesture_detector is not None and
                            self._no_face_duration < 5.0):  # Only detect gestures when someone is present
                        self._gesture_frame_counter += 1
                        # Run the detector on every Nth eligible frame only
                        if self._gesture_frame_counter >= self._gesture_detection_interval:
                            self._gesture_frame_counter = 0
                            self._process_gesture_detection(frame)

                # Log stats every 30 seconds
                if current_time - last_log_time >= 30.0:
                    fps = frame_count / (current_time - last_log_time)
                    detect_fps = face_detect_count / (current_time - last_log_time)
                    mode = "HIGH" if self._current_fps == self._fps_high else ("LOW" if self._current_fps == self._fps_low else "IDLE")
                    _LOGGER.debug("Camera: %.1f fps, AI: %.1f fps (%s), no_face: %.0fs",
                                  fps, detect_fps, mode, self._no_face_duration)
                    frame_count = 0
                    face_detect_count = 0
                    last_log_time = current_time

            # Sleep to maintain target FPS (use current adaptive rate)
            sleep_time = 1.0 / self._current_fps
            time.sleep(sleep_time)

        except Exception as e:
            _LOGGER.error("Error capturing frame: %s", e)
            time.sleep(1.0)

    _LOGGER.info("Camera capture thread stopped")
310
+
311
def _should_run_ai_inference(self, current_time: float) -> bool:
    """Decide whether face/gesture inference should run for this frame.

    Args:
        current_time: Timestamp of the current frame, on the same clock as
            ``self._last_face_check_time``.

    Returns:
        True when a conversation is active, when the capture loop is at its
        high frame rate, or when at least one period of the current
        (reduced) frame rate has passed since the last face check.
    """
    # Read the conversation flag under its lock.
    with self._conversation_lock:
        conversing = self._in_conversation

    # Conversation mode and high-frequency mode both run on every frame.
    if conversing or self._current_fps == self._fps_high:
        return True

    # Low/idle power mode: throttle inference to one run per frame period.
    check_interval = 1.0 / self._current_fps
    return (current_time - self._last_face_check_time) >= check_interval
331
+
332
def _has_stream_clients(self) -> bool:
    """Report whether any MJPEG stream client is connected.

    Placeholder implementation: client connections are not tracked yet, so
    this always answers True to keep the stream available.
    """
    # Possible optimisation: track real client connections and return that.
    return True
337
+
338
def _process_face_tracking(self, frame: np.ndarray, current_time: float) -> bool:
    """Run the head tracker on one frame and refresh the tracking offsets.

    Args:
        frame: Camera image; only ``frame.shape[:2]`` (height, width) is
            read here, the image itself is handed to the head tracker.
        current_time: Timestamp stored as the last face-detection time.

    Returns:
        True if a face was detected and the offsets were updated, False if
        no tracker/robot is available, no face was found, or an error
        occurred (errors are logged at debug level).
    """
    if self._head_tracker is None or self.reachy_mini is None:
        return False

    try:
        face_center, _confidence = self._head_tracker.get_head_position(frame)
        if face_center is None:
            return False

        # A face is visible: remember when, and cancel any return-to-neutral.
        self._last_face_detected_time = current_time
        self._interpolation_start_time = None

        # Map the tracker's normalized coordinates onto pixel coordinates
        # (presumably the tracker reports values in [-1, 1] -- confirm).
        height, width = frame.shape[:2]
        normalized = (face_center + 1) / 2
        pixel_x = normalized[0] * width
        pixel_y = normalized[1] * height

        # Ask the robot for the head pose that looks at that pixel, without moving.
        look_pose = self.reachy_mini.look_at_image(
            pixel_x,
            pixel_y,
            duration=0.0,
            perform_movement=False,
        )

        # Split the pose into translation / xyz-euler rotation and scale both
        # down for smoother tracking (same as conversation_app).
        offset_xyz = look_pose[:3, 3] * self._offset_scale
        offset_rpy = (
            R.from_matrix(look_pose[:3, :3]).as_euler("xyz", degrees=False)
            * self._offset_scale
        )

        # Pitch compensation (index 1 in xyz order): the robot tends to look
        # up, and positive pitch looks down, so add 9 degrees.
        offset_rpy[1] += np.radians(9.0)

        # Yaw compensation (index 2 in xyz order): the robot tends to look to
        # the user's right; negative yaw turns right, so add -7 degrees.
        offset_rpy[2] += np.radians(-7.0)

        new_offsets = [float(v) for v in offset_xyz] + [float(v) for v in offset_rpy]
        with self._face_tracking_lock:
            self._face_tracking_offsets = new_offsets

        return True

    except Exception as e:
        _LOGGER.debug("Face tracking error: %s", e)
        return False
410
+
411
def _process_face_lost_interpolation(self, current_time: float) -> None:
    """Ease the face-tracking offsets back to neutral after a face is lost.

    Does nothing while no detection time is recorded or while the
    face-lost delay has not yet elapsed. Otherwise it captures the current
    offsets once as the start pose, then on each call blends from that
    pose towards the identity pose and stores the result as the new
    offsets. When the blend reaches 1.0 the interpolation state (including
    the last detection time) is cleared.

    Args:
        current_time: Timestamp on the same clock as
            ``self._last_face_detected_time``.
    """
    last_seen = self._last_face_detected_time
    if last_seen is None:
        return

    # Still inside the grace period: keep the current offsets untouched.
    if current_time - last_seen < self._face_lost_delay:
        return

    # First call after the delay: record the start time and starting pose.
    if self._interpolation_start_time is None:
        self._interpolation_start_time = current_time
        with self._face_tracking_lock:
            snapshot = self._face_tracking_offsets.copy()

        # Offsets are (x, y, z, roll, pitch, yaw) -> 4x4 pose matrix.
        start_pose = np.eye(4, dtype=np.float32)
        start_pose[:3, 3] = snapshot[:3]
        start_pose[:3, :3] = R.from_euler("xyz", snapshot[3:]).as_matrix()
        self._interpolation_start_pose = start_pose

    # Progress through the interpolation, clamped at 1.0.
    elapsed = current_time - self._interpolation_start_time
    t = min(1.0, elapsed / self._interpolation_duration)

    if self._interpolation_start_pose is not None:
        # Neutral is the identity pose.
        blended = self._linear_pose_interpolation(
            self._interpolation_start_pose, np.eye(4, dtype=np.float32), t
        )
        xyz = blended[:3, 3]
        rpy = R.from_matrix(blended[:3, :3]).as_euler("xyz", degrees=False)

        with self._face_tracking_lock:
            self._face_tracking_offsets = [
                float(xyz[0]),
                float(xyz[1]),
                float(xyz[2]),
                float(rpy[0]),
                float(rpy[1]),
                float(rpy[2]),
            ]

    # Interpolation finished: clear all state so it does not run again.
    if t >= 1.0:
        self._last_face_detected_time = None
        self._interpolation_start_time = None
        self._interpolation_start_pose = None
464
+
465
def _linear_pose_interpolation(
    self, start: np.ndarray, end: np.ndarray, t: float
) -> np.ndarray:
    """Blend between two 4x4 pose matrices.

    Delegates to the SDK's ``linear_pose_interpolation`` when
    ``SDK_INTERPOLATION_AVAILABLE`` is set. Otherwise the translation
    column is interpolated linearly and the rotation block is interpolated
    with scipy's ``Slerp``.

    Args:
        start: Pose at ``t == 0``.
        end: Pose at ``t == 1``.
        t: Interpolation parameter.

    Returns:
        The blended pose. The fallback path returns a float32 4x4 matrix.
    """
    if SDK_INTERPOLATION_AVAILABLE:
        return linear_pose_interpolation(start, end, t)

    # Fallback path: manual interpolation.
    from scipy.spatial.transform import Slerp

    blended = np.eye(4, dtype=np.float32)

    # Translation: plain linear blend of the last column.
    blended[:3, 3] = (1 - t) * start[:3, 3] + t * end[:3, 3]

    # Rotation: SLERP between the two key rotations placed at times 0 and 1.
    key_quats = np.array(
        [R.from_matrix(pose[:3, :3]).as_quat() for pose in (start, end)]
    )
    blended[:3, :3] = Slerp([0, 1], R.from_quat(key_quats))(t).as_matrix()

    return blended
498
+
499
+ # =========================================================================
500
+ # Public API for face tracking
501
+ # =========================================================================
502
+
503
def get_face_tracking_offsets(self) -> Tuple[float, float, float, float, float, float]:
    """Return the current face tracking offsets.

    The offsets are read while holding ``self._face_tracking_lock``.

    Returns:
        Tuple of (x, y, z, roll, pitch, yaw) offsets.
    """
    with self._face_tracking_lock:
        current = self._face_tracking_offsets
        return tuple(current[i] for i in range(6))
512
+
513
def set_face_tracking_enabled(self, enabled: bool) -> None:
    """Switch face tracking on or off.

    A call that does not change the current state returns immediately and
    logs nothing. Disabling stamps the last-detection time with "now" and
    clears the interpolation start so the return-to-neutral interpolation
    can begin.

    Args:
        enabled: Desired face tracking state.
    """
    if enabled == self._face_tracking_enabled:
        return  # Unchanged: nothing to do, nothing to log

    self._face_tracking_enabled = enabled
    if enabled:
        state = "enabled"
    else:
        # Kick off interpolation back to neutral.
        self._last_face_detected_time = time.time()
        self._interpolation_start_time = None
        state = "disabled"
    _LOGGER.info("Face tracking %s", state)
523
+
524
def set_conversation_mode(self, in_conversation: bool) -> None:
    """Record whether the voice assistant is currently in a conversation.

    Entering conversation mode switches the capture loop to its high frame
    rate right away, enables AI inference and resets the no-face timer.
    Leaving it only clears the flag; the frame rate is left to the
    adaptive logic.

    Args:
        in_conversation: True when voice assistant is actively conversing.
    """
    with self._conversation_lock:
        self._in_conversation = in_conversation

    if not in_conversation:
        _LOGGER.debug("Face tracking: conversation mode OFF (adaptive)")
        return

    # Jump straight to high-frequency tracking.
    self._current_fps = self._fps_high
    self._ai_enabled = True
    self._no_face_duration = 0.0  # restart the no-face timer
    _LOGGER.debug("Face tracking: conversation mode ON (high frequency)")
544
+
545
+ # =========================================================================
546
+ # Gesture detection
547
+ # =========================================================================
548
+
549
def _process_gesture_detection(self, frame: np.ndarray) -> None:
    """Run gesture detection on one frame and publish state changes.

    The detector result replaces the stored gesture and confidence; a
    detector value of ``"no_gesture"`` is stored as ``"none"`` with zero
    confidence. When the stored gesture name changes, the registered state
    callback (if any) is invoked. Callback failures are logged and
    otherwise ignored so they cannot break the capture loop.

    Args:
        frame: Camera image handed to ``self._gesture_detector.detect``.
    """
    if self._gesture_detector is None:
        return

    try:
        detected_gesture, confidence = self._gesture_detector.detect(frame)
        gesture_name = detected_gesture.value

        # Update the shared gesture state under its lock.
        state_changed = False
        with self._gesture_lock:
            previous = self._current_gesture
            if gesture_name != "no_gesture":
                self._current_gesture = gesture_name
                self._gesture_confidence = confidence
                if previous != gesture_name:
                    state_changed = True
                    _LOGGER.debug("Gesture: %s (%.0f%%)",
                                  gesture_name, confidence * 100)
            else:
                if previous != "none":
                    state_changed = True
                self._current_gesture = "none"
                self._gesture_confidence = 0.0

        # Notify entity registry to push update to Home Assistant. This runs
        # after the lock is released, so the callback may call the gesture
        # getters without blocking on it.
        if state_changed and self._gesture_state_callback:
            try:
                self._gesture_state_callback()
            except Exception as callback_error:
                # Still best effort, but no longer silent.
                _LOGGER.warning(
                    "Gesture state callback failed: %s", callback_error
                )

    except Exception as e:
        _LOGGER.warning("Gesture detection error: %s", e)
584
+
585
def get_current_gesture(self) -> str:
    """Return the name of the gesture currently stored.

    The value is read while holding ``self._gesture_lock``.

    Returns:
        Gesture name string (e.g., "like", "peace", "none").
    """
    with self._gesture_lock:
        gesture_name = self._current_gesture
    return gesture_name
593
+
594
def get_gesture_confidence(self) -> float:
    """Return the stored gesture confidence as a percentage.

    The stored confidence is read while holding ``self._gesture_lock`` and
    multiplied by 100 for percentage display.

    Returns:
        The stored confidence value times 100.
    """
    with self._gesture_lock:
        stored_confidence = self._gesture_confidence
        return stored_confidence * 100.0
602
+
603
def set_gesture_detection_enabled(self, enabled: bool) -> None:
    """Switch gesture detection on or off.

    Disabling also resets the stored gesture to "none" with zero
    confidence. The new state is logged on every call.

    Args:
        enabled: Desired gesture detection state.
    """
    self._gesture_detection_enabled = enabled
    if enabled:
        state = "enabled"
    else:
        # Clear the stored gesture state.
        with self._gesture_lock:
            self._current_gesture = "none"
            self._gesture_confidence = 0.0
        state = "disabled"
    _LOGGER.info("Gesture detection %s", state)
611
+
612
def set_gesture_state_callback(self, callback) -> None:
    """Register the callable invoked when the gesture state changes.

    Args:
        callback: Stored as ``self._gesture_state_callback``; gesture
            processing calls it without arguments.
    """
    self._gesture_state_callback = callback
615
+
616
def _get_camera_frame(self) -> Optional[np.ndarray]:
    """Fetch one frame from Reachy Mini's camera.

    Returns:
        The generated test pattern when no robot is connected, the frame
        from ``reachy_mini.media.get_frame()`` otherwise, or None if
        fetching the frame raised (logged at debug level).
    """
    if self.reachy_mini is None:
        # No robot connected: serve the placeholder test pattern instead.
        return self._generate_test_frame()

    try:
        captured = self.reachy_mini.media.get_frame()
    except Exception as e:
        _LOGGER.debug("Failed to get camera frame: %s", e)
        return None
    return captured
628
+
629
def _generate_test_frame(self) -> np.ndarray:
    """Build the placeholder frame used when no camera is available.

    Returns:
        A black 480x640 uint8 image with three channels, carrying a title,
        a "No camera connected" notice and the current local timestamp.
    """
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)

    # (text, origin, font scale, colour, thickness) for each overlay,
    # drawn in this order.
    overlays = (
        ("Reachy Mini Camera", (150, 200), 1.2, (255, 255, 255), 2),
        ("No camera connected", (180, 280), 0.8, (128, 128, 128), 1),
        (time.strftime("%Y-%m-%d %H:%M:%S"), (220, 350), 0.6, (0, 255, 0), 1),
    )
    for text, origin, scale, color, thickness in overlays:
        cv2.putText(
            canvas,
            text,
            origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            scale,
            color,
            thickness,
        )

    return canvas
667
+
668
def get_snapshot(self) -> Optional[bytes]:
    """Return the most recently stored frame.

    The value of ``self._last_frame`` is read while holding
    ``self._frame_lock``.

    Returns:
        The latest frame as JPEG bytes, or None if none is stored.
    """
    with self._frame_lock:
        latest = self._last_frame
    return latest
672
+
673
async def _handle_client(
    self,
    reader: asyncio.StreamReader,
    writer: asyncio.StreamWriter,
) -> None:
    """Serve one incoming HTTP connection and always close it afterwards.

    Reads the request line (10 s timeout), consumes the headers (5 s
    timeout per line) and dispatches on the request path: ``/stream`` to
    the MJPEG stream, ``/snapshot`` to a single JPEG, anything else to the
    index page. A query string on the request target (for example a
    cache-busting ``/snapshot?t=123``) is ignored for routing.

    Timeouts and connection resets are logged at debug level, any other
    error at error level; none of them propagate to the caller.

    Args:
        reader: Stream the HTTP request is read from.
        writer: Stream the response is written to; closed on exit.
    """
    try:
        # Read HTTP request line.
        request_line = await asyncio.wait_for(
            reader.readline(),
            timeout=10.0
        )
        request = request_line.decode('utf-8', errors='ignore').strip()

        # Headers are not used, but must be consumed up to the blank line
        # (or end of stream).
        while True:
            line = await asyncio.wait_for(reader.readline(), timeout=5.0)
            if line in (b'\r\n', b'\n', b''):
                break

        # "<method> <target> <version>": route on the target's path only,
        # so a query string does not divert the request to the index page.
        parts = request.split(' ')
        target = parts[1] if len(parts) >= 2 else '/'
        path = target.partition('?')[0]

        _LOGGER.debug("HTTP request: %s", request)

        if path == '/stream':
            await self._handle_stream(writer)
        elif path == '/snapshot':
            await self._handle_snapshot(writer)
        else:
            await self._handle_index(writer)

    except asyncio.TimeoutError:
        _LOGGER.debug("Client connection timeout")
    except ConnectionResetError:
        _LOGGER.debug("Client connection reset")
    except Exception as e:
        _LOGGER.error("Error handling client: %s", e)
    finally:
        # Best-effort close: the peer may already be gone.
        try:
            writer.close()
            await writer.wait_closed()
        except Exception:
            pass
721
+
722
async def _handle_index(self, writer: asyncio.StreamWriter) -> None:
    """Send the HTML index page describing the camera endpoints.

    The page embeds the live stream and lists the ``/stream`` and
    ``/snapshot`` endpoints, using ``self.port`` in the example URL.

    Args:
        writer: Stream the HTTP response is written to.
    """
    html = f"""<!DOCTYPE html>
<html>
<head>
<title>Reachy Mini Camera</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; background: #1a1a2e; color: #eee; }}
h1 {{ color: #00d4ff; }}
.container {{ max-width: 800px; margin: 0 auto; }}
.stream {{ width: 100%; max-width: 640px; border: 2px solid #00d4ff; border-radius: 8px; }}
a {{ color: #00d4ff; }}
.info {{ background: #16213e; padding: 20px; border-radius: 8px; margin-top: 20px; }}
</style>
</head>
<body>
<div class="container">
<h1>Reachy Mini Camera</h1>
<img class="stream" src="/stream" alt="Camera Stream">
<div class="info">
<h3>Endpoints:</h3>
<ul>
<li><a href="/stream">/stream</a> - MJPEG video stream</li>
<li><a href="/snapshot">/snapshot</a> - Single JPEG snapshot</li>
</ul>
<h3>Home Assistant Integration:</h3>
<p>Add a Generic Camera with URL: <code>http://&lt;ip&gt;:{self.port}/stream</code></p>
</div>
</div>
</body>
</html>"""

    # Content-Length counts bytes, not characters, so measure the encoded
    # body rather than the str.
    body = html.encode('utf-8')

    response = (
        "HTTP/1.1 200 OK\r\n"
        "Content-Type: text/html; charset=utf-8\r\n"
        f"Content-Length: {len(body)}\r\n"
        "Connection: close\r\n"
        "\r\n"
    )

    writer.write(response.encode('utf-8'))
    writer.write(body)
    await writer.drain()
765
+
766
async def _handle_snapshot(self, writer: asyncio.StreamWriter) -> None:
    """Answer a snapshot request with the latest JPEG frame.

    Sends ``200 OK`` with the image from ``self.get_snapshot()``, or
    ``503 Service Unavailable`` with a short text body when no frame is
    stored yet.

    Args:
        writer: Stream the HTTP response is written to.
    """
    snapshot = self.get_snapshot()

    if snapshot is not None:
        ok_header = (
            "HTTP/1.1 200 OK\r\n"
            "Content-Type: image/jpeg\r\n"
            f"Content-Length: {len(snapshot)}\r\n"
            "Cache-Control: no-cache, no-store, must-revalidate\r\n"
            "Connection: close\r\n"
            "\r\n"
        )
        writer.write(ok_header.encode('utf-8'))
        writer.write(snapshot)
    else:
        # No frame captured yet: tell the client to retry later.
        unavailable = (
            "HTTP/1.1 503 Service Unavailable\r\n"
            "Content-Type: text/plain\r\n"
            "Connection: close\r\n"
            "\r\n"
            "No frame available"
        )
        writer.write(unavailable.encode('utf-8'))

    await writer.drain()
792
+
793
async def _handle_stream(self, writer: asyncio.StreamWriter) -> None:
    """Serve an MJPEG stream until the server stops or the client leaves.

    After the multipart response headers, the loop polls the latest stored
    frame roughly every 10 ms and sends it as a new part whenever its
    timestamp is newer than the last one sent. Client disconnects are
    logged at debug level, other errors at error level.

    Args:
        writer: Stream the HTTP response is written to.
    """
    # Multipart response headers.
    stream_headers = (
        "HTTP/1.1 200 OK\r\n"
        f"Content-Type: multipart/x-mixed-replace; boundary={MJPEG_BOUNDARY}\r\n"
        "Cache-Control: no-cache, no-store, must-revalidate\r\n"
        "Connection: keep-alive\r\n"
        "\r\n"
    )
    writer.write(stream_headers.encode('utf-8'))
    await writer.drain()

    _LOGGER.debug("Started MJPEG stream")

    newest_sent = 0

    try:
        while self._running:
            # Snapshot the latest frame and its timestamp together.
            with self._frame_lock:
                payload = self._last_frame
                stamp = self._last_frame_time

            # Skip frames that were already delivered to this client.
            has_new_frame = payload is not None and stamp > newest_sent
            if has_new_frame:
                part_header = (
                    f"--{MJPEG_BOUNDARY}\r\n"
                    "Content-Type: image/jpeg\r\n"
                    f"Content-Length: {len(payload)}\r\n"
                    "\r\n"
                )
                for piece in (part_header.encode('utf-8'), payload, b"\r\n"):
                    writer.write(piece)
                await writer.drain()

                newest_sent = stamp

            # Yield briefly so the loop does not spin.
            await asyncio.sleep(0.01)

    except (ConnectionResetError, BrokenPipeError):
        _LOGGER.debug("Client disconnected from stream")
    except Exception as e:
        _LOGGER.error("Error in MJPEG stream: %s", e)

    _LOGGER.debug("Ended MJPEG stream")
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity.py RENAMED
@@ -1,56 +1,56 @@
1
  """ESPHome entity definitions."""
2
 
3
- import logging
4
  from abc import abstractmethod
5
- from collections.abc import Callable, Iterable
6
- from typing import TYPE_CHECKING
 
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
10
- BinarySensorStateResponse,
11
- CameraImageRequest,
12
- CameraImageResponse,
13
  ListEntitiesBinarySensorResponse,
 
14
  ListEntitiesCameraResponse,
15
  ListEntitiesMediaPlayerResponse,
16
  ListEntitiesNumberResponse,
17
  ListEntitiesRequest,
 
 
 
18
  ListEntitiesTextSensorResponse,
 
 
 
 
19
  MediaPlayerCommandRequest,
20
  MediaPlayerStateResponse,
21
  NumberCommandRequest,
22
  NumberStateResponse,
 
 
 
23
  SubscribeHomeAssistantStatesRequest,
24
  SubscribeStatesRequest,
 
 
25
  TextSensorStateResponse,
26
  )
27
- from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerEntityFeature, MediaPlayerState
28
  from google.protobuf import message
29
 
30
- from ..audio.audio_player import AudioPlayer
31
- from ..core.util import call_all
 
32
 
33
  if TYPE_CHECKING:
34
- from ..protocol.api_server import APIServer
35
 
36
  logger = logging.getLogger(__name__)
37
 
38
 
39
- def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
40
- """Read an entity value without letting getter failures break the ESPHome session."""
41
- if getter is None:
42
- return current_value
43
- try:
44
- return getter()
45
- except Exception as e:
46
- logger.error("Entity getter failed for %s: %s", entity_name, e)
47
- return current_value
48
-
49
-
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
- def __init__(self, server: "APIServer") -> None:
54
  self.server = server
55
 
56
  @abstractmethod
@@ -63,7 +63,7 @@ class MediaPlayerEntity(ESPHomeEntity):
63
 
64
  def __init__(
65
  self,
66
- server: "APIServer",
67
  key: int,
68
  name: str,
69
  object_id: str,
@@ -82,9 +82,9 @@ class MediaPlayerEntity(ESPHomeEntity):
82
 
83
  def play(
84
  self,
85
- url: str | list[str],
86
  announcement: bool = False,
87
- done_callback: Callable[[], None] | None = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
@@ -92,14 +92,18 @@ class MediaPlayerEntity(ESPHomeEntity):
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
- done_callback=lambda: call_all(self.music_player.resume, done_callback),
 
 
96
  )
97
  else:
98
  # Announce, idle
99
  self.announce_player.play(
100
  url,
101
  done_callback=lambda: call_all(
102
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
103
  done_callback,
104
  ),
105
  )
@@ -108,7 +112,9 @@ class MediaPlayerEntity(ESPHomeEntity):
108
  self.music_player.play(
109
  url,
110
  done_callback=lambda: call_all(
111
- lambda: self.server.send_messages([self._update_state(MediaPlayerState.IDLE)]),
 
 
112
  done_callback,
113
  ),
114
  )
@@ -127,9 +133,6 @@ class MediaPlayerEntity(ESPHomeEntity):
127
  elif msg.command == MediaPlayerCommand.PLAY:
128
  self.music_player.resume()
129
  yield self._update_state(MediaPlayerState.PLAYING)
130
- elif msg.command == MediaPlayerCommand.STOP:
131
- self.music_player.stop()
132
- yield self._update_state(MediaPlayerState.IDLE)
133
  elif msg.has_volume:
134
  volume = int(msg.volume * 100)
135
  self.music_player.set_volume(volume)
@@ -173,13 +176,13 @@ class TextSensorEntity(ESPHomeEntity):
173
 
174
  def __init__(
175
  self,
176
- server: "APIServer",
177
  key: int,
178
  name: str,
179
  object_id: str,
180
  icon: str = "",
181
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
182
- value_getter: Callable[[], str] | None = None,
183
  ) -> None:
184
  ESPHomeEntity.__init__(self, server)
185
  self.key = key
@@ -192,7 +195,9 @@ class TextSensorEntity(ESPHomeEntity):
192
 
193
  @property
194
  def value(self) -> str:
195
- return str(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
196
 
197
  @value.setter
198
  def value(self, new_value: str) -> None:
@@ -227,14 +232,14 @@ class BinarySensorEntity(ESPHomeEntity):
227
 
228
  def __init__(
229
  self,
230
- server: "APIServer",
231
  key: int,
232
  name: str,
233
  object_id: str,
234
  icon: str = "",
235
  device_class: str = "",
236
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
237
- value_getter: Callable[[], bool] | None = None,
238
  ) -> None:
239
  ESPHomeEntity.__init__(self, server)
240
  self.key = key
@@ -248,7 +253,9 @@ class BinarySensorEntity(ESPHomeEntity):
248
 
249
  @property
250
  def value(self) -> bool:
251
- return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
252
 
253
  @value.setter
254
  def value(self, new_value: bool) -> None:
@@ -284,7 +291,7 @@ class NumberEntity(ESPHomeEntity):
284
 
285
  def __init__(
286
  self,
287
- server: "APIServer",
288
  key: int,
289
  name: str,
290
  object_id: str,
@@ -295,8 +302,8 @@ class NumberEntity(ESPHomeEntity):
295
  unit_of_measurement: str = "",
296
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
297
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
298
- value_getter: Callable[[], float] | None = None,
299
- value_setter: Callable[[float], None] | None = None,
300
  ) -> None:
301
  ESPHomeEntity.__init__(self, server)
302
  self.key = key
@@ -315,7 +322,9 @@ class NumberEntity(ESPHomeEntity):
315
 
316
  @property
317
  def value(self) -> float:
318
- return float(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
319
 
320
  @value.setter
321
  def value(self, new_value: float) -> None:
@@ -362,12 +371,12 @@ class CameraEntity(ESPHomeEntity):
362
 
363
  def __init__(
364
  self,
365
- server: "APIServer",
366
  key: int,
367
  name: str,
368
  object_id: str,
369
  icon: str = "mdi:camera",
370
- image_getter: Callable[[], bytes | None] | None = None,
371
  ) -> None:
372
  ESPHomeEntity.__init__(self, server)
373
  self.key = key
@@ -376,7 +385,7 @@ class CameraEntity(ESPHomeEntity):
376
  self.icon = icon
377
  self._image_getter = image_getter
378
 
379
- def get_image(self) -> bytes | None:
380
  """Get the current camera image as JPEG bytes."""
381
  if self._image_getter:
382
  return self._image_getter()
@@ -407,3 +416,4 @@ class CameraEntity(ESPHomeEntity):
407
  data=b"",
408
  done=True,
409
  )
 
 
1
  """ESPHome entity definitions."""
2
 
 
3
  from abc import abstractmethod
4
+ from collections.abc import Iterable
5
+ from typing import Callable, List, Optional, Union, TYPE_CHECKING
6
+ import logging
7
 
8
  # pylint: disable=no-name-in-module
9
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
 
 
10
  ListEntitiesBinarySensorResponse,
11
+ ListEntitiesButtonResponse,
12
  ListEntitiesCameraResponse,
13
  ListEntitiesMediaPlayerResponse,
14
  ListEntitiesNumberResponse,
15
  ListEntitiesRequest,
16
+ ListEntitiesSelectResponse,
17
+ ListEntitiesSensorResponse,
18
+ ListEntitiesSwitchResponse,
19
  ListEntitiesTextSensorResponse,
20
+ BinarySensorStateResponse,
21
+ ButtonCommandRequest,
22
+ CameraImageRequest,
23
+ CameraImageResponse,
24
  MediaPlayerCommandRequest,
25
  MediaPlayerStateResponse,
26
  NumberCommandRequest,
27
  NumberStateResponse,
28
+ SelectCommandRequest,
29
+ SelectStateResponse,
30
+ SensorStateResponse,
31
  SubscribeHomeAssistantStatesRequest,
32
  SubscribeStatesRequest,
33
+ SwitchCommandRequest,
34
+ SwitchStateResponse,
35
  TextSensorStateResponse,
36
  )
37
+ from aioesphomeapi.model import MediaPlayerCommand, MediaPlayerState, MediaPlayerEntityFeature
38
  from google.protobuf import message
39
 
40
+ from .api_server import APIServer
41
+ from .audio_player import AudioPlayer
42
+ from .util import call_all
43
 
44
  if TYPE_CHECKING:
45
+ from reachy_mini import ReachyMini
46
 
47
  logger = logging.getLogger(__name__)
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  class ESPHomeEntity:
51
  """Base class for ESPHome entities."""
52
 
53
+ def __init__(self, server: APIServer) -> None:
54
  self.server = server
55
 
56
  @abstractmethod
 
63
 
64
  def __init__(
65
  self,
66
+ server: APIServer,
67
  key: int,
68
  name: str,
69
  object_id: str,
 
82
 
83
  def play(
84
  self,
85
+ url: Union[str, List[str]],
86
  announcement: bool = False,
87
+ done_callback: Optional[Callable[[], None]] = None,
88
  ) -> Iterable[message.Message]:
89
  if announcement:
90
  if self.music_player.is_playing:
 
92
  self.music_player.pause()
93
  self.announce_player.play(
94
  url,
95
+ done_callback=lambda: call_all(
96
+ self.music_player.resume, done_callback
97
+ ),
98
  )
99
  else:
100
  # Announce, idle
101
  self.announce_player.play(
102
  url,
103
  done_callback=lambda: call_all(
104
+ lambda: self.server.send_messages(
105
+ [self._update_state(MediaPlayerState.IDLE)]
106
+ ),
107
  done_callback,
108
  ),
109
  )
 
112
  self.music_player.play(
113
  url,
114
  done_callback=lambda: call_all(
115
+ lambda: self.server.send_messages(
116
+ [self._update_state(MediaPlayerState.IDLE)]
117
+ ),
118
  done_callback,
119
  ),
120
  )
 
133
  elif msg.command == MediaPlayerCommand.PLAY:
134
  self.music_player.resume()
135
  yield self._update_state(MediaPlayerState.PLAYING)
 
 
 
136
  elif msg.has_volume:
137
  volume = int(msg.volume * 100)
138
  self.music_player.set_volume(volume)
 
176
 
177
  def __init__(
178
  self,
179
+ server: APIServer,
180
  key: int,
181
  name: str,
182
  object_id: str,
183
  icon: str = "",
184
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
185
+ value_getter: Optional[Callable[[], str]] = None,
186
  ) -> None:
187
  ESPHomeEntity.__init__(self, server)
188
  self.key = key
 
195
 
196
  @property
197
  def value(self) -> str:
198
+ if self._value_getter:
199
+ return self._value_getter()
200
+ return self._value
201
 
202
  @value.setter
203
  def value(self, new_value: str) -> None:
 
232
 
233
  def __init__(
234
  self,
235
+ server: APIServer,
236
  key: int,
237
  name: str,
238
  object_id: str,
239
  icon: str = "",
240
  device_class: str = "",
241
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
242
+ value_getter: Optional[Callable[[], bool]] = None,
243
  ) -> None:
244
  ESPHomeEntity.__init__(self, server)
245
  self.key = key
 
253
 
254
  @property
255
  def value(self) -> bool:
256
+ if self._value_getter:
257
+ return self._value_getter()
258
+ return self._value
259
 
260
  @value.setter
261
  def value(self, new_value: bool) -> None:
 
291
 
292
  def __init__(
293
  self,
294
+ server: APIServer,
295
  key: int,
296
  name: str,
297
  object_id: str,
 
302
  unit_of_measurement: str = "",
303
  mode: int = 0, # 0 = auto, 1 = box, 2 = slider
304
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
305
+ value_getter: Optional[Callable[[], float]] = None,
306
+ value_setter: Optional[Callable[[float], None]] = None,
307
  ) -> None:
308
  ESPHomeEntity.__init__(self, server)
309
  self.key = key
 
322
 
323
  @property
324
  def value(self) -> float:
325
+ if self._value_getter:
326
+ return self._value_getter()
327
+ return self._value
328
 
329
  @value.setter
330
  def value(self, new_value: float) -> None:
 
371
 
372
  def __init__(
373
  self,
374
+ server: APIServer,
375
  key: int,
376
  name: str,
377
  object_id: str,
378
  icon: str = "mdi:camera",
379
+ image_getter: Optional[Callable[[], Optional[bytes]]] = None,
380
  ) -> None:
381
  ESPHomeEntity.__init__(self, server)
382
  self.key = key
 
385
  self.icon = icon
386
  self._image_getter = image_getter
387
 
388
+ def get_image(self) -> Optional[bytes]:
389
  """Get the current camera image as JPEG bytes."""
390
  if self._image_getter:
391
  return self._image_getter()
 
416
  data=b"",
417
  done=True,
418
  )
419
+
{reachy_mini_home_assistant/entities → reachy_mini_ha_voice}/entity_extensions.py RENAMED
@@ -1,16 +1,16 @@
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
 
 
3
  import logging
4
- from collections.abc import Callable, Iterable
5
- from typing import TYPE_CHECKING
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
8
- ButtonCommandRequest,
9
  ListEntitiesButtonResponse,
10
  ListEntitiesRequest,
11
  ListEntitiesSelectResponse,
12
  ListEntitiesSensorResponse,
13
  ListEntitiesSwitchResponse,
 
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
@@ -21,28 +21,14 @@ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
21
  )
22
  from google.protobuf import message
23
 
 
24
  from .entity import ESPHomeEntity
25
 
26
- if TYPE_CHECKING:
27
- from ..protocol.api_server import APIServer
28
-
29
  logger = logging.getLogger(__name__)
30
 
31
 
32
- def _safe_get_value(getter: Callable[[], object] | None, current_value: object, entity_name: str) -> object:
33
- """Read an entity value without letting getter failures break the ESPHome session."""
34
- if getter is None:
35
- return current_value
36
- try:
37
- return getter()
38
- except Exception as e:
39
- logger.error("Entity getter failed for %s: %s", entity_name, e)
40
- return current_value
41
-
42
-
43
  class SensorStateClass:
44
  """ESPHome SensorStateClass enum values."""
45
-
46
  NONE = 0
47
  MEASUREMENT = 1
48
  TOTAL_INCREASING = 2
@@ -54,7 +40,7 @@ class SensorEntity(ESPHomeEntity):
54
 
55
  def __init__(
56
  self,
57
- server: "APIServer",
58
  key: int,
59
  name: str,
60
  object_id: str,
@@ -64,7 +50,7 @@ class SensorEntity(ESPHomeEntity):
64
  device_class: str = "",
65
  state_class: int = SensorStateClass.NONE,
66
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
67
- value_getter: Callable[[], float] | None = None,
68
  ) -> None:
69
  ESPHomeEntity.__init__(self, server)
70
  self.key = key
@@ -75,7 +61,7 @@ class SensorEntity(ESPHomeEntity):
75
  self.accuracy_decimals = accuracy_decimals
76
  self.device_class = device_class
77
  self.entity_category = entity_category
78
- # Convert string state_class to enum
79
  if isinstance(state_class, str):
80
  state_class_map = {
81
  "": SensorStateClass.NONE,
@@ -91,7 +77,9 @@ class SensorEntity(ESPHomeEntity):
91
 
92
  @property
93
  def value(self) -> float:
94
- return float(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
95
 
96
  @value.setter
97
  def value(self, new_value: float) -> None:
@@ -130,15 +118,15 @@ class SwitchEntity(ESPHomeEntity):
130
 
131
  def __init__(
132
  self,
133
- server: "APIServer",
134
  key: int,
135
  name: str,
136
  object_id: str,
137
  icon: str = "",
138
  device_class: str = "",
139
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
140
- value_getter: Callable[[], bool] | None = None,
141
- value_setter: Callable[[bool], None] | None = None,
142
  ) -> None:
143
  ESPHomeEntity.__init__(self, server)
144
  self.key = key
@@ -153,7 +141,9 @@ class SwitchEntity(ESPHomeEntity):
153
 
154
  @property
155
  def value(self) -> bool:
156
- return bool(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
157
 
158
  @value.setter
159
  def value(self, new_value: bool) -> None:
@@ -193,15 +183,15 @@ class SelectEntity(ESPHomeEntity):
193
 
194
  def __init__(
195
  self,
196
- server: "APIServer",
197
  key: int,
198
  name: str,
199
  object_id: str,
200
- options: list[str],
201
  icon: str = "",
202
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
203
- value_getter: Callable[[], str] | None = None,
204
- value_setter: Callable[[str], None] | None = None,
205
  ) -> None:
206
  ESPHomeEntity.__init__(self, server)
207
  self.key = key
@@ -216,7 +206,9 @@ class SelectEntity(ESPHomeEntity):
216
 
217
  @property
218
  def value(self) -> str:
219
- return str(_safe_get_value(self._value_getter, self._value, self.object_id))
 
 
220
 
221
  @value.setter
222
  def value(self, new_value: str) -> None:
@@ -260,14 +252,14 @@ class ButtonEntity(ESPHomeEntity):
260
 
261
  def __init__(
262
  self,
263
- server: "APIServer",
264
  key: int,
265
  name: str,
266
  object_id: str,
267
  icon: str = "",
268
  device_class: str = "",
269
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
270
- on_press: Callable[[], None] | None = None,
271
  ) -> None:
272
  ESPHomeEntity.__init__(self, server)
273
  self.key = key
 
1
  """Extended ESPHome entity types for Reachy Mini control."""
2
 
3
+ from collections.abc import Iterable
4
+ from typing import Callable, List, Optional
5
  import logging
 
 
6
 
7
  from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
 
8
  ListEntitiesButtonResponse,
9
  ListEntitiesRequest,
10
  ListEntitiesSelectResponse,
11
  ListEntitiesSensorResponse,
12
  ListEntitiesSwitchResponse,
13
+ ButtonCommandRequest,
14
  SelectCommandRequest,
15
  SelectStateResponse,
16
  SensorStateResponse,
 
21
  )
22
  from google.protobuf import message
23
 
24
+ from .api_server import APIServer
25
  from .entity import ESPHomeEntity
26
 
 
 
 
27
  logger = logging.getLogger(__name__)
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  class SensorStateClass:
31
  """ESPHome SensorStateClass enum values."""
 
32
  NONE = 0
33
  MEASUREMENT = 1
34
  TOTAL_INCREASING = 2
 
40
 
41
  def __init__(
42
  self,
43
+ server: APIServer,
44
  key: int,
45
  name: str,
46
  object_id: str,
 
50
  device_class: str = "",
51
  state_class: int = SensorStateClass.NONE,
52
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
53
+ value_getter: Optional[Callable[[], float]] = None,
54
  ) -> None:
55
  ESPHomeEntity.__init__(self, server)
56
  self.key = key
 
61
  self.accuracy_decimals = accuracy_decimals
62
  self.device_class = device_class
63
  self.entity_category = entity_category
64
+ # Convert string state_class to int if needed (for backward compatibility)
65
  if isinstance(state_class, str):
66
  state_class_map = {
67
  "": SensorStateClass.NONE,
 
77
 
78
  @property
79
  def value(self) -> float:
80
+ if self._value_getter:
81
+ return self._value_getter()
82
+ return self._value
83
 
84
  @value.setter
85
  def value(self, new_value: float) -> None:
 
118
 
119
  def __init__(
120
  self,
121
+ server: APIServer,
122
  key: int,
123
  name: str,
124
  object_id: str,
125
  icon: str = "",
126
  device_class: str = "",
127
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
128
+ value_getter: Optional[Callable[[], bool]] = None,
129
+ value_setter: Optional[Callable[[bool], None]] = None,
130
  ) -> None:
131
  ESPHomeEntity.__init__(self, server)
132
  self.key = key
 
141
 
142
  @property
143
  def value(self) -> bool:
144
+ if self._value_getter:
145
+ return self._value_getter()
146
+ return self._value
147
 
148
  @value.setter
149
  def value(self, new_value: bool) -> None:
 
183
 
184
  def __init__(
185
  self,
186
+ server: APIServer,
187
  key: int,
188
  name: str,
189
  object_id: str,
190
+ options: List[str],
191
  icon: str = "",
192
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
193
+ value_getter: Optional[Callable[[], str]] = None,
194
+ value_setter: Optional[Callable[[str], None]] = None,
195
  ) -> None:
196
  ESPHomeEntity.__init__(self, server)
197
  self.key = key
 
206
 
207
  @property
208
  def value(self) -> str:
209
+ if self._value_getter:
210
+ return self._value_getter()
211
+ return self._value
212
 
213
  @value.setter
214
  def value(self, new_value: str) -> None:
 
252
 
253
  def __init__(
254
  self,
255
+ server: APIServer,
256
  key: int,
257
  name: str,
258
  object_id: str,
259
  icon: str = "",
260
  device_class: str = "",
261
  entity_category: int = 0, # 0 = none, 1 = config, 2 = diagnostic
262
+ on_press: Optional[Callable[[], None]] = None,
263
  ) -> None:
264
  ESPHomeEntity.__init__(self, server)
265
  self.key = key
reachy_mini_ha_voice/entity_registry.py ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entity registry for ESPHome entities.
2
+
3
+ This module handles the registration and management of all ESPHome entities
4
+ for the Reachy Mini voice assistant.
5
+ """
6
+
7
+ import logging
8
+ from typing import TYPE_CHECKING, Callable, Dict, List, Optional
9
+
10
+ from .entity import BinarySensorEntity, CameraEntity, NumberEntity, TextSensorEntity
11
+ from .entity_extensions import SensorEntity, SwitchEntity, SelectEntity, ButtonEntity
12
+
13
+ if TYPE_CHECKING:
14
+ from .reachy_controller import ReachyController
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ _LOGGER = logging.getLogger(__name__)
18
+
19
+
20
# Fixed entity key mapping - ensures consistent keys across restarts.
# Keys are hand-assigned in per-phase numeric blocks (100s, 200s, ...), not
# derived from a hash; get_entity_key() only falls back to a generated key
# for object_ids that are missing from this table.
ENTITY_KEYS: Dict[str, int] = {
    # Media player (key 0 reserved)
    "reachy_mini_media_player": 0,
    # Phase 1: Basic status and volume
    "daemon_state": 100,
    "backend_ready": 101,
    "speaker_volume": 103,
    # Phase 2: Motor control
    "motors_enabled": 200,
    "motor_mode": 201,
    "wake_up": 202,
    "go_to_sleep": 203,
    # Phase 3: Pose control
    "head_x": 300,
    "head_y": 301,
    "head_z": 302,
    "head_roll": 303,
    "head_pitch": 304,
    "head_yaw": 305,
    "body_yaw": 306,
    "antenna_left": 307,
    "antenna_right": 308,
    # Phase 4: Look at control
    "look_at_x": 400,
    "look_at_y": 401,
    "look_at_z": 402,
    # Phase 5: DOA (Direction of Arrival) - re-added for wakeup turn-to-sound
    "doa_angle": 500,
    "speech_detected": 501,
    # Phase 6: Diagnostic information
    "control_loop_frequency": 600,
    "sdk_version": 601,
    "robot_name": 602,
    "wireless_version": 603,
    "simulation_mode": 604,
    "wlan_ip": 605,
    "error_message": 606,  # Moved to diagnostic
    # Phase 7: IMU sensors
    "imu_accel_x": 700,
    "imu_accel_y": 701,
    "imu_accel_z": 702,
    "imu_gyro_x": 703,
    "imu_gyro_y": 704,
    "imu_gyro_z": 705,
    "imu_temperature": 706,
    # Phase 8: Emotion selector
    "emotion": 800,
    # Phase 9: Audio controls
    "microphone_volume": 900,
    # Phase 10: Camera
    "camera_url": 1000,  # Keep for backward compatibility
    "camera": 1001,  # New camera entity
    # Phase 11: LED control (disabled - not visible); keys kept reserved
    # "led_brightness": 1100,
    # "led_effect": 1101,
    # "led_color_r": 1102,
    # "led_color_g": 1103,
    # "led_color_b": 1104,
    # Phase 12: Audio processing
    "agc_enabled": 1200,
    "agc_max_gain": 1201,
    "noise_suppression": 1202,
    "echo_cancellation_converged": 1203,
    # Phase 13: Sendspin - auto-enabled via mDNS, no user entities needed
    # Phase 21: Continuous conversation
    "continuous_conversation": 1500,
    # Phase 22: Gesture detection
    "gesture_detected": 1600,
    "gesture_confidence": 1601,
}
92
+
93
+
94
def get_entity_key(object_id: str) -> int:
    """Return the entity key for ``object_id``, identical on every run.

    Known ids are looked up in ``ENTITY_KEYS``. Unknown ids get a fallback
    key in the range 2000..11999 and a warning is logged, since every entity
    is expected to have a fixed entry in the table.

    Args:
        object_id: The entity's object_id string.

    Returns:
        The fixed key from ``ENTITY_KEYS``, or a deterministic fallback key.
    """
    # Local import keeps this fix self-contained within the function.
    import zlib

    try:
        return ENTITY_KEYS[object_id]
    except KeyError:
        pass

    # Fallback (should not happen if all entities are registered).
    _LOGGER.warning("Entity key not found for %s, generating from hash", object_id)
    # The built-in hash() of a str is salted per process (PYTHONHASHSEED), so
    # it would yield a different key after every restart. CRC32 of the UTF-8
    # bytes is deterministic and keeps the same 2000..11999 fallback range.
    return zlib.crc32(object_id.encode("utf-8")) % 10000 + 2000
101
+
102
+
103
+ class EntityRegistry:
104
+ """Registry for managing ESPHome entities."""
105
+
106
def __init__(
    self,
    server,
    reachy_controller: "ReachyController",
    camera_server: Optional["MJPEGCameraServer"] = None,
    play_emotion_callback: Optional[Callable[[str], None]] = None,
):
    """Store the collaborators and build the emotion lookup table.

    Args:
        server: The VoiceSatelliteProtocol server instance.
        reachy_controller: The ReachyController instance.
        camera_server: Optional camera server for the camera entity.
        play_emotion_callback: Optional callback for playing emotions; it
            is called with the robot emotion name.
    """
    # Option label -> robot emotion name, grouped by category. The groups
    # are merged in this order, which is also the order of the select options.
    no_emotion = {"None": None}
    basic_emotions = {
        "Happy": "cheerful1",
        "Sad": "sad1",
        "Angry": "rage1",
        "Fear": "fear1",
        "Surprise": "surprised1",
        "Disgust": "disgusted1",
    }
    extended_emotions = {
        "Laughing": "laughing1",
        "Loving": "loving1",
        "Proud": "proud1",
        "Grateful": "grateful1",
        "Enthusiastic": "enthusiastic1",
        "Curious": "curious1",
        "Amazed": "amazed1",
        "Shy": "shy1",
        "Confused": "confused1",
        "Thoughtful": "thoughtful1",
        "Anxious": "anxiety1",
        "Scared": "scared1",
        "Frustrated": "frustrated1",
        "Irritated": "irritated1",
        "Furious": "furious1",
        "Contempt": "contempt1",
        "Bored": "boredom1",
        "Tired": "tired1",
        "Exhausted": "exhausted1",
        "Lonely": "lonely1",
        "Downcast": "downcast1",
        "Resigned": "resigned1",
        "Uncertain": "uncertain1",
        "Uncomfortable": "uncomfortable1",
        "Lost": "lost1",
        "Indifferent": "indifferent1",
    }
    positive_actions = {
        "Yes": "yes1",
        "No": "no1",
        "Welcoming": "welcoming1",
        "Helpful": "helpful1",
        "Attentive": "attentive1",
        "Understanding": "understanding1",
        "Calming": "calming1",
        "Relief": "relief1",
        "Success": "success1",
        "Serenity": "serenity1",
    }
    negative_actions = {
        "Oops": "oops1",
        "Displeased": "displeased1",
        "Impatient": "impatient1",
        "Reprimand": "reprimand1",
        "GoAway": "go_away1",
    }
    special_moves = {
        "Come": "come1",
        "Inquiring": "inquiring1",
        "Sleep": "sleep1",
        "Dance": "dance1",
        "Electric": "electric1",
        "Dying": "dying1",
    }

    self.server = server
    self.reachy_controller = reachy_controller
    self.camera_server = camera_server
    self._play_emotion_callback = play_emotion_callback

    # Gesture detection state
    self._current_gesture = "none"
    self._gesture_confidence = 0.0

    # Emotion state
    self._current_emotion = "None"
    self._emotion_map = {
        **no_emotion,
        **basic_emotions,
        **extended_emotions,
        **positive_actions,
        **negative_actions,
        **special_moves,
    }
195
+
196
def setup_all_entities(self, entities: List) -> None:
    """Run every enabled phase builder against ``entities``, in order.

    Args:
        entities: The list to append entities to.
    """
    phase_builders = (
        self._setup_phase1_entities,
        self._setup_phase2_entities,
        self._setup_phase3_entities,
        self._setup_phase4_entities,
        self._setup_phase5_entities,  # DOA for wakeup turn-to-sound
        self._setup_phase6_entities,
        self._setup_phase7_entities,
        self._setup_phase8_entities,
        self._setup_phase9_entities,
        self._setup_phase10_entities,
        # Phase 11 (LED control) disabled - LEDs are inside the robot and not visible
        self._setup_phase12_entities,
        # Phase 13 (Sendspin) - auto-enabled via mDNS discovery, no user entities
        # Phase 14 (head_joints, passive_joints) removed - not needed
        # Phase 20 (Tap detection) disabled - too many false triggers
        self._setup_phase21_entities,
        self._setup_phase22_entities,
    )
    for build_phase in phase_builders:
        build_phase(entities)

    _LOGGER.info("All entities registered: %d total", len(entities))
221
+
222
def _setup_phase1_entities(self, entities: List) -> None:
    """Append the Phase 1 entities: daemon state, backend readiness, speaker volume."""
    controller = self.reachy_controller

    daemon_state = TextSensorEntity(
        server=self.server,
        key=get_entity_key("daemon_state"),
        name="Daemon State",
        object_id="daemon_state",
        icon="mdi:robot",
        value_getter=controller.get_daemon_state,
    )
    entities.append(daemon_state)

    backend_ready = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("backend_ready"),
        name="Backend Ready",
        object_id="backend_ready",
        icon="mdi:check-circle",
        device_class="connectivity",
        value_getter=controller.get_backend_ready,
    )
    entities.append(backend_ready)

    speaker_volume = NumberEntity(
        server=self.server,
        key=get_entity_key("speaker_volume"),
        name="Speaker Volume",
        object_id="speaker_volume",
        min_value=0.0,
        max_value=100.0,
        step=1.0,
        icon="mdi:volume-high",
        unit_of_measurement="%",
        mode=2,  # Slider mode
        entity_category=1,  # config
        value_getter=controller.get_speaker_volume,
        value_setter=controller.set_speaker_volume,
    )
    entities.append(speaker_volume)

    _LOGGER.debug("Phase 1 entities registered: daemon_state, backend_ready, speaker_volume")
262
+
263
def _setup_phase2_entities(self, entities: List) -> None:
    """Append the Phase 2 entities: the motor switch and the wake/sleep buttons."""
    controller = self.reachy_controller

    def add_button(object_id, name, icon, on_press):
        # Both buttons share the same device class and differ only in labels/action.
        entities.append(ButtonEntity(
            server=self.server,
            key=get_entity_key(object_id),
            name=name,
            object_id=object_id,
            icon=icon,
            device_class="restart",
            on_press=on_press,
        ))

    motors_switch = SwitchEntity(
        server=self.server,
        key=get_entity_key("motors_enabled"),
        name="Motors Enabled",
        object_id="motors_enabled",
        icon="mdi:engine",
        device_class="switch",
        value_getter=controller.get_motors_enabled,
        value_setter=controller.set_motors_enabled,
    )
    entities.append(motors_switch)

    add_button("wake_up", "Wake Up", "mdi:alarm", controller.wake_up)
    add_button("go_to_sleep", "Go to Sleep", "mdi:sleep", controller.go_to_sleep)

    _LOGGER.debug("Phase 2 entities registered: motors_enabled, wake_up, go_to_sleep")
299
+
300
def _setup_phase3_entities(self, entities: List) -> None:
    """Append the Phase 3 entities: head position/orientation, body yaw, antennas."""
    controller = self.reachy_controller

    def add_slider(object_id, name, limit, icon, unit, getter, setter):
        # Every pose control is a number with a symmetric range [-limit, +limit],
        # a step of 1.0 and mode 2.
        entities.append(NumberEntity(
            server=self.server,
            key=get_entity_key(object_id),
            name=name,
            object_id=object_id,
            min_value=-limit,
            max_value=limit,
            step=1.0,
            icon=icon,
            unit_of_measurement=unit,
            mode=2,
            value_getter=getter,
            value_setter=setter,
        ))

    # Head position controls (X, Y, Z in mm)
    add_slider("head_x", "Head X Position", 50.0, "mdi:axis-x-arrow", "mm",
               controller.get_head_x, controller.set_head_x)
    add_slider("head_y", "Head Y Position", 50.0, "mdi:axis-y-arrow", "mm",
               controller.get_head_y, controller.set_head_y)
    add_slider("head_z", "Head Z Position", 50.0, "mdi:axis-z-arrow", "mm",
               controller.get_head_z, controller.set_head_z)

    # Head orientation controls (Roll, Pitch, Yaw in degrees)
    add_slider("head_roll", "Head Roll", 40.0, "mdi:rotate-3d-variant", "°",
               controller.get_head_roll, controller.set_head_roll)
    add_slider("head_pitch", "Head Pitch", 40.0, "mdi:rotate-3d-variant", "°",
               controller.get_head_pitch, controller.set_head_pitch)
    add_slider("head_yaw", "Head Yaw", 180.0, "mdi:rotate-3d-variant", "°",
               controller.get_head_yaw, controller.set_head_yaw)

    # Body yaw control
    add_slider("body_yaw", "Body Yaw", 160.0, "mdi:rotate-3d-variant", "°",
               controller.get_body_yaw, controller.set_body_yaw)

    # Antenna controls
    add_slider("antenna_left", "Antenna(L)", 90.0, "mdi:antenna", "°",
               controller.get_antenna_left, controller.set_antenna_left)
    add_slider("antenna_right", "Antenna(R)", 90.0, "mdi:antenna", "°",
               controller.get_antenna_right, controller.set_antenna_right)

    _LOGGER.debug("Phase 3 entities registered: head position/orientation, body_yaw, antennas")
444
+
445
def _setup_phase4_entities(self, entities: List) -> None:
    """Append the Phase 4 entities: the three look-at target coordinates."""
    controller = self.reachy_controller

    def add_coordinate(object_id, name, getter, setter):
        # All three coordinates share the same range, step, unit and icon.
        entities.append(NumberEntity(
            server=self.server,
            key=get_entity_key(object_id),
            name=name,
            object_id=object_id,
            min_value=-2.0,
            max_value=2.0,
            step=0.1,
            icon="mdi:crosshairs-gps",
            unit_of_measurement="m",
            mode=1,  # Box mode for precise input
            value_getter=getter,
            value_setter=setter,
        ))

    add_coordinate("look_at_x", "Look At X", controller.get_look_at_x, controller.set_look_at_x)
    add_coordinate("look_at_y", "Look At Y", controller.get_look_at_y, controller.set_look_at_y)
    add_coordinate("look_at_z", "Look At Z", controller.get_look_at_z, controller.set_look_at_z)

    _LOGGER.debug("Phase 4 entities registered: look_at_x/y/z")
495
+
496
def _setup_phase5_entities(self, entities: List) -> None:
    """Append the Phase 5 entities: DOA (Direction of Arrival) angle and speech flag."""
    controller = self.reachy_controller

    doa_angle = SensorEntity(
        server=self.server,
        key=get_entity_key("doa_angle"),
        name="DOA Angle",
        object_id="doa_angle",
        icon="mdi:surround-sound",
        unit_of_measurement="°",
        accuracy_decimals=1,
        state_class="measurement",
        value_getter=controller.get_doa_angle_degrees,
    )
    entities.append(doa_angle)

    speech_detected = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("speech_detected"),
        name="Speech Detected",
        object_id="speech_detected",
        icon="mdi:account-voice",
        device_class="sound",
        value_getter=controller.get_speech_detected,
    )
    entities.append(speech_detected)

    _LOGGER.debug("Phase 5 entities registered: doa_angle, speech_detected")
523
+
524
def _setup_phase6_entities(self, entities: List) -> None:
    """Append the Phase 6 entities: diagnostic information."""
    controller = self.reachy_controller
    diagnostic = 2  # entity_category value used for every entity in this phase

    def add_text(object_id, name, icon, getter):
        # Diagnostic text sensors differ only in id, label, icon and getter.
        entities.append(TextSensorEntity(
            server=self.server,
            key=get_entity_key(object_id),
            name=name,
            object_id=object_id,
            icon=icon,
            entity_category=diagnostic,
            value_getter=getter,
        ))

    loop_frequency = SensorEntity(
        server=self.server,
        key=get_entity_key("control_loop_frequency"),
        name="Control Loop Frequency",
        object_id="control_loop_frequency",
        icon="mdi:speedometer",
        unit_of_measurement="Hz",
        accuracy_decimals=1,
        state_class="measurement",
        entity_category=diagnostic,
        value_getter=controller.get_control_loop_frequency,
    )
    entities.append(loop_frequency)

    add_text("sdk_version", "SDK Version", "mdi:information", controller.get_sdk_version)
    add_text("robot_name", "Robot Name", "mdi:robot", controller.get_robot_name)

    wireless_version = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("wireless_version"),
        name="Wireless Version",
        object_id="wireless_version",
        icon="mdi:wifi",
        device_class="connectivity",
        entity_category=diagnostic,
        value_getter=controller.get_wireless_version,
    )
    entities.append(wireless_version)

    simulation_mode = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("simulation_mode"),
        name="Simulation Mode",
        object_id="simulation_mode",
        icon="mdi:virtual-reality",
        entity_category=diagnostic,
        value_getter=controller.get_simulation_mode,
    )
    entities.append(simulation_mode)

    add_text("wlan_ip", "WLAN IP", "mdi:ip-network", controller.get_wlan_ip)
    add_text("error_message", "Error Message", "mdi:alert-circle", controller.get_error_message)

    _LOGGER.debug(
        "Phase 6 entities registered: control_loop_frequency, sdk_version, "
        "robot_name, wireless_version, simulation_mode, wlan_ip, error_message"
    )
606
+
607
def _setup_phase7_entities(self, entities: List) -> None:
    """Append the Phase 7 entities: IMU sensors (wireless only)."""
    controller = self.reachy_controller

    def add_axis_sensor(object_id, name, icon, unit, getter):
        # Accelerometer and gyroscope axes all report measurements with 3 decimals.
        entities.append(SensorEntity(
            server=self.server,
            key=get_entity_key(object_id),
            name=name,
            object_id=object_id,
            icon=icon,
            unit_of_measurement=unit,
            accuracy_decimals=3,
            state_class="measurement",
            value_getter=getter,
        ))

    # IMU Accelerometer
    add_axis_sensor("imu_accel_x", "IMU Accel X", "mdi:axis-x-arrow", "m/s²",
                    controller.get_imu_accel_x)
    add_axis_sensor("imu_accel_y", "IMU Accel Y", "mdi:axis-y-arrow", "m/s²",
                    controller.get_imu_accel_y)
    add_axis_sensor("imu_accel_z", "IMU Accel Z", "mdi:axis-z-arrow", "m/s²",
                    controller.get_imu_accel_z)

    # IMU Gyroscope
    add_axis_sensor("imu_gyro_x", "IMU Gyro X", "mdi:rotate-3d-variant", "rad/s",
                    controller.get_imu_gyro_x)
    add_axis_sensor("imu_gyro_y", "IMU Gyro Y", "mdi:rotate-3d-variant", "rad/s",
                    controller.get_imu_gyro_y)
    add_axis_sensor("imu_gyro_z", "IMU Gyro Z", "mdi:rotate-3d-variant", "rad/s",
                    controller.get_imu_gyro_z)

    # IMU Temperature (different precision and an explicit device class)
    imu_temperature = SensorEntity(
        server=self.server,
        key=get_entity_key("imu_temperature"),
        name="IMU Temperature",
        object_id="imu_temperature",
        icon="mdi:thermometer",
        unit_of_measurement="°C",
        accuracy_decimals=1,
        device_class="temperature",
        state_class="measurement",
        value_getter=controller.get_imu_temperature,
    )
    entities.append(imu_temperature)

    _LOGGER.debug("Phase 7 entities registered: IMU accelerometer, gyroscope, temperature")
700
+
701
def _setup_phase8_entities(self, entities: List) -> None:
    """Append the Phase 8 entity: the emotion selector."""

    def read_selection() -> str:
        return self._current_emotion

    def apply_selection(emotion: str) -> None:
        # Record the choice first; it stays recorded when nothing can be played.
        self._current_emotion = emotion
        robot_emotion = self._emotion_map.get(emotion)
        if not robot_emotion or not self._play_emotion_callback:
            return
        self._play_emotion_callback(robot_emotion)
        # Reset to None after playing
        self._current_emotion = "None"

    emotion_select = SelectEntity(
        server=self.server,
        key=get_entity_key("emotion"),
        name="Emotion",
        object_id="emotion",
        options=list(self._emotion_map),
        icon="mdi:emoticon",
        value_getter=read_selection,
        value_setter=apply_selection,
    )
    entities.append(emotion_select)

    _LOGGER.debug("Phase 8 entities registered: emotion selector")
727
+
728
def _setup_phase9_entities(self, entities: List) -> None:
    """Append the Phase 9 entity: microphone volume control."""
    controller = self.reachy_controller

    microphone_volume = NumberEntity(
        server=self.server,
        key=get_entity_key("microphone_volume"),
        name="Microphone Volume",
        object_id="microphone_volume",
        min_value=0.0,
        max_value=100.0,
        step=1.0,
        icon="mdi:microphone",
        unit_of_measurement="%",
        mode=2,  # Slider mode
        entity_category=1,  # config
        value_getter=controller.get_microphone_volume,
        value_setter=controller.set_microphone_volume,
    )
    entities.append(microphone_volume)

    _LOGGER.debug("Phase 9 entities registered: microphone_volume")
749
+
750
def _setup_phase10_entities(self, entities: List) -> None:
    """Append the Phase 10 entity: the camera for Home Assistant integration."""

    def snapshot_or_none() -> Optional[bytes]:
        """Return a camera snapshot as JPEG bytes, or None without a camera server."""
        return self.camera_server.get_snapshot() if self.camera_server else None

    camera = CameraEntity(
        server=self.server,
        key=get_entity_key("camera"),
        name="Camera",
        object_id="camera",
        icon="mdi:camera",
        image_getter=snapshot_or_none,
    )
    entities.append(camera)

    _LOGGER.debug("Phase 10 entities registered: camera (ESPHome Camera entity)")
769
+
770
def _setup_phase12_entities(self, entities: List) -> None:
    """Register the Phase 12 audio-processing entities.

    Adds an AGC switch, an AGC max-gain slider, a noise-suppression slider
    and an echo-cancellation diagnostic sensor. The three writable entities
    apply the value through the controller and then persist it in the
    server preferences.

    Args:
        entities: Entity list to extend; it is mutated in place.
    """
    controller = self.reachy_controller

    def persist_preference(attr_name: str, value, log_format: str) -> None:
        """Store ``value`` in the preferences and save them; no-op without server state."""
        state = getattr(self.server, 'state', None)
        if not state:
            return
        setattr(state.preferences, attr_name, value)
        state.save_preferences()
        _LOGGER.debug(log_format, value)

    def set_agc_enabled_with_save(enabled: bool) -> None:
        """Apply the AGC on/off state, then persist it."""
        controller.set_agc_enabled(enabled)
        persist_preference("agc_enabled", enabled, "AGC enabled saved to preferences: %s")

    def set_agc_max_gain_with_save(gain: float) -> None:
        """Apply the AGC max gain, then persist it."""
        controller.set_agc_max_gain(gain)
        persist_preference("agc_max_gain", gain, "AGC max gain saved to preferences: %.1f dB")

    def set_noise_suppression_with_save(level: float) -> None:
        """Apply the noise-suppression level, then persist it."""
        controller.set_noise_suppression(level)
        persist_preference(
            "noise_suppression", level, "Noise suppression saved to preferences: %.1f%%"
        )

    agc_switch = SwitchEntity(
        server=self.server,
        key=get_entity_key("agc_enabled"),
        name="AGC Enabled",
        object_id="agc_enabled",
        icon="mdi:tune-vertical",
        device_class="switch",
        entity_category=1,  # config
        value_getter=controller.get_agc_enabled,
        value_setter=set_agc_enabled_with_save,
    )
    entities.append(agc_switch)

    agc_max_gain = NumberEntity(
        server=self.server,
        key=get_entity_key("agc_max_gain"),
        name="AGC Max Gain",
        object_id="agc_max_gain",
        min_value=0.0,
        max_value=40.0,  # XVF3800 supports up to 40dB
        step=1.0,
        icon="mdi:volume-plus",
        unit_of_measurement="dB",
        mode=2,
        entity_category=1,  # config
        value_getter=controller.get_agc_max_gain,
        value_setter=set_agc_max_gain_with_save,
    )
    entities.append(agc_max_gain)

    noise_suppression = NumberEntity(
        server=self.server,
        key=get_entity_key("noise_suppression"),
        name="Noise Suppression",
        object_id="noise_suppression",
        min_value=0.0,
        max_value=100.0,
        step=1.0,
        icon="mdi:volume-off",
        unit_of_measurement="%",
        mode=2,
        entity_category=1,  # config
        value_getter=controller.get_noise_suppression,
        value_setter=set_noise_suppression_with_save,
    )
    entities.append(noise_suppression)

    # Read-only: exposes the controller's echo-cancellation convergence flag.
    echo_converged = BinarySensorEntity(
        server=self.server,
        key=get_entity_key("echo_cancellation_converged"),
        name="Echo Cancellation Converged",
        object_id="echo_cancellation_converged",
        icon="mdi:waveform",
        device_class="running",
        entity_category=2,  # diagnostic
        value_getter=controller.get_echo_cancellation_converged,
    )
    entities.append(echo_converged)

    _LOGGER.debug(
        "Phase 12 entities registered: agc_enabled, agc_max_gain, "
        "noise_suppression, echo_cancellation_converged"
    )
857
+
858
def _setup_phase21_entities(self, entities: List) -> None:
    """Register the Phase 21 continuous-conversation switch.

    The switch reads and writes ``continuous_conversation`` on the server
    preferences; without server state it reads False and writes are ignored.

    Args:
        entities: Entity list to extend; it is mutated in place.
    """

    def get_continuous_conversation() -> bool:
        """Return the stored continuous-conversation flag (False when unavailable)."""
        state = getattr(self.server, 'state', None)
        if not state:
            return False
        return getattr(state.preferences, 'continuous_conversation', False)

    def set_continuous_conversation(enabled: bool) -> None:
        """Store the flag in the preferences, save them and log the new mode."""
        state = getattr(self.server, 'state', None)
        if not state:
            return
        state.preferences.continuous_conversation = enabled
        state.save_preferences()
        mode_label = "enabled" if enabled else "disabled"
        _LOGGER.info("Continuous conversation mode %s", mode_label)

    conversation_switch = SwitchEntity(
        server=self.server,
        key=get_entity_key("continuous_conversation"),
        name="Continuous Conversation",
        object_id="continuous_conversation",
        icon="mdi:message-reply-text",
        device_class="switch",
        entity_category=1,  # config
        value_getter=get_continuous_conversation,
        value_setter=set_continuous_conversation,
    )
    entities.append(conversation_switch)

    _LOGGER.debug("Phase 21 entities registered: continuous_conversation")
888
+
889
def _setup_phase22_entities(self, entities: List) -> None:
    """Register the Phase 22 gesture-detection sensors.

    Creates a text sensor for the detected gesture and a numeric sensor for
    its confidence, and keeps references to both on ``self`` so that
    ``update_gesture_state`` can push updates later.

    Args:
        entities: Entity list to extend; it is mutated in place.
    """

    def get_gesture() -> str:
        """Return the camera server's current gesture, or "none" without a camera server."""
        # NOTE(review): this fallback is "none" while the gesture detector's
        # Gesture.NONE value is "no_gesture" - confirm consumers handle both.
        camera = self.camera_server
        return camera.get_current_gesture() if camera else "none"

    def get_gesture_confidence() -> float:
        """Return the camera server's gesture confidence, or 0.0 without a camera server."""
        camera = self.camera_server
        return camera.get_gesture_confidence() if camera else 0.0

    self_gesture = TextSensorEntity(
        server=self.server,
        key=get_entity_key("gesture_detected"),
        name="Gesture Detected",
        object_id="gesture_detected",
        icon="mdi:hand-wave",
        value_getter=get_gesture,
    )
    entities.append(self_gesture)
    self._gesture_entity = self_gesture

    self_confidence = SensorEntity(
        server=self.server,
        key=get_entity_key("gesture_confidence"),
        name="Gesture Confidence",
        object_id="gesture_confidence",
        icon="mdi:percent",
        unit_of_measurement="%",
        accuracy_decimals=1,
        state_class="measurement",
        value_getter=get_gesture_confidence,
    )
    entities.append(self_confidence)
    self._gesture_confidence_entity = self_confidence

    _LOGGER.debug("Phase 22 entities registered: gesture_detected, gesture_confidence")
930
+
931
def update_gesture_state(self) -> None:
    """Push the gesture and gesture-confidence states to Home Assistant.

    Each entity is updated only if it has been created (attribute present
    and truthy), so calling this before the entities exist is harmless.
    """
    for attr_name in ('_gesture_entity', '_gesture_confidence_entity'):
        entity = getattr(self, attr_name, None)
        if entity:
            entity.update_state()
937
+
938
def find_entity_references(self, entities: List) -> None:
    """Find and store references to special entities from an existing list.

    Currently a no-op: the DOA entities are read-only sensors, so no
    references need to be kept.

    Args:
        entities: The list of existing entities to search (left untouched).
    """
    # Nothing to look up; the docstring above serves as the function body.
reachy_mini_ha_voice/gesture_detector.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gesture detection using HaGRID ONNX models."""
2
+
3
+ from __future__ import annotations
4
+ import logging
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional, Tuple
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from numpy.typing import NDArray
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class Gesture(Enum):
    """Gestures the detector can report; each value is the gesture's label string."""

    # Reported when no supported gesture is recognised.
    NONE = "no_gesture"

    # Supported gestures, in declaration (and therefore iteration) order.
    CALL = "call"
    DISLIKE = "dislike"
    FIST = "fist"
    FOUR = "four"
    LIKE = "like"
    MUTE = "mute"
    OK = "ok"
    ONE = "one"
    PALM = "palm"
    PEACE = "peace"
    PEACE_INVERTED = "peace_inverted"
    ROCK = "rock"
    STOP = "stop"
    STOP_INVERTED = "stop_inverted"
    THREE = "three"
    THREE2 = "three2"
    TWO_UP = "two_up"
    TWO_UP_INVERTED = "two_up_inverted"
36
+
37
+
38
# Classifier labels; the list position is the class index that
# GestureDetector._classify obtains from the classifier's arg-max.
_GESTURE_CLASSES = [
    "hand_down", "hand_right", "hand_left",
    "thumb_index", "thumb_left", "thumb_right", "thumb_down",
    "half_up", "half_left", "half_right", "half_down",
    "part_hand_heart", "part_hand_heart2",
    "fist_inverted",
    "two_left", "two_right", "two_down",
    "grabbing", "grip", "point",
    "call", "three3", "little_finger", "middle_finger",
    "dislike", "fist", "four", "like", "mute", "ok", "one", "palm",
    "peace", "peace_inverted", "rock",
    "stop", "stop_inverted",
    "three", "three2",
    "two_up", "two_up_inverted",
    "three_gun",
    "one_left", "one_right", "one_down",
]

# Label -> Gesture for every label that has a Gesture member. Labels missing
# here (e.g. "grip", "point") are reported as Gesture.NONE by the classifier.
_NAME_TO_GESTURE = {
    member.value: member
    for member in Gesture
    if member is not Gesture.NONE
}
59
+
60
+
61
class GestureDetector:
    """Detect a hand in a frame and classify its gesture with two ONNX models.

    A detector model (``hand_detector.onnx``) proposes hand boxes; the best
    scoring box is cropped from the frame and passed to a classifier model
    (``crops_classifier.onnx``). Both files are looked up in the ``models``
    directory next to this module. When onnxruntime or either file is
    missing, or loading fails, the instance stays unavailable and
    :meth:`detect` always returns ``(Gesture.NONE, 0.0)``.
    """

    def __init__(self, confidence_threshold: float = 0.3, detection_threshold: float = 0.3):
        """Configure thresholds and load the models immediately.

        Args:
            confidence_threshold: Minimum classifier probability for a gesture
                to be reported.
            detection_threshold: Minimum detector score for a hand box to be
                considered.
        """
        self._confidence_threshold = confidence_threshold
        self._detection_threshold = detection_threshold
        models_dir = Path(__file__).parent / "models"
        self._detector_path = models_dir / "hand_detector.onnx"
        self._classifier_path = models_dir / "crops_classifier.onnx"
        self._detector = None
        self._classifier = None
        self._available = False
        # Per-channel normalisation constants applied in _preprocess.
        self._mean = np.array([127, 127, 127], dtype=np.float32)
        self._std = np.array([128, 128, 128], dtype=np.float32)
        # Sizes handed straight to cv2.resize for each model's input.
        self._detector_size = (320, 240)
        self._classifier_size = (128, 128)
        self._load_models()

    def _load_models(self) -> None:
        """Create both ONNX Runtime sessions; leave the detector unavailable on failure."""
        try:
            import onnxruntime as ort
        except ImportError:
            logger.warning("onnxruntime not installed")
            return
        if not self._detector_path.exists() or not self._classifier_path.exists():
            logger.warning("Model files not found")
            return
        try:
            providers = ['CPUExecutionProvider']
            logger.info("Loading gesture models...")
            self._detector = ort.InferenceSession(str(self._detector_path), providers=providers)
            self._classifier = ort.InferenceSession(str(self._classifier_path), providers=providers)
            self._det_input = self._detector.get_inputs()[0].name
            self._det_outputs = [o.name for o in self._detector.get_outputs()]
            self._cls_input = self._classifier.get_inputs()[0].name
            self._available = True
            logger.info("Gesture detection ready")
        except Exception as e:
            # Drop any session that did load so the object is never left
            # half-initialised (e.g. detector loaded, classifier failed).
            self._detector = self._classifier = None
            self._available = False
            logger.error("Failed to load models: %s", e)

    @property
    def is_available(self) -> bool:
        """Whether both models were loaded successfully."""
        return self._available

    def _preprocess(self, frame: NDArray, size: Tuple[int, int]) -> NDArray:
        """Turn a BGR image into a normalised, channel-first batch of one.

        Args:
            frame: Image in BGR channel order.
            size: Target size passed to ``cv2.resize``.

        Returns:
            Float32 array with a leading batch axis and channels before the
            spatial axes.
        """
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, size)
        img = (img.astype(np.float32) - self._mean) / self._std
        img = np.transpose(img, [2, 0, 1])
        return np.expand_dims(img, axis=0)

    def _detect_hand(self, frame: NDArray) -> Optional[Tuple[int, int, int, int, float]]:
        """Return the best hand box as ``(x1, y1, x2, y2, score)`` or None.

        ``x2``/``y2`` are exclusive bounds clamped to the frame size, so the
        box can be used directly as a slice. None is returned when no
        detector is loaded, nothing scores above the detection threshold, or
        the clamped box is empty.
        """
        if self._detector is None:
            return None
        h, w = frame.shape[:2]
        inp = self._preprocess(frame, self._detector_size)
        outs = self._detector.run(self._det_outputs, {self._det_input: inp})
        # The first output is used as boxes and the third as scores.
        # NOTE(review): assumes both are indexed per detection - confirm
        # against the model's output signature.
        boxes = outs[0]
        scores = outs[2]
        if len(boxes) == 0:
            return None
        best_i, best_c = -1, self._detection_threshold
        for i, c in enumerate(scores):
            if c > best_c:
                best_c, best_i = float(c), i
        if best_i < 0:
            return None
        b = boxes[best_i]
        # Model outputs normalized coordinates (0-1), scale to original frame size
        x1, y1 = int(b[0] * w), int(b[1] * h)
        x2, y2 = int(b[2] * w), int(b[3] * h)
        x1, y1 = max(0, x1), max(0, y1)
        # Clamp to w/h (not w-1/h-1): x2/y2 end up as exclusive slice ends,
        # so the last row/column of the frame must remain reachable.
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return None
        return (x1, y1, x2, y2, best_c)

    def _get_square_crop(self, frame: NDArray, box: Tuple[int, int, int, int]) -> NDArray:
        """Crop ``box`` from ``frame`` after growing its short side to a square.

        The shorter side is extended symmetrically around the box; the result
        is then clamped to the frame, so a box near an edge may yield a
        non-square crop.

        Args:
            frame: Source image (rows, columns, ...).
            box: ``(x1, y1, x2, y2)`` with exclusive ``x2``/``y2``.

        Returns:
            A view of ``frame`` covering the adjusted box.
        """
        h, w = frame.shape[:2]
        x1, y1, x2, y2 = box
        bw, bh = x2 - x1, y2 - y1
        if bh < bw:
            y1, y2 = y1 - (bw - bh) // 2, y1 - (bw - bh) // 2 + bw
        elif bh > bw:
            x1, x2 = x1 - (bh - bw) // 2, x1 - (bh - bw) // 2 + bh
        x1, y1 = max(0, x1), max(0, y1)
        # Slice ends are exclusive, so clamp to the full extent of the frame.
        x2, y2 = min(w, x2), min(h, y2)
        return frame[y1:y2, x1:x2]

    def _classify(self, crop: NDArray) -> Tuple[Gesture, float]:
        """Classify a hand crop and return ``(gesture, probability)``.

        The probability is the softmax of the winning logit. ``Gesture.NONE``
        is returned when no classifier is loaded, the crop is empty, the
        probability is below the confidence threshold, or the winning label
        has no ``Gesture`` member.
        """
        if self._classifier is None or crop.size == 0:
            return Gesture.NONE, 0.0
        inp = self._preprocess(crop, self._classifier_size)
        logits = self._classifier.run(None, {self._cls_input: inp})[0][0]
        idx = int(np.argmax(logits))
        # Subtract the max before exponentiating for numerical stability.
        exp_l = np.exp(logits - np.max(logits))
        conf = float(exp_l[idx] / np.sum(exp_l))
        if idx >= len(_GESTURE_CLASSES) or conf < self._confidence_threshold:
            return Gesture.NONE, conf
        name = _GESTURE_CLASSES[idx]
        return _NAME_TO_GESTURE.get(name, Gesture.NONE), conf

    def detect(self, frame: NDArray) -> Tuple[Gesture, float]:
        """Run detection and classification on one frame.

        Returns:
            ``(gesture, confidence)`` where confidence is the detector score
            multiplied by the classifier probability (also when the gesture
            is ``Gesture.NONE``). ``(Gesture.NONE, 0.0)`` is returned when the
            detector is unavailable, no hand is found, the crop is empty, or
            any error occurs (errors are logged, never raised).
        """
        if not self._available:
            return Gesture.NONE, 0.0
        try:
            det = self._detect_hand(frame)
            if det is None:
                return Gesture.NONE, 0.0
            x1, y1, x2, y2, det_c = det
            logger.debug("Hand: box=(%d,%d,%d,%d) conf=%.2f", x1, y1, x2, y2, det_c)
            crop = self._get_square_crop(frame, (x1, y1, x2, y2))
            if crop.size == 0:
                return Gesture.NONE, 0.0
            gest, cls_c = self._classify(crop)
            if gest != Gesture.NONE:
                logger.debug("Gesture: %s (det=%.2f cls=%.2f)", gest.value, det_c, cls_c)
            return gest, det_c * cls_c
        except Exception as e:
            # Best effort per frame: a failing frame must not stop the caller.
            logger.warning("Gesture error: %s", e)
            return Gesture.NONE, 0.0

    def close(self) -> None:
        """Drop both model sessions and mark the detector unavailable."""
        self._detector = self._classifier = None
        self._available = False
{reachy_mini_home_assistant/vision → reachy_mini_ha_voice}/head_tracker.py RENAMED
@@ -3,35 +3,24 @@
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
6
-
7
- Performance Optimizations:
8
- - Optional frame downscaling for faster inference on low-power devices
9
- - Frame skip support for reduced CPU usage when tracking is stable
10
- - Configurable inference resolution (default: native resolution)
11
  """
12
 
13
  from __future__ import annotations
14
-
15
  import logging
16
- from typing import TYPE_CHECKING
17
 
18
  import numpy as np
 
19
 
20
- if TYPE_CHECKING:
21
- from numpy.typing import NDArray
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
  class HeadTracker:
27
  """Lightweight head tracker using YOLO for face detection.
28
-
29
  Model is loaded at initialization time to ensure face tracking
30
  is ready immediately (matching conversation_app behavior).
31
-
32
- Performance Features:
33
- - Frame downscaling: Reduces inference resolution for ~4x speedup
34
- - Frame skipping: Reuses last detection result for stable tracking
35
  """
36
 
37
  def __init__(
@@ -40,7 +29,6 @@ class HeadTracker:
40
  model_filename: str = "model.pt",
41
  confidence_threshold: float = 0.3,
42
  device: str = "cpu",
43
- inference_scale: float = 1.0, # Scale factor for inference (0.5 = half resolution)
44
  ) -> None:
45
  """Initialize YOLO-based head tracker.
46
 
@@ -49,7 +37,6 @@ class HeadTracker:
49
  model_filename: Model file name
50
  confidence_threshold: Minimum confidence for face detection
51
  device: Device to run inference on ('cpu' or 'cuda')
52
- inference_scale: Scale factor for inference (0.5 = half res for ~4x speedup)
53
  """
54
  self.confidence_threshold = confidence_threshold
55
  self.model = None
@@ -58,57 +45,57 @@ class HeadTracker:
58
  self._device = device
59
  self._detections_class = None
60
  self._model_load_attempted = False
61
- self._model_load_error: str | None = None
62
-
63
- # Performance optimization settings
64
- self._inference_scale = min(1.0, max(0.25, inference_scale))
65
-
66
- # Frame skip support for stable tracking
67
- self._last_detection: tuple[NDArray, float] | None = None
68
- self._frames_since_detection = 0
69
- self._max_skip_frames = 0 # 0 = no skipping (can be set externally)
70
-
71
  # Load model immediately at init (not lazy)
72
  self._load_model()
73
 
74
  def _load_model(self) -> None:
75
- """Load YOLO model for face detection."""
76
  if self._model_load_attempted:
77
  return
78
-
79
  self._model_load_attempted = True
80
-
81
  try:
82
- from pathlib import Path
83
-
84
- from supervision import Detections
85
  from ultralytics import YOLO
86
-
 
 
 
87
  self._detections_class = Detections
88
-
89
- # Load local model from models directory
90
- models_dir = Path(__file__).resolve().parents[1] / "models"
91
- local_model_path = models_dir / self._model_filename
92
-
93
- if not local_model_path.exists():
94
- raise FileNotFoundError(
95
- f"Model file not found: {local_model_path}. "
96
- f"Please place {self._model_filename} in the models directory."
97
- )
98
-
99
- model_path = str(local_model_path)
100
- logger.info("Loading local YOLO model: %s", model_path)
101
-
 
 
 
 
 
 
 
 
 
 
 
 
102
  self.model = YOLO(model_path).to(self._device)
103
- logger.info("YOLO face detection model loaded successfully")
104
  except ImportError as e:
105
  self._model_load_error = f"Missing dependencies: {e}"
106
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
107
  self.model = None
108
- except FileNotFoundError as e:
109
- self._model_load_error = str(e)
110
- logger.error("Failed to load YOLO model: %s", e)
111
- self.model = None
112
  except Exception as e:
113
  self._model_load_error = str(e)
114
  logger.error("Failed to load YOLO model: %s", e)
@@ -119,7 +106,7 @@ class HeadTracker:
119
  """Check if the head tracker is available and ready."""
120
  return self.model is not None and self._detections_class is not None
121
 
122
- def _select_best_face(self, detections) -> int | None:
123
  """Select the best face based on confidence and area.
124
 
125
  Args:
@@ -152,7 +139,9 @@ class HeadTracker:
152
  best_idx = valid_indices[np.argmax(scores)]
153
  return int(best_idx)
154
 
155
- def _bbox_to_normalized_coords(self, bbox: NDArray[np.float32], w: int, h: int) -> NDArray[np.float32]:
 
 
156
  """Convert bounding box center to normalized coordinates [-1, 1].
157
 
158
  Args:
@@ -172,7 +161,9 @@ class HeadTracker:
172
 
173
  return np.array([norm_x, norm_y], dtype=np.float32)
174
 
175
- def get_head_position(self, img: NDArray[np.uint8]) -> tuple[NDArray[np.float32] | None, float | None]:
 
 
176
  """Get head position from face detection.
177
 
178
  Args:
@@ -186,36 +177,14 @@ class HeadTracker:
186
 
187
  h, w = img.shape[:2]
188
 
189
- # Frame skip optimization: return last detection if within skip limit
190
- if (
191
- self._max_skip_frames > 0
192
- and self._last_detection is not None
193
- and self._frames_since_detection < self._max_skip_frames
194
- ):
195
- self._frames_since_detection += 1
196
- return self._last_detection
197
-
198
  try:
199
- # Downscale image for faster inference if scale < 1.0
200
- if self._inference_scale < 1.0:
201
- import cv2
202
-
203
- new_w = int(w * self._inference_scale)
204
- new_h = int(h * self._inference_scale)
205
- inference_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
206
- else:
207
- inference_img = img
208
- new_w, new_h = w, h
209
-
210
  # Run YOLO inference
211
- results = self.model(inference_img, verbose=False)
212
  detections = self._detections_class.from_ultralytics(results[0])
213
 
214
  # Select best face
215
  face_idx = self._select_best_face(detections)
216
  if face_idx is None:
217
- self._last_detection = None
218
- self._frames_since_detection = 0
219
  return None, None
220
 
221
  bbox = detections.xyxy[face_idx]
@@ -223,90 +192,11 @@ class HeadTracker:
223
  if detections.confidence is not None:
224
  confidence = float(detections.confidence[face_idx])
225
 
226
- # Scale bbox back to original resolution if downscaled
227
- if self._inference_scale < 1.0:
228
- scale_factor = 1.0 / self._inference_scale
229
- bbox = bbox * scale_factor
230
-
231
- # Get face center in [-1, 1] coordinates (using original dimensions)
232
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
233
 
234
- # Cache result for frame skipping
235
- self._last_detection = (face_center, confidence)
236
- self._frames_since_detection = 0
237
-
238
  return face_center, confidence
239
 
240
  except Exception as e:
241
  logger.debug("Error in head position detection: %s", e)
242
  return None, None
243
-
244
- def set_inference_scale(self, scale: float) -> None:
245
- """Set the inference resolution scale factor.
246
-
247
- Args:
248
- scale: Scale factor (0.25 to 1.0). Lower = faster but less accurate.
249
- """
250
- self._inference_scale = min(1.0, max(0.25, scale))
251
- logger.debug("Inference scale set to %.2f", self._inference_scale)
252
-
253
- def set_max_skip_frames(self, skip: int) -> None:
254
- """Set maximum frames to skip between detections.
255
-
256
- Args:
257
- skip: Number of frames to skip (0 = no skipping).
258
- Higher values reduce CPU but may cause tracking lag.
259
- """
260
- self._max_skip_frames = max(0, skip)
261
- logger.debug("Max skip frames set to %d", self._max_skip_frames)
262
-
263
- def clear_detection_cache(self) -> None:
264
- """Clear cached detection result."""
265
- self._last_detection = None
266
- self._frames_since_detection = 0
267
-
268
- def suspend(self) -> None:
269
- """Suspend the head tracker to release YOLO model from memory.
270
-
271
- Call resume() to reload the model.
272
- """
273
- if self.model is None:
274
- logger.debug("HeadTracker model not loaded, nothing to suspend")
275
- return
276
-
277
- logger.info("Suspending HeadTracker - releasing YOLO model...")
278
-
279
- try:
280
- # Release YOLO model from memory
281
- del self.model
282
- self.model = None
283
-
284
- # Also clear the detections class reference
285
- self._detections_class = None
286
-
287
- # Reset load state so resume can reload
288
- self._model_load_attempted = False
289
- self._model_load_error = None
290
-
291
- # Clear detection cache
292
- self.clear_detection_cache()
293
-
294
- logger.info("HeadTracker suspended - YOLO model released")
295
- except Exception as e:
296
- logger.warning("Error suspending HeadTracker: %s", e)
297
-
298
- def resume(self) -> None:
299
- """Resume the head tracker by reloading the YOLO model."""
300
- if self.model is not None:
301
- logger.debug("HeadTracker model already loaded")
302
- return
303
-
304
- logger.info("Resuming HeadTracker - reloading YOLO model...")
305
-
306
- # Reload the model
307
- self._load_model()
308
-
309
- if self.is_available:
310
- logger.info("HeadTracker resumed - YOLO model loaded")
311
- else:
312
- logger.warning("HeadTracker resume failed - model not available")
 
3
  Ported from reachy_mini_conversation_app for voice assistant integration.
4
  Model is loaded at initialization time (not lazy) to ensure face tracking
5
  is ready immediately when the camera server starts.
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
 
9
  import logging
10
+ from typing import Tuple, Optional
11
 
12
  import numpy as np
13
+ from numpy.typing import NDArray
14
 
 
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
 
19
  class HeadTracker:
20
  """Lightweight head tracker using YOLO for face detection.
21
+
22
  Model is loaded at initialization time to ensure face tracking
23
  is ready immediately (matching conversation_app behavior).
 
 
 
 
24
  """
25
 
26
  def __init__(
 
29
  model_filename: str = "model.pt",
30
  confidence_threshold: float = 0.3,
31
  device: str = "cpu",
 
32
  ) -> None:
33
  """Initialize YOLO-based head tracker.
34
 
 
37
  model_filename: Model file name
38
  confidence_threshold: Minimum confidence for face detection
39
  device: Device to run inference on ('cpu' or 'cuda')
 
40
  """
41
  self.confidence_threshold = confidence_threshold
42
  self.model = None
 
45
  self._device = device
46
  self._detections_class = None
47
  self._model_load_attempted = False
48
+ self._model_load_error: Optional[str] = None
49
+
 
 
 
 
 
 
 
 
50
  # Load model immediately at init (not lazy)
51
  self._load_model()
52
 
53
  def _load_model(self) -> None:
54
+ """Load YOLO model with retry logic."""
55
  if self._model_load_attempted:
56
  return
57
+
58
  self._model_load_attempted = True
59
+
60
  try:
 
 
 
61
  from ultralytics import YOLO
62
+ from supervision import Detections
63
+ from huggingface_hub import hf_hub_download
64
+ import time
65
+
66
  self._detections_class = Detections
67
+
68
+ # Download with retries
69
+ max_retries = 3
70
+ retry_delay = 5
71
+ model_path = None
72
+ last_error = None
73
+
74
+ for attempt in range(max_retries):
75
+ try:
76
+ model_path = hf_hub_download(
77
+ repo_id=self._model_repo,
78
+ filename=self._model_filename,
79
+ )
80
+ break
81
+ except Exception as e:
82
+ last_error = e
83
+ if attempt < max_retries - 1:
84
+ logger.warning(
85
+ "Model download failed (attempt %d/%d): %s. Retrying in %ds...",
86
+ attempt + 1, max_retries, e, retry_delay
87
+ )
88
+ time.sleep(retry_delay)
89
+
90
+ if model_path is None:
91
+ raise last_error
92
+
93
  self.model = YOLO(model_path).to(self._device)
94
+ logger.info("YOLO face detection model loaded")
95
  except ImportError as e:
96
  self._model_load_error = f"Missing dependencies: {e}"
97
  logger.warning("Face tracking disabled - missing dependencies: %s", e)
98
  self.model = None
 
 
 
 
99
  except Exception as e:
100
  self._model_load_error = str(e)
101
  logger.error("Failed to load YOLO model: %s", e)
 
106
  """Check if the head tracker is available and ready."""
107
  return self.model is not None and self._detections_class is not None
108
 
109
+ def _select_best_face(self, detections) -> Optional[int]:
110
  """Select the best face based on confidence and area.
111
 
112
  Args:
 
139
  best_idx = valid_indices[np.argmax(scores)]
140
  return int(best_idx)
141
 
142
+ def _bbox_to_normalized_coords(
143
+ self, bbox: NDArray[np.float32], w: int, h: int
144
+ ) -> NDArray[np.float32]:
145
  """Convert bounding box center to normalized coordinates [-1, 1].
146
 
147
  Args:
 
161
 
162
  return np.array([norm_x, norm_y], dtype=np.float32)
163
 
164
+ def get_head_position(
165
+ self, img: NDArray[np.uint8]
166
+ ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
167
  """Get head position from face detection.
168
 
169
  Args:
 
177
 
178
  h, w = img.shape[:2]
179
 
 
 
 
 
 
 
 
 
 
180
  try:
 
 
 
 
 
 
 
 
 
 
 
181
  # Run YOLO inference
182
+ results = self.model(img, verbose=False)
183
  detections = self._detections_class.from_ultralytics(results[0])
184
 
185
  # Select best face
186
  face_idx = self._select_best_face(detections)
187
  if face_idx is None:
 
 
188
  return None, None
189
 
190
  bbox = detections.xyxy[face_idx]
 
192
  if detections.confidence is not None:
193
  confidence = float(detections.confidence[face_idx])
194
 
195
+ # Get face center in [-1, 1] coordinates
 
 
 
 
 
196
  face_center = self._bbox_to_normalized_coords(bbox, w, h)
197
 
 
 
 
 
198
  return face_center, confidence
199
 
200
  except Exception as e:
201
  logger.debug("Error in head position detection: %s", e)
202
  return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/main.py RENAMED
@@ -7,14 +7,48 @@ with Home Assistant via ESPHome protocol for voice control.
7
 
8
  import asyncio
9
  import logging
10
- import sys
11
  import threading
 
 
12
 
13
- from reachy_mini import ReachyMiniApp
14
 
15
- from .voice_assistant import VoiceAssistantService
16
 
17
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  class ReachyMiniHaVoice(ReachyMiniApp):
@@ -27,40 +61,60 @@ class ReachyMiniHaVoice(ReachyMiniApp):
27
  """
28
 
29
  # No custom web UI needed - configuration is automatic via Home Assistant
30
- custom_app_url: str | None = None
31
 
32
  def __init__(self, *args, **kwargs):
33
  """Initialize the app."""
34
  super().__init__(*args, **kwargs)
35
- self.stop_event = threading.Event()
 
36
 
37
  def wrapped_run(self, *args, **kwargs) -> None:
38
  """
39
- Override wrapped_run to handle Reachy Mini connection failures.
 
 
40
  """
41
  logger.info("Starting Reachy Mini HA Voice App...")
42
 
43
- # Connect to ReachyMini
44
- try:
45
- logger.info("Attempting to connect to Reachy Mini...")
46
- super().wrapped_run(*args, **kwargs)
47
- except TimeoutError as e:
48
- logger.error(f"Timeout connecting to Reachy Mini: {e}")
49
- sys.exit(1)
50
- except Exception as e:
51
- error_str = str(e)
52
- if "Unable to connect" in error_str or "Timeout" in error_str:
53
- logger.error(f"Failed to connect to Reachy Mini: {e}")
54
- sys.exit(1)
55
- else:
56
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
59
  """
60
  Main application entry point.
61
 
62
  Args:
63
- reachy_mini: The Reachy Mini robot instance (required, cannot be None)
64
  stop_event: Event to signal graceful shutdown
65
  """
66
  logger.info("Starting Reachy Mini for Home Assistant...")
@@ -82,8 +136,12 @@ class ReachyMiniHaVoice(ReachyMiniApp):
82
  logger.info("ESPHome Server: 0.0.0.0:6053")
83
  logger.info("Camera Server: 0.0.0.0:8081")
84
  logger.info("Wake word: Okay Nabu")
85
- logger.info("Motion control: enabled")
86
- logger.info("Camera: enabled (Reachy Mini)")
 
 
 
 
87
  logger.info("=" * 50)
88
  logger.info("To connect from Home Assistant:")
89
  logger.info(" Settings -> Devices & Services -> Add Integration")
@@ -120,19 +178,13 @@ class ReachyMiniHaVoice(ReachyMiniApp):
120
  logger.info("Reachy Mini HA stopped.")
121
 
122
 
123
- # This is called when running as: python -m reachy_mini_home_assistant.main
124
  if __name__ == "__main__":
125
  logging.basicConfig(
126
  level=logging.INFO,
127
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
128
  )
129
 
130
- # Reduce verbosity for some noisy modules
131
- logging.getLogger("reachy_mini.media.media_manager").setLevel(logging.WARNING)
132
- logging.getLogger("reachy_mini.media.camera_base").setLevel(logging.WARNING)
133
- logging.getLogger("reachy_mini.media.audio_base").setLevel(logging.WARNING)
134
- logging.getLogger("matplotlib").setLevel(logging.WARNING)
135
-
136
  app = ReachyMiniHaVoice()
137
  try:
138
  app.wrapped_run()
 
7
 
8
  import asyncio
9
  import logging
10
+ import socket
11
  import threading
12
+ import time
13
+ from typing import Optional
14
 
15
+ logger = logging.getLogger(__name__)
16
 
 
17
 
18
def _check_zenoh_available(timeout: float = 1.0) -> bool:
    """Report whether a TCP connection to the local Zenoh port can be opened.

    Args:
        timeout: Seconds to wait for the connection attempt.

    Returns:
        True if 127.0.0.1:7447 accepted the connection, False on any
        socket-level failure (timeout, refusal, or other OSError).
    """
    zenoh_endpoint = ("127.0.0.1", 7447)
    try:
        with socket.create_connection(zenoh_endpoint, timeout=timeout):
            reachable = True
    except OSError:
        # socket.timeout and ConnectionRefusedError are OSError subclasses.
        reachable = False
    return reachable
25
+
26
+
27
# Only import ReachyMiniApp if we're running as an app
try:
    from reachy_mini import ReachyMini, ReachyMiniApp
    REACHY_MINI_AVAILABLE = True
except ImportError:
    REACHY_MINI_AVAILABLE = False

    # Create a dummy base class so the app can still be defined and run
    # without the Reachy Mini SDK installed.
    class ReachyMiniApp:
        """Minimal stand-in for the SDK's ReachyMiniApp base class."""

        custom_app_url = None

        def __init__(self, *args, **kwargs):
            # Accept and ignore any arguments: the subclass forwards
            # *args/**kwargs to super().__init__(), which would otherwise
            # raise TypeError in this fallback.
            self.stop_event = threading.Event()

        def wrapped_run(self, *args, **kwargs):
            """Do nothing; there is no SDK run loop to wrap."""
            pass

        def stop(self):
            """Signal shutdown by setting the stop event."""
            self.stop_event.set()

    ReachyMini = None
48
+
49
+
50
+ from .voice_assistant import VoiceAssistantService
51
+ from .motion import ReachyMiniMotion
52
 
53
 
54
  class ReachyMiniHaVoice(ReachyMiniApp):
 
61
  """
62
 
63
  # No custom web UI needed - configuration is automatic via Home Assistant
64
+ custom_app_url: Optional[str] = None
65
 
66
  def __init__(self, *args, **kwargs):
67
  """Initialize the app."""
68
  super().__init__(*args, **kwargs)
69
+ if not hasattr(self, 'stop_event'):
70
+ self.stop_event = threading.Event()
71
 
72
  def wrapped_run(self, *args, **kwargs) -> None:
73
  """
74
+ Override wrapped_run to handle Zenoh connection failures gracefully.
75
+
76
+ If Zenoh is not available, run in standalone mode without robot control.
77
  """
78
  logger.info("Starting Reachy Mini HA Voice App...")
79
 
80
+ # Check if Zenoh is available before trying to connect
81
+ if not _check_zenoh_available():
82
+ logger.warning("Zenoh service not available (port 7447)")
83
+ logger.info("Running in standalone mode without robot control")
84
+ self._run_standalone()
85
+ return
86
+
87
+ # Zenoh is available, try normal startup with ReachyMini
88
+ if REACHY_MINI_AVAILABLE:
89
+ try:
90
+ logger.info("Attempting to connect to Reachy Mini...")
91
+ super().wrapped_run(*args, **kwargs)
92
+ except TimeoutError as e:
93
+ logger.warning(f"Timeout connecting to Reachy Mini: {e}")
94
+ logger.info("Falling back to standalone mode")
95
+ self._run_standalone()
96
+ except Exception as e:
97
+ error_str = str(e)
98
+ if "Unable to connect" in error_str or "ZError" in error_str or "Timeout" in error_str:
99
+ logger.warning(f"Failed to connect to Reachy Mini: {e}")
100
+ logger.info("Falling back to standalone mode")
101
+ self._run_standalone()
102
+ else:
103
+ raise
104
+ else:
105
+ logger.info("Reachy Mini SDK not available, running standalone")
106
+ self._run_standalone()
107
+
108
+ def _run_standalone(self) -> None:
109
+ """Run in standalone mode without robot."""
110
+ self.run(None, self.stop_event)
111
 
112
  def run(self, reachy_mini, stop_event: threading.Event) -> None:
113
  """
114
  Main application entry point.
115
 
116
  Args:
117
+ reachy_mini: The Reachy Mini robot instance (can be None)
118
  stop_event: Event to signal graceful shutdown
119
  """
120
  logger.info("Starting Reachy Mini for Home Assistant...")
 
136
  logger.info("ESPHome Server: 0.0.0.0:6053")
137
  logger.info("Camera Server: 0.0.0.0:8081")
138
  logger.info("Wake word: Okay Nabu")
139
+ if reachy_mini:
140
+ logger.info("Motion control: enabled")
141
+ logger.info("Camera: enabled (Reachy Mini)")
142
+ else:
143
+ logger.info("Motion control: disabled (no robot)")
144
+ logger.info("Camera: test pattern (no robot)")
145
  logger.info("=" * 50)
146
  logger.info("To connect from Home Assistant:")
147
  logger.info(" Settings -> Devices & Services -> Add Integration")
 
178
  logger.info("Reachy Mini HA stopped.")
179
 
180
 
181
+ # This is called when running as: python -m reachy_mini_ha_voice.main
182
  if __name__ == "__main__":
183
  logging.basicConfig(
184
  level=logging.INFO,
185
  format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
186
  )
187
 
 
 
 
 
 
 
188
  app = ReachyMiniHaVoice()
189
  try:
190
  app.wrapped_run()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models.py RENAMED
@@ -5,18 +5,15 @@ import logging
5
  from dataclasses import asdict, dataclass, field
6
  from enum import Enum
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING
 
9
 
10
  if TYPE_CHECKING:
11
- import threading
12
- from queue import Queue
13
-
14
  from pymicro_wakeword import MicroWakeWord
15
  from pyopen_wakeword import OpenWakeWord
16
-
17
- from .audio.audio_player import AudioPlayer
18
- from .entities.entity import ESPHomeEntity, MediaPlayerEntity
19
- from .protocol.satellite import VoiceSatelliteProtocol
20
 
21
  _LOGGER = logging.getLogger(__name__)
22
 
@@ -31,21 +28,18 @@ class AvailableWakeWord:
31
  id: str
32
  type: WakeWordType
33
  wake_word: str
34
- trained_languages: list[str]
35
  wake_word_path: Path
36
- probability_cutoff: float = 0.7
37
 
38
- def load(self) -> "MicroWakeWord | OpenWakeWord":
39
  if self.type == WakeWordType.MICRO_WAKE_WORD:
40
  from pymicro_wakeword import MicroWakeWord
41
-
42
  return MicroWakeWord.from_config(config_path=self.wake_word_path)
43
 
44
  if self.type == WakeWordType.OPEN_WAKE_WORD:
45
  from pyopen_wakeword import OpenWakeWord
46
-
47
  oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
48
- oww_model.wake_word = self.wake_word
49
  return oww_model
50
 
51
  raise ValueError(f"Unexpected wake word type: {self.type}")
@@ -53,34 +47,25 @@ class AvailableWakeWord:
53
 
54
  @dataclass
55
  class Preferences:
56
- active_wake_words: list[str] = field(default_factory=list)
 
 
 
 
57
  # Continuous conversation mode (controlled from Home Assistant)
58
  continuous_conversation: bool = False
59
- # Unified idle behavior toggle (controlled from Home Assistant)
60
- idle_behavior_enabled: bool = False
61
- # Sendspin discovery and playback toggle (controlled from Home Assistant)
62
- sendspin_enabled: bool = False
63
- # Vision toggles and parameters (controlled from Home Assistant)
64
- face_tracking_enabled: bool = False
65
- gesture_detection_enabled: bool = False
66
- face_confidence_threshold: float = 0.5
67
-
68
- def set_idle_behavior_enabled(self, enabled: bool) -> None:
69
- """Update the unified idle behavior toggle."""
70
- self.idle_behavior_enabled = enabled
71
 
72
 
73
  @dataclass
74
  class ServerState:
75
  """Global server state."""
76
-
77
  name: str
78
  mac_address: str
79
- audio_queue: "Queue[bytes | None]"
80
- entities: "list[ESPHomeEntity]"
81
- available_wake_words: "dict[str, AvailableWakeWord]"
82
- wake_words: "dict[str, MicroWakeWord | OpenWakeWord]"
83
- active_wake_words: set[str]
84
  stop_word: "MicroWakeWord"
85
  music_player: "AudioPlayer"
86
  tts_player: "AudioPlayer"
@@ -91,88 +76,20 @@ class ServerState:
91
  download_dir: Path
92
 
93
  # Reachy Mini specific
94
- reachy_mini: object
95
  motion_enabled: bool = True
96
- motion: object | None = None # ReachyMiniMotion instance
97
 
98
- media_player_entity: "MediaPlayerEntity | None" = None
99
- satellite: "VoiceSatelliteProtocol | None" = None
100
  wake_words_changed: bool = False
101
  refractory_seconds: float = 2.0
102
- timer_max_ring_seconds: float = 900.0
103
- _entities_initialized: bool = False
104
-
105
- _services_suspended: bool = False
106
-
107
- # Mute state (controlled from Home Assistant) - thread-safe via properties
108
- _is_muted: bool = False
109
-
110
- # Camera state (controlled from Home Assistant) - thread-safe via properties
111
- _camera_enabled: bool = True
112
-
113
- # Thread safety
114
- _state_lock: "threading.Lock | None" = None
115
-
116
- def __post_init__(self):
117
- """Initialize state lock after dataclass creation."""
118
- import threading
119
-
120
- object.__setattr__(self, "_state_lock", threading.Lock())
121
-
122
- @property
123
- def services_suspended(self) -> bool:
124
- """Thread-safe getter for services_suspended."""
125
- if self._state_lock is None:
126
- return self._services_suspended
127
- with self._state_lock:
128
- return self._services_suspended
129
-
130
- @services_suspended.setter
131
- def services_suspended(self, value: bool) -> None:
132
- """Thread-safe setter for services_suspended."""
133
- if self._state_lock is None:
134
- object.__setattr__(self, "_services_suspended", value)
135
- else:
136
- with self._state_lock:
137
- object.__setattr__(self, "_services_suspended", value)
138
-
139
- @property
140
- def is_muted(self) -> bool:
141
- """Thread-safe getter for is_muted."""
142
- if self._state_lock is None:
143
- return self._is_muted
144
- with self._state_lock:
145
- return self._is_muted
146
-
147
- @is_muted.setter
148
- def is_muted(self, value: bool) -> None:
149
- """Thread-safe setter for is_muted."""
150
- if self._state_lock is None:
151
- object.__setattr__(self, "_is_muted", value)
152
- else:
153
- with self._state_lock:
154
- object.__setattr__(self, "_is_muted", value)
155
-
156
- @property
157
- def camera_enabled(self) -> bool:
158
- """Thread-safe getter for camera_enabled."""
159
- if self._state_lock is None:
160
- return self._camera_enabled
161
- with self._state_lock:
162
- return self._camera_enabled
163
-
164
- @camera_enabled.setter
165
- def camera_enabled(self, value: bool) -> None:
166
- """Thread-safe setter for camera_enabled."""
167
- if self._state_lock is None:
168
- object.__setattr__(self, "_camera_enabled", value)
169
- else:
170
- with self._state_lock:
171
- object.__setattr__(self, "_camera_enabled", value)
172
 
173
  def save_preferences(self) -> None:
174
  """Save preferences as JSON."""
175
  _LOGGER.debug("Saving preferences: %s", self.preferences_path)
176
  self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
177
  with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
178
- json.dump(asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4)
 
 
 
5
  from dataclasses import asdict, dataclass, field
6
  from enum import Enum
7
  from pathlib import Path
8
+ from queue import Queue
9
+ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
10
 
11
  if TYPE_CHECKING:
 
 
 
12
  from pymicro_wakeword import MicroWakeWord
13
  from pyopen_wakeword import OpenWakeWord
14
+ from .entity import ESPHomeEntity, MediaPlayerEntity
15
+ from .audio_player import AudioPlayer
16
+ from .satellite import VoiceSatelliteProtocol
 
17
 
18
  _LOGGER = logging.getLogger(__name__)
19
 
 
28
  id: str
29
  type: WakeWordType
30
  wake_word: str
31
+ trained_languages: List[str]
32
  wake_word_path: Path
 
33
 
34
+ def load(self) -> "Union[MicroWakeWord, OpenWakeWord]":
35
  if self.type == WakeWordType.MICRO_WAKE_WORD:
36
  from pymicro_wakeword import MicroWakeWord
 
37
  return MicroWakeWord.from_config(config_path=self.wake_word_path)
38
 
39
  if self.type == WakeWordType.OPEN_WAKE_WORD:
40
  from pyopen_wakeword import OpenWakeWord
 
41
  oww_model = OpenWakeWord.from_model(model_path=self.wake_word_path)
42
+ setattr(oww_model, "wake_word", self.wake_word)
43
  return oww_model
44
 
45
  raise ValueError(f"Unexpected wake word type: {self.type}")
 
47
 
48
  @dataclass
49
  class Preferences:
50
+ active_wake_words: List[str] = field(default_factory=list)
51
+ # Audio processing settings (persisted from Home Assistant)
52
+ agc_enabled: Optional[bool] = None # None = use hardware default
53
+ agc_max_gain: Optional[float] = None # None = use hardware default
54
+ noise_suppression: Optional[float] = None # None = use hardware default
55
  # Continuous conversation mode (controlled from Home Assistant)
56
  continuous_conversation: bool = False
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  @dataclass
60
  class ServerState:
61
  """Global server state."""
 
62
  name: str
63
  mac_address: str
64
+ audio_queue: "Queue[Optional[bytes]]"
65
+ entities: "List[ESPHomeEntity]"
66
+ available_wake_words: "Dict[str, AvailableWakeWord]"
67
+ wake_words: "Dict[str, Union[MicroWakeWord, OpenWakeWord]]"
68
+ active_wake_words: Set[str]
69
  stop_word: "MicroWakeWord"
70
  music_player: "AudioPlayer"
71
  tts_player: "AudioPlayer"
 
76
  download_dir: Path
77
 
78
  # Reachy Mini specific
79
+ reachy_mini: Optional[object] = None
80
  motion_enabled: bool = True
81
+ motion: Optional[object] = None # ReachyMiniMotion instance
82
 
83
+ media_player_entity: "Optional[MediaPlayerEntity]" = None
84
+ satellite: "Optional[VoiceSatelliteProtocol]" = None
85
  wake_words_changed: bool = False
86
  refractory_seconds: float = 2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  def save_preferences(self) -> None:
89
  """Save preferences as JSON."""
90
  _LOGGER.debug("Saving preferences: %s", self.preferences_path)
91
  self.preferences_path.parent.mkdir(parents=True, exist_ok=True)
92
  with open(self.preferences_path, "w", encoding="utf-8") as preferences_file:
93
+ json.dump(
94
+ asdict(self.preferences), preferences_file, ensure_ascii=False, indent=4
95
+ )
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/crops_classifier.onnx RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/models/hand_detector.onnx RENAMED
File without changes
reachy_mini_home_assistant/motion/reachy_motion.py → reachy_mini_ha_voice/motion.py RENAMED
@@ -5,6 +5,7 @@ MovementManager for unified 5Hz control with face tracking.
5
  """
6
 
7
  import logging
 
8
 
9
  from .movement_manager import MovementManager, RobotState
10
 
@@ -18,28 +19,31 @@ class ReachyMiniMotion:
18
  to the MovementManager which handles them in its 5Hz control loop.
19
  """
20
 
21
- def __init__(self, reachy_mini):
22
  self.reachy_mini = reachy_mini
23
- self._movement_manager: MovementManager | None = None
24
  self._camera_server = None # Reference to camera server for face tracking control
25
  self._is_speaking = False
26
 
27
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
28
 
29
- # Initialize movement manager
30
- try:
31
- self._movement_manager = MovementManager(reachy_mini)
32
- _LOGGER.debug("MovementManager created successfully")
33
- except Exception as e:
34
- _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
35
- self._movement_manager = None
 
 
 
36
 
37
  def set_reachy_mini(self, reachy_mini):
38
  """Set the Reachy Mini instance."""
39
  self.reachy_mini = reachy_mini
40
- if self._movement_manager is None:
41
  self._movement_manager = MovementManager(reachy_mini)
42
- else:
43
  self._movement_manager.robot = reachy_mini
44
 
45
  def set_camera_server(self, camera_server):
@@ -68,7 +72,7 @@ class ReachyMiniMotion:
68
  _LOGGER.info("Motion control stopped")
69
 
70
  @property
71
- def movement_manager(self) -> MovementManager | None:
72
  """Get the movement manager instance."""
73
  return self._movement_manager
74
 
@@ -164,31 +168,13 @@ class ReachyMiniMotion:
164
 
165
  self._is_speaking = False
166
  self._movement_manager.set_state(RobotState.IDLE)
167
- if self._movement_manager.get_idle_behavior_enabled():
168
- self._movement_manager.reset_to_neutral(duration=2.0)
169
- else:
170
- self._movement_manager.transition_to_idle_rest(duration=2.0)
171
 
172
  # Note: Face tracking remains enabled for continuous tracking
173
  # This allows the robot to always look at the user when they approach
174
 
175
  _LOGGER.debug("Reachy Mini: Idle pose")
176
 
177
- def on_pause_motion(self):
178
- """Called when motion should settle immediately.
179
-
180
- Used for zero-config gesture reactions such as the palm gesture.
181
- The robot smoothly returns to a neutral pose and then resumes its
182
- normal idle behavior.
183
- """
184
- if self._movement_manager is None:
185
- return
186
-
187
- self._is_speaking = False
188
- self._movement_manager.reset_to_neutral(duration=0.6)
189
- self._movement_manager.set_state(RobotState.IDLE)
190
- _LOGGER.debug("Reachy Mini: Motion paused to neutral idle")
191
-
192
  def on_timer_finished(self):
193
  """Called when a timer finishes - alert animation.
194
 
 
5
  """
6
 
7
  import logging
8
+ from typing import Optional
9
 
10
  from .movement_manager import MovementManager, RobotState
11
 
 
19
  to the MovementManager which handles them in its 5Hz control loop.
20
  """
21
 
22
+ def __init__(self, reachy_mini=None):
23
  self.reachy_mini = reachy_mini
24
+ self._movement_manager: Optional[MovementManager] = None
25
  self._camera_server = None # Reference to camera server for face tracking control
26
  self._is_speaking = False
27
 
28
  _LOGGER.debug("ReachyMiniMotion.__init__ called with reachy_mini=%s", reachy_mini)
29
 
30
+ # Initialize movement manager if robot is available
31
+ if reachy_mini is not None:
32
+ try:
33
+ self._movement_manager = MovementManager(reachy_mini)
34
+ _LOGGER.debug("MovementManager created successfully")
35
+ except Exception as e:
36
+ _LOGGER.error("Failed to create MovementManager: %s", e, exc_info=True)
37
+ self._movement_manager = None
38
+ else:
39
+ _LOGGER.debug("reachy_mini is None, MovementManager not created")
40
 
41
  def set_reachy_mini(self, reachy_mini):
42
  """Set the Reachy Mini instance."""
43
  self.reachy_mini = reachy_mini
44
+ if reachy_mini is not None and self._movement_manager is None:
45
  self._movement_manager = MovementManager(reachy_mini)
46
+ elif reachy_mini is not None and self._movement_manager is not None:
47
  self._movement_manager.robot = reachy_mini
48
 
49
  def set_camera_server(self, camera_server):
 
72
  _LOGGER.info("Motion control stopped")
73
 
74
  @property
75
+ def movement_manager(self) -> Optional[MovementManager]:
76
  """Get the movement manager instance."""
77
  return self._movement_manager
78
 
 
168
 
169
  self._is_speaking = False
170
  self._movement_manager.set_state(RobotState.IDLE)
171
+ self._movement_manager.reset_to_neutral(duration=0.5)
 
 
 
172
 
173
  # Note: Face tracking remains enabled for continuous tracking
174
  # This allows the robot to always look at the user when they approach
175
 
176
  _LOGGER.debug("Reachy Mini: Idle pose")
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  def on_timer_finished(self):
179
  """Called when a timer finishes - alert animation.
180
 
reachy_mini_ha_voice/movement_manager.py ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Movement Manager for Reachy Mini.
3
+
4
+ This module provides a centralized control system for robot movements,
5
+ inspired by the reachy_mini_conversation_app architecture.
6
+
7
+ Key features:
8
+ - Single fixed-rate control loop (rate set by CONTROL_LOOP_FREQUENCY_HZ, currently 100Hz)
9
+ - Command queue pattern (thread-safe external API)
10
+ - Error throttling (prevents log explosion)
11
+ - JSON-driven animation system (conversation state animations)
12
+ - Graceful shutdown
13
+ - Pose change detection (skip sending if no significant change)
14
+ - Robust connection recovery (faster reconnection attempts)
15
+ - Proper pose composition using SDK's compose_world_offset (same as conversation_app)
16
+ - Antenna freeze during listening mode with smooth blend back
17
+ """
18
+
19
+ import logging
20
+ import math
21
+ import threading
22
+ import time
23
+ from dataclasses import dataclass, field
24
+ from enum import Enum
25
+ from queue import Queue, Empty
26
+ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
27
+
28
+ import numpy as np
29
+ from scipy.spatial.transform import Rotation as R
30
+
31
+ if TYPE_CHECKING:
32
+ from reachy_mini import ReachyMini
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Import SDK utilities for pose composition (same as conversation_app)
37
+ try:
38
+ from reachy_mini.utils import create_head_pose
39
+ from reachy_mini.utils.interpolation import compose_world_offset
40
+ SDK_UTILS_AVAILABLE = True
41
+ except ImportError:
42
+ SDK_UTILS_AVAILABLE = False
43
+ logger.warning("SDK utils not available, using fallback pose composition")
44
+
45
+ # Import animation player
46
+ from .animation_player import AnimationPlayer
47
+
48
+
49
+ # =============================================================================
50
+ # Constants
51
+ # =============================================================================
52
+
53
# Rate of the control loop. The daemon now supports higher rates, so this
# uses the same 100Hz as conversation_app.
CONTROL_LOOP_FREQUENCY_HZ = 100
# Duration of one control loop tick, in seconds.
TARGET_PERIOD = 1.0 / CONTROL_LOOP_FREQUENCY_HZ

# Listening mode: seconds taken to blend the antennas back from the frozen pose.
ANTENNA_BLEND_DURATION = 0.5

# Animation name looked up for each robot state value. Every state currently
# maps to the animation that carries the same name.
STATE_ANIMATION_MAP = {
    state_name: state_name
    for state_name in ("idle", "listening", "thinking", "speaking")
}
67
+
68
+
69
class RobotState(Enum):
    """Conversation states the robot can be in.

    Each member's value is the lowercase state name; that string is the key
    used when an animation is looked up for the state.
    """

    IDLE = "idle"
    LISTENING = "listening"
    THINKING = "thinking"
    SPEAKING = "speaking"
75
+
76
+
77
@dataclass
class MovementState:
    """Mutable motion state owned by the control loop.

    Other threads are expected to request changes through the manager's
    command queue instead of writing these fields directly.
    """

    # Conversation state the robot is currently in.
    robot_state: RobotState = RobotState.IDLE

    # Offsets produced by the AnimationPlayer.
    anim_pitch: float = 0.0
    anim_yaw: float = 0.0
    anim_roll: float = 0.0
    anim_x: float = 0.0
    anim_y: float = 0.0
    anim_z: float = 0.0
    anim_antenna_left: float = 0.0
    anim_antenna_right: float = 0.0

    # Speech sway offsets derived from audio analysis.
    sway_pitch: float = 0.0
    sway_yaw: float = 0.0
    sway_roll: float = 0.0
    sway_x: float = 0.0
    sway_y: float = 0.0
    sway_z: float = 0.0

    # Target pose driven by motion actions and external pose updates.
    target_pitch: float = 0.0
    target_yaw: float = 0.0
    target_roll: float = 0.0
    target_x: float = 0.0
    target_y: float = 0.0
    target_z: float = 0.0
    target_antenna_left: float = 0.0
    target_antenna_right: float = 0.0
    target_body_yaw: float = 0.0

    # Timestamps taken from the manager's clock.
    last_activity_time: float = 0.0
    idle_start_time: float = 0.0

    # Antenna freeze bookkeeping for listening mode.
    antenna_frozen: bool = False
    frozen_antenna_left: float = 0.0
    frozen_antenna_right: float = 0.0
    antenna_blend: float = 1.0  # 0 = fully frozen, 1 = normal motion
    antenna_blend_start_time: float = 0.0
122
+
123
+
124
@dataclass
class PendingAction:
    """One motion action: a target head pose to reach over ``duration``.

    ``callback``, when set, is invoked by the control loop once the action
    has run to completion.
    """

    # Label used in debug logging.
    name: str
    # Target orientation of the head.
    target_pitch: float = 0.0
    target_yaw: float = 0.0
    target_roll: float = 0.0
    # Target position of the head.
    target_x: float = 0.0
    target_y: float = 0.0
    target_z: float = 0.0
    # Seconds over which the pose is interpolated.
    duration: float = 0.5
    # Optional completion hook, called with no arguments.
    callback: Optional[Callable] = None
136
+
137
+
138
+ class MovementManager:
139
+ """
140
+ Unified movement manager with a fixed-rate control loop (CONTROL_LOOP_FREQUENCY_HZ, currently 100Hz).
141
+
142
+ All external interactions go through the command queue,
143
+ ensuring thread safety and preventing race conditions.
144
+
145
+ Note: An earlier revision reduced the rate from 100Hz to 10Hz to prevent daemon crashes
146
+ caused by excessive Zenoh message traffic; CONTROL_LOOP_FREQUENCY_HZ is now set to 100Hz again.
147
+ """
148
+
149
def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
    """Set up queues, state and bookkeeping; no thread is started here.

    Args:
        reachy_mini: Robot instance to drive, or None when no robot is
            attached.
    """
    self.robot = reachy_mini
    # Clock used for every timestamp taken by the manager.
    self._now = time.monotonic

    # Single entry point for requests coming from other threads.
    self._command_queue: Queue[Tuple[str, Any]] = Queue()

    # State owned by the control loop.
    self.state = MovementState()
    self.state.last_activity_time = self._now()
    self.state.idle_start_time = self._now()

    # Plays the JSON-defined animations.
    self._animation_player = AnimationPlayer()

    # Control thread handle and its stop signal.
    self._stop_event = threading.Event()
    self._thread: Optional[threading.Thread] = None

    # Error throttling: log at most once per interval, count the rest.
    self._last_error_time = 0.0
    self._error_interval = 1.0
    self._suppressed_errors = 0

    # Connection health bookkeeping.
    self._connection_lost = False
    self._last_successful_command = self._now()
    self._connection_timeout = 3.0
    self._reconnect_attempt_interval = 2.0
    self._last_reconnect_attempt = 0.0
    self._consecutive_errors = 0
    self._max_consecutive_errors = 5

    # Action currently being interpolated, plus where and when it began.
    self._pending_action: Optional[PendingAction] = None
    self._action_start_time: float = 0.0
    self._action_start_pose: Dict[str, float] = {}

    # Last pose sent and the threshold used for pose change detection.
    self._last_sent_pose: Optional[Dict[str, float]] = None
    self._pose_change_threshold = 0.005

    # Face tracking offsets (x, y, z, roll, pitch, yaw), guarded by a lock.
    self._face_tracking_offsets: Tuple[float, float, float, float, float, float] = (0.0,) * 6
    self._face_tracking_lock = threading.Lock()

    # Camera server that supplies face tracking data; set later.
    self._camera_server = None

    # Exponential moving average of the face tracking offsets.
    self._smoothed_face_offsets: List[float] = [0.0] * 6
    self._face_smoothing_factor = 0.3

    logger.info("MovementManager initialized with AnimationPlayer")
203
+
204
+ # =========================================================================
205
+ # Thread-safe public API (called from any thread)
206
+ # =========================================================================
207
+
208
def set_state(self, new_state: RobotState) -> None:
    """Thread-safe: request a switch to ``new_state``.

    The request is only enqueued here; it is applied when the command
    queue is next drained.
    """
    request = ("set_state", new_state)
    self._command_queue.put(request)
211
+
212
def set_listening(self, listening: bool) -> None:
    """Thread-safe: enter LISTENING when ``listening`` is true, else IDLE."""
    if listening:
        requested = RobotState.LISTENING
    else:
        requested = RobotState.IDLE
    self._command_queue.put(("set_state", requested))
216
+
217
def set_thinking(self) -> None:
    """Thread-safe: request the THINKING state."""
    request = ("set_state", RobotState.THINKING)
    self._command_queue.put(request)
220
+
221
def set_speaking(self, speaking: bool) -> None:
    """Thread-safe: enter SPEAKING when ``speaking`` is true, else IDLE."""
    if speaking:
        requested = RobotState.SPEAKING
    else:
        requested = RobotState.IDLE
    self._command_queue.put(("set_state", requested))
225
+
226
def set_idle(self) -> None:
    """Thread-safe: request a return to the IDLE state."""
    request = ("set_state", RobotState.IDLE)
    self._command_queue.put(request)
229
+
230
def queue_action(self, action: PendingAction) -> None:
    """Thread-safe: enqueue ``action`` so the control loop can start it."""
    request = ("action", action)
    self._command_queue.put(request)
233
+
234
def turn_to_angle(self, yaw_deg: float, duration: float = 0.8) -> None:
    """Thread-safe: turn the head to face a direction.

    Args:
        yaw_deg: Target yaw in degrees; converted to radians for the action.
        duration: Seconds the turn should take.
    """
    yaw_rad = math.radians(yaw_deg)
    turn = PendingAction(name="turn_to", target_yaw=yaw_rad, duration=duration)
    self._command_queue.put(("action", turn))
242
+
243
+ def nod(self, amplitude_deg: float = 15, duration: float = 0.5) -> None:
244
+ """Thread-safe: Perform a nod gesture."""
245
+ self._command_queue.put(("nod", (amplitude_deg, duration)))
246
+
247
+ def shake(self, amplitude_deg: float = 20, duration: float = 0.5) -> None:
248
+ """Thread-safe: Perform a head shake gesture."""
249
+ self._command_queue.put(("shake", (amplitude_deg, duration)))
250
+
251
+ def set_speech_sway(
252
+ self, x: float, y: float, z: float,
253
+ roll: float, pitch: float, yaw: float
254
+ ) -> None:
255
+ """Thread-safe: Set speech-driven sway offsets.
256
+
257
+ These offsets are applied on top of the current animation
258
+ to create audio-synchronized head motion during TTS playback.
259
+
260
+ Args:
261
+ x, y, z: Position offsets in meters
262
+ roll, pitch, yaw: Orientation offsets in radians
263
+ """
264
+ self._command_queue.put(("speech_sway", (x, y, z, roll, pitch, yaw)))
265
+
266
def reset_to_neutral(self, duration: float = 0.5) -> None:
    """Thread-safe: move the head back to the all-zero pose.

    Args:
        duration: Seconds the move should take.
    """
    # Spell out every target as zero so "neutral" does not depend on the
    # defaults of PendingAction.
    zero_targets = {
        f"target_{axis}": 0.0
        for axis in ("pitch", "yaw", "roll", "x", "y", "z")
    }
    neutral = PendingAction(name="neutral", duration=duration, **zero_targets)
    self._command_queue.put(("action", neutral))
279
+
280
def set_camera_server(self, camera_server) -> None:
    """Remember the camera server that supplies face tracking offsets.

    Args:
        camera_server: MJPEGCameraServer instance with face tracking.
    """
    self._camera_server = camera_server
    logger.info("Camera server set for face tracking")
288
+
289
+ def set_face_tracking_offsets(self, offsets: Tuple[float, float, float, float, float, float]) -> None:
290
+ """Thread-safe: Update face tracking offsets manually.
291
+
292
+ Args:
293
+ offsets: Tuple of (x, y, z, roll, pitch, yaw) in meters/radians
294
+ """
295
+ with self._face_tracking_lock:
296
+ self._face_tracking_offsets = offsets
297
+
298
+ def set_target_pose(
299
+ self,
300
+ x: Optional[float] = None,
301
+ y: Optional[float] = None,
302
+ z: Optional[float] = None,
303
+ roll: Optional[float] = None,
304
+ pitch: Optional[float] = None,
305
+ yaw: Optional[float] = None,
306
+ body_yaw: Optional[float] = None,
307
+ antenna_left: Optional[float] = None,
308
+ antenna_right: Optional[float] = None,
309
+ ) -> None:
310
+ """Thread-safe: Set target pose components.
311
+
312
+ Only provided values will be updated. Values are in meters for position
313
+ and radians for angles.
314
+
315
+ Args:
316
+ x, y, z: Head position in meters
317
+ roll, pitch, yaw: Head orientation in radians
318
+ body_yaw: Body yaw in radians
319
+ antenna_left, antenna_right: Antenna angles in radians
320
+ """
321
+ self._command_queue.put(("set_pose", {
322
+ "x": x,
323
+ "y": y,
324
+ "z": z,
325
+ "roll": roll,
326
+ "pitch": pitch,
327
+ "yaw": yaw,
328
+ "body_yaw": body_yaw,
329
+ "antenna_left": antenna_left,
330
+ "antenna_right": antenna_right,
331
+ }))
332
+
333
+ # =========================================================================
334
+ # Internal: Command processing (runs in control loop)
335
+ # =========================================================================
336
+
337
+ def _poll_commands(self) -> None:
338
+ """Process all pending commands from the queue."""
339
+ while True:
340
+ try:
341
+ cmd, payload = self._command_queue.get_nowait()
342
+ except Empty:
343
+ break
344
+
345
+ self._handle_command(cmd, payload)
346
+
347
+ def _handle_command(self, cmd: str, payload: Any) -> None:
348
+ """Handle a single command."""
349
+ if cmd == "set_state":
350
+ old_state = self.state.robot_state
351
+ self.state.robot_state = payload
352
+ self.state.last_activity_time = self._now()
353
+
354
+ # Update animation based on state
355
+ animation_name = STATE_ANIMATION_MAP.get(payload.value, "idle")
356
+ self._animation_player.set_animation(animation_name)
357
+
358
+ # State transition logic
359
+ if payload == RobotState.IDLE and old_state != RobotState.IDLE:
360
+ self.state.idle_start_time = self._now()
361
+ # Unfreeze antennas when returning to idle
362
+ self._start_antenna_unfreeze()
363
+
364
+ # Freeze antennas when entering listening mode
365
+ if payload == RobotState.LISTENING:
366
+ self._freeze_antennas()
367
+ elif old_state == RobotState.LISTENING and payload != RobotState.LISTENING:
368
+ # Start unfreezing when leaving listening mode
369
+ self._start_antenna_unfreeze()
370
+
371
+ logger.debug("State changed: %s -> %s, animation: %s",
372
+ old_state.value, payload.value, animation_name)
373
+
374
+ elif cmd == "action":
375
+ self._start_action(payload)
376
+
377
+ elif cmd == "nod":
378
+ amplitude_deg, duration = payload
379
+ self._do_nod(amplitude_deg, duration)
380
+
381
+ elif cmd == "shake":
382
+ amplitude_deg, duration = payload
383
+ self._do_shake(amplitude_deg, duration)
384
+
385
+ elif cmd == "set_pose":
386
+ # Update target pose from external control (e.g., Home Assistant)
387
+ if payload.get("x") is not None:
388
+ self.state.target_x = payload["x"]
389
+ if payload.get("y") is not None:
390
+ self.state.target_y = payload["y"]
391
+ if payload.get("z") is not None:
392
+ self.state.target_z = payload["z"]
393
+ if payload.get("roll") is not None:
394
+ self.state.target_roll = payload["roll"]
395
+ if payload.get("pitch") is not None:
396
+ self.state.target_pitch = payload["pitch"]
397
+ if payload.get("yaw") is not None:
398
+ self.state.target_yaw = payload["yaw"]
399
+ if payload.get("body_yaw") is not None:
400
+ self.state.target_body_yaw = payload["body_yaw"]
401
+ if payload.get("antenna_left") is not None:
402
+ self.state.target_antenna_left = payload["antenna_left"]
403
+ if payload.get("antenna_right") is not None:
404
+ self.state.target_antenna_right = payload["antenna_right"]
405
+ logger.debug("External pose update: %s", payload)
406
+
407
+ elif cmd == "speech_sway":
408
+ # Update speech-driven sway offsets
409
+ x, y, z, roll, pitch, yaw = payload
410
+ self.state.sway_x = x
411
+ self.state.sway_y = y
412
+ self.state.sway_z = z
413
+ self.state.sway_roll = roll
414
+ self.state.sway_pitch = pitch
415
+ self.state.sway_yaw = yaw
416
+
417
def _start_action(self, action: PendingAction) -> None:
    """Make ``action`` the pending action, replacing any action in progress.

    The current target pose is captured as the starting point for the
    interpolation.
    """
    self._pending_action = action
    self._action_start_time = self._now()
    current = self.state
    self._action_start_pose = {
        axis: getattr(current, f"target_{axis}")
        for axis in ("pitch", "yaw", "roll", "x", "y", "z")
    }
    logger.debug("Starting action: %s", action.name)
430
+
431
def _do_nod(self, amplitude_deg: float, duration: float) -> None:
    """Start the downward half of a nod.

    Despite the name, only the first half of the gesture is started here:
    a "nod_down" action that pitches the head by ``amplitude_deg`` over
    half of ``duration``. This method queues no return movement and does
    not block.

    Args:
        amplitude_deg: Pitch amplitude in degrees.
        duration: Nominal duration of the whole gesture in seconds.
    """
    self._start_action(
        PendingAction(
            name="nod_down",
            target_pitch=math.radians(amplitude_deg),
            duration=duration / 2,
        )
    )
444
+
445
def _do_shake(self, amplitude_deg: float, duration: float) -> None:
    """Start the first half of a head shake.

    Despite the name, only the first half of the gesture is started here:
    a "shake_left" action that yaws the head by ``-amplitude_deg`` over
    half of ``duration``. This method queues no return movement and does
    not block.

    Args:
        amplitude_deg: Yaw amplitude in degrees.
        duration: Nominal duration of the whole gesture in seconds.
    """
    self._start_action(
        PendingAction(
            name="shake_left",
            target_yaw=-math.radians(amplitude_deg),
            duration=duration / 2,
        )
    )
457
+
458
+ # =========================================================================
459
+ # Internal: Motion updates (runs in control loop)
460
+ # =========================================================================
461
+
462
+ def _update_action(self, dt: float) -> None:
463
+ """Update pending action interpolation."""
464
+ if self._pending_action is None:
465
+ return
466
+
467
+ elapsed = self._now() - self._action_start_time
468
+ progress = min(1.0, elapsed / self._pending_action.duration)
469
+
470
+ # Smooth interpolation (ease in-out)
471
+ t = progress * progress * (3 - 2 * progress)
472
+
473
+ # Interpolate pose
474
+ start = self._action_start_pose
475
+ action = self._pending_action
476
+
477
+ self.state.target_pitch = start["pitch"] + t * (action.target_pitch - start["pitch"])
478
+ self.state.target_yaw = start["yaw"] + t * (action.target_yaw - start["yaw"])
479
+ self.state.target_roll = start["roll"] + t * (action.target_roll - start["roll"])
480
+ self.state.target_x = start["x"] + t * (action.target_x - start["x"])
481
+ self.state.target_y = start["y"] + t * (action.target_y - start["y"])
482
+ self.state.target_z = start["z"] + t * (action.target_z - start["z"])
483
+
484
+ # Action complete
485
+ if progress >= 1.0:
486
+ if self._pending_action.callback:
487
+ try:
488
+ self._pending_action.callback()
489
+ except Exception as e:
490
+ logger.error("Action callback error: %s", e)
491
+ self._pending_action = None
492
+
493
+ def _update_animation(self, dt: float) -> None:
494
+ """Update animation offsets from AnimationPlayer."""
495
+ offsets = self._animation_player.get_offsets(dt)
496
+
497
+ self.state.anim_pitch = offsets["pitch"]
498
+ self.state.anim_yaw = offsets["yaw"]
499
+ self.state.anim_roll = offsets["roll"]
500
+ self.state.anim_x = offsets["x"]
501
+ self.state.anim_y = offsets["y"]
502
+ self.state.anim_z = offsets["z"]
503
+ self.state.anim_antenna_left = offsets["antenna_left"]
504
+ self.state.anim_antenna_right = offsets["antenna_right"]
505
+
506
+ def _freeze_antennas(self) -> None:
507
+ """Freeze antennas at current position (for listening mode)."""
508
+ # Capture current antenna positions
509
+ current_left = self.state.target_antenna_left + self.state.anim_antenna_left
510
+ current_right = self.state.target_antenna_right + self.state.anim_antenna_right
511
+
512
+ self.state.antenna_frozen = True
513
+ self.state.frozen_antenna_left = current_left
514
+ self.state.frozen_antenna_right = current_right
515
+ self.state.antenna_blend = 0.0 # Fully frozen
516
+ logger.debug("Antennas frozen at left=%.2f, right=%.2f",
517
+ math.degrees(current_left), math.degrees(current_right))
518
+
519
+ def _start_antenna_unfreeze(self) -> None:
520
+ """Start unfreezing antennas (smooth blend back to normal)."""
521
+ if not self.state.antenna_frozen:
522
+ return
523
+
524
+ self.state.antenna_blend_start_time = self._now()
525
+ logger.debug("Starting antenna unfreeze")
526
+
527
+ def _update_antenna_blend(self, dt: float) -> None:
528
+ """Update antenna blend state for smooth unfreezing."""
529
+ if not self.state.antenna_frozen:
530
+ return
531
+
532
+ if self.state.antenna_blend >= 1.0:
533
+ # Fully unfrozen
534
+ self.state.antenna_frozen = False
535
+ return
536
+
537
+ # Calculate blend progress
538
+ elapsed = self._now() - self.state.antenna_blend_start_time
539
+ if elapsed > 0:
540
+ self.state.antenna_blend = min(1.0, elapsed / ANTENNA_BLEND_DURATION)
541
+
542
+ if self.state.antenna_blend >= 1.0:
543
+ self.state.antenna_frozen = False
544
+ logger.debug("Antennas unfrozen")
545
+
546
+ def _update_face_tracking(self) -> None:
547
+ """Get face tracking offsets from camera server with smoothing."""
548
+ if self._camera_server is not None:
549
+ try:
550
+ raw_offsets = self._camera_server.get_face_tracking_offsets()
551
+
552
+ # Apply exponential moving average smoothing
553
+ alpha = self._face_smoothing_factor
554
+ for i in range(6):
555
+ self._smoothed_face_offsets[i] = (
556
+ alpha * raw_offsets[i] +
557
+ (1 - alpha) * self._smoothed_face_offsets[i]
558
+ )
559
+
560
+ with self._face_tracking_lock:
561
+ self._face_tracking_offsets = tuple(self._smoothed_face_offsets)
562
+
563
+ except Exception as e:
564
+ logger.debug("Error getting face tracking offsets: %s", e)
565
+
566
def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
    """Combine target pose, secondary offsets and antenna state into one command.

    The primary head pose comes from the ``target_*`` state. The secondary
    pose is the per-axis sum of animation offsets, speech sway and the latest
    face-tracking offsets (read under ``self._face_tracking_lock``). With the
    SDK helpers available the two are merged by ``compose_world_offset``;
    otherwise rotations are multiplied and translations added.

    Returns:
        Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
    """
    state = self.state

    def build_pose(x, y, z, roll, pitch, yaw):
        # SDK constructor when present, otherwise a hand-built 4x4 matrix.
        if SDK_UTILS_AVAILABLE:
            return create_head_pose(
                x=x, y=y, z=z,
                roll=roll, pitch=pitch, yaw=yaw,
                degrees=False,
                mm=False,
            )
        pose = np.eye(4)
        pose[:3, :3] = R.from_euler('xyz', [roll, pitch, yaw]).as_matrix()
        pose[0, 3] = x
        pose[1, 3] = y
        pose[2, 3] = z
        return pose

    primary_head = build_pose(
        state.target_x, state.target_y, state.target_z,
        state.target_roll, state.target_pitch, state.target_yaw,
    )

    with self._face_tracking_lock:
        face_offsets = self._face_tracking_offsets

    # Face offsets are indexed in the order x, y, z, roll, pitch, yaw.
    secondary_head = build_pose(*[
        getattr(state, f"anim_{axis}") + getattr(state, f"sway_{axis}") + face_offsets[idx]
        for idx, axis in enumerate(("x", "y", "z", "roll", "pitch", "yaw"))
    ])

    if SDK_UTILS_AVAILABLE:
        # Compose using SDK's compose_world_offset (same as conversation_app)
        final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
    else:
        # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
        final_head = np.eye(4)
        final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
        final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]

    wanted_left = state.target_antenna_left + state.anim_antenna_left
    wanted_right = state.target_antenna_right + state.anim_antenna_right

    # blend == 0.0 holds the frozen angles; blend >= 1.0 follows the targets.
    blend = state.antenna_blend
    if blend < 1.0:
        antenna_left = (state.frozen_antenna_left * (1.0 - blend) +
                        wanted_left * blend)
        antenna_right = (state.frozen_antenna_right * (1.0 - blend) +
                         wanted_right * blend)
    else:
        antenna_left, antenna_right = wanted_left, wanted_right

    return final_head, (antenna_right, antenna_left), state.target_body_yaw
652
+
653
+ # =========================================================================
654
+ # Internal: Robot control (runs in control loop)
655
+ # =========================================================================
656
+
657
+ def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
658
+ """Send control command to robot with error throttling and connection health tracking."""
659
+ if self.robot is None:
660
+ return
661
+
662
+ # Check if pose changed significantly (prevent unnecessary commands)
663
+ # Extract euler angles for comparison
664
+ rotation = R.from_matrix(head_pose[:3, :3])
665
+ euler = rotation.as_euler('xyz') # [roll, pitch, yaw]
666
+
667
+ current_pose = {
668
+ "x": head_pose[0, 3],
669
+ "y": head_pose[1, 3],
670
+ "z": head_pose[2, 3],
671
+ "roll": euler[0],
672
+ "pitch": euler[1],
673
+ "yaw": euler[2],
674
+ "antenna_right": antennas[0],
675
+ "antenna_left": antennas[1],
676
+ "body_yaw": body_yaw,
677
+ }
678
+
679
+ if self._last_sent_pose is not None:
680
+ max_diff = max(
681
+ abs(current_pose[k] - self._last_sent_pose.get(k, 0.0))
682
+ for k in current_pose.keys()
683
+ )
684
+ if max_diff < self._pose_change_threshold:
685
+ # No significant change, skip sending command
686
+ return
687
+
688
+ now = self._now()
689
+
690
+ # Check if we should skip due to connection loss (but always try periodically)
691
+ if self._connection_lost:
692
+ if now - self._last_reconnect_attempt < self._reconnect_attempt_interval:
693
+ # Skip sending commands to reduce error spam
694
+ return
695
+ # Time to try reconnecting
696
+ self._last_reconnect_attempt = now
697
+ logger.debug("Attempting to send command after connection loss...")
698
+
699
+ try:
700
+ # Send to robot (single control point!)
701
+ # head_pose is already a 4x4 matrix from _compose_final_pose
702
+ self.robot.set_target(
703
+ head=head_pose,
704
+ antennas=list(antennas),
705
+ body_yaw=body_yaw,
706
+ )
707
+
708
+ # Command succeeded - update connection health and cache
709
+ self._last_successful_command = now
710
+ self._last_sent_pose = current_pose.copy() # Cache sent pose
711
+ self._consecutive_errors = 0 # Reset error counter
712
+
713
+ if self._connection_lost:
714
+ logger.info("✓ Connection to robot restored")
715
+ self._connection_lost = False
716
+ self._suppressed_errors = 0
717
+
718
+ except Exception as e:
719
+ error_msg = str(e)
720
+ self._consecutive_errors += 1
721
+
722
+ # Check if this is a connection error
723
+ is_connection_error = "Lost connection" in error_msg or "ZError" in error_msg
724
+
725
+ if is_connection_error:
726
+ if not self._connection_lost:
727
+ # First time detecting connection loss
728
+ if self._consecutive_errors >= self._max_consecutive_errors:
729
+ logger.warning(f"Connection unstable after {self._consecutive_errors} errors: {error_msg}")
730
+ logger.warning(" Will retry connection every %.1fs...", self._reconnect_attempt_interval)
731
+ self._connection_lost = True
732
+ self._last_reconnect_attempt = now
733
+ else:
734
+ # Transient error, log but don't mark as lost yet
735
+ self._log_error_throttled(f"Transient connection error ({self._consecutive_errors}/{self._max_consecutive_errors}): {error_msg}")
736
+ else:
737
+ # Already in lost state, use throttled logging
738
+ self._log_error_throttled(f"Connection still lost: {error_msg}")
739
+ else:
740
+ # Non-connection error - log but don't affect connection state
741
+ self._log_error_throttled(f"Failed to set robot target: {error_msg}")
742
+
743
+ def _log_error_throttled(self, message: str) -> None:
744
+ """Log error with throttling to prevent log explosion."""
745
+ now = self._now()
746
+ if now - self._last_error_time >= self._error_interval:
747
+ if self._suppressed_errors > 0:
748
+ message += f" (suppressed {self._suppressed_errors} repeats)"
749
+ self._suppressed_errors = 0
750
+ logger.error(message)
751
+ self._last_error_time = now
752
+ else:
753
+ self._suppressed_errors += 1
754
+
755
+ # =========================================================================
756
+ # Control loop
757
+ # =========================================================================
758
+
759
def _control_loop(self) -> None:
    """Run the fixed-rate control loop until the stop event is set.

    Each tick polls queued commands, advances the pending action, the
    animation offsets and the antenna blend, refreshes face tracking, composes
    the final pose and sends it to the robot. An exception anywhere in a tick
    is reported through the throttled error logger and the loop carries on.
    After each tick the loop sleeps for whatever remains of ``TARGET_PERIOD``.
    """
    logger.info("Movement manager control loop started (%.0f Hz)", CONTROL_LOOP_FREQUENCY_HZ)

    previous_tick = self._now()

    while not self._stop_event.is_set():
        tick_start = self._now()
        dt = tick_start - previous_tick
        previous_tick = tick_start

        try:
            self._poll_commands()              # 1. queued commands
            self._update_action(dt)            # 2. action interpolation
            self._update_animation(dt)         # 3. animation offsets
            self._update_antenna_blend(dt)     # 4. antenna freeze/unfreeze
            self._update_face_tracking()       # 5. camera face offsets

            # 6. head pose matrix, antennas tuple, body yaw
            head_pose, antennas, body_yaw = self._compose_final_pose()

            # 7. the single point where the robot is commanded
            self._issue_control_command(head_pose, antennas, body_yaw)

        except Exception as e:
            self._log_error_throttled(f"Control loop error: {e}")

        # Sleep only for the part of the period this tick did not use.
        remaining = TARGET_PERIOD - (self._now() - tick_start)
        if remaining > 0:
            time.sleep(remaining)

    logger.info("Movement manager control loop stopped")
802
+
803
+ # =========================================================================
804
+ # Lifecycle
805
+ # =========================================================================
806
+
807
def start(self) -> None:
    """Launch the control loop in a daemon thread named "MovementManager".

    If a previous thread is still alive, a warning is logged and nothing else
    happens. Otherwise the stop event is cleared before the thread starts.
    """
    already_alive = self._thread is not None and self._thread.is_alive()
    if already_alive:
        logger.warning("Movement manager already running")
        return

    self._stop_event.clear()
    worker = threading.Thread(
        target=self._control_loop,
        daemon=True,
        name="MovementManager",
    )
    self._thread = worker
    worker.start()
    logger.info("Movement manager started")
821
+
822
def stop(self) -> None:
    """Signal the control loop to stop and wait briefly for its thread.

    Returns immediately when no thread is running. Otherwise the stop event
    is set and the thread is joined for up to 0.5 seconds; a warning is logged
    if it is still alive afterwards. The robot is not reset to neutral here.
    """
    worker = self._thread
    if worker is None or not worker.is_alive():
        return

    logger.info("Stopping movement manager...")
    self._stop_event.set()

    # Short join keeps shutdown fast.
    worker.join(timeout=0.5)
    if worker.is_alive():
        logger.warning("Movement manager thread did not stop in time")

    # Skip reset to neutral - let the app manager handle it
    logger.info("Movement manager stopped")
840
+
841
+ def _reset_to_neutral_blocking(self) -> None:
842
+ """Reset robot to neutral position (blocking)."""
843
+ if self.robot is None:
844
+ return
845
+
846
+ try:
847
+ neutral_pose = np.eye(4)
848
+ self.robot.goto_target(
849
+ head=neutral_pose,
850
+ antennas=[0.0, 0.0],
851
+ body_yaw=0.0,
852
+ duration=0.3, # Faster reset
853
+ )
854
+ logger.info("Robot reset to neutral position")
855
+ except Exception as e:
856
+ logger.error("Failed to reset robot: %s", e)
857
+
858
@property
def is_running(self) -> bool:
    """True while a control-loop thread exists and is alive."""
    worker = self._thread
    return worker is not None and worker.is_alive()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/reachy_controller.py RENAMED
@@ -1,735 +1,869 @@
1
- """Reachy Mini controller wrapper for ESPHome entities."""
2
-
3
- import logging
4
- import math
5
- import time
6
- from typing import TYPE_CHECKING, Any
7
-
8
- import numpy as np
9
- import requests
10
- from scipy.spatial.transform import Rotation as R
11
-
12
- from .core.config import Config
13
-
14
- if TYPE_CHECKING:
15
- from reachy_mini import ReachyMini
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
- class ReachyController:
21
- """
22
- Wrapper class for Reachy Mini control operations.
23
-
24
- Provides safe access to Reachy Mini SDK functions with error handling.
25
- """
26
-
27
- def __init__(self, reachy_mini: "ReachyMini"):
28
- """
29
- Initialize the controller.
30
-
31
- Args:
32
- reachy_mini: ReachyMini instance (required)
33
- """
34
- self.reachy = reachy_mini
35
- self._speaker_volume = 100 # Default volume
36
- self._movement_manager = None # Set later via set_movement_manager()
37
-
38
- # Shared session to reduce per-request overhead
39
- self._http_session = requests.Session()
40
- self._http_timeout = 5.0 # seconds
41
- self._cache_ttl = Config.daemon.status_cache_ttl
42
- self._daemon_base_url = Config.daemon.url.rstrip("/")
43
-
44
- # Status caching - only for get_status() which may trigger I/O
45
- # Note: get_current_head_pose() and get_current_joint_positions() are
46
- # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
47
- self._state_cache: dict[str, Any] = {}
48
- self._last_status_query = 0.0
49
-
50
- self._look_at_x = 0.0
51
- self._look_at_y = 0.0
52
- self._look_at_z = 0.0
53
-
54
- def set_movement_manager(self, movement_manager) -> None:
55
- """Set the MovementManager instance for pose control.
56
-
57
- Args:
58
- movement_manager: MovementManager instance
59
- """
60
- self._movement_manager = movement_manager
61
- logger.info("MovementManager set for ReachyController")
62
-
63
- @property
64
- def is_available(self) -> bool:
65
- """Check if robot is available."""
66
- return self.reachy is not None
67
-
68
- def _with_movement_manager(self, caller: str):
69
- if self._movement_manager is None:
70
- logger.warning("%s failed - MovementManager not set", caller)
71
- return None
72
- return self._movement_manager
73
-
74
- def _get_movement_bool(self, getter_name: str, log_label: str) -> bool:
75
- movement_manager = self._movement_manager
76
- if movement_manager is None:
77
- return False
78
- try:
79
- return bool(getattr(movement_manager, getter_name)())
80
- except Exception as e:
81
- logger.debug("Error getting %s state: %s", log_label, e)
82
- return False
83
-
84
- def get_idle_behavior_enabled(self) -> bool:
85
- """Get whether any idle behavior subsystem is enabled."""
86
- return self._get_movement_bool("get_idle_behavior_enabled", "idle behavior")
87
-
88
- def set_idle_behavior_enabled(self, enabled: bool) -> None:
89
- """Enable or disable all idle behavior subsystems together."""
90
- movement_manager = self._with_movement_manager("set_idle_behavior_enabled")
91
- if movement_manager is not None:
92
- movement_manager.set_idle_behavior_enabled(enabled)
93
-
94
- # ========== Phase 1: Basic Status & Volume ==========
95
-
96
- @staticmethod
97
- def _status_value(status: Any, key: str, default: Any = None) -> Any:
98
- if status is None:
99
- return default
100
- if isinstance(status, dict):
101
- return status.get(key, default)
102
- return getattr(status, key, default)
103
-
104
- @classmethod
105
- def _nested_status_value(cls, status: Any, parent_key: str, child_key: str, default: Any = None) -> Any:
106
- parent = cls._status_value(status, parent_key, None)
107
- if parent is None:
108
- return default
109
- if isinstance(parent, dict):
110
- return parent.get(child_key, default)
111
- return getattr(parent, child_key, default)
112
-
113
- def _get_cached_status(self) -> Any:
114
- """Get cached daemon status to reduce query frequency.
115
-
116
- Note: get_status() may trigger I/O, so we cache it.
117
- Unlike get_current_head_pose() and get_current_joint_positions()
118
- which are non-blocking in the SDK.
119
- """
120
- now = time.time()
121
- if now - self._last_status_query < self._cache_ttl:
122
- return self._state_cache.get("status")
123
-
124
- if not self.is_available:
125
- return None
126
-
127
- try:
128
- status = self.reachy.client.get_status(wait=False)
129
- self._state_cache["status"] = status
130
- self._last_status_query = now
131
- return status
132
- except Exception as e:
133
- logger.error(f"Error getting status: {e}")
134
- return None
135
-
136
- def get_daemon_state(self) -> str:
137
- """Get daemon state with caching."""
138
- status = self._get_cached_status()
139
- if status is None:
140
- return "not_available"
141
- return str(self._status_value(status, "state", "unknown"))
142
-
143
- def get_backend_ready(self) -> bool:
144
- """Check if backend is ready with caching."""
145
- status = self._get_cached_status()
146
- if status is None:
147
- return False
148
- return self._status_value(status, "state") == "running"
149
-
150
- def get_error_message(self) -> str:
151
- """Get current error message with caching."""
152
- status = self._get_cached_status()
153
- if status is None:
154
- return "Robot not available"
155
- return str(self._status_value(status, "error", "") or "")
156
-
157
- def _get_volume_via_api(self, path: str, cached_value: float, label: str) -> float:
158
- """Fetch a volume value from the daemon API, falling back to the cached value."""
159
- try:
160
- resp = self._http_session.get(
161
- f"{self._daemon_base_url}{path}",
162
- timeout=self._http_timeout,
163
- )
164
- resp.raise_for_status()
165
- data = resp.json()
166
- if isinstance(data, dict) and "volume" in data:
167
- return float(data["volume"])
168
- except Exception as e:
169
- logger.warning("Failed to get %s volume via daemon API: %s", label, e)
170
-
171
- return cached_value
172
-
173
- def _set_volume_via_api(self, path: str, volume: float, label: str) -> float:
174
- """Write a volume value through the daemon API and return the confirmed level."""
175
- try:
176
- resp = self._http_session.post(
177
- f"{self._daemon_base_url}{path}",
178
- json={"volume": int(volume)},
179
- timeout=self._http_timeout,
180
- )
181
- resp.raise_for_status()
182
- data = resp.json()
183
- if isinstance(data, dict) and "volume" in data:
184
- return float(data["volume"])
185
- return volume
186
- except Exception as e:
187
- logger.error("Failed to set %s volume via daemon API: %s", label, e)
188
- return volume
189
-
190
- def _motor_mode_from_status(self, status: Any) -> str | None:
191
- motor_mode = self._nested_status_value(status, "backend_status", "motor_control_mode", None)
192
- if motor_mode is not None:
193
- return str(motor_mode)
194
- return None
195
-
196
- def get_speaker_volume(self) -> float:
197
- """Get speaker volume (0-100) from the daemon volume API."""
198
- self._speaker_volume = self._get_volume_via_api("/api/volume/current", self._speaker_volume, "speaker")
199
- return self._speaker_volume
200
-
201
- def set_speaker_volume(self, volume: float) -> None:
202
- """Set speaker volume (0-100) through the daemon volume API."""
203
- volume = max(0.0, min(100.0, volume))
204
- self._speaker_volume = self._set_volume_via_api("/api/volume/set", volume, "speaker")
205
- logger.info("Speaker volume set to %.1f%% via daemon API", self._speaker_volume)
206
-
207
- # ========== Phase 2: Motor Control ==========
208
-
209
- def get_motors_enabled(self) -> bool:
210
- """Check if motors are enabled with caching."""
211
- status = self._get_cached_status()
212
- if status is None:
213
- return False
214
- try:
215
- motor_mode = self._motor_mode_from_status(status)
216
- return motor_mode == "enabled"
217
- except Exception as e:
218
- logger.error(f"Error getting motor state: {e}")
219
- return False
220
-
221
- def set_motors_enabled(self, enabled: bool) -> None:
222
- """
223
- Enable or disable motors.
224
-
225
- Args:
226
- enabled: True to enable, False to disable
227
- """
228
- if not self.is_available:
229
- logger.warning("Cannot control motors: robot not available")
230
- return
231
-
232
- try:
233
- if enabled:
234
- self.reachy.enable_motors()
235
- logger.info("Motors enabled")
236
- else:
237
- self.reachy.disable_motors()
238
- logger.info("Motors disabled")
239
- except Exception as e:
240
- logger.error(f"Error setting motor state: {e}")
241
-
242
- def get_motor_mode(self) -> str:
243
- """Get current motor control mode with caching."""
244
- status = self._get_cached_status()
245
- if status is None:
246
- return "disabled"
247
- try:
248
- return self._motor_mode_from_status(status) or "disabled"
249
- except Exception as e:
250
- logger.error(f"Error getting motor mode: {e}")
251
- return "error"
252
-
253
- def set_motor_mode(self, mode: str) -> None:
254
- """
255
- Set motor control mode.
256
-
257
- Args:
258
- mode: One of "enabled", "disabled", "gravity_compensation"
259
- """
260
- if not self.is_available:
261
- logger.warning("Cannot set motor mode: robot not available")
262
- return
263
-
264
- try:
265
- if mode == "enabled":
266
- self.reachy.enable_motors()
267
- elif mode == "disabled":
268
- self.reachy.disable_motors()
269
- elif mode == "gravity_compensation":
270
- self.reachy.enable_gravity_compensation()
271
- else:
272
- logger.warning(f"Invalid motor mode: {mode}")
273
- return
274
- logger.info(f"Motor mode set to {mode}")
275
- except Exception as e:
276
- logger.error(f"Error setting motor mode: {e}")
277
-
278
- def get_doa_enabled(self) -> bool:
279
- """Get whether DOA sound tracking is enabled."""
280
- return self._get_movement_bool("get_doa_enabled", "DOA tracking")
281
-
282
- def set_doa_enabled(self, enabled: bool) -> None:
283
- """Enable or disable DOA sound tracking."""
284
- movement_manager = self._with_movement_manager("set_doa_enabled")
285
- if movement_manager is not None:
286
- movement_manager.set_doa_enabled(enabled)
287
-
288
- def _daemon_command(self, path: str, params: dict[str, str] | None = None) -> None:
289
- """Send a daemon command request and wait for the daemon state to settle."""
290
- url = f"{self._daemon_base_url}{path}"
291
- resp = self._http_session.post(url, params=params or {}, timeout=self._http_timeout)
292
- resp.raise_for_status()
293
-
294
- desired_state = None
295
- if path.endswith("/start"):
296
- desired_state = "running"
297
- elif path.endswith("/stop"):
298
- desired_state = "stopped"
299
-
300
- if desired_state is not None:
301
- self._wait_for_daemon_state(desired_state)
302
-
303
- def _wait_for_daemon_state(self, desired_state: str, timeout: float = 10.0) -> None:
304
- """Poll daemon status until the requested state is reached."""
305
- deadline = time.time() + timeout
306
- while time.time() < deadline:
307
- try:
308
- resp = self._http_session.get(
309
- f"{self._daemon_base_url}/api/daemon/status",
310
- timeout=self._http_timeout,
311
- )
312
- resp.raise_for_status()
313
- data = resp.json()
314
- current_state = str(data.get("state", "")).lower()
315
- if current_state == desired_state:
316
- self._last_status_query = 0.0
317
- return
318
- except Exception as e:
319
- logger.debug("Waiting for daemon state %s failed: %s", desired_state, e)
320
- time.sleep(0.2)
321
-
322
- logger.warning("Timed out waiting for daemon state '%s'", desired_state)
323
-
324
- # ========== Phase 3: Pose Control ==========
325
-
326
- def _get_head_pose(self) -> np.ndarray | None:
327
- """Get current head pose from SDK.
328
-
329
- Note: SDK's get_current_head_pose() is non-blocking - it returns
330
- cached data from Zenoh subscriptions, so no throttling needed.
331
- """
332
- if not self.is_available:
333
- return None
334
-
335
- try:
336
- return self.reachy.get_current_head_pose()
337
- except Exception as e:
338
- logger.error(f"Error getting head pose: {e}")
339
- return None
340
-
341
- def _get_joint_positions(self) -> tuple | None:
342
- """Get current joint positions from SDK.
343
-
344
- Note: SDK's get_current_joint_positions() is non-blocking - it returns
345
- cached data from Zenoh subscriptions, so no throttling needed.
346
- """
347
- if not self.is_available:
348
- return None
349
-
350
- try:
351
- return self.reachy.get_current_joint_positions()
352
- except Exception as e:
353
- logger.error(f"Error getting joint positions: {e}")
354
- return None
355
-
356
- def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
357
- """
358
- Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
359
-
360
- Args:
361
- pose_matrix: 4x4 homogeneous transformation matrix
362
-
363
- Returns:
364
- tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
365
- """
366
- # Extract position from the last column
367
- x = pose_matrix[0, 3]
368
- y = pose_matrix[1, 3]
369
- z = pose_matrix[2, 3]
370
-
371
- # Extract rotation matrix and convert to euler angles
372
- rotation_matrix = pose_matrix[:3, :3]
373
- rotation = R.from_matrix(rotation_matrix)
374
- # Use 'xyz' convention for roll, pitch, yaw
375
- roll, pitch, yaw = rotation.as_euler("xyz")
376
-
377
- return x, y, z, roll, pitch, yaw
378
-
379
- def _get_head_pose_component(self, component: str) -> float:
380
- """Get a specific component from head pose.
381
-
382
- Args:
383
- component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
384
-
385
- Returns:
386
- The component value, or 0.0 on error
387
- """
388
- pose = self._get_head_pose()
389
- if pose is None:
390
- return 0.0
391
- try:
392
- x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
393
- components = {
394
- "x": x * 1000, # m to mm
395
- "y": y * 1000,
396
- "z": z * 1000,
397
- "roll": math.degrees(roll),
398
- "pitch": math.degrees(pitch),
399
- "yaw": math.degrees(yaw),
400
- }
401
- return components.get(component, 0.0)
402
- except Exception as e:
403
- logger.error(f"Error getting head {component}: {e}")
404
- return 0.0
405
-
406
- def _disabled_pose_setter(self, name: str) -> None:
407
- """Log warning when MovementManager is not available."""
408
- logger.warning(f"set_{name} failed - MovementManager not set")
409
-
410
- def _set_pose_via_manager(self, **kwargs) -> bool:
411
- """Set pose via MovementManager if available.
412
-
413
- Returns True if successful, False if MovementManager not available.
414
- """
415
- if self._movement_manager is None:
416
- return False
417
- self._movement_manager.set_target_pose(**kwargs)
418
- return True
419
-
420
- # Head position getters and setters
421
- def get_head_x(self) -> float:
422
- """Get head X position in mm."""
423
- return self._get_head_pose_component("x")
424
-
425
- def set_head_x(self, x_mm: float) -> None:
426
- """Set head X position in mm via MovementManager."""
427
- if not self._set_pose_via_manager(x=x_mm / 1000.0): # mm to m
428
- self._disabled_pose_setter("head_x")
429
-
430
- def get_head_y(self) -> float:
431
- """Get head Y position in mm."""
432
- return self._get_head_pose_component("y")
433
-
434
- def set_head_y(self, y_mm: float) -> None:
435
- """Set head Y position in mm via MovementManager."""
436
- if not self._set_pose_via_manager(y=y_mm / 1000.0): # mm to m
437
- self._disabled_pose_setter("head_y")
438
-
439
- def get_head_z(self) -> float:
440
- """Get head Z position in mm."""
441
- return self._get_head_pose_component("z")
442
-
443
- def set_head_z(self, z_mm: float) -> None:
444
- """Set head Z position in mm via MovementManager."""
445
- if not self._set_pose_via_manager(z=z_mm / 1000.0): # mm to m
446
- self._disabled_pose_setter("head_z")
447
-
448
- # Head orientation getters and setters
449
- def get_head_roll(self) -> float:
450
- """Get head roll angle in degrees."""
451
- return self._get_head_pose_component("roll")
452
-
453
- def set_head_roll(self, roll_deg: float) -> None:
454
- """Set head roll angle in degrees via MovementManager."""
455
- if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
456
- self._disabled_pose_setter("head_roll")
457
-
458
- def get_head_pitch(self) -> float:
459
- """Get head pitch angle in degrees."""
460
- return self._get_head_pose_component("pitch")
461
-
462
- def set_head_pitch(self, pitch_deg: float) -> None:
463
- """Set head pitch angle in degrees via MovementManager."""
464
- if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
465
- self._disabled_pose_setter("head_pitch")
466
-
467
- def get_head_yaw(self) -> float:
468
- """Get head yaw angle in degrees."""
469
- return self._get_head_pose_component("yaw")
470
-
471
- def set_head_yaw(self, yaw_deg: float) -> None:
472
- """Set head yaw angle in degrees via MovementManager."""
473
- if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
474
- self._disabled_pose_setter("head_yaw")
475
-
476
- def get_body_yaw(self) -> float:
477
- """Get body yaw angle in degrees."""
478
- joints = self._get_joint_positions()
479
- if joints is None:
480
- return 0.0
481
- try:
482
- head_joints, _ = joints
483
- return math.degrees(head_joints[0])
484
- except Exception as e:
485
- logger.error(f"Error getting body yaw: {e}")
486
- return 0.0
487
-
488
- def set_body_yaw(self, yaw_deg: float) -> None:
489
- """Set body yaw angle in degrees.
490
-
491
- Note: This directly calls SDK's set_target_body_yaw since automatic body yaw
492
- is enabled. Manual control will temporarily override automatic mode.
493
- """
494
- if self.reachy is None:
495
- self._disabled_pose_setter("body_yaw")
496
- return
497
- try:
498
- self.reachy.set_target_body_yaw(math.radians(yaw_deg))
499
- except Exception as e:
500
- logger.error(f"Error setting body yaw: {e}")
501
-
502
- def get_antenna_left(self) -> float:
503
- """Get left antenna angle in degrees."""
504
- joints = self._get_joint_positions()
505
- if joints is None:
506
- return 0.0
507
- try:
508
- _, antennas = joints
509
- return math.degrees(antennas[1]) # left is index 1
510
- except Exception as e:
511
- logger.error(f"Error getting left antenna: {e}")
512
- return 0.0
513
-
514
- def set_antenna_left(self, angle_deg: float) -> None:
515
- """Set left antenna angle in degrees via MovementManager."""
516
- if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
517
- self._disabled_pose_setter("antenna_left")
518
-
519
- def get_antenna_right(self) -> float:
520
- """Get right antenna angle in degrees."""
521
- joints = self._get_joint_positions()
522
- if joints is None:
523
- return 0.0
524
- try:
525
- _, antennas = joints
526
- return math.degrees(antennas[0]) # right is index 0
527
- except Exception as e:
528
- logger.error(f"Error getting right antenna: {e}")
529
- return 0.0
530
-
531
- def set_antenna_right(self, angle_deg: float) -> None:
532
- """Set right antenna angle in degrees via MovementManager."""
533
- if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
534
- self._disabled_pose_setter("antenna_right")
535
-
536
- # ========== Phase 4: Look At Control ==========
537
-
538
- def get_look_at_x(self) -> float:
539
- """Get look at target X coordinate in world frame (meters)."""
540
- return self._look_at_x
541
-
542
- def set_look_at_x(self, x: float) -> None:
543
- """Set look at target X coordinate."""
544
- self._look_at_x = x
545
- self._update_look_at()
546
-
547
- def get_look_at_y(self) -> float:
548
- """Get look at target Y coordinate in world frame (meters)."""
549
- return self._look_at_y
550
-
551
- def set_look_at_y(self, y: float) -> None:
552
- """Set look at target Y coordinate."""
553
- self._look_at_y = y
554
- self._update_look_at()
555
-
556
- def get_look_at_z(self) -> float:
557
- """Get look at target Z coordinate in world frame (meters)."""
558
- return self._look_at_z
559
-
560
- def set_look_at_z(self, z: float) -> None:
561
- """Set look at target Z coordinate."""
562
- self._look_at_z = z
563
- self._update_look_at()
564
-
565
- def _update_look_at(self) -> None:
566
- """Update robot to look at the target coordinates.
567
-
568
- NOTE: Disabled to prevent conflict with MovementManager's control loop.
569
- """
570
- logger.warning("_update_look_at is disabled - MovementManager controls head pose")
571
- # if not self.is_available:
572
- # return
573
- # try:
574
- # x = getattr(self, '_look_at_x', 0.0)
575
- # y = getattr(self, '_look_at_y', 0.0)
576
- # z = getattr(self, '_look_at_z', 0.0)
577
- # self.reachy.look_at_world(x, y, z)
578
- # logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
579
- # except Exception as e:
580
- # logger.error(f"Error updating look at: {e}")
581
-
582
- # ========== Phase 6: Diagnostic Information ==========
583
-
584
- def get_control_loop_frequency(self) -> float:
585
- """Get control loop frequency in Hz with caching."""
586
- status = self._get_cached_status()
587
- if status is None:
588
- return 0.0
589
- try:
590
- control_loop_stats = self._nested_status_value(status, "backend_status", "control_loop_stats", None)
591
- if isinstance(control_loop_stats, dict):
592
- return float(control_loop_stats.get("mean_control_loop_frequency", 0.0))
593
- if control_loop_stats is not None:
594
- return float(getattr(control_loop_stats, "mean_control_loop_frequency", 0.0))
595
- return 0.0
596
- except Exception as e:
597
- logger.error(f"Error getting control loop frequency: {e}")
598
- return 0.0
599
-
600
- def get_sdk_version(self) -> str:
601
- """Get SDK version with caching."""
602
- status = self._get_cached_status()
603
- if status is None:
604
- return "N/A"
605
- return str(self._status_value(status, "version", "unknown") or "unknown")
606
-
607
- def get_robot_name(self) -> str:
608
- """Get robot name with caching."""
609
- status = self._get_cached_status()
610
- if status is None:
611
- return "N/A"
612
- return str(self._status_value(status, "robot_name", "unknown") or "unknown")
613
-
614
- def get_wireless_version(self) -> bool:
615
- """Check if this is a wireless version with caching."""
616
- status = self._get_cached_status()
617
- if status is None:
618
- return False
619
- return bool(self._status_value(status, "wireless_version", False))
620
-
621
- def get_simulation_mode(self) -> bool:
622
- """Check if simulation mode is enabled with caching."""
623
- status = self._get_cached_status()
624
- if status is None:
625
- return False
626
- return bool(self._status_value(status, "simulation_enabled", False))
627
-
628
- def get_wlan_ip(self) -> str:
629
- """Get WLAN IP address with caching."""
630
- status = self._get_cached_status()
631
- if status is None:
632
- return "N/A"
633
- return str(self._status_value(status, "wlan_ip", "N/A") or "N/A")
634
-
635
- # ========== Phase 7: IMU Sensors (Wireless only) ==========
636
-
637
- def _get_imu_value(self, sensor_type: str, index: int) -> float:
638
- """Get a specific IMU sensor value.
639
-
640
- Args:
641
- sensor_type: 'accelerometer', 'gyroscope', or 'temperature'
642
- index: Array index (0=x, 1=y, 2=z) or -1 for scalar values
643
-
644
- Returns:
645
- The sensor value, or 0.0 on error
646
- """
647
- if not self.is_available:
648
- return 0.0
649
- try:
650
- imu_data = self.reachy.imu
651
- if imu_data is None or sensor_type not in imu_data:
652
- return 0.0
653
- value = imu_data[sensor_type]
654
- return float(value[index]) if index >= 0 else float(value)
655
- except Exception as e:
656
- logger.debug(f"Error getting IMU {sensor_type}: {e}")
657
- return 0.0
658
-
659
- def get_imu_accel_x(self) -> float:
660
- """Get IMU X-axis acceleration in m/s²."""
661
- return self._get_imu_value("accelerometer", 0)
662
-
663
- def get_imu_accel_y(self) -> float:
664
- """Get IMU Y-axis acceleration in m/s²."""
665
- return self._get_imu_value("accelerometer", 1)
666
-
667
- def get_imu_accel_z(self) -> float:
668
- """Get IMU Z-axis acceleration in m/s²."""
669
- return self._get_imu_value("accelerometer", 2)
670
-
671
- def get_imu_gyro_x(self) -> float:
672
- """Get IMU X-axis angular velocity in rad/s."""
673
- return self._get_imu_value("gyroscope", 0)
674
-
675
- def get_imu_gyro_y(self) -> float:
676
- """Get IMU Y-axis angular velocity in rad/s."""
677
- return self._get_imu_value("gyroscope", 1)
678
-
679
- def get_imu_gyro_z(self) -> float:
680
- """Get IMU Z-axis angular velocity in rad/s."""
681
- return self._get_imu_value("gyroscope", 2)
682
-
683
- def get_imu_temperature(self) -> float:
684
- """Get IMU temperature in °C."""
685
- return self._get_imu_value("temperature", -1)
686
-
687
- # ========== Phase 11: LED Control (DISABLED) ==========
688
- # LED control is disabled because LEDs are hidden inside the robot.
689
- # See PROJECT_PLAN.md principle 8.
690
-
691
- # ========== DOA (Direction of Arrival) ==========
692
-
693
- def get_doa_angle(self) -> tuple[float, bool] | None:
694
- """Get Direction of Arrival angle from microphone array.
695
-
696
- The DOA angle indicates the direction of the sound source relative to the robot.
697
- Angle is in radians: 0 = left, π/2 = front/back, π = right.
698
-
699
- Returns:
700
- Tuple of (angle_radians, speech_detected), or None if unavailable.
701
- - angle_radians: Sound source direction in radians
702
- - speech_detected: Whether speech is currently detected
703
- """
704
- if not self.is_available:
705
- return None
706
- try:
707
- return self.reachy.media.get_DoA()
708
- except Exception as e:
709
- logger.debug(f"Error getting DOA: {e}")
710
- return None
711
-
712
- def get_doa_angle_degrees(self) -> float:
713
- """Get DOA angle in degrees for Home Assistant entity.
714
-
715
- Returns the raw DOA angle in degrees (0-180°).
716
- SDK convention: 0° = left, 90° = front, 180° = right
717
- """
718
- doa = self.get_doa_angle()
719
- if doa is None:
720
- return 0.0
721
- angle_rad, _ = doa
722
- # Return raw angle in degrees (0-180°)
723
- angle_deg = math.degrees(angle_rad)
724
- return angle_deg
725
-
726
- def get_speech_detected(self) -> bool:
727
- """Get speech detection status from DOA.
728
-
729
- Returns True if speech is currently detected.
730
- """
731
- doa = self.get_doa_angle()
732
- if doa is None:
733
- return False
734
- _, speech_detected = doa
735
- return speech_detected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reachy Mini controller wrapper for ESPHome entities."""
2
+
3
+ import logging
4
+ import time
5
+ from typing import Any, Dict, Optional, TYPE_CHECKING
6
+ import math
7
+ import numpy as np
8
+ from scipy.spatial.transform import Rotation as R
9
+ import requests
10
+
11
+ if TYPE_CHECKING:
12
+ from reachy_mini import ReachyMini
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class _ReSpeakerContext:
18
+ """Context manager for thread-safe ReSpeaker access."""
19
+
20
+ def __init__(self, respeaker, lock):
21
+ self._respeaker = respeaker
22
+ self._lock = lock
23
+
24
+ def __enter__(self):
25
+ self._lock.acquire()
26
+ return self._respeaker
27
+
28
+ def __exit__(self, exc_type, exc_val, exc_tb):
29
+ self._lock.release()
30
+ return False
31
+
32
+
33
+ class ReachyController:
34
+ """
35
+ Wrapper class for Reachy Mini control operations.
36
+
37
+ Provides safe access to Reachy Mini SDK functions with error handling
38
+ and fallback for standalone mode (when robot is not available).
39
+ """
40
+
41
+ def __init__(self, reachy_mini: Optional["ReachyMini"] = None):
42
+ """
43
+ Initialize the controller.
44
+
45
+ Args:
46
+ reachy_mini: ReachyMini instance, or None for standalone mode
47
+ """
48
+ self.reachy = reachy_mini
49
+ self._speaker_volume = 100 # Default volume
50
+ self._movement_manager = None # Set later via set_movement_manager()
51
+
52
+ # Status caching - only for get_status() which may trigger I/O
53
+ # Note: get_current_head_pose() and get_current_joint_positions() are
54
+ # non-blocking in the SDK (they return cached Zenoh data), so no caching needed
55
+ self._state_cache: Dict[str, Any] = {}
56
+ self._cache_ttl = 2.0 # 2 second cache TTL for status queries (increased from 1s)
57
+ self._last_status_query = 0.0
58
+
59
+ # Thread lock for ReSpeaker USB access to prevent conflicts with GStreamer audio pipeline
60
+ self._respeaker_lock = __import__('threading').Lock()
61
+
62
+ def set_movement_manager(self, movement_manager) -> None:
63
+ """Set the MovementManager instance for pose control.
64
+
65
+ Args:
66
+ movement_manager: MovementManager instance
67
+ """
68
+ self._movement_manager = movement_manager
69
+ logger.info("MovementManager set for ReachyController")
70
+
71
+ @property
72
+ def is_available(self) -> bool:
73
+ """Check if robot is available."""
74
+ return self.reachy is not None
75
+
76
+ # ========== Phase 1: Basic Status & Volume ==========
77
+
78
+ def _get_cached_status(self) -> Optional[Dict]:
79
+ """Get cached daemon status to reduce query frequency.
80
+
81
+ Note: get_status() may trigger I/O, so we cache it.
82
+ Unlike get_current_head_pose() and get_current_joint_positions()
83
+ which are non-blocking in the SDK.
84
+ """
85
+ now = time.time()
86
+ if now - self._last_status_query < self._cache_ttl:
87
+ return self._state_cache.get('status')
88
+
89
+ if not self.is_available:
90
+ return None
91
+
92
+ try:
93
+ status = self.reachy.client.get_status(wait=False)
94
+ self._state_cache['status'] = status
95
+ self._last_status_query = now
96
+ return status
97
+ except Exception as e:
98
+ logger.error(f"Error getting status: {e}")
99
+ return self._state_cache.get('status') # Return stale cache on error
100
+
101
+ def get_daemon_state(self) -> str:
102
+ """Get daemon state with caching."""
103
+ status = self._get_cached_status()
104
+ if status is None:
105
+ return "not_available"
106
+ return status.get('state', 'unknown')
107
+
108
+ def get_backend_ready(self) -> bool:
109
+ """Check if backend is ready with caching."""
110
+ status = self._get_cached_status()
111
+ if status is None:
112
+ return False
113
+ return status.get('state') == 'running'
114
+
115
+ def get_error_message(self) -> str:
116
+ """Get current error message with caching."""
117
+ status = self._get_cached_status()
118
+ if status is None:
119
+ return "Robot not available"
120
+ return status.get('error') or ""
121
+
122
+ def get_speaker_volume(self) -> float:
123
+ """Get speaker volume (0-100) with caching."""
124
+ if not self.is_available:
125
+ return self._speaker_volume
126
+ try:
127
+ # Get volume from daemon API (use cached status for IP)
128
+ status = self._get_cached_status()
129
+ if status is None:
130
+ return self._speaker_volume
131
+ wlan_ip = status.get('wlan_ip', 'localhost')
132
+ response = requests.get(f"http://{wlan_ip}:8000/api/volume/current", timeout=2)
133
+ if response.status_code == 200:
134
+ data = response.json()
135
+ self._speaker_volume = float(data.get('volume', self._speaker_volume))
136
+ except Exception as e:
137
+ logger.debug(f"Could not get volume from API: {e}")
138
+ return self._speaker_volume
139
+
140
+ def set_speaker_volume(self, volume: float) -> None:
141
+ """
142
+ Set speaker volume (0-100) with cached status.
143
+
144
+ Args:
145
+ volume: Volume level 0-100
146
+ """
147
+ volume = max(0.0, min(100.0, volume))
148
+ self._speaker_volume = volume
149
+
150
+ if not self.is_available:
151
+ logger.warning("Cannot set volume: robot not available")
152
+ return
153
+
154
+ try:
155
+ # Set volume via daemon API (use cached status for IP)
156
+ status = self._get_cached_status()
157
+ if status is None:
158
+ logger.error("Cannot get daemon status for volume control")
159
+ return
160
+ wlan_ip = status.get('wlan_ip', 'localhost')
161
+ response = requests.post(
162
+ f"http://{wlan_ip}:8000/api/volume/set",
163
+ json={"volume": int(volume)},
164
+ timeout=5
165
+ )
166
+ if response.status_code == 200:
167
+ logger.info(f"Speaker volume set to {volume}%")
168
+ else:
169
+ logger.error(f"Failed to set volume: {response.status_code} {response.text}")
170
+ except Exception as e:
171
+ logger.error(f"Error setting speaker volume: {e}")
172
+
173
+ def get_microphone_volume(self) -> float:
174
+ """Get microphone volume (0-100) using daemon HTTP API."""
175
+ if not self.is_available:
176
+ return getattr(self, '_microphone_volume', 50.0)
177
+
178
+ try:
179
+ # Get WLAN IP from cached daemon status
180
+ status = self._get_cached_status()
181
+ if status is None:
182
+ return getattr(self, '_microphone_volume', 50.0)
183
+ wlan_ip = status.get('wlan_ip', 'localhost')
184
+
185
+ # Call the daemon API to get microphone volume
186
+ response = requests.get(
187
+ f"http://{wlan_ip}:8000/api/volume/microphone/current",
188
+ timeout=2
189
+ )
190
+ if response.status_code == 200:
191
+ data = response.json()
192
+ self._microphone_volume = float(data.get('volume', 50))
193
+ return self._microphone_volume
194
+ except Exception as e:
195
+ logger.debug(f"Could not get microphone volume from API: {e}")
196
+
197
+ return getattr(self, '_microphone_volume', 50.0)
198
+
199
+ def set_microphone_volume(self, volume: float) -> None:
200
+ """
201
+ Set microphone volume (0-100) using daemon HTTP API.
202
+
203
+ Args:
204
+ volume: Volume level 0-100
205
+ """
206
+ volume = max(0.0, min(100.0, volume))
207
+ self._microphone_volume = volume
208
+
209
+ if not self.is_available:
210
+ logger.warning("Cannot set microphone volume: robot not available")
211
+ return
212
+
213
+ try:
214
+ # Get WLAN IP from cached daemon status
215
+ status = self._get_cached_status()
216
+ if status is None:
217
+ logger.error("Cannot get daemon status for microphone volume control")
218
+ return
219
+ wlan_ip = status.get('wlan_ip', 'localhost')
220
+
221
+ # Call the daemon API to set microphone volume
222
+ response = requests.post(
223
+ f"http://{wlan_ip}:8000/api/volume/microphone/set",
224
+ json={"volume": int(volume)},
225
+ timeout=5
226
+ )
227
+ if response.status_code == 200:
228
+ logger.info(f"Microphone volume set to {volume}%")
229
+ else:
230
+ logger.error(f"Failed to set microphone volume: {response.status_code} {response.text}")
231
+ except Exception as e:
232
+ logger.error(f"Error setting microphone volume: {e}")
233
+
234
+ # ========== Phase 2: Motor Control ==========
235
+
236
+ def get_motors_enabled(self) -> bool:
237
+ """Check if motors are enabled with caching."""
238
+ status = self._get_cached_status()
239
+ if status is None:
240
+ return False
241
+ try:
242
+ backend_status = status.get('backend_status')
243
+ if backend_status and isinstance(backend_status, dict):
244
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
245
+ return motor_mode == 'enabled'
246
+ return status.get('state') == 'running'
247
+ except Exception as e:
248
+ logger.error(f"Error getting motor state: {e}")
249
+ return False
250
+
251
+ def set_motors_enabled(self, enabled: bool) -> None:
252
+ """
253
+ Enable or disable motors.
254
+
255
+ Args:
256
+ enabled: True to enable, False to disable
257
+ """
258
+ if not self.is_available:
259
+ logger.warning("Cannot control motors: robot not available")
260
+ return
261
+
262
+ try:
263
+ if enabled:
264
+ self.reachy.enable_motors()
265
+ logger.info("Motors enabled")
266
+ else:
267
+ self.reachy.disable_motors()
268
+ logger.info("Motors disabled")
269
+ except Exception as e:
270
+ logger.error(f"Error setting motor state: {e}")
271
+
272
+ def get_motor_mode(self) -> str:
273
+ """Get current motor control mode with caching."""
274
+ status = self._get_cached_status()
275
+ if status is None:
276
+ return "disabled"
277
+ try:
278
+ backend_status = status.get('backend_status')
279
+ if backend_status and isinstance(backend_status, dict):
280
+ motor_mode = backend_status.get('motor_control_mode', 'disabled')
281
+ return motor_mode
282
+ if status.get('state') == 'running':
283
+ return "enabled"
284
+ return "disabled"
285
+ except Exception as e:
286
+ logger.error(f"Error getting motor mode: {e}")
287
+ return "error"
288
+
289
+ def set_motor_mode(self, mode: str) -> None:
290
+ """
291
+ Set motor control mode.
292
+
293
+ Args:
294
+ mode: One of "enabled", "disabled", "gravity_compensation"
295
+ """
296
+ if not self.is_available:
297
+ logger.warning("Cannot set motor mode: robot not available")
298
+ return
299
+
300
+ try:
301
+ if mode == "enabled":
302
+ self.reachy.enable_motors()
303
+ elif mode == "disabled":
304
+ self.reachy.disable_motors()
305
+ elif mode == "gravity_compensation":
306
+ self.reachy.enable_gravity_compensation()
307
+ else:
308
+ logger.warning(f"Invalid motor mode: {mode}")
309
+ return
310
+ logger.info(f"Motor mode set to {mode}")
311
+ except Exception as e:
312
+ logger.error(f"Error setting motor mode: {e}")
313
+
314
+ def wake_up(self) -> None:
315
+ """Execute wake up animation."""
316
+ if not self.is_available:
317
+ logger.warning("Cannot wake up: robot not available")
318
+ return
319
+
320
+ try:
321
+ self.reachy.wake_up()
322
+ logger.info("Wake up animation executed")
323
+ except Exception as e:
324
+ logger.error(f"Error executing wake up: {e}")
325
+
326
+ def go_to_sleep(self) -> None:
327
+ """Execute sleep animation."""
328
+ if not self.is_available:
329
+ logger.warning("Cannot sleep: robot not available")
330
+ return
331
+
332
+ try:
333
+ self.reachy.goto_sleep()
334
+ logger.info("Sleep animation executed")
335
+ except Exception as e:
336
+ logger.error(f"Error executing sleep: {e}")
337
+
338
+ # ========== Phase 3: Pose Control ==========
339
+
340
+ def _get_head_pose(self) -> Optional[np.ndarray]:
341
+ """Get current head pose from SDK.
342
+
343
+ Note: SDK's get_current_head_pose() is non-blocking - it returns
344
+ cached data from Zenoh subscriptions, so no throttling needed.
345
+ """
346
+ if not self.is_available:
347
+ return None
348
+
349
+ try:
350
+ return self.reachy.get_current_head_pose()
351
+ except Exception as e:
352
+ logger.error(f"Error getting head pose: {e}")
353
+ return None
354
+
355
+ def _get_joint_positions(self) -> Optional[tuple]:
356
+ """Get current joint positions from SDK.
357
+
358
+ Note: SDK's get_current_joint_positions() is non-blocking - it returns
359
+ cached data from Zenoh subscriptions, so no throttling needed.
360
+ """
361
+ if not self.is_available:
362
+ return None
363
+
364
+ try:
365
+ return self.reachy.get_current_joint_positions()
366
+ except Exception as e:
367
+ logger.error(f"Error getting joint positions: {e}")
368
+ return None
369
+
370
+ def _extract_pose_from_matrix(self, pose_matrix: np.ndarray) -> tuple:
371
+ """
372
+ Extract position (x, y, z) and rotation (roll, pitch, yaw) from 4x4 pose matrix.
373
+
374
+ Args:
375
+ pose_matrix: 4x4 homogeneous transformation matrix
376
+
377
+ Returns:
378
+ tuple: (x, y, z, roll, pitch, yaw) where position is in meters and angles in radians
379
+ """
380
+ # Extract position from the last column
381
+ x = pose_matrix[0, 3]
382
+ y = pose_matrix[1, 3]
383
+ z = pose_matrix[2, 3]
384
+
385
+ # Extract rotation matrix and convert to euler angles
386
+ rotation_matrix = pose_matrix[:3, :3]
387
+ rotation = R.from_matrix(rotation_matrix)
388
+ # Use 'xyz' convention for roll, pitch, yaw
389
+ roll, pitch, yaw = rotation.as_euler('xyz')
390
+
391
+ return x, y, z, roll, pitch, yaw
392
+
393
+ def _get_head_pose_component(self, component: str) -> float:
394
+ """Get a specific component from head pose.
395
+
396
+ Args:
397
+ component: One of 'x', 'y', 'z' (mm), 'roll', 'pitch', 'yaw' (degrees)
398
+
399
+ Returns:
400
+ The component value, or 0.0 on error
401
+ """
402
+ pose = self._get_head_pose()
403
+ if pose is None:
404
+ return 0.0
405
+ try:
406
+ x, y, z, roll, pitch, yaw = self._extract_pose_from_matrix(pose)
407
+ components = {
408
+ 'x': x * 1000, # m to mm
409
+ 'y': y * 1000,
410
+ 'z': z * 1000,
411
+ 'roll': math.degrees(roll),
412
+ 'pitch': math.degrees(pitch),
413
+ 'yaw': math.degrees(yaw),
414
+ }
415
+ return components.get(component, 0.0)
416
+ except Exception as e:
417
+ logger.error(f"Error getting head {component}: {e}")
418
+ return 0.0
419
+
420
+ def _disabled_pose_setter(self, name: str) -> None:
421
+ """Log warning when MovementManager is not available."""
422
+ logger.warning(f"set_{name} failed - MovementManager not set")
423
+
424
+ def _set_pose_via_manager(self, **kwargs) -> bool:
425
+ """Set pose via MovementManager if available.
426
+
427
+ Returns True if successful, False if MovementManager not available.
428
+ """
429
+ if self._movement_manager is None:
430
+ return False
431
+ self._movement_manager.set_target_pose(**kwargs)
432
+ return True
433
+
434
+ # Head position getters and setters
435
+ def get_head_x(self) -> float:
436
+ """Get head X position in mm."""
437
+ return self._get_head_pose_component('x')
438
+
439
+ def set_head_x(self, x_mm: float) -> None:
440
+ """Set head X position in mm via MovementManager."""
441
+ if not self._set_pose_via_manager(x=x_mm / 1000.0): # mm to m
442
+ self._disabled_pose_setter('head_x')
443
+
444
+ def get_head_y(self) -> float:
445
+ """Get head Y position in mm."""
446
+ return self._get_head_pose_component('y')
447
+
448
+ def set_head_y(self, y_mm: float) -> None:
449
+ """Set head Y position in mm via MovementManager."""
450
+ if not self._set_pose_via_manager(y=y_mm / 1000.0): # mm to m
451
+ self._disabled_pose_setter('head_y')
452
+
453
+ def get_head_z(self) -> float:
454
+ """Get head Z position in mm."""
455
+ return self._get_head_pose_component('z')
456
+
457
+ def set_head_z(self, z_mm: float) -> None:
458
+ """Set head Z position in mm via MovementManager."""
459
+ if not self._set_pose_via_manager(z=z_mm / 1000.0): # mm to m
460
+ self._disabled_pose_setter('head_z')
461
+
462
+ # Head orientation getters and setters
463
+ def get_head_roll(self) -> float:
464
+ """Get head roll angle in degrees."""
465
+ return self._get_head_pose_component('roll')
466
+
467
+ def set_head_roll(self, roll_deg: float) -> None:
468
+ """Set head roll angle in degrees via MovementManager."""
469
+ if not self._set_pose_via_manager(roll=math.radians(roll_deg)):
470
+ self._disabled_pose_setter('head_roll')
471
+
472
+ def get_head_pitch(self) -> float:
473
+ """Get head pitch angle in degrees."""
474
+ return self._get_head_pose_component('pitch')
475
+
476
+ def set_head_pitch(self, pitch_deg: float) -> None:
477
+ """Set head pitch angle in degrees via MovementManager."""
478
+ if not self._set_pose_via_manager(pitch=math.radians(pitch_deg)):
479
+ self._disabled_pose_setter('head_pitch')
480
+
481
+ def get_head_yaw(self) -> float:
482
+ """Get head yaw angle in degrees."""
483
+ return self._get_head_pose_component('yaw')
484
+
485
+ def set_head_yaw(self, yaw_deg: float) -> None:
486
+ """Set head yaw angle in degrees via MovementManager."""
487
+ if not self._set_pose_via_manager(yaw=math.radians(yaw_deg)):
488
+ self._disabled_pose_setter('head_yaw')
489
+
490
+ def get_body_yaw(self) -> float:
491
+ """Get body yaw angle in degrees."""
492
+ joints = self._get_joint_positions()
493
+ if joints is None:
494
+ return 0.0
495
+ try:
496
+ head_joints, _ = joints
497
+ return math.degrees(head_joints[0])
498
+ except Exception as e:
499
+ logger.error(f"Error getting body yaw: {e}")
500
+ return 0.0
501
+
502
+ def set_body_yaw(self, yaw_deg: float) -> None:
503
+ """Set body yaw angle in degrees via MovementManager."""
504
+ if not self._set_pose_via_manager(body_yaw=math.radians(yaw_deg)):
505
+ self._disabled_pose_setter('body_yaw')
506
+
507
+ def get_antenna_left(self) -> float:
508
+ """Get left antenna angle in degrees."""
509
+ joints = self._get_joint_positions()
510
+ if joints is None:
511
+ return 0.0
512
+ try:
513
+ _, antennas = joints
514
+ return math.degrees(antennas[1]) # left is index 1
515
+ except Exception as e:
516
+ logger.error(f"Error getting left antenna: {e}")
517
+ return 0.0
518
+
519
+ def set_antenna_left(self, angle_deg: float) -> None:
520
+ """Set left antenna angle in degrees via MovementManager."""
521
+ if not self._set_pose_via_manager(antenna_left=math.radians(angle_deg)):
522
+ self._disabled_pose_setter('antenna_left')
523
+
524
+ def get_antenna_right(self) -> float:
525
+ """Get right antenna angle in degrees."""
526
+ joints = self._get_joint_positions()
527
+ if joints is None:
528
+ return 0.0
529
+ try:
530
+ _, antennas = joints
531
+ return math.degrees(antennas[0]) # right is index 0
532
+ except Exception as e:
533
+ logger.error(f"Error getting right antenna: {e}")
534
+ return 0.0
535
+
536
+ def set_antenna_right(self, angle_deg: float) -> None:
537
+ """Set right antenna angle in degrees via MovementManager."""
538
+ if not self._set_pose_via_manager(antenna_right=math.radians(angle_deg)):
539
+ self._disabled_pose_setter('antenna_right')
540
+
541
+ # ========== Phase 4: Look At Control ==========
542
+
543
+ def get_look_at_x(self) -> float:
544
+ """Get look at target X coordinate in world frame (meters)."""
545
+ # This is a target position, not a current state
546
+ # We'll store it internally
547
+ return getattr(self, '_look_at_x', 0.0)
548
+
549
+ def set_look_at_x(self, x: float) -> None:
550
+ """Set look at target X coordinate."""
551
+ self._look_at_x = x
552
+ self._update_look_at()
553
+
554
+ def get_look_at_y(self) -> float:
555
+ """Get look at target Y coordinate in world frame (meters)."""
556
+ return getattr(self, '_look_at_y', 0.0)
557
+
558
+ def set_look_at_y(self, y: float) -> None:
559
+ """Set look at target Y coordinate."""
560
+ self._look_at_y = y
561
+ self._update_look_at()
562
+
563
+ def get_look_at_z(self) -> float:
564
+ """Get look at target Z coordinate in world frame (meters)."""
565
+ return getattr(self, '_look_at_z', 0.0)
566
+
567
+ def set_look_at_z(self, z: float) -> None:
568
+ """Set look at target Z coordinate."""
569
+ self._look_at_z = z
570
+ self._update_look_at()
571
+
572
+ def _update_look_at(self) -> None:
573
+ """Update robot to look at the target coordinates.
574
+
575
+ NOTE: Disabled to prevent conflict with MovementManager's control loop.
576
+ """
577
+ logger.warning("_update_look_at is disabled - MovementManager controls head pose")
578
+ # if not self.is_available:
579
+ # return
580
+ # try:
581
+ # x = getattr(self, '_look_at_x', 0.0)
582
+ # y = getattr(self, '_look_at_y', 0.0)
583
+ # z = getattr(self, '_look_at_z', 0.0)
584
+ # self.reachy.look_at_world(x, y, z)
585
+ # logger.info(f"Looking at world coordinates: ({x}, {y}, {z})")
586
+ # except Exception as e:
587
+ # logger.error(f"Error updating look at: {e}")
588
+
589
+ # ========== Phase 6: Diagnostic Information ==========
590
+
591
+ def get_control_loop_frequency(self) -> float:
592
+ """Get control loop frequency in Hz with caching."""
593
+ status = self._get_cached_status()
594
+ if status is None:
595
+ return 0.0
596
+ try:
597
+ backend_status = status.get('backend_status')
598
+ if backend_status and isinstance(backend_status, dict):
599
+ control_loop_stats = backend_status.get('control_loop_stats', {})
600
+ return control_loop_stats.get('mean_control_loop_frequency', 0.0)
601
+ return 0.0
602
+ except Exception as e:
603
+ logger.error(f"Error getting control loop frequency: {e}")
604
+ return 0.0
605
+
606
+ def get_sdk_version(self) -> str:
607
+ """Get SDK version with caching."""
608
+ status = self._get_cached_status()
609
+ if status is None:
610
+ return "N/A"
611
+ return status.get('version') or "unknown"
612
+
613
+ def get_robot_name(self) -> str:
614
+ """Get robot name with caching."""
615
+ status = self._get_cached_status()
616
+ if status is None:
617
+ return "N/A"
618
+ return status.get('robot_name') or "unknown"
619
+
620
+ def get_wireless_version(self) -> bool:
621
+ """Check if this is a wireless version with caching."""
622
+ status = self._get_cached_status()
623
+ if status is None:
624
+ return False
625
+ return status.get('wireless_version', False)
626
+
627
+ def get_simulation_mode(self) -> bool:
628
+ """Check if simulation mode is enabled with caching."""
629
+ status = self._get_cached_status()
630
+ if status is None:
631
+ return False
632
+ return status.get('simulation_enabled', False)
633
+
634
def get_wlan_ip(self) -> str:
    """Return the WLAN IP address from the cached status.

    Returns:
        The stored ``wlan_ip`` value, or "N/A" when no status is cached or
        the entry is missing or falsy.
    """
    cached = self._get_cached_status()
    if cached is not None:
        address = cached.get('wlan_ip')
        if address:
            return address
    return "N/A"
640
+
641
+ # ========== Phase 7: IMU Sensors (Wireless only) ==========
642
+
643
def _get_imu_value(self, sensor_type: str, index: int) -> float:
    """Read one value out of the robot's IMU data.

    Args:
        sensor_type: Key into the IMU mapping ('accelerometer', 'gyroscope',
            or 'temperature').
        index: Position within the sensor's sequence (0=x, 1=y, 2=z), or a
            negative number when the sensor entry is a scalar.

    Returns:
        The reading converted to float, or 0.0 when the robot is not
        available, the IMU data is missing, the sensor key is absent, or
        reading it raises.
    """
    if not self.is_available:
        return 0.0
    try:
        readings = self.reachy.imu
        if readings is None or sensor_type not in readings:
            return 0.0
        raw = readings[sensor_type]
        # Negative index means the entry is a single number, not a vector.
        if index >= 0:
            return float(raw[index])
        return float(raw)
    except Exception as e:
        logger.debug(f"Error getting IMU {sensor_type}: {e}")
        return 0.0
664
+
665
def get_imu_accel_x(self) -> float:
    """Return the accelerometer X component (m/s²) via ``_get_imu_value``."""
    x_axis = 0
    return self._get_imu_value('accelerometer', x_axis)
668
+
669
def get_imu_accel_y(self) -> float:
    """Return the accelerometer Y component (m/s²) via ``_get_imu_value``."""
    y_axis = 1
    return self._get_imu_value('accelerometer', y_axis)
672
+
673
def get_imu_accel_z(self) -> float:
    """Return the accelerometer Z component (m/s²) via ``_get_imu_value``."""
    z_axis = 2
    return self._get_imu_value('accelerometer', z_axis)
676
+
677
def get_imu_gyro_x(self) -> float:
    """Return the gyroscope X angular velocity (rad/s) via ``_get_imu_value``."""
    x_axis = 0
    return self._get_imu_value('gyroscope', x_axis)
680
+
681
def get_imu_gyro_y(self) -> float:
    """Return the gyroscope Y angular velocity (rad/s) via ``_get_imu_value``."""
    y_axis = 1
    return self._get_imu_value('gyroscope', y_axis)
684
+
685
def get_imu_gyro_z(self) -> float:
    """Return the gyroscope Z angular velocity (rad/s) via ``_get_imu_value``."""
    z_axis = 2
    return self._get_imu_value('gyroscope', z_axis)
688
+
689
def get_imu_temperature(self) -> float:
    """Return the IMU temperature (°C) via ``_get_imu_value``."""
    scalar_reading = -1  # negative index selects the scalar code path
    return self._get_imu_value('temperature', scalar_reading)
692
+
693
+ # ========== Phase 11: LED Control (DISABLED) ==========
694
+ # LED control is disabled because LEDs are hidden inside the robot.
695
+ # See PROJECT_PLAN.md principle 8.
696
+
697
def _get_respeaker(self):
    """Wrap the media manager's ReSpeaker device in a ``_ReSpeakerContext``.

    The returned object is built from the device (or ``None``) together with
    ``self._respeaker_lock`` and is meant to be used as a context manager:

        with self._get_respeaker() as respeaker:
            if respeaker:
                respeaker.read("...")

    ``None`` is wrapped when the robot is not available, when the media or
    audio object is missing/falsy, or when looking the device up raises.
    """
    device = None
    if self.is_available:
        try:
            media = self.reachy.media
            if media and media.audio:
                device = media.audio._respeaker
        except Exception:
            # Best effort: any lookup failure means "no device".
            device = None
    return _ReSpeakerContext(device, self._respeaker_lock)
715
+
716
+ # ========== Phase 12: Audio Processing (via local SDK with thread-safe access) ==========
717
+
718
def get_agc_enabled(self) -> bool:
    """Return whether AGC (Automatic Gain Control) is enabled.

    Reads "PP_AGCONOFF" from the ReSpeaker and caches element [1] of the
    result as a bool in ``self._agc_enabled``. When the device is missing,
    the read returns ``None``, or the read raises, the cached value is
    returned instead (True if nothing was ever cached).
    """
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                reading = respeaker.read("PP_AGCONOFF")
                if reading is not None:
                    self._agc_enabled = bool(reading[1])
                    return self._agc_enabled
            except Exception as e:
                logger.debug(f"Error getting AGC status: {e}")
        return getattr(self, '_agc_enabled', True)  # Default to enabled
731
+
732
def set_agc_enabled(self, enabled: bool) -> None:
    """Enable or disable AGC (Automatic Gain Control).

    The requested state is always cached in ``self._agc_enabled``; when a
    ReSpeaker device is present it is also written to "PP_AGCONOFF" as
    ``[1]`` or ``[0]``. Write failures are logged, not raised.
    """
    self._agc_enabled = enabled
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                flag = 1 if enabled else 0
                respeaker.write("PP_AGCONOFF", [flag])
                logger.info(f"AGC {'enabled' if enabled else 'disabled'}")
            except Exception as e:
                logger.error(f"Error setting AGC status: {e}")
743
+
744
def get_agc_max_gain(self) -> float:
    """Return the AGC maximum gain in dB (0-40 dB range).

    Reads "PP_AGCMAXGAIN" from the ReSpeaker and caches element [0] of the
    result as a float in ``self._agc_max_gain``. When the device is missing,
    the read returns ``None``, or the read raises, the cached value is
    returned instead (30.0 if nothing was ever cached).
    """
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                reading = respeaker.read("PP_AGCMAXGAIN")
                if reading is not None:
                    self._agc_max_gain = float(reading[0])
                    return self._agc_max_gain
            except Exception as e:
                logger.debug(f"Error getting AGC max gain: {e}")
        return getattr(self, '_agc_max_gain', 30.0)  # Default to optimized value
757
+
758
def set_agc_max_gain(self, gain: float) -> None:
    """Set the AGC maximum gain in dB, clamped to the 0-40 dB range.

    The clamped value is always cached in ``self._agc_max_gain``; when a
    ReSpeaker device is present it is also written to "PP_AGCMAXGAIN".
    Write failures are logged, not raised.
    """
    # XVF3800 supports up to 40dB
    gain = max(0.0, min(40.0, gain))
    self._agc_max_gain = gain
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                respeaker.write("PP_AGCMAXGAIN", [gain])
                logger.info(f"AGC max gain set to {gain} dB")
            except Exception as e:
                logger.error(f"Error setting AGC max gain: {e}")
770
+
771
def get_noise_suppression(self) -> float:
    """Return the noise suppression strength as a percentage (0-100).

    The device parameter PP_MIN_NS is a "minimum signal preservation ratio":
    - PP_MIN_NS = 0.85 keeps at least 85% of the signal = 15% suppression
    - PP_MIN_NS = 0.15 keeps at least 15% of the signal = 85% suppression

    The user-facing value is therefore
    ``suppression_percent = (1.0 - PP_MIN_NS) * 100``, clamped to 0-100 and
    cached in ``self._noise_suppression``. When the device is missing, the
    read returns ``None``, or the read raises, the cached value is returned
    instead (15.0 if nothing was ever cached).
    """
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                reading = respeaker.read("PP_MIN_NS")
                if reading is not None:
                    raw_value = reading[0]
                    # Invert the preservation ratio into a suppression percentage.
                    self._noise_suppression = max(0.0, min(100.0, (1.0 - raw_value) * 100.0))
                    logger.debug(f"Noise suppression: PP_MIN_NS={raw_value:.2f} -> {self._noise_suppression:.1f}%")
                    return self._noise_suppression
            except Exception as e:
                logger.debug(f"Error getting noise suppression: {e}")
        return getattr(self, '_noise_suppression', 15.0)
795
+
796
def set_noise_suppression(self, level: float) -> None:
    """Set the noise suppression strength as a percentage, clamped to 0-100.

    The clamped percentage is always cached in ``self._noise_suppression``.
    When a ReSpeaker device is present, the inverted ratio
    ``1.0 - level / 100`` is written to "PP_MIN_NS". Write failures are
    logged, not raised.
    """
    level = max(0.0, min(100.0, level))
    self._noise_suppression = level
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                # PP_MIN_NS is a preservation ratio, so invert the percentage.
                min_ns = 1.0 - (level / 100.0)
                respeaker.write("PP_MIN_NS", [min_ns])
                logger.info(f"Noise suppression set to {level}%")
            except Exception as e:
                logger.error(f"Error setting noise suppression: {e}")
810
+
811
def get_echo_cancellation_converged(self) -> bool:
    """Return whether the echo canceller reports convergence.

    Reads "AEC_AECCONVERGED" from the ReSpeaker and returns element [1] of
    the result as a bool. Returns False when the device is missing, the read
    returns ``None``, or the read raises.
    """
    with self._get_respeaker() as respeaker:
        if respeaker is not None:
            try:
                reading = respeaker.read("AEC_AECCONVERGED")
                if reading is not None:
                    return bool(reading[1])
            except Exception as e:
                logger.debug(f"Error getting AEC converged status: {e}")
        return False
823
+
824
+ # ========== DOA (Direction of Arrival) ==========
825
+
826
+ def get_doa_angle(self) -> tuple[float, bool] | None:
827
+ """Get Direction of Arrival angle from microphone array.
828
+
829
+ The DOA angle indicates the direction of the sound source relative to the robot.
830
+ Angle is in radians: 0 = left, π/2 = front/back, π = right.
831
+
832
+ Returns:
833
+ Tuple of (angle_radians, speech_detected), or None if unavailable.
834
+ - angle_radians: Sound source direction in radians
835
+ - speech_detected: Whether speech is currently detected
836
+ """
837
+ if not self.is_available:
838
+ return None
839
+ try:
840
+ if self.reachy.media and self.reachy.media.audio:
841
+ return self.reachy.media.audio.get_DoA()
842
+ except Exception as e:
843
+ logger.debug(f"Error getting DOA: {e}")
844
+ return None
845
+
846
def get_doa_angle_degrees(self) -> float:
    """Return the DOA angle converted to degrees for the Home Assistant entity.

    The raw angle is passed through unchanged apart from the unit
    conversion (0-180°). SDK convention: 0° = left, 90° = front,
    180° = right.

    Returns:
        The angle in degrees, or 0.0 when no DOA reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return 0.0
    # Only the angle is needed here; the speech flag is ignored.
    return math.degrees(reading[0])
859
+
860
def get_speech_detected(self) -> bool:
    """Return the speech-detected flag from the DOA reading.

    Returns:
        The second element of the ``get_doa_angle()`` result, or False when
        no reading is available.
    """
    reading = self.get_doa_angle()
    if reading is None:
        return False
    _, speech_flag = reading
    return speech_flag
reachy_mini_ha_voice/satellite.py ADDED
@@ -0,0 +1,784 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Voice satellite protocol for Reachy Mini."""
2
+
3
+ import hashlib
4
+ import logging
5
+ import math
6
+ import posixpath
7
+ import shutil
8
+ import time
9
+ from collections.abc import Iterable
10
+ from typing import Dict, Optional, Set, Union, TYPE_CHECKING
11
+ from urllib.parse import urlparse, urlunparse
12
+ from urllib.request import urlopen
13
+
14
+ if TYPE_CHECKING:
15
+ from .camera_server import MJPEGCameraServer
16
+
17
+ # pylint: disable=no-name-in-module
18
+ from aioesphomeapi.api_pb2 import ( # type: ignore[attr-defined]
19
+ ButtonCommandRequest,
20
+ CameraImageRequest,
21
+ DeviceInfoRequest,
22
+ DeviceInfoResponse,
23
+ ListEntitiesDoneResponse,
24
+ ListEntitiesRequest,
25
+ MediaPlayerCommandRequest,
26
+ NumberCommandRequest,
27
+ SelectCommandRequest,
28
+ SubscribeHomeAssistantStatesRequest,
29
+ SubscribeStatesRequest,
30
+ SwitchCommandRequest,
31
+ VoiceAssistantAnnounceFinished,
32
+ VoiceAssistantAnnounceRequest,
33
+ VoiceAssistantAudio,
34
+ VoiceAssistantConfigurationRequest,
35
+ VoiceAssistantConfigurationResponse,
36
+ VoiceAssistantEventResponse,
37
+ VoiceAssistantExternalWakeWord,
38
+ VoiceAssistantRequest,
39
+ VoiceAssistantSetConfiguration,
40
+ VoiceAssistantTimerEventResponse,
41
+ VoiceAssistantWakeWord,
42
+ )
43
+ from aioesphomeapi.model import (
44
+ VoiceAssistantEventType,
45
+ VoiceAssistantFeature,
46
+ VoiceAssistantTimerEventType,
47
+ )
48
+ from google.protobuf import message
49
+ from pymicro_wakeword import MicroWakeWord
50
+ from pyopen_wakeword import OpenWakeWord
51
+
52
+ from .api_server import APIServer
53
+ from .entity import MediaPlayerEntity
54
+ from .entity_registry import EntityRegistry, get_entity_key
55
+ from .models import AvailableWakeWord, ServerState, WakeWordType
56
+ from .util import call_all
57
+ from .reachy_controller import ReachyController
58
+
59
+ _LOGGER = logging.getLogger(__name__)
60
+
61
+
62
+ class VoiceSatelliteProtocol(APIServer):
63
+ """Voice satellite protocol handler for ESPHome."""
64
+
65
+ def __init__(self, state: ServerState, camera_server: Optional["MJPEGCameraServer"] = None) -> None:
66
+ super().__init__(state.name)
67
+ self.state = state
68
+ self.state.satellite = self
69
+ self.camera_server = camera_server
70
+
71
+ # Initialize streaming state early (before entity setup)
72
+ self._is_streaming_audio = False
73
+ self._tts_url: Optional[str] = None
74
+ self._tts_played = False
75
+ self._continue_conversation = False
76
+ self._timer_finished = False
77
+ self._external_wake_words: Dict[str, VoiceAssistantExternalWakeWord] = {}
78
+
79
+ # Conversation tracking for continuous conversation
80
+ self._conversation_id: Optional[str] = None
81
+ self._conversation_timeout = 300.0 # 5 minutes, same as ESPHome default
82
+ self._last_conversation_time = 0.0
83
+
84
+ # Initialize Reachy controller
85
+ self.reachy_controller = ReachyController(state.reachy_mini)
86
+
87
+ # Connect MovementManager to ReachyController for pose control from HA
88
+ if state.motion is not None and state.motion.movement_manager is not None:
89
+ self.reachy_controller.set_movement_manager(state.motion.movement_manager)
90
+
91
+ # Setup speech sway callback for audio-driven head motion
92
+ def sway_callback(sway: dict) -> None:
93
+ mm = state.motion.movement_manager
94
+ if mm is not None:
95
+ mm.set_speech_sway(
96
+ sway.get("x_m", 0.0),
97
+ sway.get("y_m", 0.0),
98
+ sway.get("z_m", 0.0),
99
+ sway.get("roll_rad", 0.0),
100
+ sway.get("pitch_rad", 0.0),
101
+ sway.get("yaw_rad", 0.0),
102
+ )
103
+
104
+ state.tts_player.set_sway_callback(sway_callback)
105
+ _LOGGER.info("Speech sway callback configured for TTS player")
106
+
107
+ # Initialize entity registry
108
+ self._entity_registry = EntityRegistry(
109
+ server=self,
110
+ reachy_controller=self.reachy_controller,
111
+ camera_server=camera_server,
112
+ play_emotion_callback=self._play_emotion,
113
+ )
114
+
115
+ # Connect gesture state callback
116
+ if camera_server:
117
+ camera_server.set_gesture_state_callback(self._entity_registry.update_gesture_state)
118
+
119
+ # Only setup entities once (check if already initialized)
120
+ # This prevents duplicate entity registration on reconnection
121
+ if not getattr(self.state, '_entities_initialized', False):
122
+ if self.state.media_player_entity is None:
123
+ self.state.media_player_entity = MediaPlayerEntity(
124
+ server=self,
125
+ key=get_entity_key("reachy_mini_media_player"),
126
+ name="Media Player",
127
+ object_id="reachy_mini_media_player",
128
+ music_player=state.music_player,
129
+ announce_player=state.tts_player,
130
+ )
131
+ self.state.entities.append(self.state.media_player_entity)
132
+
133
+ # Setup all entities using the registry
134
+ self._entity_registry.setup_all_entities(self.state.entities)
135
+
136
+ # Mark entities as initialized
137
+ self.state._entities_initialized = True
138
+ _LOGGER.info("Entities initialized: %d total", len(self.state.entities))
139
+ else:
140
+ _LOGGER.debug("Entities already initialized, skipping setup")
141
+ # Update server reference in existing entities
142
+ for entity in self.state.entities:
143
+ entity.server = self
144
+
145
def handle_voice_event(
    self, event_type: VoiceAssistantEventType, data: Dict[str, str]
) -> None:
    """React to one voice-pipeline event from Home Assistant.

    Args:
        event_type: Which pipeline stage fired.
        data: Event arguments as a name -> value mapping. The keys read
            here are "url", "tts_start_streaming" and
            "continue_conversation".
    """
    _LOGGER.debug("Voice event: type=%s, data=%s", event_type.name, data)
    events = VoiceAssistantEventType

    if event_type == events.VOICE_ASSISTANT_RUN_START:
        self._tts_url = data.get("url")
        self._tts_played = False
        self._continue_conversation = False
        self._reachy_on_listening()
        # Note: TTS URL requires HA authentication, cannot pre-download.
        # Speaking animation uses JSON-defined multi-frequency sway instead.

    elif event_type in (
        events.VOICE_ASSISTANT_STT_VAD_END,
        events.VOICE_ASSISTANT_STT_END,
    ):
        # The user stopped talking: stop forwarding audio, show "thinking".
        self._is_streaming_audio = False
        self._reachy_on_thinking()

    elif event_type == events.VOICE_ASSISTANT_INTENT_PROGRESS:
        # The TTS stream may already be available: start playback early.
        if data.get("tts_start_streaming") == "1":
            self.play_tts()

    elif event_type == events.VOICE_ASSISTANT_INTENT_END:
        if data.get("continue_conversation") == "1":
            self._continue_conversation = True

    elif event_type == events.VOICE_ASSISTANT_TTS_START:
        _LOGGER.debug("TTS_START event received, triggering speaking animation")
        self._reachy_on_speaking()

    elif event_type == events.VOICE_ASSISTANT_TTS_END:
        self._tts_url = data.get("url")
        self.play_tts()

    elif event_type == events.VOICE_ASSISTANT_RUN_END:
        self._is_streaming_audio = False

        # No TTS was played, so the finish logic would otherwise never run.
        if not self._tts_played:
            self._tts_finished()

        self._tts_played = False
195
+
196
def handle_timer_event(
    self,
    event_type: VoiceAssistantTimerEventType,
    msg: VoiceAssistantTimerEventResponse,
) -> None:
    """React to a timer event; only TIMER_FINISHED triggers any action.

    On the first TIMER_FINISHED, the stop word is activated, music is
    ducked, the timer sound loop is started and the robot's timer-finished
    reaction is triggered. Repeated events while the alarm is already
    ringing are ignored.
    """
    _LOGGER.debug("Timer event: type=%s", event_type.name)

    if event_type != VoiceAssistantTimerEventType.VOICE_ASSISTANT_TIMER_FINISHED:
        return
    if self._timer_finished:
        return

    # Activate the stop word so the user can silence the alarm by voice.
    self.state.active_wake_words.add(self.state.stop_word.id)
    self._timer_finished = True
    self.duck()
    self._play_timer_finished()
    self._reachy_on_timer_finished()
211
+
212
def handle_message(self, msg: message.Message) -> Iterable[message.Message]:
    """Dispatch one incoming API message and yield any response messages.

    Handles voice pipeline events, announcements, timer events, device
    info requests, entity-level requests (forwarded to every entity), and
    wake word configuration queries/updates. Unrecognised message types
    yield nothing.
    """
    if isinstance(msg, VoiceAssistantEventResponse):
        # Flatten the repeated (name, value) arguments into a dict.
        event_data: Dict[str, str] = {arg.name: arg.value for arg in msg.data}
        self.handle_voice_event(VoiceAssistantEventType(msg.event_type), event_data)

    elif isinstance(msg, VoiceAssistantAnnounceRequest):
        _LOGGER.debug("Announcing: %s", msg.text)
        assert self.state.media_player_entity is not None

        # Optional pre-announce sound first, then the announcement itself.
        media_urls = [msg.preannounce_media_id] if msg.preannounce_media_id else []
        media_urls.append(msg.media_id)

        self.state.active_wake_words.add(self.state.stop_word.id)
        self._continue_conversation = msg.start_conversation
        self.duck()

        yield from self.state.media_player_entity.play(
            media_urls, announcement=True, done_callback=self._tts_finished
        )

    elif isinstance(msg, VoiceAssistantTimerEventResponse):
        self.handle_timer_event(VoiceAssistantTimerEventType(msg.event_type), msg)

    elif isinstance(msg, DeviceInfoRequest):
        feature_flags = (
            VoiceAssistantFeature.VOICE_ASSISTANT
            | VoiceAssistantFeature.API_AUDIO
            | VoiceAssistantFeature.ANNOUNCE
            | VoiceAssistantFeature.START_CONVERSATION
            | VoiceAssistantFeature.TIMERS
        )
        yield DeviceInfoResponse(
            uses_password=False,
            name=self.state.name,
            mac_address=self.state.mac_address,
            voice_assistant_feature_flags=feature_flags,
        )

    elif isinstance(
        msg,
        (
            ListEntitiesRequest,
            SubscribeHomeAssistantStatesRequest,
            SubscribeStatesRequest,
            MediaPlayerCommandRequest,
            NumberCommandRequest,
            SwitchCommandRequest,
            SelectCommandRequest,
            ButtonCommandRequest,
            CameraImageRequest,
        ),
    ):
        # Every entity sees the message and answers the ones it owns.
        for entity in self.state.entities:
            yield from entity.handle_message(msg)

        if isinstance(msg, ListEntitiesRequest):
            yield ListEntitiesDoneResponse()

    elif isinstance(msg, VoiceAssistantConfigurationRequest):
        offered = [
            VoiceAssistantWakeWord(
                id=ww.id,
                wake_word=ww.wake_word,
                trained_languages=ww.trained_languages,
            )
            for ww in self.state.available_wake_words.values()
        ]

        for external in msg.external_wake_words:
            # Only "micro" models are offered; other types are skipped.
            if external.model_type != "micro":
                continue

            offered.append(
                VoiceAssistantWakeWord(
                    id=external.id,
                    wake_word=external.wake_word,
                    trained_languages=external.trained_languages,
                )
            )
            # Remembered so a later SetConfiguration can download it.
            self._external_wake_words[external.id] = external

        yield VoiceAssistantConfigurationResponse(
            available_wake_words=offered,
            active_wake_words=[
                ww.id
                for ww in self.state.wake_words.values()
                if ww.id in self.state.active_wake_words
            ],
            max_active_wake_words=2,
        )

        _LOGGER.info("Connected to Home Assistant")

    elif isinstance(msg, VoiceAssistantSetConfiguration):
        # Rebuild the active set from scratch based on the request.
        requested_active: Set[str] = set()

        for wake_word_id in msg.active_wake_words:
            if wake_word_id in self.state.wake_words:
                # Model already loaded, so only activation is needed.
                requested_active.add(wake_word_id)
                continue

            model_info = self.state.available_wake_words.get(wake_word_id)
            if not model_info:
                # Not known locally: fall back to an external wake word,
                # which may have to be downloaded first.
                external = self._external_wake_words.get(wake_word_id)
                if not external:
                    _LOGGER.warning("Wake word not found: %s", wake_word_id)
                    continue

                model_info = self._download_external_wake_word(external)
                if not model_info:
                    continue

                self.state.available_wake_words[wake_word_id] = model_info

            _LOGGER.debug("Loading wake word: %s", model_info.wake_word_path)
            loaded_model = model_info.load()
            # Tag the model with its id so it can be identified later.
            setattr(loaded_model, 'id', wake_word_id)
            self.state.wake_words[wake_word_id] = loaded_model
            _LOGGER.info("Wake word loaded: %s", wake_word_id)
            requested_active.add(wake_word_id)
            # No break: every requested wake word is loaded, not just the first.

        self.state.active_wake_words = requested_active
        _LOGGER.debug("Active wake words: %s", requested_active)

        self.state.preferences.active_wake_words = list(requested_active)
        self.state.save_preferences()
        self.state.wake_words_changed = True
348
+
349
def handle_audio(self, audio_chunk: bytes) -> None:
    """Forward one microphone chunk to Home Assistant while streaming is on.

    Chunks are dropped when ``self._is_streaming_audio`` is False.
    """
    if self._is_streaming_audio:
        self.send_messages([VoiceAssistantAudio(data=audio_chunk)])
353
+
354
def _get_or_create_conversation_id(self) -> str:
    """Return the current conversation id, minting a new one if needed.

    A new UUID4 string is generated when no id exists yet or when more than
    ``self._conversation_timeout`` seconds have passed since the last call.
    The last-use timestamp is refreshed on every call.
    """
    now = time.time()
    idle_for = now - self._last_conversation_time
    expired = self._conversation_id is None or idle_for > self._conversation_timeout
    if expired:
        import uuid
        self._conversation_id = str(uuid.uuid4())
        _LOGGER.debug("Created new conversation_id: %s", self._conversation_id)

    self._last_conversation_time = now
    return self._conversation_id
369
+
370
def _clear_conversation(self) -> None:
    """Forget the conversation id and reset the continue-conversation flag."""
    self._continue_conversation = False
    self._conversation_id = None
374
+
375
def wakeup(self, wake_word: Union[MicroWakeWord, OpenWakeWord]) -> None:
    """Handle a wake word detection by starting the voice pipeline.

    If a finished timer is currently ringing, the wake word only silences
    it and no pipeline is started. Otherwise the robot turns toward the
    sound, a ``VoiceAssistantRequest`` is sent with the current
    conversation id, music is ducked, audio streaming is switched on and
    the wake-up sound is played.
    """
    if self._timer_finished:
        # The wake word doubles as "stop the alarm".
        self._timer_finished = False
        self.state.tts_player.stop()
        _LOGGER.debug("Stopping timer finished sound")
        return

    phrase = wake_word.wake_word
    _LOGGER.debug("Detected wake word: %s", phrase)

    # Orient toward the speaker using the Direction of Arrival reading.
    self._turn_to_sound_source()

    # Reuse (or create) the conversation id so context is kept.
    request = VoiceAssistantRequest(
        start=True,
        wake_word_phrase=phrase,
        conversation_id=self._get_or_create_conversation_id(),
    )
    self.send_messages([request])
    self.duck()
    self._is_streaming_audio = True
    self.state.tts_player.play(self.state.wakeup_sound)
403
+
404
def stop(self) -> None:
    """Stop the current TTS/timer playback (e.g. the user said the stop word).

    The stop word is deactivated and the TTS player halted. If a timer
    alarm was ringing, only its flag is cleared; otherwise the normal
    end-of-TTS handling runs.
    """
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.state.tts_player.stop()

    if not self._timer_finished:
        _LOGGER.debug("TTS response stopped manually")
        self._tts_finished()
        return

    self._timer_finished = False
    _LOGGER.debug("Stopping timer finished sound")
415
+
416
def play_tts(self) -> None:
    """Play the pending TTS response once.

    Does nothing when there is no TTS URL or the response was already
    played for this pipeline run. Otherwise the stop word is activated and
    playback starts, with ``_tts_finished`` as the completion callback.
    """
    already_handled = (not self._tts_url) or self._tts_played
    if already_handled:
        return

    self._tts_played = True
    _LOGGER.debug("Playing TTS response: %s", self._tts_url)

    # Let the user interrupt the response with the stop word.
    self.state.active_wake_words.add(self.state.stop_word.id)
    self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
425
+
426
def duck(self) -> None:
    """Duck the music player and pause Sendspin for a voice interaction."""
    _LOGGER.debug("Ducking music")
    player = self.state.music_player
    player.duck()
    # Sendspin is paused too, to prevent audio conflicts.
    player.pause_sendspin()
431
+
432
def unduck(self) -> None:
    """Unduck the music player and resume Sendspin audio."""
    _LOGGER.debug("Unducking music")
    player = self.state.music_player
    player.unduck()
    player.resume_sendspin()
437
+
438
def _tts_finished(self) -> None:
    """Run the end-of-TTS handling, including continue-conversation.

    The stop word is deactivated and ``VoiceAssistantAnnounceFinished`` is
    sent. The conversation then either continues (prompt sound, new
    pipeline request with the same conversation id, listening state) or
    ends (conversation cleared, music unducked, idle state).

    The conversation continues when either:
      1. the local continuous-conversation preference is ON, or
      2. Home Assistant requested it for this turn.
    """
    self.state.active_wake_words.discard(self.state.stop_word.id)
    self.send_messages([VoiceAssistantAnnounceFinished()])

    continuous_mode = self.state.preferences.continuous_conversation
    keep_going = continuous_mode or self._continue_conversation

    if not keep_going:
        self._clear_conversation()
        self.unduck()
        _LOGGER.debug("Conversation finished")
        self._reachy_on_idle()
        return

    _LOGGER.debug("Continuing conversation (our_switch=%s, ha_request=%s)",
                  continuous_mode, self._continue_conversation)

    # Audible prompt: ready for the next utterance.
    self.state.tts_player.play(self.state.wakeup_sound)

    # Same conversation id keeps the context across turns.
    follow_up = VoiceAssistantRequest(
        start=True,
        conversation_id=self._get_or_create_conversation_id(),
    )
    self.send_messages([follow_up])
    self._is_streaming_audio = True

    self._reachy_on_listening()
476
+
477
def _play_timer_finished(self) -> None:
    """Play the timer-finished sound in a loop until the flag is cleared.

    While ``self._timer_finished`` is set, the sound is played and, once it
    ends, this method is invoked again after a one-second pause. When the
    flag has been cleared, the music is unducked and the loop stops.
    """
    if self._timer_finished:
        def replay_after_pause():
            # Pause for a second, then re-check the flag and play again.
            return call_all(lambda: time.sleep(1.0), self._play_timer_finished)

        self.state.tts_player.play(
            self.state.timer_finished_sound,
            done_callback=replay_after_pause,
        )
        return

    self.unduck()
488
+
489
def connection_lost(self, exc):
    """Handle a dropped Home Assistant connection.

    Delegates to the base class, then resets the per-connection streaming
    and TTS flags so a later connection starts clean.
    """
    super().connection_lost(exc)
    _LOGGER.info("Disconnected from Home Assistant")
    self._continue_conversation = False
    self._tts_played = False
    self._tts_url = None
    self._is_streaming_audio = False
497
+
498
def _download_external_wake_word(
    self, external_wake_word: VoiceAssistantExternalWakeWord
) -> Optional[AvailableWakeWord]:
    """Ensure an external wake word's config and model files exist locally.

    Files are stored under ``<download_dir>/external_wake_words`` as
    ``<id>.json`` (config) and ``<id>.tflite`` (model). The model is only
    re-downloaded when it is missing or its size or SHA-256 hash differ from
    the values announced in ``external_wake_word``. The config is
    re-downloaded whenever it is missing or the model is re-downloaded.

    Args:
        external_wake_word: Wake word description providing ``id``, ``url``
            (of the config file), ``model_size``, ``model_hash``,
            ``wake_word`` and ``trained_languages``.

    Returns:
        An ``AvailableWakeWord`` pointing at the local config file, or
        ``None`` when a download fails (non-200 status, network error,
        timeout, or an unusable URL). Failures are logged as warnings and
        never raised, so the caller can simply skip the wake word.
    """
    # urlopen has no timeout by default; a stalled server would otherwise
    # block the caller indefinitely.
    download_timeout = 30.0

    def fetch(url, dest) -> bool:
        """Download ``url`` to ``dest``; return False on a non-200 status."""
        _LOGGER.debug("Downloading %s to %s", url, dest)
        # Write to a sibling temp file and rename afterwards, so an
        # interrupted transfer never leaves a truncated file at ``dest``.
        partial = dest.with_name(dest.name + ".part")
        with urlopen(url, timeout=download_timeout) as request:
            if request.status != 200:
                _LOGGER.warning(
                    "Failed to download: %s, status=%s", url, request.status
                )
                return False

            with open(partial, "wb") as out_file:
                shutil.copyfileobj(request, out_file)

        partial.replace(dest)
        return True

    eww_dir = self.state.download_dir / "external_wake_words"
    eww_dir.mkdir(parents=True, exist_ok=True)

    config_path = eww_dir / f"{external_wake_word.id}.json"
    should_download_config = not config_path.exists()

    # The model is reused only when both its size and its hash match.
    model_path = eww_dir / f"{external_wake_word.id}.tflite"
    should_download_model = True

    if model_path.exists():
        model_size = model_path.stat().st_size
        if model_size == external_wake_word.model_size:
            with open(model_path, "rb") as model_file:
                model_hash = hashlib.sha256(model_file.read()).hexdigest()

            if model_hash == external_wake_word.model_hash:
                should_download_model = False
                _LOGGER.debug(
                    "Model size and hash match for %s. Skipping download.",
                    external_wake_word.id,
                )

    try:
        if should_download_config or should_download_model:
            if not fetch(external_wake_word.url, config_path):
                return None

        if should_download_model:
            # The model lives next to the config: same URL directory, with
            # the local model file name.
            parsed_url = urlparse(external_wake_word.url)
            parsed_url = parsed_url._replace(
                path=posixpath.join(posixpath.dirname(parsed_url.path), model_path.name)
            )
            model_url = urlunparse(parsed_url)

            if not fetch(model_url, model_path):
                return None
    except (OSError, ValueError) as err:
        # OSError covers URLError/HTTPError, socket timeouts and file errors;
        # ValueError covers malformed or unsupported URLs.
        _LOGGER.warning(
            "Failed to download external wake word %s: %s",
            external_wake_word.id,
            err,
        )
        return None

    return AvailableWakeWord(
        id=external_wake_word.id,
        type=WakeWordType.MICRO_WAKE_WORD,
        wake_word=external_wake_word.wake_word,
        trained_languages=external_wake_word.trained_languages,
        wake_word_path=config_path,
    )
565
+
566
+ # -------------------------------------------------------------------------
567
+ # Reachy Mini Motion Control
568
+ # -------------------------------------------------------------------------
569
+
570
def _turn_to_sound_source(self) -> None:
    """Turn the robot head toward the sound source once, at wake-up.

    Face tracking is expected to take over after this initial turn.

    DOA angle convention (from the SDK):
    - 0 radians   = left  (Y+ direction in head frame)
    - π/2 radians = front (X+ direction in head frame)
    - π radians   = right (Y- direction in head frame)

    The SDK uses ``p_head = [sin(doa), cos(doa), 0]``. The yaw passed to the
    movement manager is ``(doa - π/2)`` in degrees, scaled by 0.8, and turns
    smaller than 10° are skipped.

    The ``speech_detected`` flag is deliberately not checked: by the time
    wake word detection completes, the user may have stopped speaking.

    NOTE(review): the original comments state both "positive yaw = turn
    left" and "left (doa=0) maps to -90°", which contradict each other.
    Behaviour is unchanged here; confirm the sign convention of
    ``turn_to_angle``.
    """
    if not self.state.motion_enabled or not self.state.reachy_mini:
        _LOGGER.info("DOA turn-to-sound: motion disabled or no robot")
        return

    min_turn_deg = 10.0   # smaller angles are treated as noise
    turn_fraction = 0.8   # conservative: DOA may be inaccurate

    try:
        # The DOA is read exactly once.
        reading = self.reachy_controller.get_doa_angle()
        if reading is None:
            _LOGGER.info("DOA not available, skipping turn-to-sound")
            return

        angle_rad, speech_detected = reading
        _LOGGER.debug("DOA raw: angle=%.3f rad (%.1f°), speech=%s",
                      angle_rad, math.degrees(angle_rad), speech_detected)

        # Direction vector in head frame (X+ front, Y+ left); logged only.
        front_component = math.sin(angle_rad)
        left_component = math.cos(angle_rad)

        # yaw = doa - π/2: doa 0 -> -90°, π/2 -> 0°, π -> +90°
        yaw_deg = math.degrees(angle_rad - math.pi / 2)

        _LOGGER.debug("DOA direction: x=%.2f, y=%.2f, yaw=%.1f°",
                      front_component, left_component, yaw_deg)

        if abs(yaw_deg) < min_turn_deg:
            _LOGGER.debug("DOA angle %.1f° below threshold (%.1f°), skipping turn",
                          yaw_deg, min_turn_deg)
            return

        target_yaw_deg = yaw_deg * turn_fraction

        _LOGGER.info("Turning toward sound source: DOA=%.1f°, target=%.1f°",
                     yaw_deg, target_yaw_deg)

        # The turn is delegated to the MovementManager, when there is one.
        if self.state.motion and self.state.motion.movement_manager:
            self.state.motion.movement_manager.turn_to_angle(
                target_yaw_deg,
                duration=0.5  # Quick turn
            )
    except Exception as e:
        _LOGGER.error("Error in turn-to-sound: %s", e)
641
+
642
def _reachy_on_listening(self) -> None:
    """React to the assistant entering the listening state (HA: Listening).

    Switches face tracking into conversation mode, re-enables face tracking
    on the camera server (it may have been paused while speaking) and, when
    motion is enabled and a robot is attached, triggers the listening
    animation. Failures are logged and never propagated.
    """
    self._set_conversation_mode(True)

    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Listening animation")
        motion = self.state.motion
        if motion:
            motion.on_listening()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
662
+
663
def _reachy_on_thinking(self) -> None:
    """React to the assistant processing speech (HA: Processing).

    Re-enables face tracking on the camera server (it may have been paused
    while speaking) and, when motion is enabled and a robot is attached,
    triggers the thinking animation. Failures are logged and never
    propagated.
    """
    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Thinking animation")
        motion = self.state.motion
        if motion:
            motion.on_thinking()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
680
+
681
def _reachy_on_speaking(self) -> None:
    """React to TTS playback starting (HA: Responding).

    Face tracking is paused first so the speaking animation can drive the
    head instead. The animation itself is started only when motion is
    enabled, a robot instance exists and a motion controller is present;
    each missing prerequisite is reported with its own warning.
    """
    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(False)
            _LOGGER.debug("Face tracking paused during speaking")
        except Exception as err:
            _LOGGER.debug("Failed to pause face tracking: %s", err)

    # Prerequisites are checked in order; the first one missing wins.
    state = self.state
    if not state.motion_enabled:
        skip_reason = "Motion disabled, skipping speaking animation"
    elif not state.reachy_mini:
        skip_reason = "No reachy_mini instance, skipping speaking animation"
    elif not state.motion:
        skip_reason = "No motion controller, skipping speaking animation"
    else:
        skip_reason = None

    if skip_reason is not None:
        _LOGGER.warning(skip_reason)
        return

    try:
        _LOGGER.debug("Reachy Mini: Starting speaking animation")
        self.state.motion.on_speaking_start()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
706
+
707
def _reachy_on_idle(self) -> None:
    """React to the assistant returning to idle (HA: Idle).

    Leaves conversation mode so face tracking falls back to its adaptive
    rate, re-enables face tracking on the camera server (it may have been
    paused while speaking) and, when motion is enabled and a robot is
    attached, triggers the idle animation. Failures are logged and never
    propagated.
    """
    self._set_conversation_mode(False)

    camera = self.camera_server
    if camera is not None:
        try:
            camera.set_face_tracking_enabled(True)
        except Exception as err:
            _LOGGER.debug("Failed to resume face tracking: %s", err)

    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Idle animation")
        motion = self.state.motion
        if motion:
            motion.on_idle()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
727
+
728
def _set_conversation_mode(self, in_conversation: bool) -> None:
    """Forward the conversation-mode flag to the camera server.

    Per the original author, face tracking runs at high frequency while in
    conversation and at an adaptive rate when idle to save CPU. Does nothing
    when no camera server is attached; errors are logged at debug level.

    Args:
        in_conversation: True while a conversation is in progress.
    """
    camera = self.camera_server
    if camera is None:
        return

    try:
        camera.set_conversation_mode(in_conversation)
    except Exception as err:
        _LOGGER.debug("Failed to set conversation mode: %s", err)
739
+
740
def _reachy_on_timer_finished(self) -> None:
    """Trigger the timer-finished animation when a timer elapses.

    Skipped silently when motion is disabled or no robot is attached.
    Errors raised by the motion controller are logged, not propagated.
    """
    if not (self.state.motion_enabled and self.state.reachy_mini):
        return

    try:
        _LOGGER.debug("Reachy Mini: Timer finished animation")
        motion = self.state.motion
        if motion:
            motion.on_timer_finished()
    except Exception as err:
        _LOGGER.error("Reachy Mini motion error: %s", err)
750
+
751
def _play_emotion(self, emotion_name: str) -> None:
    """Play an emotion/expression from the emotions library.

    Sends a POST request to the recorded-move playback endpoint on port
    8000, using the ``pollen-robotics/reachy-mini-emotions-library``
    dataset. The host is the ``wlan_ip`` reported by the robot status,
    falling back to ``localhost`` whenever that value is missing, empty or
    cannot be fetched.

    All errors (including a missing ``requests`` package) are logged and
    swallowed; this method never raises.

    Args:
        emotion_name: Name of the emotion (e.g., "happy1", "sad1", etc.)
    """
    try:
        # Imported lazily so a missing dependency only disables emotions.
        import requests

        host = "localhost"
        if self.state.reachy_mini is not None:
            try:
                status = self.state.reachy_mini.client.get_status(wait=False)
                # ``or`` (rather than a .get default) also covers a key that
                # is present but None/empty, which would otherwise build
                # "http://None:8000/...".
                host = status.get("wlan_ip") or "localhost"
            except Exception:
                host = "localhost"

        # Dataset: pollen-robotics/reachy-mini-emotions-library
        base_url = f"http://{host}:8000/api/move/play/recorded-move-dataset"
        dataset = "pollen-robotics/reachy-mini-emotions-library"
        url = f"{base_url}/{dataset}/{emotion_name}"

        response = requests.post(url, timeout=5)
        if response.status_code == 200:
            move_uuid = response.json().get("uuid")
            _LOGGER.info("Playing emotion: %s (uuid=%s)", emotion_name, move_uuid)
        else:
            _LOGGER.warning(
                "Failed to play emotion %s: HTTP %s",
                emotion_name,
                response.status_code,
            )

    except Exception as err:
        _LOGGER.error("Error playing emotion %s: %s", emotion_name, err)
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/LICENSE.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/README.md RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/timer_finished.flac RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/sounds/wake_word_triggered.flac RENAMED
File without changes
{reachy_mini_home_assistant/motion → reachy_mini_ha_voice}/speech_sway.py RENAMED
@@ -6,9 +6,8 @@ Analyzes audio loudness to drive natural head movements during TTS playback.
6
 
7
  import math
8
  from collections import deque
9
- from collections.abc import Callable
10
  from itertools import islice
11
- from typing import Any
12
 
13
  import numpy as np
14
  from numpy.typing import NDArray
@@ -65,7 +64,7 @@ def _loudness_gain(db: float, offset: float = SENS_DB_OFFSET) -> float:
65
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
66
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
67
  t = max(0.0, min(1.0, t))
68
- return t**LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
69
 
70
 
71
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
@@ -94,7 +93,7 @@ def _resample_linear(x: NDArray[np.float32], sr_in: int, sr_out: int) -> NDArray
94
  """Lightweight linear resampler for short buffers."""
95
  if sr_in == sr_out or x.size == 0:
96
  return x
97
- n_out = round(x.size * sr_out / sr_in)
98
  if n_out <= 1:
99
  return np.zeros(0, dtype=np.float32)
100
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
@@ -144,7 +143,7 @@ class SpeechSwayRT:
144
  self.sway_down = 0
145
  self.t = 0.0
146
 
147
- def feed(self, pcm: NDArray[Any], sr: int | None = None) -> list[dict[str, float]]:
148
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
149
 
150
  Args:
@@ -168,7 +167,7 @@ class SpeechSwayRT:
168
  else:
169
  self.carry = x
170
 
171
- out: list[dict[str, float]] = []
172
 
173
  while self.carry.size >= HOP:
174
  hop = self.carry[:HOP]
@@ -216,35 +215,27 @@ class SpeechSwayRT:
216
  self.t += HOP_MS / 1000.0
217
 
218
  # Oscillators
219
- pitch = (
220
- math.radians(SWAY_A_PITCH_DEG)
221
- * loud
222
- * env
223
- * math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch)
224
- )
225
- yaw = (
226
- math.radians(SWAY_A_YAW_DEG) * loud * env * math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw)
227
- )
228
- roll = (
229
- math.radians(SWAY_A_ROLL_DEG)
230
- * loud
231
- * env
232
- * math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll)
233
- )
234
- x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_X * self.t + self.phase_x)
235
- y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
236
- z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
237
-
238
- out.append(
239
- {
240
- "pitch_rad": pitch,
241
- "yaw_rad": yaw,
242
- "roll_rad": roll,
243
- "x_m": x_m,
244
- "y_m": y_m,
245
- "z_m": z_m,
246
- }
247
- )
248
 
249
  return out
250
 
@@ -252,7 +243,7 @@ class SpeechSwayRT:
252
  def analyze_audio_for_sway(
253
  audio_data: NDArray[Any],
254
  sample_rate: int,
255
- callback: Callable[[dict[str, float]], None],
256
  ) -> None:
257
  """Analyze entire audio and call callback for each sway frame.
258
 
 
6
 
7
  import math
8
  from collections import deque
 
9
  from itertools import islice
10
+ from typing import Any, Callable, Dict, List, Optional
11
 
12
  import numpy as np
13
  from numpy.typing import NDArray
 
64
  """Normalize dB into [0,1] with gamma; clipped to [0,1]."""
65
  t = (db + offset - SWAY_DB_LOW) / (SWAY_DB_HIGH - SWAY_DB_LOW)
66
  t = max(0.0, min(1.0, t))
67
+ return t ** LOUDNESS_GAMMA if LOUDNESS_GAMMA != 1.0 else t
68
 
69
 
70
  def _to_float32_mono(x: NDArray[Any]) -> NDArray[np.float32]:
 
93
  """Lightweight linear resampler for short buffers."""
94
  if sr_in == sr_out or x.size == 0:
95
  return x
96
+ n_out = int(round(x.size * sr_out / sr_in))
97
  if n_out <= 1:
98
  return np.zeros(0, dtype=np.float32)
99
  t_in = np.linspace(0.0, 1.0, num=x.size, dtype=np.float32, endpoint=True)
 
143
  self.sway_down = 0
144
  self.t = 0.0
145
 
146
+ def feed(self, pcm: NDArray[Any], sr: Optional[int] = None) -> List[Dict[str, float]]:
147
  """Stream in PCM chunk. Returns list of sway dicts, one per hop.
148
 
149
  Args:
 
167
  else:
168
  self.carry = x
169
 
170
+ out: List[Dict[str, float]] = []
171
 
172
  while self.carry.size >= HOP:
173
  hop = self.carry[:HOP]
 
215
  self.t += HOP_MS / 1000.0
216
 
217
  # Oscillators
218
+ pitch = (math.radians(SWAY_A_PITCH_DEG) * loud * env *
219
+ math.sin(2 * math.pi * SWAY_F_PITCH * self.t + self.phase_pitch))
220
+ yaw = (math.radians(SWAY_A_YAW_DEG) * loud * env *
221
+ math.sin(2 * math.pi * SWAY_F_YAW * self.t + self.phase_yaw))
222
+ roll = (math.radians(SWAY_A_ROLL_DEG) * loud * env *
223
+ math.sin(2 * math.pi * SWAY_F_ROLL * self.t + self.phase_roll))
224
+ x_m = (SWAY_A_X_MM / 1000.0) * loud * env * math.sin(
225
+ 2 * math.pi * SWAY_F_X * self.t + self.phase_x)
226
+ y_m = (SWAY_A_Y_MM / 1000.0) * loud * env * math.sin(
227
+ 2 * math.pi * SWAY_F_Y * self.t + self.phase_y)
228
+ z_m = (SWAY_A_Z_MM / 1000.0) * loud * env * math.sin(
229
+ 2 * math.pi * SWAY_F_Z * self.t + self.phase_z)
230
+
231
+ out.append({
232
+ "pitch_rad": pitch,
233
+ "yaw_rad": yaw,
234
+ "roll_rad": roll,
235
+ "x_m": x_m,
236
+ "y_m": y_m,
237
+ "z_m": z_m,
238
+ })
 
 
 
 
 
 
 
 
239
 
240
  return out
241
 
 
243
  def analyze_audio_for_sway(
244
  audio_data: NDArray[Any],
245
  sample_rate: int,
246
+ callback: Callable[[Dict[str, float]], None],
247
  ) -> None:
248
  """Analyze entire audio and call callback for each sway frame.
249
 
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/index.html RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/main.js RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/static/style.css RENAMED
File without changes
reachy_mini_ha_voice/util.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions."""
2
+
3
+ import hashlib
4
+ import uuid
5
+ from collections.abc import Callable
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
def call_all(*funcs: Optional[Callable[[], None]]) -> None:
    """Invoke every supplied callback in order, skipping ``None`` entries.

    Args:
        *funcs: Zero-argument callables, or ``None`` placeholders to ignore.
    """
    for callback in (candidate for candidate in funcs if candidate is not None):
        callback()
15
+
16
+
17
def get_mac() -> str:
    """Return a stable device identifier used as the MAC address.

    The identifier is cached in ``<package parent>/local/.device_id`` so the
    same value is reused across restarts, preventing Home Assistant from
    seeing the device as new each time.

    When no usable cached value exists, a new one is derived from
    ``uuid.getnode()`` (first 12 hex digits of its MD5 digest; used only as
    a formatting hash, not for security) and written back on a best-effort
    basis.

    Returns:
        The cached identifier if present and non-empty, otherwise the newly
        generated 12-character hexadecimal identifier.
    """
    local_dir = Path(__file__).parent.parent / "local"
    device_id_file = local_dir / ".device_id"

    # Best effort: a read-only install location must not prevent start-up.
    try:
        local_dir.mkdir(parents=True, exist_ok=True)
    except OSError:
        pass

    try:
        cached_id = device_id_file.read_text().strip()
    except (OSError, ValueError):
        # Missing, unreadable or undecodable file: fall through and regenerate.
        cached_id = ""

    # An empty/whitespace-only file is not a valid identifier.
    if cached_id:
        return cached_id

    machine_id = uuid.getnode()
    device_id = hashlib.md5(str(machine_id).encode()).hexdigest()[:12]

    try:
        device_id_file.write_text(device_id)
    except OSError:
        # Not persisted; the same value is regenerated as long as
        # uuid.getnode() stays stable.
        pass

    return device_id
reachy_mini_ha_voice/voice_assistant.py ADDED
@@ -0,0 +1,810 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Voice Assistant Service for Reachy Mini.
3
+
4
+ This module provides the main voice assistant service that integrates
5
+ with Home Assistant via ESPHome protocol.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import threading
12
+ import time
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from queue import Queue
16
+ from typing import Dict, List, Optional, Set, Union
17
+
18
+ import numpy as np
19
+
20
+ from reachy_mini import ReachyMini
21
+
22
+ from .models import AvailableWakeWord, Preferences, ServerState, WakeWordType
23
+ from .audio_player import AudioPlayer
24
+ from .satellite import VoiceSatelliteProtocol
25
+ from .util import get_mac
26
+ from .zeroconf import HomeAssistantZeroconf
27
+ from .motion import ReachyMiniMotion
28
+ from .camera_server import MJPEGCameraServer
29
+
30
+ _LOGGER = logging.getLogger(__name__)
31
+
32
+ _MODULE_DIR = Path(__file__).parent
33
+ _WAKEWORDS_DIR = _MODULE_DIR / "wakewords"
34
+ _SOUNDS_DIR = _MODULE_DIR / "sounds"
35
+ _LOCAL_DIR = _MODULE_DIR.parent / "local"
36
+
37
+
38
@dataclass
class AudioProcessingContext:
    """Mutable scratch state carried through audio processing.

    Every field has a default, so ``AudioProcessingContext()`` yields an
    empty context; list fields get a fresh list per instance.
    """

    # Wake word entries currently in use (element type not visible here).
    wake_words: List = field(default_factory=list)

    # Feature state and pending inputs for the "micro" pipeline
    # (presumably microWakeWord - confirm against the audio loop).
    micro_features: Optional[object] = None
    micro_inputs: List = field(default_factory=list)

    # Feature state and pending inputs for the "oww" pipeline
    # (presumably openWakeWord - confirm against the audio loop).
    oww_features: Optional[object] = None
    oww_inputs: List = field(default_factory=list)

    # Flag related to the "oww" pipeline; exact meaning is set by the caller.
    has_oww: bool = False

    # Timestamp-like float, or None when unset.
    last_active: Optional[float] = None
48
+
49
+
50
+ # Audio chunk size for consistent streaming (matches reference project)
51
+ AUDIO_BLOCK_SIZE = 1024 # samples at 16kHz = 64ms
52
+
53
+
54
+ class VoiceAssistantService:
55
+ """Voice assistant service that runs ESPHome protocol server."""
56
+
57
def __init__(
    self,
    reachy_mini: Optional[ReachyMini] = None,
    name: str = "Reachy Mini",
    host: str = "0.0.0.0",
    port: int = 6053,
    wake_model: str = "okay_nabu",
    camera_port: int = 8081,
    camera_enabled: bool = True,
):
    """Store the configuration and create the motion controller.

    No network or audio resources are acquired here; that happens in
    ``start()``.

    Args:
        reachy_mini: Robot handle, or None to run without a robot.
        name: Device name used for the server state and mDNS registration.
        host: Address the ESPHome and camera servers bind to.
        port: TCP port of the ESPHome server.
        wake_model: Identifier of the wake word model.
        camera_port: TCP port of the camera server.
        camera_enabled: Whether ``start()`` launches the camera server.
    """
    # Configuration, stored as given.
    self.reachy_mini = reachy_mini
    self.name = name
    self.host = host
    self.port = port
    self.wake_model = wake_model
    self.camera_port = camera_port
    self.camera_enabled = camera_enabled

    # Runtime handles, populated by start().
    self._server = None
    self._discovery = None
    self._audio_thread = None
    self._running = False
    self._state: Optional[ServerState] = None
    self._motion = ReachyMiniMotion(reachy_mini)
    self._camera_server: Optional[MJPEGCameraServer] = None

    # Accumulates samples so audio can be emitted in fixed-size chunks.
    self._audio_buffer: np.ndarray = np.array([], dtype=np.float32)
85
+
86
async def start(self) -> None:
    """Bring up the voice assistant service.

    In order: ensure the data directories exist, verify bundled files, load
    wake words, preferences and models, build the shared ``ServerState``,
    start the robot media system (when a robot is attached), start the
    motion controller and the audio thread, optionally start the camera
    server, open the ESPHome server, register mDNS discovery and start
    Sendspin discovery on the music player.
    """
    _LOGGER.info("Initializing voice assistant service...")

    # Make sure every directory the service touches is present.
    for directory in (_WAKEWORDS_DIR, _SOUNDS_DIR, _LOCAL_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # Bundled models and sounds are only checked, never downloaded here.
    await self._verify_required_files()

    available_wake_words = self._load_available_wake_words()
    _LOGGER.debug("Available wake words: %s", list(available_wake_words.keys()))

    preferences_path = _LOCAL_DIR / "preferences.json"
    preferences = self._load_preferences(preferences_path)

    wake_models, active_wake_words = self._load_wake_models(
        available_wake_words, preferences
    )
    stop_model = self._load_stop_model()

    # Separate players for music and TTS, both bound to the robot.
    music_player = AudioPlayer(self.reachy_mini)
    tts_player = AudioPlayer(self.reachy_mini)

    self._state = ServerState(
        name=self.name,
        mac_address=get_mac(),
        audio_queue=Queue(),
        entities=[],
        available_wake_words=available_wake_words,
        wake_words=wake_models,
        active_wake_words=active_wake_words,
        stop_word=stop_model,
        music_player=music_player,
        tts_player=tts_player,
        wakeup_sound=str(_SOUNDS_DIR / "wake_word_triggered.flac"),
        timer_finished_sound=str(_SOUNDS_DIR / "timer_finished.flac"),
        preferences=preferences,
        preferences_path=preferences_path,
        refractory_seconds=2.0,
        download_dir=_LOCAL_DIR,
        reachy_mini=self.reachy_mini,
        motion_enabled=self.reachy_mini is not None,
    )

    # Expose the motion controller through the shared state.
    self._state.motion = self._motion

    if self.reachy_mini is not None:
        try:
            media = self.reachy_mini.media
            if media.audio is None:
                _LOGGER.warning("Reachy Mini audio system not available")
            else:
                # Only start streams that are not already running, to avoid
                # conflicts with an already-active media system.
                if getattr(media, '_recording', False):
                    _LOGGER.debug("Reachy Mini recording already active")
                else:
                    media.start_recording()
                    _LOGGER.info("Started Reachy Mini recording")

                if getattr(media, '_playing', False):
                    _LOGGER.debug("Reachy Mini playback already active")
                else:
                    media.start_playing()
                    _LOGGER.info("Started Reachy Mini playback")

                _LOGGER.info("Reachy Mini media system initialized")

                # Tune the microphone for voice recognition.
                self._optimize_microphone_settings()
        except Exception as err:
            _LOGGER.warning("Failed to initialize Reachy Mini media: %s", err)

    # Start the motion controller's control loop.
    if self._motion is not None:
        self._motion.start()

    # Non-daemon thread so stop() can join it for a clean shutdown.
    self._running = True
    self._audio_thread = threading.Thread(
        target=self._process_audio,
        daemon=False,
    )
    self._audio_thread.start()

    # The camera server must exist before the ESPHome server is created,
    # because the protocol factory below hands it to every connection.
    if self.camera_enabled:
        self._camera_server = MJPEGCameraServer(
            reachy_mini=self.reachy_mini,
            host=self.host,
            port=self.camera_port,
            fps=15,
            quality=80,
            enable_face_tracking=True,
        )
        await self._camera_server.start()

        # Let the motion controller consume face tracking results.
        if self._motion is not None:
            self._motion.set_camera_server(self._camera_server)

    event_loop = asyncio.get_running_loop()
    camera_server = self._camera_server  # captured by the factory below
    self._server = await event_loop.create_server(
        lambda: VoiceSatelliteProtocol(self._state, camera_server=camera_server),
        host=self.host,
        port=self.port,
    )

    # Announce the service over mDNS.
    self._discovery = HomeAssistantZeroconf(port=self.port, name=self.name)
    await self._discovery.register_server()

    # Sendspin is for music playback, so it hangs off the music player.
    await music_player.start_sendspin_discovery()

    _LOGGER.info("Voice assistant service started on %s:%s", self.host, self.port)
220
+
221
def _optimize_microphone_settings(self) -> None:
    """Configure the ReSpeaker XVF3800 audio processor for voice commands.

    Values previously saved in preferences (AGC on/off, AGC max gain, noise
    suppression strength) take precedence; otherwise defaults are used
    (AGC on, 30 dB max gain, 15% noise suppression). The remaining
    parameters are always written with fixed values.

    Parameters are written one by one, in a fixed order. A failed write is
    logged at debug level and does not stop the following writes. Any other
    failure is logged as a warning.

    Reference: reachy_mini/src/reachy_mini/media/audio_control_utils.py
    XMOS docs: https://www.xmos.com/documentation/XM-014888-PC/
    """
    if self.reachy_mini is None:
        return

    def source_tag(from_prefs) -> str:
        # Suffix telling whether a logged value came from preferences.
        return " [from preferences]" if from_prefs else " [default]"

    try:
        # The ReSpeaker is reached through the media audio system.
        audio = self.reachy_mini.media.audio
        if audio is None or not hasattr(audio, '_respeaker'):
            _LOGGER.debug("ReSpeaker not available for optimization")
            return

        respeaker = audio._respeaker
        if respeaker is None:
            _LOGGER.debug("ReSpeaker device not found")
            return

        def write_param(param, values, failure_msg) -> bool:
            # Write one parameter; report failure at debug level only.
            try:
                respeaker.write(param, values)
            except Exception as err:
                _LOGGER.debug(failure_msg, err)
                return False
            return True

        prefs = self._state.preferences if self._state else None

        # ---- 1. Automatic gain control --------------------------------
        agc_saved = prefs and prefs.agc_enabled is not None
        agc_enabled = prefs.agc_enabled if agc_saved else True
        agc_flag = 1 if agc_enabled else 0
        if write_param("PP_AGCONOFF", [agc_flag], "Could not set AGC: %s"):
            _LOGGER.info(
                "AGC %s (PP_AGCONOFF=%d)%s",
                "enabled" if agc_enabled else "disabled",
                agc_flag,
                source_tag(agc_saved),
            )

        gain_saved = prefs and prefs.agc_max_gain is not None
        agc_max_gain = prefs.agc_max_gain if gain_saved else 30.0
        if write_param(
            "PP_AGCMAXGAIN", [agc_max_gain], "Could not set PP_AGCMAXGAIN: %s"
        ):
            _LOGGER.info(
                "AGC max gain set (PP_AGCMAXGAIN=%.1fdB)%s",
                agc_max_gain,
                source_tag(gain_saved),
            )

        # Target level after gain; less negative means louder output.
        if write_param(
            "PP_AGCDESIREDLEVEL", [-18.0], "Could not set PP_AGCDESIREDLEVEL: %s"
        ):
            _LOGGER.debug("AGC desired level set (PP_AGCDESIREDLEVEL=-18.0dB)")

        # Main AGC time constant, in seconds.
        if write_param("PP_AGCTIME", [0.5], "Could not set PP_AGCTIME: %s"):
            _LOGGER.debug("AGC time constant set (PP_AGCTIME=0.5s)")

        # ---- 2. Base microphone gain (linear multiplier) ---------------
        if write_param(
            "AUDIO_MGR_MIC_GAIN", [2.0], "Could not set AUDIO_MGR_MIC_GAIN: %s"
        ):
            _LOGGER.info("Microphone gain increased (AUDIO_MGR_MIC_GAIN=2.0)")

        # ---- 3. Noise suppression --------------------------------------
        # The preference is a "suppression strength" percentage, while
        # PP_MIN_NS is the complementary fraction (15% -> 0.85).
        ns_saved = prefs and prefs.noise_suppression is not None
        noise_suppression = prefs.noise_suppression if ns_saved else 15.0
        pp_min_ns = 1.0 - (noise_suppression / 100.0)
        if write_param("PP_MIN_NS", [pp_min_ns], "Could not set PP_MIN_NS: %s"):
            _LOGGER.info(
                "Noise suppression set to %.0f%% strength (PP_MIN_NS=%.2f)%s",
                noise_suppression,
                pp_min_ns,
                source_tag(ns_saved),
            )

        # PP_MIN_NN is kept equal to PP_MIN_NS.
        if write_param("PP_MIN_NN", [pp_min_ns], "Could not set PP_MIN_NN: %s"):
            _LOGGER.debug("Noise floor threshold set (PP_MIN_NN=%.2f)", pp_min_ns)

        # ---- 4. Echo cancellation (matters during TTS playback) --------
        if write_param("PP_ECHOONOFF", [1], "Could not set PP_ECHOONOFF: %s"):
            _LOGGER.debug("Echo cancellation enabled (PP_ECHOONOFF=1)")

        # ---- 5. High-pass filter (removes low-frequency noise) ---------
        if write_param("AEC_HPFONOFF", [1], "Could not set AEC_HPFONOFF: %s"):
            _LOGGER.debug("High-pass filter enabled (AEC_HPFONOFF=1)")

        _LOGGER.info(
            "Microphone settings initialized (AGC=%s, MaxGain=%.0fdB, NoiseSuppression=%.0f%%)",
            "ON" if agc_enabled else "OFF",
            agc_max_gain,
            noise_suppression,
        )

    except Exception as err:
        _LOGGER.warning("Failed to optimize microphone settings: %s", err)
350
+
351
async def stop(self) -> None:
    """Shut the voice assistant service down.

    Order: stop recording, clear the running flag, join the audio thread
    (1 s timeout), stop playback, close the ESPHome server, unregister
    mDNS, stop Sendspin, stop the camera server and shut down the motion
    controller. Errors from the robot media calls are logged as warnings;
    errors from the later awaited steps propagate to the caller.
    """
    _LOGGER.info("Stopping voice assistant service...")

    robot = self.reachy_mini

    # 1. Stop recording first so no new audio arrives.
    if robot is not None:
        try:
            robot.media.stop_recording()
            _LOGGER.debug("Reachy Mini recording stopped")
        except Exception as err:
            _LOGGER.warning("Error stopping Reachy Mini recording: %s", err)

    # 2. Signal the audio loop to exit.
    self._running = False

    # 3. Give the audio thread a moment to finish.
    audio_thread = self._audio_thread
    if audio_thread:
        audio_thread.join(timeout=1.0)
        if audio_thread.is_alive():
            _LOGGER.warning("Audio thread did not stop in time")

    # 4. Stop playback.
    if robot is not None:
        try:
            robot.media.stop_playing()
            _LOGGER.debug("Reachy Mini playback stopped")
        except Exception as err:
            _LOGGER.warning("Error stopping Reachy Mini playback: %s", err)

    # 5. Close the ESPHome server.
    if self._server:
        self._server.close()
        await self._server.wait_closed()

    # 6. Withdraw the mDNS registration.
    if self._discovery:
        await self._discovery.unregister_server()

    # 6.5. Stop Sendspin on the music player.
    if self._state and self._state.music_player:
        await self._state.music_player.stop_sendspin()

    # 7. Stop and drop the camera server.
    if self._camera_server:
        await self._camera_server.stop()
        self._camera_server = None

    # 8. Shut down the motion controller.
    if self._motion:
        self._motion.shutdown()

    _LOGGER.info("Voice assistant service stopped.")
403
+
404
async def _verify_required_files(self) -> None:
    """Check that the bundled wake word and sound files are present.

    Nothing is downloaded or created. Missing files are reported with a
    warning per category, and a single info message is logged when
    everything is in place.
    """
    # Expected in the wakewords/ directory.
    wakeword_names = (
        "okay_nabu.tflite",
        "okay_nabu.json",
        "hey_jarvis.tflite",
        "hey_jarvis.json",
        "stop.tflite",
        "stop.json",
    )
    # Expected in the sounds/ directory.
    sound_names = (
        "wake_word_triggered.flac",
        "timer_finished.flac",
    )

    missing_wakewords = [
        name for name in wakeword_names if not (_WAKEWORDS_DIR / name).exists()
    ]
    if missing_wakewords:
        _LOGGER.warning(
            "Missing wake word files: %s. These should be bundled with the package.",
            missing_wakewords,
        )

    missing_sounds = [
        name for name in sound_names if not (_SOUNDS_DIR / name).exists()
    ]
    if missing_sounds:
        _LOGGER.warning(
            "Missing sound files: %s. These should be bundled with the package.",
            missing_sounds,
        )

    if not (missing_wakewords or missing_sounds):
        _LOGGER.info("All required files verified successfully.")
450
+
451
def _load_available_wake_words(self) -> Dict[str, AvailableWakeWord]:
    """Scan the wake word directories and index every usable configuration.

    Each ``*.json`` file other than ``stop.json`` describes one wake word.
    Directories are scanned in order, and a later directory replaces an
    entry with the same id found in an earlier one. A configuration that
    cannot be read or interpreted is logged as a warning and skipped.

    Returns:
        Mapping from model id (the config file stem) to its description.
    """
    # Lowest priority first: later directories overwrite earlier entries
    # that share an id, so the bundled MicroWakeWord directory wins over
    # both the external and the OpenWakeWord definitions.
    search_dirs = (
        _WAKEWORDS_DIR / "openWakeWord",  # OpenWakeWord (lowest priority)
        _LOCAL_DIR / "external_wake_words",  # External wake words
        _WAKEWORDS_DIR,  # MicroWakeWord (highest priority)
    )

    found: Dict[str, AvailableWakeWord] = {}
    for directory in search_dirs:
        if not directory.exists():
            continue

        for config_path in directory.glob("*.json"):
            model_id = config_path.stem
            if model_id == "stop":
                # The stop model is never offered as a wake word.
                continue

            try:
                with config_path.open(encoding="utf-8") as config_file:
                    config = json.load(config_file)

                model_type = WakeWordType(config.get("type", "micro"))
                # OpenWakeWord configs name a model file that sits next to
                # the config; every other type points at the config itself.
                wake_word_path = (
                    config_path.parent / config["model"]
                    if model_type == WakeWordType.OPEN_WAKE_WORD
                    else config_path
                )

                found[model_id] = AvailableWakeWord(
                    id=model_id,
                    type=model_type,
                    wake_word=config.get("wake_word", model_id),
                    trained_languages=config.get("trained_languages", []),
                    wake_word_path=wake_word_path,
                )
            except Exception as err:
                _LOGGER.warning("Failed to load wake word %s: %s", config_path, err)

    return found
494
+
495
def _load_preferences(self, preferences_path: Path) -> Preferences:
    """Read stored user preferences, falling back to defaults.

    Args:
        preferences_path: Location of the JSON preferences file.

    Returns:
        ``Preferences`` built from the file's top-level keys, or a default
        ``Preferences()`` when the file is absent or cannot be used (the
        failure is logged as a warning).
    """
    if not preferences_path.exists():
        return Preferences()

    try:
        stored = json.loads(preferences_path.read_text(encoding="utf-8"))
        return Preferences(**stored)
    except Exception as err:
        _LOGGER.warning("Failed to load preferences: %s", err)

    return Preferences()
506
+
507
def _load_wake_models(
    self,
    available_wake_words: Dict[str, AvailableWakeWord],
    preferences: Preferences,
):
    """Instantiate the wake word models that should be listening.

    The ids in ``preferences.active_wake_words`` are tried first; unknown
    ids and models that fail to load are logged and skipped. If that leaves
    nothing loaded, ``self.wake_model`` is tried as the default.

    Args:
        available_wake_words: Known wake word descriptions keyed by id.
        preferences: User preferences holding the preferred wake word ids.

    Returns:
        Tuple ``(wake_models, active_wake_words)``: the loaded models keyed
        by id and the set of those ids. Both are empty when nothing could
        be loaded.
    """
    from pymicro_wakeword import MicroWakeWord
    from pyopen_wakeword import OpenWakeWord

    loaded: Dict[str, Union[MicroWakeWord, OpenWakeWord]] = {}
    active_ids: Set[str] = set()

    # Preferred models first; an empty or missing preference iterates nothing.
    for preferred_id in preferences.active_wake_words or ():
        entry = available_wake_words.get(preferred_id)
        if entry is None:
            _LOGGER.warning("Unknown wake word: %s", preferred_id)
            continue

        try:
            _LOGGER.debug("Loading wake model: %s", preferred_id)
            model = entry.load()
            # Tag the model with its id so it can be identified later.
            model.id = preferred_id
            loaded[preferred_id] = model
            active_ids.add(preferred_id)
        except Exception as err:
            _LOGGER.warning("Failed to load wake model %s: %s", preferred_id, err)

    if loaded:
        return loaded, active_ids

    # Nothing from the preferences could be loaded: fall back to the default.
    default_id = self.wake_model
    default_entry = available_wake_words.get(default_id)
    if default_entry:
        try:
            _LOGGER.debug("Loading default wake model: %s", default_id)
            model = default_entry.load()
            # Tag the model with its id so it can be identified later.
            model.id = default_id
            loaded[default_id] = model
            active_ids.add(default_id)
        except Exception as err:
            _LOGGER.error("Failed to load default wake model: %s", err)

    return loaded, active_ids
552
+
553
def _load_stop_model(self):
    """Load the stop word model, degrading gracefully when it is unusable.

    ``stop.json`` in the wake word directory is tried first. If it is
    missing or fails to load, the ``okay_nabu`` configuration is loaded
    instead as a stand-in. Either way the returned model is tagged with the
    id ``'stop'``.

    Returns:
        The loaded model, or ``None`` when neither configuration could be
        loaded. Load failures are logged as warnings, never raised.
    """
    from pymicro_wakeword import MicroWakeWord

    def _load_as_stop(config_path):
        # Build a model from config_path and tag it with the 'stop' id.
        model = MicroWakeWord.from_config(config_path)
        model.id = "stop"
        return model

    stop_config = _WAKEWORDS_DIR / "stop.json"
    if stop_config.exists():
        try:
            return _load_as_stop(stop_config)
        except Exception as e:
            _LOGGER.warning("Failed to load stop model: %s", e)

    # Fall back to a stand-in model if the stop model is not available.
    _LOGGER.warning("Stop model not available, using fallback")
    okay_nabu_config = _WAKEWORDS_DIR / "okay_nabu.json"
    if okay_nabu_config.exists():
        try:
            return _load_as_stop(okay_nabu_config)
        except Exception as e:
            # Same best-effort policy as the primary load: a broken fallback
            # must not raise out of a method that otherwise returns None.
            _LOGGER.warning("Failed to load fallback stop model: %s", e)

    return None
575
+
576
def _process_audio(self) -> None:
    """Run the microphone processing loop until it returns or fails.

    Creates a fresh processing context, then hands it to the Reachy Mini
    loop when a robot is attached, or to the system-microphone loop
    otherwise. Any exception escaping the chosen loop is logged with its
    traceback and not re-raised.
    """
    from pymicro_wakeword import MicroWakeWordFeatures

    ctx = AudioProcessingContext()
    ctx.micro_features = MicroWakeWordFeatures()

    try:
        _LOGGER.info("Starting audio processing...")

        if self.reachy_mini is None:
            source_message = "Using system microphone (fallback)"
            run_loop = self._audio_loop_fallback
        else:
            source_message = "Using Reachy Mini's microphone"
            run_loop = self._audio_loop_reachy

        _LOGGER.info(source_message)
        run_loop(ctx)

    except Exception:
        _LOGGER.exception("Error processing audio")
595
+
596
def _audio_loop_reachy(self, ctx: AudioProcessingContext) -> None:
    """Pump audio from Reachy Mini's microphone while the service runs.

    Each iteration waits for a satellite, refreshes the wake word list,
    and processes one chunk if a full one is buffered. An error in an
    iteration is logged and followed by a short pause; the loop continues.

    Args:
        ctx: Audio processing context shared across iterations.
    """
    while self._running:
        try:
            if self._wait_for_satellite():
                self._update_wake_words_list(ctx)

                pcm_chunk = self._get_reachy_audio_chunk()
                if pcm_chunk is not None:
                    self._process_audio_chunk(ctx, pcm_chunk)
                else:
                    # Not enough samples buffered yet; yield briefly.
                    time.sleep(0.01)

        except Exception as err:
            _LOGGER.error("Error in Reachy audio processing: %s", err)
            time.sleep(0.1)
616
+
617
def _audio_loop_fallback(self, ctx: AudioProcessingContext) -> None:
    """Pump audio from the system microphone while the service runs.

    Opens a 16 kHz, single-channel float32 input stream and reads it in
    blocks of 1024 frames. Each block is flattened, converted to PCM and
    processed. A reported buffer overflow is logged as a warning, and the
    block is still processed.

    Args:
        ctx: Audio processing context shared across iterations.
    """
    import sounddevice as sd

    frames_per_read = 1024
    stream_options = dict(
        samplerate=16000,
        channels=1,
        blocksize=frames_per_read,
        dtype="float32",
    )

    with sd.InputStream(**stream_options) as stream:
        while self._running:
            if self._wait_for_satellite():
                self._update_wake_words_list(ctx)

                samples, overflowed = stream.read(frames_per_read)
                if overflowed:
                    _LOGGER.warning("Audio buffer overflow")

                pcm_chunk = self._convert_to_pcm(samples.reshape(-1))
                self._process_audio_chunk(ctx, pcm_chunk)
644
+
645
def _wait_for_satellite(self) -> bool:
    """Report whether a satellite is attached, pausing briefly if not.

    Returns:
        True when ``self._state`` exists and has a satellite. Otherwise
        sleeps 0.1 s, so polling callers do not spin, and returns False.
    """
    state = self._state
    connected = state is not None and state.satellite is not None
    if not connected:
        time.sleep(0.1)
    return connected
651
+
652
def _update_wake_words_list(self, ctx: AudioProcessingContext) -> None:
    """Refresh the context's active wake word models when needed.

    A refresh is attempted when the context has no models yet, or when the
    shared state flags a change and has models loaded. On a real refresh
    the feature extractors and pending inputs are reset and the refractory
    timer is restarted, so audio heard under the old configuration cannot
    trigger the new one.

    If nothing changed and no loaded model is currently active, the context
    is left untouched. Otherwise this method would recreate the feature
    extractors (which also feed stop word detection) and log at INFO on
    every audio chunk for as long as the active list stayed empty.

    Args:
        ctx: Audio processing context to update in place.
    """
    from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
    from pymicro_wakeword import MicroWakeWordFeatures

    state = self._state
    change_requested = bool(state.wake_words_changed and state.wake_words)
    if ctx.wake_words and not change_requested:
        return

    state.wake_words_changed = False

    # state.wake_words maps id -> model; keep only the ids marked active.
    selected = []
    for ww_id, ww_model in state.wake_words.items():
        if ww_id in state.active_wake_words:
            # Ensure the model has an 'id' attribute for later use.
            if not hasattr(ww_model, 'id'):
                setattr(ww_model, 'id', ww_id)
            selected.append(ww_model)

    if not selected and not change_requested:
        # Still nothing to listen for and nothing changed: keep the
        # extractor state and stay quiet instead of resetting every chunk.
        ctx.has_oww = False
        return

    ctx.wake_words.clear()

    # Reset feature extractors to clear any residual audio data.
    # This prevents false triggers when switching wake words.
    ctx.micro_features = MicroWakeWordFeatures()
    ctx.micro_inputs.clear()
    if ctx.oww_features is not None:
        ctx.oww_features = OpenWakeWordFeatures.from_builtin()
        ctx.oww_inputs.clear()

    # Also restart the refractory period to prevent an immediate trigger.
    ctx.last_active = time.monotonic()

    ctx.wake_words.extend(selected)

    ctx.has_oww = any(isinstance(ww, OpenWakeWord) for ww in ctx.wake_words)
    if ctx.has_oww and ctx.oww_features is None:
        ctx.oww_features = OpenWakeWordFeatures.from_builtin()

    _LOGGER.info("Active wake words updated: %s (features reset)", list(state.active_wake_words))
686
+
687
def _get_reachy_audio_chunk(self) -> Optional[bytes]:
    """Get a fixed-size audio chunk from Reachy Mini's microphone.

    New samples from the SDK are converted to float32, reduced to one
    channel and appended to an internal buffer. Two-column input is
    averaged; any other 2-D input keeps its first column. Arrays that are
    not 1-D after that step, or whose dtype is string, unicode, object,
    void or boolean, are not buffered.

    Returns:
        PCM bytes for exactly AUDIO_BLOCK_SIZE samples, or None if the
        buffer does not yet hold that many.
    """
    # Get new audio data from the SDK.
    audio_data = self.reachy_mini.media.get_audio_sample()

    # Append new data to the buffer if valid (isinstance also rejects None).
    if isinstance(audio_data, np.ndarray) and audio_data.size > 0:
        try:
            if audio_data.dtype.kind not in ('S', 'U', 'O', 'V', 'b'):
                if audio_data.dtype != np.float32:
                    audio_data = np.asarray(audio_data, dtype=np.float32)

                # Convert stereo to mono.
                if audio_data.ndim == 2 and audio_data.shape[1] == 2:
                    audio_data = audio_data.mean(axis=1)
                elif audio_data.ndim == 2:
                    audio_data = audio_data[:, 0].copy()

                if audio_data.ndim == 1:
                    self._audio_buffer = np.concatenate([self._audio_buffer, audio_data])
        except (TypeError, ValueError) as e:
            # Best effort: drop the unusable sample, but leave a trace
            # rather than discarding it silently.
            _LOGGER.debug("Discarding unusable audio sample: %s", e)

    # Return a fixed-size chunk if we have enough data.
    if len(self._audio_buffer) >= AUDIO_BLOCK_SIZE:
        chunk = self._audio_buffer[:AUDIO_BLOCK_SIZE]
        self._audio_buffer = self._audio_buffer[AUDIO_BLOCK_SIZE:]
        return self._convert_to_pcm(chunk)

    return None
724
+
725
def _convert_to_pcm(self, audio_chunk_array: np.ndarray) -> bytes:
    """Encode float audio samples as little-endian signed 16-bit PCM.

    Args:
        audio_chunk_array: Samples on a nominal [-1.0, 1.0] scale; values
            outside that range are clipped to it.

    Returns:
        The samples scaled by 32767 and packed as ``<i2`` bytes.
    """
    clipped = np.clip(audio_chunk_array, -1.0, 1.0)
    scaled = clipped * 32767.0
    return scaled.astype("<i2").tobytes()
732
+
733
def _process_audio_chunk(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
    """Run one PCM chunk through streaming and detection.

    The chunk is always forwarded to the satellite and always checked for
    wake words; duplicate triggers are suppressed by the refractory period
    in the wake word detection step, not here.

    Args:
        ctx: Audio processing context
        audio_chunk: PCM audio bytes
    """
    # Stream audio to Home Assistant.
    satellite = self._state.satellite
    satellite.handle_audio(audio_chunk)

    # Turn the chunk into wake word features.
    self._process_features(ctx, audio_chunk)

    # Wake words first, then the stop word, on the same features.
    for detect in (self._detect_wake_words, self._detect_stop_word):
        detect(ctx)
754
+
755
def _process_features(self, ctx: AudioProcessingContext, audio_chunk: bytes) -> None:
    """Replace the context's pending feature inputs with this chunk's.

    MicroWakeWord features are always extracted. OpenWakeWord features are
    extracted only when the context has an OpenWakeWord model and its
    extractor is initialised.

    Args:
        ctx: Audio processing context whose input lists are refilled.
        audio_chunk: PCM audio bytes to extract features from.
    """
    micro_buffer = ctx.micro_inputs
    micro_buffer.clear()
    micro_buffer.extend(ctx.micro_features.process_streaming(audio_chunk))

    if not ctx.has_oww or ctx.oww_features is None:
        return

    oww_buffer = ctx.oww_inputs
    oww_buffer.clear()
    oww_buffer.extend(ctx.oww_features.process_streaming(audio_chunk))
763
+
764
def _detect_wake_words(self, ctx: AudioProcessingContext) -> None:
    """Feed pending features to each active model and act on detections.

    A MicroWakeWord model activates when any feature frame makes it report
    a detection. An OpenWakeWord model activates when any probability it
    yields exceeds 0.5. An activation wakes the satellite and notifies the
    motion controller only if the refractory period has elapsed since the
    last accepted activation.

    Args:
        ctx: Audio processing context holding models and pending inputs.
    """
    from pymicro_wakeword import MicroWakeWord
    from pyopen_wakeword import OpenWakeWord

    for model in ctx.wake_words:
        # Lists rather than generators: every frame must reach the model
        # even after the first hit, so any() must not short-circuit the feed.
        if isinstance(model, MicroWakeWord):
            hits = [model.process_streaming(frame) for frame in ctx.micro_inputs]
        elif isinstance(model, OpenWakeWord):
            hits = [
                prob > 0.5
                for frame in ctx.oww_inputs
                for prob in model.process_streaming(frame)
            ]
        else:
            hits = []

        if not any(hits):
            continue

        # Check the refractory period to prevent duplicate triggers.
        now = time.monotonic()
        refractory_elapsed = (ctx.last_active is None) or (
            (now - ctx.last_active) > self._state.refractory_seconds
        )
        if not refractory_elapsed:
            continue

        _LOGGER.info("Wake word detected: %s", model.id)
        self._state.satellite.wakeup(model)
        # Face tracking will handle looking at the user automatically.
        self._motion.on_wakeup()
        ctx.last_active = now
797
+
798
def _detect_stop_word(self, ctx: AudioProcessingContext) -> None:
    """Feed pending MicroWakeWord features to the stop model.

    Does nothing when no stop model is configured. A detection stops the
    satellite only while the stop model's id is listed in the state's
    active wake words.

    Args:
        ctx: Audio processing context holding the pending feature inputs.
    """
    stop_model = self._state.stop_word
    if not stop_model:
        return

    # Build a list so every frame is fed to the model even after a hit.
    hits = [stop_model.process_streaming(frame) for frame in ctx.micro_inputs]

    if any(hits) and (stop_model.id in self._state.active_wake_words):
        _LOGGER.info("Stop word detected")
        self._state.satellite.stop()
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/.gitkeep RENAMED
File without changes
{reachy_mini_home_assistant → reachy_mini_ha_voice}/wakewords/README.md RENAMED
File without changes