Desmond-Dong committed on
Commit
6f4787d
·
1 Parent(s): 56430b3

docs: update architecture to reflect Home Assistant STT/TTS

Browse files

- Remove STT/TTS engine sections (handled by Home Assistant)
- Clarify audio streaming to/from Home Assistant
- Update data flow diagrams
- Emphasize ESPHome protocol integration
- Keep wake word detection (local offline)

Files changed (1) hide show
  1. ARCHITECTURE.md +682 -356
ARCHITECTURE.md CHANGED
@@ -14,25 +14,24 @@
14
  ┌─────────────────────────────────────────────────────────────────┐
15
  │ 业务逻辑层 (Business Logic) │
16
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
17
- │ │ Voice │ │ Motion │ │ Vision │ │
18
- │ │ Manager │ │ Controller │ │ Processor │ │
19
- │ └──────────────┘ └──────────────┘ └──────────────┘ │
20
- │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
21
- │ │ ESPHome │ │ State │ │ Event │ │
22
- │ │ Handler │ │ Manager │ │ Dispatcher │ │
23
  │ └──────────────┘ └──────────────┘ └──────────────┘ │
 
 
 
 
24
  └─────────────────────────────────────────────────────────────────┘
25
 
26
  ┌─────────────────────────────────────────────────────────────────┐
27
  │ 服务层 (Services) │
28
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
29
- │ │ Wake Word │ │ STT Engine │ │ TTS Engine │ │
30
- │ │ Detector │ │ (Whisper) │ │ (Piper) │ │
31
- │ └──────────────┘ └──────────────┘ └──────────────┘ │
32
- │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
33
- │ │ Audio │ │ Motion │ │ Face │ │
34
- │ │ Processor │ │ Queue │ │ Tracker │ │
35
  │ └──────────────┘ └──────────────┘ └──────────────┘ │
 
 
 
36
  └─────────────────────────────────────────────────────────────────┘
37
 
38
  ┌─────────────────────────────────────────────────────────────────┐
@@ -48,7 +47,7 @@
48
  └─────────────────────────────────────────────────────────────────┘
49
 
50
  ┌─────────────────────────────────────────────────────────────────┐
51
- │ Reachy Mini Hardware │
52
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
53
  │ │ Microphone │ │ Head Motors │ │ Camera │ │
54
  │ │ Array (4) │ │ (6 DOF) │ │ (Wide) │ │
@@ -57,18 +56,37 @@
57
  │ │ Speaker │ │ Antennas │ │
58
  │ │ (5W) │ │ (2) │ │
59
  │ └──────────────┘ └──────────────┘ │
 
 
 
 
60
  └─────────────────────────────────────────────────────────────────┘
61
  ```
62
 
63
- ## 2. 模块设计
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- ### 2.1 音频模块 (audio/)
66
 
67
  **职责**:
68
  - 音频设备管理(麦克风、扬声器)
69
  - 音频录制和播放
70
- - 音频格式转换
71
- - 回声消除
72
 
73
  **接口**:
74
 
@@ -82,57 +100,91 @@ class AudioAdapter(ABC):
82
  pass
83
 
84
  @abstractmethod
85
- async def list_output_devices(self) -> List[AudioDevice]:
86
- """列出可用的音频输出设备"""
87
- pass
88
-
89
- @abstractmethod
90
- async def start_recording(self, device_id: str, callback: Callable[[bytes], None]):
 
 
91
  """开始录制音频"""
92
  pass
93
 
94
  @abstractmethod
95
- async def stop_recording(self):
96
- """停止录制音频"""
97
- pass
98
-
99
- @abstractmethod
100
- async def play_audio(self, audio_data: bytes, device_id: str):
 
101
  """播放音频"""
102
  pass
103
 
104
 
105
  class MicrophoneArray(AudioAdapter):
106
- """麦克风阵列适配器"""
107
 
108
  def __init__(self, sample_rate: int = 16000, channels: int = 1):
109
  self.sample_rate = sample_rate
110
  self.channels = channels
111
  self._stream = None
112
-
113
- async def start_recording(self, device_id: str, callback: Callable[[bytes], None]):
114
- """开始从麦克风阵列录制音频"""
115
- # 使用 sounddevice 或 pyaudio
116
- pass
117
 
118
 
119
  class Speaker(AudioAdapter):
120
- """扬声器适配器"""
121
 
122
  def __init__(self, sample_rate: int = 16000):
123
  self.sample_rate = sample_rate
 
 
 
 
 
 
 
124
 
125
- async def play_audio(self, audio_data: bytes, device_id: str):
126
- """播放音频到扬声器"""
127
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  ```
129
 
130
- ### 2.2 语音模块 (voice/)
131
 
132
  **职责**:
133
- - 唤醒词检测
134
- - 语音转文字(STT)
135
- - 文字转语音(TTS)
136
 
137
  **接口**:
138
 
@@ -149,102 +201,76 @@ class WakeWordDetector(ABC):
149
  async def process_audio(self, audio_chunk: bytes) -> bool:
150
  """处理音频块,返回是否检测到唤醒词"""
151
  pass
152
-
153
- @abstractmethod
154
- async def get_confidence(self) -> float:
155
- """获取检测置信度"""
156
- pass
157
 
158
 
159
  class MicroWakeWordDetector(WakeWordDetector):
160
- """microWakeWord 检测器"""
161
 
162
  def __init__(self, model_path: str):
163
  self.model = None
164
  self.features = None
 
 
 
165
 
166
  async def load_model(self, model_path: str):
167
  """加载 microWakeWord 模型"""
168
- from pymicro_wakeword import MicroWakeWord
169
- self.model = MicroWakeWord.from_config(model_path)
170
  self.features = MicroWakeWordFeatures()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
 
173
  class OpenWakeWordDetector(WakeWordDetector):
174
- """openWakeWord 检测器"""
175
 
176
  def __init__(self, model_path: str):
177
  self.model = None
178
  self.features = None
 
 
 
179
 
180
  async def load_model(self, model_path: str):
181
  """加载 openWakeWord 模型"""
182
- from pyopen_wakeword import OpenWakeWord
183
- self.model = OpenWakeWord(model_path)
184
  self.features = OpenWakeWordFeatures.from_builtin()
185
-
186
-
187
- class STTEngine(ABC):
188
- """语音转文字引擎抽象基类"""
189
-
190
- @abstractmethod
191
- async def transcribe(self, audio_data: bytes) -> str:
192
- """将音频转换为文字"""
193
- pass
194
-
195
-
196
- class WhisperSTT(STTEngine):
197
- """Whisper STT 引擎"""
198
-
199
- def __init__(self, model_name: str = "base"):
200
- self.model = None
201
- self.model_name = model_name
202
-
203
- async def load_model(self):
204
- """加载 Whisper 模型"""
205
- import whisper
206
- self.model = whisper.load_model(self.model_name)
207
-
208
- async def transcribe(self, audio_data: bytes) -> str:
209
- """将音频转换为文字"""
210
- # 转换音频格式
211
- audio = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
212
- result = self.model.transcribe(audio)
213
- return result["text"]
214
-
215
-
216
- class TTSEngine(ABC):
217
- """文字转语音引擎抽象基类"""
218
-
219
- @abstractmethod
220
- async def synthesize(self, text: str) -> bytes:
221
- """将文字转换为音频"""
222
- pass
223
-
224
-
225
- class PiperTTS(TTSEngine):
226
- """Piper TTS 引擎"""
227
-
228
- def __init__(self, model_path: str):
229
- self.model = None
230
- self.model_path = model_path
231
-
232
- async def load_model(self):
233
- """加载 Piper 模型"""
234
- from piper import PiperVoice
235
- self.model = PiperVoice.load(self.model_path)
236
 
237
- async def synthesize(self, text: str) -> bytes:
238
- """将文字转换为音频"""
239
- # 使用 Piper 合成语音
240
- pass
 
 
 
 
 
 
 
 
241
  ```
242
 
243
- ### 2.3 运动模块 (motion/)
244
 
245
  **职责**:
246
- - 头部运动控制
247
- - 表情系统
248
  - 运动队列管理
249
  - 语音反应性运动
250
 
@@ -254,6 +280,11 @@ class PiperTTS(TTSEngine):
254
  class MotionController(ABC):
255
  """运动控制器抽象基类"""
256
 
 
 
 
 
 
257
  @abstractmethod
258
  async def wake_up(self):
259
  """唤醒机器人"""
@@ -265,14 +296,34 @@ class MotionController(ABC):
265
  pass
266
 
267
  @abstractmethod
268
- async def move_head(self, pose: np.ndarray, duration: float):
269
- """移动头部到指定姿态"""
270
  pass
271
 
272
  @abstractmethod
273
- async def move_antennas(self, left: float, right: float, duration: float):
274
  """移动天线"""
275
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
 
278
  class ReachyMiniMotionController(MotionController):
@@ -280,13 +331,16 @@ class ReachyMiniMotionController(MotionController):
280
 
281
  def __init__(self):
282
  self.reachy_mini = None
283
- self.motion_queue = MotionQueue()
 
 
284
 
285
  async def connect(self, host: str = 'localhost'):
286
  """连接到 Reachy Mini"""
287
  from reachy_mini import ReachyMini
 
288
  self.reachy_mini = ReachyMini(host=host)
289
- await self.wake_up()
290
 
291
  async def wake_up(self):
292
  """唤醒机器人"""
@@ -296,15 +350,73 @@ class ReachyMiniMotionController(MotionController):
296
  """关闭机器人"""
297
  self.reachy_mini.turn_off()
298
 
299
- async def move_head(self, pose: np.ndarray, duration: float):
300
- """移动头部到指定姿态"""
301
  self.reachy_mini.goto_target(head=pose, duration=duration)
302
 
303
- async def move_antennas(self, left: float, right: float, duration: float):
304
  """移动天线"""
305
  self.reachy_mini.goto_target(antennas=[left, right], duration=duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
 
307
 
 
308
  class MotionQueue:
309
  """运动队列管理器"""
310
 
@@ -313,112 +425,195 @@ class MotionQueue:
313
  self.medium_priority = asyncio.Queue()
314
  self.low_priority = asyncio.Queue()
315
  self.is_running = False
 
 
316
 
317
- async def add_high_priority(self, motion: Motion):
318
- """添加高优先级运动"""
319
- await self.high_priority.put(motion)
 
 
 
 
 
320
 
321
- async def add_medium_priority(self, motion: Motion):
322
- """添加中优先级运动"""
323
- await self.medium_priority.put(motion)
 
324
 
325
- async def add_low_priority(self, motion: Motion):
326
- """添加低优先级运动"""
327
- await self.low_priority.put(motion)
 
 
328
 
329
- async def process(self):
330
  """处理运动队列"""
331
- self.is_running = True
332
  while self.is_running:
333
- # 优先级: > >
334
- if not self.high_priority.empty():
335
- motion = await self.high_priority.get()
336
- elif not self.medium_priority.empty():
337
- motion = await self.medium_priority.get()
338
- elif not self.low_priority.empty():
339
- motion = await self.low_priority.get()
340
- else:
341
  await asyncio.sleep(0.01)
342
  continue
343
 
 
344
  await motion.execute()
 
 
 
 
 
 
 
 
 
 
 
 
345
  ```
346
 
347
- ### 2.4 ESPHome 模块 (esphome/)
348
 
349
  **职责**:
350
  - ESPHome 协议实现
351
  - 与 Home Assistant 通信
352
- - 事件处理
 
353
 
354
  **接口**:
355
 
356
  ```python
357
- class ESPHomeServer(ABC):
358
- """ESPHome 服务器抽象基类"""
359
 
360
- @abstractmethod
361
- async def start(self, host: str, port: int):
 
 
 
 
 
 
 
 
362
  """启动 ESPHome 服务器"""
363
- pass
 
 
 
 
 
364
 
365
- @abstractmethod
366
  async def stop(self):
367
  """停止 ESPHome 服务器"""
368
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
- @abstractmethod
371
  async def send_audio(self, audio_data: bytes):
372
- """发送音频数据到 Home Assistant"""
373
- pass
 
 
 
 
 
374
 
375
- @abstractmethod
376
  async def send_event(self, event_type: VoiceAssistantEventType, data: dict):
377
- """发送语音事件"""
378
- pass
379
-
380
-
381
- class VoiceSatelliteProtocol(ESPHomeServer):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  """语音卫星协议处理器"""
383
 
384
  def __init__(self, state: ServerState):
385
  self.state = state
386
  self._is_streaming = False
387
-
388
- async def handle_message(self, msg: message.Message):
389
- """处理 ESPHome 消息"""
390
- if isinstance(msg, VoiceAssistantRequest):
391
- if msg.start:
392
- self._is_streaming = True
393
- else:
394
- self._is_streaming = False
395
-
396
- elif isinstance(msg, VoiceAssistantEventResponse):
397
- event_type = VoiceAssistantEventType(msg.event_type)
398
- await self.handle_voice_event(event_type, msg.data)
399
 
400
  async def handle_audio(self, audio_chunk: bytes):
401
- """处理音频数据"""
402
- if self._is_streaming:
403
- await self.send_audio(audio_chunk)
404
-
405
- async def handle_voice_event(self, event_type: VoiceAssistantEventType, data: dict):
406
- """处理语音事件"""
407
- if event_type == VoiceAssistantEventType.VOICE_ASSISTANT_STT_END:
408
- # STT 完成
409
- text = data.get('text', '')
410
- await self.state.voice_manager.process_text(text)
411
 
412
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_START:
413
- # TTS 开始
414
- await self.state.motion_controller.start_speech_reactive_motion()
415
 
416
- elif event_type == VoiceAssistantEventType.VOICE_ASSISTANT_TTS_END:
417
- # TTS 结束
418
- await self.state.motion_controller.stop_speech_reactive_motion()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  ```
420
 
421
- ### 2.5 配置模块 (config/)
422
 
423
  **职责**:
424
  - 配置文件管理
@@ -432,20 +627,20 @@ class ConfigManager:
432
  """配置管理器"""
433
 
434
  def __init__(self, config_path: str = "config.json"):
435
- self.config_path = config_path
436
  self.config = self.load_config()
437
 
438
  def load_config(self) -> dict:
439
  """加载配置文件"""
440
- if os.path.exists(self.config_path):
441
- with open(self.config_path, 'r') as f:
442
  return json.load(f)
443
  return self.get_default_config()
444
 
445
  def save_config(self):
446
  """保存配置文件"""
447
- with open(self.config_path, 'w') as f:
448
- json.dump(self.config, f, indent=2)
449
 
450
  def get_default_config(self) -> dict:
451
  """获取默认配置"""
@@ -459,15 +654,11 @@ class ConfigManager:
459
  },
460
  "voice": {
461
  "wake_word": "okay_nabu",
462
- "stt_engine": "whisper",
463
- "stt_model": "base",
464
- "tts_engine": "piper",
465
- "tts_model": "en_US-lessac-medium"
466
  },
467
  "motion": {
468
  "enabled": True,
469
- "speech_reactive": True,
470
- "face_tracking": False
471
  },
472
  "esphome": {
473
  "host": "0.0.0.0",
@@ -480,16 +671,19 @@ class ConfigManager:
480
  }
481
  }
482
 
483
- def get(self, key: str, default=None):
484
- """获取配置值"""
485
  keys = key.split('.')
486
  value = self.config
487
  for k in keys:
488
- value = value.get(k, default)
 
 
 
489
  return value
490
 
491
- def set(self, key: str, value):
492
- """设置配置值"""
493
  keys = key.split('.')
494
  config = self.config
495
  for k in keys[:-1]:
@@ -498,12 +692,215 @@ class ConfigManager:
498
  self.save_config()
499
  ```
500
 
501
- ## 3. 数据流设计
 
 
 
 
502
 
503
- ### 3.1 音频处理流程
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  ```
506
- 麦克风阵列
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  ↓ (16KHz PCM)
508
  音频块 (1024 samples)
509
 
@@ -517,21 +914,38 @@ class ConfigManager:
517
 
518
 
519
  ┌─────────────────┐
520
- │ 发送到 HA │
521
  │ (ESPHome) │
522
  └────────┬────────┘
523
 
524
- ↓ (HA 返回 TTS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  ┌─────────────────┐
526
  │ 播放音频 │
527
  │ (扬声器) │
528
  └─────────────────┘
529
  ```
530
 
531
- ### 3.2 运动控制流程
532
 
533
  ```
534
- 语音事件
535
 
536
  ┌─────────────────┐
537
  │ 运动队列管理 │
@@ -540,7 +954,7 @@ class ConfigManager:
540
 
541
  ┌─────────────────┐
542
  │ 高优先级运动 │
543
- │ (舞蹈、表情) │
544
  └────────┬────────┘
545
 
546
 
@@ -552,7 +966,7 @@ class ConfigManager:
552
 
553
  ┌─────────────────┐
554
  │ 低优先级运动 │
555
- │ (呼吸、微动) │
556
  └────────┬────────┘
557
 
558
 
@@ -565,90 +979,77 @@ class ConfigManager:
565
  └─────────────────┘
566
  ```
567
 
568
- ## 4. 错误处理
569
-
570
- ### 4.1 错误类型
571
-
572
- ```python
573
- class AudioDeviceError(Exception):
574
- """音频设备错误"""
575
- pass
576
-
577
-
578
- class MotionError(Exception):
579
- """运动控制错误"""
580
- pass
581
-
582
-
583
- class ESPHomeError(Exception):
584
- """ESPHome 协议错误"""
585
- pass
586
-
587
-
588
- class WakeWordError(Exception):
589
- """唤醒词检测错误"""
590
- pass
591
-
592
-
593
- class STTError(Exception):
594
- """语音识别错误"""
595
- pass
596
 
 
597
 
598
- class TTSError(Exception):
599
- """语音合成错误"""
600
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
  ```
602
 
603
- ### 4.2 错误处理策略
604
-
605
- 1. **音频设备错误**:
606
- - 记录错误日志
607
- - 尝试重新连接设备
608
- - 降级到备用设备(如果有)
609
- - 通知用户
610
-
611
- 2. **运动控制错误**:
612
- - 记录错误日志
613
- - 停止当前运动
614
- - 检查机器人连接状态
615
- - 恢复到安全姿态
616
 
617
- 3. **ESPHome 错误**:
618
- - 记录错误日志
619
- - 尝试重新连接 Home Assistant
620
- - 缓存未发送的消息
621
- - 通知用户
622
 
623
- 4. **唤醒词错误**:
624
- - 记录错误日志
625
- - 重新加载模型
626
- - 通知用户
 
627
 
628
- ## 5. 性能优化
 
 
 
 
 
629
 
630
- ### 5.1 音频处理
631
 
 
632
  - 使用异步 I/O 减少阻塞
633
  - 音频块大小优化(1024 samples)
634
  - 使用 numpy 加速数值计算
635
  - 预分配缓冲区减少内存分配
636
 
637
- ### 5.2 运动控制
638
-
639
  - 运动队列优先级管理
640
  - 运动平滑插值
641
  - 批量运动命令合并
642
  - 延迟预算管理
643
 
644
- ### 5.3 网络
645
-
646
  - ESPHome 连接池
647
  - 消息批量发送
648
  - 压缩音频数据
649
  - 心跳检测
650
 
651
- ## 6. 安全考虑
652
 
653
  1. **音频隐私**:
654
  - 不存储用户音频(除非明确授权)
@@ -667,91 +1068,9 @@ class TTSError(Exception):
667
  - 防火墙配置
668
  - 访问控制
669
 
670
- ## 7. 测试策略
671
-
672
- ### 7.1 单元测试
673
-
674
- - 音频模块测试
675
- - 语音模块测试
676
- - 运动模块测试
677
- - ESPHome 模块测试
678
-
679
- ### 7.2 集成测试
680
-
681
- - 端到端音频流程
682
- - 运动控制流程
683
- - ESPHome 通信流程
684
-
685
- ### 7.3 硬件测试
686
-
687
- - Reachy Mini 连接测试
688
- - 音频设备测试
689
- - 运动功能测试
690
-
691
  ## 8. 部署
692
 
693
- ### 8.1 依赖项
694
-
695
- ```toml
696
- [project]
697
- name = "reachy-mini-ha-voice"
698
- version = "0.1.0"
699
- requires-python = ">=3.8"
700
-
701
- dependencies = [
702
- # Reachy Mini SDK
703
- "reachy-mini",
704
-
705
- # 音频处理
706
- "sounddevice>=0.4.6",
707
- "numpy>=1.24.0",
708
-
709
- # 语音处理
710
- "pymicro-wakeword>=2,<3",
711
- "pyopen-wakeword>=1,<2",
712
- "openai-whisper>=20231117",
713
- "piper-tts>=1.2.0",
714
-
715
- # ESPHome
716
- "aioesphomeapi>=42.0.0",
717
- "zeroconf>=0.100.0",
718
-
719
- # 运动控制
720
- "scipy>=1.10.0",
721
-
722
- # Web UI
723
- "gradio>=4.0.0",
724
-
725
- # 计算机视觉(可选)
726
- "opencv-python>=4.8.0",
727
- "mediapipe>=0.10.0",
728
-
729
- # 通信
730
- "websockets>=12.0",
731
-
732
- # 配置
733
- "pydantic>=2.0.0",
734
- ]
735
-
736
- [project.optional-dependencies]
737
- wireless = [
738
- "reachy-mini[wireless]",
739
- ]
740
-
741
- vision = [
742
- "pollen-vision",
743
- "torch>=2.0.0",
744
- "transformers>=4.30.0",
745
- ]
746
-
747
- dev = [
748
- "pytest>=7.4.0",
749
- "pytest-asyncio>=0.21.0",
750
- "ruff>=0.1.0",
751
- ]
752
- ```
753
-
754
- ### 8.2 安装步骤
755
 
756
  ```bash
757
  # 创建虚拟环境
@@ -765,7 +1084,7 @@ pip install -e .
765
  pip install -e .[wireless,vision,dev]
766
  ```
767
 
768
- ### 8.3 运行
769
 
770
  ```bash
771
  # 启动应用
@@ -776,4 +1095,11 @@ python -m reachy_mini_ha_voice --gradio
776
 
777
  # 启动无线版本
778
  python -m reachy_mini_ha_voice --wireless
779
- ```
 
 
 
 
 
 
 
 
14
  ┌─────────────────────────────────────────────────────────────────┐
15
  │ 业务逻辑层 (Business Logic) │
16
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
17
+ │ │ Voice │ │ Motion │ │ State │ │
18
+ │ │ Manager │ │ Controller │ │ Manager │ │
 
 
 
 
19
  │ └──────────────┘ └──────────────┘ └──────────────┘ │
20
+ │ ┌──────────────┐ ┌──────────────┐ │
21
+ │ │ ESPHome │ │ Event │ │
22
+ │ │ Handler │ │ Dispatcher │ │
23
+ │ └──────────────┘ └──────────────┘ │
24
  └─────────────────────────────────────────────────────────────────┘
25
 
26
  ┌─────────────────────────────────────────────────────────────────┐
27
  │ 服务层 (Services) │
28
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
29
+ │ │ Wake Word │ │ Audio │ │ Motion │ │
30
+ │ │ Detector │ │ Processor │ │ Queue │ │
 
 
 
 
31
  │ └──────────────┘ └──────────────┘ └──────────────┘ │
32
+ │ ┌──────────────────────────────────────────────────────┐ │
33
+ │ │ ESPHome Protocol (Audio Streaming to/from HA) │ │
34
+ │ └──────────────────────────────────────────────────────┘ │
35
  └─────────────────────────────────────────────────────────────────┘
36
 
37
  ┌─────────────────────────────────────────────────────────────────┐
 
47
  └─────────────────────────────────────────────────────────────────┘
48
 
49
  ┌─────────────────────────────────────────────────────────────────┐
50
+ │ Reachy Mini Hardware + Home Assistant │
51
  │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
52
  │ │ Microphone │ │ Head Motors │ │ Camera │ │
53
  │ │ Array (4) │ │ (6 DOF) │ │ (Wide) │ │
 
56
  │ │ Speaker │ │ Antennas │ │
57
  │ │ (5W) │ │ (2) │ │
58
  │ └──────────────┘ └──────────────┘ │
59
+ │ │
60
+ │ ┌──────────────────────────────────────────────────────┐ │
61
+ │ │ Home Assistant (STT/TTS Processing) │ │
62
+ │ └──────────────────────────────────────────────────────┘ │
63
  └─────────────────────────────────────────────────────────────────┘
64
  ```
65
 
66
+ ## 2. 核心设计原则
67
+
68
+ ### 2.1 基于 linux-voice-assistant
69
+ 本项目基于 [OHF-Voice/linux-voice-assistant](https://github.com/OHF-Voice/linux-voice-assistant) 的架构设计,主要特点:
70
+
71
+ - **STT/TTS 由 Home Assistant 处理**:音频数据通过 ESPHome 协议传输到 Home Assistant,由 HA 进行语音识别和合成
72
+ - **本地唤醒词检测**:使用 microWakeWord 或 openWakeWord 进行离线唤醒词检测
73
+ - **ESPHome 协议通信**:通过 ESPHome 协议与 Home Assistant 通信
74
+ - **运动控制增强**:集成 Reachy Mini 的运动控制能力
75
+
76
+ ### 2.2 架构特点
77
+ - **模块化设计**:音频、语音、运动、ESPHome 各模块独立
78
+ - **异步处理**:使用 asyncio 实现高性能异步处理
79
+ - **状态管理**:集中的状态管理(ServerState)
80
+ - **事件驱动**:基于事件的通信机制
81
+
82
+ ## 3. 模块设计
83
 
84
+ ### 3.1 音频模块 (audio/)
85
 
86
  **职责**:
87
  - 音频设备管理(麦克风、扬声器)
88
  - 音频录制和播放
89
+ - 音频格式转换(16 kHz 单声道 PCM)
 
90
 
91
  **接口**:
92
 
 
100
  pass
101
 
102
  @abstractmethod
103
+ async def start_recording(
104
+ self,
105
+ device_id: str,
106
+ callback: Callable[[bytes], None],
107
+ sample_rate: int = 16000,
108
+ channels: int = 1,
109
+ block_size: int = 1024
110
+ ):
111
  """开始录制音频"""
112
  pass
113
 
114
  @abstractmethod
115
+ async def play_audio(
116
+ self,
117
+ audio_data: bytes,
118
+ device_id: str,
119
+ sample_rate: int = 16000,
120
+ channels: int = 1
121
+ ):
122
  """播放音频"""
123
  pass
124
 
125
 
126
  class MicrophoneArray(AudioAdapter):
127
+ """麦克风阵列适配器(Reachy Mini 的 4 麦克风阵列)"""
128
 
129
  def __init__(self, sample_rate: int = 16000, channels: int = 1):
130
  self.sample_rate = sample_rate
131
  self.channels = channels
132
  self._stream = None
133
+ self._is_recording = False
134
+ self._callback = None
135
+ self._loop = None
 
 
136
 
137
 
138
  class Speaker(AudioAdapter):
139
+ """扬声器适配器(Reachy Mini 的 5W 扬声器)"""
140
 
141
  def __init__(self, sample_rate: int = 16000):
142
  self.sample_rate = sample_rate
143
+ ```
144
+
145
+ **音频处理器**:
146
+
147
+ ```python
148
+ class AudioProcessor:
149
+ """处理音频块,用于唤醒词检测和流式传输"""
150
 
151
+ def __init__(
152
+ self,
153
+ sample_rate: int = 16000,
154
+ channels: int = 1,
155
+ block_size: int = 1024
156
+ ):
157
+ self.sample_rate = sample_rate
158
+ self.channels = channels
159
+ self.block_size = block_size
160
+
161
+ self._wake_word_callbacks: list[Callable[[bytes], None]] = []
162
+ self._stream_callbacks: list[Callable[[bytes], None]] = []
163
+
164
+ def add_wake_word_callback(self, callback: Callable[[bytes], None]):
165
+ """添加唤醒词检测回调"""
166
+ self._wake_word_callbacks.append(callback)
167
+
168
+ def add_stream_callback(self, callback: Callable[[bytes], None]):
169
+ """添加音频流回调(发送到 Home Assistant)"""
170
+ self._stream_callbacks.append(callback)
171
+
172
+ async def process_audio_chunk(self, audio_chunk: bytes):
173
+ """处理音频块"""
174
+ # 调用唤醒词检测回调
175
+ for callback in self._wake_word_callbacks:
176
+ callback(audio_chunk)
177
+
178
+ # 调用流式传输回调
179
+ for callback in self._stream_callbacks:
180
+ callback(audio_chunk)
181
  ```
182
 
183
+ ### 3.2 语音模块 (voice/)
184
 
185
  **职责**:
186
+ - 唤醒词检测(本地离线)
187
+ - STT/TTS 由 Home Assistant 处理(不在此模块中)
 
188
 
189
  **接口**:
190
 
 
201
  async def process_audio(self, audio_chunk: bytes) -> bool:
202
  """处理音频块,返回是否检测到唤醒词"""
203
  pass
 
 
 
 
 
204
 
205
 
206
  class MicroWakeWordDetector(WakeWordDetector):
207
+ """microWakeWord 检测器(轻量级,适合 Raspberry Pi)"""
208
 
209
  def __init__(self, model_path: str):
210
  self.model = None
211
  self.features = None
212
+ self.model_path = Path(model_path)
213
+ self._confidence = 0.0
214
+ self._loaded = False
215
 
216
  async def load_model(self, model_path: str):
217
  """加载 microWakeWord 模型"""
218
+ from pymicro_wakeword import MicroWakeWord, MicroWakeWordFeatures
219
+
220
  self.features = MicroWakeWordFeatures()
221
+ self.model = MicroWakeWord.from_config(model_path)
222
+ self._loaded = True
223
+
224
+ async def process_audio(self, audio_chunk: bytes) -> bool:
225
+ """处理音频块"""
226
+ import numpy as np
227
+ audio_array = np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) / 32768.0
228
+
229
+ features = self.features.process_streaming(audio_array)
230
+ for feature in features:
231
+ score = self.model.process_streaming(feature)
232
+ if score is not None and score >= 0.5:
233
+ return True
234
+ return False
235
 
236
 
237
  class OpenWakeWordDetector(WakeWordDetector):
238
+ """openWakeWord 检测器(更多唤醒词选择)"""
239
 
240
  def __init__(self, model_path: str):
241
  self.model = None
242
  self.features = None
243
+ self.model_path = Path(model_path)
244
+ self._confidence = 0.0
245
+ self._loaded = False
246
 
247
  async def load_model(self, model_path: str):
248
  """加载 openWakeWord 模型"""
249
+ from pyopen_wakeword import OpenWakeWord, OpenWakeWordFeatures
250
+
251
  self.features = OpenWakeWordFeatures.from_builtin()
252
+ self.model = OpenWakeWord(model_path)
253
+ self._loaded = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
+ async def process_audio(self, audio_chunk: bytes) -> bool:
256
+ """处理音频"""
257
+ import numpy as np
258
+ audio_array = np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float32) / 32768.0
259
+
260
+ features = self.features.process_streaming(audio_array)
261
+ for feature in features:
262
+ scores = self.model.process_streaming(feature)
263
+ for score in scores:
264
+ if score >= 0.5:
265
+ return True
266
+ return False
267
  ```
268
 
269
+ ### 3.3 运动模块 (motion/)
270
 
271
  **职责**:
272
+ - 头部运动控制(6 自由度)
273
+ - 天线控制(2 个天线)
274
  - 运动队列管理
275
  - 语音反应性运动
276
 
 
280
  class MotionController(ABC):
281
  """运动控制器抽象基类"""
282
 
283
+ @abstractmethod
284
+ async def connect(self, host: str = 'localhost'):
285
+ """连接到机器人"""
286
+ pass
287
+
288
  @abstractmethod
289
  async def wake_up(self):
290
  """唤醒机器人"""
 
296
  pass
297
 
298
  @abstractmethod
299
+ async def move_head(self, pose: np.ndarray, duration: float = 1.0):
300
+ """移动头部到姿态"""
301
  pass
302
 
303
  @abstractmethod
304
+ async def move_antennas(self, left: float, right: float, duration: float = 1.0):
305
  """移动天线"""
306
  pass
307
+
308
+ @abstractmethod
309
+ async def nod(self, count: int = 1, duration: float = 0.5):
310
+ """点头"""
311
+ pass
312
+
313
+ @abstractmethod
314
+ async def shake(self, count: int = 1, duration: float = 0.5):
315
+ """摇头"""
316
+ pass
317
+
318
+ @abstractmethod
319
+ async def start_speech_reactive_motion(self):
320
+ """开始语音反应性运动"""
321
+ pass
322
+
323
+ @abstractmethod
324
+ async def stop_speech_reactive_motion(self):
325
+ """停止语音反应性运动"""
326
+ pass
327
 
328
 
329
  class ReachyMiniMotionController(MotionController):
 
331
 
332
  def __init__(self):
333
  self.reachy_mini = None
334
+ self._connected = False
335
+ self._speech_reactive = False
336
+ self._speech_task = None
337
 
338
  async def connect(self, host: str = 'localhost'):
339
  """连接到 Reachy Mini"""
340
  from reachy_mini import ReachyMini
341
+
342
  self.reachy_mini = ReachyMini(host=host)
343
+ self._connected = True
344
 
345
  async def wake_up(self):
346
  """唤醒机器人"""
 
350
  """关闭机器人"""
351
  self.reachy_mini.turn_off()
352
 
353
+ async def move_head(self, pose: np.ndarray, duration: float = 1.0):
354
+ """移动头部到姿态"""
355
  self.reachy_mini.goto_target(head=pose, duration=duration)
356
 
357
+ async def move_antennas(self, left: float, right: float, duration: float = 1.0):
358
  """移动天线"""
359
  self.reachy_mini.goto_target(antennas=[left, right], duration=duration)
360
+
361
+ async def nod(self, count: int = 1, duration: float = 0.5):
362
+ """点头"""
363
+ import numpy as np
364
+ from scipy.spatial.transform import Rotation as R
365
+
366
+ for _ in range(count):
367
+ # 点头
368
+ pose_down = np.eye(4)
369
+ pose_down[:3, :3] = R.from_euler('xyz', [15, 0, 0], degrees=True).as_matrix()
370
+ await self.move_head(pose_down, duration=duration / 2)
371
+
372
+ pose_up = np.eye(4)
373
+ pose_up[:3, :3] = R.from_euler('xyz', [-15, 0, 0], degrees=True).as_matrix()
374
+ await self.move_head(pose_up, duration=duration / 2)
375
+
376
+ async def shake(self, count: int = 1, duration: float = 0.5):
377
+ """摇头"""
378
+ import numpy as np
379
+ from scipy.spatial.transform import Rotation as R
380
+
381
+ for _ in range(count):
382
+ # 摇头
383
+ pose_left = np.eye(4)
384
+ pose_left[:3, :3] = R.from_euler('xyz', [0, 0, -20], degrees=True).as_matrix()
385
+ await self.move_head(pose_left, duration=duration / 2)
386
+
387
+ pose_right = np.eye(4)
388
+ pose_right[:3, :3] = R.from_euler('xyz', [0, 0, 20], degrees=True).as_matrix()
389
+ await self.move_head(pose_right, duration=duration / 2)
390
+
391
+ async def start_speech_reactive_motion(self):
392
+ """开始语音反应性运动(说话时的微动)"""
393
+ self._speech_reactive = True
394
+ self._speech_task = asyncio.create_task(self._speech_reactive_loop())
395
+
396
+ async def stop_speech_reactive_motion(self):
397
+ """停止语音反应性运动"""
398
+ self._speech_reactive = False
399
+ if self._speech_task:
400
+ self._speech_task.cancel()
401
+
402
+ async def _speech_reactive_loop(self):
403
+ """语音反应性运动循环"""
404
+ import numpy as np
405
+ from scipy.spatial.transform import Rotation as R
406
+
407
+ while self._speech_reactive:
408
+ # 生成微小的摆动
409
+ roll = np.sin(asyncio.get_event_loop().time() * 2) * 3
410
+ pose = np.eye(4)
411
+ pose[:3, :3] = R.from_euler('xyz', [0, 0, roll], degrees=True).as_matrix()
412
+
413
+ await self.move_head(pose, duration=0.1)
414
+ await asyncio.sleep(0.1)
415
+ ```
416
 
417
+ **运动队列**:
418
 
419
+ ```python
420
  class MotionQueue:
421
  """运动队列管理器"""
422
 
 
425
  self.medium_priority = asyncio.Queue()
426
  self.low_priority = asyncio.Queue()
427
  self.is_running = False
428
+ self._current_motion = None
429
+ self._task = None
430
 
431
+ async def add_motion(self, motion: Motion):
432
+ """添加运动到队列"""
433
+ if motion.priority == MotionPriority.HIGH:
434
+ await self.high_priority.put(motion)
435
+ elif motion.priority == MotionPriority.MEDIUM:
436
+ await self.medium_priority.put(motion)
437
+ elif motion.priority == MotionPriority.LOW:
438
+ await self.low_priority.put(motion)
439
 
440
+ async def start(self):
441
+ """开始处理运动队列"""
442
+ self.is_running = True
443
+ self._task = asyncio.create_task(self._process_queue())
444
 
445
+ async def stop(self):
446
+ """停止处理运动队列"""
447
+ self.is_running = False
448
+ if self._task:
449
+ self._task.cancel()
450
 
451
+ async def _process_queue(self):
452
  """处理运动队列"""
 
453
  while self.is_running:
454
+ # 优先级:HIGH > MEDIUM > LOW
455
+ motion = await self._get_next_motion()
456
+
457
+ if motion is None:
 
 
 
 
458
  await asyncio.sleep(0.01)
459
  continue
460
 
461
+ self._current_motion = motion
462
  await motion.execute()
463
+ self._current_motion = None
464
+
465
+ async def _get_next_motion(self) -> Optional[Motion]:
466
+ """获取下一个运动"""
467
+ if not self.high_priority.empty():
468
+ return await self.high_priority.get()
469
+ elif not self.medium_priority.empty():
470
+ return await self.medium_priority.get()
471
+ elif not self.low_priority.empty():
472
+ return await self.low_priority.get()
473
+ else:
474
+ return None
475
  ```
476
 
477
+ ### 3.4 ESPHome 模块 (esphome/)
478
 
479
  **职责**:
480
  - ESPHome 协议实现
481
  - 与 Home Assistant 通信
482
+ - 音频流传输
483
+ - 事件处理
484
 
485
  **接口**:
486
 
487
  ```python
488
class ESPHomeServer:
    """TCP server that Home Assistant connects to over the ESPHome protocol.

    The server keeps the stream writer of every connected client so audio
    can be pushed to all of them, and stores two optional callbacks for
    data arriving from Home Assistant.

    NOTE(review): ``_handle_client`` calls ``self._process_data``, which is
    not defined in this snippet - it must be supplied before the server can
    handle incoming data.
    NOTE(review): ``send_event`` only invokes the local event callback; it
    does not write anything to the connected clients. Confirm that this is
    the intended behaviour.
    """

    def __init__(self, host: str = "0.0.0.0", port: int = 6053):
        self.host = host
        self.port = port
        self._server = None
        self._is_running = False
        self._clients = []
        self._audio_callback = None
        self._event_callback = None

    async def start(self):
        """Start listening on ``host``:``port``."""
        self._server = await asyncio.start_server(
            self._handle_client,
            self.host,
            self.port
        )
        self._is_running = True

    async def stop(self):
        """Stop the server and close every client connection."""
        self._is_running = False

        # Iterate a snapshot: handlers may drop their writer concurrently.
        for client in list(self._clients):
            client.close()
        self._clients.clear()

        if self._server:
            self._server.close()
            await self._server.wait_closed()

    def set_audio_callback(self, callback: Callable[[bytes], None]):
        """Set the callback for TTS audio received from Home Assistant."""
        self._audio_callback = callback

    def set_event_callback(
        self,
        callback: Callable[["VoiceAssistantEventType", dict], None],
    ):
        """Set the callback for events received from Home Assistant."""
        self._event_callback = callback

    async def send_audio(self, audio_data: bytes):
        """Send audio bytes to every connected client (STT input).

        A failure on one client is logged and does not stop delivery to the
        remaining clients.
        """
        # ``drain()`` yields to the event loop, during which a handler may
        # remove its writer from ``_clients``. Iterating a snapshot prevents
        # that mutation from making the loop skip the next client.
        for client in list(self._clients):
            try:
                client.write(audio_data)
                await client.drain()
            except Exception as e:
                logger.error("Error sending audio to client: %s", e)

    async def send_event(self, event_type: "VoiceAssistantEventType", data: dict):
        """Pass an event to the registered event callback, if any."""
        if self._event_callback:
            self._event_callback(event_type, data)

    async def _handle_client(self, reader, writer):
        """Serve one client connection until it closes or the server stops."""
        client_addr = writer.get_extra_info('peername')
        self._clients.append(writer)

        try:
            while self._is_running:
                data = await reader.read(4096)
                if not data:
                    # Empty read means the peer closed the connection.
                    break

                # Handle data coming from Home Assistant
                await self._process_data(data)
        except Exception as e:
            logger.error("Error handling client %s: %s", client_addr, e)
        finally:
            # stop() may already have cleared the list; an unconditional
            # remove() would raise ValueError in that case.
            if writer in self._clients:
                self._clients.remove(writer)
            writer.close()
            await writer.wait_closed()
562
+
563
+
564
class VoiceSatelliteProtocol:
    """Voice satellite logic: forwards microphone audio after a wake word.

    Audio chunks are only forwarded to ``state.esphome_server`` while
    streaming is active. Streaming is switched on by ``handle_wake_word``
    and off by ``stop_streaming``.
    """

    def __init__(self, state: "ServerState"):
        self.state = state
        self._is_streaming = False
        # Minimum number of seconds between two accepted wake words.
        self._refractory_period = 2.0
        # -inf guarantees the very first wake word is accepted regardless
        # of the event loop clock's starting value.
        self._last_wake_word_time = float("-inf")

    async def handle_audio(self, audio_chunk: bytes):
        """Forward an audio chunk to Home Assistant while streaming."""
        if self._is_streaming and self.state.esphome_server:
            await self.state.esphome_server.send_audio(audio_chunk)

    async def handle_wake_word(self):
        """React to a detected wake word: notify the server, start streaming.

        Detections inside the refractory period are ignored.
        """
        # Inside a coroutine the running loop is the one to query.
        current_time = asyncio.get_running_loop().time()

        # Refractory (cool-down) check
        if current_time - self._last_wake_word_time < self._refractory_period:
            return

        self._last_wake_word_time = current_time

        # Report the wake word event through the ESPHome server
        if self.state.esphome_server:
            await self.state.esphome_server.send_event(
                VoiceAssistantEventType.VOICE_ASSISTANT_WAKE_WORD_END,
                {"wake_word": "detected"}
            )

        # Begin streaming audio
        self._is_streaming = True

    async def stop_streaming(self):
        """Stop forwarding audio."""
        self._is_streaming = False
601
+
602
+
603
class VoiceAssistantEventType(Enum):
    """Voice assistant pipeline event types.

    The numeric values follow the ``VoiceAssistantEvent`` enum of the
    ESPHome native API, so they can be exchanged with Home Assistant
    unchanged.
    """
    VOICE_ASSISTANT_ERROR = 0
    VOICE_ASSISTANT_RUN_START = 1
    VOICE_ASSISTANT_RUN_END = 2
    VOICE_ASSISTANT_STT_START = 3
    VOICE_ASSISTANT_STT_END = 4
    VOICE_ASSISTANT_INTENT_START = 5
    VOICE_ASSISTANT_INTENT_END = 6
    VOICE_ASSISTANT_TTS_START = 7
    VOICE_ASSISTANT_TTS_END = 8
    VOICE_ASSISTANT_WAKE_WORD_START = 9
    VOICE_ASSISTANT_WAKE_WORD_END = 10

    # Aliases for the names this document used previously; they resolve to
    # the run start / run end members above.
    VOICE_ASSISTANT_START = 1
    VOICE_ASSISTANT_END = 2
614
  ```
615
 
616
+ ### 3.5 配置模块 (config/)
617
 
618
  **职责**:
619
  - 配置文件管理
 
627
  """配置管理器"""
628
 
629
  def __init__(self, config_path: str = "config.json"):
630
+ self.config_path = Path(config_path)
631
  self.config = self.load_config()
632
 
633
  def load_config(self) -> dict:
634
  """加载配置文件"""
635
+ if self.config_path.exists():
636
+ with open(self.config_path, 'r', encoding='utf-8') as f:
637
  return json.load(f)
638
  return self.get_default_config()
639
 
640
  def save_config(self):
641
  """保存配置文件"""
642
+ with open(self.config_path, 'w', encoding='utf-8') as f:
643
+ json.dump(self.config, f, indent=2, ensure_ascii=False)
644
 
645
  def get_default_config(self) -> dict:
646
  """获取默认配置"""
 
654
  },
655
  "voice": {
656
  "wake_word": "okay_nabu",
657
+ "wake_word_dirs": ["wakewords"]
 
 
 
658
  },
659
  "motion": {
660
  "enabled": True,
661
+ "speech_reactive": True
 
662
  },
663
  "esphome": {
664
  "host": "0.0.0.0",
 
671
  }
672
  }
673
 
674
+ def get(self, key: str, default: Any = None) -> Any:
675
+ """获取配置值(支持嵌套键)"""
676
  keys = key.split('.')
677
  value = self.config
678
  for k in keys:
679
+ if isinstance(value, dict):
680
+ value = value.get(k, default)
681
+ else:
682
+ return default
683
  return value
684
 
685
+ def set(self, key: str, value: Any):
686
+ """设置配置值(支持嵌套键)"""
687
  keys = key.split('.')
688
  config = self.config
689
  for k in keys[:-1]:
 
692
  self.save_config()
693
  ```
694
 
695
+ ### 3.6 状态管理 (state.py)
696
+
697
+ **职责**:
698
+ - 全局状态管理
699
+ - 组件生命周期管理
700
 
701
+ **接口**:
702
 
703
+ ```python
704
+ @dataclass
705
+ class ServerState:
706
+ """全局服务器状态"""
707
+ name: str
708
+
709
+ # 配置
710
+ config: Optional[ConfigManager] = None
711
+
712
+ # 音频
713
+ microphone: Optional[MicrophoneArray] = None
714
+ speaker: Optional[Speaker] = None
715
+ audio_queue: Queue = field(default_factory=Queue)
716
+
717
+ # 语音
718
+ wake_word_detector: Optional[WakeWordDetector] = None
719
+ active_wake_words: list = field(default_factory=list)
720
+
721
+ # 运动
722
+ motion_controller: Optional[MotionController] = None
723
+ motion_queue: Optional[MotionQueue] = None
724
+
725
+ # ESPHome
726
+ esphome_server: Optional[ESPHomeServer] = None
727
+ voice_satellite: Optional[VoiceSatelliteProtocol] = None
728
+
729
+ # 状态
730
+ is_running: bool = False
731
+ is_streaming: bool = False
732
+
733
+ # 回调
734
+ on_wake_word: Optional[callable] = None
735
+ on_stt_result: Optional[callable] = None
736
+ on_tts_audio: Optional[callable] = None
737
+
738
+ async def cleanup(self):
739
+ """清理资源"""
740
+ if self.microphone:
741
+ await self.microphone.stop_recording()
742
+
743
+ if self.motion_controller:
744
+ await self.motion_controller.stop_speech_reactive_motion()
745
+ await self.motion_controller.turn_off()
746
+ await self.motion_controller.disconnect()
747
+
748
+ if self.motion_queue:
749
+ await self.motion_queue.stop()
750
+
751
+ if self.esphome_server:
752
+ await self.esphome_server.stop()
753
  ```
754
+
755
+ ### 3.7 主应用 (app.py)
756
+
757
+ **职责**:
758
+ - 应用生命周期管理
759
+ - 组件初始化和协调
760
+ - 事件处理
761
+
762
+ **接口**:
763
+
764
+ ```python
765
class ReachyMiniVoiceApp:
    """Main application: owns the ``ServerState`` and coordinates its parts.

    ``start()`` initializes the state, starts audio capture and the ESPHome
    server, advertises the service over mDNS and then idles until ``stop()``
    is called.
    """

    def __init__(
        self,
        name: str,
        config: "ConfigManager",
        audio_input_device: Optional[str] = None,
        audio_output_device: Optional[str] = None,
        wake_model: Optional[str] = None,
        wake_word_dirs: Optional[list] = None,
        host: str = "0.0.0.0",
        port: int = 6053,
        robot_host: str = "localhost",
        wireless: bool = False,
        gradio: bool = False
    ):
        self.name = name
        self.config = config
        self.audio_input_device = audio_input_device
        self.audio_output_device = audio_output_device
        self.wake_model = wake_model
        self.wake_word_dirs = wake_word_dirs
        self.host = host
        self.port = port
        self.robot_host = robot_host
        self.wireless = wireless
        self.gradio = gradio

        self.state = ServerState(name)
        self._is_running = False

        # mDNS handles are kept so stop() can withdraw the advertisement.
        self._zeroconf = None
        self._mdns_info = None

    async def start(self):
        """Start all components and block until ``stop()`` is called."""
        # Initialize state
        await self.state.initialize(self.config)

        # Wire callbacks
        self._setup_callbacks()

        # Start audio capture
        await self.state.microphone.start_recording(
            self.audio_input_device,
            self._audio_callback,
            sample_rate=self.config.get("audio.sample_rate", 16000),
            channels=self.config.get("audio.channels", 1),
            block_size=self.config.get("audio.block_size", 1024)
        )

        # Start the ESPHome server
        await self.state.esphome_server.start()

        # Register mDNS discovery
        await self._register_mdns()

        self._is_running = True

        # Keep running
        while self._is_running:
            await asyncio.sleep(1)

    async def stop(self):
        """Stop the application, withdraw mDNS and release all resources."""
        self._is_running = False
        try:
            await self._unregister_mdns()
        finally:
            # Component cleanup must run even if mDNS teardown fails.
            await self.state.cleanup()

    def _setup_callbacks(self):
        """Register the audio processor callbacks."""
        self.state.audio_processor.add_wake_word_callback(self._on_audio_chunk)
        self.state.audio_processor.add_stream_callback(self._on_stream_audio)

    async def _audio_callback(self, audio_chunk: bytes):
        """Recording callback: hand each captured chunk to the processor."""
        await self.state.audio_processor.process_audio_chunk(audio_chunk)

    async def _on_audio_chunk(self, audio_chunk: bytes):
        """Run wake word detection on a chunk and react to a detection."""
        if self.state.wake_word_detector:
            detected = await self.state.wake_word_detector.process_audio(audio_chunk)
            if detected:
                await self._on_wake_word_detected()

    async def _on_stream_audio(self, audio_chunk: bytes):
        """Streaming callback: forward the chunk to the voice satellite."""
        if self.state.voice_satellite:
            await self.state.voice_satellite.handle_audio(audio_chunk)

    async def _on_wake_word_detected(self):
        """Acknowledge a wake word with a nod and trigger the satellite."""
        # Nod to confirm
        if self.state.motion_controller:
            await self.state.motion_controller.nod(count=1, duration=0.3)

        # Trigger the voice satellite
        if self.state.voice_satellite:
            await self.state.voice_satellite.handle_wake_word()

    async def handle_tts_audio(self, audio_data: bytes):
        """Play TTS audio received from Home Assistant on the speaker."""
        if self.state.speaker:
            await self.state.speaker.play_audio(
                audio_data,
                self.audio_output_device,
                sample_rate=self.config.get("audio.sample_rate", 16000),
                channels=self.config.get("audio.channels", 1)
            )

    async def handle_stt_result(self, text: str):
        """Handle an STT result from Home Assistant (hook for custom logic)."""
        pass

    def _advertised_ip(self) -> str:
        """Return the IPv4 address to publish in the mDNS record.

        A concrete ``host`` is used as is. For the wildcard address the
        local address of the default outbound interface is used, falling
        back to ``127.0.0.1`` when it cannot be determined.
        """
        import socket

        if self.host not in ("", "0.0.0.0"):
            try:
                socket.inet_aton(self.host)
                return self.host
            except OSError:
                # ``host`` is not an IPv4 literal; probe the interface instead.
                pass

        probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # connect() on a UDP socket sends no packet; it only selects the
            # local interface that would be used for the route.
            probe.connect(("10.255.255.255", 1))
            return probe.getsockname()[0]
        except OSError:
            return "127.0.0.1"
        finally:
            probe.close()

    async def _register_mdns(self):
        """Advertise the ESPHome service over mDNS for discovery."""
        import socket

        from zeroconf import ServiceInfo
        from zeroconf.asyncio import AsyncZeroconf

        info = ServiceInfo(
            "_esphomelib._tcp.local.",
            f"{self.name}._esphomelib._tcp.local.",
            # An empty address list would publish a service nobody can resolve.
            addresses=[socket.inet_aton(self._advertised_ip())],
            port=self.port,
            properties={
                "version": "1.0",
                "name": self.name,
                "platform": "reachy_mini"
            }
        )

        # The asyncio flavour avoids blocking the event loop.
        zeroconf = AsyncZeroconf()
        try:
            await zeroconf.async_register_service(info)
        except Exception:
            await zeroconf.async_close()
            raise

        self._zeroconf = zeroconf
        self._mdns_info = info

    async def _unregister_mdns(self):
        """Withdraw the mDNS advertisement, if one was registered."""
        zeroconf, info = self._zeroconf, self._mdns_info
        self._zeroconf = None
        self._mdns_info = None
        if zeroconf is None:
            return
        try:
            if info is not None:
                await zeroconf.async_unregister_service(info)
        finally:
            await zeroconf.async_close()
896
+ ```
897
+
898
+ ## 4. 数据流
899
+
900
+ ### 4.1 音频输入流程
901
+
902
+ ```
903
+ 麦克风阵列 (4 麦克风)
904
  ↓ (16KHz PCM)
905
  音频块 (1024 samples)
906
 
 
914
 
915
 
916
  ┌─────────────────┐
917
+ │ 开始流式传输 │
918
  │ (ESPHome) │
919
  └────────┬────────┘
920
 
921
+
922
+ ┌─────────────────┐
923
+ │ 发送到 HA │
924
+ │ (STT 输入) │
925
+ └─────────────────┘
926
+ ```
927
+
928
+ ### 4.2 音频输出流程
929
+
930
+ ```
931
+ Home Assistant (TTS 输出)
932
+
933
+ ┌─────────────────┐
934
+ │ ESPHome 服务器 │
935
+ │ (接收音频) │
936
+ └────────┬────────┘
937
+
938
+
939
  ┌─────────────────┐
940
  │ 播放音频 │
941
  │ (扬声器) │
942
  └─────────────────┘
943
  ```
944
 
945
+ ### 4.3 运动控制流程
946
 
947
  ```
948
+ 唤醒词检测 / STT 结果 / TTS 事件
949
 
950
  ┌─────────────────┐
951
  │ 运动队列管理 │
 
954
 
955
  ┌─────────────────┐
956
  │ 高优先级运动 │
957
+ │ (唤醒词确认) │
958
  └────────┬────────┘
959
 
960
 
 
966
 
967
  ┌─────────────────┐
968
  │ 低优先级运动 │
969
+ │ (语音反应) │
970
  └────────┬────────┘
971
 
972
 
 
979
  └─────────────────┘
980
  ```
981
 
982
+ ## 5. 依赖项
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
983
 
984
+ ### 5.1 核心依赖
985
 
986
+ ```toml
987
+ dependencies = [
988
+ # Reachy Mini SDK
989
+ "reachy-mini",
990
+
991
+ # 音频处理
992
+ "sounddevice>=0.4.6",
993
+ "numpy>=1.24.0",
994
+
995
+ # 语音处理
996
+ "pymicro-wakeword>=2.0.0,<3.0.0",
997
+ "pyopen-wakeword>=1.0.0,<2.0.0",
998
+
999
+ # ESPHome
1000
+ "aioesphomeapi>=42.0.0",
1001
+ "zeroconf>=0.100.0",
1002
+
1003
+ # 运动控制
1004
+ "scipy>=1.10.0",
1005
+
1006
+ # Web UI (可选)
1007
+ "gradio>=4.0.0",
1008
+ ]
1009
  ```
1010
 
1011
+ ### 5.2 可选依赖
 
 
 
 
 
 
 
 
 
 
 
 
1012
 
1013
+ ```toml
1014
+ [project.optional-dependencies]
1015
+ wireless = [
1016
+ "reachy-mini[wireless]",
1017
+ ]
1018
 
1019
+ vision = [
1020
+ "pollen-vision",
1021
+ "opencv-python>=4.8.0",
1022
+ "mediapipe>=0.10.0",
1023
+ ]
1024
 
1025
+ dev = [
1026
+ "pytest>=7.4.0",
1027
+ "pytest-asyncio>=0.21.0",
1028
+ "ruff>=0.1.0",
1029
+ ]
1030
+ ```
1031
 
1032
+ ## 6. 性能优化
1033
 
1034
+ ### 6.1 音频处理
1035
  - 使用异步 I/O 减少阻塞
1036
  - 音频块大小优化(1024 samples)
1037
  - 使用 numpy 加速数值计算
1038
  - 预分配缓冲区减少内存分配
1039
 
1040
+ ### 6.2 运动控制
 
1041
  - 运动队列优先级管理
1042
  - 运动平滑插值
1043
  - 批量运动命令合并
1044
  - 延迟预算管理
1045
 
1046
+ ### 6.3 网络
 
1047
  - ESPHome 连接池
1048
  - 消息批量发送
1049
  - 压缩音频数据
1050
  - 心跳检测
1051
 
1052
+ ## 7. 安全考虑
1053
 
1054
  1. **音频隐私**:
1055
  - 不存储用户音频(除非明确授权)
 
1068
  - 防火墙配置
1069
  - 访问控制
1070
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1071
  ## 8. 部署
1072
 
1073
+ ### 8.1 安装步骤
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1074
 
1075
  ```bash
1076
  # 创建虚拟环境
 
1084
  pip install -e .[wireless,vision,dev]
1085
  ```
1086
 
1087
+ ### 8.2 运行
1088
 
1089
  ```bash
1090
  # 启动应用
 
1095
 
1096
  # 启动无线版本
1097
  python -m reachy_mini_ha_voice --wireless
1098
+ ```
1099
+
1100
+ ### 8.3 Home Assistant 集成
1101
+
1102
+ 1. 在 Home Assistant 中添加 ESPHome 集成
1103
+ 2. 输入 Reachy Mini 的 IP 地址和端口(6053)
1104
+ 3. 配置 STT/TTS 服务
1105
+ 4. 创建自动化和脚本