Desmond-Dong committed on
Commit
0832ea3
·
1 Parent(s): a88f884

v0.2.18: Fix audio playback - restore wakeup sound, use push_audio_sample

Browse files

- Rewrite audio_player.py based on linux-voice-assistant pattern
- Use push_audio_sample() with proper pacing (like conversation_app)
- Restore wakeup sound playback in satellite.py wakeup()
- Remove tts_playing flag - recording and playback can run simultaneously
- GStreamer has separate pipelines for recording and playback

pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
- version = "0.2.17"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
 
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
+ version = "0.2.18"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
reachy_mini_ha_voice/__init__.py CHANGED
@@ -11,7 +11,7 @@ Key features:
11
  - Reachy Mini motion control integration
12
  """
13
 
14
- __version__ = "0.2.17"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
 
11
  - Reachy Mini motion control integration
12
  """
13
 
14
+ __version__ = "0.2.18"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
reachy_mini_ha_voice/audio_player.py CHANGED
@@ -1,10 +1,21 @@
1
- """Audio player using Reachy Mini's media system."""
 
 
 
 
 
 
 
 
 
2
 
3
  import logging
 
 
4
  import threading
5
  import time
 
6
  from collections.abc import Callable
7
- from pathlib import Path
8
  from typing import List, Optional, Union
9
 
10
  import numpy as np
@@ -17,8 +28,7 @@ _LOGGER = logging.getLogger(__name__)
17
  class AudioPlayer:
18
  """Audio player using Reachy Mini's media system.
19
 
20
- Uses push_audio_sample() to write audio to the GStreamer pipeline.
21
- The caller must pause audio recording during playback to avoid conflicts.
22
  """
23
 
24
  def __init__(self, reachy_mini=None) -> None:
@@ -31,6 +41,7 @@ class AudioPlayer:
31
  self._unduck_volume: float = 1.0
32
  self._current_volume: float = 1.0
33
  self._stop_flag = threading.Event()
 
34
 
35
  def set_reachy_mini(self, reachy_mini) -> None:
36
  """Set the Reachy Mini instance."""
@@ -42,6 +53,13 @@ class AudioPlayer:
42
  done_callback: Optional[Callable[[], None]] = None,
43
  stop_first: bool = True,
44
  ) -> None:
 
 
 
 
 
 
 
45
  if stop_first:
46
  self.stop()
47
 
@@ -55,6 +73,7 @@ class AudioPlayer:
55
  self._play_next()
56
 
57
  def _play_next(self) -> None:
 
58
  if not self._playlist or self._stop_flag.is_set():
59
  self._on_playback_finished()
60
  return
@@ -64,39 +83,55 @@ class AudioPlayer:
64
  self.is_playing = True
65
 
66
  # Start playback in a thread
67
- thread = threading.Thread(target=self._play_file, args=(next_url,), daemon=True)
68
- thread.start()
 
 
 
 
69
 
70
  def _play_file(self, file_path: str) -> None:
71
- """Play an audio file using push_audio_sample()."""
 
 
 
 
 
72
  try:
73
  # Handle URLs - download first
74
  if file_path.startswith(("http://", "https://")):
75
- import urllib.request
76
- import tempfile
77
-
78
- _LOGGER.debug("Downloading TTS audio from %s", file_path)
79
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
80
- urllib.request.urlretrieve(file_path, tmp.name)
81
- file_path = tmp.name
82
  _LOGGER.debug("Downloaded to %s", file_path)
83
 
84
  if self._stop_flag.is_set():
85
  return
86
 
87
- # Use push_audio_sample for playback
 
 
 
 
88
  if self.reachy_mini is not None:
89
- try:
90
- self._play_via_push_audio(file_path)
91
- except Exception as e:
92
- _LOGGER.warning("push_audio_sample failed: %s", e)
93
  else:
94
  _LOGGER.warning("No reachy_mini instance, cannot play audio")
95
 
96
  except Exception as e:
97
  _LOGGER.error("Error playing audio: %s", e)
98
  finally:
 
 
 
 
 
 
 
99
  self.is_playing = False
 
100
  # Play next in playlist or finish
101
  if self._playlist and not self._stop_flag.is_set():
102
  self._play_next()
@@ -104,18 +139,17 @@ class AudioPlayer:
104
  self._on_playback_finished()
105
 
106
  def _play_via_push_audio(self, file_path: str) -> None:
107
- """Play audio by pushing samples to Reachy Mini's GStreamer pipeline.
108
 
109
- This writes audio directly to the existing playback pipeline.
110
- The caller should pause audio recording during this operation.
 
111
  """
112
  # Read audio file
113
  data, input_samplerate = sf.read(file_path, dtype='float32')
114
- _LOGGER.debug("Audio file: %s, samplerate=%d, shape=%s", file_path, input_samplerate, data.shape)
115
 
116
- # Get output sample rate from Reachy Mini
117
  output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
118
- _LOGGER.debug("Output samplerate: %d", output_samplerate)
119
 
120
  # Convert to mono if stereo
121
  if data.ndim == 2:
@@ -128,29 +162,47 @@ class AudioPlayer:
128
  if input_samplerate != output_samplerate:
129
  num_samples = int(len(data) * output_samplerate / input_samplerate)
130
  data = scipy.signal.resample(data, num_samples)
131
- _LOGGER.debug("Resampled to %d samples", num_samples)
132
 
133
- # Push audio in chunks (like conversation_app)
134
- # Use smaller chunks for smoother playback
135
- chunk_duration = 0.05 # 50ms chunks
 
 
136
  chunk_size = int(output_samplerate * chunk_duration)
137
 
 
 
 
138
  for i in range(0, len(data), chunk_size):
139
  if self._stop_flag.is_set():
140
- _LOGGER.debug("Playback stopped by flag")
141
- break
 
142
  chunk = data[i:i + chunk_size].astype(np.float32)
143
  self.reachy_mini.media.push_audio_sample(chunk)
144
- # Sleep to match chunk duration (prevents buffer overflow)
145
- time.sleep(chunk_duration * 0.8) # Slightly less to keep buffer fed
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  _LOGGER.debug("Audio playback complete")
148
 
149
  def _on_playback_finished(self) -> None:
150
- """Called when playback is finished."""
151
  self.is_playing = False
 
152
  todo_callback: Optional[Callable[[], None]] = None
153
-
154
  with self._done_callback_lock:
155
  if self._done_callback:
156
  todo_callback = self._done_callback
@@ -163,29 +215,41 @@ class AudioPlayer:
163
  _LOGGER.exception("Unexpected error running done callback")
164
 
165
  def pause(self) -> None:
 
166
  self.is_playing = False
167
 
168
  def resume(self) -> None:
 
169
  if self._playlist:
170
  self._play_next()
171
 
172
  def stop(self) -> None:
 
173
  self._stop_flag.set()
 
 
174
  if self.reachy_mini is not None:
175
  try:
176
- self.reachy_mini.media.clear_output_buffer()
177
- except Exception:
178
- pass
 
 
 
 
179
  self._playlist.clear()
180
  self.is_playing = False
181
 
182
  def duck(self) -> None:
 
183
  self._current_volume = self._duck_volume
184
 
185
  def unduck(self) -> None:
 
186
  self._current_volume = self._unduck_volume
187
 
188
  def set_volume(self, volume: int) -> None:
 
189
  volume = max(0, min(100, volume))
190
  self._unduck_volume = volume / 100.0
191
  self._duck_volume = self._unduck_volume / 2
 
1
+ """Audio player using Reachy Mini's media system.
2
+
3
+ This module provides audio playback functionality similar to linux-voice-assistant's
4
+ MpvMediaPlayer, but using Reachy Mini's GStreamer-based audio system.
5
+
6
+ For local files: Uses push_audio_sample() to feed the GStreamer playback pipeline.
7
+ For URLs (TTS): Downloads to temp file, then uses push_audio_sample().
8
+
9
+ This approach avoids conflicts with the recording pipeline.
10
+ """
11
 
12
  import logging
13
+ import os
14
+ import tempfile
15
  import threading
16
  import time
17
+ import urllib.request
18
  from collections.abc import Callable
 
19
  from typing import List, Optional, Union
20
 
21
  import numpy as np
 
28
  class AudioPlayer:
29
  """Audio player using Reachy Mini's media system.
30
 
31
+ Similar to linux-voice-assistant's MpvMediaPlayer but using GStreamer.
 
32
  """
33
 
34
  def __init__(self, reachy_mini=None) -> None:
 
41
  self._unduck_volume: float = 1.0
42
  self._current_volume: float = 1.0
43
  self._stop_flag = threading.Event()
44
+ self._playback_thread: Optional[threading.Thread] = None
45
 
46
  def set_reachy_mini(self, reachy_mini) -> None:
47
  """Set the Reachy Mini instance."""
 
53
  done_callback: Optional[Callable[[], None]] = None,
54
  stop_first: bool = True,
55
  ) -> None:
56
+ """Play audio file(s) or URL(s).
57
+
58
+ Args:
59
+ url: Single URL/path or list of URLs/paths to play
60
+ done_callback: Called when all playback is finished
61
+ stop_first: Stop current playback before starting new
62
+ """
63
  if stop_first:
64
  self.stop()
65
 
 
73
  self._play_next()
74
 
75
  def _play_next(self) -> None:
76
+ """Play the next item in the playlist."""
77
  if not self._playlist or self._stop_flag.is_set():
78
  self._on_playback_finished()
79
  return
 
83
  self.is_playing = True
84
 
85
  # Start playback in a thread
86
+ self._playback_thread = threading.Thread(
87
+ target=self._play_file,
88
+ args=(next_url,),
89
+ daemon=True
90
+ )
91
+ self._playback_thread.start()
92
 
93
  def _play_file(self, file_path: str) -> None:
94
+ """Play an audio file.
95
+
96
+ For URLs: Download to temp file first.
97
+ Then use push_audio_sample() to play through the GStreamer pipeline.
98
+ """
99
+ temp_file = None
100
  try:
101
  # Handle URLs - download first
102
  if file_path.startswith(("http://", "https://")):
103
+ _LOGGER.debug("Downloading audio from %s", file_path)
104
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
105
+ temp_file.close()
106
+ urllib.request.urlretrieve(file_path, temp_file.name)
107
+ file_path = temp_file.name
 
 
108
  _LOGGER.debug("Downloaded to %s", file_path)
109
 
110
  if self._stop_flag.is_set():
111
  return
112
 
113
+ if not os.path.exists(file_path):
114
+ _LOGGER.error("Audio file not found: %s", file_path)
115
+ return
116
+
117
+ # Play using Reachy Mini's audio system
118
  if self.reachy_mini is not None:
119
+ self._play_via_push_audio(file_path)
 
 
 
120
  else:
121
  _LOGGER.warning("No reachy_mini instance, cannot play audio")
122
 
123
  except Exception as e:
124
  _LOGGER.error("Error playing audio: %s", e)
125
  finally:
126
+ # Clean up temp file
127
+ if temp_file is not None:
128
+ try:
129
+ os.unlink(temp_file.name)
130
+ except Exception:
131
+ pass
132
+
133
  self.is_playing = False
134
+
135
  # Play next in playlist or finish
136
  if self._playlist and not self._stop_flag.is_set():
137
  self._play_next()
 
139
  self._on_playback_finished()
140
 
141
  def _play_via_push_audio(self, file_path: str) -> None:
142
+ """Play audio using push_audio_sample().
143
 
144
+ This pushes audio data to the GStreamer playback pipeline.
145
+ Recording and playback pipelines are separate in GStreamer,
146
+ so they can run simultaneously (like in conversation_app).
147
  """
148
  # Read audio file
149
  data, input_samplerate = sf.read(file_path, dtype='float32')
 
150
 
151
+ # Get output sample rate
152
  output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
 
153
 
154
  # Convert to mono if stereo
155
  if data.ndim == 2:
 
162
  if input_samplerate != output_samplerate:
163
  num_samples = int(len(data) * output_samplerate / input_samplerate)
164
  data = scipy.signal.resample(data, num_samples)
 
165
 
166
+ total_duration = len(data) / output_samplerate
167
+ _LOGGER.debug("Playing %.2fs audio at %dHz", total_duration, output_samplerate)
168
+
169
+ # Push audio in chunks (like conversation_app's play_loop)
170
+ chunk_duration = 0.02 # 20ms chunks
171
  chunk_size = int(output_samplerate * chunk_duration)
172
 
173
+ start_time = time.monotonic()
174
+ samples_pushed = 0
175
+
176
  for i in range(0, len(data), chunk_size):
177
  if self._stop_flag.is_set():
178
+ _LOGGER.debug("Playback stopped")
179
+ return
180
+
181
  chunk = data[i:i + chunk_size].astype(np.float32)
182
  self.reachy_mini.media.push_audio_sample(chunk)
183
+ samples_pushed += len(chunk)
184
+
185
+ # Pace the pushing to avoid buffer overflow
186
+ # Calculate how much time should have elapsed
187
+ expected_time = samples_pushed / output_samplerate
188
+ actual_time = time.monotonic() - start_time
189
+ sleep_time = expected_time - actual_time - 0.01 # 10ms ahead
190
+
191
+ if sleep_time > 0:
192
+ time.sleep(sleep_time)
193
+
194
+ # Wait for playback to complete
195
+ remaining = total_duration - (time.monotonic() - start_time)
196
+ if remaining > 0:
197
+ time.sleep(remaining + 0.05) # Small buffer
198
 
199
  _LOGGER.debug("Audio playback complete")
200
 
201
  def _on_playback_finished(self) -> None:
202
+ """Called when all playback is finished."""
203
  self.is_playing = False
204
+
205
  todo_callback: Optional[Callable[[], None]] = None
 
206
  with self._done_callback_lock:
207
  if self._done_callback:
208
  todo_callback = self._done_callback
 
215
  _LOGGER.exception("Unexpected error running done callback")
216
 
217
  def pause(self) -> None:
218
+ """Pause playback."""
219
  self.is_playing = False
220
 
221
  def resume(self) -> None:
222
+ """Resume playback."""
223
  if self._playlist:
224
  self._play_next()
225
 
226
  def stop(self) -> None:
227
+ """Stop playback and clear playlist."""
228
  self._stop_flag.set()
229
+
230
+ # Clear the playback buffer
231
  if self.reachy_mini is not None:
232
  try:
233
+ if hasattr(self.reachy_mini.media, 'audio'):
234
+ audio = self.reachy_mini.media.audio
235
+ if hasattr(audio, 'clear_player'):
236
+ audio.clear_player()
237
+ except Exception as e:
238
+ _LOGGER.debug("Error clearing player: %s", e)
239
+
240
  self._playlist.clear()
241
  self.is_playing = False
242
 
243
  def duck(self) -> None:
244
+ """Lower volume for ducking."""
245
  self._current_volume = self._duck_volume
246
 
247
  def unduck(self) -> None:
248
+ """Restore volume after ducking."""
249
  self._current_volume = self._unduck_volume
250
 
251
  def set_volume(self, volume: int) -> None:
252
+ """Set volume (0-100)."""
253
  volume = max(0, min(100, volume))
254
  self._unduck_volume = volume / 100.0
255
  self._duck_volume = self._unduck_volume / 2
reachy_mini_ha_voice/models.py CHANGED
@@ -73,10 +73,6 @@ class ServerState:
73
  reachy_mini: Optional[object] = None
74
  motion_enabled: bool = True
75
  motion: Optional[object] = None # ReachyMiniMotion instance
76
-
77
- # TTS playback state - when True, audio recording should pause
78
- # to avoid GStreamer pipeline conflicts (recording and playback share resources)
79
- tts_playing: bool = False
80
 
81
  media_player_entity: "Optional[MediaPlayerEntity]" = None
82
  satellite: "Optional[VoiceSatelliteProtocol]" = None
 
73
  reachy_mini: Optional[object] = None
74
  motion_enabled: bool = True
75
  motion: Optional[object] = None # ReachyMiniMotion instance
 
 
 
 
76
 
77
  media_player_entity: "Optional[MediaPlayerEntity]" = None
78
  satellite: "Optional[VoiceSatelliteProtocol]" = None
reachy_mini_ha_voice/satellite.py CHANGED
@@ -328,10 +328,8 @@ class VoiceSatelliteProtocol(APIServer):
328
  )
329
  self.duck()
330
  self._is_streaming_audio = True
331
- # NOTE: Wakeup sound disabled - playing audio while recording causes daemon crash
332
- # The GStreamer pipeline conflicts when push_audio_sample is called during get_audio_sample
333
- # self.state.tts_player.play(self.state.wakeup_sound)
334
- _LOGGER.debug("Wakeup sound skipped to prevent daemon crash")
335
 
336
  def stop(self) -> None:
337
  self.state.active_wake_words.discard(self.state.stop_word.id)
@@ -352,10 +350,6 @@ class VoiceSatelliteProtocol(APIServer):
352
  self._tts_played = True
353
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
354
 
355
- # Set flag to pause audio recording during TTS playback
356
- # This prevents GStreamer pipeline conflicts (recording and playback share resources)
357
- self.state.tts_playing = True
358
-
359
  self.state.active_wake_words.add(self.state.stop_word.id)
360
  self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
361
 
@@ -368,9 +362,6 @@ class VoiceSatelliteProtocol(APIServer):
368
  self.state.music_player.unduck()
369
 
370
  def _tts_finished(self) -> None:
371
- # Clear TTS playing flag to resume audio recording
372
- self.state.tts_playing = False
373
-
374
  self.state.active_wake_words.discard(self.state.stop_word.id)
375
  self.send_messages([VoiceAssistantAnnounceFinished()])
376
 
 
328
  )
329
  self.duck()
330
  self._is_streaming_audio = True
331
+ # Play wakeup sound (like linux-voice-assistant does)
332
+ self.state.tts_player.play(self.state.wakeup_sound)
 
 
333
 
334
  def stop(self) -> None:
335
  self.state.active_wake_words.discard(self.state.stop_word.id)
 
350
  self._tts_played = True
351
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
352
 
 
 
 
 
353
  self.state.active_wake_words.add(self.state.stop_word.id)
354
  self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
355
 
 
362
  self.state.music_player.unduck()
363
 
364
  def _tts_finished(self) -> None:
 
 
 
365
  self.state.active_wake_words.discard(self.state.stop_word.id)
366
  self.send_messages([VoiceAssistantAnnounceFinished()])
367
 
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -424,12 +424,6 @@ class VoiceAssistantService:
424
  if not self._wait_for_satellite():
425
  continue
426
 
427
- # Pause audio recording during TTS playback to avoid GStreamer conflicts
428
- # Recording and playback pipelines share the same audio device
429
- if self._state is not None and self._state.tts_playing:
430
- time.sleep(0.05)
431
- continue
432
-
433
  self._update_wake_words_list(ctx)
434
 
435
  # Get audio from Reachy Mini
 
424
  if not self._wait_for_satellite():
425
  continue
426
 
 
 
 
 
 
 
427
  self._update_wake_words_list(ctx)
428
 
429
  # Get audio from Reachy Mini