Desmond-Dong committed on
Commit
cd59e9e
·
1 Parent(s): d94735f

v0.2.16: Fix TTS playback - pause recording during playback

Browse files

- Restore tts_playing flag in models.py
- Set tts_playing=True in play_tts(), clear in _tts_finished()
- Pause audio recording loop when tts_playing is True
- Use push_audio_sample() with proper synchronization
- This prevents GStreamer pipeline conflicts between recording and playback

pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
- version = "0.2.15"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
 
4
 
5
  [project]
6
  name = "reachy_mini_ha_voice"
7
+ version = "0.2.16"
8
  description = "Home Assistant Voice Assistant for Reachy Mini"
9
  readme = "README.md"
10
  requires-python = ">=3.10"
reachy_mini_ha_voice/__init__.py CHANGED
@@ -11,7 +11,7 @@ Key features:
11
  - Reachy Mini motion control integration
12
  """
13
 
14
- __version__ = "0.2.15"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
 
11
  - Reachy Mini motion control integration
12
  """
13
 
14
+ __version__ = "0.2.16"
15
  __author__ = "Desmond Dong"
16
 
17
  # Don't import main module here to avoid runpy warning
reachy_mini_ha_voice/audio_player.py CHANGED
@@ -9,12 +9,17 @@ from typing import List, Optional, Union
9
 
10
  import numpy as np
11
  import soundfile as sf
 
12
 
13
  _LOGGER = logging.getLogger(__name__)
14
 
15
 
16
  class AudioPlayer:
17
- """Audio player using Reachy Mini's media system."""
 
 
 
 
18
 
19
  def __init__(self, reachy_mini=None) -> None:
20
  self.reachy_mini = reachy_mini
@@ -63,31 +68,30 @@ class AudioPlayer:
63
  thread.start()
64
 
65
  def _play_file(self, file_path: str) -> None:
66
- """Play an audio file using play_sound() - independent GStreamer playbin."""
67
  try:
68
  # Handle URLs - download first
69
  if file_path.startswith(("http://", "https://")):
70
  import urllib.request
71
  import tempfile
72
 
 
73
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
74
  urllib.request.urlretrieve(file_path, tmp.name)
75
  file_path = tmp.name
 
76
 
77
  if self._stop_flag.is_set():
78
  return
79
 
80
- # Use Reachy Mini's play_sound() for TTS playback
81
- # This creates an independent GStreamer playbin that doesn't conflict
82
- # with the recording pipeline (unlike push_audio_sample which shares appsrc)
83
  if self.reachy_mini is not None:
84
  try:
85
- self._play_via_play_sound(file_path)
86
  except Exception as e:
87
- _LOGGER.warning("play_sound failed, trying sounddevice: %s", e)
88
- self._play_file_fallback(file_path)
89
  else:
90
- self._play_file_fallback(file_path)
91
 
92
  except Exception as e:
93
  _LOGGER.error("Error playing audio: %s", e)
@@ -99,56 +103,48 @@ class AudioPlayer:
99
  else:
100
  self._on_playback_finished()
101
 
102
- def _play_via_play_sound(self, file_path: str) -> None:
103
- """Play audio using Reachy Mini's play_sound() method.
104
 
105
- This creates an independent GStreamer playbin pipeline that doesn't
106
- conflict with the recording pipeline. This is the key difference from
107
- push_audio_sample() which writes to the shared appsrc pipeline.
108
  """
109
- # play_sound() is non-blocking, so we need to estimate duration and wait
110
- data, samplerate = sf.read(file_path, dtype='float32')
111
- duration = len(data) / samplerate
112
 
113
- _LOGGER.debug("Playing via play_sound: %s (duration=%.2fs)", file_path, duration)
 
 
114
 
115
- # Call play_sound - creates independent playbin
116
- self.reachy_mini.media.play_sound(file_path)
117
-
118
- # Wait for playback to complete (play_sound is non-blocking)
119
- # Check stop flag periodically
120
- elapsed = 0.0
121
- while elapsed < duration and not self._stop_flag.is_set():
122
- time.sleep(0.1)
123
- elapsed += 0.1
124
-
125
- def _play_file_fallback(self, file_path: str) -> None:
126
- """Play audio using sounddevice (fallback when Reachy Mini not available)."""
127
- import sounddevice as sd
128
- import scipy.signal
129
-
130
- data, samplerate = sf.read(file_path, dtype='float32')
131
-
132
  # Convert to mono if stereo
133
  if data.ndim == 2:
134
  data = data.mean(axis=1)
135
-
136
  # Apply volume
137
  data = data * self._current_volume
138
 
139
- # Resample to 48000Hz (standard rate supported by most devices)
140
- target_samplerate = 48000
141
- if samplerate != target_samplerate:
142
- num_samples = int(len(data) * target_samplerate / samplerate)
143
  data = scipy.signal.resample(data, num_samples)
144
- samplerate = target_samplerate
145
-
146
- if not self._stop_flag.is_set():
147
- try:
148
- sd.play(data.astype(np.float32), samplerate)
149
- sd.wait()
150
- except Exception as e:
151
- _LOGGER.warning("sounddevice playback failed: %s", e)
 
 
 
 
 
 
 
 
 
152
 
153
  def _on_playback_finished(self) -> None:
154
  """Called when playback is finished."""
@@ -175,6 +171,11 @@ class AudioPlayer:
175
 
176
  def stop(self) -> None:
177
  self._stop_flag.set()
 
 
 
 
 
178
  self._playlist.clear()
179
  self.is_playing = False
180
 
 
9
 
10
  import numpy as np
11
  import soundfile as sf
12
+ import scipy.signal
13
 
14
  _LOGGER = logging.getLogger(__name__)
15
 
16
 
17
  class AudioPlayer:
18
+ """Audio player using Reachy Mini's media system.
19
+
20
+ Uses push_audio_sample() to write audio to the GStreamer pipeline.
21
+ The caller must pause audio recording during playback to avoid conflicts.
22
+ """
23
 
24
  def __init__(self, reachy_mini=None) -> None:
25
  self.reachy_mini = reachy_mini
 
68
  thread.start()
69
 
70
  def _play_file(self, file_path: str) -> None:
71
+ """Play an audio file using push_audio_sample()."""
72
  try:
73
  # Handle URLs - download first
74
  if file_path.startswith(("http://", "https://")):
75
  import urllib.request
76
  import tempfile
77
 
78
+ _LOGGER.debug("Downloading TTS audio from %s", file_path)
79
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
80
  urllib.request.urlretrieve(file_path, tmp.name)
81
  file_path = tmp.name
82
+ _LOGGER.debug("Downloaded to %s", file_path)
83
 
84
  if self._stop_flag.is_set():
85
  return
86
 
87
+ # Use push_audio_sample for playback
 
 
88
  if self.reachy_mini is not None:
89
  try:
90
+ self._play_via_push_audio(file_path)
91
  except Exception as e:
92
+ _LOGGER.warning("push_audio_sample failed: %s", e)
 
93
  else:
94
+ _LOGGER.warning("No reachy_mini instance, cannot play audio")
95
 
96
  except Exception as e:
97
  _LOGGER.error("Error playing audio: %s", e)
 
103
  else:
104
  self._on_playback_finished()
105
 
106
+ def _play_via_push_audio(self, file_path: str) -> None:
107
+ """Play audio by pushing samples to Reachy Mini's GStreamer pipeline.
108
 
109
+ This writes audio directly to the existing playback pipeline.
110
+ The caller should pause audio recording during this operation.
 
111
  """
112
+ # Read audio file
113
+ data, input_samplerate = sf.read(file_path, dtype='float32')
114
+ _LOGGER.debug("Audio file: %s, samplerate=%d, shape=%s", file_path, input_samplerate, data.shape)
115
 
116
+ # Get output sample rate from Reachy Mini
117
+ output_samplerate = self.reachy_mini.media.get_output_audio_samplerate()
118
+ _LOGGER.debug("Output samplerate: %d", output_samplerate)
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # Convert to mono if stereo
121
  if data.ndim == 2:
122
  data = data.mean(axis=1)
123
+
124
  # Apply volume
125
  data = data * self._current_volume
126
 
127
+ # Resample if needed
128
+ if input_samplerate != output_samplerate:
129
+ num_samples = int(len(data) * output_samplerate / input_samplerate)
 
130
  data = scipy.signal.resample(data, num_samples)
131
+ _LOGGER.debug("Resampled to %d samples", num_samples)
132
+
133
+ # Push audio in chunks (like conversation_app)
134
+ # Use smaller chunks for smoother playback
135
+ chunk_duration = 0.05 # 50ms chunks
136
+ chunk_size = int(output_samplerate * chunk_duration)
137
+
138
+ for i in range(0, len(data), chunk_size):
139
+ if self._stop_flag.is_set():
140
+ _LOGGER.debug("Playback stopped by flag")
141
+ break
142
+ chunk = data[i:i + chunk_size].astype(np.float32)
143
+ self.reachy_mini.media.push_audio_sample(chunk)
144
+ # Sleep to match chunk duration (prevents buffer overflow)
145
+ time.sleep(chunk_duration * 0.8) # Slightly less to keep buffer fed
146
+
147
+ _LOGGER.debug("Audio playback complete")
148
 
149
  def _on_playback_finished(self) -> None:
150
  """Called when playback is finished."""
 
171
 
172
  def stop(self) -> None:
173
  self._stop_flag.set()
174
+ if self.reachy_mini is not None:
175
+ try:
176
+ self.reachy_mini.media.clear_output_buffer()
177
+ except Exception:
178
+ pass
179
  self._playlist.clear()
180
  self.is_playing = False
181
 
reachy_mini_ha_voice/models.py CHANGED
@@ -73,6 +73,10 @@ class ServerState:
73
  reachy_mini: Optional[object] = None
74
  motion_enabled: bool = True
75
  motion: Optional[object] = None # ReachyMiniMotion instance
 
 
 
 
76
 
77
  media_player_entity: "Optional[MediaPlayerEntity]" = None
78
  satellite: "Optional[VoiceSatelliteProtocol]" = None
 
73
  reachy_mini: Optional[object] = None
74
  motion_enabled: bool = True
75
  motion: Optional[object] = None # ReachyMiniMotion instance
76
+
77
+ # TTS playback state - when True, audio recording should pause
78
+ # to avoid GStreamer pipeline conflicts (recording and playback share resources)
79
+ tts_playing: bool = False
80
 
81
  media_player_entity: "Optional[MediaPlayerEntity]" = None
82
  satellite: "Optional[VoiceSatelliteProtocol]" = None
reachy_mini_ha_voice/satellite.py CHANGED
@@ -352,6 +352,10 @@ class VoiceSatelliteProtocol(APIServer):
352
  self._tts_played = True
353
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
354
 
 
 
 
 
355
  self.state.active_wake_words.add(self.state.stop_word.id)
356
  self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
357
 
@@ -364,6 +368,9 @@ class VoiceSatelliteProtocol(APIServer):
364
  self.state.music_player.unduck()
365
 
366
  def _tts_finished(self) -> None:
 
 
 
367
  self.state.active_wake_words.discard(self.state.stop_word.id)
368
  self.send_messages([VoiceAssistantAnnounceFinished()])
369
 
 
352
  self._tts_played = True
353
  _LOGGER.debug("Playing TTS response: %s", self._tts_url)
354
 
355
+ # Set flag to pause audio recording during TTS playback
356
+ # This prevents GStreamer pipeline conflicts (recording and playback share resources)
357
+ self.state.tts_playing = True
358
+
359
  self.state.active_wake_words.add(self.state.stop_word.id)
360
  self.state.tts_player.play(self._tts_url, done_callback=self._tts_finished)
361
 
 
368
  self.state.music_player.unduck()
369
 
370
  def _tts_finished(self) -> None:
371
+ # Clear TTS playing flag to resume audio recording
372
+ self.state.tts_playing = False
373
+
374
  self.state.active_wake_words.discard(self.state.stop_word.id)
375
  self.send_messages([VoiceAssistantAnnounceFinished()])
376
 
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -424,6 +424,12 @@ class VoiceAssistantService:
424
  if not self._wait_for_satellite():
425
  continue
426
 
 
 
 
 
 
 
427
  self._update_wake_words_list(ctx)
428
 
429
  # Get audio from Reachy Mini
 
424
  if not self._wait_for_satellite():
425
  continue
426
 
427
+ # Pause audio recording during TTS playback to avoid GStreamer conflicts
428
+ # Recording and playback pipelines share the same audio device
429
+ if self._state is not None and self._state.tts_playing:
430
+ time.sleep(0.05)
431
+ continue
432
+
433
  self._update_wake_words_list(ctx)
434
 
435
  # Get audio from Reachy Mini