Desmond-Dong committed on
Commit
beedd8b
·
1 Parent(s): 9790221

"fix-improve-audio-dtype-validation-and-error-handling"

Browse files
reachy_mini_ha_voice/voice_assistant.py CHANGED
@@ -398,33 +398,41 @@ class VoiceAssistantService:
398
  time.sleep(0.01)
399
  continue
400
 
401
- # Convert to float32 BEFORE any math operations
 
402
  # This prevents "ufunc 'add' did not contain a loop" errors
403
- # when SDK returns unexpected types (e.g., dtype='S1')
404
- if audio_data.dtype != np.float32:
405
  # Check for non-numeric types that cannot be converted
406
- if audio_data.dtype.kind in ('S', 'U', 'O', 'V'): # bytes, unicode, object, void
407
  _LOGGER.debug("Audio data has non-numeric dtype: %s, skipping", audio_data.dtype)
408
  time.sleep(0.01)
409
  continue
410
- try:
411
- audio_data = audio_data.astype(np.float32)
412
- except (TypeError, ValueError) as e:
413
- _LOGGER.debug("Failed to convert audio to float32: %s (dtype=%s)", e, audio_data.dtype)
414
- time.sleep(0.01)
415
- continue
 
 
416
 
417
  # Convert stereo to mono (take mean of channels)
418
  # SDK returns shape (samples, 2) for stereo
419
- # audio_data is already float32 at this point
420
- if audio_data.ndim == 2 and audio_data.shape[1] == 2:
421
- audio_chunk_array = np.mean(audio_data, axis=1)
422
- elif audio_data.ndim == 2:
423
- audio_chunk_array = audio_data[:, 0]
424
- elif audio_data.ndim == 1:
425
- audio_chunk_array = audio_data
426
- else:
427
- _LOGGER.debug("Unexpected audio shape: %s", audio_data.shape)
 
 
 
 
 
 
428
  time.sleep(0.01)
429
  continue
430
 
 
398
  time.sleep(0.01)
399
  continue
400
 
401
+ # CRITICAL: Validate and convert dtype BEFORE any math operations
402
+ # SDK should return float32, but sometimes returns unexpected types
403
  # This prevents "ufunc 'add' did not contain a loop" errors
404
+ try:
 
405
  # Check for non-numeric types that cannot be converted
406
+ if audio_data.dtype.kind in ('S', 'U', 'O', 'V', 'b'): # bytes, unicode, object, void, boolean
407
  _LOGGER.debug("Audio data has non-numeric dtype: %s, skipping", audio_data.dtype)
408
  time.sleep(0.01)
409
  continue
410
+
411
+ # Force conversion to float32 if not already
412
+ if audio_data.dtype != np.float32:
413
+ audio_data = np.asarray(audio_data, dtype=np.float32)
414
+ except (TypeError, ValueError) as e:
415
+ _LOGGER.debug("Failed to convert audio to float32: %s (dtype=%s)", e, audio_data.dtype)
416
+ time.sleep(0.01)
417
+ continue
418
 
419
  # Convert stereo to mono (take mean of channels)
420
  # SDK returns shape (samples, 2) for stereo
421
+ # audio_data is guaranteed to be float32 at this point
422
+ try:
423
+ if audio_data.ndim == 2 and audio_data.shape[1] == 2:
424
+ audio_chunk_array = audio_data.mean(axis=1)
425
+ elif audio_data.ndim == 2:
426
+ audio_chunk_array = audio_data[:, 0].copy()
427
+ elif audio_data.ndim == 1:
428
+ audio_chunk_array = audio_data
429
+ else:
430
+ _LOGGER.debug("Unexpected audio shape: %s", audio_data.shape)
431
+ time.sleep(0.01)
432
+ continue
433
+ except Exception as e:
434
+ _LOGGER.debug("Error converting stereo to mono: %s (dtype=%s, shape=%s)",
435
+ e, audio_data.dtype, audio_data.shape)
436
  time.sleep(0.01)
437
  continue
438