Desmond-Dong commited on
Commit
5b55e2a
·
1 Parent(s): dadc0d2

refactor: 使用SDK的compose_world_offset进行正确的pose组合

Browse files

- 使用create_head_pose构建head pose矩阵
- 使用compose_world_offset组合primary和secondary pose(与conversation_app一致)
- 添加Sendspin音频重采样(从48kHz/44.1kHz到16kHz)
- 修复_issue_control_command直接接收head_pose矩阵

reachy_mini_ha_voice/audio_player.py CHANGED
@@ -261,6 +261,7 @@ class AudioPlayer:
261
  """Handle incoming audio chunks from Sendspin server.
262
 
263
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
 
264
  """
265
  if self.reachy_mini is None:
266
  return
@@ -295,6 +296,15 @@ class AudioPlayer:
295
  # Mono: reshape to (samples, 1)
296
  audio_float = audio_float.reshape(-1, 1)
297
 
 
 
 
 
 
 
 
 
 
298
  # Apply volume
299
  audio_float = audio_float * self._current_volume
300
 
@@ -303,7 +313,7 @@ class AudioPlayer:
303
  try:
304
  self.reachy_mini.media.start_playing()
305
  self._sendspin_playback_started = True
306
- _LOGGER.info("Started media playback for Sendspin audio")
307
  except Exception as e:
308
  _LOGGER.warning("Failed to start media playback: %s", e)
309
 
 
261
  """Handle incoming audio chunks from Sendspin server.
262
 
263
  Plays the audio through Reachy Mini's speaker using push_audio_sample().
264
+ Resamples audio if needed (Reachy Mini uses 16kHz).
265
  """
266
  if self.reachy_mini is None:
267
  return
 
296
  # Mono: reshape to (samples, 1)
297
  audio_float = audio_float.reshape(-1, 1)
298
 
299
+ # Resample if needed (Reachy Mini uses 16kHz)
300
+ target_sample_rate = self.reachy_mini.media.get_output_audio_samplerate()
301
+ if fmt.sample_rate != target_sample_rate and target_sample_rate > 0:
302
+ import scipy.signal
303
+ # Calculate new length
304
+ new_length = int(len(audio_float) * target_sample_rate / fmt.sample_rate)
305
+ if new_length > 0:
306
+ audio_float = scipy.signal.resample(audio_float, new_length, axis=0)
307
+
308
  # Apply volume
309
  audio_float = audio_float * self._current_volume
310
 
 
313
  try:
314
  self.reachy_mini.media.start_playing()
315
  self._sendspin_playback_started = True
316
+ _LOGGER.info("Started media playback for Sendspin audio (target: %d Hz)", target_sample_rate)
317
  except Exception as e:
318
  _LOGGER.warning("Failed to start media playback: %s", e)
319
 
reachy_mini_ha_voice/movement_manager.py CHANGED
@@ -704,40 +704,74 @@ class MovementManager:
704
  except Exception as e:
705
  logger.debug("Error getting face tracking offsets: %s", e)
706
 
707
- def _compose_final_pose(self) -> Dict[str, float]:
708
- """Compose final pose from all sources.
709
 
710
- Uses simple addition for pose composition (same as original working version).
 
711
  """
712
- # Primary pose (from actions)
713
- pitch = self.state.target_pitch
714
- yaw = self.state.target_yaw
715
- roll = self.state.target_roll
716
- x = self.state.target_x
717
- y = self.state.target_y
718
- z = self.state.target_z
719
-
720
- # Add speech sway (secondary) - rotation and translation
721
- pitch += self.state.speech_pitch
722
- yaw += self.state.speech_yaw
723
- roll += self.state.speech_roll
724
- x += self.state.speech_x
725
- y += self.state.speech_y
726
- z += self.state.speech_z
727
-
728
- # Add breathing
729
- z += self.state.breathing_z
730
-
731
- # Add face tracking offsets (from camera worker)
 
 
 
 
 
 
732
  with self._face_tracking_lock:
733
  face_offsets = self._face_tracking_offsets
734
 
735
- x += face_offsets[0]
736
- y += face_offsets[1]
737
- z += face_offsets[2]
738
- roll += face_offsets[3]
739
- pitch += face_offsets[4]
740
- yaw += face_offsets[5]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
  # Antenna pose with freeze blending
743
  target_antenna_left = self.state.target_antenna_left + self.state.breathing_antenna_left
@@ -755,32 +789,38 @@ class MovementManager:
755
  antenna_left = target_antenna_left
756
  antenna_right = target_antenna_right
757
 
758
- return {
759
- "pitch": pitch,
760
- "yaw": yaw,
761
- "roll": roll,
762
- "x": x,
763
- "y": y,
764
- "z": z,
765
- "antenna_left": antenna_left,
766
- "antenna_right": antenna_right,
767
- "body_yaw": self.state.target_body_yaw,
768
- }
769
 
770
  # =========================================================================
771
  # Internal: Robot control (runs in control loop)
772
  # =========================================================================
773
 
774
- def _issue_control_command(self, pose: Dict[str, float]) -> None:
775
  """Send control command to robot with error throttling and connection health tracking."""
776
  if self.robot is None:
777
  return
778
 
779
  # Check if pose changed significantly (prevent unnecessary commands)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  if self._last_sent_pose is not None:
781
  max_diff = max(
782
- abs(pose[k] - self._last_sent_pose.get(k, 0.0))
783
- for k in pose.keys()
784
  )
785
  if max_diff < self._pose_change_threshold:
786
  # No significant change, skip sending command
@@ -798,30 +838,17 @@ class MovementManager:
798
  logger.debug("Attempting to send command after connection loss...")
799
 
800
  try:
801
- # Build head pose matrix
802
- # SDK uses 'xyz' euler order with [roll, pitch, yaw] (same as create_head_pose)
803
- rotation = R.from_euler('xyz', [
804
- pose["roll"],
805
- pose["pitch"],
806
- pose["yaw"],
807
- ])
808
-
809
- head_pose = np.eye(4)
810
- head_pose[:3, :3] = rotation.as_matrix()
811
- head_pose[0, 3] = pose["x"]
812
- head_pose[1, 3] = pose["y"]
813
- head_pose[2, 3] = pose["z"]
814
-
815
  # Send to robot (single control point!)
 
816
  self.robot.set_target(
817
  head=head_pose,
818
- antennas=[pose["antenna_right"], pose["antenna_left"]],
819
- body_yaw=pose["body_yaw"],
820
  )
821
 
822
  # Command succeeded - update connection health and cache
823
  self._last_successful_command = now
824
- self._last_sent_pose = pose.copy() # Cache sent pose
825
  self._consecutive_errors = 0 # Reset error counter
826
 
827
  if self._connection_lost:
@@ -900,11 +927,11 @@ class MovementManager:
900
  # 6. Update face tracking offsets from camera server
901
  self._update_face_tracking()
902
 
903
- # 7. Compose final pose
904
- pose = self._compose_final_pose()
905
 
906
  # 8. Send to robot (single control point!)
907
- self._issue_control_command(pose)
908
 
909
  except Exception as e:
910
  self._log_error_throttled(f"Control loop error: {e}")
 
704
  except Exception as e:
705
  logger.debug("Error getting face tracking offsets: %s", e)
706
 
707
+ def _compose_final_pose(self) -> Tuple[np.ndarray, Tuple[float, float], float]:
708
+ """Compose final pose from all sources using SDK's compose_world_offset.
709
 
710
+ Returns:
711
+ Tuple of (head_pose_4x4, (antenna_right, antenna_left), body_yaw)
712
  """
713
+ # Build primary head pose from target state
714
+ if SDK_UTILS_AVAILABLE:
715
+ primary_head = create_head_pose(
716
+ x=self.state.target_x,
717
+ y=self.state.target_y,
718
+ z=self.state.target_z,
719
+ roll=self.state.target_roll,
720
+ pitch=self.state.target_pitch,
721
+ yaw=self.state.target_yaw,
722
+ degrees=False,
723
+ mm=False,
724
+ )
725
+ else:
726
+ # Fallback: build matrix manually
727
+ rotation = R.from_euler('xyz', [
728
+ self.state.target_roll,
729
+ self.state.target_pitch,
730
+ self.state.target_yaw,
731
+ ])
732
+ primary_head = np.eye(4)
733
+ primary_head[:3, :3] = rotation.as_matrix()
734
+ primary_head[0, 3] = self.state.target_x
735
+ primary_head[1, 3] = self.state.target_y
736
+ primary_head[2, 3] = self.state.target_z
737
+
738
+ # Build secondary pose from speech sway + face tracking + breathing
739
  with self._face_tracking_lock:
740
  face_offsets = self._face_tracking_offsets
741
 
742
+ secondary_x = self.state.speech_x + face_offsets[0]
743
+ secondary_y = self.state.speech_y + face_offsets[1]
744
+ secondary_z = self.state.speech_z + face_offsets[2] + self.state.breathing_z
745
+ secondary_roll = self.state.speech_roll + face_offsets[3]
746
+ secondary_pitch = self.state.speech_pitch + face_offsets[4]
747
+ secondary_yaw = self.state.speech_yaw + face_offsets[5]
748
+
749
+ if SDK_UTILS_AVAILABLE:
750
+ secondary_head = create_head_pose(
751
+ x=secondary_x,
752
+ y=secondary_y,
753
+ z=secondary_z,
754
+ roll=secondary_roll,
755
+ pitch=secondary_pitch,
756
+ yaw=secondary_yaw,
757
+ degrees=False,
758
+ mm=False,
759
+ )
760
+ # Compose using SDK's compose_world_offset (same as conversation_app)
761
+ final_head = compose_world_offset(primary_head, secondary_head, reorthonormalize=True)
762
+ else:
763
+ # Fallback: simple addition (less accurate but works)
764
+ secondary_rotation = R.from_euler('xyz', [secondary_roll, secondary_pitch, secondary_yaw])
765
+ secondary_head = np.eye(4)
766
+ secondary_head[:3, :3] = secondary_rotation.as_matrix()
767
+ secondary_head[0, 3] = secondary_x
768
+ secondary_head[1, 3] = secondary_y
769
+ secondary_head[2, 3] = secondary_z
770
+
771
+ # Simple composition: R_final = R_secondary @ R_primary, t_final = t_primary + t_secondary
772
+ final_head = np.eye(4)
773
+ final_head[:3, :3] = secondary_head[:3, :3] @ primary_head[:3, :3]
774
+ final_head[:3, 3] = primary_head[:3, 3] + secondary_head[:3, 3]
775
 
776
  # Antenna pose with freeze blending
777
  target_antenna_left = self.state.target_antenna_left + self.state.breathing_antenna_left
 
789
  antenna_left = target_antenna_left
790
  antenna_right = target_antenna_right
791
 
792
+ return final_head, (antenna_right, antenna_left), self.state.target_body_yaw
 
 
 
 
 
 
 
 
 
 
793
 
794
  # =========================================================================
795
  # Internal: Robot control (runs in control loop)
796
  # =========================================================================
797
 
798
+ def _issue_control_command(self, head_pose: np.ndarray, antennas: Tuple[float, float], body_yaw: float) -> None:
799
  """Send control command to robot with error throttling and connection health tracking."""
800
  if self.robot is None:
801
  return
802
 
803
  # Check if pose changed significantly (prevent unnecessary commands)
804
+ # Extract euler angles for comparison
805
+ rotation = R.from_matrix(head_pose[:3, :3])
806
+ euler = rotation.as_euler('xyz') # [roll, pitch, yaw]
807
+
808
+ current_pose = {
809
+ "x": head_pose[0, 3],
810
+ "y": head_pose[1, 3],
811
+ "z": head_pose[2, 3],
812
+ "roll": euler[0],
813
+ "pitch": euler[1],
814
+ "yaw": euler[2],
815
+ "antenna_right": antennas[0],
816
+ "antenna_left": antennas[1],
817
+ "body_yaw": body_yaw,
818
+ }
819
+
820
  if self._last_sent_pose is not None:
821
  max_diff = max(
822
+ abs(current_pose[k] - self._last_sent_pose.get(k, 0.0))
823
+ for k in current_pose.keys()
824
  )
825
  if max_diff < self._pose_change_threshold:
826
  # No significant change, skip sending command
 
838
  logger.debug("Attempting to send command after connection loss...")
839
 
840
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841
  # Send to robot (single control point!)
842
+ # head_pose is already a 4x4 matrix from _compose_final_pose
843
  self.robot.set_target(
844
  head=head_pose,
845
+ antennas=list(antennas),
846
+ body_yaw=body_yaw,
847
  )
848
 
849
  # Command succeeded - update connection health and cache
850
  self._last_successful_command = now
851
+ self._last_sent_pose = current_pose.copy() # Cache sent pose
852
  self._consecutive_errors = 0 # Reset error counter
853
 
854
  if self._connection_lost:
 
927
  # 6. Update face tracking offsets from camera server
928
  self._update_face_tracking()
929
 
930
+ # 7. Compose final pose (returns head_pose matrix, antennas tuple, body_yaw)
931
+ head_pose, antennas, body_yaw = self._compose_final_pose()
932
 
933
  # 8. Send to robot (single control point!)
934
+ self._issue_control_command(head_pose, antennas, body_yaw)
935
 
936
  except Exception as e:
937
  self._log_error_throttled(f"Control loop error: {e}")