Desmond-Dong committed on
Commit
606f861
·
1 Parent(s): 585e14d

手势识别: 首次运行自动安装mediapipe 0.10.18 (ARM64)

Browse files
PROJECT_PLAN.md CHANGED
@@ -757,7 +757,7 @@ camera_server.set_gesture_callbacks(
757
  - **Phase 13** - Sendspin 多房间音频支持 ✅
758
  - **Phase 15** - YOLO 人脸追踪 ✅
759
  - **Phase 20** - 拍一拍唤醒 ✅
760
- - **Phase 21** - 手势识别 (7 种手势) ✅
761
 
762
  #### 部分实现功能
763
  - **Phase 14** - 情感动作 API 基础设施 (手动触发可用)
 
757
  - **Phase 13** - Sendspin 多房间音频支持 ✅
758
  - **Phase 15** - YOLO 人脸追踪 ✅
759
  - **Phase 20** - 拍一拍唤醒 ✅
760
+ - **Phase 21** - 手势识别 (11 种手势,自动安装mediapipe) ✅
761
 
762
  #### 部分实现功能
763
  - **Phase 14** - 情感动作 API 基础设施 (手动触发可用)
reachy_mini_ha_voice/gesture_detector.py CHANGED
@@ -1,28 +1,16 @@
1
  """Gesture detection using MediaPipe Hands.
2
 
3
  Detects 11 hand gestures for robot interaction:
4
- - thumbs_up: 👍 Confirmation/like
5
- - thumbs_down: 👎 Reject/dislike
6
- - open_palm: ✋ Stop/halt
7
- - fist: ✊ Pause/hold
8
- - peace: ✌️ Victory sign
9
- - pointing_up: ☝️ Attention/one
10
- - ok: 👌 OK sign
11
- - rock: 🤘 Rock on
12
- - call: 🤙 Call me
13
- - three: 3️⃣ Three fingers
14
- - four: 4️⃣ Four fingers
15
 
16
- Requires mediapipe to be pre-installed. If not available, gesture detection
17
- is silently disabled (no network installation attempts).
18
-
19
- For ARM64 (Raspberry Pi), install manually:
20
- pip install mediapipe==0.10.18 --no-deps
21
- pip install flatbuffers absl-py
22
  """
23
 
24
  from __future__ import annotations
25
  import logging
 
 
26
  from enum import Enum
27
  from typing import Optional, Tuple, Callable
28
  import time
@@ -33,19 +21,52 @@ from numpy.typing import NDArray
33
  logger = logging.getLogger(__name__)
34
 
35
 
36
- # Try to import mediapipe (no auto-install to avoid network issues)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  _mp_hands = None
38
  _mediapipe_available = False
39
 
40
- try:
41
- import mediapipe as mp
42
- _mp_hands = mp.solutions.hands
43
- _mediapipe_available = True
44
- logger.info("MediaPipe loaded for gesture detection")
45
- except ImportError:
46
- logger.info("MediaPipe not installed - gesture detection disabled")
47
- except Exception as e:
48
- logger.warning("MediaPipe load error: %s - gesture detection disabled", e)
49
 
50
 
51
  class Gesture(Enum):
@@ -76,12 +97,10 @@ class GestureDetector:
76
  self._hands = None
77
  self._available = False
78
 
79
- # Callbacks
80
  self._callbacks: dict[Gesture, Optional[Callable[[], None]]] = {
81
  g: None for g in Gesture if g != Gesture.NONE
82
  }
83
 
84
- # State
85
  self._last_gesture = Gesture.NONE
86
  self._current_gesture = Gesture.NONE
87
  self._gesture_start_time: Optional[float] = None
@@ -100,9 +119,9 @@ class GestureDetector:
100
  min_tracking_confidence=min_tracking_confidence,
101
  )
102
  self._available = True
103
- logger.info("MediaPipe Hands initialized")
104
  except Exception as e:
105
- logger.warning("Failed to initialize MediaPipe Hands: %s", e)
106
 
107
  @property
108
  def is_available(self) -> bool:
@@ -168,47 +187,26 @@ class GestureDetector:
168
 
169
  thumb_index_dist = self._dist(thumb_tip, index_tip)
170
 
171
- # Thumbs up
172
  if thumb_up and thumb_extended and all_curled:
173
  return Gesture.THUMBS_UP
174
-
175
- # Thumbs down
176
  if thumb_down and thumb_extended and all_curled:
177
  return Gesture.THUMBS_DOWN
178
-
179
- # Fist
180
  if all_curled and not thumb_extended:
181
  return Gesture.FIST
182
-
183
- # OK sign
184
  if thumb_index_dist < 0.05 and middle_ext and ring_ext and pinky_ext:
185
  return Gesture.OK
186
-
187
- # Rock (index + pinky)
188
  if index_ext and pinky_ext and not middle_ext and not ring_ext:
189
  return Gesture.ROCK
190
-
191
- # Call (thumb + pinky)
192
  if thumb_extended and pinky_ext and not index_ext and not middle_ext and not ring_ext:
193
  return Gesture.CALL
194
-
195
- # Pointing up
196
  if index_ext and not middle_ext and not ring_ext and not pinky_ext:
197
  return Gesture.POINTING_UP
198
-
199
- # Peace
200
  if index_ext and middle_ext and not ring_ext and not pinky_ext:
201
  return Gesture.PEACE
202
-
203
- # Three
204
  if index_ext and middle_ext and ring_ext and not pinky_ext:
205
  return Gesture.THREE
206
-
207
- # Four
208
  if index_ext and middle_ext and ring_ext and pinky_ext and not thumb_extended:
209
  return Gesture.FOUR
210
-
211
- # Open palm
212
  if ext_count >= 4 and thumb_extended:
213
  return Gesture.OPEN_PALM
214
 
@@ -217,15 +215,12 @@ class GestureDetector:
217
  def detect(self, frame: NDArray[np.uint8]) -> Gesture:
218
  if not self._available:
219
  return Gesture.NONE
220
-
221
  try:
222
  import cv2
223
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
224
  results = self._hands.process(rgb)
225
-
226
  if not results.multi_hand_landmarks:
227
  return Gesture.NONE
228
-
229
  return self._classify_gesture(results.multi_hand_landmarks[0])
230
  except Exception as e:
231
  logger.debug("Gesture detection error: %s", e)
@@ -253,16 +248,14 @@ class GestureDetector:
253
  if now - self._gesture_start_time >= self._gesture_hold_threshold:
254
  self._last_trigger_time = now
255
  self._gesture_start_time = None
256
-
257
  callback = self._callbacks.get(gesture)
258
  if callback:
259
- logger.info("Gesture triggered: %s", gesture.value)
260
  try:
261
  callback()
262
  except Exception as e:
263
  logger.error("Gesture callback error: %s", e)
264
  return gesture
265
-
266
  return None
267
 
268
  def close(self) -> None:
 
1
  """Gesture detection using MediaPipe Hands.
2
 
3
  Detects 11 hand gestures for robot interaction:
4
+ - thumbs_up, thumbs_down, open_palm, fist, peace, pointing_up
5
+ - ok, rock, call, three, four
 
 
 
 
 
 
 
 
 
6
 
7
+ Auto-installs mediapipe on first run (ARM64: 0.10.18 with --no-deps).
 
 
 
 
 
8
  """
9
 
10
  from __future__ import annotations
11
  import logging
12
+ import subprocess
13
+ import sys
14
  from enum import Enum
15
  from typing import Optional, Tuple, Callable
16
  import time
 
21
  logger = logging.getLogger(__name__)
22
 
23
 
24
def _ensure_mediapipe_installed() -> bool:
    """Make sure ``mediapipe`` is importable, pip-installing it when missing.

    The function first tries a plain ``import mediapipe``. If the package is
    absent it runs two ``pip install`` commands through the current
    interpreter:

    1. ``mediapipe==0.10.18`` with ``--no-deps`` (the pin and ``--no-deps``
       avoid a numpy conflict), limited to 120 seconds.
    2. ``flatbuffers>=2.0``, ``absl-py`` and ``attrs>=19.1.0``, limited to
       60 seconds.

    NOTE(review): the pinned version targets ARM64 (Raspberry Pi CM4), but no
    platform check is performed here - the same commands run on every
    architecture. Confirm whether that is intended.

    Returns:
        True when mediapipe was already importable or both pip commands
        succeeded; False when the existing installation is broken, or the
        installation timed out, failed, or raised any other error.
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    try:
        import mediapipe  # noqa: F401  (import is only a presence probe)
        return True
    except ImportError:
        # Not installed: fall through to the installation attempt below.
        pass
    except Exception as e:
        # The package exists but is broken (e.g. binary incompatibility).
        # Re-installing will not help and must not crash the module import,
        # so report it and leave gesture detection disabled.
        logger.warning("MediaPipe load failed: %s", e)
        return False

    # Auto-install for ARM64 (Raspberry Pi CM4)
    logger.info("MediaPipe not found, installing for ARM64...")
    try:
        # Install mediapipe 0.10.18 without deps to avoid numpy conflict
        subprocess.check_call(
            [
                sys.executable, '-m', 'pip', 'install', '-q',
                'mediapipe==0.10.18', '--no-deps',
            ],
            timeout=120,
        )
        # Install required deps
        subprocess.check_call(
            [
                sys.executable, '-m', 'pip', 'install', '-q',
                'flatbuffers>=2.0', 'absl-py', 'attrs>=19.1.0',
            ],
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        logger.warning("MediaPipe installation timed out")
        return False
    except subprocess.CalledProcessError as e:
        logger.warning("MediaPipe installation failed: %s", e)
        return False
    except Exception as e:
        logger.warning("MediaPipe installation error: %s", e)
        return False

    # The package was created after the interpreter started; the import
    # system's finder caches must be reset so the caller's
    # ``import mediapipe`` can see it in this same process.
    importlib.invalidate_caches()
    logger.info("MediaPipe installed successfully")
    return True
56
+
57
+
58
# Module-level MediaPipe state. Both names keep their "unavailable" defaults
# unless the guarded import below succeeds.
_mp_hands = None
_mediapipe_available = False

if _ensure_mediapipe_installed():
    try:
        import mediapipe as mp

        # The right-hand side is evaluated first, so a failing attribute
        # lookup leaves both names at their defaults.
        _mp_hands, _mediapipe_available = mp.solutions.hands, True
        logger.info("MediaPipe Hands loaded")
    except Exception as load_error:
        logger.warning("MediaPipe load failed: %s", load_error)
 
70
 
71
 
72
  class Gesture(Enum):
 
97
  self._hands = None
98
  self._available = False
99
 
 
100
  self._callbacks: dict[Gesture, Optional[Callable[[], None]]] = {
101
  g: None for g in Gesture if g != Gesture.NONE
102
  }
103
 
 
104
  self._last_gesture = Gesture.NONE
105
  self._current_gesture = Gesture.NONE
106
  self._gesture_start_time: Optional[float] = None
 
119
  min_tracking_confidence=min_tracking_confidence,
120
  )
121
  self._available = True
122
+ logger.info("Gesture detection enabled")
123
  except Exception as e:
124
+ logger.warning("Gesture detection init failed: %s", e)
125
 
126
  @property
127
  def is_available(self) -> bool:
 
187
 
188
  thumb_index_dist = self._dist(thumb_tip, index_tip)
189
 
 
190
  if thumb_up and thumb_extended and all_curled:
191
  return Gesture.THUMBS_UP
 
 
192
  if thumb_down and thumb_extended and all_curled:
193
  return Gesture.THUMBS_DOWN
 
 
194
  if all_curled and not thumb_extended:
195
  return Gesture.FIST
 
 
196
  if thumb_index_dist < 0.05 and middle_ext and ring_ext and pinky_ext:
197
  return Gesture.OK
 
 
198
  if index_ext and pinky_ext and not middle_ext and not ring_ext:
199
  return Gesture.ROCK
 
 
200
  if thumb_extended and pinky_ext and not index_ext and not middle_ext and not ring_ext:
201
  return Gesture.CALL
 
 
202
  if index_ext and not middle_ext and not ring_ext and not pinky_ext:
203
  return Gesture.POINTING_UP
 
 
204
  if index_ext and middle_ext and not ring_ext and not pinky_ext:
205
  return Gesture.PEACE
 
 
206
  if index_ext and middle_ext and ring_ext and not pinky_ext:
207
  return Gesture.THREE
 
 
208
  if index_ext and middle_ext and ring_ext and pinky_ext and not thumb_extended:
209
  return Gesture.FOUR
 
 
210
  if ext_count >= 4 and thumb_extended:
211
  return Gesture.OPEN_PALM
212
 
 
215
  def detect(self, frame: NDArray[np.uint8]) -> Gesture:
216
  if not self._available:
217
  return Gesture.NONE
 
218
  try:
219
  import cv2
220
  rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
221
  results = self._hands.process(rgb)
 
222
  if not results.multi_hand_landmarks:
223
  return Gesture.NONE
 
224
  return self._classify_gesture(results.multi_hand_landmarks[0])
225
  except Exception as e:
226
  logger.debug("Gesture detection error: %s", e)
 
248
  if now - self._gesture_start_time >= self._gesture_hold_threshold:
249
  self._last_trigger_time = now
250
  self._gesture_start_time = None
 
251
  callback = self._callbacks.get(gesture)
252
  if callback:
253
+ logger.info("Gesture: %s", gesture.value)
254
  try:
255
  callback()
256
  except Exception as e:
257
  logger.error("Gesture callback error: %s", e)
258
  return gesture
 
259
  return None
260
 
261
  def close(self) -> None: