Commit ·
606f861
1
Parent(s): 585e14d
手势识别: 首次运行自动安装mediapipe 0.10.18 (ARM64)
Browse files- PROJECT_PLAN.md +1 -1
- reachy_mini_ha_voice/gesture_detector.py +51 -58
PROJECT_PLAN.md
CHANGED
|
@@ -757,7 +757,7 @@ camera_server.set_gesture_callbacks(
|
|
| 757 |
- **Phase 13** - Sendspin 多房间音频支持 ✅
|
| 758 |
- **Phase 15** - YOLO 人脸追踪 ✅
|
| 759 |
- **Phase 20** - 拍一拍唤醒 ✅
|
| 760 |
-
- **Phase 21** - 手势识别 (
|
| 761 |
|
| 762 |
#### 部分实现功能
|
| 763 |
- **Phase 14** - 情感动作 API 基础设施 (手动触发可用)
|
|
|
|
| 757 |
- **Phase 13** - Sendspin 多房间音频支持 ✅
|
| 758 |
- **Phase 15** - YOLO 人脸追踪 ✅
|
| 759 |
- **Phase 20** - 拍一拍唤醒 ✅
|
| 760 |
+
- **Phase 21** - 手势识别 (11 种手势,自动安装mediapipe) ✅
|
| 761 |
|
| 762 |
#### 部分实现功能
|
| 763 |
- **Phase 14** - 情感动作 API 基础设施 (手动触发可用)
|
reachy_mini_ha_voice/gesture_detector.py
CHANGED
|
@@ -1,28 +1,16 @@
|
|
| 1 |
"""Gesture detection using MediaPipe Hands.
|
| 2 |
|
| 3 |
Detects 11 hand gestures for robot interaction:
|
| 4 |
-
- thumbs_up
|
| 5 |
-
-
|
| 6 |
-
- open_palm: ✋ Stop/halt
|
| 7 |
-
- fist: ✊ Pause/hold
|
| 8 |
-
- peace: ✌️ Victory sign
|
| 9 |
-
- pointing_up: ☝️ Attention/one
|
| 10 |
-
- ok: 👌 OK sign
|
| 11 |
-
- rock: 🤘 Rock on
|
| 12 |
-
- call: 🤙 Call me
|
| 13 |
-
- three: 3️⃣ Three fingers
|
| 14 |
-
- four: 4️⃣ Four fingers
|
| 15 |
|
| 16 |
-
|
| 17 |
-
is silently disabled (no network installation attempts).
|
| 18 |
-
|
| 19 |
-
For ARM64 (Raspberry Pi), install manually:
|
| 20 |
-
pip install mediapipe==0.10.18 --no-deps
|
| 21 |
-
pip install flatbuffers absl-py
|
| 22 |
"""
|
| 23 |
|
| 24 |
from __future__ import annotations
|
| 25 |
import logging
|
|
|
|
|
|
|
| 26 |
from enum import Enum
|
| 27 |
from typing import Optional, Tuple, Callable
|
| 28 |
import time
|
|
@@ -33,19 +21,52 @@ from numpy.typing import NDArray
|
|
| 33 |
logger = logging.getLogger(__name__)
|
| 34 |
|
| 35 |
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
_mp_hands = None
|
| 38 |
_mediapipe_available = False
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
logger.warning("MediaPipe load error: %s - gesture detection disabled", e)
|
| 49 |
|
| 50 |
|
| 51 |
class Gesture(Enum):
|
|
@@ -76,12 +97,10 @@ class GestureDetector:
|
|
| 76 |
self._hands = None
|
| 77 |
self._available = False
|
| 78 |
|
| 79 |
-
# Callbacks
|
| 80 |
self._callbacks: dict[Gesture, Optional[Callable[[], None]]] = {
|
| 81 |
g: None for g in Gesture if g != Gesture.NONE
|
| 82 |
}
|
| 83 |
|
| 84 |
-
# State
|
| 85 |
self._last_gesture = Gesture.NONE
|
| 86 |
self._current_gesture = Gesture.NONE
|
| 87 |
self._gesture_start_time: Optional[float] = None
|
|
@@ -100,9 +119,9 @@ class GestureDetector:
|
|
| 100 |
min_tracking_confidence=min_tracking_confidence,
|
| 101 |
)
|
| 102 |
self._available = True
|
| 103 |
-
logger.info("
|
| 104 |
except Exception as e:
|
| 105 |
-
logger.warning("
|
| 106 |
|
| 107 |
@property
|
| 108 |
def is_available(self) -> bool:
|
|
@@ -168,47 +187,26 @@ class GestureDetector:
|
|
| 168 |
|
| 169 |
thumb_index_dist = self._dist(thumb_tip, index_tip)
|
| 170 |
|
| 171 |
-
# Thumbs up
|
| 172 |
if thumb_up and thumb_extended and all_curled:
|
| 173 |
return Gesture.THUMBS_UP
|
| 174 |
-
|
| 175 |
-
# Thumbs down
|
| 176 |
if thumb_down and thumb_extended and all_curled:
|
| 177 |
return Gesture.THUMBS_DOWN
|
| 178 |
-
|
| 179 |
-
# Fist
|
| 180 |
if all_curled and not thumb_extended:
|
| 181 |
return Gesture.FIST
|
| 182 |
-
|
| 183 |
-
# OK sign
|
| 184 |
if thumb_index_dist < 0.05 and middle_ext and ring_ext and pinky_ext:
|
| 185 |
return Gesture.OK
|
| 186 |
-
|
| 187 |
-
# Rock (index + pinky)
|
| 188 |
if index_ext and pinky_ext and not middle_ext and not ring_ext:
|
| 189 |
return Gesture.ROCK
|
| 190 |
-
|
| 191 |
-
# Call (thumb + pinky)
|
| 192 |
if thumb_extended and pinky_ext and not index_ext and not middle_ext and not ring_ext:
|
| 193 |
return Gesture.CALL
|
| 194 |
-
|
| 195 |
-
# Pointing up
|
| 196 |
if index_ext and not middle_ext and not ring_ext and not pinky_ext:
|
| 197 |
return Gesture.POINTING_UP
|
| 198 |
-
|
| 199 |
-
# Peace
|
| 200 |
if index_ext and middle_ext and not ring_ext and not pinky_ext:
|
| 201 |
return Gesture.PEACE
|
| 202 |
-
|
| 203 |
-
# Three
|
| 204 |
if index_ext and middle_ext and ring_ext and not pinky_ext:
|
| 205 |
return Gesture.THREE
|
| 206 |
-
|
| 207 |
-
# Four
|
| 208 |
if index_ext and middle_ext and ring_ext and pinky_ext and not thumb_extended:
|
| 209 |
return Gesture.FOUR
|
| 210 |
-
|
| 211 |
-
# Open palm
|
| 212 |
if ext_count >= 4 and thumb_extended:
|
| 213 |
return Gesture.OPEN_PALM
|
| 214 |
|
|
@@ -217,15 +215,12 @@ class GestureDetector:
|
|
| 217 |
def detect(self, frame: NDArray[np.uint8]) -> Gesture:
|
| 218 |
if not self._available:
|
| 219 |
return Gesture.NONE
|
| 220 |
-
|
| 221 |
try:
|
| 222 |
import cv2
|
| 223 |
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 224 |
results = self._hands.process(rgb)
|
| 225 |
-
|
| 226 |
if not results.multi_hand_landmarks:
|
| 227 |
return Gesture.NONE
|
| 228 |
-
|
| 229 |
return self._classify_gesture(results.multi_hand_landmarks[0])
|
| 230 |
except Exception as e:
|
| 231 |
logger.debug("Gesture detection error: %s", e)
|
|
@@ -253,16 +248,14 @@ class GestureDetector:
|
|
| 253 |
if now - self._gesture_start_time >= self._gesture_hold_threshold:
|
| 254 |
self._last_trigger_time = now
|
| 255 |
self._gesture_start_time = None
|
| 256 |
-
|
| 257 |
callback = self._callbacks.get(gesture)
|
| 258 |
if callback:
|
| 259 |
-
logger.info("Gesture
|
| 260 |
try:
|
| 261 |
callback()
|
| 262 |
except Exception as e:
|
| 263 |
logger.error("Gesture callback error: %s", e)
|
| 264 |
return gesture
|
| 265 |
-
|
| 266 |
return None
|
| 267 |
|
| 268 |
def close(self) -> None:
|
|
|
|
| 1 |
"""Gesture detection using MediaPipe Hands.
|
| 2 |
|
| 3 |
Detects 11 hand gestures for robot interaction:
|
| 4 |
+
- thumbs_up, thumbs_down, open_palm, fist, peace, pointing_up
|
| 5 |
+
- ok, rock, call, three, four
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
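Auto-installs mediapipe via pip at import time if it is missing
(pinned to 0.10.18 with --no-deps, the build targeted at ARM64 / Raspberry Pi,
followed by flatbuffers, absl-py and attrs).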
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
|
| 10 |
from __future__ import annotations
|
| 11 |
import logging
|
| 12 |
+
import subprocess
|
| 13 |
+
import sys
|
| 14 |
from enum import Enum
|
| 15 |
from typing import Optional, Tuple, Callable
|
| 16 |
import time
|
|
|
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
|
| 24 |
+
def _ensure_mediapipe_installed() -> bool:
    """Make sure the ``mediapipe`` package is importable, installing it if missing.

    If ``import mediapipe`` raises ImportError, pip is run through the current
    interpreter to install ``mediapipe==0.10.18`` with ``--no-deps`` and then
    the ``flatbuffers``, ``absl-py`` and ``attrs`` packages.

    NOTE(review): the log message and the pinned version target ARM64
    (Raspberry Pi CM4), but no architecture check is made here, so the same
    install is attempted on every platform - confirm this is intended.

    Returns:
        True if mediapipe imported cleanly or both pip commands exited
        successfully; False if the import failed for a reason other than
        ImportError, or if the installation timed out or failed.
    """
    import importlib  # local import: only needed after a fresh install

    try:
        import mediapipe  # noqa: F401 - availability probe only
        return True
    except ImportError:
        # Not installed (or a dependency is missing): fall through to pip.
        pass
    except Exception as e:
        # A present-but-broken install can raise something other than
        # ImportError; report it instead of letting it abort the import of
        # this module.
        logger.warning("MediaPipe import error: %s", e)
        return False

    # Auto-install for ARM64 (Raspberry Pi CM4)
    logger.info("MediaPipe not found, installing for ARM64...")
    pip_install = [sys.executable, '-m', 'pip', 'install', '-q']
    try:
        # Install mediapipe 0.10.18 without deps to avoid numpy conflict
        subprocess.check_call(
            pip_install + ['mediapipe==0.10.18', '--no-deps'],
            timeout=120,
        )
        # Install the dependencies that --no-deps skipped
        subprocess.check_call(
            pip_install + ['flatbuffers>=2.0', 'absl-py', 'attrs>=19.1.0'],
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        logger.warning("MediaPipe installation timed out")
        return False
    except subprocess.CalledProcessError as e:
        logger.warning("MediaPipe installation failed: %s", e)
        return False
    except Exception as e:
        logger.warning("MediaPipe installation error: %s", e)
        return False

    # The package was added while this interpreter was running; drop cached
    # finder state so the caller's subsequent import can see it.
    importlib.invalidate_caches()
    logger.info("MediaPipe installed successfully")
    return True
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# Module-level MediaPipe handles. They start out in the "unavailable" state
# and are only updated inside the guarded import below.
_mp_hands = None
_mediapipe_available = False

if _ensure_mediapipe_installed():
    try:
        import mediapipe as mp

        _mp_hands = mp.solutions.hands
        _mediapipe_available = True
        logger.info("MediaPipe Hands loaded")
    except Exception as exc:
        # Best effort: log and carry on rather than fail the module import.
        logger.warning("MediaPipe load failed: %s", exc)
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
class Gesture(Enum):
|
|
|
|
| 97 |
self._hands = None
|
| 98 |
self._available = False
|
| 99 |
|
|
|
|
| 100 |
self._callbacks: dict[Gesture, Optional[Callable[[], None]]] = {
|
| 101 |
g: None for g in Gesture if g != Gesture.NONE
|
| 102 |
}
|
| 103 |
|
|
|
|
| 104 |
self._last_gesture = Gesture.NONE
|
| 105 |
self._current_gesture = Gesture.NONE
|
| 106 |
self._gesture_start_time: Optional[float] = None
|
|
|
|
| 119 |
min_tracking_confidence=min_tracking_confidence,
|
| 120 |
)
|
| 121 |
self._available = True
|
| 122 |
+
logger.info("Gesture detection enabled")
|
| 123 |
except Exception as e:
|
| 124 |
+
logger.warning("Gesture detection init failed: %s", e)
|
| 125 |
|
| 126 |
@property
|
| 127 |
def is_available(self) -> bool:
|
|
|
|
| 187 |
|
| 188 |
thumb_index_dist = self._dist(thumb_tip, index_tip)
|
| 189 |
|
|
|
|
| 190 |
if thumb_up and thumb_extended and all_curled:
|
| 191 |
return Gesture.THUMBS_UP
|
|
|
|
|
|
|
| 192 |
if thumb_down and thumb_extended and all_curled:
|
| 193 |
return Gesture.THUMBS_DOWN
|
|
|
|
|
|
|
| 194 |
if all_curled and not thumb_extended:
|
| 195 |
return Gesture.FIST
|
|
|
|
|
|
|
| 196 |
if thumb_index_dist < 0.05 and middle_ext and ring_ext and pinky_ext:
|
| 197 |
return Gesture.OK
|
|
|
|
|
|
|
| 198 |
if index_ext and pinky_ext and not middle_ext and not ring_ext:
|
| 199 |
return Gesture.ROCK
|
|
|
|
|
|
|
| 200 |
if thumb_extended and pinky_ext and not index_ext and not middle_ext and not ring_ext:
|
| 201 |
return Gesture.CALL
|
|
|
|
|
|
|
| 202 |
if index_ext and not middle_ext and not ring_ext and not pinky_ext:
|
| 203 |
return Gesture.POINTING_UP
|
|
|
|
|
|
|
| 204 |
if index_ext and middle_ext and not ring_ext and not pinky_ext:
|
| 205 |
return Gesture.PEACE
|
|
|
|
|
|
|
| 206 |
if index_ext and middle_ext and ring_ext and not pinky_ext:
|
| 207 |
return Gesture.THREE
|
|
|
|
|
|
|
| 208 |
if index_ext and middle_ext and ring_ext and pinky_ext and not thumb_extended:
|
| 209 |
return Gesture.FOUR
|
|
|
|
|
|
|
| 210 |
if ext_count >= 4 and thumb_extended:
|
| 211 |
return Gesture.OPEN_PALM
|
| 212 |
|
|
|
|
| 215 |
def detect(self, frame: NDArray[np.uint8]) -> Gesture:
|
| 216 |
if not self._available:
|
| 217 |
return Gesture.NONE
|
|
|
|
| 218 |
try:
|
| 219 |
import cv2
|
| 220 |
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 221 |
results = self._hands.process(rgb)
|
|
|
|
| 222 |
if not results.multi_hand_landmarks:
|
| 223 |
return Gesture.NONE
|
|
|
|
| 224 |
return self._classify_gesture(results.multi_hand_landmarks[0])
|
| 225 |
except Exception as e:
|
| 226 |
logger.debug("Gesture detection error: %s", e)
|
|
|
|
| 248 |
if now - self._gesture_start_time >= self._gesture_hold_threshold:
|
| 249 |
self._last_trigger_time = now
|
| 250 |
self._gesture_start_time = None
|
|
|
|
| 251 |
callback = self._callbacks.get(gesture)
|
| 252 |
if callback:
|
| 253 |
+
logger.info("Gesture: %s", gesture.value)
|
| 254 |
try:
|
| 255 |
callback()
|
| 256 |
except Exception as e:
|
| 257 |
logger.error("Gesture callback error: %s", e)
|
| 258 |
return gesture
|
|
|
|
| 259 |
return None
|
| 260 |
|
| 261 |
def close(self) -> None:
|