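# Source: Hugging Face file view (uploader: Desmond-Dong).
# Commit 2f0e868: "Add a retry mechanism to the face-model download
# (3 retries, 5-second interval)".
# File size: 6.73 kB.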
"""Lightweight head tracker using YOLO for face detection.
Ported from reachy_mini_conversation_app for voice assistant integration.
Model is loaded at initialization time (not lazy) to ensure face tracking
is ready immediately when the camera server starts.
"""
from __future__ import annotations
import logging
from typing import Tuple, Optional
import numpy as np
from numpy.typing import NDArray
logger = logging.getLogger(__name__)
class HeadTracker:
    """Lightweight head tracker using YOLO for face detection.

    The model is loaded in ``__init__`` (not lazily) so that face tracking
    is ready immediately (matching conversation_app behavior).

    Loading never raises: when the optional dependencies are missing or the
    model cannot be downloaded/loaded, the failure is logged, its message is
    kept in ``_model_load_error``, ``is_available`` reports ``False`` and
    ``get_head_position`` returns ``(None, None)``.
    """

    # Weights used by ``_select_best_face`` to rank candidate faces.
    _CONFIDENCE_WEIGHT = 0.7
    _AREA_WEIGHT = 0.3

    def __init__(
        self,
        model_repo: str = "AdamCodd/YOLOv11n-face-detection",
        model_filename: str = "model.pt",
        confidence_threshold: float = 0.3,
        device: str = "cpu",
        *,
        max_retries: int = 3,
        retry_delay: float = 5,
    ) -> None:
        """Initialize YOLO-based head tracker and load the model.

        Args:
            model_repo: HuggingFace model repository
            model_filename: Model file name
            confidence_threshold: Minimum confidence for face detection
            device: Device to run inference on ('cpu' or 'cuda')
            max_retries: Total number of download attempts (values below 1
                are treated as 1)
            retry_delay: Seconds to sleep between failed download attempts
        """
        self.confidence_threshold = confidence_threshold
        self.model = None
        self._model_repo = model_repo
        self._model_filename = model_filename
        self._device = device
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._detections_class = None
        self._model_load_attempted = False
        self._model_load_error: Optional[str] = None

        # Load model immediately at init (not lazy)
        self._load_model()

    def _load_model(self) -> None:
        """Import the optional dependencies, download the model and load it.

        Runs at most once per instance; later calls return immediately.
        On failure ``self.model`` stays ``None`` and the error message is
        stored in ``self._model_load_error``.
        """
        if self._model_load_attempted:
            return
        self._model_load_attempted = True

        try:
            # Imported here so that a missing optional dependency only
            # disables face tracking instead of breaking module import.
            from ultralytics import YOLO
            from supervision import Detections
            from huggingface_hub import hf_hub_download

            self._detections_class = Detections
            model_path = self._download_model(hf_hub_download)
            self.model = YOLO(model_path).to(self._device)
            logger.info("YOLO face detection model loaded")
        except ImportError as e:
            self._model_load_error = f"Missing dependencies: {e}"
            logger.warning("Face tracking disabled - missing dependencies: %s", e)
            self.model = None
        except Exception as e:
            self._model_load_error = str(e)
            logger.error("Failed to load YOLO model: %s", e)
            self.model = None

    def _download_model(self, download):
        """Fetch the model file, retrying on failure.

        Args:
            download: Callable invoked as
                ``download(repo_id=..., filename=...)`` that returns the
                local model path (``hf_hub_download`` in production).

        Returns:
            Whatever ``download`` returns on the first successful attempt.

        Raises:
            Exception: The error of the final attempt when all attempts fail.
        """
        import time

        # Always try at least once so a misconfigured retry count cannot
        # leave us with neither a path nor an error to report.
        attempts = max(1, int(self._max_retries))
        for attempt in range(1, attempts + 1):
            try:
                return download(
                    repo_id=self._model_repo,
                    filename=self._model_filename,
                )
            except Exception as e:
                if attempt == attempts:
                    raise
                logger.warning(
                    "Model download failed (attempt %d/%d): %s. Retrying in %gs...",
                    attempt, attempts, e, self._retry_delay
                )
                time.sleep(self._retry_delay)
        # Not reachable: the loop above either returns or re-raises.
        raise RuntimeError("Model download loop exited without a result")

    @property
    def is_available(self) -> bool:
        """Check if the head tracker is available and ready."""
        return self.model is not None and self._detections_class is not None

    def _select_best_face(self, detections) -> Optional[int]:
        """Select the best face based on confidence and area.

        Candidates below ``confidence_threshold`` are discarded. The rest are
        scored as ``0.7 * confidence + 0.3 * (area / largest area)``.

        Args:
            detections: Supervision detections object (its ``xyxy`` and
                ``confidence`` attributes are read)

        Returns:
            Index of best face (into ``detections``) or None if no valid faces
        """
        if detections.xyxy.shape[0] == 0:
            return None
        if detections.confidence is None:
            return None

        # Filter by confidence threshold
        valid_indices = np.flatnonzero(
            detections.confidence >= self.confidence_threshold
        )
        if valid_indices.size == 0:
            return None

        # Calculate areas for valid detections; clamp so that inverted boxes
        # cannot contribute a negative (or spuriously positive) area.
        boxes = detections.xyxy[valid_indices]
        widths = np.clip(boxes[:, 2] - boxes[:, 0], 0, None)
        heights = np.clip(boxes[:, 3] - boxes[:, 1], 0, None)
        areas = widths * heights

        # When every candidate is degenerate there is no meaningful area
        # ranking; dividing by zero would turn all scores into NaN and make
        # argmax pick the first candidate regardless of confidence.
        largest_area = float(np.max(areas))
        if largest_area > 0:
            relative_areas = areas / largest_area
        else:
            relative_areas = np.zeros_like(areas)

        # Combine confidence and area (weighted towards larger faces)
        confidences = detections.confidence[valid_indices]
        scores = (
            confidences * self._CONFIDENCE_WEIGHT
            + relative_areas * self._AREA_WEIGHT
        )
        return int(valid_indices[np.argmax(scores)])

    def _bbox_to_normalized_coords(
        self, bbox: NDArray[np.float32], w: int, h: int
    ) -> NDArray[np.float32]:
        """Convert bounding box center to normalized coordinates [-1, 1].

        Args:
            bbox: Bounding box [x1, y1, x2, y2]
            w: Image width (must be non-zero)
            h: Image height (must be non-zero)

        Returns:
            Center point in [-1, 1] coordinates
        """
        center_x = (bbox[0] + bbox[2]) / 2.0
        center_y = (bbox[1] + bbox[3]) / 2.0

        # Normalize to [0, 1] then to [-1, 1]
        norm_x = (center_x / w) * 2.0 - 1.0
        norm_y = (center_y / h) * 2.0 - 1.0
        return np.array([norm_x, norm_y], dtype=np.float32)

    def get_head_position(
        self, img: NDArray[np.uint8]
    ) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
        """Get head position from face detection.

        Errors raised during inference are logged at debug level and reported
        as "no face" rather than propagated.

        Args:
            img: Input image (BGR format)

        Returns:
            Tuple of (face_center [-1,1], confidence) or (None, None) if no
            face was found, the tracker is unavailable, or ``img`` is missing
            or empty
        """
        if not self.is_available:
            return None, None

        # A missing, non-image or zero-sized frame cannot contain a face and
        # would otherwise cause an attribute error or a division by zero.
        if img is None or getattr(img, "ndim", 0) < 2 or img.size == 0:
            return None, None

        h, w = img.shape[:2]
        try:
            # Run YOLO inference
            results = self.model(img, verbose=False)
            detections = self._detections_class.from_ultralytics(results[0])

            # Select best face
            face_idx = self._select_best_face(detections)
            if face_idx is None:
                return None, None

            # Get face center in [-1, 1] coordinates
            face_center = self._bbox_to_normalized_coords(
                detections.xyxy[face_idx], w, h
            )
            # ``confidence`` is known to be present here: _select_best_face
            # returns None whenever it is missing.
            confidence = float(detections.confidence[face_idx])
            return face_center, confidence
        except Exception as e:
            logger.debug("Error in head position detection: %s", e)
            return None, None