Spaces:

djhui5710
/

reachy_mini_home_assistant

Running

App Files Files Community

reachy_mini_home_assistant / reachy_mini_ha_voice /head_tracker.py

Desmond-Dong

人脸模型下载加重试机制(3次重试，间隔5秒)

2f0e868 5 months ago

raw

history blame

6.73 kB

	"""Lightweight head tracker using YOLO for face detection.

	Ported from reachy_mini_conversation_app for voice assistant integration.
	Model is loaded at initialization time (not lazy) to ensure face tracking
	is ready immediately when the camera server starts.
	"""

	from __future__ import annotations
	import logging
	from typing import Tuple, Optional

	import numpy as np
	from numpy.typing import NDArray


	logger = logging.getLogger(__name__)


	class HeadTracker:
	"""Lightweight head tracker using YOLO for face detection.

	Model is loaded at initialization time to ensure face tracking
	is ready immediately (matching conversation_app behavior).
	"""

	def __init__(
	self,
	model_repo: str = "AdamCodd/YOLOv11n-face-detection",
	model_filename: str = "model.pt",
	confidence_threshold: float = 0.3,
	device: str = "cpu",
	) -> None:
	"""Initialize YOLO-based head tracker.

	Args:
	model_repo: HuggingFace model repository
	model_filename: Model file name
	confidence_threshold: Minimum confidence for face detection
	device: Device to run inference on ('cpu' or 'cuda')
	"""
	self.confidence_threshold = confidence_threshold
	self.model = None
	self._model_repo = model_repo
	self._model_filename = model_filename
	self._device = device
	self._detections_class = None
	self._model_load_attempted = False
	self._model_load_error: Optional[str] = None

	# Load model immediately at init (not lazy)
	self._load_model()

	def _load_model(self) -> None:
	"""Load YOLO model with retry logic."""
	if self._model_load_attempted:
	return

	self._model_load_attempted = True

	try:
	from ultralytics import YOLO
	from supervision import Detections
	from huggingface_hub import hf_hub_download
	import time

	self._detections_class = Detections

	# Download with retries
	max_retries = 3
	retry_delay = 5
	model_path = None
	last_error = None

	for attempt in range(max_retries):
	try:
	model_path = hf_hub_download(
	repo_id=self._model_repo,
	filename=self._model_filename,
	)
	break
	except Exception as e:
	last_error = e
	if attempt < max_retries - 1:
	logger.warning(
	"Model download failed (attempt %d/%d): %s. Retrying in %ds...",
	attempt + 1, max_retries, e, retry_delay
	)
	time.sleep(retry_delay)

	if model_path is None:
	raise last_error

	self.model = YOLO(model_path).to(self._device)
	logger.info("YOLO face detection model loaded")
	except ImportError as e:
	self._model_load_error = f"Missing dependencies: {e}"
	logger.warning("Face tracking disabled - missing dependencies: %s", e)
	self.model = None
	except Exception as e:
	self._model_load_error = str(e)
	logger.error("Failed to load YOLO model: %s", e)
	self.model = None

	@property
	def is_available(self) -> bool:
	"""Check if the head tracker is available and ready."""
	return self.model is not None and self._detections_class is not None

	def _select_best_face(self, detections) -> Optional[int]:
	"""Select the best face based on confidence and area.

	Args:
	detections: Supervision detections object

	Returns:
	Index of best face or None if no valid faces
	"""
	if detections.xyxy.shape[0] == 0:
	return None

	if detections.confidence is None:
	return None

	# Filter by confidence threshold
	valid_mask = detections.confidence >= self.confidence_threshold
	if not np.any(valid_mask):
	return None

	valid_indices = np.where(valid_mask)[0]

	# Calculate areas for valid detections
	boxes = detections.xyxy[valid_indices]
	areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

	# Combine confidence and area (weighted towards larger faces)
	confidences = detections.confidence[valid_indices]
	scores = confidences * 0.7 + (areas / np.max(areas)) * 0.3

	best_idx = valid_indices[np.argmax(scores)]
	return int(best_idx)

	def _bbox_to_normalized_coords(
	self, bbox: NDArray[np.float32], w: int, h: int
	) -> NDArray[np.float32]:
	"""Convert bounding box center to normalized coordinates [-1, 1].

	Args:
	bbox: Bounding box [x1, y1, x2, y2]
	w: Image width
	h: Image height

	Returns:
	Center point in [-1, 1] coordinates
	"""
	center_x = (bbox[0] + bbox[2]) / 2.0
	center_y = (bbox[1] + bbox[3]) / 2.0

	# Normalize to [0, 1] then to [-1, 1]
	norm_x = (center_x / w) * 2.0 - 1.0
	norm_y = (center_y / h) * 2.0 - 1.0

	return np.array([norm_x, norm_y], dtype=np.float32)

	def get_head_position(
	self, img: NDArray[np.uint8]
	) -> Tuple[Optional[NDArray[np.float32]], Optional[float]]:
	"""Get head position from face detection.

	Args:
	img: Input image (BGR format)

	Returns:
	Tuple of (face_center [-1,1], confidence) or (None, None) if no face
	"""
	if not self.is_available:
	return None, None

	h, w = img.shape[:2]

	try:
	# Run YOLO inference
	results = self.model(img, verbose=False)
	detections = self._detections_class.from_ultralytics(results[0])

	# Select best face
	face_idx = self._select_best_face(detections)
	if face_idx is None:
	return None, None

	bbox = detections.xyxy[face_idx]
	confidence = None
	if detections.confidence is not None:
	confidence = float(detections.confidence[face_idx])

	# Get face center in [-1, 1] coordinates
	face_center = self._bbox_to_normalized_coords(bbox, w, h)

	return face_center, confidence

	except Exception as e:
	logger.debug("Error in head position detection: %s", e)
	return None, None