Segment-Anything-2-video-tracking

Runtime error

App Files Files Community

Mirko Trasciatti committed on Nov 14, 2025

Commit

11e7a5f

1 Parent(s): 1205e2f

Visualize SAM2 ball trajectory using mask centroids

Browse files

Files changed (1) hide show

app.py +51 -0

app.py CHANGED Viewed

@@ -212,6 +212,7 @@ class AppState:
         self.pending_box_start_frame_idx: int | None = None
         self.pending_box_start_obj_id: int | None = None
         self.is_switching_model: bool = False
         # Model selection
         self.model_repo_key: str = "tiny"
         self.model_repo_id: str | None = None
@@ -286,6 +287,7 @@ def init_video_session(GLOBAL_STATE: gr.State, video: str | dict) -> tuple[AppSt
     GLOBAL_STATE.inference_session = None
     GLOBAL_STATE.masks_by_frame = {}
     GLOBAL_STATE.color_by_obj = {}
     load_model_if_needed(GLOBAL_STATE)
@@ -397,6 +399,15 @@ def compose_frame(state: AppState, frame_idx: int, remove_bg: bool = False) -> I
             color = state.color_by_obj.get(obj_id, (255, 255, 255))
             for x1, y1, x2, y2 in boxes:
                 draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=2)
     # Save to cache and return
     state.composited_frames[frame_idx] = out_img
     return out_img
@@ -418,6 +429,43 @@ def _ensure_color_for_obj(state: AppState, obj_id: int):
         state.color_by_obj[obj_id] = pastel_color_for_object(obj_id)
 def on_image_click(
     img: Image.Image | np.ndarray,
     state: AppState,
@@ -545,6 +593,7 @@ def on_image_click(
         masks_for_frame[int(oid)] = mask_2d
     state.masks_by_frame[int(frame_idx)] = masks_for_frame
     # Invalidate cache for this frame to force recomposition
     state.composited_frames.pop(int(frame_idx), None)
@@ -590,6 +639,7 @@ def propagate_masks(GLOBAL_STATE: gr.State):
                 mask_2d = video_res_masks[i].cpu().numpy().squeeze()
                 masks_for_frame[int(oid)] = mask_2d
             GLOBAL_STATE.masks_by_frame[frame_idx] = masks_for_frame
             # Invalidate cache for that frame to force recomposition
             GLOBAL_STATE.composited_frames.pop(frame_idx, None)
@@ -618,6 +668,7 @@ def reset_session(GLOBAL_STATE: gr.State) -> tuple[AppState, Image.Image, int, i
     GLOBAL_STATE.pending_box_start = None
     GLOBAL_STATE.pending_box_start_frame_idx = None
     GLOBAL_STATE.pending_box_start_obj_id = None
     # Dispose and re-init inference session for current model with existing frames
     try:

         self.pending_box_start_frame_idx: int | None = None
         self.pending_box_start_obj_id: int | None = None
         self.is_switching_model: bool = False
+        self.ball_centers: dict[int, dict[int, tuple[int, int]]] = {}
         # Model selection
         self.model_repo_key: str = "tiny"
         self.model_repo_id: str | None = None
     GLOBAL_STATE.inference_session = None
     GLOBAL_STATE.masks_by_frame = {}
     GLOBAL_STATE.color_by_obj = {}
+    GLOBAL_STATE.ball_centers = {}
     load_model_if_needed(GLOBAL_STATE)
             color = state.color_by_obj.get(obj_id, (255, 255, 255))
             for x1, y1, x2, y2 in boxes:
                 draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=2)
+    # Draw trajectory centers (all frames)
+    if state.ball_centers:
+        draw = ImageDraw.Draw(out_img)
+        cross_half = 4
+        for obj_id, centers in state.ball_centers.items():
+            color = state.color_by_obj.get(obj_id, (255, 255, 0))
+            for cx, cy in centers.values():
+                draw.line([(cx - cross_half, cy), (cx + cross_half, cy)], fill=color, width=2)
+                draw.line([(cx, cy - cross_half), (cx, cy + cross_half)], fill=color, width=2)
     # Save to cache and return
     state.composited_frames[frame_idx] = out_img
     return out_img
         state.color_by_obj[obj_id] = pastel_color_for_object(obj_id)
+def _compute_mask_centroid(mask: np.ndarray) -> tuple[int, int] | None:  # Return the mask's intensity-weighted centroid as integer (cx, cy), or None when there is none.
+    if mask is None:  # no mask supplied -> no centroid
+        return None
+    mask_np = np.array(mask)  # np.array copies by default, so the caller's mask is never modified
+    if mask_np.ndim == 3:  # NOTE(review): squeeze only drops size-1 axes; a 3-D mask without one stays 3-D - confirm cv2.moments accepts that
+        mask_np = mask_np.squeeze()
+    if mask_np.size == 0:  # empty array -> nothing to average
+        return None
+    mask_float = np.clip(mask_np, 0.0, 1.0).astype(np.float32)  # clamp values into [0, 1] and cast to float32 before computing moments
+    moments = cv2.moments(mask_float)  # spatial moments; pixel values act as weights
+    if moments["m00"] == 0:  # zero total weight (all-zero mask after clipping): avoid dividing by zero
+        return None
+    cx = int(moments["m10"] / moments["m00"])  # x of the centroid; int() truncates rather than rounds
+    cy = int(moments["m01"] / moments["m00"])  # y of the centroid; int() truncates rather than rounds
+    return cx, cy  # (x, y) order
+def _update_centroids_for_frame(state: AppState, frame_idx: int):  # Refresh state.ball_centers[obj_id][frame_idx] from the masks stored for frame_idx.
+    if state is None:  # nothing to update without a state object
+        return
+    masks = state.masks_by_frame.get(int(frame_idx), {})  # a frame with no stored masks yields {}, so only the cleanup loop at the end has any effect
+    seen_obj_ids: set[int] = set()  # object ids that have a mask on this frame
+    for obj_id, mask in masks.items():
+        centroid = _compute_mask_centroid(mask)  # None when the mask is missing, empty or has zero weight
+        centers = state.ball_centers.setdefault(int(obj_id), {})  # per-object {frame_idx: (cx, cy)} dict, created on first use
+        if centroid is not None:
+            centers[int(frame_idx)] = centroid  # overwrite any earlier centroid for this frame
+        else:
+            centers.pop(int(frame_idx), None)  # drop a stale centroid left from an earlier mask on this frame
+        seen_obj_ids.add(int(obj_id))  # recorded even when no centroid was found, so the cleanup loop skips this object
+        _ensure_color_for_obj(state, int(obj_id))  # presumably guarantees a state.color_by_obj entry for this object - confirm against the helper
+    # Remove frames for objects without masks at this frame
+    for obj_id, centers in state.ball_centers.items():  # only the inner dicts are mutated, so iterating the outer dict is safe
+        if obj_id not in seen_obj_ids:
+            centers.pop(int(frame_idx), None)  # no-op when the object had no entry for this frame
 def on_image_click(
     img: Image.Image | np.ndarray,
     state: AppState,
         masks_for_frame[int(oid)] = mask_2d
     state.masks_by_frame[int(frame_idx)] = masks_for_frame
+    _update_centroids_for_frame(state, int(frame_idx))
     # Invalidate cache for this frame to force recomposition
     state.composited_frames.pop(int(frame_idx), None)
                 mask_2d = video_res_masks[i].cpu().numpy().squeeze()
                 masks_for_frame[int(oid)] = mask_2d
             GLOBAL_STATE.masks_by_frame[frame_idx] = masks_for_frame
+            _update_centroids_for_frame(GLOBAL_STATE, frame_idx)
             # Invalidate cache for that frame to force recomposition
             GLOBAL_STATE.composited_frames.pop(frame_idx, None)
     GLOBAL_STATE.pending_box_start = None
     GLOBAL_STATE.pending_box_start_frame_idx = None
     GLOBAL_STATE.pending_box_start_obj_id = None
+    GLOBAL_STATE.ball_centers.clear()
     # Dispose and re-init inference session for current model with existing frames
     try: