embedl
/

sam-3d-body

+#!/usr/bin/env python3
+"""End-to-end 3D human-mesh demo using the Embedl INT8 backbone.
+Our quantized DINOv3 backbone (this repo) provides the image features; the
+upstream SAM-3D-Body decoder + MHR mesh head turn them into a 3D body mesh.
+This script runs the full pipeline and renders the result with matplotlib
+(no OpenGL needed).
+Prerequisites
+-------------
+    # 1. upstream pipeline (you must have accepted the gated upstream license)
+    git clone https://github.com/facebookresearch/sam-3d-body
+    pip install -e sam-3d-body              # + its deps (see its INSTALL.md)
+    pip install torch matplotlib pillow numpy imageio huggingface_hub opencv-python
+    # 2. gated checkpoint (facebook/sam-3d-body-dinov3): model.ckpt, model_config.yaml,
+    #    assets/mhr_model.pt   ->  download with `hf download` after accepting the license
+    # 3. this repo's backbone:  embedl_sam3dbody_int8.pt2
+Run
+---
+    python demo_3d.py --image person.jpg --ckpt-dir ./sam3d_ckpt \
+        --pt2 embedl_sam3dbody_int8.pt2 --bbox 180 210 700 950 --out mesh_demo.png
+"""
+import argparse, types, numpy as np, cv2, torch
+import matplotlib; matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from matplotlib.collections import PolyCollection
+import imageio.v2 as imageio
+from sam_3d_body import load_sam_3d_body, SAM3DBodyEstimator   # upstream repo
+LIGHT = np.array([0.3, 0.5, 1.0]); LIGHT /= np.linalg.norm(LIGHT)
+SKIN = np.array([0.80, 0.78, 0.72])
+def recover_mesh(image, ckpt_dir, pt2, bbox):
+    dev = "cuda" if torch.cuda.is_available() else "cpu"
+    model, cfg = load_sam_3d_body(f"{ckpt_dir}/model.ckpt", device=dev,
+                                  mhr_path=f"{ckpt_dir}/assets/mhr_model.pt")
+    # swap in the Embedl INT8 backbone (same I/O as the DINOv3 encoder; pipeline is bf16)
+    qb = torch.export.load(pt2).module().to(dev)
+    def backbone(self, x, *a, **k):
+        return torch.cat([qb(x[i:i + 1].float()) for i in range(x.shape[0])], 0).to(x.dtype)
+    model.backbone.forward = types.MethodType(backbone, model.backbone)
+    est = SAM3DBodyEstimator(model, cfg)                       # no detector: pass a bbox
+    h, w = cv2.imread(image).shape[:2]
+    box = np.array([bbox if bbox else [0, 0, w, h]], dtype=np.float32)
+    out = est.process_one_image(image, bboxes=box, use_mask=False)[0]
+    return out["pred_vertices"], est.faces, out["pred_cam_t"], float(out["focal_length"])
+def _shade(v, f):
+    n = np.cross(v[f][:, 1] - v[f][:, 0], v[f][:, 2] - v[f][:, 0])
+    n /= (np.linalg.norm(n, axis=1, keepdims=True) + 1e-9)
+    lam = np.clip(np.abs(n @ LIGHT), 0, 1)[:, None]
+    return np.clip(0.25 + 0.75 * lam * SKIN, 0, 1)
+def _view(ax, V, F, deg, title):
+    Vc = V - V.mean(0); th = np.radians(deg)
+    R = np.array([[np.cos(th), 0, np.sin(th)], [0, 1, 0], [-np.sin(th), 0, np.cos(th)]])
+    Vr = Vc @ R.T; p = Vr[:, :2] * [1, -1]; o = np.argsort(Vr[F].mean(1)[:, 2])
+    ax.add_collection(PolyCollection(p[F][o], facecolors=_shade(Vr, F)[o], edgecolors="none"))
+    ax.set_xlim(p[:, 0].min(), p[:, 0].max()); ax.set_ylim(p[:, 1].min(), p[:, 1].max())
+    ax.set_aspect("equal"); ax.axis("off"); ax.set_title(title, fontsize=11)
+def render(image, V, F, cam_t, focal, bbox, out):
+    img = cv2.cvtColor(cv2.imread(image), cv2.COLOR_BGR2RGB)
+    x1, y1, x2, y2 = bbox if bbox else [0, 0, img.shape[1], img.shape[0]]
+    crop = cv2.resize(img[y1:y2, x1:x2], (512, 512))
+    fig, ax = plt.subplots(1, 4, figsize=(15, 6)); fig.patch.set_facecolor("white")
+    ax[0].imshow(img); ax[0].axis("off"); ax[0].set_title("Input", fontsize=11)
+    Vc = V + cam_t; z = np.clip(Vc[:, 2], 1e-3, None)
+    p = np.stack([focal * Vc[:, 0] / z + 256, focal * Vc[:, 1] / z + 256], 1)
+    o = np.argsort(-Vc[F].mean(1)[:, 2])
+    ax[1].imshow(crop)
+    ax[1].add_collection(PolyCollection(p[F][o], facecolors=_shade(Vc, F)[o], edgecolors="none", alpha=0.8))
+    ax[1].set_xlim(0, 512); ax[1].set_ylim(512, 0); ax[1].axis("off"); ax[1].set_title("Mesh overlay", fontsize=11)
+    _view(ax[2], V, F, 20, "¾ view"); _view(ax[3], V, F, 90, "side view")
+    plt.tight_layout(); plt.savefig(out, dpi=160, bbox_inches="tight"); plt.close()
+    frames = []
+    for a in range(0, 360, 15):
+        fig, axx = plt.subplots(figsize=(4, 6)); fig.patch.set_facecolor("white"); _view(axx, V, F, a, "")
+        fig.canvas.draw()
+        frames.append(np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()); plt.close()
+    imageio.mimsave(out.rsplit(".", 1)[0] + "_spin.gif", frames, duration=0.1, loop=0)
+    print(f"wrote {out} and {out.rsplit('.', 1)[0]}_spin.gif")
+if __name__ == "__main__":
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--image", required=True)
+    ap.add_argument("--ckpt-dir", required=True, help="dir with model.ckpt, model_config.yaml, assets/mhr_model.pt")
+    ap.add_argument("--pt2", default="embedl_sam3dbody_int8.pt2")
+    ap.add_argument("--bbox", type=int, nargs=4, default=None, metavar=("x1", "y1", "x2", "y2"))
+    ap.add_argument("--out", default="mesh_demo.png")
+    a = ap.parse_args()
+    V, F, cam_t, focal = recover_mesh(a.image, a.ckpt_dir, a.pt2, a.bbox)
+    print(f"recovered mesh: {V.shape[0]} vertices")
+    render(a.image, V, F, cam_t, focal, a.bbox, a.out)