#!/usr/bin/env python3
r"""End-to-end 3D human-mesh demo using the Embedl INT8 backbone.

Our quantized DINOv3 backbone (this repo) provides the image features; the upstream
SAM-3D-Body decoder + MHR mesh head turn them into a 3D body mesh. This script runs the
full pipeline and renders the result with matplotlib (no OpenGL needed).

Prerequisites
-------------
  # 1. upstream pipeline (you must have accepted the gated upstream license)
  git clone https://github.com/facebookresearch/sam-3d-body
  pip install -e sam-3d-body          # + its deps (see its INSTALL.md)
  pip install torch matplotlib pillow numpy imageio huggingface_hub
  # 2. gated checkpoint (facebook/sam-3d-body-dinov3): model.ckpt, model_config.yaml,
  #    assets/mhr_model.pt  -> download with `hf download` after accepting the license
  # 3. this repo's backbone: embedl_sam3dbody_int8.pt2

Run
---
  python demo_3d.py --image person.jpg --ckpt-dir ./sam3d_ckpt \
      --pt2 embedl_sam3dbody_int8.pt2 --bbox 180 210 700 950 --out mesh_demo.png
"""
import argparse
import os
import types

import cv2
import matplotlib
import numpy as np
import torch

matplotlib.use("Agg")  # off-screen backend; selected before pyplot is imported

import imageio.v2 as imageio
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
from sam_3d_body import load_sam_3d_body, SAM3DBodyEstimator   # upstream repo

# Unit-length light direction used for the flat per-face shading.
LIGHT = np.array([0.3, 0.5, 1.0])
LIGHT /= np.linalg.norm(LIGHT)
# Base RGB colour of the rendered mesh.
SKIN = np.array([0.80, 0.78, 0.72])

# Side length (pixels) of the square crop used for the "Mesh overlay" panel.
_OVERLAY_SIZE = 512


def _read_image(path):
    """Load *path* with OpenCV (BGR channel order) or raise ``OSError``.

    ``cv2.imread`` signals failure by returning ``None`` instead of raising, which
    would otherwise surface later as an unrelated ``AttributeError``.
    """
    img = cv2.imread(path)
    if img is None:
        raise OSError(f"cannot read image (missing or undecodable): {path}")
    return img


def _spin_gif_path(out):
    """Return the turntable GIF path that accompanies the still image *out*.

    Only a real file extension is stripped, so a dot inside a directory name
    (e.g. ``./results/mesh``) does not truncate the path.
    """
    return os.path.splitext(out)[0] + "_spin.gif"


def recover_mesh(image, ckpt_dir, pt2, bbox):
    """Run the SAM-3D-Body pipeline on one image with the exported backbone swapped in.

    Args:
        image: Path to the input image.
        ckpt_dir: Directory holding ``model.ckpt`` and ``assets/mhr_model.pt``.
        pt2: Path to the exported backbone program loaded via ``torch.export.load``.
        bbox: ``[x1, y1, x2, y2]`` person box, or a falsy value to use the whole image.

    Returns:
        A tuple ``(vertices, faces, cam_t, focal)`` taken from the first result of
        ``process_one_image``: its ``pred_vertices``, the estimator's ``faces``,
        its ``pred_cam_t`` and its ``focal_length`` converted to ``float``.

    Raises:
        OSError: If the image cannot be read.
    """
    # Validate the image before the (expensive) model load so bad paths fail fast.
    h, w = _read_image(image).shape[:2]

    dev = "cuda" if torch.cuda.is_available() else "cpu"
    model, cfg = load_sam_3d_body(
        f"{ckpt_dir}/model.ckpt",
        device=dev,
        mhr_path=f"{ckpt_dir}/assets/mhr_model.pt",
    )

    # swap in the Embedl INT8 backbone (same I/O as the DINOv3 encoder; pipeline is bf16)
    qb = torch.export.load(pt2).module().to(dev)

    def backbone(self, x, *a, **k):
        # Feed the exported module one float32 sample at a time and cast the
        # concatenated features back to the caller's dtype.
        # NOTE(review): presumably the export is fixed to batch size 1 - confirm.
        feats = [qb(x[i:i + 1].float()) for i in range(x.shape[0])]
        return torch.cat(feats, 0).to(x.dtype)

    model.backbone.forward = types.MethodType(backbone, model.backbone)

    est = SAM3DBodyEstimator(model, cfg)          # no detector: pass a bbox
    box = np.array([bbox if bbox else [0, 0, w, h]], dtype=np.float32)
    out = est.process_one_image(image, bboxes=box, use_mask=False)[0]
    return out["pred_vertices"], est.faces, out["pred_cam_t"], float(out["focal_length"])


def _shade(v, f):
    """Return one RGB colour per face from flat, two-sided Lambert-style shading.

    Args:
        v: Vertex positions, indexed by the entries of *f* (3 coordinates each).
        f: Integer face array whose first three columns are vertex indices.

    Returns:
        Array with one row of RGB values in ``[0, 1]`` per face.
    """
    tri = v[f]
    n = np.cross(tri[:, 1] - tri[:, 0], tri[:, 2] - tri[:, 0])
    # The epsilon keeps degenerate (zero-area) faces from dividing by zero.
    n /= (np.linalg.norm(n, axis=1, keepdims=True) + 1e-9)
    # abs(): faces are lit regardless of which way their normal points.
    lam = np.clip(np.abs(n @ LIGHT), 0, 1)[:, None]
    return np.clip(0.25 + 0.75 * lam * SKIN, 0, 1)


def _view(ax, V, F, deg, title):
    """Draw the mesh on *ax* as an orthographic view rotated *deg* degrees about Y.

    Args:
        ax: Matplotlib axes to draw into.
        V: Vertex positions (3 coordinates each).
        F: Integer face array indexing into *V*.
        deg: Rotation angle about the Y axis, in degrees.
        title: Axes title.
    """
    Vc = V - V.mean(0)  # centre the mesh so it rotates about its own centroid
    th = np.radians(deg)
    R = np.array([
        [np.cos(th), 0, np.sin(th)],
        [0, 1, 0],
        [-np.sin(th), 0, np.cos(th)],
    ])
    Vr = Vc @ R.T
    # Drop z for the orthographic projection and negate y for display.
    p = Vr[:, :2] * [1, -1]
    # Faces are drawn in ascending mean-z order, so larger-z faces end up on top.
    o = np.argsort(Vr[F].mean(1)[:, 2])
    ax.add_collection(
        PolyCollection(p[F][o], facecolors=_shade(Vr, F)[o], edgecolors="none")
    )
    ax.set_xlim(p[:, 0].min(), p[:, 0].max())
    ax.set_ylim(p[:, 1].min(), p[:, 1].max())
    ax.set_aspect("equal")
    ax.axis("off")
    ax.set_title(title, fontsize=11)


def render(image, V, F, cam_t, focal, bbox, out):
    """Write a four-panel summary figure and a turntable GIF of the mesh.

    The still figure (saved to *out*) shows the input image, the mesh projected over
    the bbox crop, and two rotated views. The GIF is written next to it with the
    extension replaced by ``_spin.gif``.

    Args:
        image: Path to the input image.
        V: Mesh vertex positions (3 coordinates each).
        F: Integer face array indexing into *V*.
        cam_t: Translation added to *V* before the perspective projection.
        focal: Focal length used in the perspective projection.
        bbox: ``[x1, y1, x2, y2]`` crop box, or a falsy value to use the whole image.
        out: Output path of the still figure.

    Raises:
        OSError: If the image cannot be read.
        ValueError: If *bbox* does not overlap the image.
    """
    img = cv2.cvtColor(_read_image(image), cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    x1, y1, x2, y2 = bbox if bbox else [0, 0, w, h]
    # Clamp to the image: negative coordinates would otherwise wrap around in the
    # slice, and an empty crop would fail inside cv2.resize with an opaque error.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(w, x2), min(h, y2)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"bbox {list(bbox)} does not overlap the {w}x{h} image")
    crop = cv2.resize(img[y1:y2, x1:x2], (_OVERLAY_SIZE, _OVERLAY_SIZE))

    fig, ax = plt.subplots(1, 4, figsize=(15, 6))
    fig.patch.set_facecolor("white")
    ax[0].imshow(img)
    ax[0].axis("off")
    ax[0].set_title("Input", fontsize=11)

    # Perspective-project the camera-space mesh onto the crop.
    # NOTE(review): the principal point is taken as the crop centre and `focal` is
    # used unscaled; if upstream expresses focal_length / pred_cam_t in full-image
    # pixels the overlay will be offset - confirm against the upstream renderer.
    half = _OVERLAY_SIZE / 2
    Vc = V + cam_t
    z = np.clip(Vc[:, 2], 1e-3, None)  # guard against division by ~0 depth
    p = np.stack([focal * Vc[:, 0] / z + half, focal * Vc[:, 1] / z + half], 1)
    # Descending mean-z order: faces with the smallest z are drawn last (on top).
    o = np.argsort(-Vc[F].mean(1)[:, 2])
    ax[1].imshow(crop)
    ax[1].add_collection(
        PolyCollection(p[F][o], facecolors=_shade(Vc, F)[o], edgecolors="none", alpha=0.8)
    )
    ax[1].set_xlim(0, _OVERLAY_SIZE)
    ax[1].set_ylim(_OVERLAY_SIZE, 0)  # image convention: y grows downwards
    ax[1].axis("off")
    ax[1].set_title("Mesh overlay", fontsize=11)

    _view(ax[2], V, F, 20, "¾ view")
    _view(ax[3], V, F, 90, "side view")
    fig.tight_layout()
    fig.savefig(out, dpi=160, bbox_inches="tight")
    plt.close(fig)

    # Turntable animation: one frame every 15 degrees.
    frames = []
    for a in range(0, 360, 15):
        fig, axx = plt.subplots(figsize=(4, 6))
        fig.patch.set_facecolor("white")
        _view(axx, V, F, a, "")
        fig.canvas.draw()
        # Keep RGB only; copy because the canvas buffer is released with the figure.
        frames.append(np.asarray(fig.canvas.buffer_rgba())[..., :3].copy())
        plt.close(fig)

    gif_path = _spin_gif_path(out)
    imageio.mimsave(gif_path, frames, duration=0.1, loop=0)
    print(f"wrote {out} and {gif_path}")


def main():
    """Parse the command line, recover the mesh and render the outputs."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--image", required=True)
    ap.add_argument("--ckpt-dir", required=True,
                    help="dir with model.ckpt, model_config.yaml, assets/mhr_model.pt")
    ap.add_argument("--pt2", default="embedl_sam3dbody_int8.pt2")
    ap.add_argument("--bbox", type=int, nargs=4, default=None,
                    metavar=("x1", "y1", "x2", "y2"))
    ap.add_argument("--out", default="mesh_demo.png")
    args = ap.parse_args()

    V, F, cam_t, focal = recover_mesh(args.image, args.ckpt_dir, args.pt2, args.bbox)
    print(f"recovered mesh: {V.shape[0]} vertices")
    render(args.image, V, F, cam_t, focal, args.bbox, args.out)


if __name__ == "__main__":
    main()