#!/usr/bin/env python3
"""End-to-end 3D human-mesh demo using the Embedl INT8 backbone.

Our quantized DINOv3 backbone (this repo) provides the image features; the
upstream SAM-3D-Body decoder + MHR mesh head turn them into a 3D body mesh.
This script runs the full pipeline and renders the result with matplotlib
(no OpenGL needed).

Prerequisites
-------------
    # 1. upstream pipeline (you must have accepted the gated upstream license)
    git clone https://github.com/facebookresearch/sam-3d-body
    pip install -e sam-3d-body              # + its deps (see its INSTALL.md)
    pip install torch matplotlib pillow numpy imageio opencv-python huggingface_hub
    # 2. gated checkpoint (facebook/sam-3d-body-dinov3): model.ckpt, model_config.yaml,
    #    assets/mhr_model.pt   ->  download with `hf download` after accepting the license
    # 3. this repo's backbone:  embedl_sam3dbody_int8.pt2

Run
---
    python demo_3d.py --image person.jpg --ckpt-dir ./sam3d_ckpt \
        --pt2 embedl_sam3dbody_int8.pt2 --bbox 180 210 700 950 --out mesh_demo.png
"""
import argparse, types, numpy as np, cv2, torch
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection
import imageio.v2 as imageio
from sam_3d_body import load_sam_3d_body, SAM3DBodyEstimator   # upstream repo

# Direction used for the flat shading in _shade(), scaled to unit length so the
# dot product with a unit face normal lies in [-1, 1].
LIGHT = np.array([0.3, 0.5, 1.0])
LIGHT /= np.linalg.norm(LIGHT)

# RGB colour that the lit term of _shade() is multiplied by.
SKIN = np.array([0.80, 0.78, 0.72])


def recover_mesh(image, ckpt_dir, pt2, bbox):
    """Run the upstream body-mesh pipeline on one image with the exported backbone swapped in.

    Args:
        image: Path to the input image (read with OpenCV to get its size, then
            passed by path to the estimator).
        ckpt_dir: Directory containing ``model.ckpt`` and ``assets/mhr_model.pt``.
        pt2: Path to the exported backbone program loaded with ``torch.export.load``.
        bbox: ``[x1, y1, x2, y2]`` person box, or a falsy value to use the whole image.

    Returns:
        A tuple ``(pred_vertices, faces, pred_cam_t, focal_length)`` taken from the
        first result of ``process_one_image``; ``focal_length`` is converted to a
        Python ``float``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image``.
    """
    # Validate the image before the expensive model load so that a bad path
    # fails immediately with a clear message instead of an AttributeError on None.
    probe = cv2.imread(image)
    if probe is None:
        raise FileNotFoundError(
            f"cannot read image {image!r} (missing or not a decodable image)"
        )
    h, w = probe.shape[:2]

    dev = "cuda" if torch.cuda.is_available() else "cpu"
    model, cfg = load_sam_3d_body(
        f"{ckpt_dir}/model.ckpt",
        device=dev,
        mhr_path=f"{ckpt_dir}/assets/mhr_model.pt",
    )

    # swap in the Embedl INT8 backbone (same I/O as the DINOv3 encoder; pipeline is bf16)
    qb = torch.export.load(pt2).module().to(dev)

    def backbone(self, x, *a, **k):
        # The exported module is called one sample at a time in float32 and the
        # results are concatenated and cast back to the caller's dtype.
        # NOTE(review): presumably the program was exported with batch size 1 — confirm.
        feats = [qb(x[i:i + 1].float()) for i in range(x.shape[0])]
        return torch.cat(feats, 0).to(x.dtype)

    # Bind the replacement to this instance only; the class itself is untouched.
    model.backbone.forward = types.MethodType(backbone, model.backbone)

    est = SAM3DBodyEstimator(model, cfg)                       # no detector: pass a bbox
    box = np.array([bbox if bbox else [0, 0, w, h]], dtype=np.float32)
    out = est.process_one_image(image, bboxes=box, use_mask=False)[0]
    return out["pred_vertices"], est.faces, out["pred_cam_t"], float(out["focal_length"])


def _shade(v, f):
    """Return one flat-shaded RGB colour per face.

    ``v[f]`` gathers the three corner positions of every face; the face normal is
    the normalised cross product of two of its edges. Brightness is the absolute
    value of ``normal . LIGHT``, so a face is lit whichever way its normal points.
    The result is ``0.25 + 0.75 * brightness * SKIN`` clipped to ``[0, 1]``.
    """
    corners = v[f]
    edge_a = corners[:, 1] - corners[:, 0]
    edge_b = corners[:, 2] - corners[:, 0]
    normals = np.cross(edge_a, edge_b)
    # The small epsilon keeps zero-area faces from dividing by zero.
    normals /= np.linalg.norm(normals, axis=1, keepdims=True) + 1e-9
    brightness = np.clip(np.abs(normals @ LIGHT), 0, 1)[:, None]
    colours = 0.25 + 0.75 * brightness * SKIN
    return np.clip(colours, 0, 1)


def _view(ax, V, F, deg, title):
    """Draw a flat-shaded orthographic view of the mesh on ``ax``.

    The mesh is centred on its vertex mean, rotated by ``deg`` degrees about the
    y axis, and projected by dropping z. The y coordinate is negated so that +y
    in mesh coordinates points down in the plot.

    Args:
        ax: Matplotlib axes to draw into.
        V: Vertex positions, indexed as ``V[F]`` to gather face corners.
        F: Face index array.
        deg: Rotation about the y axis, in degrees.
        title: Axes title.
    """
    centred = V - V.mean(0)
    th = np.radians(deg)
    cos_t, sin_t = np.cos(th), np.sin(th)
    R = np.array([[cos_t, 0, sin_t], [0, 1, 0], [-sin_t, 0, cos_t]])
    Vr = centred @ R.T
    p = Vr[:, :2] * [1, -1]

    # Painter's algorithm: draw the farthest faces first so nearer ones cover them.
    # render() projects these same coordinates by dividing by z (z = distance from
    # the camera) and sorts its overlay far-to-near; the order here must match,
    # otherwise the far side of the body is painted on top of the near side.
    order = np.argsort(-Vr[F].mean(1)[:, 2])

    ax.add_collection(
        PolyCollection(p[F][order], facecolors=_shade(Vr, F)[order], edgecolors="none")
    )
    ax.set_xlim(p[:, 0].min(), p[:, 0].max())
    ax.set_ylim(p[:, 1].min(), p[:, 1].max())
    ax.set_aspect("equal")
    ax.axis("off")
    ax.set_title(title, fontsize=11)


def render(image, V, F, cam_t, focal, bbox, out):
    """Write a four-panel summary figure to ``out`` and a turntable GIF next to it.

    Panels: the input image, the mesh projected over the 512x512 bbox crop, and
    two rotated views drawn by ``_view`` (20 and 90 degrees). The GIF contains
    one ``_view`` frame every 15 degrees and is saved as ``<out without
    extension>_spin.gif``.

    Args:
        image: Path to the input image.
        V: Mesh vertices; ``V + cam_t`` is treated as camera-space positions.
        F: Face index array.
        cam_t: Translation added to ``V`` before projection.
        focal: Focal length used in the perspective division.
        bbox: ``[x1, y1, x2, y2]`` crop box, or a falsy value for the whole image.
        out: Output path of the summary figure.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image``.
        ValueError: If ``bbox`` does not overlap the image.
    """
    import os  # function-scope: only needed to derive the GIF path from ``out``

    bgr = cv2.imread(image)
    if bgr is None:
        raise FileNotFoundError(
            f"cannot read image {image!r} (missing or not a decodable image)"
        )
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img_h, img_w = img.shape[:2]

    x1, y1, x2, y2 = bbox if bbox else [0, 0, img_w, img_h]
    # Clamp to the image: a negative start would otherwise wrap around in numpy
    # slicing and silently produce an empty (or wrong) crop.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img_w, x2), min(img_h, y2)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"bbox {bbox} does not overlap the {img_w}x{img_h} image")
    crop = cv2.resize(img[y1:y2, x1:x2], (512, 512))

    fig, ax = plt.subplots(1, 4, figsize=(15, 6))
    fig.patch.set_facecolor("white")
    ax[0].imshow(img)
    ax[0].axis("off")
    ax[0].set_title("Input", fontsize=11)

    # Pinhole projection of the camera-space mesh; z is clamped to stay positive.
    # NOTE(review): this places the principal point at the centre of the 512x512
    # crop and uses ``focal`` unscaled — confirm that matches the camera
    # convention of the upstream ``pred_cam_t`` / ``focal_length`` outputs.
    Vc = V + cam_t
    z = np.clip(Vc[:, 2], 1e-3, None)
    p = np.stack([focal * Vc[:, 0] / z + 256, focal * Vc[:, 1] / z + 256], 1)
    far_to_near = np.argsort(-Vc[F].mean(1)[:, 2])
    ax[1].imshow(crop)
    ax[1].add_collection(
        PolyCollection(
            p[F][far_to_near],
            facecolors=_shade(Vc, F)[far_to_near],
            edgecolors="none",
            alpha=0.8,
        )
    )
    ax[1].set_xlim(0, 512)
    ax[1].set_ylim(512, 0)  # image convention: y grows downwards
    ax[1].axis("off")
    ax[1].set_title("Mesh overlay", fontsize=11)

    _view(ax[2], V, F, 20, "¾ view")
    _view(ax[3], V, F, 90, "side view")
    fig.tight_layout()
    fig.savefig(out, dpi=160, bbox_inches="tight")
    plt.close(fig)

    frames = []
    for angle in range(0, 360, 15):
        spin_fig, spin_ax = plt.subplots(figsize=(4, 6))
        spin_fig.patch.set_facecolor("white")
        _view(spin_ax, V, F, angle, "")
        spin_fig.canvas.draw()
        # Drop the alpha channel and copy, because the canvas buffer is released
        # when the figure is closed.
        frames.append(np.asarray(spin_fig.canvas.buffer_rgba())[..., :3].copy())
        plt.close(spin_fig)

    # splitext only strips a real file extension; splitting on the last "." would
    # cut inside a directory name such as "./results/mesh".
    gif_path = os.path.splitext(out)[0] + "_spin.gif"
    # NOTE(review): the unit of ``duration`` depends on the imageio GIF backend
    # (seconds in the legacy writer, milliseconds in the Pillow plugin) — confirm
    # against the installed imageio version.
    imageio.mimsave(gif_path, frames, duration=0.1, loop=0)
    print(f"wrote {out} and {gif_path}")


if __name__ == "__main__":
    # Command-line entry point: parse the options, recover the mesh, then render it.
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", required=True)
    parser.add_argument(
        "--ckpt-dir",
        required=True,
        help="dir with model.ckpt, model_config.yaml, assets/mhr_model.pt",
    )
    parser.add_argument("--pt2", default="embedl_sam3dbody_int8.pt2")
    parser.add_argument(
        "--bbox",
        type=int,
        nargs=4,
        default=None,
        metavar=("x1", "y1", "x2", "y2"),
    )
    parser.add_argument("--out", default="mesh_demo.png")
    args = parser.parse_args()

    mesh = recover_mesh(args.image, args.ckpt_dir, args.pt2, args.bbox)
    vertices, faces, cam_t, focal = mesh
    print(f"recovered mesh: {vertices.shape[0]} vertices")
    render(args.image, vertices, faces, cam_t, focal, args.bbox, args.out)