Spaces:

notaneimu
/

video-to-colmap-for-tttlrm

Running

App Files Files Community

notaneimu committed on Mar 11

Commit

66dec57

0 Parent(s):

Initial Space app

Browse files

Files changed (6) hide show

.gitignore +4 -0
README.md +39 -0
app.py +258 -0
packages.txt +2 -0
requirements.txt +4 -0
video_to_colmap.py +632 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+__pycache__/
+*.py[cod]
+.DS_Store
+outputs/

README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+title: Video to COLMAP for tttLRM
+emoji: 🎞️
+colorFrom: amber
+colorTo: slate
+sdk: gradio
+sdk_version: 6.2.0
+python_version: 3.12
+app_file: app.py
+pinned: false
+suggested_hardware: cpu-upgrade
+---
+# Video to COLMAP for tttLRM
+Standalone Hugging Face Space that converts an uploaded video into a raw COLMAP scene archive suitable for the `tttLRM` inference Space.
+## What it does
+- Normalizes uploaded video with `ffmpeg`
+- Samples candidate frames across the clip
+- Scores sharpness, scene cuts, and inter-frame motion
+- Chooses an ordered keyframe set with overlap appropriate for COLMAP
+- Runs CPU COLMAP (`feature_extractor`, `sequential_matcher`, `mapper`)
+- Packages a raw scene archive with:
+  - `images/`
+  - `sparse/0/`
+  - `report.json`
+## Recommended workflow
+1. Upload a short orbit or slow pan video of a single object or scene.
+2. Download the generated raw COLMAP `.zip`.
+3. Upload that archive into the companion `tttLRM` inference Space.
+## Notes
+- This Space is CPU-oriented because CPU builds of COLMAP run more reliably on standard Hugging Face Spaces than GPU-specific builds.
+- Best results come from a stable single-shot orbit video with limited lighting changes and no jump cuts.

app.py ADDED Viewed

	@@ -0,0 +1,258 @@

+from __future__ import annotations
+import math
+from pathlib import Path
+from typing import Final
+import gradio as gr
+from video_to_colmap import ConversionOutputs, convert_video_to_colmap_archive
+# Directory that contains this module.
+APP_DIR: Final[Path] = Path(__file__).resolve().parent
+# Folder next to the app where generated files are placed.
+OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
+# Created at import time so the folder exists before it is registered below.
+OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
+# Register the outputs folder with Gradio as a static path.
+gr.set_static_paths(paths=[str(OUTPUTS_DIR)])
+CSS: Final[str] = """
+html { scrollbar-gutter: stable; }
+body { overflow: auto; }
+.gradio-container {
+    max-width: none;
+    width: 100%;
+    margin: 0;
+    padding: 0.75rem 1rem 1rem;
+}
+#main-row {
+    gap: 1rem;
+    align-items: stretch;
+}
+#controls-panel {
+    display: flex;
+    flex-direction: column;
+    gap: 0.75rem;
+}
+#preview-panel {
+    min-height: 540px;
+}
+.preview-placeholder {
+    width: 100%;
+    min-height: 540px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    border-radius: 14px;
+    background: linear-gradient(135deg, #111827 0%, #1f2937 100%);
+    border: 1px solid rgba(148, 163, 184, 0.2);
+    color: #e5e7eb;
+}
+.preview-inner {
+    max-width: 460px;
+    padding: 32px;
+    text-align: center;
+}
+.preview-title {
+    font-size: 20px;
+    font-weight: 600;
+    margin-bottom: 8px;
+}
+.preview-desc {
+    font-size: 14px;
+    line-height: 1.5;
+    opacity: 0.82;
+}
+#status-text {
+    font-size: 13px;
+    opacity: 0.92;
+}
+@media (max-width: 900px) {
+    #main-row {
+        flex-direction: column;
+    }
+    #preview-panel,
+    .preview-placeholder {
+        min-height: 420px;
+    }
+}
+"""
+def preview_placeholder_html(title: str, description: str) -> str:
+    return f"""
+<div class="preview-placeholder">
+  <div class="preview-inner">
+    <div class="preview-title">{title}</div>
+    <div class="preview-desc">{description}</div>
+  </div>
+</div>
+"""
+def start_generation() -> tuple[object, object, str]:
+    return (
+        gr.update(interactive=False, value="Converting..."),
+        gr.update(interactive=False),
+        preview_placeholder_html(
+            "Preparing Video for COLMAP",
+            "Normalizing the clip, selecting sharp overlapping keyframes, and running sparse reconstruction.",
+        ),
+    )
+def _status_text(outputs: ConversionOutputs) -> str:
+    coverage = 0.0
+    if outputs.selected_frames:
+        coverage = outputs.registered_frames / outputs.selected_frames
+    return (
+        f"Prepared **{outputs.scene_name}** from a **{outputs.duration_seconds:.1f}s** clip. "
+        f"Selected **{outputs.selected_frames}** keyframes, COLMAP registered **{outputs.registered_frames}**, "
+        f"and the reconstruction quality is **{outputs.quality_label}** "
+        f"({math.floor(coverage * 100)}% registration)."
+    )
+def run_conversion(
+    video_path: str | None,
+    target_frames: str,
+    sampling_profile: str,
+    max_edge: str,
+) -> tuple[object, object, object, str]:
+    if not video_path:
+        raise gr.Error("Upload a video first.")
+    try:
+        outputs = convert_video_to_colmap_archive(
+            video_path=video_path,
+            target_frames=int(target_frames),
+            profile_key=sampling_profile,
+            max_image_edge=int(max_edge),
+        )
+        return (
+            gr.update(value=str(outputs.archive_path), visible=True, interactive=True),
+            gr.update(value=str(outputs.report_path), visible=True, interactive=True),
+            gr.update(value=str(outputs.contact_sheet_path), visible=True),
+            _status_text(outputs),
+        )
+    except gr.Error:
+        raise
+    except Exception as exc:
+        raise gr.Error(f"Conversion failed: {type(exc).__name__}: {exc}") from exc
+def clear_all() -> tuple[None, object, object, object, str]:
+    return (
+        None,
+        gr.update(value=None, visible=False),
+        gr.update(value=None, visible=False),
+        gr.update(value=None, visible=False),
+        "",
+    )
+def on_video_change(video_path: str | None) -> tuple[object, object]:
+    has_video = bool(video_path)
+    return (
+        gr.update(interactive=has_video, value="Build COLMAP Archive"),
+        gr.update(interactive=has_video),
+    )
+def build_demo() -> gr.Blocks:
+    """Construct the Gradio Blocks UI and wire up its event handlers.
+
+    Returns:
+        The configured ``gr.Blocks`` app with its queue enabled (max size 4).
+    """
+    with gr.Blocks(
+        css=CSS,
+        title="Video to COLMAP for tttLRM",
+        theme=gr.themes.Origin(),
+    ) as demo:
+        gr.Markdown("## Video to COLMAP for tttLRM")
+        gr.Markdown(
+            "Upload a single video. The Space will pick sharp overlapping keyframes, run COLMAP, and export a raw scene archive ready for the `tttLRM` Space."
+        )
+        with gr.Row(elem_id="main-row", equal_height=True):
+            # Left column: input, options, action buttons and download outputs.
+            with gr.Column(scale=3, min_width=320, elem_id="controls-panel"):
+                video_in = gr.File(
+                    label="Input Video",
+                    type="filepath",
+                    file_types=[".mp4", ".mov", ".webm", ".mkv", ".avi"],
+                )
+                # Dropdown values are strings; run_conversion converts them with int().
+                target_frames = gr.Dropdown(
+                    label="Target Keyframes",
+                    choices=["16", "24", "32", "48"],
+                    value="24",
+                )
+                sampling_profile = gr.Dropdown(
+                    label="Sampling Profile",
+                    choices=["balanced", "dense", "sparse"],
+                    value="balanced",
+                )
+                max_edge = gr.Dropdown(
+                    label="Max Frame Edge",
+                    choices=["960", "1280", "1600"],
+                    value="1280",
+                )
+                with gr.Row():
+                    # Both buttons start disabled; on_video_change enables them.
+                    generate_btn = gr.Button("Build COLMAP Archive", variant="primary", interactive=False)
+                    clear_btn = gr.Button("Clear", interactive=False)
+                archive_download = gr.File(label="Download Raw COLMAP Archive", visible=False)
+                report_download = gr.File(label="Download Reconstruction Report", visible=False)
+                status_text = gr.Markdown(elem_id="status-text")
+            # Right column: placeholder panel plus the contact sheet image.
+            with gr.Column(scale=7, min_width=520):
+                preview_html = gr.HTML(
+                    value=preview_placeholder_html(
+                        "Keyframe Selection Preview",
+                        "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
+                    ),
+                    elem_id="preview-panel",
+                )
+                contact_sheet = gr.Image(label="Selected Keyframes", visible=False, type="filepath")
+        video_in.change(
+            on_video_change,
+            inputs=[video_in],
+            outputs=[generate_btn, clear_btn],
+        )
+        # Three-step chain: lock the UI, run the conversion, then unlock the UI.
+        # NOTE(review): if `.then()` also fires after a failed step, the
+        # "Keyframe Selection Complete" placeholder would be shown after an
+        # error — confirm against the Gradio event-chaining docs.
+        generate_btn.click(
+            start_generation,
+            outputs=[generate_btn, clear_btn, preview_html],
+            queue=False,
+        ).then(
+            run_conversion,
+            inputs=[video_in, target_frames, sampling_profile, max_edge],
+            outputs=[archive_download, report_download, contact_sheet, status_text],
+        ).then(
+            lambda: (
+                gr.update(interactive=True, value="Build COLMAP Archive"),
+                gr.update(interactive=True),
+                preview_placeholder_html(
+                    "Keyframe Selection Complete",
+                    "Review the contact sheet below and download the raw COLMAP archive for the `tttLRM` Space.",
+                ),
+            ),
+            outputs=[generate_btn, clear_btn, preview_html],
+            queue=False,
+        )
+        # Clear resets inputs/outputs first, then disables the buttons and
+        # restores the initial preview placeholder.
+        clear_btn.click(
+            clear_all,
+            outputs=[video_in, archive_download, report_download, contact_sheet, status_text],
+            queue=False,
+        ).then(
+            lambda: (
+                gr.update(interactive=False),
+                gr.update(interactive=False),
+                preview_placeholder_html(
+                    "Keyframe Selection Preview",
+                    "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
+                ),
+            ),
+            outputs=[generate_btn, clear_btn, preview_html],
+            queue=False,
+        )
+    demo.queue(max_size=4)
+    return demo
+# Build and launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    build_demo().launch()

packages.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ ffmpeg
2	+ colmap

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==6.2.0
+numpy==2.2.6
+opencv-python-headless==4.12.0.88
+Pillow==12.0.0

video_to_colmap.py ADDED Viewed

	@@ -0,0 +1,632 @@

+from __future__ import annotations
+import json
+import math
+import re
+import shutil
+import struct
+import subprocess
+import time
+import uuid
+import zipfile
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Final
+import cv2
+import numpy as np
+# Directory that contains this module.
+APP_DIR: Final[Path] = Path(__file__).resolve().parent
+# Scratch space for per-job intermediate files.
+WORK_DIR: Final[Path] = APP_DIR / "work"
+# Destination for the final archive, report and contact sheet.
+OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
+# Size passed to cv2.resize when building the grayscale motion thumbnails.
+THUMB_SIZE: Final[tuple[int, int]] = (96, 96)
+# JPEG quality used when writing candidate frames.
+JPEG_QUALITY: Final[int] = 95
+# Font used for the labels drawn on the contact sheet.
+FONT = cv2.FONT_HERSHEY_SIMPLEX
+# Both directories are created at import time.
+WORK_DIR.mkdir(parents=True, exist_ok=True)
+OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
+@dataclass(frozen=True)
+class ProfileConfig:
+    """Tuning parameters for one sampling profile."""
+    # Multiplied by the target frame count to size the candidate pool.
+    candidate_multiplier: int
+    # A candidate whose cut_score is at or above this starts a new segment.
+    cut_threshold: float
+    # Percentile of blur scores below which a frame is penalized during selection.
+    min_blur_percentile: float
+    # Upper bound for the overlap passed to COLMAP's sequential matcher.
+    sequential_overlap: int
+    # Segments with fewer candidates than this receive a score penalty.
+    min_segment_frames: int
+# Named sampling profiles, looked up by the profile key passed to the converter.
+PROFILES: Final[dict[str, ProfileConfig]] = {
+    "balanced": ProfileConfig(
+        candidate_multiplier=6,
+        cut_threshold=0.42,
+        min_blur_percentile=35.0,
+        sequential_overlap=8,
+        min_segment_frames=14,
+    ),
+    "dense": ProfileConfig(
+        candidate_multiplier=8,
+        cut_threshold=0.38,
+        min_blur_percentile=30.0,
+        sequential_overlap=12,
+        min_segment_frames=18,
+    ),
+    "sparse": ProfileConfig(
+        candidate_multiplier=5,
+        cut_threshold=0.48,
+        min_blur_percentile=40.0,
+        sequential_overlap=6,
+        min_segment_frames=12,
+    ),
+}
+@dataclass(frozen=True)
+class VideoMetadata:
+    """Basic properties of a video as reported by OpenCV."""
+    # Frames per second (read_video_metadata substitutes 24.0 when unavailable).
+    fps: float
+    # Total number of frames reported for the video.
+    frame_count: int
+    # frame_count divided by fps.
+    duration_seconds: float
+    # Frame width and height as reported by the capture.
+    width: int
+    height: int
+@dataclass(frozen=True)
+class FrameCandidate:
+    """A sampled video frame together with its quality and change scores."""
+    # Sequential number among the sampled candidates.
+    candidate_index: int
+    # Index of the frame within the source video.
+    frame_index: int
+    # frame_index divided by the video's fps.
+    timestamp_seconds: float
+    # Location of the JPEG written for this candidate.
+    path: Path
+    # Variance of the Laplacian of the grayscale frame (higher means sharper).
+    blur_score: float
+    # Mean absolute thumbnail difference to the previous candidate (0.0 for the first).
+    motion_score: float
+    # Bhattacharyya histogram distance to the previous candidate (0.0 for the first).
+    cut_score: float
+    # Grayscale thumbnail scaled to the 0..1 range as float32.
+    thumb: np.ndarray
+@dataclass(frozen=True)
+class ConversionOutputs:
+    """Paths and summary statistics returned by a completed conversion."""
+    # Zip archive containing the scene.
+    archive_path: Path
+    # Copy of the JSON report placed in the outputs directory.
+    report_path: Path
+    # JPEG contact sheet of the selected keyframes.
+    contact_sheet_path: Path
+    # Slugified source file stem plus a millisecond timestamp.
+    scene_name: str
+    # Number of keyframes exported for COLMAP.
+    selected_frames: int
+    # Number of images found in the COLMAP model.
+    registered_frames: int
+    # Duration of the normalized video.
+    duration_seconds: float
+    # "strong", "usable", "weak" or "unknown" (see quality_label).
+    quality_label: str
+def _now_ms() -> int:
+    return int(time.time() * 1000)
+def _ensure_dir(path: Path) -> Path:
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+def _unique_dir(parent: Path, prefix: str) -> Path:
+    path = parent / f"{prefix}-{_now_ms()}-{uuid.uuid4().hex[:8]}"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+def _slugify(value: str) -> str:
+    slug = re.sub(r"[^a-zA-Z0-9]+", "-", value).strip("-").lower()
+    return slug or "scene"
+def _run(cmd: list[str], cwd: Path | None = None) -> None:
+    result = subprocess.run(
+        cmd,
+        cwd=str(cwd) if cwd else None,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        check=False,
+    )
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"Command failed ({result.returncode}): {' '.join(cmd)}\n{result.stdout.strip()}"
+        )
+def _require_binary(binary_name: str) -> None:
+    if shutil.which(binary_name) is None:
+        raise RuntimeError(f"Required executable not found: {binary_name}")
+def normalize_video_input(video_path: Path, work_dir: Path) -> Path:
+    _require_binary("ffmpeg")
+    normalized_path = work_dir / "normalized.mp4"
+    _run(
+        [
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(video_path),
+            "-an",
+            "-movflags",
+            "+faststart",
+            "-pix_fmt",
+            "yuv420p",
+            "-c:v",
+            "libx264",
+            str(normalized_path),
+        ],
+        cwd=work_dir,
+    )
+    return normalized_path
+def read_video_metadata(video_path: Path) -> VideoMetadata:
+    capture = cv2.VideoCapture(str(video_path))
+    if not capture.isOpened():
+        raise RuntimeError(f"Failed to open video: {video_path}")
+    fps = float(capture.get(cv2.CAP_PROP_FPS) or 0.0)
+    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+    width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
+    height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
+    capture.release()
+    if frame_count <= 0 or width <= 0 or height <= 0:
+        raise RuntimeError("Video metadata could not be read from the uploaded file.")
+    if fps <= 0:
+        fps = 24.0
+    return VideoMetadata(
+        fps=fps,
+        frame_count=frame_count,
+        duration_seconds=frame_count / fps,
+        width=width,
+        height=height,
+    )
+def _resize_max_edge(frame: np.ndarray, max_edge: int) -> np.ndarray:
+    height, width = frame.shape[:2]
+    current_max = max(height, width)
+    if current_max <= max_edge:
+        return frame
+    scale = max_edge / current_max
+    new_size = (max(2, int(round(width * scale))), max(2, int(round(height * scale))))
+    return cv2.resize(frame, new_size, interpolation=cv2.INTER_AREA)
+def _compute_histogram(frame: np.ndarray) -> np.ndarray:
+    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
+    hist = cv2.calcHist([hsv], [0, 1], None, [16, 16], [0, 180, 0, 256])
+    cv2.normalize(hist, hist)
+    return hist
+def _compute_thumb(gray_frame: np.ndarray) -> np.ndarray:
+    thumb = cv2.resize(gray_frame, THUMB_SIZE, interpolation=cv2.INTER_AREA)
+    return thumb.astype(np.float32) / 255.0
+def extract_candidates(
+    video_path: Path,
+    metadata: VideoMetadata,
+    candidates_dir: Path,
+    target_frames: int,
+    max_image_edge: int,
+    profile: ProfileConfig,
+) -> list[FrameCandidate]:
+    desired_candidates = min(max(target_frames * profile.candidate_multiplier, target_frames + 8), 240)
+    stride = max(1, metadata.frame_count // desired_candidates)
+    capture = cv2.VideoCapture(str(video_path))
+    if not capture.isOpened():
+        raise RuntimeError(f"Failed to open video for frame extraction: {video_path}")
+    candidates: list[FrameCandidate] = []
+    frame_index = 0
+    candidate_index = 0
+    previous_hist: np.ndarray | None = None
+    previous_thumb: np.ndarray | None = None
+    while True:
+        ok, frame = capture.read()
+        if not ok:
+            break
+        if frame_index % stride != 0:
+            frame_index += 1
+            continue
+        frame = _resize_max_edge(frame, max_image_edge)
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        blur_score = float(cv2.Laplacian(gray, cv2.CV_32F).var())
+        thumb = _compute_thumb(gray)
+        hist = _compute_histogram(frame)
+        motion_score = float(np.mean(np.abs(thumb - previous_thumb))) if previous_thumb is not None else 0.0
+        cut_score = (
+            float(cv2.compareHist(previous_hist, hist, cv2.HISTCMP_BHATTACHARYYA))
+            if previous_hist is not None
+            else 0.0
+        )
+        output_path = candidates_dir / f"candidate_{candidate_index:04d}.jpg"
+        cv2.imwrite(str(output_path), frame, [int(cv2.IMWRITE_JPEG_QUALITY), JPEG_QUALITY])
+        candidates.append(
+            FrameCandidate(
+                candidate_index=candidate_index,
+                frame_index=frame_index,
+                timestamp_seconds=frame_index / metadata.fps,
+                path=output_path,
+                blur_score=blur_score,
+                motion_score=motion_score,
+                cut_score=cut_score,
+                thumb=thumb,
+            )
+        )
+        previous_hist = hist
+        previous_thumb = thumb
+        candidate_index += 1
+        frame_index += 1
+    capture.release()
+    if len(candidates) < max(8, target_frames // 2):
+        raise RuntimeError(
+            f"Video yielded only {len(candidates)} usable candidates; upload a longer or slower video."
+        )
+    return candidates
+def segment_candidates(candidates: list[FrameCandidate], profile: ProfileConfig) -> list[list[FrameCandidate]]:
+    if not candidates:
+        return []
+    segments: list[list[FrameCandidate]] = []
+    start = 0
+    for index in range(1, len(candidates)):
+        if candidates[index].cut_score >= profile.cut_threshold:
+            segments.append(candidates[start:index])
+            start = index
+    segments.append(candidates[start:])
+    return [segment for segment in segments if segment]
+def choose_best_segment(
+    segments: list[list[FrameCandidate]],
+    target_frames: int,
+    profile: ProfileConfig,
+) -> list[FrameCandidate]:
+    if not segments:
+        raise RuntimeError("No coherent video segment was found for reconstruction.")
+    scored_segments: list[tuple[float, list[FrameCandidate]]] = []
+    for segment in segments:
+        duration = segment[-1].timestamp_seconds - segment[0].timestamp_seconds if len(segment) > 1 else 0.0
+        median_blur = float(np.median([candidate.blur_score for candidate in segment]))
+        coverage_bonus = min(len(segment) / max(target_frames, 1), 1.5)
+        segment_penalty = 0.0 if len(segment) >= profile.min_segment_frames else 0.6
+        score = (duration + len(segment) * 0.12) * coverage_bonus * math.log1p(max(median_blur, 1.0)) - segment_penalty
+        scored_segments.append((score, segment))
+    scored_segments.sort(key=lambda item: item[0], reverse=True)
+    return scored_segments[0][1]
+def select_keyframes(
+    segment: list[FrameCandidate],
+    target_frames: int,
+    profile: ProfileConfig,
+) -> list[FrameCandidate]:
+    """Choose up to ``target_frames`` candidates spread evenly along the motion.
+
+    Args:
+        segment: Candidates of one segment, in video order.
+        target_frames: Maximum number of keyframes to return.
+        profile: Sampling profile; only ``min_blur_percentile`` is read here.
+
+    Returns:
+        The chosen candidates in video order. The segment is returned
+        unchanged when it has no more than ``target_frames`` entries.
+    """
+    if len(segment) <= target_frames:
+        return segment
+    blur_scores = np.array([candidate.blur_score for candidate in segment], dtype=np.float32)
+    # Frames scoring below this percentile are penalized as too blurry.
+    blur_threshold = float(np.percentile(blur_scores, profile.min_blur_percentile))
+    normalized_blur = blur_scores / max(float(blur_scores.max()), 1e-6)
+    # The first entry contributes no motion; later ones are floored at 1e-6.
+    motion = np.array([0.0] + [max(candidate.motion_score, 1e-6) for candidate in segment[1:]], dtype=np.float32)
+    cumulative_motion = np.cumsum(motion)
+    selected_indices: list[int] = []
+    # Half-width of the window searched around each mark.
+    neighborhood = max(2, len(segment) // max(target_frames * 2, 1))
+    if float(cumulative_motion[-1]) <= 1e-5:
+        # Negligible total motion: space the marks evenly by index instead.
+        marks = np.linspace(0, len(segment) - 1, target_frames)
+        mark_distances = np.arange(len(segment), dtype=np.float32)
+    else:
+        # Space the marks evenly along the accumulated motion.
+        marks = np.linspace(float(cumulative_motion[0]), float(cumulative_motion[-1]), target_frames)
+        mark_distances = cumulative_motion
+    for mark in marks:
+        center = int(np.searchsorted(mark_distances, mark))
+        best_index: int | None = None
+        best_score = float("inf")
+        # Never pick an index at or before the previously selected one.
+        min_allowed = selected_indices[-1] + 1 if selected_indices else 0
+        lower = max(min_allowed, center - neighborhood)
+        upper = min(len(segment), center + neighborhood + 1)
+        # Try the local window first, then every remaining index as a fallback.
+        search_ranges = [(lower, upper), (min_allowed, len(segment))]
+        for range_start, range_end in search_ranges:
+            for idx in range(range_start, range_end):
+                candidate = segment[idx]
+                # Lower score is better: distance to the mark plus penalties,
+                # minus a small bonus for sharper frames.
+                mark_penalty = abs(float(mark_distances[idx]) - float(mark))
+                blur_penalty = 0.25 if candidate.blur_score < blur_threshold else 0.0
+                spacing_penalty = 0.15 if selected_indices and idx - selected_indices[-1] < 2 else 0.0
+                sharpness_bonus = 0.08 * float(normalized_blur[idx])
+                score = mark_penalty + blur_penalty + spacing_penalty - sharpness_bonus
+                if score < best_score:
+                    best_score = score
+                    best_index = idx
+            if best_index is not None:
+                break
+        if best_index is not None and (not selected_indices or best_index > selected_indices[-1]):
+            selected_indices.append(best_index)
+    selected_indices = sorted(set(selected_indices))
+    if len(selected_indices) < target_frames:
+        # Top up with unused frames: highest blur_score first, then farthest
+        # from the frames selected so far.
+        remaining = [idx for idx in range(len(segment)) if idx not in selected_indices]
+        remaining.sort(
+            key=lambda idx: (
+                -segment[idx].blur_score,
+                -(min(abs(idx - chosen) for chosen in selected_indices) if selected_indices else float("inf")),
+            )
+        )
+        for idx in remaining:
+            if len(selected_indices) >= target_frames:
+                break
+            selected_indices.append(idx)
+        selected_indices.sort()
+    trimmed = selected_indices[:target_frames]
+    return [segment[idx] for idx in trimmed]
+def export_selected_images(scene_dir: Path, selected_frames: list[FrameCandidate]) -> list[Path]:
+    images_dir = _ensure_dir(scene_dir / "images")
+    exported: list[Path] = []
+    for index, candidate in enumerate(selected_frames):
+        destination = images_dir / f"frame_{index:04d}.jpg"
+        shutil.copy2(candidate.path, destination)
+        exported.append(destination)
+    return exported
+def run_colmap(scene_dir: Path, selected_count: int, profile: ProfileConfig, max_image_edge: int) -> Path:
+    _require_binary("colmap")
+    database_path = scene_dir / "database.db"
+    images_dir = scene_dir / "images"
+    sparse_dir = _ensure_dir(scene_dir / "sparse")
+    _run(
+        [
+            "colmap",
+            "feature_extractor",
+            "--database_path",
+            str(database_path),
+            "--image_path",
+            str(images_dir),
+            "--ImageReader.single_camera",
+            "1",
+            "--ImageReader.camera_model",
+            "SIMPLE_RADIAL",
+            "--SiftExtraction.use_gpu",
+            "0",
+            "--SiftExtraction.max_image_size",
+            str(max_image_edge),
+        ],
+        cwd=scene_dir,
+    )
+    _run(
+        [
+            "colmap",
+            "sequential_matcher",
+            "--database_path",
+            str(database_path),
+            "--SiftMatching.use_gpu",
+            "0",
+            "--SequentialMatching.overlap",
+            str(min(profile.sequential_overlap, max(selected_count - 1, 1))),
+            "--SequentialMatching.quadratic_overlap",
+            "1",
+            "--SequentialMatching.loop_detection",
+            "0",
+        ],
+        cwd=scene_dir,
+    )
+    _run(
+        [
+            "colmap",
+            "mapper",
+            "--database_path",
+            str(database_path),
+            "--image_path",
+            str(images_dir),
+            "--output_path",
+            str(sparse_dir),
+            "--Mapper.multiple_models",
+            "0",
+            "--Mapper.extract_colors",
+            "0",
+            "--Mapper.min_model_size",
+            str(min(8, max(selected_count // 3, 4))),
+        ],
+        cwd=scene_dir,
+    )
+    model_dirs = sorted(path for path in sparse_dir.iterdir() if path.is_dir())
+    if not model_dirs:
+        raise RuntimeError("COLMAP did not produce a sparse reconstruction.")
+    return model_dirs[0]
+def count_registered_images(model_dir: Path) -> int:
+    image_bin = model_dir / "images.bin"
+    image_txt = model_dir / "images.txt"
+    if image_bin.exists():
+        with image_bin.open("rb") as handle:
+            header = handle.read(8)
+        return int(struct.unpack("<Q", header)[0]) if header else 0
+    if image_txt.exists():
+        lines = [line.strip() for line in image_txt.read_text(encoding="utf-8").splitlines()]
+        payload = [line for line in lines if line and not line.startswith("#")]
+        return len(payload) // 2
+    return 0
+def quality_label(registered_frames: int, selected_frames: int) -> str:
+    if selected_frames <= 0:
+        return "unknown"
+    ratio = registered_frames / selected_frames
+    if ratio >= 0.85:
+        return "strong"
+    if ratio >= 0.6:
+        return "usable"
+    return "weak"
+def create_contact_sheet(selected_frames: list[FrameCandidate], output_path: Path) -> Path:
+    if not selected_frames:
+        raise RuntimeError("No selected frames were available for the contact sheet.")
+    thumbs: list[np.ndarray] = []
+    for candidate in selected_frames:
+        image = cv2.imread(str(candidate.path), cv2.IMREAD_COLOR)
+        if image is None:
+            continue
+        image = _resize_max_edge(image, 320)
+        overlay = image.copy()
+        label = f"{candidate.timestamp_seconds:0.2f}s | blur {candidate.blur_score:0.0f}"
+        cv2.rectangle(overlay, (0, 0), (image.shape[1], 32), (12, 18, 28), -1)
+        image = cv2.addWeighted(overlay, 0.72, image, 0.28, 0.0)
+        cv2.putText(image, label, (10, 22), FONT, 0.55, (230, 235, 240), 1, cv2.LINE_AA)
+        thumbs.append(image)
+    cols = min(4, len(thumbs))
+    rows = int(math.ceil(len(thumbs) / cols))
+    cell_height = max(image.shape[0] for image in thumbs)
+    cell_width = max(image.shape[1] for image in thumbs)
+    canvas = np.full((rows * cell_height, cols * cell_width, 3), 18, dtype=np.uint8)
+    for index, image in enumerate(thumbs):
+        row = index // cols
+        col = index % cols
+        y = row * cell_height
+        x = col * cell_width
+        canvas[y : y + image.shape[0], x : x + image.shape[1]] = image
+    cv2.imwrite(str(output_path), canvas, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
+    return output_path
+def write_report(
+    scene_dir: Path,
+    metadata: VideoMetadata,
+    selected_frames: list[FrameCandidate],
+    registered_frames: int,
+    profile_key: str,
+    max_image_edge: int,
+) -> Path:
+    report = {
+        "scene_name": scene_dir.name,
+        "video": {
+            "fps": metadata.fps,
+            "frame_count": metadata.frame_count,
+            "duration_seconds": metadata.duration_seconds,
+            "width": metadata.width,
+            "height": metadata.height,
+        },
+        "selection": {
+            "profile": profile_key,
+            "max_image_edge": max_image_edge,
+            "selected_frames": len(selected_frames),
+            "registered_frames": registered_frames,
+            "quality_label": quality_label(registered_frames, len(selected_frames)),
+        },
+        "frames": [
+            {
+                "filename": f"images/frame_{index:04d}.jpg",
+                "timestamp_seconds": candidate.timestamp_seconds,
+                "source_frame_index": candidate.frame_index,
+                "blur_score": candidate.blur_score,
+                "motion_score": candidate.motion_score,
+                "cut_score": candidate.cut_score,
+            }
+            for index, candidate in enumerate(selected_frames)
+        ],
+    }
+    report_path = scene_dir / "report.json"
+    report_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
+    return report_path
+def build_archive(scene_dir: Path, output_archive: Path) -> Path:
+    package_dir = _unique_dir(WORK_DIR, "package")
+    scene_package = _ensure_dir(package_dir / scene_dir.name)
+    shutil.copytree(scene_dir / "images", scene_package / "images")
+    shutil.copytree(scene_dir / "sparse", scene_package / "sparse")
+    report_path = scene_dir / "report.json"
+    if report_path.exists():
+        shutil.copy2(report_path, scene_package / "report.json")
+    with zipfile.ZipFile(output_archive, "w", compression=zipfile.ZIP_DEFLATED) as archive:
+        for path in sorted(scene_package.rglob("*")):
+            if path.is_file():
+                archive.write(path, path.relative_to(package_dir))
+    return output_archive
+def convert_video_to_colmap_archive(
+    video_path: str | Path,
+    target_frames: int,
+    profile_key: str,
+    max_image_edge: int,
+) -> ConversionOutputs:
+    if profile_key not in PROFILES:
+        raise ValueError(f"Unknown sampling profile: {profile_key}")
+    source_path = Path(video_path)
+    if not source_path.exists():
+        raise FileNotFoundError(f"Input video not found: {source_path}")
+    job_dir = _unique_dir(WORK_DIR, "video-job")
+    normalized_path = normalize_video_input(source_path, job_dir)
+    metadata = read_video_metadata(normalized_path)
+    profile = PROFILES[profile_key]
+    candidates_dir = _ensure_dir(job_dir / "candidates")
+    candidates = extract_candidates(
+        video_path=normalized_path,
+        metadata=metadata,
+        candidates_dir=candidates_dir,
+        target_frames=target_frames,
+        max_image_edge=max_image_edge,
+        profile=profile,
+    )
+    segment = choose_best_segment(segment_candidates(candidates, profile), target_frames, profile)
+    selected = select_keyframes(segment, target_frames, profile)
+    scene_name = f"{_slugify(source_path.stem)}-{_now_ms()}"
+    scene_dir = _ensure_dir(job_dir / scene_name)
+    export_selected_images(scene_dir, selected)
+    model_dir = run_colmap(scene_dir, len(selected), profile, max_image_edge)
+    registered_frames = count_registered_images(model_dir)
+    report_path = write_report(scene_dir, metadata, selected, registered_frames, profile_key, max_image_edge)
+    output_stem = f"{scene_name}-{profile_key}-{len(selected)}"
+    contact_sheet_path = create_contact_sheet(selected, OUTPUTS_DIR / f"{output_stem}.jpg")
+    archive_path = build_archive(scene_dir, OUTPUTS_DIR / f"{output_stem}.zip")
+    output_report_path = OUTPUTS_DIR / f"{output_stem}.report.json"
+    shutil.copy2(report_path, output_report_path)
+    return ConversionOutputs(
+        archive_path=archive_path,
+        report_path=output_report_path,
+        contact_sheet_path=contact_sheet_path,
+        scene_name=scene_name,
+        selected_frames=len(selected),
+        registered_frames=registered_frames,
+        duration_seconds=metadata.duration_seconds,
+        quality_label=quality_label(registered_frames, len(selected)),
+    )