Commit ·
66dec57
0
Parent(s):
Initial Space app
Browse files- .gitignore +4 -0
- README.md +39 -0
- app.py +258 -0
- packages.txt +2 -0
- requirements.txt +4 -0
- video_to_colmap.py +632 -0
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.py[cod]
|
| 3 |
+
.DS_Store
|
| 4 |
+
outputs/
|
README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Video to COLMAP for tttLRM
|
| 3 |
+
emoji: 🎞️
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 6.2.0
|
| 8 |
+
python_version: 3.12
|
| 9 |
+
app_file: app.py
|
| 10 |
+
pinned: false
|
| 11 |
+
suggested_hardware: cpu-upgrade
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Video to COLMAP for tttLRM
|
| 15 |
+
|
| 16 |
+
Standalone Hugging Face Space that converts an uploaded video into a raw COLMAP scene archive suitable for the `tttLRM` inference Space.
|
| 17 |
+
|
| 18 |
+
## What it does
|
| 19 |
+
|
| 20 |
+
- Normalizes uploaded video with `ffmpeg`
|
| 21 |
+
- Samples candidate frames across the clip
|
| 22 |
+
- Scores sharpness, scene cuts, and inter-frame motion
|
| 23 |
+
- Chooses an ordered keyframe set with overlap appropriate for COLMAP
|
| 24 |
+
- Runs CPU COLMAP (`feature_extractor`, `sequential_matcher`, `mapper`)
|
| 25 |
+
- Packages a raw scene archive with:
|
| 26 |
+
- `images/`
|
| 27 |
+
- `sparse/0/`
|
| 28 |
+
- `report.json`
|
| 29 |
+
|
| 30 |
+
## Recommended workflow
|
| 31 |
+
|
| 32 |
+
1. Upload a short orbit or slow pan video of a single object or scene.
|
| 33 |
+
2. Download the generated raw COLMAP `.zip`.
|
| 34 |
+
3. Upload that archive into the companion `tttLRM` inference Space.
|
| 35 |
+
|
| 36 |
+
## Notes
|
| 37 |
+
|
| 38 |
+
- This Space is CPU-oriented: in standard Hugging Face Spaces, COLMAP runs more reliably on CPU than GPU-specific builds do.
|
| 39 |
+
- Best results come from a stable single-shot orbit video with limited lighting changes and no jump cuts.
|
app.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import math
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Final
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
from video_to_colmap import ConversionOutputs, convert_video_to_colmap_archive
|
| 10 |
+
|
| 11 |
+
# Folder containing this script; generated files go into "outputs" beside it.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
OUTPUTS_DIR: Final[Path] = APP_DIR.joinpath("outputs")

# Make sure the outputs folder exists before handing its path to Gradio.
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
gr.set_static_paths(paths=[str(OUTPUTS_DIR)])
|
| 16 |
+
|
| 17 |
+
# Stylesheet handed to gr.Blocks(css=...). It styles the full-width container,
# the two-column main row, the preview placeholder card and the status text,
# and stacks the columns below a 900px viewport width.
CSS: Final[str] = """
html { scrollbar-gutter: stable; }
body { overflow: auto; }
.gradio-container {
max-width: none;
width: 100%;
margin: 0;
padding: 0.75rem 1rem 1rem;
}
#main-row {
gap: 1rem;
align-items: stretch;
}
#controls-panel {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
#preview-panel {
min-height: 540px;
}
.preview-placeholder {
width: 100%;
min-height: 540px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 14px;
background: linear-gradient(135deg, #111827 0%, #1f2937 100%);
border: 1px solid rgba(148, 163, 184, 0.2);
color: #e5e7eb;
}
.preview-inner {
max-width: 460px;
padding: 32px;
text-align: center;
}
.preview-title {
font-size: 20px;
font-weight: 600;
margin-bottom: 8px;
}
.preview-desc {
font-size: 14px;
line-height: 1.5;
opacity: 0.82;
}
#status-text {
font-size: 13px;
opacity: 0.92;
}
@media (max-width: 900px) {
#main-row {
flex-direction: column;
}
#preview-panel,
.preview-placeholder {
min-height: 420px;
}
}
"""
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def preview_placeholder_html(title: str, description: str) -> str:
    """Render the placeholder card shown in the preview panel.

    Args:
        title: Heading text placed in the ``preview-title`` element.
        description: Body text placed in the ``preview-desc`` element.

    Returns:
        An HTML fragment of nested ``div`` elements. Both arguments are
        interpolated as-is, without HTML escaping.
    """
    markup_lines = (
        "",
        '<div class="preview-placeholder">',
        '<div class="preview-inner">',
        f'<div class="preview-title">{title}</div>',
        f'<div class="preview-desc">{description}</div>',
        "</div>",
        "</div>",
        "",
    )
    return "\n".join(markup_lines)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def start_generation() -> tuple[object, object, str]:
    """Return the UI updates shown as soon as a conversion is requested.

    Returns:
        A 3-tuple: an update that disables the first output and sets its value
        to "Converting...", an update that disables the second output, and the
        "preparing" placeholder HTML.
    """
    busy_button = gr.update(interactive=False, value="Converting...")
    locked_clear = gr.update(interactive=False)
    progress_card = preview_placeholder_html(
        "Preparing Video for COLMAP",
        "Normalizing the clip, selecting sharp overlapping keyframes, and running sparse reconstruction.",
    )
    return busy_button, locked_clear, progress_card
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _status_text(outputs: ConversionOutputs) -> str:
    """Build the Markdown status line for a finished conversion.

    Args:
        outputs: Conversion result; its scene name, duration, selected and
            registered frame counts and quality label are read.

    Returns:
        A single Markdown string. The registration percentage is
        ``registered_frames / selected_frames`` rounded down, or 0 when no
        frames were selected.
    """
    if outputs.selected_frames:
        registration_ratio = outputs.registered_frames / outputs.selected_frames
    else:
        registration_ratio = 0.0
    percent = math.floor(registration_ratio * 100)

    pieces = [
        f"Prepared **{outputs.scene_name}** from a **{outputs.duration_seconds:.1f}s** clip. ",
        f"Selected **{outputs.selected_frames}** keyframes, COLMAP registered **{outputs.registered_frames}**, ",
        f"and the reconstruction quality is **{outputs.quality_label}** ",
        f"({percent}% registration).",
    ]
    return "".join(pieces)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def run_conversion(
    video_path: str | None,
    target_frames: str,
    sampling_profile: str,
    max_edge: str,
) -> tuple[object, object, object, str]:
    """Run the video-to-COLMAP conversion and return the UI updates for its results.

    Args:
        video_path: Path of the uploaded video, or ``None``/empty when nothing is uploaded.
        target_frames: Requested keyframe count as a string (converted with ``int``).
        sampling_profile: Profile key forwarded as ``profile_key``.
        max_edge: Maximum image edge as a string (converted with ``int``).

    Returns:
        Updates for the archive file, the report file and the contact sheet
        (each made visible), followed by the Markdown status text.

    Raises:
        gr.Error: When no video was supplied, or wrapping any other exception
            raised during conversion (``gr.Error`` itself is re-raised untouched).
    """
    if not video_path:
        raise gr.Error("Upload a video first.")

    try:
        outputs = convert_video_to_colmap_archive(
            video_path=video_path,
            target_frames=int(target_frames),
            profile_key=sampling_profile,
            max_image_edge=int(max_edge),
        )
        archive_update = gr.update(value=str(outputs.archive_path), visible=True, interactive=True)
        report_update = gr.update(value=str(outputs.report_path), visible=True, interactive=True)
        sheet_update = gr.update(value=str(outputs.contact_sheet_path), visible=True)
        summary = _status_text(outputs)
        return archive_update, report_update, sheet_update, summary
    except gr.Error:
        # Already user-facing; pass it through unchanged.
        raise
    except Exception as exc:
        # Surface any other failure in the UI, keeping the original as the cause.
        raise gr.Error(f"Conversion failed: {type(exc).__name__}: {exc}") from exc
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def clear_all() -> tuple[None, object, object, object, str]:
    """Return the values that reset the form.

    Returns:
        ``None`` for the first output, three updates that clear and hide
        their targets, and an empty status string.
    """
    hidden_outputs = [gr.update(value=None, visible=False) for _ in range(3)]
    return (None, *hidden_outputs, "")
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def on_video_change(video_path: str | None) -> tuple[object, object]:
    """Enable or disable the two buttons depending on whether a video is present.

    Args:
        video_path: Current value of the video input; falsy means no video.

    Returns:
        Two updates: the first sets interactivity and restores the
        "Build COLMAP Archive" label, the second sets interactivity only.
    """
    enabled = bool(video_path)
    build_update = gr.update(interactive=enabled, value="Build COLMAP Archive")
    clear_update = gr.update(interactive=enabled)
    return build_update, clear_update
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and wire its events.

    Layout: a controls column (video upload, three dropdowns, the build and
    clear buttons, two download widgets, status text) next to a preview
    column (placeholder HTML plus the contact-sheet image).

    Events:
        * Changing the video enables or disables the two buttons.
        * The build button runs ``start_generation``, then ``run_conversion``,
          then restores the buttons and shows the "complete" placeholder.
        * The clear button runs ``clear_all``, then disables the buttons and
          restores the idle placeholder.

    Returns:
        The configured ``gr.Blocks`` app with its queue enabled (``max_size=4``).
    """
    idle_title = "Keyframe Selection Preview"
    idle_description = (
        "After conversion, the selected frames contact sheet will appear here "
        "so you can check overlap and viewpoint coverage."
    )

    # NOTE(review): Gradio 6 appears to document `theme`/`css` as launch()
    # parameters; confirm they still take effect when passed to gr.Blocks.
    with gr.Blocks(
        css=CSS,
        title="Video to COLMAP for tttLRM",
        theme=gr.themes.Origin(),
    ) as demo:
        gr.Markdown("## Video to COLMAP for tttLRM")
        gr.Markdown(
            "Upload a single video. The Space will pick sharp overlapping keyframes, run COLMAP, and export a raw scene archive ready for the `tttLRM` Space."
        )

        with gr.Row(elem_id="main-row", equal_height=True):
            with gr.Column(scale=3, min_width=320, elem_id="controls-panel"):
                video_in = gr.File(
                    label="Input Video",
                    type="filepath",
                    file_types=[".mp4", ".mov", ".webm", ".mkv", ".avi"],
                )
                target_frames = gr.Dropdown(label="Target Keyframes", choices=["16", "24", "32", "48"], value="24")
                sampling_profile = gr.Dropdown(
                    label="Sampling Profile",
                    choices=["balanced", "dense", "sparse"],
                    value="balanced",
                )
                max_edge = gr.Dropdown(label="Max Frame Edge", choices=["960", "1280", "1600"], value="1280")
                with gr.Row():
                    generate_btn = gr.Button("Build COLMAP Archive", variant="primary", interactive=False)
                    clear_btn = gr.Button("Clear", interactive=False)
                archive_download = gr.File(label="Download Raw COLMAP Archive", visible=False)
                report_download = gr.File(label="Download Reconstruction Report", visible=False)
                status_text = gr.Markdown(elem_id="status-text")

            with gr.Column(scale=7, min_width=520):
                preview_html = gr.HTML(
                    value=preview_placeholder_html(idle_title, idle_description),
                    elem_id="preview-panel",
                )
                contact_sheet = gr.Image(label="Selected Keyframes", visible=False, type="filepath")

        # The handlers below stay lambdas on purpose: Gradio derives names
        # from the callable, so naming them could alter the exposed endpoints.
        restore_after_run = lambda: (
            gr.update(interactive=True, value="Build COLMAP Archive"),
            gr.update(interactive=True),
            preview_placeholder_html(
                "Keyframe Selection Complete",
                "Review the contact sheet below and download the raw COLMAP archive for the `tttLRM` Space.",
            ),
        )
        reset_after_clear = lambda: (
            gr.update(interactive=False),
            gr.update(interactive=False),
            preview_placeholder_html(idle_title, idle_description),
        )

        video_in.change(
            on_video_change,
            inputs=[video_in],
            outputs=[generate_btn, clear_btn],
        )

        started = generate_btn.click(
            start_generation,
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )
        converted = started.then(
            run_conversion,
            inputs=[video_in, target_frames, sampling_profile, max_edge],
            outputs=[archive_download, report_download, contact_sheet, status_text],
        )
        converted.then(
            restore_after_run,
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )

        cleared = clear_btn.click(
            clear_all,
            outputs=[video_in, archive_download, report_download, contact_sheet, status_text],
            queue=False,
        )
        cleared.then(
            reset_after_clear,
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )

    demo.queue(max_size=4)
    return demo
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
# Script entry point: build the UI and start serving it.
if __name__ == "__main__":
    demo = build_demo()
    demo.launch()
|
packages.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
| 2 |
+
colmap
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==6.2.0
|
| 2 |
+
numpy==2.2.6
|
| 3 |
+
opencv-python-headless==4.12.0.88
|
| 4 |
+
Pillow==12.0.0
|
video_to_colmap.py
ADDED
|
@@ -0,0 +1,632 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import math
|
| 5 |
+
import re
|
| 6 |
+
import shutil
|
| 7 |
+
import struct
|
| 8 |
+
import subprocess
|
| 9 |
+
import time
|
| 10 |
+
import uuid
|
| 11 |
+
import zipfile
|
| 12 |
+
from dataclasses import dataclass, field
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import Final
|
| 15 |
+
|
| 16 |
+
import cv2
|
| 17 |
+
import numpy as np
|
| 18 |
+
|
| 19 |
+
# Scratch ("work") and result ("outputs") folders live next to this module.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
WORK_DIR: Final[Path] = APP_DIR.joinpath("work")
OUTPUTS_DIR: Final[Path] = APP_DIR.joinpath("outputs")
# Target size passed to cv2.resize when building grayscale thumbnails.
THUMB_SIZE: Final[tuple[int, int]] = (96, 96)
# JPEG quality flag used when candidate frames are written to disk.
JPEG_QUALITY: Final[int] = 95
FONT = cv2.FONT_HERSHEY_SIMPLEX

# Both folders are created at import time so later code can assume they exist.
WORK_DIR.mkdir(parents=True, exist_ok=True)
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass(frozen=True)
class ProfileConfig:
    """Immutable tuning knobs for one sampling profile."""

    # Multiplied by the target frame count to size the candidate pool.
    candidate_multiplier: int
    # A candidate whose cut score reaches this value starts a new segment.
    cut_threshold: float
    # Percentile of blur scores below which a frame is penalised in selection.
    min_blur_percentile: float
    # Upper bound for the overlap passed to COLMAP's sequential matcher.
    sequential_overlap: int
    # Segments shorter than this are penalised when ranking segments.
    min_segment_frames: int
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# Built-in sampling profiles, keyed by the name passed in as the profile key.
PROFILES: Final[dict[str, ProfileConfig]] = dict(
    balanced=ProfileConfig(
        candidate_multiplier=6,
        cut_threshold=0.42,
        min_blur_percentile=35.0,
        sequential_overlap=8,
        min_segment_frames=14,
    ),
    dense=ProfileConfig(
        candidate_multiplier=8,
        cut_threshold=0.38,
        min_blur_percentile=30.0,
        sequential_overlap=12,
        min_segment_frames=18,
    ),
    sparse=ProfileConfig(
        candidate_multiplier=5,
        cut_threshold=0.48,
        min_blur_percentile=40.0,
        sequential_overlap=6,
        min_segment_frames=12,
    ),
)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
@dataclass(frozen=True)
class VideoMetadata:
    """Immutable basic properties of a video stream."""

    # Frames per second.
    fps: float
    # Total number of frames.
    frame_count: int
    # Clip length in seconds.
    duration_seconds: float
    # Frame width in pixels.
    width: int
    # Frame height in pixels.
    height: int
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@dataclass(frozen=True)
class FrameCandidate:
    """One sampled video frame together with its quality metrics.

    ``thumb`` is excluded from the generated ``__eq__``/``__hash__``. With a
    NumPy array taking part, comparing two candidates raised ``ValueError``
    (ambiguous array truth value) and hashing raised ``TypeError``.
    """

    # Position of this frame within the sampled candidate list.
    candidate_index: int
    # Position of this frame within the source video.
    frame_index: int
    # Frame time in seconds.
    timestamp_seconds: float
    # Location of the frame image on disk.
    path: Path
    # Sharpness measure; higher values are treated as sharper by the selector.
    blur_score: float
    # Difference from the previously sampled frame's thumbnail.
    motion_score: float
    # Histogram distance from the previously sampled frame.
    cut_score: float
    # Small grayscale thumbnail used for the motion comparison.
    thumb: np.ndarray = field(compare=False)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@dataclass(frozen=True)
class ConversionOutputs:
    """Immutable summary of one finished conversion."""

    # Location of the packaged scene archive.
    archive_path: Path
    # Location of the reconstruction report.
    report_path: Path
    # Location of the contact-sheet image of the selected frames.
    contact_sheet_path: Path
    # Display name of the scene.
    scene_name: str
    # Number of keyframes that were selected.
    selected_frames: int
    # Number of frames COLMAP registered.
    registered_frames: int
    # Length of the source clip in seconds.
    duration_seconds: float
    # Short label describing the reconstruction quality.
    quality_label: str
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _now_ms() -> int:
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    milliseconds = time.time() * 1000
    return int(milliseconds)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _ensure_dir(path: Path) -> Path:
    """Create ``path`` (and any missing parents) if needed and return it.

    An already existing directory is not an error.
    """
    Path.mkdir(path, parents=True, exist_ok=True)
    return path
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _unique_dir(parent: Path, prefix: str) -> Path:
    """Create and return a fresh directory under ``parent``.

    The directory is named ``<prefix>-<epoch milliseconds>-<8 hex chars>``,
    where the hex suffix comes from a random UUID.
    """
    stamp = int(time.time() * 1000)
    token = uuid.uuid4().hex[:8]
    created = parent / f"{prefix}-{stamp}-{token}"
    created.mkdir(parents=True, exist_ok=True)
    return created
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _slugify(value: str) -> str:
    """Turn ``value`` into a lowercase, dash-separated slug.

    Each run of characters other than ASCII letters and digits becomes a
    single dash, and leading/trailing dashes are removed. If nothing is left,
    the fallback ``"scene"`` is returned.
    """
    dashed = re.sub(r"[^a-zA-Z0-9]+", "-", value)
    cleaned = dashed.strip("-").lower()
    if cleaned:
        return cleaned
    return "scene"
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _run(cmd: list[str], cwd: Path | None = None) -> None:
    """Run an external command and raise if it exits with a non-zero status.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        cwd: Optional working directory for the child process.

    Raises:
        RuntimeError: When the exit status is non-zero. The message contains
            the status, the command line and the captured output (stderr is
            merged into stdout).
    """
    working_dir = str(cwd) if cwd else None
    completed = subprocess.run(
        cmd,
        cwd=working_dir,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False,
    )
    if completed.returncode == 0:
        return

    command_line = " ".join(cmd)
    captured = completed.stdout.strip()
    raise RuntimeError(f"Command failed ({completed.returncode}): {command_line}\n{captured}")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _require_binary(binary_name: str) -> None:
    """Verify that ``binary_name`` can be located by ``shutil.which``.

    Raises:
        RuntimeError: When the executable cannot be found.
    """
    located = shutil.which(binary_name)
    if located is not None:
        return
    raise RuntimeError(f"Required executable not found: {binary_name}")
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def normalize_video_input(video_path: Path, work_dir: Path) -> Path:
    """Re-encode the uploaded video into ``work_dir / "normalized.mp4"`` with ffmpeg.

    The command overwrites any existing output (``-y``), disables audio
    (``-an``) and encodes video with ``libx264`` in ``yuv420p``.

    Args:
        video_path: Source video file.
        work_dir: Directory that receives the output; also used as ffmpeg's
            working directory.

    Returns:
        Path of the normalized video.

    Raises:
        RuntimeError: When ffmpeg is not installed or the command fails.
    """
    _require_binary("ffmpeg")
    normalized_path = work_dir / "normalized.mp4"
    command = ["ffmpeg", "-y", "-i", str(video_path)]
    command += ["-an", "-movflags", "+faststart"]
    command += ["-pix_fmt", "yuv420p", "-c:v", "libx264"]
    command.append(str(normalized_path))
    _run(command, cwd=work_dir)
    return normalized_path
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def read_video_metadata(video_path: Path) -> VideoMetadata:
    """Read frame rate, frame count and frame size from a video via OpenCV.

    Args:
        video_path: Video file to inspect.

    Returns:
        The collected metadata; the duration is ``frame_count / fps``.

    Raises:
        RuntimeError: When the file cannot be opened, or when the frame
            count, width or height is not positive.
    """
    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise RuntimeError(f"Failed to open video: {video_path}")

    reported_fps = float(capture.get(cv2.CAP_PROP_FPS) or 0.0)
    integer_props = (cv2.CAP_PROP_FRAME_COUNT, cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
    frame_count, width, height = [int(capture.get(prop) or 0) for prop in integer_props]
    capture.release()

    if min(frame_count, width, height) <= 0:
        raise RuntimeError("Video metadata could not be read from the uploaded file.")

    # Fall back to 24 fps when the container reports no usable frame rate.
    fps = 24.0 if reported_fps <= 0 else reported_fps

    return VideoMetadata(
        fps=fps,
        frame_count=frame_count,
        duration_seconds=frame_count / fps,
        width=width,
        height=height,
    )
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def _resize_max_edge(frame: np.ndarray, max_edge: int) -> np.ndarray:
    """Shrink ``frame`` so its longer side is at most ``max_edge`` pixels.

    Frames that already fit are returned unchanged (same object). Otherwise
    both sides are scaled by the same factor, rounded, kept at 2 pixels or
    more, and resampled with ``cv2.INTER_AREA``.
    """
    rows, cols = frame.shape[:2]
    longest = max(rows, cols)
    if longest <= max_edge:
        return frame

    factor = max_edge / longest
    target_width = max(2, int(round(cols * factor)))
    target_height = max(2, int(round(rows * factor)))
    return cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def _compute_histogram(frame: np.ndarray) -> np.ndarray:
    """Return a normalized 16x16 histogram over the first two HSV channels.

    The frame is converted with ``cv2.COLOR_BGR2HSV``, then binned over the
    ranges [0, 180) and [0, 256) and normalized in place.
    """
    hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    channels = [0, 1]
    bins = [16, 16]
    ranges = [0, 180, 0, 256]
    histogram = cv2.calcHist([hsv_frame], channels, None, bins, ranges)
    cv2.normalize(histogram, histogram)
    return histogram
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def _compute_thumb(gray_frame: np.ndarray) -> np.ndarray:
    """Return a ``THUMB_SIZE`` float32 thumbnail of ``gray_frame`` divided by 255."""
    resized = cv2.resize(gray_frame, THUMB_SIZE, interpolation=cv2.INTER_AREA)
    as_float = resized.astype(np.float32)
    return as_float / 255.0
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def extract_candidates(
    video_path: Path,
    metadata: VideoMetadata,
    candidates_dir: Path,
    target_frames: int,
    max_image_edge: int,
    profile: ProfileConfig,
) -> list[FrameCandidate]:
    """Sample frames at a fixed stride, score them and save each as a JPEG.

    Every ``stride``-th frame is resized, written to ``candidates_dir`` as
    ``candidate_NNNN.jpg`` and scored for sharpness (variance of the
    Laplacian), motion (mean absolute thumbnail difference to the previous
    sample) and scene cut (Bhattacharyya histogram distance to the previous
    sample). The first sample gets motion and cut scores of 0.

    Args:
        video_path: Video to read.
        metadata: Frame count and fps of that video.
        candidates_dir: Existing directory that receives the JPEG files.
        target_frames: Number of keyframes the caller ultimately wants.
        max_image_edge: Longest allowed image side in pixels.
        profile: Supplies ``candidate_multiplier``.

    Returns:
        The candidates in video order.

    Raises:
        RuntimeError: When the video cannot be opened, a candidate image
            cannot be written, or fewer than ``max(8, target_frames // 2)``
            candidates were produced.
    """
    desired_candidates = min(max(target_frames * profile.candidate_multiplier, target_frames + 8), 240)
    stride = max(1, metadata.frame_count // desired_candidates)

    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise RuntimeError(f"Failed to open video for frame extraction: {video_path}")

    candidates: list[FrameCandidate] = []
    previous_hist: np.ndarray | None = None
    previous_thumb: np.ndarray | None = None
    frame_index = -1
    try:
        while True:
            ok, frame = capture.read()
            if not ok:
                break
            frame_index += 1
            if frame_index % stride != 0:
                continue

            frame = _resize_max_edge(frame, max_image_edge)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            blur_score = float(cv2.Laplacian(gray, cv2.CV_32F).var())
            thumb = _compute_thumb(gray)
            hist = _compute_histogram(frame)

            motion_score = float(np.mean(np.abs(thumb - previous_thumb))) if previous_thumb is not None else 0.0
            cut_score = (
                float(cv2.compareHist(previous_hist, hist, cv2.HISTCMP_BHATTACHARYYA))
                if previous_hist is not None
                else 0.0
            )

            candidate_index = len(candidates)
            output_path = candidates_dir / f"candidate_{candidate_index:04d}.jpg"
            # cv2.imwrite reports failure through its return value, not an
            # exception; fail here so no candidate points at a missing file.
            written = cv2.imwrite(str(output_path), frame, [int(cv2.IMWRITE_JPEG_QUALITY), JPEG_QUALITY])
            if not written:
                raise RuntimeError(f"Failed to write candidate frame: {output_path}")

            candidates.append(
                FrameCandidate(
                    candidate_index=candidate_index,
                    frame_index=frame_index,
                    timestamp_seconds=frame_index / metadata.fps,
                    path=output_path,
                    blur_score=blur_score,
                    motion_score=motion_score,
                    cut_score=cut_score,
                    thumb=thumb,
                )
            )
            previous_hist = hist
            previous_thumb = thumb
    finally:
        # Release the decoder even when scoring or writing raised.
        capture.release()

    if len(candidates) < max(8, target_frames // 2):
        raise RuntimeError(
            f"Video yielded only {len(candidates)} usable candidates; upload a longer or slower video."
        )
    return candidates
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def segment_candidates(candidates: list[FrameCandidate], profile: ProfileConfig) -> list[list[FrameCandidate]]:
    """Split candidates into consecutive segments at detected scene cuts.

    A new segment starts at every candidate after the first whose
    ``cut_score`` is at least ``profile.cut_threshold``. The first
    candidate's cut score is never consulted.

    Returns:
        Non-empty segments in original order; an empty list for empty input.
    """
    groups: list[list[FrameCandidate]] = []
    current: list[FrameCandidate] = []
    for position, candidate in enumerate(candidates):
        starts_new_segment = position > 0 and candidate.cut_score >= profile.cut_threshold
        if starts_new_segment:
            groups.append(current)
            current = []
        current.append(candidate)
    if current:
        groups.append(current)
    return groups
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def choose_best_segment(
    segments: list[list[FrameCandidate]],
    target_frames: int,
    profile: ProfileConfig,
) -> list[FrameCandidate]:
    """Pick the highest-scoring segment.

    The score grows with the segment's time span, its length (relative to
    ``target_frames``, capped at 1.5x) and the log of its median blur score.
    Segments shorter than ``profile.min_segment_frames`` lose 0.6. On a tie
    the earliest segment wins.

    Raises:
        RuntimeError: When ``segments`` is empty.
    """
    if not segments:
        raise RuntimeError("No coherent video segment was found for reconstruction.")

    def _segment_score(segment: list[FrameCandidate]) -> float:
        size = len(segment)
        span = segment[-1].timestamp_seconds - segment[0].timestamp_seconds if size > 1 else 0.0
        median_blur = float(np.median([candidate.blur_score for candidate in segment]))
        coverage_bonus = min(size / max(target_frames, 1), 1.5)
        short_penalty = 0.0 if size >= profile.min_segment_frames else 0.6
        return (span + size * 0.12) * coverage_bonus * math.log1p(max(median_blur, 1.0)) - short_penalty

    # Score every segment first, then take the first maximum.
    scores = [_segment_score(segment) for segment in segments]
    best_position = max(range(len(segments)), key=scores.__getitem__)
    return segments[best_position]
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def select_keyframes(
    segment: list[FrameCandidate],
    target_frames: int,
    profile: ProfileConfig,
) -> list[FrameCandidate]:
    """Choose up to ``target_frames`` ordered keyframes from a segment.

    Evenly spaced marks are placed along the cumulative motion of the
    segment (or along the frame index when total motion is negligible). For
    each mark the cheapest not-yet-passed frame near it is taken. Cost is the
    distance to the mark, plus 0.25 for frames below the profile's blur
    percentile, plus 0.15 for frames adjacent to the previous pick, minus a
    small bonus for relative sharpness. If the marks yield too few frames,
    the remainder is filled with the sharpest unused frames.

    Returns:
        ``segment`` itself when it has at most ``target_frames`` items,
        otherwise the chosen candidates in segment order.
    """
    total = len(segment)
    if total <= target_frames:
        return segment

    sharpness = np.array([item.blur_score for item in segment], dtype=np.float32)
    blur_floor = float(np.percentile(sharpness, profile.min_blur_percentile))
    relative_sharpness = sharpness / max(float(sharpness.max()), 1e-6)

    # Per-step motion, floored at a tiny value so the running sum keeps increasing.
    steps = [0.0]
    steps.extend(max(item.motion_score, 1e-6) for item in segment[1:])
    travelled = np.cumsum(np.array(steps, dtype=np.float32))

    window = max(2, total // max(target_frames * 2, 1))

    if float(travelled[-1]) <= 1e-5:
        # Essentially no motion: space the marks by frame index instead.
        marks = np.linspace(0, total - 1, target_frames)
        axis = np.arange(total, dtype=np.float32)
    else:
        marks = np.linspace(float(travelled[0]), float(travelled[-1]), target_frames)
        axis = travelled

    picks: list[int] = []
    for mark in marks:
        anchor = int(np.searchsorted(axis, mark))
        earliest = picks[-1] + 1 if picks else 0
        # Try the neighbourhood of the mark first, then everything still ahead.
        windows = (
            (max(earliest, anchor - window), min(total, anchor + window + 1)),
            (earliest, total),
        )

        winner: int | None = None
        lowest = float("inf")
        for first, stop in windows:
            for position in range(first, stop):
                distance_cost = abs(float(axis[position]) - float(mark))
                blur_cost = 0.25 if segment[position].blur_score < blur_floor else 0.0
                spacing_cost = 0.15 if picks and position - picks[-1] < 2 else 0.0
                sharp_bonus = 0.08 * float(relative_sharpness[position])
                cost = distance_cost + blur_cost + spacing_cost - sharp_bonus
                if cost < lowest:
                    lowest = cost
                    winner = position
            if winner is not None:
                break

        if winner is not None and (not picks or winner > picks[-1]):
            picks.append(winner)

    picks = sorted(set(picks))
    shortfall = target_frames - len(picks)
    if shortfall > 0:
        already_picked = set(picks)
        leftovers = [position for position in range(total) if position not in already_picked]

        def _fill_priority(position: int) -> tuple[float, float]:
            # Sharpest first; among equals, farthest from any existing pick.
            gap = min(abs(position - chosen) for chosen in picks) if picks else float("inf")
            return (-segment[position].blur_score, -gap)

        leftovers.sort(key=_fill_priority)
        picks.extend(leftovers[:shortfall])
        picks.sort()

    return [segment[position] for position in picks[:target_frames]]
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def export_selected_images(scene_dir: Path, selected_frames: list[FrameCandidate]) -> list[Path]:
    """Copy the chosen frames into ``<scene_dir>/images`` under sequential names.

    Frames are renamed ``frame_0000.jpg``, ``frame_0001.jpg``, ... following
    their position in ``selected_frames``. The files are copied verbatim, not
    re-encoded.

    Args:
        scene_dir: Scene working directory that receives the ``images`` folder.
        selected_frames: Ordered frames; each must expose a ``path`` attribute.

    Returns:
        Destination paths, in the same order as ``selected_frames``.
    """
    # NOTE(review): _ensure_dir presumably creates the folder and returns it.
    target_root = _ensure_dir(scene_dir / "images")
    targets = [target_root / f"frame_{position:04d}.jpg" for position in range(len(selected_frames))]
    for frame, target in zip(selected_frames, targets):
        shutil.copy2(frame.path, target)
    return targets
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def run_colmap(scene_dir: Path, selected_count: int, profile: ProfileConfig, max_image_edge: int) -> Path:
    """Run COLMAP feature extraction, sequential matching and mapping on a scene.

    The scene directory is expected to contain an ``images`` folder. A
    ``database.db`` file and a ``sparse`` folder are written next to it. All
    three stages run with the scene directory as working directory and with
    the GPU options set to ``0``.

    Args:
        scene_dir: Scene working directory.
        selected_count: Number of exported frames; bounds the matching overlap
            and the minimum model size.
        profile: Sampling profile; only ``sequential_overlap`` is read here.
        max_image_edge: Passed as ``SiftExtraction.max_image_size``.

    Returns:
        The first (in sorted order) sub-directory of ``sparse``.

    Raises:
        RuntimeError: If ``sparse`` contains no sub-directory after mapping.
    """
    # NOTE(review): presumably fails fast when the binary is missing - confirm.
    _require_binary("colmap")
    db_arg = str(scene_dir / "database.db")
    image_arg = str(scene_dir / "images")
    sparse_root = _ensure_dir(scene_dir / "sparse")

    def colmap(stage: str, *options: str) -> None:
        # Every stage shares the executable name and the working directory.
        _run(["colmap", stage, *options], cwd=scene_dir)

    colmap(
        "feature_extractor",
        "--database_path", db_arg,
        "--image_path", image_arg,
        "--ImageReader.single_camera", "1",
        "--ImageReader.camera_model", "SIMPLE_RADIAL",
        "--SiftExtraction.use_gpu", "0",
        "--SiftExtraction.max_image_size", str(max_image_edge),
    )

    # The overlap window never exceeds the number of other frames, and is at least 1.
    overlap = min(profile.sequential_overlap, max(selected_count - 1, 1))
    colmap(
        "sequential_matcher",
        "--database_path", db_arg,
        "--SiftMatching.use_gpu", "0",
        "--SequentialMatching.overlap", str(overlap),
        "--SequentialMatching.quadratic_overlap", "1",
        "--SequentialMatching.loop_detection", "0",
    )

    # A third of the frames, clamped to the range 4..8.
    min_model_size = min(8, max(selected_count // 3, 4))
    colmap(
        "mapper",
        "--database_path", db_arg,
        "--image_path", image_arg,
        "--output_path", str(sparse_root),
        "--Mapper.multiple_models", "0",
        "--Mapper.extract_colors", "0",
        "--Mapper.min_model_size", str(min_model_size),
    )

    reconstructions = [entry for entry in sparse_root.iterdir() if entry.is_dir()]
    if reconstructions:
        return min(reconstructions)
    raise RuntimeError("COLMAP did not produce a sparse reconstruction.")
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def count_registered_images(model_dir: Path) -> int:
    """Return the number of images recorded in a COLMAP sparse model folder.

    ``images.bin`` takes precedence when it exists: its first 8 bytes are read
    as a little-endian unsigned 64-bit count. Otherwise ``images.txt`` is
    used: blank lines and ``#`` comment lines are dropped and the remaining
    line count is halved (two lines per image).

    Args:
        model_dir: Directory holding ``images.bin`` and/or ``images.txt``.

    Returns:
        The image count, or 0 when neither file exists or when ``images.bin``
        is empty or too short to contain the count field.
    """
    image_bin = model_dir / "images.bin"
    if image_bin.exists():
        with image_bin.open("rb") as handle:
            header = handle.read(8)
        # A truncated file (fewer than 8 bytes) has no complete count field;
        # unpacking it would raise struct.error, so treat it like an empty file.
        if len(header) < 8:
            return 0
        return int(struct.unpack("<Q", header)[0])

    image_txt = model_dir / "images.txt"
    if image_txt.exists():
        stripped = (line.strip() for line in image_txt.read_text(encoding="utf-8").splitlines())
        payload_lines = sum(1 for line in stripped if line and not line.startswith("#"))
        return payload_lines // 2

    return 0
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def quality_label(registered_frames: int, selected_frames: int) -> str:
    """Classify a reconstruction by the share of selected frames registered.

    Args:
        registered_frames: Number of frames present in the sparse model.
        selected_frames: Number of frames that were handed to COLMAP.

    Returns:
        ``"unknown"`` when ``selected_frames`` is not positive, otherwise
        ``"strong"`` (ratio >= 0.85), ``"usable"`` (ratio >= 0.6) or ``"weak"``.
    """
    if selected_frames > 0:
        fraction = registered_frames / selected_frames
        # Thresholds are checked from the strictest to the most lenient.
        for floor, label in ((0.85, "strong"), (0.6, "usable")):
            if fraction >= floor:
                return label
        return "weak"
    return "unknown"
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
def create_contact_sheet(selected_frames: list[FrameCandidate], output_path: Path) -> Path:
    """Render a grid of labelled thumbnails for the selected frames.

    Each readable frame is resized with ``_resize_max_edge(image, 320)``,
    given a semi-transparent banner showing its timestamp and blur score, and
    placed on a grid of at most four columns. Frames that cannot be read are
    skipped.

    Args:
        selected_frames: Frames to show; each must expose ``path``,
            ``timestamp_seconds`` and ``blur_score``.
        output_path: Destination of the JPEG contact sheet.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: If ``selected_frames`` is empty, or if none of the
            frames could be read from disk.
    """
    if not selected_frames:
        raise RuntimeError("No selected frames were available for the contact sheet.")

    thumbs: list[np.ndarray] = []
    for candidate in selected_frames:
        image = cv2.imread(str(candidate.path), cv2.IMREAD_COLOR)
        if image is None:
            # Unreadable or missing file: leave it out of the sheet.
            continue
        image = _resize_max_edge(image, 320)
        overlay = image.copy()
        label = f"{candidate.timestamp_seconds:0.2f}s | blur {candidate.blur_score:0.0f}"
        # Paint a solid banner on the copy, then blend so the frame shows through.
        cv2.rectangle(overlay, (0, 0), (image.shape[1], 32), (12, 18, 28), -1)
        image = cv2.addWeighted(overlay, 0.72, image, 0.28, 0.0)
        cv2.putText(image, label, (10, 22), FONT, 0.55, (230, 235, 240), 1, cv2.LINE_AA)
        thumbs.append(image)

    # Without this guard the column count below would be 0 and the row
    # computation would fail with ZeroDivisionError.
    if not thumbs:
        raise RuntimeError("None of the selected frames could be read for the contact sheet.")

    cols = min(4, len(thumbs))
    rows = math.ceil(len(thumbs) / cols)
    # Cells are sized to the largest thumbnail so every image fits its slot.
    cell_height = max(image.shape[0] for image in thumbs)
    cell_width = max(image.shape[1] for image in thumbs)
    canvas = np.full((rows * cell_height, cols * cell_width, 3), 18, dtype=np.uint8)

    for index, image in enumerate(thumbs):
        row, col = divmod(index, cols)
        y = row * cell_height
        x = col * cell_width
        canvas[y : y + image.shape[0], x : x + image.shape[1]] = image

    cv2.imwrite(str(output_path), canvas, [int(cv2.IMWRITE_JPEG_QUALITY), 92])
    return output_path
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
def write_report(
    scene_dir: Path,
    metadata: VideoMetadata,
    selected_frames: list[FrameCandidate],
    registered_frames: int,
    profile_key: str,
    max_image_edge: int,
) -> Path:
    """Write ``report.json`` describing the video, the selection and each frame.

    Args:
        scene_dir: Scene directory; its name is recorded and the report is
            written inside it.
        metadata: Source video properties (fps, frame count, duration, size).
        selected_frames: Frames exported for COLMAP, in export order.
        registered_frames: Number of frames present in the sparse model.
        profile_key: Name of the sampling profile that was used.
        max_image_edge: Image edge limit that was used.

    Returns:
        Path of the written ``report.json``.
    """
    selected_total = len(selected_frames)

    video_section = {
        "fps": metadata.fps,
        "frame_count": metadata.frame_count,
        "duration_seconds": metadata.duration_seconds,
        "width": metadata.width,
        "height": metadata.height,
    }
    selection_section = {
        "profile": profile_key,
        "max_image_edge": max_image_edge,
        "selected_frames": selected_total,
        "registered_frames": registered_frames,
        "quality_label": quality_label(registered_frames, selected_total),
    }

    frame_entries = []
    for position, frame in enumerate(selected_frames):
        frame_entries.append(
            {
                # Same naming scheme as the exported image files.
                "filename": f"images/frame_{position:04d}.jpg",
                "timestamp_seconds": frame.timestamp_seconds,
                "source_frame_index": frame.frame_index,
                "blur_score": frame.blur_score,
                "motion_score": frame.motion_score,
                "cut_score": frame.cut_score,
            }
        )

    document = {
        "scene_name": scene_dir.name,
        "video": video_section,
        "selection": selection_section,
        "frames": frame_entries,
    }
    serialized = json.dumps(document, indent=2)

    destination = scene_dir / "report.json"
    destination.write_text(serialized, encoding="utf-8")
    return destination
|
| 562 |
+
|
| 563 |
+
|
| 564 |
+
def build_archive(scene_dir: Path, output_archive: Path) -> Path:
    """Zip a scene's ``images``, ``sparse`` and optional ``report.json``.

    The three items are first copied into a staging directory so that nothing
    else in ``scene_dir`` ends up in the archive. Archive entries are stored
    under a top-level folder named after the scene, in sorted path order.

    Args:
        scene_dir: Scene directory containing ``images`` and ``sparse``.
        output_archive: Destination ``.zip`` path.

    Returns:
        ``output_archive``.
    """
    package_dir = _unique_dir(WORK_DIR, "package")
    try:
        scene_package = _ensure_dir(package_dir / scene_dir.name)
        shutil.copytree(scene_dir / "images", scene_package / "images")
        shutil.copytree(scene_dir / "sparse", scene_package / "sparse")
        report_path = scene_dir / "report.json"
        if report_path.exists():
            shutil.copy2(report_path, scene_package / "report.json")

        with zipfile.ZipFile(output_archive, "w", compression=zipfile.ZIP_DEFLATED) as archive:
            for path in sorted(scene_package.rglob("*")):
                if path.is_file():
                    # Entry names are relative to the staging root, so they
                    # start with the scene folder name.
                    archive.write(path, path.relative_to(package_dir))
    finally:
        # The staging copy is only needed while zipping; removing it keeps the
        # work directory from growing by a full scene on every conversion.
        shutil.rmtree(package_dir, ignore_errors=True)
    return output_archive
|
| 578 |
+
|
| 579 |
+
|
| 580 |
+
def convert_video_to_colmap_archive(
    video_path: str | Path,
    target_frames: int,
    profile_key: str,
    max_image_edge: int,
) -> ConversionOutputs:
    """Convert a video into a COLMAP scene archive, report and contact sheet.

    Steps, in order: normalize the input video, read its metadata, extract
    candidate frames, pick a segment and keyframes, export the images, run
    COLMAP, write the report, then produce the contact sheet, the zip archive
    and a copy of the report in ``OUTPUTS_DIR``.

    Args:
        video_path: Path of the input video.
        target_frames: Requested number of keyframes.
        profile_key: Key into ``PROFILES``.
        max_image_edge: Edge limit forwarded to frame extraction and COLMAP.

    Returns:
        A ``ConversionOutputs`` with the output paths and summary figures.

    Raises:
        ValueError: If ``profile_key`` is not a known profile.
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if profile_key not in PROFILES:
        raise ValueError(f"Unknown sampling profile: {profile_key}")

    source_path = Path(video_path)
    if not source_path.exists():
        raise FileNotFoundError(f"Input video not found: {source_path}")

    # Every conversion gets its own working directory.
    job_dir = _unique_dir(WORK_DIR, "video-job")
    normalized_path = normalize_video_input(source_path, job_dir)
    metadata = read_video_metadata(normalized_path)

    profile = PROFILES[profile_key]
    candidates = extract_candidates(
        video_path=normalized_path,
        metadata=metadata,
        candidates_dir=_ensure_dir(job_dir / "candidates"),
        target_frames=target_frames,
        max_image_edge=max_image_edge,
        profile=profile,
    )
    segments = segment_candidates(candidates, profile)
    best_segment = choose_best_segment(segments, target_frames, profile)
    keyframes = select_keyframes(best_segment, target_frames, profile)
    keyframe_total = len(keyframes)

    scene_name = f"{_slugify(source_path.stem)}-{_now_ms()}"
    scene_dir = _ensure_dir(job_dir / scene_name)
    export_selected_images(scene_dir, keyframes)
    model_dir = run_colmap(scene_dir, keyframe_total, profile, max_image_edge)
    registered_frames = count_registered_images(model_dir)
    scene_report = write_report(scene_dir, metadata, keyframes, registered_frames, profile_key, max_image_edge)

    # All user-facing outputs share one stem inside OUTPUTS_DIR.
    output_stem = f"{scene_name}-{profile_key}-{keyframe_total}"
    contact_sheet_path = create_contact_sheet(keyframes, OUTPUTS_DIR / f"{output_stem}.jpg")
    archive_path = build_archive(scene_dir, OUTPUTS_DIR / f"{output_stem}.zip")
    published_report = OUTPUTS_DIR / f"{output_stem}.report.json"
    shutil.copy2(scene_report, published_report)

    return ConversionOutputs(
        archive_path=archive_path,
        report_path=published_report,
        contact_sheet_path=contact_sheet_path,
        scene_name=scene_name,
        selected_frames=keyframe_total,
        registered_frames=registered_frames,
        duration_seconds=metadata.duration_seconds,
        quality_label=quality_label(registered_frames, keyframe_total),
    )
|