# notaneimu — "Add manual keyframe limit control" (commit 5f06829)
from __future__ import annotations
import math
from pathlib import Path
from typing import Final
import gradio as gr
from video_to_colmap import (
ConversionOutputs,
VideoMetadata,
convert_video_to_colmap_archive,
infer_target_frames,
read_video_metadata,
)
# Directory containing this script; base path for OUTPUTS_DIR.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
# "outputs" folder beside the script, created at import time if it is missing.
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
# Register the outputs folder with Gradio as a static path.
gr.set_static_paths(paths=[str(OUTPUTS_DIR)])
# Keyframe target used in "Auto" mode when no video is loaded or its metadata cannot be read.
DEFAULT_TARGET_FRAMES: Final[int] = 24
# Dropdown entries: "Auto", or a manual keyframe limit given as a numeric string.
TARGET_FRAME_CHOICES: Final[list[str]] = ["Auto", "16", "24", "32", "48"]
# Custom stylesheet handed to gr.Blocks(css=...) in build_demo. The selectors
# target the elem_id values assigned in the layout (#main-row, #controls-panel,
# #preview-panel, #status-text, #video-summary) and the class names emitted by
# preview_placeholder_html (.preview-placeholder, .preview-inner, ...).
CSS: Final[str] = """
html { scrollbar-gutter: stable; }
body { overflow: auto; }
.gradio-container {
max-width: none;
width: 100%;
margin: 0;
padding: 0.75rem 1rem 1rem;
}
#main-row {
gap: 1rem;
align-items: stretch;
}
#controls-panel {
display: flex;
flex-direction: column;
gap: 0.75rem;
}
#preview-panel {
min-height: 540px;
}
.preview-placeholder {
position: relative;
overflow: hidden;
width: 100%;
min-height: 540px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 14px;
background:
radial-gradient(circle at top left, rgba(250, 204, 21, 0.22), transparent 32%),
linear-gradient(135deg, #0f172a 0%, #172554 48%, #1e293b 100%);
border: 1px solid rgba(148, 163, 184, 0.24);
color: #f8fafc;
}
.preview-placeholder::before {
content: "";
position: absolute;
inset: 0;
background: linear-gradient(180deg, rgba(15, 23, 42, 0.18), rgba(15, 23, 42, 0.4));
}
.preview-inner {
position: relative;
z-index: 1;
max-width: 460px;
padding: 32px;
text-align: center;
border-radius: 20px;
background: rgba(15, 23, 42, 0.44);
box-shadow: 0 18px 50px rgba(15, 23, 42, 0.24);
backdrop-filter: blur(10px);
}
.preview-placeholder .preview-title {
color: #f8fafc !important;
font-size: 28px;
font-weight: 600;
margin-bottom: 8px;
letter-spacing: 0.01em;
text-shadow: 0 1px 2px rgba(15, 23, 42, 0.4);
}
.preview-placeholder .preview-desc {
color: rgba(226, 232, 240, 0.96) !important;
font-size: 16px;
line-height: 1.5;
text-shadow: 0 1px 2px rgba(15, 23, 42, 0.35);
}
#status-text {
font-size: 13px;
opacity: 0.92;
}
#video-summary {
margin-top: -0.15rem;
padding: 0.8rem 0.9rem;
border-radius: 12px;
background: rgba(15, 23, 42, 0.04);
border: 1px solid rgba(148, 163, 184, 0.18);
}
@media (max-width: 900px) {
#main-row {
flex-direction: column;
}
#preview-panel,
.preview-placeholder {
min-height: 420px;
}
}
"""
def preview_placeholder_html(title: str, description: str) -> str:
    """Return the HTML card displayed in the preview panel.

    Args:
        title: Heading text placed in the ``preview-title`` element.
        description: Body text placed in the ``preview-desc`` element.

    Returns:
        The markup as one string. Both arguments are interpolated as-is;
        no HTML escaping is applied.
    """
    # The empty first and last entries reproduce the leading and trailing
    # newline of the markup.
    markup_lines = [
        "",
        '<div class="preview-placeholder">',
        '<div class="preview-inner">',
        f'<div class="preview-title">{title}</div>',
        f'<div class="preview-desc">{description}</div>',
        "</div>",
        "</div>",
        "",
    ]
    return "\n".join(markup_lines)
def default_video_summary() -> str:
    """Return the summary hint shown while no video is loaded."""
    hint = "Upload a video to infer the target keyframe count automatically, or choose a manual max."
    return hint
def fallback_video_summary() -> str:
    """Return the summary shown when the uploaded video's metadata is unreadable.

    The keyframe count in the message is taken from ``DEFAULT_TARGET_FRAMES``,
    the same value the "Auto" fallback path applies, so the text cannot drift
    from the number actually used.
    """
    return (
        "Could not inspect video metadata on upload. "
        f"Using the default target of **{DEFAULT_TARGET_FRAMES} keyframes**."
    )
def manual_video_summary(target_frames: int, metadata: VideoMetadata | None = None) -> str:
    """Describe a manually chosen keyframe limit as a Markdown sentence.

    Args:
        target_frames: The manual keyframe limit to report.
        metadata: Optional video metadata. When given, the clip's duration,
            resolution and frame rate are appended to the sentence.

    Returns:
        The Markdown summary string.
    """
    lead = f"Using a manual limit of **{target_frames} keyframes**"
    if metadata is not None:
        return (
            f"{lead} for a **{metadata.duration_seconds:.1f}s** clip "
            f"at **{metadata.width}x{metadata.height}** and **{metadata.fps:.1f} fps**."
        )
    return f"{lead}."
def video_summary_text(metadata: VideoMetadata, target_frames: int) -> str:
    """Describe an automatically chosen keyframe count as a Markdown sentence.

    Args:
        metadata: Video metadata; ``duration_seconds``, ``width``, ``height``
            and ``fps`` are read from it.
        target_frames: The keyframe count that was selected automatically.

    Returns:
        The Markdown summary string.
    """
    duration = f"{metadata.duration_seconds:.1f}s"
    resolution = f"{metadata.width}x{metadata.height}"
    frame_rate = f"{metadata.fps:.1f} fps"
    return (
        f"Auto-selecting **{target_frames} keyframes** from a **{duration}** clip "
        f"at **{resolution}** and **{frame_rate}**."
    )
def start_generation(target_frames: int) -> tuple[object, object, str]:
    """Produce the UI updates applied when a conversion starts.

    Args:
        target_frames: Keyframe count mentioned in the progress message.

    Returns:
        A 3-tuple: an update that disables the generate button and relabels it
        "Converting...", an update that disables the clear button, and the
        progress placeholder HTML.
    """
    busy_generate = gr.update(interactive=False, value="Converting...")
    busy_clear = gr.update(interactive=False)
    progress_card = preview_placeholder_html(
        "Preparing Video for COLMAP",
        f"Normalizing the clip, selecting {target_frames} sharp overlapping keyframes, and running sparse reconstruction.",
    )
    return busy_generate, busy_clear, progress_card
def _status_text(outputs: ConversionOutputs) -> str:
    """Build the Markdown status line shown after a conversion.

    Args:
        outputs: Conversion result; ``scene_name``, ``duration_seconds``,
            ``selected_frames``, ``registered_frames`` and ``quality_label``
            are read from it.

    Returns:
        A Markdown summary that includes the share of selected keyframes
        COLMAP registered, as a whole percentage rounded down (0% when no
        frames were selected).
    """
    registration_pct = 0
    if outputs.selected_frames:
        # Scale before dividing: ``(29 / 50) * 100`` evaluates to
        # 57.99999999999999 in binary floating point and would floor to 57,
        # whereas ``29 * 100 / 50`` is exactly 58.0.
        registration_pct = math.floor(
            outputs.registered_frames * 100 / outputs.selected_frames
        )
    return (
        f"Prepared **{outputs.scene_name}** from a **{outputs.duration_seconds:.1f}s** clip. "
        f"Selected **{outputs.selected_frames}** keyframes, COLMAP registered **{outputs.registered_frames}**, "
        f"and the reconstruction quality is **{outputs.quality_label}** "
        f"({registration_pct}% registration)."
    )
def run_conversion(
    video_path: str | None,
    target_frames: int,
    sampling_profile: str,
    max_edge: str,
) -> tuple[object, object, object, str]:
    """Convert the uploaded video and return updates for the result widgets.

    Args:
        video_path: Path of the uploaded video, or a falsy value if none.
        target_frames: Keyframe count passed to the converter.
        sampling_profile: Profile key passed to the converter.
        max_edge: Maximum image edge as a numeric string; converted to ``int``.

    Returns:
        A 4-tuple: updates for the archive download, the report download and
        the contact-sheet image (all made visible), plus the status Markdown.

    Raises:
        gr.Error: If no video was supplied, or if anything in the conversion
            fails (non-Gradio exceptions are wrapped with their type and
            message).
    """
    if not video_path:
        raise gr.Error("Upload a video first.")

    try:
        edge_limit = int(max_edge)
        outputs = convert_video_to_colmap_archive(
            video_path=video_path,
            target_frames=target_frames,
            profile_key=sampling_profile,
            max_image_edge=edge_limit,
        )
        archive_update = gr.update(value=str(outputs.archive_path), visible=True, interactive=True)
        report_update = gr.update(value=str(outputs.report_path), visible=True, interactive=True)
        sheet_update = gr.update(value=str(outputs.contact_sheet_path), visible=True)
        return archive_update, report_update, sheet_update, _status_text(outputs)
    except Exception as exc:
        # Gradio errors already carry a user-facing message; pass them through.
        if isinstance(exc, gr.Error):
            raise
        raise gr.Error(f"Conversion failed: {type(exc).__name__}: {exc}") from exc
def clear_all() -> tuple[None, object, object, object, str, str, int, object]:
    """Return the values that reset the UI to its initial state.

    Returns:
        An 8-tuple: ``None`` for the video input, three updates that empty and
        hide the archive, report and contact-sheet widgets, an empty status
        string, the default summary text, the default keyframe target, and an
        update setting the target-mode dropdown back to "Auto".
    """
    # One independent update object per hidden output widget.
    archive_reset, report_reset, sheet_reset = (
        gr.update(value=None, visible=False) for _ in range(3)
    )
    summary = default_video_summary()
    mode_reset = gr.update(value="Auto")
    return (
        None,
        archive_reset,
        report_reset,
        sheet_reset,
        "",
        summary,
        DEFAULT_TARGET_FRAMES,
        mode_reset,
    )
def update_target_settings(video_path: str | None, target_mode: str) -> tuple[object, object, str, int]:
    """Recompute button state, summary text and keyframe target.

    Args:
        video_path: Path of the uploaded video, or a falsy value if none.
        target_mode: ``"Auto"`` or a manual keyframe limit as a numeric string.

    Returns:
        A 4-tuple: an update for the generate button (enabled only when a
        video is present, label reset to "Build COLMAP Archive"), an update
        for the clear button, the summary Markdown, and the keyframe target.
    """
    video_loaded = bool(video_path)
    generate_update = gr.update(interactive=video_loaded, value="Build COLMAP Archive")
    clear_update = gr.update(interactive=video_loaded)
    auto_mode = target_mode == "Auto"

    if not video_loaded:
        if auto_mode:
            target_frames = DEFAULT_TARGET_FRAMES
            summary = default_video_summary()
        else:
            target_frames = int(target_mode)
            summary = manual_video_summary(target_frames)
        return generate_update, clear_update, summary, target_frames

    try:
        metadata = read_video_metadata(Path(video_path))
        if auto_mode:
            target_frames = infer_target_frames(metadata)
            summary = video_summary_text(metadata, target_frames)
        else:
            target_frames = int(target_mode)
            summary = manual_video_summary(target_frames, metadata)
    except Exception:
        # Best effort: if inspecting the video fails, fall back to a summary
        # that does not need metadata instead of surfacing an error.
        if auto_mode:
            target_frames = DEFAULT_TARGET_FRAMES
            summary = fallback_video_summary()
        else:
            target_frames = int(target_mode)
            summary = manual_video_summary(target_frames)

    return generate_update, clear_update, summary, target_frames
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    Returns:
        The configured ``gr.Blocks`` app, with its queue enabled
        (``max_size=4``).
    """
    with gr.Blocks(
        css=CSS,
        title="Video to COLMAP for tttLRM",
        theme=gr.themes.Origin(),
    ) as demo:
        gr.Markdown("## Video to COLMAP for tttLRM")
        gr.Markdown(
            "Upload a single video. The Space will pick sharp overlapping keyframes, run COLMAP, and export a raw scene archive ready for the `tttLRM` Space."
        )
        with gr.Row(elem_id="main-row", equal_height=True):
            # Left column: input, conversion settings, action buttons, downloads.
            with gr.Column(scale=3, min_width=320, elem_id="controls-panel"):
                video_in = gr.File(
                    label="Input Video",
                    type="filepath",
                    file_types=[".mp4", ".mov", ".webm", ".mkv", ".avi"],
                )
                # Holds the resolved keyframe target (auto-inferred or manual)
                # so the conversion step does not have to recompute it.
                target_frames_state = gr.State(value=DEFAULT_TARGET_FRAMES)
                target_mode = gr.Dropdown(
                    label="Max Angles / Keyframes",
                    choices=TARGET_FRAME_CHOICES,
                    value="Auto",
                )
                video_summary = gr.Markdown(default_video_summary(), elem_id="video-summary")
                sampling_profile = gr.Dropdown(
                    label="Sampling Profile",
                    choices=["balanced", "dense", "sparse"],
                    value="balanced",
                )
                max_edge = gr.Dropdown(
                    label="Max Frame Edge",
                    choices=["960", "1280", "1600"],
                    value="1280",
                )
                # Both buttons start disabled; update_target_settings enables
                # them once a video is present.
                with gr.Row():
                    generate_btn = gr.Button("Build COLMAP Archive", variant="primary", interactive=False)
                    clear_btn = gr.Button("Clear", interactive=False)
                # NOTE(review): result widgets are placed in the controls column
                # beneath the button row — confirm against the intended layout.
                archive_download = gr.File(label="Download Raw COLMAP Archive", visible=False)
                report_download = gr.File(label="Download Reconstruction Report", visible=False)
                status_text = gr.Markdown(elem_id="status-text")
            # Right column: placeholder card, then the contact sheet image.
            with gr.Column(scale=7, min_width=520):
                preview_html = gr.HTML(
                    value=preview_placeholder_html(
                        "Keyframe Selection Preview",
                        "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
                    ),
                    elem_id="preview-panel",
                )
                contact_sheet = gr.Image(label="Selected Keyframes", visible=False, type="filepath")
        # A new upload and a change of target mode both refresh the button
        # state, the summary text and the stored keyframe target.
        video_in.change(
            update_target_settings,
            inputs=[video_in, target_mode],
            outputs=[generate_btn, clear_btn, video_summary, target_frames_state],
            queue=False,
        )
        target_mode.change(
            update_target_settings,
            inputs=[video_in, target_mode],
            outputs=[generate_btn, clear_btn, video_summary, target_frames_state],
            queue=False,
        )
        # Generate: (1) lock the buttons and show a progress card, (2) run the
        # conversion (the only step that does not pass queue=False), then
        # (3) unlock the buttons and show the completion card.
        # NOTE(review): Gradio's .then() also runs after a failed step, so the
        # "Keyframe Selection Complete" card would appear even when
        # run_conversion raised — confirm whether .success() is wanted for the
        # card while still re-enabling the buttons on failure.
        generate_btn.click(
            start_generation,
            inputs=[target_frames_state],
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        ).then(
            run_conversion,
            inputs=[video_in, target_frames_state, sampling_profile, max_edge],
            outputs=[archive_download, report_download, contact_sheet, status_text],
        ).then(
            lambda: (
                gr.update(interactive=True, value="Build COLMAP Archive"),
                gr.update(interactive=True),
                preview_placeholder_html(
                    "Keyframe Selection Complete",
                    "Review the contact sheet below and download the raw COLMAP archive for the `tttLRM` Space.",
                ),
            ),
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )
        # Clear: reset inputs and outputs, then disable both buttons and
        # restore the initial placeholder card.
        clear_btn.click(
            clear_all,
            outputs=[video_in, archive_download, report_download, contact_sheet, status_text, video_summary, target_frames_state, target_mode],
            queue=False,
        ).then(
            lambda: (
                gr.update(interactive=False),
                gr.update(interactive=False),
                preview_placeholder_html(
                    "Keyframe Selection Preview",
                    "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
                ),
            ),
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )
        # Enable the event queue with at most 4 waiting requests.
        demo.queue(max_size=4)
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when executed as a script.
    app = build_demo()
    app.launch()