Spaces:

notaneimu
/

video-to-colmap-for-tttlrm

Running

App Files Files Community

video-to-colmap-for-tttlrm / app.py

notaneimu

Add manual keyframe limit control

5f06829 3 months ago

raw

history blame contribute delete

12.3 kB

	from __future__ import annotations

	import math
	from pathlib import Path
	from typing import Final

	import gradio as gr

	from video_to_colmap import (
	ConversionOutputs,
	VideoMetadata,
	convert_video_to_colmap_archive,
	infer_target_frames,
	read_video_metadata,
	)

	# Directory that contains this file; used as the root for generated artifacts.
	APP_DIR: Final[Path] = Path(__file__).resolve().parent
	# Sub-directory of APP_DIR named "outputs".
	OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"

	# Import-time side effects: create the outputs directory if it is missing and
	# register it with Gradio through gr.set_static_paths.
	OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
	gr.set_static_paths(paths=[str(OUTPUTS_DIR)])

	# Keyframe target used when no video is loaded or its metadata cannot be read.
	DEFAULT_TARGET_FRAMES: Final[int] = 24
	# Choices offered by the keyframe dropdown; "Auto" selects inference from the
	# video metadata, every other entry is parsed with int() as a manual limit.
	TARGET_FRAME_CHOICES: Final[list[str]] = ["Auto", "16", "24", "32", "48"]

	# Stylesheet handed to gr.Blocks(css=CSS) in build_demo.
	# The id selectors (#main-row, #controls-panel, #preview-panel, #status-text,
	# #video-summary) match the elem_id values assigned in build_demo; the
	# .preview-* classes match the markup returned by preview_placeholder_html.
	CSS: Final[str] = """
	html { scrollbar-gutter: stable; }
	body { overflow: auto; }
	.gradio-container {
	max-width: none;
	width: 100%;
	margin: 0;
	padding: 0.75rem 1rem 1rem;
	}
	#main-row {
	gap: 1rem;
	align-items: stretch;
	}
	#controls-panel {
	display: flex;
	flex-direction: column;
	gap: 0.75rem;
	}
	#preview-panel {
	min-height: 540px;
	}
	.preview-placeholder {
	position: relative;
	overflow: hidden;
	width: 100%;
	min-height: 540px;
	display: flex;
	align-items: center;
	justify-content: center;
	border-radius: 14px;
	background:
	radial-gradient(circle at top left, rgba(250, 204, 21, 0.22), transparent 32%),
	linear-gradient(135deg, #0f172a 0%, #172554 48%, #1e293b 100%);
	border: 1px solid rgba(148, 163, 184, 0.24);
	color: #f8fafc;
	}
	.preview-placeholder::before {
	content: "";
	position: absolute;
	inset: 0;
	background: linear-gradient(180deg, rgba(15, 23, 42, 0.18), rgba(15, 23, 42, 0.4));
	}
	.preview-inner {
	position: relative;
	z-index: 1;
	max-width: 460px;
	padding: 32px;
	text-align: center;
	border-radius: 20px;
	background: rgba(15, 23, 42, 0.44);
	box-shadow: 0 18px 50px rgba(15, 23, 42, 0.24);
	backdrop-filter: blur(10px);
	}
	.preview-placeholder .preview-title {
	color: #f8fafc !important;
	font-size: 28px;
	font-weight: 600;
	margin-bottom: 8px;
	letter-spacing: 0.01em;
	text-shadow: 0 1px 2px rgba(15, 23, 42, 0.4);
	}
	.preview-placeholder .preview-desc {
	color: rgba(226, 232, 240, 0.96) !important;
	font-size: 16px;
	line-height: 1.5;
	text-shadow: 0 1px 2px rgba(15, 23, 42, 0.35);
	}
	#status-text {
	font-size: 13px;
	opacity: 0.92;
	}
	#video-summary {
	margin-top: -0.15rem;
	padding: 0.8rem 0.9rem;
	border-radius: 12px;
	background: rgba(15, 23, 42, 0.04);
	border: 1px solid rgba(148, 163, 184, 0.18);
	}
	@media (max-width: 900px) {
	#main-row {
	flex-direction: column;
	}
	#preview-panel,
	.preview-placeholder {
	min-height: 420px;
	}
	}
	"""


	def preview_placeholder_html(title: str, description: str) -> str:
	    """Render the placeholder card displayed in the preview panel.

	    Args:
	        title: Text placed inside the ``preview-title`` element.
	        description: Text placed inside the ``preview-desc`` element.

	    Returns:
	        An HTML fragment. Both arguments are interpolated verbatim, without
	        any HTML escaping.
	    """
	    markup = f"""
	<div class="preview-placeholder">
	<div class="preview-inner">
	<div class="preview-title">{title}</div>
	<div class="preview-desc">{description}</div>
	</div>
	</div>
	"""
	    return markup


	def default_video_summary() -> str:
	    """Return the hint shown in the summary panel while no video is loaded."""
	    hint = "Upload a video to infer the target keyframe count automatically, or choose a manual max."
	    return hint


	def fallback_video_summary() -> str:
	    """Return the summary shown when the uploaded video's metadata cannot be read.

	    The keyframe count in the message is taken from ``DEFAULT_TARGET_FRAMES``
	    rather than being spelled out, so the text always matches the value that
	    ``update_target_settings`` actually falls back to.
	    """
	    return (
	        "Could not inspect video metadata on upload. "
	        f"Using the default target of {DEFAULT_TARGET_FRAMES} keyframes."
	    )


	def manual_video_summary(target_frames: int, metadata: VideoMetadata | None = None) -> str:
	    """Describe a manually chosen keyframe limit.

	    Args:
	        target_frames: The keyframe limit picked by the user.
	        metadata: Metadata of the uploaded clip, or ``None`` when it is not
	            available. Only ``duration_seconds``, ``width``, ``height`` and
	            ``fps`` are read.

	    Returns:
	        A one-sentence summary; clip details are appended only when
	        ``metadata`` is provided.
	    """
	    if metadata is None:
	        return f"Using a manual limit of {target_frames} keyframes."
	    return (
	        f"Using a manual limit of {target_frames} keyframes for a {metadata.duration_seconds:.1f}s clip "
	        f"at {metadata.width}x{metadata.height} and {metadata.fps:.1f} fps."
	    )


	def video_summary_text(metadata: VideoMetadata, target_frames: int) -> str:
	    """Describe an automatically inferred keyframe target for a clip.

	    Args:
	        metadata: Clip metadata; ``duration_seconds``, ``width``, ``height``
	            and ``fps`` are read.
	        target_frames: The keyframe count that was selected.

	    Returns:
	        A one-sentence summary with duration and frame rate shown to one
	        decimal place.
	    """
	    clip_length = f"{metadata.duration_seconds:.1f}s"
	    resolution = f"{metadata.width}x{metadata.height}"
	    frame_rate = f"{metadata.fps:.1f} fps"
	    return f"Auto-selecting {target_frames} keyframes from a {clip_length} clip at {resolution} and {frame_rate}."


	def start_generation(target_frames: int) -> tuple[object, object, str]:
	    """Put the UI into its busy state right before a conversion starts.

	    Args:
	        target_frames: Keyframe count quoted in the progress message.

	    Returns:
	        A 3-tuple: an update that disables the generate button and relabels
	        it "Converting...", an update that disables the clear button, and the
	        progress placeholder HTML for the preview panel.
	    """
	    busy_generate = gr.update(interactive=False, value="Converting...")
	    busy_clear = gr.update(interactive=False)
	    progress_card = preview_placeholder_html(
	        "Preparing Video for COLMAP",
	        f"Normalizing the clip, selecting {target_frames} sharp overlapping keyframes, and running sparse reconstruction.",
	    )
	    return busy_generate, busy_clear, progress_card


	def _status_text(outputs: ConversionOutputs) -> str:
	    """Build the one-paragraph status line shown after a successful conversion.

	    Args:
	        outputs: Conversion result; ``scene_name``, ``duration_seconds``,
	            ``selected_frames``, ``registered_frames`` and ``quality_label``
	            are read.

	    Returns:
	        A sentence summarising the run, ending with the share of selected
	        keyframes that were registered, as a whole percentage rounded down
	        (0% when no frames were selected).
	    """
	    percent = 0
	    if outputs.selected_frames:
	        # Scale by 100 *before* dividing. Computing the ratio first and then
	        # multiplying lets binary rounding push exact percentages just below
	        # the integer (29/50 * 100 == 57.99999999999999), which floor() would
	        # then report as 57% instead of 58%.
	        percent = math.floor(outputs.registered_frames * 100 / outputs.selected_frames)

	    return (
	        f"Prepared {outputs.scene_name} from a {outputs.duration_seconds:.1f}s clip. "
	        f"Selected {outputs.selected_frames} keyframes, COLMAP registered {outputs.registered_frames}, "
	        f"and the reconstruction quality is {outputs.quality_label} "
	        f"({percent}% registration)."
	    )


	def run_conversion(
	    video_path: str | None,
	    target_frames: int,
	    sampling_profile: str,
	    max_edge: str,
	) -> tuple[object, object, object, str]:
	    """Convert the uploaded video and return updates for the result widgets.

	    Args:
	        video_path: Path of the uploaded video, or ``None``/empty when
	            nothing is loaded.
	        target_frames: Keyframe target forwarded to the converter.
	        sampling_profile: Forwarded to the converter as ``profile_key``.
	        max_edge: Maximum image edge as a string; parsed with ``int``.

	    Returns:
	        A 4-tuple: updates exposing the archive download, the report
	        download and the contact-sheet image, followed by the status text.

	    Raises:
	        gr.Error: If no video is loaded, or if the conversion (or building
	            the result updates) fails. Non-Gradio exceptions are wrapped so
	            the UI shows the exception type and message; the original
	            exception is chained as the cause.
	    """
	    if not video_path:
	        raise gr.Error("Upload a video first.")

	    try:
	        outputs = convert_video_to_colmap_archive(
	            video_path=video_path,
	            target_frames=target_frames,
	            profile_key=sampling_profile,
	            max_image_edge=int(max_edge),
	        )
	        return (
	            gr.update(value=str(outputs.archive_path), visible=True, interactive=True),
	            gr.update(value=str(outputs.report_path), visible=True, interactive=True),
	            gr.update(value=str(outputs.contact_sheet_path), visible=True),
	            _status_text(outputs),
	        )
	    except gr.Error:
	        # Already user-facing; pass through without re-wrapping.
	        raise
	    except Exception as exc:
	        # UI boundary: surface any failure as a Gradio error message.
	        raise gr.Error(f"Conversion failed: {type(exc).__name__}: {exc}") from exc


	def clear_all() -> tuple[None, object, object, object, str, str, int, object]:
	    """Produce the values that reset the UI to its initial, empty state.

	    Returns:
	        An 8-tuple, in the order the clear button wires its outputs: the
	        video input (``None``), hidden/emptied updates for the archive
	        download, report download and contact sheet, an empty status text,
	        the default video summary, the default keyframe target, and an
	        update that sets the keyframe dropdown back to "Auto".
	    """
	    # One distinct update object per result widget.
	    hide_archive, hide_report, hide_contact_sheet = (
	        gr.update(value=None, visible=False) for _ in range(3)
	    )
	    reset_mode = gr.update(value="Auto")
	    return (
	        None,
	        hide_archive,
	        hide_report,
	        hide_contact_sheet,
	        "",
	        default_video_summary(),
	        DEFAULT_TARGET_FRAMES,
	        reset_mode,
	    )


	def update_target_settings(video_path: str | None, target_mode: str) -> tuple[object, object, str, int]:
	    """Recompute button state, summary text and keyframe target.

	    Called when either the uploaded video or the keyframe dropdown changes.

	    Args:
	        video_path: Path of the uploaded video, or ``None``/empty when
	            nothing is loaded.
	        target_mode: Dropdown value: "Auto", or a decimal string giving a
	            manual keyframe limit.

	    Returns:
	        A 4-tuple: an update for the generate button (enabled only when a
	        video is loaded, label reset to "Build COLMAP Archive"), an update
	        for the clear button (same enabled state), the summary text, and the
	        keyframe target to store.

	    Raises:
	        ValueError: If ``target_mode`` is neither "Auto" nor parseable by
	            ``int``.
	    """
	    has_video = bool(video_path)
	    generate_update = gr.update(interactive=has_video, value="Build COLMAP Archive")
	    clear_update = gr.update(interactive=has_video)

	    # Parse the dropdown once; None means the target should be inferred.
	    manual_frames = None if target_mode == "Auto" else int(target_mode)

	    if not video_path:
	        if manual_frames is None:
	            summary = default_video_summary()
	            target_frames = DEFAULT_TARGET_FRAMES
	        else:
	            summary = manual_video_summary(manual_frames)
	            target_frames = manual_frames
	        return (
	            generate_update,
	            clear_update,
	            summary,
	            target_frames,
	        )

	    try:
	        metadata = read_video_metadata(Path(video_path))
	        if manual_frames is None:
	            target_frames = infer_target_frames(metadata)
	            summary = video_summary_text(metadata, target_frames)
	        else:
	            target_frames = manual_frames
	            summary = manual_video_summary(manual_frames, metadata)
	    except Exception:
	        # Deliberate best-effort: a video whose metadata cannot be inspected
	        # must not block the upload, so fall back to a metadata-free summary.
	        if manual_frames is None:
	            target_frames = DEFAULT_TARGET_FRAMES
	            summary = fallback_video_summary()
	        else:
	            target_frames = manual_frames
	            summary = manual_video_summary(manual_frames)

	    return (
	        generate_update,
	        clear_update,
	        summary,
	        target_frames,
	    )


	def build_demo() -> gr.Blocks:
	    """Assemble the Gradio UI and wire up its event handlers.

	    The layout is a two-column row: a controls column (video upload,
	    keyframe/sampling/size dropdowns, action buttons, downloads, status) and
	    a preview column (placeholder HTML and the contact-sheet image).

	    Returns:
	        The configured ``gr.Blocks`` app with its queue enabled (max size 4).
	    """
	    # NOTE(review): the nesting of the layout contexts below was reconstructed
	    # from a copy of this file that had lost its indentation - confirm that
	    # the download widgets and status text belong to the controls column
	    # rather than to the button row.
	    idle_title = "Keyframe Selection Preview"
	    idle_description = "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage."

	    with gr.Blocks(
	        css=CSS,
	        title="Video to COLMAP for tttLRM",
	        theme=gr.themes.Origin(),
	    ) as demo:
	        gr.Markdown("## Video to COLMAP for tttLRM")
	        gr.Markdown(
	            "Upload a single video. The Space will pick sharp overlapping keyframes, run COLMAP, and export a raw scene archive ready for the `tttLRM` Space."
	        )

	        with gr.Row(elem_id="main-row", equal_height=True):
	            # Left column: inputs, actions and results.
	            with gr.Column(scale=3, min_width=320, elem_id="controls-panel"):
	                video_in = gr.File(
	                    label="Input Video",
	                    type="filepath",
	                    file_types=[".mp4", ".mov", ".webm", ".mkv", ".avi"],
	                )
	                # Holds the resolved keyframe target between events.
	                target_frames_state = gr.State(value=DEFAULT_TARGET_FRAMES)
	                target_mode = gr.Dropdown(
	                    label="Max Angles / Keyframes",
	                    choices=TARGET_FRAME_CHOICES,
	                    value="Auto",
	                )
	                video_summary = gr.Markdown(default_video_summary(), elem_id="video-summary")
	                sampling_profile = gr.Dropdown(
	                    label="Sampling Profile",
	                    choices=["balanced", "dense", "sparse"],
	                    value="balanced",
	                )
	                max_edge = gr.Dropdown(
	                    label="Max Frame Edge",
	                    choices=["960", "1280", "1600"],
	                    value="1280",
	                )
	                with gr.Row():
	                    generate_btn = gr.Button("Build COLMAP Archive", variant="primary", interactive=False)
	                    clear_btn = gr.Button("Clear", interactive=False)
	                archive_download = gr.File(label="Download Raw COLMAP Archive", visible=False)
	                report_download = gr.File(label="Download Reconstruction Report", visible=False)
	                status_text = gr.Markdown(elem_id="status-text")

	            # Right column: preview placeholder and contact sheet.
	            with gr.Column(scale=7, min_width=520):
	                preview_html = gr.HTML(
	                    value=preview_placeholder_html(idle_title, idle_description),
	                    elem_id="preview-panel",
	                )
	                contact_sheet = gr.Image(label="Selected Keyframes", visible=False, type="filepath")

	        # Both the upload widget and the keyframe dropdown recompute the same
	        # settings; register the video input first, then the dropdown.
	        for trigger in (video_in, target_mode):
	            trigger.change(
	                update_target_settings,
	                inputs=[video_in, target_mode],
	                outputs=[generate_btn, clear_btn, video_summary, target_frames_state],
	                queue=False,
	            )

	        # Generate: lock the UI, run the (queued) conversion, then unlock.
	        locked = generate_btn.click(
	            start_generation,
	            inputs=[target_frames_state],
	            outputs=[generate_btn, clear_btn, preview_html],
	            queue=False,
	        )
	        converted = locked.then(
	            run_conversion,
	            inputs=[video_in, target_frames_state, sampling_profile, max_edge],
	            outputs=[archive_download, report_download, contact_sheet, status_text],
	        )
	        converted.then(
	            lambda: (
	                gr.update(interactive=True, value="Build COLMAP Archive"),
	                gr.update(interactive=True),
	                preview_placeholder_html(
	                    "Keyframe Selection Complete",
	                    "Review the contact sheet below and download the raw COLMAP archive for the `tttLRM` Space.",
	                ),
	            ),
	            outputs=[generate_btn, clear_btn, preview_html],
	            queue=False,
	        )

	        # Clear: reset every widget, then disable the buttons and restore the
	        # idle preview placeholder.
	        cleared = clear_btn.click(
	            clear_all,
	            outputs=[video_in, archive_download, report_download, contact_sheet, status_text, video_summary, target_frames_state, target_mode],
	            queue=False,
	        )
	        cleared.then(
	            lambda: (
	                gr.update(interactive=False),
	                gr.update(interactive=False),
	                preview_placeholder_html(idle_title, idle_description),
	            ),
	            outputs=[generate_btn, clear_btn, preview_html],
	            queue=False,
	        )

	    demo.queue(max_size=4)
	    return demo


	if __name__ == "__main__":
	    # Script entry point: build the UI and start the Gradio server.
	    demo = build_demo()
	    demo.launch()