import os
import sys
import gc
import json
import base64
import time
from io import BytesIO
import gradio as gr
import torch
import spaces
from PIL import Image, ImageDraw
from transformers import AutoModel, AutoProcessor, GenerationConfig
from huggingface_hub import snapshot_download
# Accent colour interpolated into the f-string stylesheet further down.
ACCENT = "#FF0000"

# Run on the first CUDA device when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Startup diagnostics: dump the torch / CUDA environment to stdout.
print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("torch.__version__ =", torch.__version__)
print("torch.version.cuda =", torch.version.cuda)
print("cuda available:", torch.cuda.is_available())
print("cuda device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    # These queries are only issued when CUDA is actually available.
    print("current device:", torch.cuda.current_device())
    print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
print("Using device:", device)
# Download the complete model repository; the `postprocessing` module imported
# below is loaded from that directory.
print("Downloading model snapshot to ensure all scripts are present...")
model_dir = snapshot_download(repo_id="nvidia/NVIDIA-Nemotron-Parse-v1.1")
print(f"Model downloaded to: {model_dir}")

# Make the snapshot directory importable (without piling up duplicate entries
# if this module is executed more than once in the same interpreter).
if model_dir not in sys.path:
    sys.path.append(model_dir)

try:
    from postprocessing import extract_classes_bboxes, transform_bbox_to_original, postprocess_text
except ImportError as e:
    print(f"Error importing postprocessing: {e}")
    # Bare `raise` re-raises the active exception with its traceback untouched.
    raise
else:
    print("Successfully imported postprocessing functions.")
print("Loading Model components...")
processor = AutoProcessor.from_pretrained(model_dir, trust_remote_code=True)

# Weights are loaded as bfloat16 when CUDA is available and float32 otherwise,
# then moved to the selected device and switched to eval mode.
model = AutoModel.from_pretrained(
    model_dir,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)
model = model.to(device)
model = model.eval()

# Best effort: use the generation settings stored with the model, and fall
# back to a plain 4096-new-token configuration if they cannot be loaded.
try:
    generation_config = GenerationConfig.from_pretrained(model_dir, trust_remote_code=True)
except Exception as e:
    print(f"Warning: Could not load GenerationConfig: {e}. Using default.")
    generation_config = GenerationConfig(max_new_tokens=4096)

print("Model loaded successfully.")
# Sample documents offered as quick examples: examples/1.jpg .. examples/5.jpg.
image_examples = [{"image": f"examples/{n}.jpg"} for n in range(1, 6)]
def pil_to_data_url(img: "Image.Image", fmt="PNG"):
    """Serialise a PIL image into a base64 ``data:`` URL.

    Args:
        img: Image to encode.
        fmt: Pillow format name handed to ``img.save`` (e.g. ``"PNG"``,
            ``"JPEG"``, ``"WEBP"``). Case-insensitive for the MIME lookup.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    mime_by_format = {
        "PNG": "image/png",
        "JPEG": "image/jpeg",
        "WEBP": "image/webp",
        "GIF": "image/gif",
        "BMP": "image/bmp",
        "TIFF": "image/tiff",
    }
    fmt_name = fmt.upper()

    # JPEG cannot store alpha or palette data; flatten such images to RGB
    # instead of letting the save call fail.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if fmt_name == "JPEG" and getattr(img, "mode", "RGB") not in jpeg_modes:
        img = img.convert("RGB")

    buf = BytesIO()
    img.save(buf, format=fmt)
    payload = base64.b64encode(buf.getvalue()).decode()

    # Label the payload with the MIME type of the format that was written.
    mime = mime_by_format.get(fmt_name, f"image/{fmt_name.lower()}")
    return f"data:{mime};base64,{payload}"
def file_to_data_url(path):
    """Read an image file and return it as a base64 ``data:`` URL.

    The MIME type is chosen from the file extension (jpg/jpeg/png/webp) and
    defaults to ``image/jpeg`` for anything else.

    Args:
        path: Filesystem path of the image.

    Returns:
        The ``data:`` URL, or ``""`` when the file cannot be read (missing,
        a directory, permission denied, ...).
    """
    mime_by_ext = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "webp": "image/webp",
    }
    ext = os.path.splitext(path)[1].lstrip(".").lower()
    mime = mime_by_ext.get(ext, "image/jpeg")

    # Attempt the read directly instead of testing for existence first: this
    # also covers directories and unreadable files, and avoids the race
    # between the check and the open.
    try:
        with open(path, "rb") as f:
            raw = f.read()
    except OSError:
        return ""

    data = base64.b64encode(raw).decode()
    return f"data:{mime};base64,{data}"
def make_thumb_b64(path, max_dim=240):
    """Build a JPEG thumbnail of the image at ``path`` as a ``data:`` URL.

    Args:
        path: Filesystem path of the source image.
        max_dim: Maximum width and height of the thumbnail in pixels.

    Returns:
        The thumbnail as a ``data:`` URL, or ``""`` if anything goes wrong
        (the error is printed).
    """
    try:
        # The context manager releases the file handle even if decoding
        # fails; `convert` returns an independent in-memory copy.
        with Image.open(path) as src:
            thumb = src.convert("RGB")
        thumb.thumbnail((max_dim, max_dim))
        return pil_to_data_url(thumb, "JPEG")
    except Exception as e:
        print("Thumbnail error:", e)
        return ""
def b64_to_pil(b64_str):
    """Decode a base64 string (raw or ``data:`` URL) into an RGB PIL image.

    Returns:
        The decoded image, or ``None`` when the input is empty or cannot be
        decoded for any reason.
    """
    if not b64_str:
        return None
    try:
        payload = b64_str
        if b64_str.startswith("data:"):
            # Keep only what follows the first comma; a data URL without a
            # comma raises here and ends up in the handler below.
            payload = b64_str.split(",", 1)[1]
        raw_bytes = base64.b64decode(payload)
        decoded = Image.open(BytesIO(raw_bytes))
        return decoded.convert("RGB")
    except Exception:
        return None
def build_example_cards_html():
    """Render one clickable card per entry of ``image_examples``.

    Each card carries its index in ``data-idx`` and shows a thumbnail (or a
    "Preview" placeholder when no thumbnail could be produced), a
    "Nemotron Parse" badge and the example's file name.

    NOTE(review): the card markup was not recoverable from the reviewed
    source. It is rebuilt here from the ``.example-*`` CSS rules and the
    ``.example-card[data-idx]`` selector used by the front-end script;
    confirm it against the intended design.

    Returns:
        The concatenated HTML of all cards.
    """
    cards = []
    for i, ex in enumerate(image_examples):
        thumb = make_thumb_b64(ex["image"])
        if thumb:
            thumb_html = f'<img src="{thumb}" alt="">'
        else:
            thumb_html = '<div class="example-thumb-placeholder">Preview</div>'
        cards.append(f"""
<div class="example-card" data-idx="{i}">
<div class="example-thumb-wrap">{thumb_html}</div>
<div class="example-meta-row"><span class="example-badge">Nemotron Parse</span></div>
<div class="example-prompt-text">{os.path.basename(ex["image"])}</div>
</div>
""")
    # Join once instead of growing a string inside the loop.
    return "".join(cards)


# Rendered once at import time and embedded in the page markup.
EXAMPLE_CARDS_HTML = build_example_cards_html()
def load_example_data(idx_str):
    """Return the example image selected by ``idx_str`` as a JSON payload.

    Args:
        idx_str: Index into ``image_examples``; anything ``int`` can parse
            after ``str(...).strip()``.

    Returns:
        A ``gr.update`` whose value is a JSON string: either
        ``{"status": "ok", "image": <data URL>, "name": <file name>}`` or
        ``{"status": "error", "message": ...}``.
    """
    def _reply(payload):
        # Every outcome is delivered the same way: JSON inside a gr.update.
        return gr.update(value=json.dumps(payload))

    try:
        idx = int(str(idx_str).strip())
    except Exception:
        return _reply({"status": "error", "message": "Invalid example index"})

    if not 0 <= idx < len(image_examples):
        return _reply({"status": "error", "message": "Example index out of range"})

    example_path = image_examples[idx]["image"]
    data_url = file_to_data_url(example_path)
    if not data_url:
        return _reply({"status": "error", "message": "Could not load example image"})

    return _reply({
        "status": "ok",
        "image": data_url,
        "name": os.path.basename(example_path),
    })
def calc_timeout_process(*args, **kwargs):
    """Work out the GPU duration for a call, given that call's arguments.

    The ``gpu_timeout`` keyword wins; when it is absent (or ``None``) the last
    positional argument is used instead.

    Returns:
        The chosen value converted with ``int``, or 60 when nothing usable
        was supplied.
    """
    requested = kwargs.get("gpu_timeout")
    candidate = args[-1] if (requested is None and args) else requested
    try:
        duration = int(candidate)
    except Exception:
        duration = 60
    return duration
def _generate_raw_text(image):
    """Run the model on ``image`` and return the decoded generation."""
    task_prompt = ""
    inputs = processor(images=[image], text=task_prompt, return_tensors="pt").to(device)
    if device.type == "cuda":
        # The model is loaded in bfloat16 on CUDA, so float32 input tensors
        # are cast to match; everything else is passed through unchanged.
        inputs = {
            k: v.to(torch.bfloat16) if hasattr(v, "dtype") and v.dtype == torch.float32 else v
            for k, v in inputs.items()
        }
    with torch.no_grad():
        outputs = model.generate(**inputs, generation_config=generation_config)
    return processor.batch_decode(outputs, skip_special_tokens=True)[0]


def _render_layout(image, classes, bboxes, texts):
    """Draw the detected regions on a copy of ``image`` and assemble the text.

    Returns:
        ``(annotated_image, text)`` where tables are wrapped in a
        ``[Table]`` banner, figures are replaced by a ``[Figure]`` banner and
        every other class contributes its text followed by a newline.
    """
    color_map = {
        "Table": "red",
        "Figure": "blue",
        "Text": "green",
        "Title": "purple",
    }
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    parts = []
    for cls, bbox, txt in zip(classes, bboxes, texts):
        x1, y1, x2, y2 = bbox
        # Normalise the corners so the rectangle is valid whatever order the
        # coordinates arrive in.
        box = [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]
        draw.rectangle(box, outline=color_map.get(cls, "red"), width=3)
        if cls == "Table":
            parts.append(f"\n\n--- [Table] ---\n{txt}\n-----------------\n")
        elif cls == "Figure":
            parts.append("\n\n--- [Figure] ---\n(Figure Detected)\n-----------------\n")
        else:
            parts.append(f"{txt}\n")
    # Join once instead of growing a string inside the loop.
    return annotated, "".join(parts)


@spaces.GPU(duration=calc_timeout_process)
def process_ocr_task_stream(image, gpu_timeout=60):
    """Parse a document image and yield the result as a JSON string.

    Args:
        image: PIL image to parse, or ``None``.
        gpu_timeout: Not read inside the body; it is picked up by
            ``calc_timeout_process`` through the ``spaces.GPU`` decorator.

    Yields:
        One JSON object with the keys ``status`` (``"done"`` or
        ``"error"``), ``text`` and ``annotated`` (a JPEG data URL, or ``""``
        on error). If the raw generation cannot be split into regions, the
        raw text and the unannotated image are returned with status
        ``"done"``.
    """
    try:
        if image is None:
            yield json.dumps({"status": "error", "text": "[ERROR] Please upload an image first.", "annotated": ""})
            return

        generated_text = _generate_raw_text(image)

        try:
            classes, bboxes, texts = extract_classes_bboxes(generated_text)
        except Exception as e:
            print(f"Error extracting boxes: {e}")
            fallback_img = pil_to_data_url(image, "JPEG")
            yield json.dumps({"status": "done", "text": generated_text, "annotated": fallback_img})
            return

        bboxes = [transform_bbox_to_original(bbox, image.width, image.height) for bbox in bboxes]
        processed_texts = [
            postprocess_text(
                text,
                cls=cls,
                table_format="latex",
                text_format="markdown",
                blank_text_in_figures=False,
            )
            for text, cls in zip(texts, classes)
        ]

        result_image, final_output_text = _render_layout(image, classes, bboxes, processed_texts)
        # Nothing usable after post-processing: fall back to the raw output.
        if not final_output_text.strip() and generated_text:
            final_output_text = generated_text

        yield json.dumps({
            "status": "done",
            "text": final_output_text,
            "annotated": pil_to_data_url(result_image, "JPEG"),
        })
    except Exception as e:
        yield json.dumps({"status": "error", "text": f"[ERROR] {str(e)}", "annotated": ""})
    finally:
        # Release Python garbage and cached CUDA memory after every request.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
def run_ocr(image_b64, gpu_timeout_v):
    """Decode the submitted image and stream the OCR payloads for it.

    Args:
        image_b64: Base64 string or ``data:`` URL of the image.
        gpu_timeout_v: Forwarded as ``gpu_timeout`` to
            ``process_ocr_task_stream``.

    Yields:
        The JSON strings produced by ``process_ocr_task_stream``, or a single
        error payload if decoding or streaming raises.
    """
    try:
        decoded = b64_to_pil(image_b64)
        yield from process_ocr_task_stream(image=decoded, gpu_timeout=gpu_timeout_v)
    except Exception as err:
        failure = {"status": "error", "text": f"[ERROR] {str(err)}", "annotated": ""}
        yield json.dumps(failure)
def noop():
    """Do nothing and return ``None``; used as the Python side of JS-only hooks."""
    return
# Inline SVG snippets for the custom HTML front-end.
# NOTE(review): every literal below is empty or whitespace-only in this
# revision (the `f` prefixes have nothing to interpolate) -- presumably the
# SVG markup was lost; confirm against the intended assets.
# Only ANNOTATION_PLACEHOLDER_SVG is referenced elsewhere in this file.
CLOUD_SVG = """
"""
UPLOAD_PREVIEW_SVG = f"""
"""
ANNOTATION_PLACEHOLDER_SVG = f"""
"""
PLAY_SVG = """"""
COPY_SVG = f""""""
SAVE_SVG = f""""""
# Stylesheet for the custom HTML front-end; handed to `launch(css=...)` at the
# bottom of the file. This is an f-string: literal CSS braces are doubled
# (`{{` / `}}`) and `{ACCENT}` is replaced by the accent colour constant.
css = f"""
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap');
*{{box-sizing:border-box;margin:0;padding:0}}
html,body{{height:100%;overflow-x:hidden}}
body,.gradio-container{{
background:#0f0f13!important;
font-family:'Inter',system-ui,-apple-system,sans-serif!important;
font-size:14px!important;color:#e4e4e7!important;min-height:100vh;overflow-x:hidden;
}}
.dark body,.dark .gradio-container{{background:#0f0f13!important;color:#e4e4e7!important}}
footer{{display:none!important}}
.hidden-input{{display:none!important;height:0!important;overflow:hidden!important;margin:0!important;padding:0!important}}
#gradio-run-btn,#example-load-btn{{
position:absolute!important;left:-9999px!important;top:-9999px!important;
width:1px!important;height:1px!important;opacity:0.01!important;
pointer-events:none!important;overflow:hidden!important;
}}
.app-shell{{
background:#18181b;border:1px solid #27272a;border-radius:16px;
margin:12px auto;max-width:1400px;overflow:hidden;
box-shadow:0 25px 50px -12px rgba(0,0,0,.6),0 0 0 1px rgba(255,255,255,.03);
}}
.app-header{{
background:linear-gradient(135deg,#18181b,#1e1e24);border-bottom:1px solid #27272a;
padding:14px 24px;display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:12px;
}}
.app-header-left{{display:flex;align-items:center;gap:12px}}
.app-logo{{
width:38px;height:38px;background:linear-gradient(135deg,{ACCENT},#ff5555,#ff9999);
border-radius:10px;display:flex;align-items:center;justify-content:center;
box-shadow:0 4px 12px rgba(255,0,0,.30);
}}
.app-logo svg{{width:22px;height:22px;fill:#fff;flex-shrink:0}}
.app-title{{
font-size:18px;font-weight:700;background:linear-gradient(135deg,#f5f5f5,#cfc8ff);
-webkit-background-clip:text;-webkit-text-fill-color:transparent;letter-spacing:-.3px;
}}
.app-badge{{
font-size:11px;font-weight:600;padding:3px 10px;border-radius:20px;
background:rgba(255,0,0,.10);color:#ffb3b3;border:1px solid rgba(255,0,0,.24);letter-spacing:.3px;
}}
.app-badge.fast{{background:rgba(255,0,0,.08);color:#ff9d9d;border:1px solid rgba(255,0,0,.20)}}
.app-main-row{{display:flex;gap:0;flex:1;overflow:hidden}}
.app-main-left{{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}}
.app-main-right{{width:520px;display:flex;flex-direction:column;flex-shrink:0;background:#18181b}}
#image-drop-zone{{
position:relative;background:#09090b;height:430px;min-height:430px;max-height:430px;overflow:hidden;
}}
#image-drop-zone.drag-over{{outline:2px solid {ACCENT};outline-offset:-2px;background:rgba(255,0,0,.04)}}
.upload-prompt-modern{{position:absolute;inset:0;display:flex;align-items:center;justify-content:center;padding:20px;z-index:20;overflow:hidden}}
.upload-click-area{{
display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;padding:28px 36px;
max-width:92%;max-height:92%;border:2px dashed #3f3f46;border-radius:16px;background:rgba(255,0,0,.03);
transition:all .2s ease;gap:8px;text-align:center;overflow:hidden;
}}
.upload-click-area:hover{{background:rgba(255,0,0,.08);border-color:{ACCENT};transform:scale(1.02)}}
.upload-click-area:active{{background:rgba(255,0,0,.12);transform:scale(.99)}}
.upload-click-area svg{{width:86px;height:86px;max-width:100%;flex-shrink:0}}
.upload-main-text{{color:#a1a1aa;font-size:14px;font-weight:600;margin-top:4px}}
.upload-sub-text{{color:#71717a;font-size:12px}}
.single-preview-wrap{{width:100%;height:100%;display:none;align-items:center;justify-content:center;padding:16px;overflow:hidden}}
.single-preview-card{{
width:100%;height:100%;max-width:100%;max-height:100%;border-radius:14px;overflow:hidden;border:1px solid #27272a;
background:#111114;display:flex;align-items:center;justify-content:center;position:relative;
}}
.single-preview-card img{{width:100%;height:100%;max-width:100%;max-height:100%;object-fit:contain;display:block}}
.preview-overlay-actions{{position:absolute;top:12px;right:12px;display:flex;gap:8px;z-index:5}}
.preview-action-btn{{
display:inline-flex;align-items:center;justify-content:center;min-width:34px;height:34px;padding:0 12px;background:rgba(0,0,0,.65);
border:1px solid rgba(255,255,255,.14);border-radius:10px;cursor:pointer;color:#fff!important;font-size:12px;font-weight:600;transition:all .15s ease;
}}
.preview-action-btn:hover{{background:{ACCENT};border-color:{ACCENT};color:#fff!important}}
.hint-bar{{
background:rgba(255,0,0,.05);border-top:1px solid #27272a;border-bottom:1px solid #27272a;padding:10px 20px;
font-size:13px;color:#a1a1aa;line-height:1.7;
}}
.hint-bar b{{color:#ffb3b3;font-weight:600}}
.hint-bar kbd{{
display:inline-block;padding:1px 6px;background:#27272a;border:1px solid #3f3f46;border-radius:4px;
font-family:'JetBrains Mono',monospace;font-size:11px;color:#a1a1aa;
}}
.examples-section{{border-top:1px solid #27272a;padding:12px 16px}}
.examples-title{{font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;margin-bottom:10px}}
.examples-scroll{{display:flex;gap:10px;overflow-x:auto;padding-bottom:8px}}
.example-card{{flex-shrink:0;width:220px;background:#09090b;border:1px solid #27272a;border-radius:10px;overflow:hidden;cursor:pointer;transition:all .2s ease}}
.example-card:hover{{border-color:{ACCENT};transform:translateY(-2px);box-shadow:0 4px 12px rgba(255,0,0,.14)}}
.example-card.loading{{opacity:.5;pointer-events:none}}
.example-thumb-wrap{{height:120px;overflow:hidden;background:#18181b}}
.example-thumb-wrap img{{width:100%;height:100%;object-fit:cover}}
.example-thumb-placeholder{{width:100%;height:100%;display:flex;align-items:center;justify-content:center;background:#18181b;color:#3f3f46;font-size:11px}}
.example-meta-row{{padding:6px 10px;display:flex;align-items:center;gap:6px}}
.example-badge{{display:inline-flex;padding:2px 7px;background:rgba(255,0,0,.12);border-radius:4px;font-size:10px;font-weight:600;color:#ffb3b3;font-family:'JetBrains Mono',monospace;white-space:nowrap}}
.example-prompt-text{{padding:0 10px 8px;font-size:11px;color:#a1a1aa;line-height:1.4;display:-webkit-box;-webkit-line-clamp:2;-webkit-box-orient:vertical;overflow:hidden}}
.process-action-wrap{{
padding:14px 20px 12px;
}}
.process-card{{
position:relative;
border:1px solid #2d1a1a;
border-radius:16px;
background:
radial-gradient(circle at top left, rgba(255,0,0,.12), transparent 35%),
linear-gradient(180deg, #17171c 0%, #121217 100%);
overflow:hidden;
box-shadow:0 10px 30px rgba(0,0,0,.28), inset 0 1px 0 rgba(255,255,255,.03);
}}
.process-card::before{{
content:"";
position:absolute;
inset:0;
background:linear-gradient(90deg, transparent, rgba(255,255,255,.03), transparent);
pointer-events:none;
}}
.process-card-top{{
display:flex;
align-items:center;
justify-content:space-between;
gap:10px;
padding:12px 14px 0;
}}
.process-chip{{
display:inline-flex;
align-items:center;
gap:6px;
height:24px;
padding:0 10px;
border-radius:999px;
background:rgba(255,0,0,.10);
border:1px solid rgba(255,0,0,.18);
color:#ffb3b3;
font-size:11px;
font-weight:700;
letter-spacing:.3px;
text-transform:uppercase;
}}
.process-mini-status{{
color:#6b7280;
font-size:11px;
font-family:'JetBrains Mono',monospace;
}}
.btn-run-hero{{
width:100%;
display:flex;
align-items:center;
justify-content:center;
gap:10px;
background:
linear-gradient(135deg, #ff4d4d 0%, {ACCENT} 45%, #d70000 100%);
border:none;
border-top:1px solid rgba(255,255,255,.12);
padding:18px 22px;
cursor:pointer;
font-size:16px;
font-weight:800;
font-family:'Inter',sans-serif;
color:#ffffff!important;
-webkit-text-fill-color:#ffffff!important;
letter-spacing:-.2px;
transition:all .2s ease;
box-shadow:inset 0 1px 0 rgba(255,255,255,.12);
}}
.btn-run-hero:hover{{
background:
linear-gradient(135deg, #ff6b6b 0%, #ff1f1f 45%, {ACCENT} 100%);
transform:translateY(-1px);
box-shadow:0 10px 24px rgba(255,0,0,.22), inset 0 1px 0 rgba(255,255,255,.14);
}}
.btn-run-hero:active{{
transform:translateY(0);
box-shadow:0 4px 12px rgba(255,0,0,.18), inset 0 1px 0 rgba(255,255,255,.10);
}}
.btn-run-hero .hero-icon-wrap{{
width:34px;
height:34px;
border-radius:10px;
display:inline-flex;
align-items:center;
justify-content:center;
background:rgba(255,255,255,.12);
border:1px solid rgba(255,255,255,.18);
flex-shrink:0;
}}
.btn-run-hero .hero-icon-wrap svg{{
width:18px;
height:18px;
fill:#fff!important;
}}
.btn-run-hero .hero-copy{{
display:flex;
flex-direction:column;
align-items:flex-start;
line-height:1.1;
}}
.btn-run-hero .hero-title{{
font-size:16px;
font-weight:800;
color:#fff!important;
}}
.btn-run-hero .hero-sub{{
font-size:11px;
font-weight:600;
opacity:.88;
color:#ffe3e3!important;
margin-top:3px;
}}
#custom-run-btn,#custom-run-btn *,#run-btn-label,.btn-run-hero,.btn-run-hero *{{
color:#ffffff!important;
-webkit-text-fill-color:#ffffff!important;
fill:#ffffff!important;
}}
.annot-frame{{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}}
.annot-title{{padding:10px 20px;font-size:13px;font-weight:700;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);color:#fff}}
.annot-body{{background:#09090b;height:320px;display:flex;align-items:center;justify-content:center;padding:12px;position:relative;overflow:hidden}}
.annot-body img{{max-width:100%;max-height:100%;object-fit:contain;border:1px solid #27272a;border-radius:10px;background:#111114;display:none;position:relative;z-index:2}}
.annot-placeholder{{position:absolute;inset:0;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:10px;color:#666;z-index:1;padding:16px;text-align:center}}
.annot-placeholder svg{{width:92px;height:92px;max-width:100%;opacity:.95}}
.annot-placeholder-title{{font-size:13px;font-weight:600;color:#ffb3b3}}
.annot-placeholder-sub{{font-size:12px;color:#666;max-width:260px;line-height:1.5}}
.output-frame{{border-bottom:1px solid #27272a;display:flex;flex-direction:column;position:relative}}
.output-frame .out-title,.output-frame .out-title *,#output-title-label{{color:#ffffff!important;-webkit-text-fill-color:#ffffff!important}}
.output-frame .out-title{{
padding:10px 20px;font-size:13px;font-weight:700;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6);
display:flex;align-items:center;justify-content:space-between;gap:8px;flex-wrap:wrap;
}}
.out-title-right{{display:flex;gap:8px;align-items:center}}
.out-action-btn{{
display:inline-flex;align-items:center;justify-content:center;background:rgba(255,0,0,.10);border:1px solid rgba(255,0,0,.2);border-radius:6px;
cursor:pointer;padding:3px 10px;font-size:11px;font-weight:500;color:#ffb3b3!important;gap:4px;height:24px;transition:all .15s;
}}
.out-action-btn:hover{{background:rgba(255,0,0,.2);border-color:rgba(255,0,0,.35);color:#ffffff!important}}
.out-action-btn svg{{width:12px;height:12px;fill:{ACCENT}}}
.output-frame .out-body{{flex:1;background:#09090b;display:flex;align-items:stretch;justify-content:stretch;overflow:hidden;min-height:320px;position:relative}}
.output-scroll-wrap{{width:100%;height:100%;padding:0;overflow:hidden}}
.output-textarea{{
width:100%;height:320px;min-height:320px;max-height:320px;background:#09090b;color:#e4e4e7;border:none;outline:none;padding:16px 18px;
font-size:13px;line-height:1.6;font-family:'JetBrains Mono',monospace;overflow:auto;resize:none;white-space:pre-wrap;
}}
.output-textarea::placeholder{{color:#52525b}}
.modern-loader{{
display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);z-index:15;flex-direction:column;
align-items:center;justify-content:center;gap:16px;backdrop-filter:blur(4px);
}}
.modern-loader.active{{display:flex}}
.modern-loader .loader-spinner{{width:36px;height:36px;border:3px solid #27272a;border-top-color:{ACCENT};border-radius:50%;animation:spin .8s linear infinite}}
@keyframes spin{{to{{transform:rotate(360deg)}}}}
.modern-loader .loader-text{{font-size:13px;color:#a1a1aa;font-weight:500}}
.loader-bar-track{{width:200px;height:4px;background:#27272a;border-radius:2px;overflow:hidden}}
.loader-bar-fill{{height:100%;background:linear-gradient(90deg,{ACCENT},#ff7777,{ACCENT});background-size:200% 100%;animation:shimmer 1.5s ease-in-out infinite;border-radius:2px}}
@keyframes shimmer{{0%{{background-position:200% 0}}100%{{background-position:-200% 0}}}}
.settings-group{{border:1px solid #27272a;border-radius:10px;margin:12px 16px;padding:0;overflow:hidden}}
.settings-group-title{{font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5)}}
.settings-group-body{{padding:14px 16px;display:flex;flex-direction:column;gap:12px}}
.slider-row{{display:flex;align-items:center;gap:10px;min-height:28px}}
.slider-row label{{font-size:13px;font-weight:500;color:#a1a1aa;min-width:118px;flex-shrink:0}}
.slider-row input[type="range"]{{flex:1;-webkit-appearance:none;appearance:none;height:6px;background:#27272a;border-radius:3px;outline:none;min-width:0}}
.slider-row input[type="range"]::-webkit-slider-thumb{{-webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,{ACCENT},#d80000);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(255,0,0,.35);transition:transform .15s}}
.slider-row input[type="range"]::-webkit-slider-thumb:hover{{transform:scale(1.2)}}
.slider-row .slider-val{{min-width:58px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa;flex-shrink:0}}
.app-statusbar{{background:#18181b;border-top:1px solid #27272a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center;font-size:12px}}
.app-statusbar .sb-section{{padding:0 12px;flex:1;display:flex;align-items:center;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b;overflow:hidden;white-space:nowrap}}
.app-statusbar .sb-section.sb-fixed{{flex:0 0 auto;min-width:110px;text-align:center;justify-content:center;padding:3px 12px;background:rgba(255,0,0,.08);border-radius:6px;color:#ffb3b3;font-weight:500}}
.exp-note{{padding:10px 20px;font-size:12px;color:#52525b;border-top:1px solid #27272a;text-align:center}}
.exp-note a{{color:#ffb3b3;text-decoration:none}}
.exp-note a:hover{{text-decoration:underline}}
.toast-notification{{
position:fixed;top:24px;left:50%;transform:translateX(-50%) translateY(-120%);z-index:9999;padding:10px 24px;
border-radius:10px;font-family:'Inter',sans-serif;font-size:14px;font-weight:600;display:flex;align-items:center;
gap:8px;box-shadow:0 8px 24px rgba(0,0,0,.5);transition:transform .35s cubic-bezier(.34,1.56,.64,1),opacity .35s ease;opacity:0;pointer-events:none;
}}
.toast-notification.visible{{transform:translateX(-50%) translateY(0);opacity:1;pointer-events:auto}}
.toast-notification.error{{background:linear-gradient(135deg,#dc2626,#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}}
.toast-notification.warning{{background:linear-gradient(135deg,{ACCENT},#b91c1c);color:#fff;border:1px solid rgba(255,255,255,.15)}}
.toast-notification.info{{background:linear-gradient(135deg,#ff6b6b,{ACCENT});color:#fff;border:1px solid rgba(255,255,255,.15)}}
::-webkit-scrollbar{{width:8px;height:8px}}
::-webkit-scrollbar-track{{background:#09090b}}
::-webkit-scrollbar-thumb{{background:#27272a;border-radius:4px}}
::-webkit-scrollbar-thumb:hover{{background:#3f3f46}}
@media(max-width:980px){{
.app-main-row{{flex-direction:column}}
.app-main-right{{width:100%}}
.app-main-left{{border-right:none;border-bottom:1px solid #27272a}}
}}
"""
# Front-end controller, run once on page load through `demo.load(js=...)`.
# It wires the custom HTML widgets (upload / preview / examples / run / copy /
# save) to the hidden Gradio components, addressing them by element id.
# Kept as a raw string so the JavaScript `\uXXXX` escapes reach the browser
# untouched.
gallery_js = r"""
() => {
  function init() {
    if (window.__nemotronInitDone) return;
    const dropZone = document.getElementById('image-drop-zone');
    const uploadPrompt = document.getElementById('upload-prompt');
    const uploadClick = document.getElementById('upload-click-area');
    const fileInput = document.getElementById('custom-file-input');
    const previewWrap = document.getElementById('single-preview-wrap');
    const previewImg = document.getElementById('single-preview-img');
    const btnUpload = document.getElementById('preview-upload-btn');
    const btnClear = document.getElementById('preview-clear-btn');
    const runBtnEl = document.getElementById('custom-run-btn');
    const outputArea = document.getElementById('custom-output-textarea');
    const annotImg = document.getElementById('annotated-output-img');
    const annotPlaceholder = document.getElementById('annotated-output-placeholder');
    const imgStatus = document.getElementById('sb-image-status');
    if (!dropZone || !fileInput || !previewWrap || !previewImg) {
      setTimeout(init, 250);
      return;
    }
    window.__nemotronInitDone = true;
    let imageState = null;
    let toastTimer = null;
    let lastSeenExamplePayload = null;

    function showToast(message, type) {
      let toast = document.getElementById('app-toast');
      if (!toast) {
        toast = document.createElement('div');
        toast.id = 'app-toast';
        toast.className = 'toast-notification';
        document.body.appendChild(toast);
      }
      // Create the icon/text nodes on demand so they can never be null.
      let icon = toast.querySelector('.toast-icon');
      if (!icon) {
        icon = document.createElement('span');
        icon.className = 'toast-icon';
        toast.appendChild(icon);
      }
      let text = toast.querySelector('.toast-text');
      if (!text) {
        text = document.createElement('span');
        text.className = 'toast-text';
        toast.appendChild(text);
      }
      toast.className = 'toast-notification ' + (type || 'error');
      if (type === 'warning') icon.textContent = '\u26A0';
      else if (type === 'info') icon.textContent = '\u2139';
      else icon.textContent = '\u2717';
      text.textContent = message;
      if (toastTimer) clearTimeout(toastTimer);
      void toast.offsetWidth;
      toast.classList.add('visible');
      toastTimer = setTimeout(() => toast.classList.remove('visible'), 3500);
    }

    function showLoader() {
      const l = document.getElementById('output-loader');
      if (l) l.classList.add('active');
      const sb = document.getElementById('sb-run-state');
      if (sb) sb.textContent = 'Processing...';
    }

    function hideLoader() {
      const l = document.getElementById('output-loader');
      if (l) l.classList.remove('active');
      const sb = document.getElementById('sb-run-state');
      if (sb) sb.textContent = 'Done';
    }

    function setRunErrorState() {
      const l = document.getElementById('output-loader');
      if (l) l.classList.remove('active');
      const sb = document.getElementById('sb-run-state');
      if (sb) sb.textContent = 'Error';
    }

    window.__showToast = showToast;
    window.__showLoader = showLoader;
    window.__hideLoader = hideLoader;
    window.__setRunErrorState = setRunErrorState;

    function setGradioValue(containerId, value) {
      const container = document.getElementById(containerId);
      if (!container) return false;
      const el = container.querySelector('textarea, input');
      if (!el) return false;
      const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
      const ns = Object.getOwnPropertyDescriptor(proto, 'value');
      if (ns && ns.set) {
        ns.set.call(el, value);
        el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
        el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
        return true;
      }
      return false;
    }

    function getValueFromContainer(containerId) {
      const container = document.getElementById(containerId);
      if (!container) return '';
      const el = container.querySelector('textarea, input');
      return el ? (el.value || '') : '';
    }

    function syncImageToGradio() {
      setGradioValue('hidden-image-b64', imageState ? imageState.b64 : '');
      if (imgStatus) imgStatus.textContent = imageState ? '1 image uploaded' : 'No image uploaded';
    }

    function updateAnnotationState(src) {
      if (!annotImg || !annotPlaceholder) return;
      if (src) {
        annotImg.src = src;
        annotImg.style.display = 'block';
        annotPlaceholder.style.display = 'none';
      } else {
        annotImg.src = '';
        annotImg.style.display = 'none';
        annotPlaceholder.style.display = 'flex';
      }
    }

    function setPreview(b64, name) {
      imageState = {b64, name: name || 'image'};
      previewImg.src = b64;
      previewWrap.style.display = 'flex';
      if (uploadPrompt) uploadPrompt.style.display = 'none';
      syncImageToGradio();
    }

    function clearPreview() {
      imageState = null;
      previewImg.src = '';
      previewWrap.style.display = 'none';
      if (uploadPrompt) uploadPrompt.style.display = 'flex';
      syncImageToGradio();
      updateAnnotationState('');
    }

    window.__setPreview = setPreview;
    window.__clearPreview = clearPreview;
    window.__updateAnnotationState = updateAnnotationState;

    function processFile(file) {
      if (!file) return;
      if (!file.type.startsWith('image/')) {
        showToast('Only image files are supported', 'error');
        return;
      }
      const reader = new FileReader();
      reader.onload = (e) => setPreview(e.target.result, file.name);
      reader.readAsDataURL(file);
    }

    fileInput.addEventListener('change', (e) => {
      const file = e.target.files && e.target.files[0] ? e.target.files[0] : null;
      if (file) processFile(file);
      e.target.value = '';
    });
    if (uploadClick) uploadClick.addEventListener('click', () => fileInput.click());
    if (btnUpload) btnUpload.addEventListener('click', () => fileInput.click());
    if (btnClear) btnClear.addEventListener('click', clearPreview);

    dropZone.addEventListener('dragover', (e) => {
      e.preventDefault();
      dropZone.classList.add('drag-over');
    });
    dropZone.addEventListener('dragleave', (e) => {
      e.preventDefault();
      dropZone.classList.remove('drag-over');
    });
    dropZone.addEventListener('drop', (e) => {
      e.preventDefault();
      dropZone.classList.remove('drag-over');
      if (e.dataTransfer.files && e.dataTransfer.files.length) processFile(e.dataTransfer.files[0]);
    });

    function syncSlider(customId, gradioId) {
      const slider = document.getElementById(customId);
      const valSpan = document.getElementById(customId + '-val');
      if (!slider) return;
      slider.addEventListener('input', () => {
        if (valSpan) valSpan.textContent = slider.value;
        const container = document.getElementById(gradioId);
        if (!container) return;
        container.querySelectorAll('input[type="range"],input[type="number"]').forEach(el => {
          const ns = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value');
          if (ns && ns.set) {
            ns.set.call(el, slider.value);
            el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
            el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
          }
        });
      });
    }
    syncSlider('custom-gpu-duration', 'gradio-gpu-duration');

    window.__clickGradioRunBtn = function() {
      if (!imageState) {
        showToast('Please upload an image first', 'error');
        return;
      }
      if (outputArea) outputArea.value = '';
      // Blank the hidden result box so a run that returns the same payload
      // as the previous one still registers as a change.
      setGradioValue('gradio-result', '');
      updateAnnotationState('');
      showLoader();
      setTimeout(() => {
        const wrap = document.getElementById('gradio-run-btn');
        const btn = wrap ? (wrap.querySelector('button') || wrap) : null;
        if (!btn) {
          setRunErrorState();
          return;
        }
        btn.click();
      }, 180);
    };
    if (runBtnEl) runBtnEl.addEventListener('click', () => window.__clickGradioRunBtn());

    const copyBtn = document.getElementById('copy-output-btn');
    if (copyBtn) {
      copyBtn.addEventListener('click', async () => {
        try {
          const text = outputArea ? outputArea.value : '';
          if (!text.trim()) return showToast('No output to copy', 'warning');
          await navigator.clipboard.writeText(text);
          showToast('Output copied to clipboard', 'info');
        } catch(e) {
          showToast('Copy failed', 'error');
        }
      });
    }

    const saveBtn = document.getElementById('save-output-btn');
    if (saveBtn) {
      saveBtn.addEventListener('click', () => {
        const text = outputArea ? outputArea.value : '';
        if (!text.trim()) return showToast('No output to save', 'warning');
        const blob = new Blob([text], {type: 'text/plain;charset=utf-8'});
        const a = document.createElement('a');
        a.href = URL.createObjectURL(blob);
        a.download = 'nemotron_parse_output.txt';
        document.body.appendChild(a);
        a.click();
        setTimeout(() => {
          URL.revokeObjectURL(a.href);
          document.body.removeChild(a);
        }, 200);
        showToast('Output saved', 'info');
      });
    }

    function applyExamplePayload(raw) {
      try {
        const data = JSON.parse(raw);
        if (data.status === 'ok') {
          if (data.image) setPreview(data.image, data.name || 'example.jpg');
          document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
          showToast('Example loaded', 'info');
        } else {
          document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
          showToast(data.message || 'Failed to load example', 'error');
        }
      } catch(e) {
        document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
      }
    }

    function triggerExampleLoad(idx) {
      // Forget the previous payload: loading the same example again returns
      // an identical string, which would otherwise be ignored and leave the
      // card stuck in its loading state.
      lastSeenExamplePayload = null;
      setGradioValue('example-idx-input', String(idx));
      setGradioValue('example-result-data', '');
      const wrap = document.getElementById('example-load-btn');
      const btn = wrap ? (wrap.querySelector('button') || wrap) : null;
      if (btn) btn.click();
    }

    document.querySelectorAll('.example-card[data-idx]').forEach(card => {
      card.addEventListener('click', () => {
        const idx = card.getAttribute('data-idx');
        document.querySelectorAll('.example-card.loading').forEach(c => c.classList.remove('loading'));
        card.classList.add('loading');
        showToast('Loading example...', 'info');
        triggerExampleLoad(idx);
      });
    });

    const observerTarget = document.getElementById('example-result-data');
    if (observerTarget) {
      const obs = new MutationObserver(() => {
        const current = getValueFromContainer('example-result-data');
        if (!current || current === lastSeenExamplePayload) return;
        lastSeenExamplePayload = current;
        applyExamplePayload(current);
      });
      obs.observe(observerTarget, {childList:true, subtree:true, characterData:true, attributes:true});
    }

    updateAnnotationState('');
    if (outputArea) outputArea.value = '';
    const sb = document.getElementById('sb-run-state');
    if (sb) sb.textContent = 'Ready';
    if (imgStatus) imgStatus.textContent = 'No image uploaded';
  }
  init();
}
"""
# Second page-load script: mirrors the hidden `gradio-result` textbox into the
# custom output textarea. Each new value is parsed as JSON with `text`,
# `annotated` and `status` fields; a value that is not valid JSON is shown
# verbatim. Changes are detected by comparing against the last value seen,
# driven by both a MutationObserver and a 500 ms poll. The `window.__*`
# helpers it calls are looked up at call time and skipped when missing.
wire_outputs_js = r"""
() => {
function watchOutputs() {
const resultContainer = document.getElementById('gradio-result');
const outArea = document.getElementById('custom-output-textarea');
if (!resultContainer || !outArea) { setTimeout(watchOutputs, 500); return; }
let lastText = '';
function syncOutput() {
const el = resultContainer.querySelector('textarea') || resultContainer.querySelector('input');
if (!el) return;
const val = el.value || '';
if (val !== lastText) {
lastText = val;
try {
const data = JSON.parse(val);
outArea.value = data.text || '';
outArea.scrollTop = outArea.scrollHeight;
if (data.annotated && window.__updateAnnotationState) {
window.__updateAnnotationState(data.annotated);
}
if (data.status === 'error') {
if (window.__setRunErrorState) window.__setRunErrorState();
if (window.__showToast) window.__showToast('Inference failed', 'error');
} else if (data.status === 'done') {
if (window.__hideLoader) window.__hideLoader();
}
} catch (e) {
outArea.value = val;
outArea.scrollTop = outArea.scrollHeight;
}
}
}
const observer = new MutationObserver(syncOutput);
observer.observe(resultContainer, {childList:true, subtree:true, characterData:true, attributes:true});
setInterval(syncOutput, 500);
}
watchOutputs();
}
"""
# Assemble the Gradio app: a handful of hidden components act as the data
# channel between Python and the custom HTML/JS front-end, which addresses
# them by `elem_id`.
with gr.Blocks() as demo:
    # Current image as a base64 data URL, written by the front-end script.
    hidden_image_b64 = gr.Textbox(value="", elem_id="hidden-image-b64", elem_classes="hidden-input", container=False)
    # GPU duration chosen with the custom slider; passed to `run_ocr`.
    gpu_duration_state = gr.Number(value=60, elem_id="gradio-gpu-duration", elem_classes="hidden-input", container=False)
    # JSON payload produced by `run_ocr`; watched by `wire_outputs_js`.
    result = gr.Textbox(value="", elem_id="gradio-result", elem_classes="hidden-input", container=False)
    # Request / response pair used to load one of the bundled examples.
    example_idx = gr.Textbox(value="", elem_id="example-idx-input", elem_classes="hidden-input", container=False)
    example_result = gr.Textbox(value="", elem_id="example-result-data", elem_classes="hidden-input", container=False)
    # Clicked programmatically by the front-end script, not by the user.
    example_load_btn = gr.Button("Load Example", elem_id="example-load-btn")
    # NOTE(review): the template below contains only text in this revision;
    # the scripts expect elements such as `image-drop-zone` and
    # `custom-run-btn`, so the markup was presumably lost -- confirm.
    gr.HTML(f"""
Upload: Click or drag to add a document image ·
Output: Parsed markdown / latex and layout detection ·
Clear removes the current image
Quick Examples
{EXAMPLE_CARDS_HTML}
Document Parse
OCR / Layout / Tables
Layout Detection
{ANNOTATION_PLACEHOLDER_SVG}
Annotated layout preview will appear here
Detected tables, figures, titles, and text regions will be visualized after processing.
Parsed Content (Markdown/LaTeX)
Layout-aware parsing for text, tables, figures, and document structure
""")
    # Clicked programmatically by the front-end script, not by the user.
    run_btn = gr.Button("Run", elem_id="gradio-run-btn")
    # Both page-load hooks exist only to run their JavaScript; `noop` is the
    # Python side.
    demo.load(fn=noop, inputs=None, outputs=None, js=gallery_js)
    demo.load(fn=noop, inputs=None, outputs=None, js=wire_outputs_js)
    # The JS pre-processor re-reads the image from the DOM textbox and falls
    # back to the value Gradio passed in when the element cannot be found.
    run_btn.click(
        fn=run_ocr,
        inputs=[hidden_image_b64, gpu_duration_state],
        outputs=[result],
        js=r"""(img, gd) => {
const imgContainer = document.getElementById('hidden-image-b64');
let imgVal = img;
if (imgContainer) {
const inner = imgContainer.querySelector('textarea, input');
if (inner) imgVal = inner.value;
}
return [imgVal, gd];
}"""
    )
    # Example loading bypasses the queue (`queue=False`).
    example_load_btn.click(
        fn=load_example_data,
        inputs=[example_idx],
        outputs=[example_result],
        queue=False,
    )
if __name__ == "__main__":
    # Launch settings collected in one place; the stylesheet defined above is
    # supplied here and the bundled `examples` directory is whitelisted.
    launch_options = dict(
        css=css,
        mcp_server=True,
        ssr_mode=False,
        show_error=True,
        allowed_paths=["examples"],
    )
    # Queue requests (at most 30 waiting) and start the server.
    demo.queue(max_size=30).launch(**launch_options)