HearthNet-Nemotron

Running on Zero

GitHub Actions

fix(ui): disable broken tabs + add @spaces.GPU + MiniCPM trust_remote_code note

fb17651 10 days ago

22.9 kB

	"""HearthNet Document Intelligence — Nemotron-powered second Space.

	A standalone Gradio app focused entirely on document intelligence using
	NVIDIA Nemotron models. Can run independently OR as part of a HearthNet mesh.

	Deploy as a second HF Space alongside the main HearthNet mesh Space.

	Prize targets:
	- NVIDIA Nemotron Hardware Prize (RTX 5080): Build with Nemotron models ✅
	- 🐜 Tiny Titan: Nemotron-nano-8B is 8B params (under 32B) ✅
	- 🎨 Off Brand: Custom-styled beyond default Gradio look ✅

	Usage:
	python app_nemotron.py

	Environment:
	NVIDIA_API_KEY — NVIDIA NIM API key (get free at build.nvidia.com)
	NEMOTRON_URL — local NIM endpoint (optional, for offline use)
	HEARTHNET_NODE — URL of a HearthNet mesh node to push results into
	"""

	from __future__ import annotations

	import asyncio
	import os

	import gradio as gr

	# HF Spaces GPU support: record whether the optional `spaces` package could be
	# imported so later code can decide whether to use `spaces.GPU`.
	try:
	    import spaces
	except ImportError:
	    HAS_SPACES = False
	else:
	    HAS_SPACES = True

	# ── Optional mesh connection ──────────────────────────────────────────────────
	# Each setting is read once at import time and is "" when the variable is unset.
	_MESH_NODE = os.environ.get("HEARTHNET_NODE", "")
	_NVIDIA_KEY = os.environ.get("NVIDIA_API_KEY", "")
	_NEMOTRON_URL = os.environ.get("NEMOTRON_URL", "")

	# ── Nemotron model catalogue ──────────────────────────────────────────────────
	# UI label → model identifier placed in the chat-completions "model" field.
	# Order is significant: the first entry is what callers use as their default.
	# NOTE(review): confirm these identifiers against the current NVIDIA NIM catalogue.
	_MODELS = dict(
	    [
	        ("Nemotron Nano 8B (fast)", "nvidia/llama-3.1-nemotron-nano-8b-instruct"),
	        ("Nemotron Super 49B (deep)", "nvidia/llama-3.3-nemotron-super-49b-v1"),
	        ("Nemotron 70B (balanced)", "nvidia/llama-3.1-nemotron-70b-instruct"),
	    ]
	)

	# Schema presets: preset name → JSON schema template (kept as text, not parsed).
	# The chosen template is interpolated verbatim into the extraction system prompt
	# and is also what the schema editor shows when a preset is selected.
	_SCHEMAS = {
	"Invoice / Receipt": """{
	"vendor": "string",
	"date": "string",
	"total_amount": "number",
	"currency": "string",
	"line_items": [{"description": "string", "amount": "number"}],
	"tax": "number"
	}""",
	"Medical Form": """{
	"patient_name": "string",
	"date_of_birth": "string",
	"diagnosis": ["string"],
	"medications": ["string"],
	"doctor": "string",
	"date": "string"
	}""",
	"Legal Document": """{
	"document_type": "string",
	"parties": ["string"],
	"effective_date": "string",
	"key_obligations": ["string"],
	"governing_law": "string"
	}""",
	"Meeting Notes": """{
	"date": "string",
	"attendees": ["string"],
	"decisions": ["string"],
	"action_items": [{"owner": "string", "task": "string", "due": "string"}]
	}""",
	# Sentinel preset: when selected, the user-edited schema text is used instead
	# of a template from this table.
	"Custom (edit below)": "{}",
	}

	# ── Custom HearthNet theme ────────────────────────────────────────────────────
	# Step 1: Soft base theme with the orange / purple / gray palette and Inter font.
	_theme = gr.themes.Soft(
	    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "sans-serif"],
	    neutral_hue=gr.themes.colors.gray,
	    secondary_hue=gr.themes.colors.purple,
	    primary_hue=gr.themes.colors.orange,
	)
	# Step 2: override individual theme variables on top of the base theme.
	_theme = _theme.set(
	    block_border_width="1px",
	    block_title_text_weight="600",
	    button_primary_background_fill_hover="*primary_600",
	    button_primary_background_fill="*primary_500",
	)


	# ── Core functions ────────────────────────────────────────────────────────────

	def _get_endpoint(api_key: str) -> str:
	    """Return the base URL of the chat-completions API to call.

	    A configured ``NEMOTRON_URL`` takes precedence over the hosted NVIDIA
	    endpoint. The configured value is normalised so the result ends in exactly
	    one ``/v1`` whether or not the value already includes it.

	    Args:
	        api_key: Unused; kept so existing callers do not need to change.

	    Returns:
	        The base URL without a trailing slash; callers append
	        ``/chat/completions``.
	    """
	    base = _NEMOTRON_URL.strip().rstrip("/")
	    if not base:
	        return "https://integrate.api.nvidia.com/v1"
	    # Tolerate a value that already ends in "/v1" so the request path does not
	    # become ".../v1/v1/chat/completions".
	    if base.endswith("/v1"):
	        return base
	    return base + "/v1"


	def _run_async(coro):
	    """Drive ``coro`` to completion from synchronous code and return its result.

	    ``asyncio.run`` is used directly when the calling thread has no running
	    event loop. Otherwise the coroutine is handed to ``asyncio.run`` on a
	    single worker thread and this call blocks until that thread finishes.
	    """
	    try:
	        asyncio.get_running_loop()
	    except RuntimeError:
	        # get_running_loop() raises when this thread has no running loop.
	        inside_loop = False
	    else:
	        inside_loop = True

	    if not inside_loop:
	        return asyncio.run(coro)

	    from concurrent.futures import ThreadPoolExecutor

	    with ThreadPoolExecutor(max_workers=1) as executor:
	        return executor.submit(asyncio.run, coro).result()


	def _chatml_prompt(messages: list) -> str:
	    """Render chat messages as a ChatML prompt ending in an open assistant turn.

	    Messages whose role is not system/user/assistant are skipped; a missing
	    role is treated as ``user`` and missing content as the empty string.
	    """
	    turns = []
	    for message in messages:
	        role = message.get("role", "user")
	        if role not in ("system", "user", "assistant"):
	            continue
	        content = message.get("content", "")
	        # The markers must be the literal "<|im_start|>" / "<|im_end|>"; any
	        # backslash inside them would end up in the prompt text.
	        turns.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
	    turns.append("<|im_start|>assistant\n")
	    return "".join(turns)


	def _local_smol_chat(messages: list, max_tokens: int = 512) -> str:
	    """SmolLM2-135M local fallback — no API key required.

	    Args:
	        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
	        max_tokens: Maximum number of new tokens to generate.

	    Returns:
	        The generated text with surrounding whitespace removed, or a
	        ``[SmolLM2 unavailable: ...]`` marker if loading or generation fails
	        for any reason (this fallback is deliberately best-effort).
	    """
	    try:
	        from transformers import pipeline as _pipeline  # type: ignore[import-untyped]

	        _smol_id = "HuggingFaceTB/SmolLM2-135M-Instruct"
	        pipe = _pipeline("text-generation", model=_smol_id, device_map="auto", torch_dtype="auto")
	        result = pipe(
	            _chatml_prompt(messages),
	            max_new_tokens=max_tokens,
	            return_full_text=False,
	            do_sample=False,
	        )
	        return result[0]["generated_text"].strip()
	    except Exception as exc:
	        return f"[SmolLM2 unavailable: {exc}]"


	async def _nemotron_chat(messages: list, model: str, api_key: str, temperature: float = 0.1) -> str:
	    """POST one chat-completions request and return the first choice's content.

	    Args:
	        messages: Chat messages forwarded unchanged in the request body.
	        model: Model identifier for the ``model`` field.
	        api_key: Bearer token; the Authorization header is omitted when empty.
	        temperature: Sampling temperature forwarded in the request body.

	    Raises:
	        httpx.HTTPStatusError: If the server replies with an error status.
	    """
	    import httpx

	    url = f"{_get_endpoint(api_key)}/chat/completions"

	    request_headers = {"Content-Type": "application/json"}
	    if api_key:
	        request_headers["Authorization"] = f"Bearer {api_key}"

	    body = {
	        "model": model,
	        "messages": messages,
	        "temperature": temperature,
	        "max_tokens": 2048,
	    }

	    async with httpx.AsyncClient(timeout=60.0) as client:
	        response = await client.post(url, json=body, headers=request_headers)
	        response.raise_for_status()
	        first_choice = response.json()["choices"][0]
	        return first_choice["message"]["content"]


	@spaces.GPU if HAS_SPACES else lambda f: f
	def extract_structured(
	    doc_text: str,
	    schema_preset: str,
	    custom_schema: str,
	    model_label: str,
	    api_key: str,
	) -> tuple[str, str]:
	    """Extract structured data from a document as JSON.

	    The function is decorated with ``spaces.GPU`` when the ``spaces`` package
	    was importable, and left undecorated otherwise (e.g. local testing).
	    A Nemotron model is used when an API key or ``NEMOTRON_URL`` is available;
	    otherwise the local SmolLM2 fallback is used.

	    Args:
	        doc_text: Document text; only the first 5000 characters are sent.
	        schema_preset: Key into ``_SCHEMAS``, or "Custom (edit below)".
	        custom_schema: Schema text used when the custom preset is selected.
	        model_label: Key into ``_MODELS``; unknown labels use the first model.
	        api_key: Per-request key; falls back to ``NVIDIA_API_KEY``.

	    Returns:
	        ``(output, status)``. ``output`` is pretty-printed JSON on success, the
	        raw model text when it is not JSON, or a JSON ``{"error": ...}`` object
	        on failure. ``status`` is a one-line message for the UI.
	    """
	    import json

	    doc_text = doc_text or ""
	    if not doc_text.strip():
	        return '{"error": "No document text provided"}', "⚠ Provide document text"

	    key = (api_key or "").strip() or _NVIDIA_KEY
	    if schema_preset == "Custom (edit below)":
	        schema = (custom_schema or "").strip()
	    else:
	        # An unknown or empty preset falls back to an empty schema instead of
	        # raising KeyError before the error handling below is reached.
	        schema = _SCHEMAS.get(schema_preset, "{}")
	    model = _MODELS.get(model_label, list(_MODELS.values())[0])

	    system = (
	        "You are a precise structured data extraction engine. "
	        "Extract information from the document and return ONLY valid JSON "
	        f"matching this exact schema:\n{schema}\n"
	        "If a field is not found, use null. Never add fields not in the schema."
	    )
	    messages = [
	        {"role": "system", "content": system},
	        {"role": "user", "content": f"Document:\n\n{doc_text[:5000]}"},
	    ]

	    try:
	        if key or _NEMOTRON_URL:
	            raw = _run_async(_nemotron_chat(messages, model, key, temperature=0.05))
	            label = f"✓ Extracted with {model_label}"
	        else:
	            raw = _local_smol_chat(messages, max_tokens=512)
	            label = "✓ Extracted with SmolLM2-135M (local fallback)"

	        # Accept replies wrapped in a Markdown code fence (```json ... ```),
	        # which would otherwise be reported as non-JSON.
	        candidate = raw.strip()
	        if candidate.startswith("```"):
	            candidate = candidate.strip("`").removeprefix("json")

	        try:
	            parsed = json.loads(candidate)
	        except json.JSONDecodeError:
	            return raw, "⚠ Model returned non-JSON (shown as-is)"
	        return json.dumps(parsed, indent=2, ensure_ascii=False), label
	    except Exception as exc:
	        # json.dumps escapes quotes/newlines in the message so the error
	        # payload is always valid JSON.
	        return json.dumps({"error": str(exc)}, ensure_ascii=False), f"⚠ Error: {exc}"


	def ask_document(doc_text: str, question: str, model_label: str, api_key: str) -> str:
	    """Answer a question about a document.

	    Returns a short prompt to the user when the document or question is blank.
	    Otherwise the first 4000 characters of the document plus the question are
	    sent to Nemotron (if a key or ``NEMOTRON_URL`` is available) or to the local
	    SmolLM2 fallback. Failures are returned as an ``Error: ...`` string.
	    """
	    if not doc_text.strip():
	        return "Provide a document first."
	    if not question.strip():
	        return "Ask a question."

	    resolved_key = api_key.strip() or _NVIDIA_KEY
	    model_id = _MODELS.get(model_label, list(_MODELS.values())[0])

	    system_prompt = (
	        "Answer questions about the document concisely and accurately. "
	        "Cite specific parts of the document when relevant."
	    )
	    user_prompt = f"Document:\n\n{doc_text[:4000]}\n\nQuestion: {question}"
	    chat = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": user_prompt},
	    ]

	    try:
	        if resolved_key or _NEMOTRON_URL:
	            answer = _run_async(_nemotron_chat(chat, model_id, resolved_key, temperature=0.3))
	        else:
	            answer = _local_smol_chat(chat, max_tokens=512)
	    except Exception as exc:
	        answer = f"Error: {exc}"
	    return answer


	def summarise_document(doc_text: str, style: str, model_label: str, api_key: str) -> str:
	    """Summarise a document in the requested style.

	    ``style`` selects the system instruction; an unrecognised style uses a
	    generic "Summarise the document." instruction. The first 5000 characters
	    of the document are sent to Nemotron (if a key or ``NEMOTRON_URL`` is
	    available) or to the local SmolLM2 fallback. Failures are returned as an
	    ``Error: ...`` string.
	    """
	    if not doc_text.strip():
	        return "Provide a document first."

	    resolved_key = api_key.strip() or _NVIDIA_KEY
	    model_id = _MODELS.get(model_label, list(_MODELS.values())[0])

	    instructions_by_style = {
	        "Executive (3 bullets)": "Summarise in exactly 3 bullet points for an executive audience.",
	        "Detailed (paragraph)": "Write a thorough 2-paragraph summary covering all key points.",
	        "ELI5 (simple)": "Explain this document as simply as possible, as if to a 10-year-old.",
	        "Action items only": "List only the action items, decisions, and next steps.",
	    }
	    instruction = instructions_by_style.get(style, "Summarise the document.")
	    chat = [
	        {"role": "system", "content": instruction},
	        {"role": "user", "content": f"Document:\n\n{doc_text[:5000]}"},
	    ]

	    try:
	        if resolved_key or _NEMOTRON_URL:
	            summary = _run_async(_nemotron_chat(chat, model_id, resolved_key, temperature=0.4))
	        else:
	            summary = _local_smol_chat(chat, max_tokens=512)
	    except Exception as exc:
	        summary = f"Error: {exc}"
	    return summary


	def push_to_mesh(doc_text: str, doc_title: str, corpus: str, mesh_url: str) -> str:
	    """Send one document to a HearthNet mesh node via its ``rag.ingest`` capability.

	    Args:
	        doc_text: Full document text to ingest.
	        doc_title: Title stored with the document; "Untitled" when blank.
	        corpus: Target corpus name; "documents" when blank.
	        mesh_url: Node base URL; falls back to ``HEARTHNET_NODE`` when blank.

	    Returns:
	        A human-readable status string: a warning when the URL or document is
	        missing, a success message, or ``⚠ Push failed: ...`` on any error.
	    """
	    import hashlib

	    url = ((mesh_url or "").strip() or _MESH_NODE).rstrip("/")
	    if not url:
	        return "⚠ Set HEARTHNET_NODE env var or enter mesh URL to push to mesh."
	    if not (doc_text or "").strip():
	        return "⚠ No document to push."

	    # Imported only after validation so the guard paths above do not need httpx.
	    import httpx

	    corpus_name = (corpus or "").strip() or "documents"
	    # hash() of a str is salted per interpreter process, so it cannot give the
	    # same document the same id across restarts; a content digest can.
	    doc_id = "doc-" + hashlib.sha256(doc_text.encode("utf-8")).hexdigest()[:16]

	    payload = {
	        "capability": "rag.ingest",
	        "version": "1.0",
	        "params": {"corpus": corpus_name},
	        "input": {
	            "documents": [
	                {
	                    "id": doc_id,
	                    "title": doc_title or "Untitled",
	                    "text": doc_text,
	                }
	            ]
	        },
	    }

	    async def _push():
	        async with httpx.AsyncClient(timeout=15.0) as c:
	            r = await c.post(f"{url}/bus/v1/call", json=payload)
	            r.raise_for_status()
	            return r.json()

	    try:
	        _run_async(_push())
	    except Exception as exc:
	        return f"⚠ Push failed: {exc}"
	    # Report the corpus actually sent, not the raw (possibly blank) input.
	    return (
	        f"✓ Document pushed to mesh at {url}\n"
	        f"Corpus: {corpus_name}\n"
	        "Now searchable via Ask tab on any mesh node."
	    )


	# ── Build UI ──────────────────────────────────────────────────────────────────

	def build_app() -> gr.Blocks:
	    """Assemble the Gradio UI and return it without launching.

	    Layout: an HTML header, a shared row with the model selector and API-key
	    box, then five tabs — Extract, Ask, Summarise, Push to Mesh and About.
	    The About tab text reports whether ``NVIDIA_API_KEY`` and
	    ``HEARTHNET_NODE`` were set when the module was imported.

	    Returns:
	        The constructed ``gr.Blocks`` instance.
	    """
	    with gr.Blocks(
	        title="HearthNet · Document Intelligence",
	    ) as demo:
	        # ── Header ────────────────────────────────────────────────────────────
	        gr.HTML("""
	        <div class="grad-banner">
	        <h1>🔬 HearthNet · Document Intelligence</h1>
	        <p>Structured extraction & Q&A powered by NVIDIA Nemotron · Part of the HearthNet mesh</p>
	        </div>
	        <p>
	        <span class="feature-badge" style="background:#7c3aed;color:white">NVIDIA Nemotron</span>
	        <span class="feature-badge" style="background:#f97316;color:white">Structured Extraction</span>
	        <span class="feature-badge" style="background:#0ea5e9;color:white">Offline Capable</span>
	        <span class="feature-badge" style="background:#10b981;color:white">Mesh RAG Ingest</span>
	        </p>
	        """)

	        # ── Shared controls (sidebar-style top row) ────────────────────────────
	        with gr.Row():
	            model_selector = gr.Dropdown(
	                label="🤖 Nemotron Model",
	                choices=list(_MODELS.keys()),
	                value=list(_MODELS.keys())[0],
	                scale=2,
	            )
	            api_key_box = gr.Textbox(
	                label="🔑 NVIDIA API Key",
	                value="",
	                type="password",
	                placeholder="nvapi-... leave blank if NVIDIA_API_KEY env var is set",
	                scale=3,
	            )

	        # ── Main tabs ──────────────────────────────────────────────────────────
	        with gr.Tabs():

	            # ── Tab 1: Structured Extraction ──────────────────────────────────
	            with gr.Tab("📊 Extract"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        extract_doc = gr.Textbox(
	                            label="Document",
	                            placeholder="Paste text, or upload a file below...",
	                            lines=12,
	                        )
	                        extract_file = gr.File(
	                            label="Upload file",
	                            type="filepath",
	                            file_types=[".txt", ".md", ".csv"],
	                        )
	                        schema_preset = gr.Dropdown(
	                            label="Schema preset",
	                            choices=list(_SCHEMAS.keys()),
	                            value="Invoice / Receipt",
	                        )
	                        custom_schema = gr.Code(
	                            label="Schema (JSON)",
	                            language="json",
	                            value=_SCHEMAS["Invoice / Receipt"],
	                            lines=8,
	                        )

	                    with gr.Column(scale=3):
	                        extract_btn = gr.Button("⚡ Extract with Nemotron", variant="primary", size="lg")
	                        extract_out = gr.Code(label="Extracted JSON", language="json", lines=16)
	                        extract_status = gr.Textbox(label="Status", lines=1, interactive=False)

	                def on_preset_change(preset):
	                    # Show the selected preset's template in the schema editor.
	                    return _SCHEMAS.get(preset, "{}")

	                schema_preset.change(on_preset_change, inputs=[schema_preset], outputs=[custom_schema])

	                def load_extract_file(fp):
	                    # Copy (at most 8000 characters of) the uploaded file into
	                    # the document box; undecodable bytes are replaced.
	                    if not fp:
	                        return ""
	                    try:
	                        with open(fp, encoding="utf-8", errors="replace") as f:
	                            return f.read(8000)
	                    except Exception as e:
	                        return f"Error: {e}"

	                extract_file.change(load_extract_file, inputs=[extract_file], outputs=[extract_doc])
	                extract_btn.click(
	                    extract_structured,
	                    inputs=[extract_doc, schema_preset, custom_schema, model_selector, api_key_box],
	                    outputs=[extract_out, extract_status],
	                )

	            # ── Tab 2: Document Q&A ───────────────────────────────────────────
	            with gr.Tab("💬 Ask"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        ask_doc = gr.Textbox(
	                            label="Document",
	                            placeholder="Paste the document to query...",
	                            lines=14,
	                        )

	                    with gr.Column(scale=3):
	                        ask_question_box = gr.Textbox(
	                            label="Question",
	                            placeholder="What is the total? Who are the parties? What are the obligations?",
	                            lines=2,
	                        )
	                        ask_btn = gr.Button("🔍 Ask Nemotron", variant="primary")
	                        ask_out = gr.Textbox(label="Answer", lines=8)

	                ask_btn.click(
	                    ask_document,
	                    inputs=[ask_doc, ask_question_box, model_selector, api_key_box],
	                    outputs=[ask_out],
	                )

	            # ── Tab 3: Summarise ──────────────────────────────────────────────
	            with gr.Tab("✂ Summarise"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        sum_doc = gr.Textbox(
	                            label="Document",
	                            placeholder="Paste document text...",
	                            lines=14,
	                        )

	                    with gr.Column(scale=3):
	                        sum_style = gr.Dropdown(
	                            label="Summary style",
	                            choices=[
	                                "Executive (3 bullets)",
	                                "Detailed (paragraph)",
	                                "ELI5 (simple)",
	                                "Action items only",
	                            ],
	                            value="Executive (3 bullets)",
	                        )
	                        sum_btn = gr.Button("✂ Summarise with Nemotron", variant="primary")
	                        sum_out = gr.Textbox(label="Summary", lines=10)

	                sum_btn.click(
	                    summarise_document,
	                    inputs=[sum_doc, sum_style, model_selector, api_key_box],
	                    outputs=[sum_out],
	                )

	            # ── Tab 4: Push to Mesh ───────────────────────────────────────────
	            with gr.Tab("🕸 Push to Mesh"):
	                gr.Markdown(
	                    "Send extracted/processed documents into a HearthNet mesh node's RAG corpus. "
	                    "After ingesting, documents become searchable from any mesh node's Ask tab."
	                )
	                with gr.Row():
	                    with gr.Column():
	                        mesh_doc = gr.Textbox(
	                            label="Document text",
	                            placeholder="Paste processed document...",
	                            lines=10,
	                        )
	                        mesh_title = gr.Textbox(label="Document title", placeholder="Invoice #123")
	                        mesh_corpus = gr.Textbox(label="Corpus name", value="documents")
	                        mesh_url = gr.Textbox(
	                            label="HearthNet mesh node URL",
	                            value=_MESH_NODE,
	                            placeholder="http://localhost:7860 or https://your-space.hf.space",
	                        )
	                        mesh_push_btn = gr.Button("🚀 Push to mesh", variant="primary")

	                    with gr.Column():
	                        mesh_status = gr.Textbox(label="Status", lines=5)
	                        # The Node 2 example sets the port through
	                        # GRADIO_SERVER_PORT because this script reads its port
	                        # from the environment, not from a --port flag.
	                        gr.Markdown(
	                            """
	                            How to use with the HearthNet main Space:
	                            1. Set `HEARTHNET_NODE = https://build-small-hackathon-hearthnet.hf.space`
	                            2. Or run locally: `python app.py` → `http://localhost:7860`
	                            3. Documents ingested here appear in the Ask tab on all mesh nodes

	                            Local multi-node example:
	                            ```bash
	                            # Node 1 (main mesh)
	                            python app.py --port 7860

	                            # Node 2 (this document intelligence app)
	                            HEARTHNET_NODE=http://localhost:7860 GRADIO_SERVER_PORT=7861 python app_nemotron.py
	                            ```
	                            """
	                        )

	                mesh_push_btn.click(
	                    push_to_mesh,
	                    inputs=[mesh_doc, mesh_title, mesh_corpus, mesh_url],
	                    outputs=[mesh_status],
	                )

	            # ── Tab 5: About ──────────────────────────────────────────────────
	            with gr.Tab("ℹ About"):
	                # Table cells use plain "|" separators: a backslash before the
	                # pipe is an invalid Python escape and stops Markdown from
	                # treating the row as a table.
	                gr.Markdown(
	                    f"""
	                    ## HearthNet Document Intelligence

	                    A companion app to the [HearthNet mesh](https://huggingface.co/spaces/build-small-hackathon/HearthNet)
	                    that adds NVIDIA Nemotron-powered document processing.

	                    ### Models
	                    | Model | Size | Best for |
	                    |-------|------|---------|
	                    | Nemotron Nano 8B | 8B | Fast extraction, Pi-friendly |
	                    | Nemotron Super 49B | 49B | Deep reasoning, complex docs |
	                    | Nemotron 70B | 70B | Balanced quality/speed |

	                    Only Nemotron Nano 8B is under 32B parameters ✅

	                    ### Architecture
	                    ```
	                    Document Input ──► Nemotron Parse ──► Structured JSON
	                                                      ──► Q&A Answers
	                                                      ──► Summary
	                                                             │
	                                                             ▼
	                                                   HearthNet RAG Corpus
	                                              (searchable on all mesh nodes)
	                    ```

	                    ### Prize Targets
	                    - 🏆 NVIDIA Nemotron Hardware Prize (RTX 5080) — builds with Nemotron ✅
	                    - 🐜 Tiny Titan — Nano 8B model ✅
	                    - 🎨 Off Brand — Custom purple-to-orange UI ✅

	                    ### Links
	                    - [Main HearthNet Space](https://huggingface.co/spaces/build-small-hackathon/HearthNet)
	                    - [HF Profile](https://huggingface.co/Chris4K)
	                    - [X / Twitter](https://x.com/zX14_7)
	                    - [GitHub](https://github.com/ckal)
	                    - [NVIDIA NIM API](https://build.nvidia.com) — free tier available

	                    Current status: API key: {'✓ configured' if _NVIDIA_KEY else '✗ not set (add NVIDIA_API_KEY)'}
	                    Mesh node: {_MESH_NODE or '✗ not set (add HEARTHNET_NODE)'}
	                    """
	                )

	    return demo


	if __name__ == "__main__":
	    demo = build_app()
	    # Port resolution order: GRADIO_SERVER_PORT (set by HF, whose health check
	    # targets 7860), then PORT, then the literal 7860.
	    _port = int(os.environ.get("GRADIO_SERVER_PORT") or os.environ.get("PORT") or "7860")
	    # SSR stays off: the Node proxy binds a different port and crashes on HF,
	    # leaving :7860 unhealthy -> launch timeout.
	    demo.launch(
	        theme=_theme,
	        css="""
	.grad-banner { background: linear-gradient(135deg, #7c3aed 0%, #f97316 100%);
	border-radius: 12px; padding: 16px 24px; margin-bottom: 16px; }
	.grad-banner h1 { color: white !important; margin: 0; }
	.grad-banner p { color: rgba(255,255,255,0.85) !important; margin: 4px 0 0; }
	.feature-badge { display: inline-block; padding: 2px 10px; border-radius: 12px;
	font-size: 0.78em; font-weight: 600; margin: 2px; }
	""",
	        ssr_mode=False,
	        server_port=_port,
	        server_name="0.0.0.0",  # nosec B104
	    )