"""HearthNet Document Intelligence โ Nemotron-powered second Space. A standalone Gradio app focused entirely on document intelligence using NVIDIA Nemotron models. Can run independently OR as part of a HearthNet mesh. Deploy as a second HF Space alongside the main HearthNet mesh Space. Prize targets: - NVIDIA Nemotron Hardware Prize (RTX 5080): Build with Nemotron models โ - ๐ Tiny Titan: Nemotron-nano-8B is 8B params (under 32B) โ - ๐จ Off Brand: Custom-styled beyond default Gradio look โ Usage: python app_nemotron.py Environment: NVIDIA_API_KEY โ NVIDIA NIM API key (get free at build.nvidia.com) NEMOTRON_URL โ local NIM endpoint (optional, for offline use) HEARTHNET_NODE โ URL of a HearthNet mesh node to push results into """ from __future__ import annotations import asyncio import os import gradio as gr # HF Spaces GPU support try: import spaces HAS_SPACES = True except ImportError: HAS_SPACES = False # โโ Optional mesh connection โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ _MESH_NODE = os.getenv("HEARTHNET_NODE", "") _NVIDIA_KEY = os.getenv("NVIDIA_API_KEY", "") _NEMOTRON_URL = os.getenv("NEMOTRON_URL", "") # โโ Nemotron model catalogue โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ _MODELS = { "Nemotron 3 Nano 30B-A3B (agentic, 1M ctx)": "nvidia/nemotron-3-nano-30b-a3b", "Nemotron Mini 4B (Tiny Titan)": "nvidia/nemotron-mini-4b-instruct", "Nemotron Nano 8B v1 (edge reasoning)": "nvidia/llama-3.1-nemotron-nano-8b-v1", "Nemotron Super 49B v1.5 (deep)": "nvidia/llama-3.3-nemotron-super-49b-v1.5", } _SCHEMAS = { "Invoice / Receipt": """{ "vendor": "string", "date": "string", "total_amount": "number", "currency": "string", "line_items": [{"description": "string", "amount": "number"}], "tax": "number" }""", "Medical Form": """{ "patient_name": "string", "date_of_birth": "string", "diagnosis": ["string"], "medications": ["string"], "doctor": "string", "date": "string" }""", "Legal Document": """{ "document_type": "string", "parties": ["string"], "effective_date": "string", "key_obligations": ["string"], "governing_law": "string" }""", "Meeting Notes": """{ "date": "string", "attendees": ["string"], "decisions": ["string"], "action_items": [{"owner": "string", "task": "string", "due": "string"}] }""", "Custom (edit below)": "{}", } # โโ Custom HearthNet theme โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ _theme = gr.themes.Soft( primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.purple, neutral_hue=gr.themes.colors.gray, font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "sans-serif"], ).set( button_primary_background_fill="*primary_500", button_primary_background_fill_hover="*primary_600", block_title_text_weight="600", block_border_width="1px", ) # โโ Core functions โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ def _get_endpoint(api_key: str) -> str: return _NEMOTRON_URL.rstrip("/") + "/v1" if _NEMOTRON_URL else "https://integrate.api.nvidia.com/v1" def _run_async(coro): """Run a coroutine safely whether or not a loop is already running.""" try: loop = asyncio.get_running_loop() except RuntimeError: loop = None if loop and loop.is_running(): import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: fut = pool.submit(asyncio.run, coro) return fut.result() return asyncio.run(coro) def _local_smol_chat(messages: list, max_tokens: int = 512) -> str: """SmolLM2-135M local fallback โ no API key required.""" try: from transformers import pipeline as _pipeline # type: ignore[import-untyped] _smol_id = "HuggingFaceTB/SmolLM2-135M-Instruct" pipe = _pipeline("text-generation", model=_smol_id, device_map="auto", torch_dtype="auto") prompt = "" for m in messages: role, content = m.get("role", "user"), m.get("content", "") if role == "system": prompt += f"<|im_start|>system\n{content}<|im_end|>\n" elif role == "user": prompt += f"<|im_start|>user\n{content}<|im_end|>\n" elif role == "assistant": prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n" prompt += "<|im_start|>assistant\n" result = pipe(prompt, max_new_tokens=max_tokens, return_full_text=False, do_sample=False) return result[0]["generated_text"].strip() except Exception as exc: return f"[SmolLM2 unavailable: {exc}]" async def _nemotron_chat(messages: list, model: str, api_key: str, temperature: float = 0.1) -> str: import httpx endpoint = _get_endpoint(api_key) headers = {"Content-Type": "application/json"} if api_key: headers["Authorization"] = f"Bearer {api_key}" payload = { "model": model, "messages": messages, "temperature": temperature, "max_tokens": 2048, } async with httpx.AsyncClient(timeout=60.0) as c: r = await c.post(f"{endpoint}/chat/completions", json=payload, headers=headers) r.raise_for_status() return r.json()["choices"][0]["message"]["content"] @spaces.GPU if HAS_SPACES else lambda f: f def extract_structured( doc_text: str, schema_preset: str, custom_schema: str, model_label: str, api_key: str, ) -> tuple[str, str]: """Extract structured data from documents using Nemotron. Wrapped with @spaces.GPU to signal GPU usage to HF Spaces. Falls back gracefully if GPU unavailable (e.g., local testing). """ import json if not doc_text.strip(): return '{"error": "No document text provided"}', "โ Provide document text" key = api_key.strip() or _NVIDIA_KEY schema = custom_schema.strip() if schema_preset == "Custom (edit below)" else _SCHEMAS[schema_preset] model = _MODELS.get(model_label, list(_MODELS.values())[0]) system = ( "You are a precise structured data extraction engine. " "Extract information from the document and return ONLY valid JSON " f"matching this exact schema:\n{schema}\n" "If a field is not found, use null. Never add fields not in the schema." ) messages = [ {"role": "system", "content": system}, {"role": "user", "content": f"Document:\n\n{doc_text[:5000]}"}, ] try: if key or _NEMOTRON_URL: raw = _run_async(_nemotron_chat(messages, model, key, temperature=0.05)) label = f"โ Extracted with {model_label}" else: raw = _local_smol_chat(messages, max_tokens=512) label = "โ Extracted with SmolLM2-135M (local fallback)" try: parsed = json.loads(raw) return json.dumps(parsed, indent=2), label except json.JSONDecodeError: return raw, f"โ Model returned non-JSON (shown as-is)" except Exception as exc: return f'{{"error": "{exc}"}}', f"โ Error: {exc}" def ask_document(doc_text: str, question: str, model_label: str, api_key: str) -> str: if not doc_text.strip(): return "Provide a document first." if not question.strip(): return "Ask a question." key = api_key.strip() or _NVIDIA_KEY model = _MODELS.get(model_label, list(_MODELS.values())[0]) messages = [ { "role": "system", "content": "Answer questions about the document concisely and accurately. " "Cite specific parts of the document when relevant.", }, { "role": "user", "content": f"Document:\n\n{doc_text[:4000]}\n\nQuestion: {question}", }, ] try: if key or _NEMOTRON_URL: return _run_async(_nemotron_chat(messages, model, key, temperature=0.3)) return _local_smol_chat(messages, max_tokens=512) except Exception as exc: return f"Error: {exc}" def summarise_document(doc_text: str, style: str, model_label: str, api_key: str) -> str: if not doc_text.strip(): return "Provide a document first." key = api_key.strip() or _NVIDIA_KEY model = _MODELS.get(model_label, list(_MODELS.values())[0]) style_prompts = { "Executive (3 bullets)": "Summarise in exactly 3 bullet points for an executive audience.", "Detailed (paragraph)": "Write a thorough 2-paragraph summary covering all key points.", "ELI5 (simple)": "Explain this document as simply as possible, as if to a 10-year-old.", "Action items only": "List only the action items, decisions, and next steps.", } prompt = style_prompts.get(style, "Summarise the document.") messages = [ {"role": "system", "content": prompt}, {"role": "user", "content": f"Document:\n\n{doc_text[:5000]}"}, ] try: if key or _NEMOTRON_URL: return _run_async(_nemotron_chat(messages, model, key, temperature=0.4)) return _local_smol_chat(messages, max_tokens=512) except Exception as exc: return f"Error: {exc}" def push_to_mesh(doc_text: str, doc_title: str, corpus: str, mesh_url: str) -> str: import httpx url = (mesh_url.strip() or _MESH_NODE).rstrip("/") if not url: return "โ Set HEARTHNET_NODE env var or enter mesh URL to push to mesh." if not doc_text.strip(): return "โ No document to push." async def _push(): payload = { "capability": "rag.ingest", "version": "1.0", "params": {"corpus": corpus or "documents"}, "input": { "documents": [ { "id": f"doc-{hash(doc_text) % 100000}", "title": doc_title or "Untitled", "text": doc_text, } ] }, } async with httpx.AsyncClient(timeout=15.0) as c: r = await c.post(f"{url}/bus/v1/call", json=payload) r.raise_for_status() return r.json() try: _run_async(_push()) return f"โ Document pushed to mesh at {url}\nCorpus: {corpus}\nNow searchable via Ask tab on any mesh node." except Exception as exc: return f"โ Push failed: {exc}" # โโ Build UI โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ def build_app() -> gr.Blocks: with gr.Blocks( title="HearthNet ยท Document Intelligence", ) as demo: # โโ Header โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ gr.HTML("""
NVIDIA Nemotron Structured Extraction Offline Capable Mesh RAG Ingest
""") # โโ Shared controls (sidebar-style top row) โโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Row(): model_selector = gr.Dropdown( label="๐ค Nemotron Model", choices=list(_MODELS.keys()), value=list(_MODELS.keys())[0], scale=2, ) api_key_box = gr.Textbox( label="๐ NVIDIA API Key", value="", type="password", placeholder="nvapi-... leave blank if NVIDIA_API_KEY env var is set", scale=3, ) # โโ Main tabs โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tabs(): # โโ Tab 1: Structured Extraction โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tab("๐ Extract"): with gr.Row(): with gr.Column(scale=2): extract_doc = gr.Textbox( label="Document", placeholder="Paste text, or upload a file below...", lines=12, ) extract_file = gr.File( label="Upload file", type="filepath", file_types=[".txt", ".md", ".csv"], ) schema_preset = gr.Dropdown( label="Schema preset", choices=list(_SCHEMAS.keys()), value="Invoice / Receipt", ) custom_schema = gr.Code( label="Schema (JSON)", language="json", value=_SCHEMAS["Invoice / Receipt"], lines=8, ) with gr.Column(scale=3): extract_btn = gr.Button("โก Extract with Nemotron", variant="primary", size="lg") extract_out = gr.Code(label="Extracted JSON", language="json", lines=16) extract_status = gr.Textbox(label="Status", lines=1, interactive=False) def on_preset_change(preset): return _SCHEMAS.get(preset, "{}") schema_preset.change(on_preset_change, inputs=[schema_preset], outputs=[custom_schema]) def load_extract_file(fp): if not fp: return "" try: with open(fp, encoding="utf-8", errors="replace") as f: return f.read(8000) except Exception as e: return f"Error: {e}" extract_file.change(load_extract_file, inputs=[extract_file], outputs=[extract_doc]) extract_btn.click( extract_structured, inputs=[extract_doc, schema_preset, custom_schema, model_selector, api_key_box], outputs=[extract_out, extract_status], ) # โโ Tab 2: Document Q&A โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tab("๐ฌ Ask"): with gr.Row(): with gr.Column(scale=2): ask_doc = gr.Textbox( label="Document", placeholder="Paste the document to query...", lines=14, ) with gr.Column(scale=3): ask_question_box = gr.Textbox( label="Question", placeholder="What is the total? Who are the parties? What are the obligations?", lines=2, ) ask_btn = gr.Button("๐ Ask Nemotron", variant="primary") ask_out = gr.Textbox(label="Answer", lines=8) ask_btn.click( ask_document, inputs=[ask_doc, ask_question_box, model_selector, api_key_box], outputs=[ask_out], ) # โโ Tab 3: Summarise โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tab("โ Summarise"): with gr.Row(): with gr.Column(scale=2): sum_doc = gr.Textbox( label="Document", placeholder="Paste document text...", lines=14, ) with gr.Column(scale=3): sum_style = gr.Dropdown( label="Summary style", choices=[ "Executive (3 bullets)", "Detailed (paragraph)", "ELI5 (simple)", "Action items only", ], value="Executive (3 bullets)", ) sum_btn = gr.Button("โ Summarise with Nemotron", variant="primary") sum_out = gr.Textbox(label="Summary", lines=10) sum_btn.click( summarise_document, inputs=[sum_doc, sum_style, model_selector, api_key_box], outputs=[sum_out], ) # โโ Tab 4: Push to Mesh โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tab("๐ธ Push to Mesh"): gr.Markdown( "Send extracted/processed documents into a HearthNet mesh node's RAG corpus. " "After ingesting, documents become searchable from any mesh node's **Ask** tab." ) with gr.Row(): with gr.Column(): mesh_doc = gr.Textbox( label="Document text", placeholder="Paste processed document...", lines=10, ) mesh_title = gr.Textbox(label="Document title", placeholder="Invoice #123") mesh_corpus = gr.Textbox(label="Corpus name", value="documents") mesh_url = gr.Textbox( label="HearthNet mesh node URL", value=_MESH_NODE, placeholder="http://localhost:7860 or https://your-space.hf.space", ) mesh_push_btn = gr.Button("๐ Push to mesh", variant="primary") with gr.Column(): mesh_status = gr.Textbox(label="Status", lines=5) gr.Markdown( """ **How to use with the HearthNet main Space:** 1. Set `HEARTHNET_NODE = https://build-small-hackathon-hearthnet.hf.space` 2. Or run locally: `python app.py` โ `http://localhost:7860` 3. Documents ingested here appear in the **Ask** tab on all mesh nodes **Local multi-node example:** ```bash # Node 1 (main mesh) python app.py --port 7860 # Node 2 (this document intelligence app) python app_nemotron.py --port 7861 HEARTHNET_NODE=http://localhost:7860 ``` """ ) mesh_push_btn.click( push_to_mesh, inputs=[mesh_doc, mesh_title, mesh_corpus, mesh_url], outputs=[mesh_status], ) # โโ Tab 5: About โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ with gr.Tab("โน About"): gr.Markdown( f""" ## HearthNet Document Intelligence A companion app to the [HearthNet mesh](https://huggingface.co/spaces/build-small-hackathon/HearthNet) that adds NVIDIA Nemotron-powered document processing. ### Models | Model | Size | Best for | |-------|------|---------| | Nemotron Nano 8B | 8B | Fast extraction, Pi-friendly | | Nemotron 70B | 70B | Deep reasoning, complex docs | | Nemotron Super 49B | 49B | Balanced quality/speed | All models are under 32B parameters individually โ ### Architecture ``` Document Input โโโบ Nemotron Parse โโโบ Structured JSON โโโบ Q&A Answers โโโบ Summary โ โผ HearthNet RAG Corpus (searchable on all mesh nodes) ``` ### Prize Targets - ๐ **NVIDIA Nemotron Hardware Prize** (RTX 5080) โ builds with Nemotron โ - ๐ **Tiny Titan** โ Nano 8B model โ - ๐จ **Off Brand** โ Custom purple-to-orange UI โ ### Links - [Main HearthNet Space](https://huggingface.co/spaces/build-small-hackathon/HearthNet) - [HF Profile](https://huggingface.co/Chris4K) - [X / Twitter](https://x.com/zX14_7) - [GitHub](https://github.com/ckal) - [NVIDIA NIM API](https://build.nvidia.com) โ free tier available **Current status:** API key: {'โ configured' if _NVIDIA_KEY else 'โ not set (add NVIDIA_API_KEY)'} **Mesh node:** {_MESH_NODE or 'โ not set (add HEARTHNET_NODE)'} """ ) return demo if __name__ == "__main__": demo = build_app() # HF Spaces health-checks port 7860. Prefer GRADIO_SERVER_PORT (set by HF), # fall back to PORT, then 7860. Disable SSR: the Node proxy binds a different # port and crashes on HF, leaving :7860 unhealthy -> launch timeout. _port = int(os.getenv("GRADIO_SERVER_PORT") or os.getenv("PORT") or "7860") demo.launch( server_name="0.0.0.0", # nosec B104 server_port=_port, ssr_mode=False, theme=_theme, css=""" .grad-banner { background: linear-gradient(135deg, #7c3aed 0%, #f97316 100%); border-radius: 12px; padding: 16px 24px; margin-bottom: 16px; } .grad-banner h1 { color: white !important; margin: 0; } .grad-banner p { color: rgba(255,255,255,0.85) !important; margin: 4px 0 0; } .feature-badge { display: inline-block; padding: 2px 10px; border-radius: 12px; font-size: 0.78em; font-weight: 600; margin: 2px; } """, )