"""Aperture — Gradio app.

A lightweight alternative UI to the React/Vercel dashboard, for analysts and quick
demos. Runs the SAME in-process pipeline (OCR backends, categorization, hybrid extract,
DB, RAG, KPIs) — no separate API needed.

  pip install gradio
  python gradio_app.py        # http://localhost:7860

Docs: https://www.gradio.app/guides/quickstart
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

BACKEND = Path(__file__).resolve().parent / "backend"
sys.path.insert(0, str(BACKEND))

import gradio as gr  # noqa: E402

from app.categories import list_categories  # noqa: E402
from app.config import get_settings  # noqa: E402
from app.db import Database  # noqa: E402
from app.metrics import MetricsStore  # noqa: E402
from app.observability import business_kpis  # noqa: E402
from app.ocr.backends import build_ocr_registry, list_backends  # noqa: E402
from app.pipeline import process_document  # noqa: E402
from app.providers import build_registry  # noqa: E402
from app.rag_store import VectorStore  # noqa: E402
from app.router import ModelRouter  # noqa: E402

# Process-wide singletons, built once at import time and shared by every
# Gradio callback defined below.
S = get_settings()
METRICS = MetricsStore(S.metrics_db_path)
# The router is handed the provider registry together with the same settings
# and metrics store used everywhere else in this module.
ROUTER = ModelRouter(build_registry(S), S, METRICS)
OCR = build_ocr_registry(S)
DB = Database(S.app_db_path)
RAG = VectorStore(S.rag_db_path)

# Ids of the categories reported by list_categories().
CATS = [c["id"] for c in list_categories()]
# Sample ids for the dropdown: the name of every "*.gt.json" file in the evals
# dataset directory with that suffix sliced off, sorted alphabetically.
# NOTE(review): ".gt.json" presumably marks ground-truth files — confirm.
SAMPLES = sorted(p.name[:-len(".gt.json")] for p in S.evals_dataset_dir.glob("*.gt.json"))


def _sample_path(sample_id: str):
    """Locate the document file that belongs to an eval sample.

    Probes ``<sample_id>.pdf``, ``.png``, ``.jpg`` and ``.jpeg`` (in that
    order) inside ``S.evals_dataset_dir``.

    Returns:
        The first candidate path that exists on disk, or ``None`` when none do.
    """
    candidates = (
        S.evals_dataset_dir / f"{sample_id}{suffix}"
        for suffix in (".pdf", ".png", ".jpg", ".jpeg")
    )
    # The generator is lazy, so probing stops at the first existing file.
    return next((candidate for candidate in candidates if candidate.exists()), None)


def run_sample(sample_id: str, ocr_backend: str):
    """Gradio callback: process one of the bundled sample documents.

    Args:
        sample_id: Id chosen in the "Sample document" dropdown.
        ocr_backend: OCR backend name chosen in the UI.

    Returns:
        A ``(extracted, summary_markdown, kpis_markdown)`` triple. When the
        sample file cannot be found, ``extracted`` is an error dict and the
        summary is empty.
    """
    located = _sample_path(sample_id)
    if located:
        return _run(located, sample_id, ocr_backend)
    return {"error": "sample not found"}, "", _kpis_md()


def run_upload(file, ocr_backend: str):
    """Gradio callback: process a document uploaded through the file widget.

    Args:
        file: Value delivered by the upload component; falsy when nothing has
            been uploaded. It is passed straight to ``Path``.
        ocr_backend: OCR backend name chosen in the UI.

    Returns:
        A ``(extracted, summary_markdown, kpis_markdown)`` triple. With no
        upload, ``extracted`` is an error dict and the summary is empty.
    """
    if file:
        uploaded = Path(file)
        # The file stem doubles as the document id for the pipeline run.
        return _run(uploaded, uploaded.stem, ocr_backend)
    return {"error": "upload a PDF/PNG"}, "", _kpis_md()


def _run(path, doc_id, ocr_backend):
    """Run the in-process pipeline on one document and format the outcome.

    Args:
        path: Location of the document to process.
        doc_id: Identifier forwarded to ``process_document``.
        ocr_backend: OCR backend name; a falsy value falls back to ``"auto"``.

    Returns:
        A ``(extracted, summary_markdown, kpis_markdown)`` triple: the
        ``"extracted"`` entry of the run state, a three-paragraph Markdown
        summary, and the refreshed KPI panel.
    """
    outcome = process_document(
        path,
        router=ROUTER, settings=S, metrics=METRICS,
        ocr_registry=OCR, db=DB, rag_store=RAG,
        doc_id=doc_id, ocr_backend=ocr_backend or "auto",
    )
    state = outcome["_state"]

    # Paragraph 1: classification and OCR backend.
    headline = (
        f"**Category:** {state['category']}  ·  **Type:** {state['doc_type']}  ·  "
        f"**Confidence:** {state['confidence']:.0%}  ·  **OCR:** {state.get('ocr_backend')}"
    )

    # Paragraph 2: posting outcome, cost and latency.
    result = outcome["result"]
    if result["posted"]:
        verdict = "✅ auto-posted " + str(result.get("post_id"))
    else:
        verdict = "⚠ routed to human review"
    billing = (
        f"**Outcome:** {verdict}  ·  "
        f"**Cost:** ${outcome['total_cost_usd']:.5f}  ·  **Latency:** {outcome['latency_ms']:.0f} ms"
    )

    # Paragraph 3: the OCR strategy recorded in the run state.
    strategy = f"**OCR strategy:** {state['ocr'].get('strategy','')}"

    summary = "\n\n".join((headline, billing, strategy))
    return state["extracted"], summary, _kpis_md()


def _kpis_md() -> str:
    """Render the current business KPIs as a Markdown bullet list.

    Returns:
        A "Live KPIs" Markdown section built from ``business_kpis(DB, METRICS)``,
        or a placeholder sentence while ``total_documents`` is missing or zero.
    """
    kpis = business_kpis(DB, METRICS)
    if not kpis.get("total_documents"):
        return "_No documents processed yet._"

    lines = [
        "### Live KPIs",
        f"- Documents: **{kpis['total_documents']}**",
        f"- Straight-through (auto-post): **{kpis['straight_through_rate']:.0%}**",
        f"- Human-in-the-loop: **{kpis['hitl_rate']:.0%}**",
        f"- OCR completion: **{kpis['ocr_completion_rate']:.0%}**",
        # A missing average is rendered as 0% rather than failing the format.
        f"- Avg confidence: **{(kpis['avg_confidence'] or 0):.0%}**",
        f"- Cost / document: **${kpis['cost_per_document_usd']:.5f}**",
        f"- By category: `{json.dumps(kpis['by_category'])}`",
    ]
    return "\n".join(lines)


def search(query: str):
    """Gradio callback: look up the RAG store and return rows for the results table.

    Blank input (``None``, empty, or whitespace-only) short-circuits to an
    empty result without touching the vector store, mirroring the guard used
    by ``erp_ask``.

    Args:
        query: Free-text search string from the "Query" textbox.

    Returns:
        A list of ``[ref, score, text]`` rows (at most 8), with the score
        rounded to three decimals and the text truncated to 120 characters.
    """
    cleaned = (query or "").strip()
    if not cleaned:
        return []
    return [[hit["ref"], round(hit["score"], 3), hit["text"][:120]]
            for hit in RAG.search(cleaned, k=8)]


def erp_ask(question: str):
    """ERP DocIQ: NLQ / analytics / summary / 'why' over the simulated ERP knowledgebase.

    Args:
        question: Natural-language question from the chat textbox.

    Returns:
        A ``(markdown, table)`` pair: the formatted answer (with the SQL in a
        fenced block when the reply carries one) and up to 12 result rows
        whose cells are converted to strings. Blank questions return a usage
        hint and an empty table.
    """
    from app.erp import ErpChat, get_warehouse

    if not (question or "").strip():
        return "Ask about spend, vendors, late payments, inventory or returns.", []

    assistant = ErpChat(S, router=ROUTER, warehouse=get_warehouse(S), metrics=METRICS)
    reply = assistant.answer(question)

    markdown = (
        f"**{reply['intent']}** · {reply['engine']} · {reply['model']} · {reply['latency_ms']} ms\n\n"
        f"{reply['answer']}\n\n"
    )
    if reply.get("sql"):
        markdown += f"```sql\n{reply['sql']}\n```"

    # Only the first dozen rows are shown; an absent/empty result yields [].
    records = reply.get("rows") or []
    preview = [[str(cell) for cell in record] for record in records[:12]]
    return markdown, preview


def _erp_finetune_md() -> str:
    """Render the ERP fine-tuning report as a Markdown blurb.

    Reads ``backend/finetune/erp_finetune_report.json`` next to this file and
    summarizes its ``offline_demo`` section (or the top-level object when that
    section is absent or empty).

    This runs while the UI is being built, so it never raises on a bad report:
    a missing file yields a "run the script" hint, and an unreadable,
    malformed, or incomplete report yields a short notice instead of
    preventing the app from starting.

    Returns:
        Markdown text for display in the ERP chat tab.
    """
    report_path = (Path(__file__).resolve().parent / "backend" / "finetune"
                   / "erp_finetune_report.json")
    if not report_path.exists():
        return "_Run `python scripts/finetune_erp.py` to populate fine-tune metrics._"
    try:
        # JSON is UTF-8 by specification; don't depend on the locale encoding.
        report = json.loads(report_path.read_text(encoding="utf-8"))
        od = report.get("offline_demo") or report
        return ("### ERP-domain fine-tuning\n"
                f"- **Production target:** OpenBMB **MiniCPM3-4B** (LoRA recipe emitted)\n"
                f"- **Offline demo (CPU):** before **{od['before_test_accuracy']*100:.1f}%** → "
                f"after **{od['after_test_accuracy']*100:.1f}%** "
                # The "+" format flag signs the gain itself, so a regression
                # prints "-3 pts" rather than "+-3 pts".
                f"(**{od['accuracy_gain']*100:+.0f} pts**) on {od['dataset_size']} examples; "
                f"routed-SQL exec {od['routed_sql_exec_rate']*100:.0f}%")
    except (OSError, ValueError, KeyError, TypeError, AttributeError):
        # ValueError covers JSONDecodeError/UnicodeDecodeError; the others cover
        # a report that is not an object or lacks the expected numeric fields.
        return ("_Fine-tune report is unreadable or incomplete — re-run "
                "`python scripts/finetune_erp.py` to regenerate it._")


def run_complex_web_automation():
    """Intricate multi-step browser automation: ERP dashboard → Procurement →
    +Create Order → read the complex order-form fields.

    Returns:
        A ``(trace_markdown, result)`` pair: a header line followed by one
        bullet per trace step (notes truncated to 90 characters), and the
        agent's ``"result"`` entry (``None`` when absent).
    """
    from app.browser import run_browser_agent

    # The Gradio app doesn't host the ERP portal, so drive the simulated browser
    # (the same multi-step flow runs on real Chromium in the full web app).
    outcome = run_browser_agent(
        "Open the ERP dashboard, click Procurement, click '+ Create Order', and read all order fields",
        router=ROUTER, settings=S, metrics=METRICS, scenario="complex_order",
        base_url="https://portal.local/portal", prefer_simulated=True,
    )

    header = f"**{outcome['backend']} browser · {outcome['agent_mode']} · {outcome['steps']} steps**\n\n"
    bullets = [
        f"- **step {entry['step']}** `{entry['tool']}` {entry.get('args') or ''} — {entry.get('note','')[:90]}\n"
        for entry in outcome["trace"]
    ]
    return header + "".join(bullets), outcome.get("result")


# Build the four-tab UI. Components and their event wiring are declared inside
# the Blocks context, so statement order and nesting define the layout.
with gr.Blocks(title="ERP-DocIQ — Agentic Document Intelligence + ERP NLQ") as demo:
    gr.Markdown("# 📄 ERP-DocIQ — Agentic Document Intelligence + ERP NLQ, on small models\n"
                "Read documents (OCR + IDP), ask your ERP in plain English (NLQ → SQL), and automate the clicks — "
                "all on open models ≤32B, with **OpenBMB MiniCPM** (vision + reasoning) doing the heavy lifting. "
                "Built for the **Build Small Hackathon**.")
    # Tab 1: run the document pipeline on a bundled sample or an uploaded file.
    with gr.Tab("Process a document"):
        with gr.Row():
            # Left column: inputs (OCR backend, sample picker, upload widget).
            with gr.Column():
                backend = gr.Dropdown(["auto", *list_backends()], value="auto", label="OCR backend")
                sample = gr.Dropdown(SAMPLES, value=(SAMPLES[0] if SAMPLES else None), label="Sample document")
                run_btn = gr.Button("▶ Run sample", variant="primary")
                gr.Markdown("— or —")
                upload = gr.File(label="Upload PDF/PNG", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
                upload_btn = gr.Button("▶ Run upload")
            # Right column: outputs (summary Markdown and extracted-field JSON).
            with gr.Column():
                summary = gr.Markdown()
                extracted = gr.JSON(label="Extracted fields (multi-layer)")
        # KPI panel is seeded once at build time; both callbacks refresh it.
        kpis = gr.Markdown(_kpis_md())
        # Both buttons write to the same three outputs, in the order the
        # callbacks return them: extracted fields, summary, KPIs.
        run_btn.click(run_sample, [sample, backend], [extracted, summary, kpis])
        upload_btn.click(run_upload, [upload, backend], [extracted, summary, kpis])
    # Tab 2: natural-language questions answered by erp_ask.
    with gr.Tab("ERP DocIQ (chat)"):
        gr.Markdown("### Ask your ERP reports — NLQ · analytics · summary · reasons\n"
                    "Natural-language questions over a simulated retail ERP (vendors, POs, invoices, "
                    "GL, inventory, returns). Figures come from **real SQL**; OpenBMB **MiniCPM3-4B** "
                    "phrases summaries & explanations and never invents numbers.")
        erp_q = gr.Textbox(label="Question",
                           placeholder="e.g. Why did spend rise in Q2 2026?")
        # Clickable example questions that fill the textbox above.
        gr.Examples(["Who are the top 5 vendors by spend?", "What is the late-payment rate overall?",
                     "Why did spend rise in Q2 2026?", "Summarize accounts payable health.",
                     "Top return reasons by refund amount?"], inputs=erp_q)
        erp_btn = gr.Button("💬 Ask ERP DocIQ", variant="primary")
        erp_answer = gr.Markdown()
        erp_rows = gr.Dataframe(label="Query result (real SQL over the warehouse)")
        erp_btn.click(erp_ask, [erp_q], [erp_answer, erp_rows])
        # Static fine-tune blurb, computed once when the UI is built.
        gr.Markdown(_erp_finetune_md())
    # Tab 3: search over the RAG vector store.
    with gr.Tab("Search (RAG)"):
        q = gr.Textbox(label="Query", placeholder="e.g. POS Cloud subscription renewal")
        search_btn = gr.Button("🔍 Search")
        results = gr.Dataframe(headers=["ref", "score", "text"], label="Results")
        search_btn.click(search, [q], [results])
    # Tab 4: one-click browser-automation demo; the callback takes no inputs.
    with gr.Tab("Web Automation"):
        gr.Markdown("### Complex multi-step browser automation\n"
                    "Replaces UiPath Studio Web: navigate the ERP dashboard → click the **Procurement** "
                    "tile → click **+ Create Order** → open the order-form modal → **read the complex "
                    "nested fields** (vendor & terms, ship-to, line items, totals, approver). Runs on real "
                    "Chromium when Playwright is installed; replays on the simulated browser here.")
        cx_btn = gr.Button("▶ Run: Procurement → Create Order → read fields", variant="primary")
        with gr.Row():
            cx_trace = gr.Markdown()
            cx_result = gr.JSON(label="Order fields read from the form")
        cx_btn.click(run_complex_web_automation, [], [cx_trace, cx_result])

if __name__ == "__main__":
    import inspect

    # Serve on all interfaces at port 7860.
    launch_kwargs = {"server_name": "0.0.0.0", "server_port": 7860}
    # Only some Gradio releases accept ``theme`` in launch(); older ones reject
    # it. Probe the signature instead of catching TypeError around the blocking
    # launch() call, which would also swallow unrelated TypeErrors and start a
    # second launch attempt.
    if "theme" in inspect.signature(demo.launch).parameters:
        launch_kwargs["theme"] = gr.themes.Soft()
    demo.launch(**launch_kwargs)