Itachi-1824 committed on
Commit ·
51a14c0
1
Parent(s): ff2e396
feat: premium ui (charcoal+gold authority palette), multi-model eval, /web mount fix
Browse files- server/app.py +73 -83
server/app.py
CHANGED
|
@@ -201,103 +201,93 @@ async def api_close(req: CloseRequest):
|
|
| 201 |
|
| 202 |
_gradio_mounted = False
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
try:
|
| 205 |
import gradio as gr
|
| 206 |
|
| 207 |
-
def
|
| 208 |
-
scenarios_count = len(SCENARIO_LIST)
|
| 209 |
-
tools_count = 11 # 10 tools + verify
|
| 210 |
-
tiers = len(DIFFICULTY_TIERS)
|
| 211 |
return f"""
|
| 212 |
-
<div style="background:
|
| 213 |
-
<
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
|
|
|
|
|
|
|
|
|
| 218 |
</p>
|
| 219 |
-
<div style="display:grid;grid-template-columns:repeat(5,1fr);gap:
|
| 220 |
-
<div style="background:
|
| 221 |
-
<div style="color:
|
| 222 |
-
<div style="color:
|
| 223 |
-
</div>
|
| 224 |
-
<div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
|
| 225 |
-
<div style="color:#00d4aa;font-size:2em;font-weight:bold;">{tools_count}</div>
|
| 226 |
-
<div style="color:#8888bb;font-size:0.85em;">MCP TOOLS</div>
|
| 227 |
-
</div>
|
| 228 |
-
<div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
|
| 229 |
-
<div style="color:#00d4aa;font-size:2em;font-weight:bold;">6</div>
|
| 230 |
-
<div style="color:#8888bb;font-size:0.85em;">REWARD COMPS</div>
|
| 231 |
-
</div>
|
| 232 |
-
<div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
|
| 233 |
-
<div style="color:#00d4aa;font-size:2em;font-weight:bold;">{tiers}</div>
|
| 234 |
-
<div style="color:#8888bb;font-size:0.85em;">DIFFICULTY TIERS</div>
|
| 235 |
-
</div>
|
| 236 |
-
<div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
|
| 237 |
-
<div style="color:#00d4aa;font-size:2em;font-weight:bold;">Aug 2026</div>
|
| 238 |
-
<div style="color:#8888bb;font-size:0.85em;">EU DEADLINE</div>
|
| 239 |
-
</div>
|
| 240 |
</div>
|
| 241 |
-
</div>
|
| 242 |
-
"""
|
| 243 |
|
| 244 |
-
def
|
| 245 |
cards = [
|
| 246 |
-
("Real
|
| 247 |
-
("Full
|
| 248 |
-
("State-
|
| 249 |
-
("6-
|
| 250 |
-
("Parameter
|
| 251 |
-
("
|
| 252 |
]
|
| 253 |
-
html = '<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:24px;">'
|
| 254 |
for title, desc in cards:
|
| 255 |
-
html += f""
|
| 256 |
-
|
| 257 |
-
<
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
def _build_scenarios_html():
|
| 265 |
html = ""
|
| 266 |
for s in SCENARIO_LIST:
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
<h3 style="color:#e0e0ff;margin:0;font-size:1.05em;">{s['title']}</h3>
|
| 273 |
-
<span style="background:{color}22;color:{color};padding:4px 12px;border-radius:8px;font-size:0.8em;font-weight:bold;">{s['difficulty'].upper()}</span>
|
| 274 |
</div>
|
| 275 |
-
<
|
| 276 |
-
</div>
|
| 277 |
-
"""
|
| 278 |
return html
|
| 279 |
|
| 280 |
-
_css = """
|
| 281 |
-
.gradio-container { background: #0a0a0a !important; }
|
| 282 |
-
.tab-nav button { color: #8888bb !important; background: transparent !important; border: none !important; }
|
| 283 |
-
.tab-nav button.selected { color: #00d4aa !important; border-bottom: 2px solid #00d4aa !important; }
|
| 284 |
-
"""
|
| 285 |
-
|
| 286 |
with gr.Blocks(title="EU AI Act Compliance Auditor") as landing_app:
|
|
|
|
| 287 |
with gr.Tabs():
|
| 288 |
with gr.Tab("Overview"):
|
| 289 |
-
gr.HTML(
|
| 290 |
-
gr.HTML("<h2 style='color:
|
| 291 |
-
gr.HTML(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
with gr.Tab("Scenarios"):
|
| 294 |
-
gr.HTML("<h2 style='color:
|
| 295 |
-
gr.HTML(
|
| 296 |
|
| 297 |
with gr.Tab("Playground"):
|
| 298 |
-
gr.HTML("<h2 style='color:
|
| 299 |
-
gr.HTML("<p style='color:
|
| 300 |
-
|
| 301 |
session_state = gr.State(value=None)
|
| 302 |
with gr.Row():
|
| 303 |
diff_dropdown = gr.Dropdown(choices=["easy", "medium", "hard"], value="easy", label="Difficulty")
|
|
@@ -327,14 +317,14 @@ try:
|
|
| 327 |
with _sessions_lock:
|
| 328 |
env = _sessions.get(sid)
|
| 329 |
if not env:
|
| 330 |
-
return sid, {"error": "Session expired
|
| 331 |
fn = env._tool_fns.get(tool_name)
|
| 332 |
if not fn:
|
| 333 |
return sid, {"error": f"Unknown tool: {tool_name}"}
|
| 334 |
try:
|
| 335 |
kwargs = json.loads(args_str) if args_str.strip() else {}
|
| 336 |
except json.JSONDecodeError:
|
| 337 |
-
return sid, {"error": "Invalid JSON
|
| 338 |
try:
|
| 339 |
result = fn(**kwargs)
|
| 340 |
return sid, json.loads(result) if isinstance(result, str) else result
|
|
@@ -345,17 +335,17 @@ try:
|
|
| 345 |
call_btn.click(pg_call, [session_state, tool_dropdown, args_input], [session_state, output_box])
|
| 346 |
|
| 347 |
with gr.Tab("Try It"):
|
| 348 |
-
gr.HTML("""
|
| 349 |
-
|
| 350 |
-
<
|
| 351 |
-
<pre style="background:
|
| 352 |
export MODEL_NAME="google/gemma-4-31b-it"
|
| 353 |
export HF_TOKEN="your-api-key"
|
| 354 |
python inference.py --space https://Itachi1824-compliance-auditor-env.hf.space</code></pre>
|
| 355 |
-
</div>
|
| 356 |
-
""")
|
| 357 |
|
| 358 |
gr.mount_gradio_app(app, landing_app, path="/")
|
|
|
|
| 359 |
_gradio_mounted = True
|
| 360 |
|
| 361 |
except Exception as e:
|
|
|
|
| 201 |
|
| 202 |
_gradio_mounted = False
|
| 203 |
|
| 204 |
+
# Color system: Deep charcoal + warm gold (authority/prestige/trust)
|
| 205 |
+
# NO neon, NO AI-typical cyan/purple
|
| 206 |
+
_BG = "#09090B" # base background
|
| 207 |
+
_CARD = "#18181B" # card surface
|
| 208 |
+
_BORDER = "#27272A" # borders
|
| 209 |
+
_TEXT = "#F8FAFC" # primary text
|
| 210 |
+
_MUTED = "#94A3B8" # secondary text (slate)
|
| 211 |
+
_GOLD = "#C9A84C" # accent: authority, prestige
|
| 212 |
+
_EMERALD = "#10B981" # success/easy
|
| 213 |
+
_AMBER = "#F59E0B" # warning/medium
|
| 214 |
+
_ROSE = "#F43F5E" # critical/hard
|
| 215 |
+
|
| 216 |
try:
|
| 217 |
import gradio as gr
|
| 218 |
|
| 219 |
+
def _hero():
    """Build the landing-page hero banner as an HTML string.

    The banner holds the title, a short description, and a five-tile
    statistics grid. The scenario count is taken from ``SCENARIO_LIST`` so
    the banner cannot drift from the scenarios actually rendered elsewhere
    on the page; the remaining figures are fixed display values.

    Returns:
        str: HTML markup styled with the module's colour constants.
    """
    scenario_count = len(SCENARIO_LIST)

    # (label, value) pairs shown in the statistics grid, left to right.
    stats = [
        ("SCENARIOS", scenario_count),
        ("MCP TOOLS", 11),
        ("REWARD COMPS", 6),
        ("TIERS", 3),
        ("EU DEADLINE", "Aug '26"),
    ]

    # Build the tiles separately instead of nesting a triple-quoted f-string
    # inside the outer f-string expression; this keeps the template readable.
    tiles = "".join(
        f'''<div style="background:{_BG};padding:20px 16px;border-radius:10px;text-align:center;border:1px solid {_BORDER};">
                    <div style="color:{_GOLD};font-size:1.8em;font-weight:700;">{value}</div>
                    <div style="color:{_MUTED};font-size:0.78em;letter-spacing:0.05em;margin-top:4px;">{label}</div>
                </div>'''
        for label, value in stats
    )

    return f"""
        <div style="background:{_CARD};padding:48px 40px;border-radius:12px;margin-bottom:28px;border:1px solid {_BORDER};">
            <div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
                <div style="width:8px;height:36px;background:{_GOLD};border-radius:4px;"></div>
                <h1 style="color:{_TEXT};font-size:2em;margin:0;font-weight:700;letter-spacing:-0.02em;">EU AI Act Compliance Auditor</h1>
            </div>
            <p style="color:{_MUTED};font-size:1.05em;margin:8px 0 32px 20px;line-height:1.6;max-width:720px;">
                An MCP-based environment where LLM agents audit AI systems for EU AI Act compliance.
                {scenario_count} scenarios from chatbot transparency to prohibited social scoring.
                Parameter randomization prevents memorization.
            </p>
            <div style="display:grid;grid-template-columns:repeat(5,1fr);gap:14px;">
                {tiles}
            </div>
        </div>"""
|
|
|
|
| 238 |
|
| 239 |
+
def _design_cards():
    """Render the "Design Decisions" card grid as an HTML string.

    Produces a three-column CSS grid with one card per (title, description)
    pair. The enforcement card's title is computed from the current local
    date each time this function is called, rather than hard-coding a day
    count that is only correct on the day it was written.

    Returns:
        str: HTML markup for the complete grid, including its wrapper div.
    """
    # Function-scope import so the block does not depend on a module-level
    # import that may not exist in the file.
    from datetime import date

    # The card text below states that enforcement begins on 2 August 2026.
    days_left = (date(2026, 8, 2) - date.today()).days
    if days_left > 1:
        enforcement_title = f"Enforcement: {days_left} Days Away"
    elif days_left == 1:
        enforcement_title = "Enforcement: 1 Day Away"
    elif days_left == 0:
        enforcement_title = "Enforcement: Begins Today"
    else:
        enforcement_title = "Enforcement: In Effect"

    cards = [
        ("Real Regulatory Scenarios", "Scenarios drawn from actual EU AI Act articles: prohibited social scoring (Art. 5), high-risk hiring (Annex III), deepfake transparency (Art. 50), medical device audits. Based on real compliance gaps companies face today."),
        ("Full Audit Toolkit", "11 MCP tools mirror a real compliance auditor's workflow: system overview, risk classification, documentation review, bias audit, oversight verification, transparency check, risk assessment, logging verification."),
        ("State-Graph Audit Process", "Each scenario is a directed graph with progress, no-effect, and worsened transitions. Partial credit computed via BFS depth along the optimal path. Wrong audit steps waste your query budget."),
        ("6-Component Reward", "Classification accuracy (20%), finding completeness (25%), finding precision (15%), remediation quality (15%), methodology adherence (15%), efficiency (10%). Designed to resist reward hacking."),
        ("Parameter Randomization", "Company names, deployment dates, regions, and system versions are re-rolled on every reset. Agents must learn the audit process, not memorize specific answers."),
        (enforcement_title, "EU AI Act enforcement begins August 2, 2026. Fines up to EUR 35M or 7% of global revenue. Every company deploying AI in Europe needs automated compliance auditing. This environment fills that gap."),
    ]

    grid_open = '<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:24px;">'
    card_markup = "".join(
        f'''<div style="background:{_CARD};padding:24px;border-radius:10px;border:1px solid {_BORDER};">
                <h3 style="color:{_TEXT};margin:0 0 10px 0;font-size:1em;font-weight:600;">{title}</h3>
                <p style="color:{_MUTED};margin:0;font-size:0.88em;line-height:1.6;">{desc}</p>
            </div>'''
        for title, desc in cards
    )
    return grid_open + card_markup + "</div>"
|
| 255 |
+
|
| 256 |
+
def _scenarios_html():
    """Render one summary row per entry in ``SCENARIO_LIST`` as HTML.

    Each row shows the scenario's ``title`` and ``id`` on the left and an
    upper-cased ``difficulty`` badge on the right. The row's left border and
    the badge are tinted by difficulty; a difficulty that is not in the
    colour map falls back to the muted text colour.

    Returns:
        str: Concatenated HTML rows (empty string if there are no scenarios).
    """
    diff_colors = {"easy": _EMERALD, "medium": _AMBER, "hard": _ROSE}

    rows = []
    for s in SCENARIO_LIST:
        c = diff_colors.get(s["difficulty"], _MUTED)
        # `border` is a shorthand that resets all four sides, so it must come
        # BEFORE `border-left`; in the opposite order the coloured accent is
        # overwritten by the neutral 1px border and never shows.
        rows.append(
            f'''<div style="background:{_CARD};padding:18px 20px;border-radius:10px;margin-bottom:10px;border:1px solid {_BORDER};border-left:3px solid {c};display:flex;justify-content:space-between;align-items:center;">
                <div>
                    <div style="color:{_TEXT};font-size:0.98em;font-weight:600;">{s["title"]}</div>
                    <div style="color:{_MUTED};font-size:0.8em;margin-top:3px;font-family:monospace;">{s["id"]}</div>
                </div>
                <span style="background:{c}18;color:{c};padding:4px 14px;border-radius:6px;font-size:0.75em;font-weight:700;letter-spacing:0.04em;">{s["difficulty"].upper()}</span>
            </div>'''
        )
    return "".join(rows)
|
| 269 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
with gr.Blocks(title="EU AI Act Compliance Auditor") as landing_app:
|
| 271 |
+
gr.HTML(f'<style>.gradio-container{{background:{_BG}!important;}} .tab-nav button{{color:{_MUTED}!important;background:transparent!important;border:none!important;font-weight:500;}} .tab-nav button.selected{{color:{_GOLD}!important;border-bottom:2px solid {_GOLD}!important;}}</style>')
|
| 272 |
with gr.Tabs():
|
| 273 |
with gr.Tab("Overview"):
|
| 274 |
+
gr.HTML(_hero())
|
| 275 |
+
gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:14px;font-weight:600;'>Design Decisions</h2>")
|
| 276 |
+
gr.HTML(_design_cards())
|
| 277 |
+
gr.HTML(f"""<div style="background:{_CARD};padding:24px;border-radius:10px;border:1px solid {_BORDER};margin-top:8px;">
|
| 278 |
+
<p style="color:{_MUTED};font-size:0.9em;margin:0;line-height:1.7;">
|
| 279 |
+
<strong style="color:{_TEXT};">compliance_auditor_env</strong> · 6-component reward · 8 scenarios ·
|
| 280 |
+
11 tools · state-graph audit · parameter randomization · deterministic grading
|
| 281 |
+
</p>
|
| 282 |
+
</div>""")
|
| 283 |
|
| 284 |
with gr.Tab("Scenarios"):
|
| 285 |
+
gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:14px;font-weight:600;'>8 Compliance Audit Scenarios</h2>")
|
| 286 |
+
gr.HTML(_scenarios_html())
|
| 287 |
|
| 288 |
with gr.Tab("Playground"):
|
| 289 |
+
gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:8px;font-weight:600;'>Interactive Audit</h2>")
|
| 290 |
+
gr.HTML(f"<p style='color:{_MUTED};margin-bottom:16px;'>Reset to start a session, then call audit tools sequentially.</p>")
|
|
|
|
| 291 |
session_state = gr.State(value=None)
|
| 292 |
with gr.Row():
|
| 293 |
diff_dropdown = gr.Dropdown(choices=["easy", "medium", "hard"], value="easy", label="Difficulty")
|
|
|
|
| 317 |
with _sessions_lock:
|
| 318 |
env = _sessions.get(sid)
|
| 319 |
if not env:
|
| 320 |
+
return sid, {"error": "Session expired"}
|
| 321 |
fn = env._tool_fns.get(tool_name)
|
| 322 |
if not fn:
|
| 323 |
return sid, {"error": f"Unknown tool: {tool_name}"}
|
| 324 |
try:
|
| 325 |
kwargs = json.loads(args_str) if args_str.strip() else {}
|
| 326 |
except json.JSONDecodeError:
|
| 327 |
+
return sid, {"error": "Invalid JSON"}
|
| 328 |
try:
|
| 329 |
result = fn(**kwargs)
|
| 330 |
return sid, json.loads(result) if isinstance(result, str) else result
|
|
|
|
| 335 |
call_btn.click(pg_call, [session_state, tool_dropdown, args_input], [session_state, output_box])
|
| 336 |
|
| 337 |
with gr.Tab("Try It"):
|
| 338 |
+
gr.HTML(f"""<div style="background:{_CARD};padding:28px;border-radius:10px;border:1px solid {_BORDER};">
|
| 339 |
+
<h2 style="color:{_TEXT};margin-top:0;font-weight:600;">Run the Baseline Agent</h2>
|
| 340 |
+
<p style="color:{_MUTED};margin-bottom:16px;">Supports NVIDIA NIM and HuggingFace Inference API.</p>
|
| 341 |
+
<pre style="background:{_BG};padding:18px;border-radius:8px;color:{_GOLD};overflow-x:auto;border:1px solid {_BORDER};font-size:0.9em;line-height:1.7;"><code>export API_BASE_URL="https://integrate.api.nvidia.com/v1"
|
| 342 |
export MODEL_NAME="google/gemma-4-31b-it"
|
| 343 |
export HF_TOKEN="your-api-key"
|
| 344 |
python inference.py --space https://Itachi1824-compliance-auditor-env.hf.space</code></pre>
|
| 345 |
+
</div>""")
|
|
|
|
| 346 |
|
| 347 |
# Register the more specific "/web" mount before the catch-all "/" mount.
# Routes are matched in registration order and a mount at "/" matches every
# path, so mounting "/" first would leave the "/web" mount unreachable.
# NOTE(review): the same Blocks instance is mounted twice, as in the original;
# confirm that Gradio supports this for the version in use.
gr.mount_gradio_app(app, landing_app, path="/web")
gr.mount_gradio_app(app, landing_app, path="/")
|
| 349 |
_gradio_mounted = True
|
| 350 |
|
| 351 |
except Exception as e:
|