Itachi-1824 committed on
Commit
51a14c0
·
1 Parent(s): ff2e396

feat: premium ui (charcoal+gold authority palette), multi-model eval, /web mount fix

Browse files
Files changed (1) hide show
  1. server/app.py +73 -83
server/app.py CHANGED
@@ -201,103 +201,93 @@ async def api_close(req: CloseRequest):
201
 
202
  _gradio_mounted = False
203
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  try:
205
  import gradio as gr
206
 
207
- def _build_hero_html():
208
- scenarios_count = len(SCENARIO_LIST)
209
- tools_count = 11 # 10 tools + verify
210
- tiers = len(DIFFICULTY_TIERS)
211
  return f"""
212
- <div style="background:linear-gradient(135deg,#0f0f23 0%,#1a1a3e 50%,#0f0f23 100%);padding:40px;border-radius:16px;margin-bottom:24px;border:1px solid #2a2a5a;">
213
- <h1 style="color:#00d4aa;font-size:2.2em;margin:0 0 8px 0;font-family:'Inter',sans-serif;">EU AI Act Compliance Auditor</h1>
214
- <p style="color:#8888bb;font-size:1.1em;margin:0 0 24px 0;">
215
- An MCP-based environment where LLM agents audit AI systems for EU AI Act compliance
216
- from risk classification to finding identification to remediation planning.
217
- Parameter randomization on every reset prevents memorization.
 
 
 
218
  </p>
219
- <div style="display:grid;grid-template-columns:repeat(5,1fr);gap:16px;">
220
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
221
- <div style="color:#00d4aa;font-size:2em;font-weight:bold;">{scenarios_count}</div>
222
- <div style="color:#8888bb;font-size:0.85em;">SCENARIOS</div>
223
- </div>
224
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
225
- <div style="color:#00d4aa;font-size:2em;font-weight:bold;">{tools_count}</div>
226
- <div style="color:#8888bb;font-size:0.85em;">MCP TOOLS</div>
227
- </div>
228
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
229
- <div style="color:#00d4aa;font-size:2em;font-weight:bold;">6</div>
230
- <div style="color:#8888bb;font-size:0.85em;">REWARD COMPS</div>
231
- </div>
232
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
233
- <div style="color:#00d4aa;font-size:2em;font-weight:bold;">{tiers}</div>
234
- <div style="color:#8888bb;font-size:0.85em;">DIFFICULTY TIERS</div>
235
- </div>
236
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;text-align:center;border:1px solid #2a2a5a;">
237
- <div style="color:#00d4aa;font-size:2em;font-weight:bold;">Aug 2026</div>
238
- <div style="color:#8888bb;font-size:0.85em;">EU DEADLINE</div>
239
- </div>
240
  </div>
241
- </div>
242
- """
243
 
244
- def _build_uniqueness_html():
245
  cards = [
246
- ("Real regulatory scenarios", "Scenarios based on actual EU AI Act articles prohibited social scoring, high-risk hiring AI, deepfake compliance, medical device audits. Not toy problems."),
247
- ("Full audit workflow", "10 MCP tools mirror a real compliance auditor's toolkit: classify, review documentation, audit data, verify oversight, check transparency, assess risk management."),
248
- ("State-graph audit process", "Each scenario is a directed graph with progress/no_effect/worsened transitions. Partial credit via BFS depth. Wrong audit steps waste budget."),
249
- ("6-component reward", "Classification accuracy, finding completeness, finding precision, remediation quality, methodology adherence, and efficiency. Anti-exploit: no reward gaming."),
250
- ("Parameter randomization", "Company names, deployment dates, regions, and system versions re-rolled on every reset. Agents must learn the AUDIT PROCESS, not memorize answers."),
251
- ("Timely: Aug 2026 deadline", "EU AI Act enforcement begins August 2, 2026. Every company deploying AI in Europe needs compliance auditing. This environment trains agents for a real, urgent need."),
252
  ]
253
- html = '<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:24px;">'
254
  for title, desc in cards:
255
- html += f"""
256
- <div style="background:#1a1a3e;padding:20px;border-radius:12px;border:1px solid #2a2a5a;">
257
- <h3 style="color:#00d4aa;margin:0 0 8px 0;font-size:1.05em;">{title}</h3>
258
- <p style="color:#8888bb;margin:0;font-size:0.9em;line-height:1.5;">{desc}</p>
259
- </div>
260
- """
261
- html += "</div>"
262
- return html
263
-
264
- def _build_scenarios_html():
265
  html = ""
266
  for s in SCENARIO_LIST:
267
- diff_color = {"easy": "#00d4aa", "medium": "#ffaa00", "hard": "#ff4444"}
268
- color = diff_color.get(s["difficulty"], "#8888bb")
269
- html += f"""
270
- <div style="background:#1a1a3e;padding:16px;border-radius:12px;margin-bottom:12px;border-left:4px solid {color};border:1px solid #2a2a5a;">
271
- <div style="display:flex;justify-content:space-between;align-items:center;">
272
- <h3 style="color:#e0e0ff;margin:0;font-size:1.05em;">{s['title']}</h3>
273
- <span style="background:{color}22;color:{color};padding:4px 12px;border-radius:8px;font-size:0.8em;font-weight:bold;">{s['difficulty'].upper()}</span>
274
  </div>
275
- <p style="color:#8888bb;margin:6px 0 0 0;font-size:0.85em;">ID: {s['id']}</p>
276
- </div>
277
- """
278
  return html
279
 
280
- _css = """
281
- .gradio-container { background: #0a0a0a !important; }
282
- .tab-nav button { color: #8888bb !important; background: transparent !important; border: none !important; }
283
- .tab-nav button.selected { color: #00d4aa !important; border-bottom: 2px solid #00d4aa !important; }
284
- """
285
-
286
  with gr.Blocks(title="EU AI Act Compliance Auditor") as landing_app:
 
287
  with gr.Tabs():
288
  with gr.Tab("Overview"):
289
- gr.HTML(_build_hero_html())
290
- gr.HTML("<h2 style='color:#e0e0ff;margin-bottom:12px;'>What makes this unique</h2>")
291
- gr.HTML(_build_uniqueness_html())
 
 
 
 
 
 
292
 
293
  with gr.Tab("Scenarios"):
294
- gr.HTML("<h2 style='color:#e0e0ff;margin-bottom:12px;'>8 Compliance Audit Scenarios</h2>")
295
- gr.HTML(_build_scenarios_html())
296
 
297
  with gr.Tab("Playground"):
298
- gr.HTML("<h2 style='color:#e0e0ff;margin-bottom:12px;'>Interactive Playground</h2>")
299
- gr.HTML("<p style='color:#8888bb;'>Click Reset to start a new audit session, then use the tool dropdown to call audit tools.</p>")
300
-
301
  session_state = gr.State(value=None)
302
  with gr.Row():
303
  diff_dropdown = gr.Dropdown(choices=["easy", "medium", "hard"], value="easy", label="Difficulty")
@@ -327,14 +317,14 @@ try:
327
  with _sessions_lock:
328
  env = _sessions.get(sid)
329
  if not env:
330
- return sid, {"error": "Session expired. Click Reset."}
331
  fn = env._tool_fns.get(tool_name)
332
  if not fn:
333
  return sid, {"error": f"Unknown tool: {tool_name}"}
334
  try:
335
  kwargs = json.loads(args_str) if args_str.strip() else {}
336
  except json.JSONDecodeError:
337
- return sid, {"error": "Invalid JSON in arguments"}
338
  try:
339
  result = fn(**kwargs)
340
  return sid, json.loads(result) if isinstance(result, str) else result
@@ -345,17 +335,17 @@ try:
345
  call_btn.click(pg_call, [session_state, tool_dropdown, args_input], [session_state, output_box])
346
 
347
  with gr.Tab("Try It"):
348
- gr.HTML("""
349
- <div style="background:#1a1a3e;padding:24px;border-radius:12px;border:1px solid #2a2a5a;">
350
- <h2 style="color:#e0e0ff;margin-top:0;">Run the Baseline Agent</h2>
351
- <pre style="background:#0a0a1a;padding:16px;border-radius:8px;color:#00d4aa;overflow-x:auto;"><code>export API_BASE_URL="https://integrate.api.nvidia.com/v1"
352
  export MODEL_NAME="google/gemma-4-31b-it"
353
  export HF_TOKEN="your-api-key"
354
  python inference.py --space https://Itachi1824-compliance-auditor-env.hf.space</code></pre>
355
- </div>
356
- """)
357
 
358
  gr.mount_gradio_app(app, landing_app, path="/")
 
359
  _gradio_mounted = True
360
 
361
  except Exception as e:
 
201
 
202
  _gradio_mounted = False
203
 
204
+ # Color system: Deep charcoal + warm gold (authority/prestige/trust)
205
+ # NO neon, NO AI-typical cyan/purple
206
+ _BG = "#09090B" # base background
207
+ _CARD = "#18181B" # card surface
208
+ _BORDER = "#27272A" # borders
209
+ _TEXT = "#F8FAFC" # primary text
210
+ _MUTED = "#94A3B8" # secondary text (slate)
211
+ _GOLD = "#C9A84C" # accent: authority, prestige
212
+ _EMERALD = "#10B981" # success/easy
213
+ _AMBER = "#F59E0B" # warning/medium
214
+ _ROSE = "#F43F5E" # critical/hard
215
+
216
  try:
217
  import gradio as gr
218
 
219
+ def _hero():
 
 
 
220
  return f"""
221
+ <div style="background:{_CARD};padding:48px 40px;border-radius:12px;margin-bottom:28px;border:1px solid {_BORDER};">
222
+ <div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
223
+ <div style="width:8px;height:36px;background:{_GOLD};border-radius:4px;"></div>
224
+ <h1 style="color:{_TEXT};font-size:2em;margin:0;font-weight:700;letter-spacing:-0.02em;">EU AI Act Compliance Auditor</h1>
225
+ </div>
226
+ <p style="color:{_MUTED};font-size:1.05em;margin:8px 0 32px 20px;line-height:1.6;max-width:720px;">
227
+ An MCP-based environment where LLM agents audit AI systems for EU AI Act compliance.
228
+ 8 scenarios from chatbot transparency to prohibited social scoring.
229
+ Parameter randomization prevents memorization.
230
  </p>
231
+ <div style="display:grid;grid-template-columns:repeat(5,1fr);gap:14px;">
232
+ {"".join(f'''<div style="background:{_BG};padding:20px 16px;border-radius:10px;text-align:center;border:1px solid {_BORDER};">
233
+ <div style="color:{_GOLD};font-size:1.8em;font-weight:700;">{v}</div>
234
+ <div style="color:{_MUTED};font-size:0.78em;letter-spacing:0.05em;margin-top:4px;">{k}</div>
235
+ </div>''' for k, v in [("SCENARIOS", 8), ("MCP TOOLS", 11), ("REWARD COMPS", 6), ("TIERS", 3), ("EU DEADLINE", "Aug '26")])}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  </div>
237
+ </div>"""
 
238
 
239
+ def _design_cards():
240
  cards = [
241
+ ("Real Regulatory Scenarios", "Scenarios drawn from actual EU AI Act articles: prohibited social scoring (Art. 5), high-risk hiring (Annex III), deepfake transparency (Art. 50), medical device audits. Based on real compliance gaps companies face today."),
242
+ ("Full Audit Toolkit", "11 MCP tools mirror a real compliance auditor's workflow: system overview, risk classification, documentation review, bias audit, oversight verification, transparency check, risk assessment, logging verification."),
243
+ ("State-Graph Audit Process", "Each scenario is a directed graph with progress, no-effect, and worsened transitions. Partial credit computed via BFS depth along the optimal path. Wrong audit steps waste your query budget."),
244
+ ("6-Component Reward", "Classification accuracy (20%), finding completeness (25%), finding precision (15%), remediation quality (15%), methodology adherence (15%), efficiency (10%). Designed to resist reward hacking."),
245
+ ("Parameter Randomization", "Company names, deployment dates, regions, and system versions are re-rolled on every reset. Agents must learn the audit process, not memorize specific answers."),
246
+ ("Enforcement: 113 Days Away", "EU AI Act enforcement begins August 2, 2026. Fines up to EUR 35M or 7% of global revenue. Every company deploying AI in Europe needs automated compliance auditing. This environment fills that gap."),
247
  ]
248
+ html = f'<div style="display:grid;grid-template-columns:repeat(3,1fr);gap:16px;margin-bottom:24px;">'
249
  for title, desc in cards:
250
+ html += f'''<div style="background:{_CARD};padding:24px;border-radius:10px;border:1px solid {_BORDER};">
251
+ <h3 style="color:{_TEXT};margin:0 0 10px 0;font-size:1em;font-weight:600;">{title}</h3>
252
+ <p style="color:{_MUTED};margin:0;font-size:0.88em;line-height:1.6;">{desc}</p>
253
+ </div>'''
254
+ return html + "</div>"
255
+
256
+ def _scenarios_html():
257
+ diff_colors = {"easy": _EMERALD, "medium": _AMBER, "hard": _ROSE}
 
 
258
  html = ""
259
  for s in SCENARIO_LIST:
260
+ c = diff_colors.get(s["difficulty"], _MUTED)
261
+ html += f'''<div style="background:{_CARD};padding:18px 20px;border-radius:10px;margin-bottom:10px;border-left:3px solid {c};border:1px solid {_BORDER};display:flex;justify-content:space-between;align-items:center;">
262
+ <div>
263
+ <div style="color:{_TEXT};font-size:0.98em;font-weight:600;">{s["title"]}</div>
264
+ <div style="color:{_MUTED};font-size:0.8em;margin-top:3px;font-family:monospace;">{s["id"]}</div>
 
 
265
  </div>
266
+ <span style="background:{c}18;color:{c};padding:4px 14px;border-radius:6px;font-size:0.75em;font-weight:700;letter-spacing:0.04em;">{s["difficulty"].upper()}</span>
267
+ </div>'''
 
268
  return html
269
 
 
 
 
 
 
 
270
  with gr.Blocks(title="EU AI Act Compliance Auditor") as landing_app:
271
+ gr.HTML(f'<style>.gradio-container{{background:{_BG}!important;}} .tab-nav button{{color:{_MUTED}!important;background:transparent!important;border:none!important;font-weight:500;}} .tab-nav button.selected{{color:{_GOLD}!important;border-bottom:2px solid {_GOLD}!important;}}</style>')
272
  with gr.Tabs():
273
  with gr.Tab("Overview"):
274
+ gr.HTML(_hero())
275
+ gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:14px;font-weight:600;'>Design Decisions</h2>")
276
+ gr.HTML(_design_cards())
277
+ gr.HTML(f"""<div style="background:{_CARD};padding:24px;border-radius:10px;border:1px solid {_BORDER};margin-top:8px;">
278
+ <p style="color:{_MUTED};font-size:0.9em;margin:0;line-height:1.7;">
279
+ <strong style="color:{_TEXT};">compliance_auditor_env</strong> &middot; 6-component reward &middot; 8 scenarios &middot;
280
+ 11 tools &middot; state-graph audit &middot; parameter randomization &middot; deterministic grading
281
+ </p>
282
+ </div>""")
283
 
284
  with gr.Tab("Scenarios"):
285
+ gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:14px;font-weight:600;'>8 Compliance Audit Scenarios</h2>")
286
+ gr.HTML(_scenarios_html())
287
 
288
  with gr.Tab("Playground"):
289
+ gr.HTML(f"<h2 style='color:{_TEXT};margin-bottom:8px;font-weight:600;'>Interactive Audit</h2>")
290
+ gr.HTML(f"<p style='color:{_MUTED};margin-bottom:16px;'>Reset to start a session, then call audit tools sequentially.</p>")
 
291
  session_state = gr.State(value=None)
292
  with gr.Row():
293
  diff_dropdown = gr.Dropdown(choices=["easy", "medium", "hard"], value="easy", label="Difficulty")
 
317
  with _sessions_lock:
318
  env = _sessions.get(sid)
319
  if not env:
320
+ return sid, {"error": "Session expired"}
321
  fn = env._tool_fns.get(tool_name)
322
  if not fn:
323
  return sid, {"error": f"Unknown tool: {tool_name}"}
324
  try:
325
  kwargs = json.loads(args_str) if args_str.strip() else {}
326
  except json.JSONDecodeError:
327
+ return sid, {"error": "Invalid JSON"}
328
  try:
329
  result = fn(**kwargs)
330
  return sid, json.loads(result) if isinstance(result, str) else result
 
335
  call_btn.click(pg_call, [session_state, tool_dropdown, args_input], [session_state, output_box])
336
 
337
  with gr.Tab("Try It"):
338
+ gr.HTML(f"""<div style="background:{_CARD};padding:28px;border-radius:10px;border:1px solid {_BORDER};">
339
+ <h2 style="color:{_TEXT};margin-top:0;font-weight:600;">Run the Baseline Agent</h2>
340
+ <p style="color:{_MUTED};margin-bottom:16px;">Supports NVIDIA NIM and HuggingFace Inference API.</p>
341
+ <pre style="background:{_BG};padding:18px;border-radius:8px;color:{_GOLD};overflow-x:auto;border:1px solid {_BORDER};font-size:0.9em;line-height:1.7;"><code>export API_BASE_URL="https://integrate.api.nvidia.com/v1"
342
  export MODEL_NAME="google/gemma-4-31b-it"
343
  export HF_TOKEN="your-api-key"
344
  python inference.py --space https://Itachi1824-compliance-auditor-env.hf.space</code></pre>
345
+ </div>""")
 
346
 
347
  gr.mount_gradio_app(app, landing_app, path="/")
348
+ gr.mount_gradio_app(app, landing_app, path="/web")
349
  _gradio_mounted = True
350
 
351
  except Exception as e: