sankalphs committed on
Commit
84c560e
·
verified ·
1 Parent(s): 5eaeea0

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +643 -0
app.py ADDED
@@ -0,0 +1,643 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Duel of Nemotron - Hugging Face Space entry point.
2
+
3
+ Hybrid architecture (decoupled, non-blocking):
4
+
5
+ Browser (player attack)
6
+
7
+ │ POST /api/pick_move ──▶ Gemma 3 270M + LoRA (CPU, ~100ms)
8
+ │ reads STRATEGY_CACHE synchronously
9
+ │ returns move + reasoning
10
+
11
+
12
+ STRATEGY_CACHE ◀── background asyncio.Task refreshes every ~11s
13
+ │ by calling Modal Nemotron (A10) in the
14
+ │ background. NEVER blocks an attack.
15
+
16
+
17
+ Modal Nemotron writes new aggression/defense/... weights + reasoning
18
+
19
+ Duel lifecycle:
20
+ POST /api/duel/start -- player presses DUEL; starts the refresher +
21
+ opens an event log. Auto-stops after 5min idle.
22
+ POST /api/duel/event -- frontend appends each exchange (the "chat log"
23
+ between Nemotron strategy and Gemma execution).
24
+ POST /api/duel/end -- freezes the log, returns the full transcript.
25
+ GET /api/duel/summary -- Nemotron narrates the whole match once.
26
+
27
+ Gradio interface stays at /gradio for the hackathon requirement.
28
+ """
29
+ import asyncio
30
+ import json
31
+ import os
32
+ import time
33
+ import uuid
34
+ from pathlib import Path
35
+
36
+ import gradio as gr
37
+ import httpx
38
+ from fastapi import FastAPI, Request
39
+ from fastapi.middleware.cors import CORSMiddleware
40
+ from fastapi.responses import FileResponse, JSONResponse
41
+ from fastapi.staticfiles import StaticFiles
42
+
43
+ from gemma_npc import MOVES, get_model, pick_counter_move, make_move_mask, state_to_features, remap_bn_state_to_ln
44
+
45
# Filesystem locations, resolved relative to this file.
STATIC_DIR = Path(__file__).parent / "static"
ADAPTER_DIR = Path(__file__).parent / "adapters" / "ref"

# Upstream base URLs, read from the environment ("" when unset).
# Trailing slashes are stripped from BOTH so that building a URL such as
# f"{MODEL_SERVER}/strategize" can never produce a double slash.
MODEL_SERVER = os.environ.get("MODEL_SERVER", "").rstrip("/")
GEMMA_SERVER = os.environ.get("GEMMA_SERVER", "").rstrip("/")

# How often the background task refreshes the strategy cache from Modal.
# Jittered between REFRESH_MIN and REFRESH_MAX seconds so we don't hit a
# cold-starting container on a fixed cadence. Attacks are served from the
# existing cache in the meantime.
REFRESH_MIN = 10.0
REFRESH_MAX = 12.0

# If an active duel sees no activity for this many seconds, the background
# refresher marks it inactive and stops calling Modal.
DUEL_IDLE_TIMEOUT = 5 * 60

_tiny_model = None  # legacy global; superseded by gemma_npc singleton
60
+
61
# ----------------------------------------------------------------------------
# Strategy cache: the background refresher (and the legacy /strategize route)
# write it; /api/pick_move only reads it.
# ----------------------------------------------------------------------------
DEFAULT_WEIGHTS = dict(
    aggression=0.55,
    defense=0.50,
    parry_affinity=0.40,
    kick_affinity=0.35,
    grapple_affinity=0.30,
)

# Guards every read-modify-write of _STRATEGY_CACHE.
_STRATEGY_LOCK = asyncio.Lock()

_STRATEGY_CACHE: dict = {
    "weights": dict(DEFAULT_WEIGHTS),  # private copy; never alias the defaults
    "reasoning": "Initial balanced stance -- waiting for first Nemotron read.",
    "source": "default",
    "updated_at": 0.0,  # 0.0 means "never refreshed"
    "last_sequence": "",
}

# ----------------------------------------------------------------------------
# Duel lifecycle state (a single, process-wide duel)
# ----------------------------------------------------------------------------
_duel_state: dict = {
    "active": False,        # is a match in progress?
    "match_id": None,
    "started_at": 0.0,
    "last_activity": 0.0,   # compared against DUEL_IDLE_TIMEOUT by the refresher
    "events": [],           # the transcript / chat log
    "ended": False,
}

# match_id -> summary payload
_summary_cache: dict = {}
94
+
95
+
96
def get_model():
    """Return whatever ``gemma_npc.get_model()`` returns.

    App-level wrapper around the loader in ``gemma_npc``; the module is
    imported at call time. This definition shadows the ``get_model`` name
    imported from ``gemma_npc`` at the top of the file.
    """
    import gemma_npc

    return gemma_npc.get_model()
105
+
106
+
107
app = FastAPI(title="Nemotron Duel")

# CORS: any origin may call the API. Credentials are NOT allowed together
# with the wildcard origin -- the FastAPI/Starlette CORS documentation
# requires explicit origins when allow_credentials=True. Nothing in this file
# relies on cookies or auth headers.
# NOTE(review): if a cross-origin frontend sends credentialed requests, list
# its origin(s) explicitly here and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


# React static files: mount the built asset directory only when it exists,
# so the API still starts without a frontend build.
if STATIC_DIR.exists():
    _assets = STATIC_DIR / "assets"
    if _assets.exists():
        app.mount("/assets", StaticFiles(directory=str(_assets)), name="assets")
123
+
124
+
125
@app.on_event("startup")
async def _startup():
    """Load the NPC model and start the background strategy refresher.

    ``get_model()`` is called synchronously before the refresher task is
    created. The task handle is stored on ``app.state`` because the event
    loop keeps only a weak reference to tasks; an unreferenced task may be
    garbage-collected before it finishes.
    """
    get_model()
    app.state.strategy_refresher = asyncio.create_task(_strategy_refresher_loop())
130
+
131
+
132
async def _strategy_refresher_loop():
    """Run forever, refreshing the strategy cache while a duel is live.

    Each iteration does one of three things:
      * duel live, not idle-timed-out, MODEL_SERVER set: fetch once from
        Modal, then sleep a jittered REFRESH_MIN..REFRESH_MAX seconds;
      * duel live but idle for DUEL_IDLE_TIMEOUT or longer: mark the duel
        inactive, then sleep 15s;
      * otherwise: sleep 15s.

    Cancellation propagates; any other exception is printed and followed by
    a 20s back-off so the loop keeps running.
    """
    while True:
        try:
            duel_live = _duel_state["active"] and not _duel_state["ended"]
            idle_seconds = time.time() - _duel_state.get("last_activity", 0)
            timed_out = idle_seconds >= DUEL_IDLE_TIMEOUT

            if duel_live and not timed_out and MODEL_SERVER:
                await _refresh_strategy_from_modal()
                # Jitter fraction in [0, 1) taken from a random UUID.
                jitter = (uuid.uuid4().int % 1000) / 1000.0
                await asyncio.sleep(REFRESH_MIN + jitter * (REFRESH_MAX - REFRESH_MIN))
                continue

            if duel_live and timed_out:
                # Idle for too long -> auto-stop the duel.
                _duel_state["active"] = False

            # Parked: nothing to refresh, so check back infrequently.
            await asyncio.sleep(15.0)
        except asyncio.CancelledError:
            raise
        except Exception as e:  # noqa: BLE001 - the loop must never die
            print(f"[strategy_refresher] error: {e!r}")
            await asyncio.sleep(20.0)
159
+
160
+
161
async def _refresh_strategy_from_modal():
    """Fetch one strategy read from Modal and store it in the cache.

    Posts the latest move sequence and a live HP/stamina/round/distance
    snapshot to ``{MODEL_SERVER}/strategize``. On success the returned
    weights are merged over DEFAULT_WEIGHTS (unknown keys ignored,
    non-numeric values skipped), the cache is updated under the lock, and a
    ``nemotron_strategy`` event is appended to the transcript if a duel is
    still live.

    Any failure -- network error, non-2xx status, invalid or non-object
    JSON -- keeps the cached weights and only flips ``source`` to
    ``"offline"``. This function never raises.
    """
    sequence = _duel_state.get("last_sequence", "") or _STRATEGY_CACHE.get("last_sequence", "")
    state = {
        "playerHp": _duel_state.get("player_hp", 100),
        "npcHp": _duel_state.get("npc_hp", 100),
        "playerStamina": _duel_state.get("player_stamina", 100),
        "npcStamina": _duel_state.get("npc_stamina", 100),
        "round": _duel_state.get("round", 1),
        "distance": _duel_state.get("distance", "mid"),
    }
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{MODEL_SERVER}/strategize",
                json={"sequence": sequence, "state": state},
            )
        # Without this, an HTTP error with a JSON body would be recorded as a
        # successful read that resets the weights to the defaults.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError(f"unexpected /strategize payload: {type(data).__name__}")

        weights = data.get("weights")
        if not isinstance(weights, dict):
            weights = {}
        merged = dict(DEFAULT_WEIGHTS)
        for k in merged:
            if k in weights:
                try:
                    merged[k] = float(weights[k])
                except (TypeError, ValueError):
                    pass  # keep the default for this key

        # A missing or null reasoning must not put None into the cache or log.
        reasoning = data.get("reasoning")
        if not isinstance(reasoning, str):
            reasoning = None

        async with _STRATEGY_LOCK:
            _STRATEGY_CACHE.update({
                "weights": merged,
                "reasoning": reasoning if reasoning is not None else _STRATEGY_CACHE["reasoning"],
                "source": "nemotron_modal",
                "updated_at": time.time(),
                "last_sequence": sequence,
            })
        # Record the strategist's read into the event log.
        if _duel_state["active"] and not _duel_state["ended"]:
            _duel_state["events"].append({
                "t": round(time.time() - _duel_state["started_at"], 2),
                "kind": "nemotron_strategy",
                "weights": dict(merged),  # copy so the log entry is not aliased to the cache
                "reasoning": reasoning or "",
            })
    except Exception as e:  # noqa: BLE001 - keep the cache, just log
        print(f"[strategy_refresher] Modal fetch failed (cache retained): {e!r}")
        async with _STRATEGY_LOCK:
            _STRATEGY_CACHE["source"] = "offline"
207
+
208
+
209
async def _current_strategy() -> dict:
    """Return a snapshot of the strategy cache taken under the lock.

    The snapshot has the keys ``weights`` (a fresh dict copy), ``reasoning``,
    ``source`` and ``updated_at``.
    """
    async with _STRATEGY_LOCK:
        cache = _STRATEGY_CACHE
        snapshot = {"weights": dict(cache["weights"])}
        for field in ("reasoning", "source", "updated_at"):
            snapshot[field] = cache[field]
    return snapshot
217
+
218
+
219
@app.get("/")
async def serve_index():
    """Serve ``static/index.html``, or a 500 JSON error if it is missing."""
    index_path = STATIC_DIR / "index.html"
    if not index_path.exists():
        return JSONResponse({"error": "Frontend not built"}, status_code=500)
    return FileResponse(str(index_path))
225
+
226
+
227
@app.get("/favicon.svg")
async def serve_favicon():
    """Serve ``static/favicon.svg``, or a 404 JSON error if it is missing."""
    favicon_path = STATIC_DIR / "favicon.svg"
    if not favicon_path.exists():
        return JSONResponse({"error": "Not found"}, status_code=404)
    return FileResponse(str(favicon_path))
233
+
234
+
235
@app.get("/icons.svg")
async def serve_icons():
    """Serve ``static/icons.svg``, or a 404 JSON error if it is missing."""
    icons_path = STATIC_DIR / "icons.svg"
    if not icons_path.exists():
        return JSONResponse({"error": "Not found"}, status_code=404)
    return FileResponse(str(icons_path))
241
+
242
+
243
@app.get("/health")
async def health():
    """Report liveness and readiness.

    ``ready`` and ``tiny_model_loaded`` are both true when
    ``gemma_npc._model`` is not None at the time of the call. The response
    also reports whether MODEL_SERVER is configured, whether a duel is
    active, and the list of known moves.
    """
    import gemma_npc

    model_loaded = gemma_npc._model is not None
    return {
        "status": "ok",
        "ready": model_loaded,
        "tiny_model_loaded": model_loaded,
        "model_server_configured": bool(MODEL_SERVER),
        "duel_active": _duel_state["active"],
        "moves": list(MOVES),
    }
257
+
258
+
259
+ # ---------------------------------------------------------------------------
260
+ # Real-time move selection -- Gemma 3 270M + LoRA (replaces TinyFighter MLP)
261
+ # ---------------------------------------------------------------------------
262
@app.post("/api/pick_move")
async def pick_move(request: Request):
    """Pick the NPC's counter-move for the player's recent moves.

    Request body (JSON object; all fields optional):
      * ``state``: object carrying ``lastPlayerMoves`` / ``last_player_moves``
        / ``lastMoves`` as a list or a comma-separated string. When ``state``
        is absent the body itself is used as the state.
      * ``strategy``: weights to report instead of the cached ones.

    The last five player moves (default ``"jab"``) are passed to
    ``pick_counter_move``. The strategy cache is read once, so the reported
    weights and ``strategy_source`` come from the same snapshot.

    Returns the move, a confidence of 1.0 when the source is ``gemma_lora``
    and 0.25 otherwise, a one-hot ``all_probs`` map, the model's
    ``reasoning`` and ``move_source``, and the strategy used.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        # A JSON array or scalar is valid JSON but not a valid request object.
        body = {}

    state = body.get("state", body)
    if not isinstance(state, dict):
        state = {}

    # One snapshot only: a refresh landing between two reads would otherwise
    # let the reported weights and source disagree.
    cached = await _current_strategy()
    # Use the cached strategy unless the caller explicitly overrides it.
    strategy = body.get("strategy") or cached["weights"]

    # Build the move-sequence prompt; take the most recent moves.
    last_player = (
        state.get("lastPlayerMoves")
        or state.get("last_player_moves")
        or state.get("lastMoves")
        or []
    )
    if isinstance(last_player, str):
        last_player = last_player.split(",")
    elif not isinstance(last_player, (list, tuple)):
        last_player = []
    last_player = [s for s in (str(m).strip() for m in last_player) if s]
    sequence = ",".join(last_player[-5:]) or "jab"

    move, reasoning, source = pick_counter_move(sequence)

    # One-hot confidence shape kept for the frontend's confidence UI.
    one_hot = {m: 0.0 for m in MOVES}
    if move in one_hot:
        one_hot[move] = 1.0
    confidence = 1.0 if source == "gemma_lora" else 0.25

    return {
        "move": move,
        "confidence": confidence,
        "top5": [{"move": move, "prob": confidence}],
        "all_probs": one_hot,
        "reasoning": reasoning,
        "move_source": source,
        "strategy_used": strategy,
        "strategy_source": cached["source"],
    }
310
+
311
+
312
+ # ---------------------------------------------------------------------------
313
+ # Duel lifecycle
314
+ # ---------------------------------------------------------------------------
315
@app.post("/api/duel/start")
async def duel_start(request: Request):
    """Start (or restart) the single process-wide duel.

    Resets the duel state to a fresh match: a new event log seeded with a
    ``duel_start`` event, full HP/stamina, round 1, distance "mid", and an
    empty ``last_sequence`` so the previous match's moves are not sent to
    the strategist for this one. Uses the client's ``matchId`` when
    provided, otherwise generates a 12-hex-character id.

    Returns ``{"matchId", "started", "modelServer"}``.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    match_id = body.get("matchId") or uuid.uuid4().hex[:12]
    now = time.time()
    _duel_state.update({
        "active": True,
        "ended": False,
        "match_id": match_id,
        "started_at": now,
        "last_activity": now,
        "events": [{
            "t": 0.0,
            "kind": "duel_start",
            "playerCharacter": body.get("playerCharacter"),
            "npcCharacter": body.get("npcCharacter"),
        }],
        "player_hp": 100,
        "npc_hp": 100,
        "player_stamina": 100,
        "npc_stamina": 100,
        "round": 1,
        "distance": "mid",
        # Drop the previous match's move sequence.
        "last_sequence": "",
    })
    return {"matchId": match_id, "started": True, "modelServer": bool(MODEL_SERVER)}
347
+
348
+
349
@app.post("/api/duel/event")
async def duel_event(request: Request):
    """Append one exchange to the transcript of the active duel.

    Returns 400 ``{"ok": False, "reason": "no_active_duel"}`` when no duel is
    live. Otherwise it bumps ``last_activity``, copies any of
    ``player_hp`` / ``npc_hp`` / ``player_stamina`` / ``npc_stamina`` /
    ``round`` / ``distance`` from the body into the live snapshot used by the
    background refresher, records ``lastMoves`` (a list or a comma-separated
    string) as the latest move sequence, and appends the event.

    Returns ``{"ok": True, "count": <events so far>}``.
    """
    if not _duel_state["active"] or _duel_state["ended"]:
        return JSONResponse({"ok": False, "reason": "no_active_duel"}, status_code=400)
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    _duel_state["last_activity"] = time.time()
    # Keep the live snapshot fresh for the background refresher.
    for k in ("player_hp", "npc_hp", "player_stamina", "npc_stamina", "round", "distance"):
        if k in body:
            _duel_state[k] = body[k]

    # Accept the same shapes /api/pick_move accepts. Joining a plain string
    # directly would split it into single characters.
    last_moves = body.get("lastMoves") or []
    if isinstance(last_moves, str):
        last_moves = last_moves.split(",")
    elif not isinstance(last_moves, (list, tuple)):
        last_moves = []
    seq = ",".join(s for s in (str(m).strip() for m in last_moves) if s)
    if seq:
        _duel_state["last_sequence"] = seq

    entry = {
        "t": round(time.time() - _duel_state["started_at"], 2),
        "kind": body.get("kind", "exchange"),
        "playerMove": body.get("playerMove"),
        "npcMove": body.get("npcMove"),
        "outcome": body.get("outcome"),  # "hit" | "blocked" | "parried" | "whiff"
        "damage": body.get("damage"),
        "playerHp": body.get("player_hp", _duel_state.get("player_hp")),
        "npcHp": body.get("npc_hp", _duel_state.get("npc_hp")),
        "round": body.get("round", _duel_state.get("round")),
    }
    # Optional: the tiny model's pick at this moment, if the client sent it.
    if "tinyMove" in body:
        entry["tinyMove"] = body["tinyMove"]
        entry["tinyConfidence"] = body.get("tinyConfidence")

    _duel_state["events"].append(entry)
    return {"ok": True, "count": len(_duel_state["events"])}
389
+
390
+
391
@app.post("/api/duel/end")
async def duel_end(request: Request):
    """Freeze the transcript and return it for the summary screen.

    Marks the duel as ended and inactive and appends a ``duel_end`` event
    with the winner and scores from the body. The call is idempotent: if the
    duel is already ended, nothing is appended and the stored transcript is
    returned with the winner from the recorded ``duel_end`` event, so a
    client retry cannot duplicate the final event.

    Returns ``{"matchId", "events", "winner"}``.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    events = _duel_state["events"]

    if _duel_state["ended"]:
        recorded = next((e for e in reversed(events) if e.get("kind") == "duel_end"), {})
        return {
            "matchId": _duel_state["match_id"],
            "events": events,
            "winner": recorded.get("winner"),
        }

    _duel_state["ended"] = True
    _duel_state["active"] = False
    winner = body.get("winner")
    started_at = _duel_state["started_at"]
    events.append({
        # started_at is 0.0 until a duel has been started; avoid stamping the
        # event with the raw epoch time in that case.
        "t": round(time.time() - started_at, 2) if started_at else 0.0,
        "kind": "duel_end",
        "winner": winner,
        "playerScore": body.get("playerScore"),
        "npcScore": body.get("npcScore"),
    })
    return {
        "matchId": _duel_state["match_id"],
        "events": events,
        "winner": winner,
    }
413
+
414
+
415
@app.get("/api/duel/summary")
async def duel_summary():
    """Return a narrated summary of the current match.

    With MODEL_SERVER set, the transcript is posted to
    ``{MODEL_SERVER}/summarize``; otherwise, or when that call fails, a
    locally generated summary is returned so the UI always has something to
    show.

    Caching (per match id):
      * only summaries of an ENDED duel are cached, so a mid-match request
        cannot freeze a partial narration;
      * the offline fallback produced after a failed Modal call is never
        cached, so a later request retries Nemotron.
    """
    match_id = _duel_state.get("match_id")
    if match_id and match_id in _summary_cache:
        return _summary_cache[match_id]

    events = list(_duel_state.get("events", []))
    transcript = _format_transcript(events)
    cacheable = bool(match_id) and bool(_duel_state.get("ended"))

    if not MODEL_SERVER:
        payload = _local_summary(events, transcript)
        if cacheable:
            _summary_cache[match_id] = payload
        return payload

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.post(
                f"{MODEL_SERVER}/summarize",
                json={"transcript": transcript, "events": events},
            )
        # Treat HTTP errors as failures instead of parsing an error body.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError(f"unexpected /summarize payload: {type(data).__name__}")
        payload = {
            # An empty or missing summary falls back to the local narration.
            "summary": data.get("summary") or _local_summary(events, transcript)["summary"],
            "moments": data.get("moments") or [],
            "transcript": transcript,
            "source": "nemotron_modal",
        }
    except Exception as e:  # noqa: BLE001
        local = _local_summary(events, transcript)
        # Deliberately not cached: the failure may be transient.
        return {**local, "source": "offline", "error": str(e)[:120]}

    if cacheable:
        _summary_cache[match_id] = payload
    return payload
455
+
456
+
457
+ def _format_transcript(events: list) -> str:
458
+ """Render the event log as a compact text transcript for Nemotron."""
459
+ lines = []
460
+ for e in events:
461
+ kind = e.get("kind", "?")
462
+ if kind == "nemotron_strategy":
463
+ w = e.get("weights", {})
464
+ lines.append(
465
+ f"[{e.get('t',0):.1f}s] STRATEGY agg={w.get('aggression',0):.2f} "
466
+ f"def={w.get('defense',0):.2f} kick={w.get('kick_affinity',0):.2f} "
467
+ f"grapple={w.get('grapple_affinity',0):.2f} :: {e.get('reasoning','')[:80]}"
468
+ )
469
+ elif kind == "exchange":
470
+ lines.append(
471
+ f"[{e.get('t',0):.1f}s] player={e.get('playerMove')} npc={e.get('npcMove')} "
472
+ f"-> {e.get('outcome')} dmg={e.get('damage')} "
473
+ f"(playerHp={e.get('playerHp')} npcHp={e.get('npcHp')})"
474
+ + (f" tinyPicked={e.get('tinyMove')}" if e.get("tinyMove") else "")
475
+ )
476
+ elif kind == "duel_end":
477
+ lines.append(f"[{e.get('t',0):.1f}s] MATCH END winner={e.get('winner')} "
478
+ f"score={e.get('playerScore')}-{e.get('npcScore')}")
479
+ return "\n".join(lines)
480
+
481
+
482
+ def _local_summary(events: list, transcript: str) -> dict:
483
+ """Heuristic summary used when Nemotron is unavailable."""
484
+ exchanges = [e for e in events if e.get("kind") == "exchange"]
485
+ strat_count = sum(1 for e in events if e.get("kind") == "nemotron_strategy")
486
+ end = next((e for e in reversed(events) if e.get("kind") == "duel_end"), {})
487
+ winner = end.get("winner", "unknown")
488
+ total_dmg = sum((e.get("damage") or 0) for e in exchanges)
489
+ last_w = next((e.get("weights", {}) for e in reversed(events) if e.get("kind") == "nemotron_strategy"), {})
490
+
491
+ if last_w:
492
+ stance = ("aggressive" if last_w.get("aggression", 0.5) > 0.6
493
+ else "defensive" if last_w.get("defense", 0.5) > 0.6
494
+ else "balanced")
495
+ else:
496
+ stance = "balanced (local fallback -- Nemotron not connected)"
497
+
498
+ summary = (
499
+ f"The duel ran {len(exchanges)} exchanges across the match, dealing "
500
+ f"~{total_dmg} total damage. Nemotron's strategist issued {strat_count} "
501
+ f"reads and settled into a {stance} stance. "
502
+ f"{'The player carried the duel.' if winner == 'player' else 'The NPC prevailed.' if winner == 'npc' else 'The match ended.'} "
503
+ f"(Generated locally -- connect MODEL_SERVER for Nemotron's own narration.)"
504
+ )
505
+ return {
506
+ "summary": summary,
507
+ "moments": [],
508
+ "transcript": transcript,
509
+ "source": "local_fallback",
510
+ }
511
+
512
+
513
+ # ---------------------------------------------------------------------------
514
+ # Legacy /strategize -- kept for the Gradio panel and manual probing.
515
+ # Returns the cache instantly if fresh, else does one synchronous fetch.
516
+ # ---------------------------------------------------------------------------
517
@app.post("/strategize")
async def strategize(request: Request):
    """Legacy strategy endpoint: return the cache if fresh, else fetch once.

    Request body (JSON object, optional): ``sequence`` (string, or a list
    that is joined with commas) and ``state`` (forwarded to Modal as-is).

    Resolution order:
      1. cache refreshed less than REFRESH_MIN seconds ago and no
         ``sequence`` given -> cached weights/reasoning/source;
      2. MODEL_SERVER unset -> DEFAULT_WEIGHTS with source
         ``local_fallback``;
      3. otherwise one POST to ``{MODEL_SERVER}/strategize``; on success the
         cache is updated, on any failure (network, non-2xx, bad JSON)
         DEFAULT_WEIGHTS are returned with source ``offline``.
    """
    try:
        body = await request.json()
    except Exception:
        body = {}
    if not isinstance(body, dict):
        body = {}

    # Normalise so the slicing below cannot fail on a non-string sequence.
    sequence = body.get("sequence") or ""
    if isinstance(sequence, (list, tuple)):
        sequence = ",".join(str(m) for m in sequence)
    elif not isinstance(sequence, str):
        sequence = str(sequence)
    state = body.get("state", {})

    cached = await _current_strategy()
    fresh = cached["updated_at"] and (time.time() - cached["updated_at"] < REFRESH_MIN)
    if fresh and not sequence:
        return JSONResponse({
            "weights": cached["weights"],
            "reasoning": cached["reasoning"],
            "source": cached["source"],
        })

    if not MODEL_SERVER:
        return JSONResponse({
            "weights": dict(DEFAULT_WEIGHTS),
            "reasoning": f"Local fallback mode -- adapting to: {sequence[:60]}",
            "source": "local_fallback",
        })

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            resp = await client.post(
                f"{MODEL_SERVER}/strategize",
                json={"sequence": sequence, "state": state},
            )
        # An HTTP error must not be cached as a successful Nemotron read.
        resp.raise_for_status()
        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError(f"unexpected /strategize payload: {type(data).__name__}")

        weights = data.get("weights")
        if not isinstance(weights, dict):
            weights = {}
        merged = dict(DEFAULT_WEIGHTS)
        for k in merged:
            if k in weights:
                try:
                    merged[k] = float(weights[k])
                except (TypeError, ValueError):
                    pass  # keep the default for this key

        reasoning = data.get("reasoning")
        if not isinstance(reasoning, str):
            reasoning = None

        async with _STRATEGY_LOCK:
            _STRATEGY_CACHE.update({
                "weights": merged,
                "reasoning": reasoning if reasoning is not None else cached["reasoning"],
                "source": "nemotron_modal",
                "updated_at": time.time(),
                "last_sequence": sequence,
            })
        return JSONResponse({
            "weights": merged,
            "reasoning": reasoning or "",
            "source": "nemotron_modal",
        })
    except Exception as e:
        return JSONResponse({
            "weights": dict(DEFAULT_WEIGHTS),
            "reasoning": f"Offline mode: {str(e)[:80]}",
            "source": "offline",
        })
577
+
578
+
579
+ # ---------------------------------------------------------------------------
580
+ # Gradio interface (hackathon requirement)
581
+ # ---------------------------------------------------------------------------
582
def gradio_predict(
    player_hp, npc_hp, player_stamina, npc_stamina,
    distance, aggression, defense, parry_affinity,
    kick_affinity, grapple_affinity, round_num,
    last_npc_moves_str, last_player_moves_str,
):
    """Gradio callback: pick a counter-move for the last player moves.

    Only ``last_player_moves_str`` (comma-separated) is used; every other
    argument is accepted for the UI wiring and ignored. The last five moves
    are sent to ``pick_counter_move``, defaulting to ``"jab"`` when none are
    given.

    Returns ``(move, detail)`` where ``detail`` is the source line followed
    by the model's reasoning.
    """
    recent_moves = []
    for token in last_player_moves_str.split(","):
        token = token.strip()
        if token:
            recent_moves.append(token)

    sequence = ",".join(recent_moves[-5:])
    if not sequence:
        sequence = "jab"

    move, reasoning, source = pick_counter_move(sequence)
    detail = f"source: {source}\n{reasoning}"
    return move, detail
593
+
594
+
595
# NOTE(review): the original indentation was not recoverable here; the nesting
# below (two columns in one row, button and outputs beneath the row) is a
# reconstruction -- confirm against the rendered panel.
with gr.Blocks(title="Nemotron Duel -- Gemma NPC") as demo:
    gr.Markdown(
        "# Nemotron Duel -- Gemma 3 270M + LoRA NPC\n"
        "Real-time NPC counter-move generation.\n"
        "Full 3D game at root path. This Gradio panel demonstrates the model directly."
    )
    with gr.Row():
        # Left column: fight-state inputs (gradio_predict ignores them).
        with gr.Column():
            player_hp = gr.Slider(0, 100, value=80, label="Player HP (informational)")
            npc_hp = gr.Slider(0, 100, value=50, label="NPC HP (informational)")
            player_stamina = gr.Slider(0, 100, value=60, label="Player Stamina (informational)")
            npc_stamina = gr.Slider(0, 100, value=40, label="NPC Stamina (informational)")
            distance = gr.Radio(["near", "mid", "far"], value="mid", label="Distance (informational)")
            round_num = gr.Slider(1, 10, value=3, step=1, label="Round (informational)")
        # Right column: strategy weights and move histories.
        with gr.Column():
            aggression = gr.Slider(0, 1, value=0.7, label="Aggression (Nemotron, informational)")
            defense = gr.Slider(0, 1, value=0.3, label="Defense (Nemotron, informational)")
            parry_affinity = gr.Slider(0, 1, value=0.4, label="Parry Affinity (informational)")
            kick_affinity = gr.Slider(0, 1, value=0.6, label="Kick Affinity (informational)")
            grapple_affinity = gr.Slider(0, 1, value=0.2, label="Grapple Affinity (informational)")
            last_npc_moves_str = gr.Textbox("jab, block, kick", label="Last NPC moves (ignored)")
            last_player_moves_str = gr.Textbox("jab, jab, jab", label="Last player moves")

    btn = gr.Button("Pick Counter Move", variant="primary")
    move_out = gr.Textbox(label="Selected Move")
    detail_out = gr.Textbox(label="Model output", lines=4)

    # Input order must match gradio_predict's positional parameters.
    _predict_inputs = [
        player_hp, npc_hp, player_stamina, npc_stamina,
        distance, aggression, defense, parry_affinity,
        kick_affinity, grapple_affinity, round_num,
        last_npc_moves_str, last_player_moves_str,
    ]
    btn.click(
        fn=gradio_predict,
        inputs=_predict_inputs,
        outputs=[move_out, detail_out],
    )

# Load the model at import time.
get_model()

# Serve the Gradio panel under /gradio on the same FastAPI app.
app = gr.mount_gradio_app(app, demo, path="/gradio")
634
+
635
+
636
if __name__ == "__main__":
    import uvicorn

    # Raw PORT value (string from the environment, or the int default 7860).
    # It is printed as-is and converted to int only for uvicorn.
    raw_port = os.environ.get("PORT", 7860)

    print(f"Starting Nemotron Duel Space on port {raw_port}")
    print(f"  NPC model: Gemma 3 270M + LoRA adapter at {ADAPTER_DIR}")
    print(f"  Gemma server (Modal): {GEMMA_SERVER or 'NOT SET (local CPU fallback)'}")
    print(f"  Model server (Nemotron Modal): {MODEL_SERVER or 'NOT SET (local fallback)'}")
    uvicorn.run(app, host="0.0.0.0", port=int(raw_port))