Spaces:
Sleeping
Sleeping
NA commited on
Commit ·
83513fc
1
Parent(s): fb1ba55
Update submission_template agent and README
Browse files- submission_template/.gitignore +1 -0
- submission_template/README.md +23 -4
- submission_template/agent.py +442 -171
- submission_template/mcp_server.py +142 -155
submission_template/.gitignore
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
# Python
|
| 2 |
__pycache__/
|
| 3 |
*.py[cod]
|
|
|
|
| 1 |
+
|
| 2 |
# Python
|
| 3 |
__pycache__/
|
| 4 |
*.py[cod]
|
submission_template/README.md
CHANGED
|
@@ -18,11 +18,30 @@ This is my submission for the Text Adventure Agent assignment. My agent uses the
|
|
| 18 |
|
| 19 |
## Approach
|
| 20 |
|
| 21 |
-
<!-- Describe your approach here -->
|
| 22 |
|
| 23 |
-
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
## Files
|
| 28 |
|
|
|
|
| 18 |
|
| 19 |
## Approach
|
| 20 |
|
|
|
|
| 21 |
|
| 22 |
+
For the MCP server, I kept the provided `example_submission` implementation: it exposes the game through a small set of MCP tools (`play_action`, `memory`, `get_map`, `inventory`) and maintains lightweight server-side state (recent history + explored connections) to make tool outputs informative for the agent. My main contributions are therefore on the **agent side** (`agent.py`).
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
My agent still follows the same ReAct contract (THOUGHT / TOOL / ARGS), but I added **game-agnostic mechanisms** to reduce wasted steps and improve exploration robustness across different games:
|
| 26 |
+
|
| 27 |
+
1) **More robust parsing of THOUGHT/TOOL/ARGS**
|
| 28 |
+
The agent parses the LLM output with regex and JSON fallbacks (e.g., salvage `"action": "..."` if JSON is slightly malformed), and defaults to `play_action({"action": "look"})` when needed.
|
| 29 |
+
|
| 30 |
+
2) **Action normalization**
|
| 31 |
+
I normalize actions (e.g., strip quotes, remove “go/walk/move …”, trim punctuation, collapse whitespace) to reduce invalid commands caused by natural language phrasing.
|
| 32 |
+
|
| 33 |
+
3) **Local “do-not-repeat-failures” memory**
|
| 34 |
+
The agent maintains a small set of `(location, action)` pairs that produced clear failure signals (e.g., “you can’t”, “not allowed”, “unknown word”, “securely anchored”, etc.). If the same action is proposed again in the same location, the agent deterministically substitutes a different direction.
|
| 35 |
+
|
| 36 |
+
4) **Light loop prevention**
|
| 37 |
+
The agent tracks recent executed actions. If it detects immediate repetition, it forces a different action (typically another direction). This prevents common failure modes like repeatedly opening an already-open container or bouncing between two states.
|
| 38 |
+
|
| 39 |
+
5) **Best-effort map (agent-side)**
|
| 40 |
+
Independently of the server map, I also keep a minimal map by extracting room names from the first line when it looks like a location title. When a movement action changes the room name, I store `room --direction--> room` plus inferred reverse links (north↔south, etc.). This map is included in the prompt to guide exploration.
|
| 41 |
+
|
| 42 |
+
Overall, these additions mainly reduce wasted steps (bad commands, repeated failures) and make exploration more stable across games. The agent still struggles when object names are ambiguous/repeated, and with longer action chains that require multi-step planning.
|
| 43 |
+
|
| 44 |
+
|
| 45 |
|
| 46 |
## Files
|
| 47 |
|
submission_template/agent.py
CHANGED
|
@@ -27,7 +27,7 @@ import json
|
|
| 27 |
import os
|
| 28 |
import re
|
| 29 |
from dataclasses import dataclass, field
|
| 30 |
-
from typing import Optional
|
| 31 |
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from huggingface_hub import InferenceClient
|
|
@@ -38,57 +38,27 @@ load_dotenv()
|
|
| 38 |
# =============================================================================
|
| 39 |
# LLM Configuration - DO NOT MODIFY
|
| 40 |
# =============================================================================
|
| 41 |
-
|
| 42 |
-
# Model to use (fixed for fair evaluation)
|
| 43 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
_hf_token = os.getenv("HF_TOKEN")
|
| 47 |
-
if not _hf_token:
|
| 48 |
-
raise ValueError("HF_TOKEN not found. Set it in your .env file.")
|
| 49 |
-
|
| 50 |
-
LLM_CLIENT = InferenceClient(token=_hf_token)
|
| 51 |
|
| 52 |
|
| 53 |
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
Args:
|
| 58 |
-
prompt: The user prompt (current game state, history, etc.)
|
| 59 |
-
system_prompt: The system prompt (instructions for the agent)
|
| 60 |
-
seed: Random seed for reproducibility
|
| 61 |
-
max_tokens: Maximum tokens in response (default: 300)
|
| 62 |
-
|
| 63 |
-
Returns:
|
| 64 |
-
The LLM's response text
|
| 65 |
-
|
| 66 |
-
Example:
|
| 67 |
-
response = call_llm(
|
| 68 |
-
prompt="You are in a forest. What do you do?",
|
| 69 |
-
system_prompt=SYSTEM_PROMPT,
|
| 70 |
-
seed=42,
|
| 71 |
-
)
|
| 72 |
-
"""
|
| 73 |
-
messages = [
|
| 74 |
-
{"role": "system", "content": system_prompt},
|
| 75 |
-
{"role": "user", "content": prompt},
|
| 76 |
-
]
|
| 77 |
-
|
| 78 |
response = LLM_CLIENT.chat.completions.create(
|
| 79 |
model=LLM_MODEL,
|
| 80 |
messages=messages,
|
| 81 |
-
temperature=0.0,
|
| 82 |
max_tokens=max_tokens,
|
| 83 |
seed=seed,
|
| 84 |
)
|
| 85 |
-
|
| 86 |
return response.choices[0].message.content
|
| 87 |
|
| 88 |
|
| 89 |
@dataclass
|
| 90 |
class RunResult:
|
| 91 |
-
"""Result of running the agent. Do not modify this class."""
|
| 92 |
final_score: int
|
| 93 |
max_score: int
|
| 94 |
moves: int
|
|
@@ -99,181 +69,482 @@ class RunResult:
|
|
| 99 |
|
| 100 |
|
| 101 |
# =============================================================================
|
| 102 |
-
# System Prompt
|
| 103 |
# =============================================================================
|
| 104 |
-
|
| 105 |
SYSTEM_PROMPT = """You are playing a classic text adventure game.
|
| 106 |
|
| 107 |
GOAL: Explore the world, solve puzzles, and maximize your score.
|
| 108 |
|
| 109 |
AVAILABLE TOOLS (use via MCP):
|
| 110 |
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
|
| 111 |
-
- memory: Get current game state and history
|
| 112 |
-
- inventory: Check what you're carrying
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
VALID GAME COMMANDS for play_action:
|
| 115 |
-
- Movement: north, south, east, west, up, down, enter, exit
|
| 116 |
-
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
|
| 117 |
-
- Other: look, inventory,
|
| 118 |
|
| 119 |
RESPOND IN THIS EXACT FORMAT (no markdown):
|
| 120 |
THOUGHT: <your reasoning about what to do next>
|
| 121 |
TOOL: <tool_name>
|
| 122 |
ARGS: <JSON arguments, e.g., {"action": "look"}>
|
| 123 |
-
|
| 124 |
-
Example:
|
| 125 |
-
THOUGHT: I should look around to see where I am.
|
| 126 |
-
TOOL: play_action
|
| 127 |
-
ARGS: {"action": "look"}
|
| 128 |
"""
|
| 129 |
|
| 130 |
|
| 131 |
# =============================================================================
|
| 132 |
-
#
|
| 133 |
# =============================================================================
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
def __init__(self):
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
history = []
|
| 202 |
-
final_score = 0
|
| 203 |
moves = 0
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
return RunResult(
|
| 209 |
-
final_score=
|
| 210 |
-
max_score=350,
|
| 211 |
moves=moves,
|
| 212 |
-
locations_visited=
|
| 213 |
-
game_completed=
|
| 214 |
-
|
|
|
|
| 215 |
)
|
| 216 |
-
|
| 217 |
-
def _build_prompt(self, observation: str, history: list) -> str:
|
| 218 |
-
"""
|
| 219 |
-
Build the prompt for the LLM.
|
| 220 |
-
|
| 221 |
-
TODO: Implement this to create effective prompts
|
| 222 |
-
"""
|
| 223 |
-
# TODO: Combine system prompt, history, and current observation
|
| 224 |
-
pass
|
| 225 |
-
|
| 226 |
-
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 227 |
-
"""
|
| 228 |
-
Parse LLM response to extract thought, tool name, and arguments.
|
| 229 |
-
|
| 230 |
-
TODO: Implement robust parsing
|
| 231 |
-
|
| 232 |
-
Returns:
|
| 233 |
-
Tuple of (thought, tool_name, args_dict)
|
| 234 |
-
"""
|
| 235 |
-
# TODO: Parse the response format:
|
| 236 |
-
# THOUGHT: ...
|
| 237 |
-
# TOOL: ...
|
| 238 |
-
# ARGS: {...}
|
| 239 |
-
pass
|
| 240 |
-
|
| 241 |
-
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
|
| 242 |
-
"""
|
| 243 |
-
Call the LLM with the given prompt.
|
| 244 |
-
|
| 245 |
-
This is a convenience wrapper - you can also use call_llm() directly.
|
| 246 |
-
"""
|
| 247 |
-
return call_llm(prompt, system_prompt, seed)
|
| 248 |
|
| 249 |
|
| 250 |
# =============================================================================
|
| 251 |
-
#
|
| 252 |
# =============================================================================
|
| 253 |
|
| 254 |
async def test_agent():
|
| 255 |
-
"""Test the agent locally."""
|
| 256 |
from fastmcp import Client
|
| 257 |
-
|
| 258 |
-
# Path to your MCP server
|
| 259 |
server_path = "mcp_server.py"
|
| 260 |
-
|
| 261 |
agent = StudentAgent()
|
| 262 |
-
|
| 263 |
async with Client(server_path) as client:
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
seed=42,
|
| 269 |
-
verbose=True,
|
| 270 |
-
)
|
| 271 |
-
|
| 272 |
-
print(f"\nFinal Score: {result.final_score}")
|
| 273 |
-
print(f"Moves: {result.moves}")
|
| 274 |
-
print(f"Locations: {result.locations_visited}")
|
| 275 |
|
| 276 |
|
| 277 |
if __name__ == "__main__":
|
| 278 |
import asyncio
|
| 279 |
-
asyncio.run(test_agent())
|
|
|
|
| 27 |
import os
|
| 28 |
import re
|
| 29 |
from dataclasses import dataclass, field
|
| 30 |
+
from typing import Optional, Dict, Tuple, Set, List
|
| 31 |
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from huggingface_hub import InferenceClient
|
|
|
|
| 38 |
# =============================================================================
|
| 39 |
# LLM Configuration - DO NOT MODIFY
|
| 40 |
# =============================================================================
|
|
|
|
|
|
|
| 41 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 42 |
+
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 43 |
+
LLM_CLIENT = InferenceClient(token=HF_TOKEN) if HF_TOKEN else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Send one system/user message pair to the configured chat model and return the reply.

    Args:
        prompt: Content of the user message.
        system_prompt: Content of the system message.
        seed: Forwarded unchanged as the ``seed`` of the completion request.
        max_tokens: Forwarded unchanged as ``max_tokens`` of the completion request.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        RuntimeError: If ``LLM_CLIENT`` is ``None``, i.e. no ``HF_TOKEN`` was
            available when the module was loaded.
    """
    # The client is only created at import time when HF_TOKEN is set.
    if LLM_CLIENT is None:
        raise RuntimeError("HF_TOKEN missing (set it as env var / HF secret).")
    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )
    return response.choices[0].message.content
|
| 58 |
|
| 59 |
|
| 60 |
@dataclass
|
| 61 |
class RunResult:
|
|
|
|
| 62 |
final_score: int
|
| 63 |
max_score: int
|
| 64 |
moves: int
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
# =============================================================================
|
| 72 |
+
# System Prompt
|
| 73 |
# =============================================================================
|
|
|
|
| 74 |
SYSTEM_PROMPT = """You are playing a classic text adventure game.
|
| 75 |
|
| 76 |
GOAL: Explore the world, solve puzzles, and maximize your score.
|
| 77 |
|
| 78 |
AVAILABLE TOOLS (use via MCP):
|
| 79 |
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
|
| 80 |
+
- memory: Get current game state and history
|
| 81 |
+
- inventory: Check what you're carrying
|
| 82 |
+
- get_map: Get a map of explored locations and their connections
|
| 83 |
+
- get_valid_actions: (if available) get likely valid actions
|
| 84 |
+
|
| 85 |
+
CRITICAL RULES TO AVOID LOOPS:
|
| 86 |
+
1. Avoid repeating the same action as the immediately previous action.
|
| 87 |
+
2. If an action fails in a location, do NOT retry that same action in that same location.
|
| 88 |
+
3. Prioritize unexplored directions and new interactions over repeating old ones.
|
| 89 |
|
| 90 |
VALID GAME COMMANDS for play_action:
|
| 91 |
+
- Movement: north, south, east, west, up, down, enter, exit, in, out, northeast, northwest, southeast, southwest
|
| 92 |
+
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>, read <thing>
|
| 93 |
+
- Other: look, inventory, wait, turn on <item>, turn off <item>, push <thing>, pull <thing>, move <thing>, climb <thing>
|
| 94 |
|
| 95 |
RESPOND IN THIS EXACT FORMAT (no markdown):
|
| 96 |
THOUGHT: <your reasoning about what to do next>
|
| 97 |
TOOL: <tool_name>
|
| 98 |
ARGS: <JSON arguments, e.g., {"action": "look"}>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
"""
|
| 100 |
|
| 101 |
|
| 102 |
# =============================================================================
|
| 103 |
+
# Parsing / heuristics
|
| 104 |
# =============================================================================
|
| 105 |
|
| 106 |
+
SCORE_MOVES_RE = re.compile(r"\[Score:\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", re.IGNORECASE)
|
| 107 |
+
|
| 108 |
+
MOVE_WORDS = {
|
| 109 |
+
"north", "south", "east", "west",
|
| 110 |
+
"up", "down", "enter", "exit", "in", "out",
|
| 111 |
+
"northeast", "northwest", "southeast", "southwest",
|
| 112 |
+
"n", "s", "e", "w", "ne", "nw", "se", "sw", "u", "d",
|
| 113 |
+
}
|
| 114 |
+
REVERSE_DIR = {
|
| 115 |
+
"north": "south", "south": "north",
|
| 116 |
+
"east": "west", "west": "east",
|
| 117 |
+
"up": "down", "down": "up",
|
| 118 |
+
"enter": "exit", "exit": "enter",
|
| 119 |
+
"in": "out", "out": "in",
|
| 120 |
+
"northeast": "southwest", "southwest": "northeast",
|
| 121 |
+
"northwest": "southeast", "southeast": "northwest",
|
| 122 |
+
"n": "s", "s": "n", "e": "w", "w": "e",
|
| 123 |
+
"ne": "sw", "sw": "ne", "nw": "se", "se": "nw",
|
| 124 |
+
"u": "d", "d": "u",
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
ROOM_SKIP_PREFIXES = (
|
| 128 |
+
"Taken", "Opening", "You can't", "I don't", "WELCOME",
|
| 129 |
+
"There's nothing", "Score:", "[Score:", "It is already",
|
| 130 |
+
"The door is", "You are carrying", "Dropped",
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
HARD_FAIL_MARKERS = (
|
| 134 |
+
"you can't", "you cannot", "not allowed", "no way",
|
| 135 |
+
"i don't understand", "unknown word",
|
| 136 |
+
"can't see any", "not here",
|
| 137 |
+
"blocked", "impenetrable",
|
| 138 |
+
"securely anchored", "too heavy", "fixed", "attached",
|
| 139 |
+
"you already have", "it is already open",
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
USELESS_MARKERS = (
|
| 143 |
+
"it is already open", "you already have that", "you have that already", "nothing happens",
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
def strip_code_fences(s: str) -> str:
    """Return *s* without a leading ```json / ``` fence and a trailing ``` fence.

    The text is trimmed of surrounding whitespace before, between and after
    the three removals. The ``json`` language tag is matched case-insensitively.
    """
    text = s.strip()
    fence_rules = (
        (r"^```json\s*", re.IGNORECASE),  # opening fence with a json tag
        (r"^```\s*", 0),                  # bare opening fence
        (r"\s*```$", 0),                  # closing fence
    )
    for pattern, flags in fence_rules:
        text = re.sub(pattern, "", text, flags=flags).strip()
    return text
|
| 152 |
+
|
| 153 |
+
def parse_score(observation: str) -> Optional[int]:
    """Return the first integer that follows "Score"/"score" in *observation*.

    The word must be followed by at least one colon or whitespace character
    and then digits. Returns ``None`` when nothing matches or when
    *observation* is empty/``None``.
    """
    haystack = observation if observation else ""
    found = re.search(r"[Ss]core[:\s]+(\d+)", haystack)
    if found is None:
        return None
    return int(found.group(1))
|
| 156 |
+
|
| 157 |
+
def is_game_over(observation: str) -> bool:
    """Tell whether *observation* contains one of the known end-of-game phrases.

    Matching is case-insensitive; an empty or ``None`` observation is never
    considered game over.
    """
    text = observation or ""
    end_markers = (
        r"\*\*\*\s*You have died\s*\*\*\*",
        r"\*\*\*\s*You have won\s*\*\*\*",
        r"game over",
        r"The End",
        r"RESTART, RESTORE, or QUIT",
    )
    for marker in end_markers:
        if re.search(marker, text, re.IGNORECASE):
            return True
    return False
|
| 166 |
+
|
| 167 |
+
# Motion verbs an LLM tends to put in front of a bare direction.
_SANITIZE_MOTION_PREFIXES = ("go ", "walk ", "move ", "head ", "run ")

# Words that, directly after a motion verb, mean "travel that way".
_SANITIZE_DIRECTIONS = frozenset({
    "north", "south", "east", "west",
    "up", "down", "enter", "exit", "in", "out",
    "northeast", "northwest", "southeast", "southwest",
    "n", "s", "e", "w", "ne", "nw", "se", "sw", "u", "d",
})


def sanitize_play_action(action: str) -> str:
    """Normalize common LLM phrasing into a command the game parser accepts.

    Steps, in order: trim whitespace, drop one pair of matching surrounding
    quotes, drop trailing ``.``/``!``/``?``, collapse runs of whitespace, and
    finally drop a leading motion verb ("go", "walk", "move", "head", "run")
    *only* when what follows is a direction word.  Commands such as
    "move rug" or "run from troll" are therefore left intact instead of being
    reduced to an object name the game cannot parse.

    Args:
        action: Raw action text proposed by the model.

    Returns:
        The cleaned command (original letter case preserved), or ``"look"``
        when nothing usable remains.
    """
    if not action:
        return "look"

    a = action.strip()
    if (a.startswith('"') and a.endswith('"')) or (a.startswith("'") and a.endswith("'")):
        a = a[1:-1].strip()

    # Punctuation and spacing are cleaned first so that "go north." is
    # recognised as verb + direction below.
    a = a.rstrip(".!?").strip()
    a = re.sub(r"\s+", " ", a).strip()

    lowered = a.lower()
    for prefix in _SANITIZE_MOTION_PREFIXES:
        if lowered.startswith(prefix):
            remainder = a[len(prefix):].strip()
            if remainder.lower() in _SANITIZE_DIRECTIONS:
                a = remainder
            break

    return a if a else "look"
|
| 185 |
+
|
| 186 |
+
def extract_location_name(observation: str) -> Optional[str]:
    """Best-effort extraction of a room title from the first line of *observation*.

    The first non-empty-stripped line is accepted as a room name when it
    contains none of the ``ROOM_SKIP_PREFIXES`` markers, does not end with
    sentence punctuation, is shorter than 50 characters and starts with an
    uppercase letter.

    The punctuation check keeps short reply sentences (e.g. "Opened." or an
    error message) from being mistaken for a location, which would otherwise
    corrupt the current location, the map and the per-location ban list.

    Args:
        observation: Raw text returned by the game.

    Returns:
        The candidate room title, or ``None`` when the first line does not
        look like one.
    """
    if not observation:
        return None

    lines = observation.strip().splitlines()
    first_line = lines[0].strip() if lines else ""
    if not first_line:
        return None

    # Despite the constant's name, markers are matched anywhere in the line.
    if any(marker in first_line for marker in ROOM_SKIP_PREFIXES):
        return None

    # Titles are short noun phrases; a trailing . ! ? marks a reply sentence.
    if first_line[-1] in ".!?":
        return None

    if len(first_line) < 50 and first_line[0].isupper():
        return first_line
    return None
|
| 199 |
+
|
| 200 |
+
def looks_failed(observation: str) -> bool:
    """Report whether the lower-cased observation contains any ``HARD_FAIL_MARKERS`` entry."""
    lowered = (observation or "").lower()
    for marker in HARD_FAIL_MARKERS:
        if marker in lowered:
            return True
    return False
|
| 203 |
+
|
| 204 |
+
def looks_useless(observation: str) -> bool:
    """Report whether the lower-cased observation contains any ``USELESS_MARKERS`` entry."""
    lowered = (observation or "").lower()
    for marker in USELESS_MARKERS:
        if marker in lowered:
            return True
    return False
|
| 207 |
+
|
| 208 |
+
# =============================================================================
|
| 209 |
+
# Core agent state
|
| 210 |
+
# =============================================================================
|
| 211 |
+
|
| 212 |
+
@dataclass
class AgentState:
    """Mutable per-run bookkeeping for the agent (history, map, score, loop guards)."""

    # (thought, action, observation) triples, oldest first.
    trace: List[Tuple[str, str, str]] = field(default_factory=list)

    # Where the agent is, where it was just before, and every room seen so far.
    current_location: Optional[str] = None
    previous_location: Optional[str] = None
    visited_locations: Set[str] = field(default_factory=set)

    # room -> {direction: destination room}
    topo_map: Dict[str, Dict[str, str]] = field(default_factory=dict)

    # Most recently parsed score.
    score: int = 0

    # Latest output of a non-game tool, kept apart from the game observation.
    last_tool_info: str = ""

    # Repetition guards: the last executed action and a short window of recent ones.
    last_executed_action: Optional[str] = None
    recent_actions: List[str] = field(default_factory=list)

    # (location, normalized action) pairs recorded as having failed.
    banned_actions_by_location: Set[Tuple[str, str]] = field(default_factory=set)

    # Step index at which each helper tool was last called; -999 means "never".
    last_memory_step: int = -999
    last_map_step: int = -999
    last_inventory_step: int = -999
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
# =============================================================================
|
| 244 |
+
# Agent
|
| 245 |
+
# =============================================================================
|
| 246 |
+
|
| 247 |
+
class StudentAgent:
|
| 248 |
def __init__(self):
    """Create the agent with a fresh, empty ``AgentState``."""
    fresh_state = AgentState()
    self.state = fresh_state
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _parse_llm_toolcall(self, response: str) -> Tuple[str, str, Dict]:
|
| 253 |
+
"""Parse THOUGHT/TOOL/ARGS. Keep exactly your contract but more robust."""
|
| 254 |
+
thought = ""
|
| 255 |
+
tool = ""
|
| 256 |
+
args: Dict = {}
|
| 257 |
+
|
| 258 |
+
thought_match = re.search(r"THOUGHT:\s*(.+?)(?=TOOL:|$)", response, re.DOTALL | re.IGNORECASE)
|
| 259 |
+
if thought_match:
|
| 260 |
+
thought = thought_match.group(1).strip()
|
| 261 |
+
|
| 262 |
+
tool_match = re.search(r"TOOL:\s*([A-Za-z_][A-Za-z0-9_]*)", response, re.IGNORECASE)
|
| 263 |
+
if tool_match:
|
| 264 |
+
tool = tool_match.group(1).strip()
|
| 265 |
+
|
| 266 |
+
args_match = re.search(r"ARGS:\s*(\{.*\})", response, re.DOTALL | re.IGNORECASE)
|
| 267 |
+
if args_match:
|
| 268 |
+
raw = args_match.group(1).strip()
|
| 269 |
+
try:
|
| 270 |
+
args = json.loads(raw)
|
| 271 |
+
except json.JSONDecodeError:
|
| 272 |
+
# fallback
|
| 273 |
+
action_match = re.search(r'"action"\s*:\s*"([^"]+)"', raw)
|
| 274 |
+
if action_match:
|
| 275 |
+
args = {"action": action_match.group(1)}
|
| 276 |
+
|
| 277 |
+
# fallback default
|
| 278 |
+
if not tool:
|
| 279 |
+
tool = "play_action"
|
| 280 |
+
args = {"action": "look"}
|
| 281 |
+
elif tool == "play_action" and (not args or "action" not in args):
|
| 282 |
+
args = {"action": "look"}
|
| 283 |
+
|
| 284 |
+
return thought, tool, args
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def _format_recent_history(self, k: int = 5) -> str:
|
| 288 |
+
if not self.state.trace:
|
| 289 |
+
return "(no history)"
|
| 290 |
+
recent = self.state.trace[-k:]
|
| 291 |
+
out = []
|
| 292 |
+
for th, act, obs in recent:
|
| 293 |
+
short_obs = obs[:200] + "..." if len(obs) > 200 else obs
|
| 294 |
+
out.append(f"- Action: {act}\n Result: {short_obs}")
|
| 295 |
+
return "\n".join(out)
|
| 296 |
+
|
| 297 |
+
def _format_map(self) -> str:
|
| 298 |
+
if not self.state.topo_map:
|
| 299 |
+
return "Empty map."
|
| 300 |
+
lines = []
|
| 301 |
+
for room, edges in self.state.topo_map.items():
|
| 302 |
+
marker = " <=== YOU ARE HERE" if room == self.state.current_location else ""
|
| 303 |
+
if edges:
|
| 304 |
+
conn = ", ".join([f"[{d} -> {dst}]" for d, dst in edges.items()])
|
| 305 |
+
else:
|
| 306 |
+
conn = "No explored exits"
|
| 307 |
+
lines.append(f"- {room}{marker}: {conn}")
|
| 308 |
+
return "\n".join(lines)
|
| 309 |
+
|
| 310 |
+
def _build_llm_prompt(self, observation: str, loop_warning: str) -> str:
|
| 311 |
+
loc = self.state.current_location or "Unknown"
|
| 312 |
+
tool_info = self.state.last_tool_info.strip() if self.state.last_tool_info else "(none)"
|
| 313 |
+
prompt = f"""{loop_warning}
|
| 314 |
+
CURRENT LOCATION: {loc}
|
| 315 |
+
|
| 316 |
+
CURRENT OBSERVATION:
|
| 317 |
+
{observation}
|
| 318 |
+
|
| 319 |
+
LAST TOOL INFO (memory/get_map/inventory):
|
| 320 |
+
{tool_info}
|
| 321 |
+
|
| 322 |
+
KNOWN MAP (Spatial Graph):
|
| 323 |
+
{self._format_map()}
|
| 324 |
+
|
| 325 |
+
RECENT HISTORY:
|
| 326 |
+
{self._format_recent_history(5)}
|
| 327 |
+
|
| 328 |
+
CURRENT SCORE: {self.state.score}
|
| 329 |
+
|
| 330 |
+
Decide your next step and output THOUGHT/TOOL/ARGS.
|
| 331 |
+
"""
|
| 332 |
+
return prompt
|
| 333 |
+
|
| 334 |
|
| 335 |
+
def _update_score(self, observation: str) -> None:
    """Store the score parsed from *observation*; keep the old one if none is found."""
    parsed = parse_score(observation)
    if parsed is None:
        return
    self.state.score = parsed
|
| 339 |
+
|
| 340 |
+
def _update_location_and_map(self, observation: str, action_taken: Optional[str]) -> None:
    """Update the current location and the agent-side map from *observation*.

    Nothing happens when no room title can be extracted.  Otherwise the room
    is registered, and - if *action_taken* is a movement word that led to a
    different room - the edge ``origin --move--> new room`` is recorded.

    The opposite edge is only *inferred* (text-adventure maps are not always
    symmetric), so it is added with ``setdefault`` and never overwrites an
    edge that was already recorded for that direction.

    Args:
        observation: Game text produced by the last action.
        action_taken: The executed action, or ``None`` for the initial look.
    """
    state = self.state
    new_loc = extract_location_name(observation)
    if not new_loc:
        return

    state.topo_map.setdefault(new_loc, {})

    origin = state.current_location
    if origin and action_taken:
        move = action_taken.lower().strip()
        if move in MOVE_WORDS and origin != new_loc:
            # Forward edge: this transition was actually observed.
            state.topo_map.setdefault(origin, {})[move] = new_loc
            # Reverse edge: a guess, so keep whatever is already known.
            rev = REVERSE_DIR.get(move)
            if rev:
                state.topo_map[new_loc].setdefault(rev, origin)

    state.previous_location = origin
    state.current_location = new_loc
    state.visited_locations.add(new_loc)
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
def _loop_warning_text(self) -> str:
|
| 365 |
+
recent = self.state.recent_actions[-4:]
|
| 366 |
+
if len(recent) >= 4 and recent[-1] in recent[:-1]:
|
| 367 |
+
return "\n!!! WARNING: LOOP DETECTED !!!\nChoose a different action than your last few actions.\n"
|
| 368 |
+
return ""
|
| 369 |
+
|
| 370 |
+
def _is_banned_here(self, action_norm: str) -> bool:
|
| 371 |
+
loc = self.state.current_location or "Unknown"
|
| 372 |
+
return (loc, action_norm) in self.state.banned_actions_by_location
|
| 373 |
+
|
| 374 |
+
def _ban_here(self, action_norm: str) -> None:
|
| 375 |
+
loc = self.state.current_location or "Unknown"
|
| 376 |
+
self.state.banned_actions_by_location.add((loc, action_norm))
|
| 377 |
+
|
| 378 |
+
def _force_alternative_move(self, avoid: str) -> Optional[str]:
|
| 379 |
+
"""Pick a reasonable alternative direction."""
|
| 380 |
+
loc = self.state.current_location or "Unknown"
|
| 381 |
+
for d in ["north", "east", "south", "west", "up", "down", "enter", "exit"]:
|
| 382 |
+
if d == avoid:
|
| 383 |
+
continue
|
| 384 |
+
if (loc, d) not in self.state.banned_actions_by_location:
|
| 385 |
+
return d
|
| 386 |
+
return None
|
| 387 |
+
|
| 388 |
+
def _maybe_guard_play_action(self, thought: str, args: Dict) -> Tuple[str, Dict]:
    """Apply the deterministic repeat / known-failure guards to a ``play_action`` call.

    The proposed action is normalized and lower-cased.  If it equals the last
    executed action, or is banned at the current location, and an alternative
    direction is available, that direction replaces it and the thought is
    replaced by a "(Guard) ..." explanation.  Otherwise the original thought
    is returned together with the normalized action.
    """
    proposed = sanitize_play_action(str(args.get("action", "look"))).lower()

    previous = self.state.last_executed_action
    if previous and proposed == previous:
        guard_note = "(Guard) Avoid repeating '{action}'. Trying '{alt}'."
    elif self._is_banned_here(proposed):
        guard_note = "(Guard) '{action}' failed here before. Trying '{alt}'."
    else:
        guard_note = None

    if guard_note is not None:
        replacement = self._force_alternative_move(avoid=proposed)
        if replacement:
            return guard_note.format(action=proposed, alt=replacement), {"action": replacement}

    return thought, {"action": proposed}
|
| 406 |
+
|
| 407 |
+
# Tool calling
|
| 408 |
+
async def _call_tool(self, client, tool: str, args: Dict) -> str:
|
| 409 |
+
result = await client.call_tool(tool, args or {})
|
| 410 |
+
return result.content[0].text if result else "No response"
|
| 411 |
+
|
| 412 |
+
async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = True) -> RunResult:
    """Play one episode: alternate LLM decisions and MCP tool calls for up to *max_steps* steps.

    Args:
        client: MCP client handed to ``self._call_tool``.
        game: Not referenced anywhere in this method body.
        max_steps: Upper bound on loop iterations; every iteration counts as
            one move, whichever tool the model picked.
        seed: Passed to ``call_llm`` on every step.
        verbose: When true, print the initial observation, each step and the
            final statistics.

    Returns:
        A ``RunResult`` built from the final agent state.  Any exception that
        escapes the episode is caught and reported through ``error``.
    """
    # NOTE(review): nesting below was reconstructed from a de-indented diff
    # view - confirm block boundaries against the real file.

    # Start every run from a clean state.
    self.state = AgentState()

    moves = 0
    game_completed = False
    error: Optional[str] = None

    try:

        # Initial "look" seeds observation, score and starting location.
        observation = await self._call_tool(client, "play_action", {"action": "look"})
        self._update_score(observation)
        self._update_location_and_map(observation, action_taken=None)

        if verbose:
            print(f"=== Initial Observation ===\n{observation}\n")

        for step in range(max_steps):
            moves = step + 1

            # Periodic context refresh: memory when >= 6 steps old, otherwise
            # the map when >= 10 steps old.  Both are best-effort, failures
            # are deliberately ignored.
            if step - self.state.last_memory_step >= 6:
                try:
                    self.state.last_tool_info = await self._call_tool(client, "memory", {})
                    self.state.last_memory_step = step
                except Exception:
                    pass
            elif step - self.state.last_map_step >= 10:
                try:
                    self.state.last_tool_info = await self._call_tool(client, "get_map", {})
                    self.state.last_map_step = step
                except Exception:
                    pass

            loop_warning = self._loop_warning_text()
            prompt = self._build_llm_prompt(observation, loop_warning)

            llm_response = call_llm(prompt, SYSTEM_PROMPT, seed)
            thought, tool, args = self._parse_llm_toolcall(llm_response)

            if verbose:
                print(f"=== Step {step + 1} ===")
                print(f"LLM Response:\n{llm_response}\n")

            # Only game commands go through the repeat / known-failure guards.
            if tool == "play_action":
                thought, args = self._maybe_guard_play_action(thought, args)

            tool_output = ""
            executed_action_label = tool

            try:
                tool_output = await self._call_tool(client, tool, args)
                if tool == "play_action":
                    observation = tool_output
                    executed_action_label = args.get("action", "look")
                    self.state.last_executed_action = str(executed_action_label).lower().strip()
                    self.state.recent_actions.append(self.state.last_executed_action)
                    self.state.recent_actions = self.state.recent_actions[-8:]  # cap
                else:
                    # Helper-tool output is stored for the next prompt; the
                    # game observation is left untouched.
                    self.state.last_tool_info = f"[Tool: {tool}]\n{tool_output}"
            except Exception as e:
                # A failing tool call becomes text for the model instead of ending the run.
                if tool == "play_action":
                    observation = f"Error executing tool: {e}"
                    executed_action_label = f"{tool} (error)"
                else:
                    self.state.last_tool_info = f"[Tool: {tool} ERROR]\n{e}"

            if tool == "play_action":
                self._update_score(observation)
                self._update_location_and_map(observation, action_taken=str(executed_action_label))

                # The ban is keyed on the location *after* the update above.
                act_norm = str(executed_action_label).lower().strip()
                if looks_failed(observation) or looks_useless(observation):
                    self._ban_here(act_norm)

            # NOTE(review): assumed to run for every tool, not only play_action - confirm.
            self.state.trace.append((thought, str(executed_action_label), observation))

            if verbose:
                print(f"Thought: {thought}")
                print(f"Tool: {tool}")
                if tool == "play_action":
                    print(f"Action: {executed_action_label}")
                    print(f"Observation:\n{observation[:350]}\n")
                else:
                    print(f"Tool output (stored):\n{tool_output[:250]}\n")

            if tool == "play_action" and is_game_over(observation):
                game_completed = True
                break

    except Exception as e:
        # Anything escaping the loop (including call_llm errors) ends the run
        # and is reported via RunResult.error.
        error = str(e)
        if verbose:
            print(f"Error: {error}")

    if verbose:
        print(f"=== Final Stats ===")
        print(f"Score: {self.state.score}")
        print(f"Moves: {moves}")
        print(f"Locations visited: {len(self.state.visited_locations)}")

    return RunResult(
        final_score=self.state.score,
        max_score=350,  # hard-coded; not derived from `game`
        moves=moves,
        locations_visited=set(self.state.visited_locations),
        game_completed=game_completed,
        error=error,
        history=[(t, a, o) for (t, a, o) in self.state.trace],
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
|
| 533 |
# =============================================================================
|
| 534 |
+
# Local testing
|
| 535 |
# =============================================================================
|
| 536 |
|
| 537 |
async def test_agent():
    """Run a short local smoke test of the agent against the MCP server.

    Starts ``mcp_server.py`` through a FastMCP ``Client``, lets a fresh
    ``StudentAgent`` play ``zork1`` for at most 10 steps with seed 42 in
    verbose mode, then prints the final score, move count and visited
    locations taken from the returned result object.
    """
    from pathlib import Path

    from fastmcp import Client

    # Resolve the server script next to this file rather than against the
    # current working directory, so the test also works when this module is
    # launched from another folder.
    server_path = str(Path(__file__).resolve().parent / "mcp_server.py")

    agent = StudentAgent()

    async with Client(server_path) as client:
        res = await agent.run(
            client=client,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )

        print(f"\nFinal Score: {res.final_score}")
        print(f"Moves: {res.moves}")
        print(f"Locations: {res.locations_visited}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
|
| 547 |
|
| 548 |
if __name__ == "__main__":
    # Script entry point: drive the async local test to completion.
    import asyncio

    asyncio.run(test_agent())
|
submission_template/mcp_server.py
CHANGED
|
@@ -1,27 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
Required tool:
|
| 8 |
-
play_action(action: str) -> str
|
| 9 |
-
Execute a game command and return the result.
|
| 10 |
-
|
| 11 |
-
Recommended tools:
|
| 12 |
-
memory() -> str
|
| 13 |
-
Return current game state, score, and recent history.
|
| 14 |
-
|
| 15 |
-
inventory() -> str
|
| 16 |
-
Return the player's current inventory.
|
| 17 |
-
|
| 18 |
-
get_map() -> str
|
| 19 |
-
Return a map of explored locations.
|
| 20 |
-
|
| 21 |
-
Test your server with:
|
| 22 |
-
fastmcp dev submission_template/mcp_server.py
|
| 23 |
-
|
| 24 |
-
Then open the MCP Inspector in your browser to test the tools interactively.
|
| 25 |
"""
|
| 26 |
|
| 27 |
import sys
|
|
@@ -31,179 +12,185 @@ import os
|
|
| 31 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 32 |
|
| 33 |
from fastmcp import FastMCP
|
| 34 |
-
from games.zork_env import TextAdventureEnv
|
| 35 |
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
|
| 39 |
-
# =============================================================================
|
| 40 |
-
|
| 41 |
-
mcp = FastMCP("Student Text Adventure Server")
|
| 42 |
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
# =============================================================================
|
| 45 |
-
# Game State Management
|
| 46 |
-
# =============================================================================
|
| 47 |
|
| 48 |
-
class
|
| 49 |
-
"""
|
| 50 |
-
Manages the text adventure game state.
|
| 51 |
|
| 52 |
-
|
| 53 |
-
- Action history (for memory tool)
|
| 54 |
-
- Explored locations (for mapping)
|
| 55 |
-
- Current score and moves
|
| 56 |
-
"""
|
| 57 |
-
|
| 58 |
-
def __init__(self):
|
| 59 |
-
self.env: TextAdventureEnv = None
|
| 60 |
-
self.state = None
|
| 61 |
-
self.game_name: str = ""
|
| 62 |
-
# TODO: Add more state tracking
|
| 63 |
-
# self.history: list[tuple[str, str]] = []
|
| 64 |
-
# self.explored_locations: dict[str, set[str]] = {}
|
| 65 |
-
# self.current_location: str = ""
|
| 66 |
-
|
| 67 |
-
def initialize(self, game: str = "zork1"):
|
| 68 |
-
"""Initialize or reset the game."""
|
| 69 |
self.game_name = game
|
| 70 |
self.env = TextAdventureEnv(game)
|
| 71 |
self.state = self.env.reset()
|
| 72 |
-
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
-
def
|
| 76 |
-
"""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
self.state = self.env.step(action)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
return
|
| 87 |
|
| 88 |
-
def
|
| 89 |
-
"""Get current
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
def
|
| 93 |
-
"""Get
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
-
# Global game
|
| 98 |
-
|
| 99 |
|
| 100 |
|
| 101 |
-
def get_game() ->
|
| 102 |
-
"""Get or initialize the game
|
| 103 |
-
global
|
| 104 |
-
if
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
_game.initialize(game)
|
| 108 |
-
return _game
|
| 109 |
|
| 110 |
|
| 111 |
# =============================================================================
|
| 112 |
-
# MCP Tools
|
| 113 |
# =============================================================================
|
| 114 |
|
| 115 |
@mcp.tool()
|
| 116 |
def play_action(action: str) -> str:
|
| 117 |
"""
|
| 118 |
-
Execute a game
|
| 119 |
-
|
| 120 |
-
This is the main tool for interacting with the game.
|
| 121 |
|
| 122 |
Args:
|
| 123 |
-
action: The command to execute (e.g.,
|
| 124 |
-
|
| 125 |
Returns:
|
| 126 |
-
The game's response to
|
| 127 |
-
|
| 128 |
-
Valid commands include:
|
| 129 |
-
- Movement: north, south, east, west, up, down, enter, exit
|
| 130 |
-
- Objects: take <item>, drop <item>, open <thing>, examine <thing>
|
| 131 |
-
- Other: look, inventory, read <thing>, turn on lamp
|
| 132 |
"""
|
| 133 |
game = get_game()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
| 137 |
|
| 138 |
-
result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
# A summary including current location, score, moves, and recent history
|
| 155 |
-
# """
|
| 156 |
-
# game = get_game()
|
| 157 |
-
# # TODO: Return useful state information
|
| 158 |
-
# pass
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
# @mcp.tool()
|
| 162 |
-
# def inventory() -> str:
|
| 163 |
-
# """
|
| 164 |
-
# Check what the player is carrying.
|
| 165 |
-
#
|
| 166 |
-
# Returns:
|
| 167 |
-
# List of items in the player's inventory
|
| 168 |
-
# """
|
| 169 |
-
# game = get_game()
|
| 170 |
-
# result = game.step("inventory")
|
| 171 |
-
# return result
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
# @mcp.tool()
|
| 175 |
-
# def get_map() -> str:
|
| 176 |
-
# """
|
| 177 |
-
# Get a map of explored locations.
|
| 178 |
-
#
|
| 179 |
-
# Returns:
|
| 180 |
-
# A text representation of explored locations and connections
|
| 181 |
-
# """
|
| 182 |
-
# game = get_game()
|
| 183 |
-
# # TODO: Return map of explored locations
|
| 184 |
-
# pass
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
# @mcp.tool()
|
| 188 |
-
# def get_valid_actions() -> str:
|
| 189 |
-
# """
|
| 190 |
-
# Get a list of likely valid actions from the current location.
|
| 191 |
-
#
|
| 192 |
-
# Returns:
|
| 193 |
-
# List of actions that might work here
|
| 194 |
-
# """
|
| 195 |
-
# # This is a hint: Jericho provides get_valid_actions()
|
| 196 |
-
# game = get_game()
|
| 197 |
-
# if game.env and game.env.env:
|
| 198 |
-
# valid = game.env.env.get_valid_actions()
|
| 199 |
-
# return "Valid actions: " + ", ".join(valid[:20])
|
| 200 |
-
# return "Could not determine valid actions"
|
| 201 |
|
| 202 |
|
| 203 |
# =============================================================================
|
| 204 |
-
#
|
| 205 |
# =============================================================================
|
| 206 |
|
| 207 |
if __name__ == "__main__":
|
| 208 |
-
# This runs the server with stdio transport (for MCP clients)
|
| 209 |
mcp.run()
|
|
|
|
| 1 |
"""
|
| 2 |
+
Example: MCP Server for Text Adventures
|
| 3 |
|
| 4 |
+
A complete MCP server that exposes text adventure games via tools.
|
| 5 |
+
This demonstrates a full-featured server with memory, mapping, and inventory.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import sys
|
|
|
|
| 12 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 13 |
|
| 14 |
from fastmcp import FastMCP
|
| 15 |
+
from games.zork_env import TextAdventureEnv, list_available_games
|
| 16 |
|
| 17 |
|
| 18 |
+
# Name of the game to load, read from the GAME environment variable;
# "zork1" is used when the variable is not set.
INITIAL_GAME = os.getenv("GAME", "zork1")

# The MCP server instance on which the @mcp.tool() functions below register.
mcp = FastMCP("Text Adventure Server")
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
class GameState:
    """Manages the text adventure game state and exploration data.

    Wraps a ``TextAdventureEnv`` and keeps lightweight bookkeeping on top
    of it:

    - ``history``: the most recent ``(action, observation)`` pairs,
    - ``explored_locations``: for each location, the set of
      ``"<direction> -> <destination>"`` exits seen so far,
    - ``current_location``: the first line of the latest observation that
      followed a movement command.
    """

    # Commands treated as movement when updating the explored-locations map.
    _MOVEMENT_ACTIONS = frozenset({
        "north", "south", "east", "west", "up", "down",
        "enter", "exit", "n", "s", "e", "w", "u", "d",
    })

    # Maximum number of (action, observation) pairs kept in ``history``.
    _MAX_HISTORY = 50

    def __init__(self, game: str = "zork1"):
        """Create the environment for ``game`` and reset it to its start."""
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        self.history: list[tuple[str, str]] = []
        self.explored_locations: dict[str, set[str]] = {}
        self.current_location: str = self._extract_location(self.state.observation)

    def _extract_location(self, observation: str) -> str:
        """Return the first line of ``observation`` as the location name.

        Falls back to ``"Unknown"`` when the observation is empty or only
        whitespace. ``str.split`` never returns an empty list, so the
        emptiness check has to be made on the extracted line itself.
        """
        first_line = (observation or "").strip().split("\n", 1)[0].strip()
        return first_line or "Unknown"

    def take_action(self, action: str) -> str:
        """Execute a game action and return the resulting observation.

        The action is passed to the environment unchanged. It is recorded
        in ``history`` (capped at the most recent entries) and, when it is a
        movement command, used to update the map and current location.
        """
        self.state = self.env.step(action)
        result = self.state.observation

        # Track history, keeping only the most recent entries.
        self.history.append((action, result))
        if len(self.history) > self._MAX_HISTORY:
            self.history = self.history[-self._MAX_HISTORY:]

        # Update map. The command is compared case- and whitespace-
        # insensitively so that inputs such as "North" or " n " still count
        # as movement.
        # NOTE(review): the first line of the observation is taken as the
        # destination even when the move failed; confirm whether failure
        # messages should be filtered out here.
        move = action.strip().lower()
        if move in self._MOVEMENT_ACTIONS:
            new_location = self._extract_location(result)
            exits = self.explored_locations.setdefault(self.current_location, set())
            if new_location != self.current_location:
                exits.add(f"{move} -> {new_location}")
            self.current_location = new_location

        return result

    def get_memory(self) -> str:
        """Get a summary of current game state.

        The summary lists location, score, moves and game name, the last
        five actions (each with the first 60 characters of its observation)
        and the full current observation.
        """
        recent = self.history[-5:]
        if recent:
            recent_str = "\n".join(f" > {a} -> {r[:60]}..." for a, r in recent)
        else:
            recent_str = " (none yet)"

        return "\n".join([
            "Current State:",
            f"- Location: {self.current_location}",
            f"- Score: {self.state.score} points",
            f"- Moves: {self.state.moves}",
            f"- Game: {self.game_name}",
            "",
            "Recent Actions:",
            recent_str,
            "",
            "Current Observation:",
            f"{self.state.observation}",
        ])

    def get_map(self) -> str:
        """Get a map of explored locations.

        Returns a text listing of every recorded location with its known
        exits (both sorted), followed by the current location.
        """
        if not self.explored_locations:
            return "Map: No locations explored yet. Try moving around!"

        lines = ["Explored Locations and Exits:"]
        for loc, exits in sorted(self.explored_locations.items()):
            lines.append(f"\n* {loc}")
            lines.extend(f" -> {exit_info}" for exit_info in sorted(exits))

        lines.append(f"\n[Current] {self.current_location}")
        return "\n".join(lines)

    def get_inventory(self) -> str:
        """Get current inventory as a single ``"Inventory: ..."`` line.

        Each entry of ``self.state.inventory`` is converted with ``str()``
        and reduced to a display name: everything from a word starting with
        "parent" onwards is dropped, then any prefix up to the first colon
        is removed.
        """
        import re

        items = getattr(self.state, "inventory", None) or []
        if not items:
            return "Inventory: You are empty-handed."

        item_names = []
        for item in items:
            name = str(item)
            # presumably items look like "Obj12: lamp Parent4 Sibling0 ..."
            # -- TODO confirm against TextAdventureEnv. Matching "parent"
            # only at a word start keeps names such as "transparent vial"
            # from being cut in the middle.
            metadata = re.search(r"\bparent", name, re.IGNORECASE)
            if metadata:
                name = name[:metadata.start()].strip()
            if ":" in name:
                # Split on the first colon only, so a colon inside the name
                # itself is preserved.
                name = name.split(":", 1)[1].strip()
            item_names.append(name)

        return f"Inventory: {', '.join(item_names)}"
|
| 118 |
|
| 119 |
|
| 120 |
+
# Module-level singleton holding the running game; created on first use.
_game_state: GameState | None = None


def get_game() -> GameState:
    """Return the shared GameState, creating it for INITIAL_GAME on first call."""
    global _game_state
    if _game_state is not None:
        return _game_state
    _game_state = GameState(INITIAL_GAME)
    return _game_state
|
|
|
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
# =============================================================================
|
| 133 |
+
# MCP Tools
|
| 134 |
# =============================================================================
|
| 135 |
|
| 136 |
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game action in the text adventure.

    Args:
        action: The command to execute (e.g., 'north', 'take lamp', 'open mailbox')

    Returns:
        The game's response to your action
    """
    # The docstring above is kept verbatim: FastMCP publishes it to clients
    # as the tool description.
    game = get_game()
    observation = game.take_action(action)
    state = game.state

    # Append either the points just earned or the plain score/moves footer.
    if state.reward > 0:
        suffix = f"\n\n+{state.reward} points! (Total: {state.score})"
    else:
        suffix = f"\n\n[Score: {state.score} | Moves: {state.moves}]"

    # Flag the end of the game so the caller can stop issuing actions.
    if state.done:
        suffix += "\n\nGAME OVER"

    return observation + suffix
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
@mcp.tool()
def memory() -> str:
    """
    Get a summary of the current game state.

    Returns location, score, moves, recent actions, and current observation.
    """
    # Thin wrapper: the summary text is built by GameState.get_memory().
    game = get_game()
    return game.get_memory()
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
@mcp.tool()
def get_map() -> str:
    """
    Get a map showing explored locations and connections.

    Useful for navigation and avoiding getting lost.
    """
    # Thin wrapper: the map text is built by GameState.get_map().
    game = get_game()
    return game.get_map()
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
@mcp.tool()
def inventory() -> str:
    """
    Check what items you are currently carrying.
    """
    # Thin wrapper: the inventory line is built by GameState.get_inventory().
    game = get_game()
    return game.get_inventory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
|
| 191 |
# =============================================================================
|
| 192 |
+
# Main
|
| 193 |
# =============================================================================
|
| 194 |
|
| 195 |
if __name__ == "__main__":
    # Start the MCP server when this file is executed as a script.
    mcp.run()
|