Spaces:
Sleeping
Sleeping
File size: 9,136 Bytes
"""File-based tool discovery system for on-demand loading.
Following Anthropic's MCP architecture recommendations:
- Organize tools in filesystem hierarchy
- Load tool definitions on-demand
- Implement search_tools capability with detail levels
- Reduce token consumption by 98.7% (150,000 → 2,000 tokens)
"""
import logging
from pathlib import Path
from typing import Any, Literal
from dataclasses import dataclass
from mcp.types import Tool
import json
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Allowed values for the ``detail`` argument of ``ToolDiscoverySystem.search_tools``.
DetailLevel = Literal["minimal", "standard", "full"]
@dataclass
class ToolMetadata:
    """Lightweight tool metadata for search results.

    Attributes:
        name: Tool name; the discovery system uses it as the index key.
        category: Category label that ``search_tools`` filters on.
        description: Short description; matched against search queries.
        file_path: Path of the JSON file the full tool definition is loaded
            from. For the built-in default tools this file may not exist.
    """

    name: str
    category: str
    description: str
    file_path: Path
class ToolDiscoverySystem:
    """File-based tool discovery with on-demand loading.

    A lightweight metadata index (name, category, description, file path) is
    built once at construction time. Complete ``Tool`` definitions are only
    read from disk, and then cached, when ``load_tool`` is called.
    """

    def __init__(self, tools_dir: Path | None = None):
        """Initialize tool discovery system.

        Args:
            tools_dir: Directory containing tool definitions (default: ./tools/)
        """
        self.tools_dir = tools_dir or Path("tools")
        self._tool_cache: dict[str, Tool] = {}
        self._metadata_cache: dict[str, ToolMetadata] = {}
        self._initialize_metadata()

    def _initialize_metadata(self) -> None:
        """Initialize lightweight metadata index for all tools.

        Every ``*.json`` file below ``tools_dir`` is indexed under its
        ``name`` field. Files that cannot be read or parsed are logged and
        skipped so that one bad file does not prevent the rest from being
        indexed. When ``tools_dir`` does not exist, the built-in default
        tools are indexed instead.
        """
        if not self.tools_dir.exists():
            logger.warning(f"Tools directory not found: {self.tools_dir}")
            self._load_default_tools()
            return

        # rglob order depends on the filesystem; sorting makes it deterministic
        # which file wins when two files declare the same tool name.
        for tool_file in sorted(self.tools_dir.rglob("*.json")):
            try:
                # JSON is UTF-8 by specification; do not rely on the locale default.
                with open(tool_file, encoding="utf-8") as f:
                    data = json.load(f)
                metadata = ToolMetadata(
                    name=data["name"],
                    # `or` also covers an explicit JSON null, which .get()
                    # would otherwise pass through as None.
                    category=data.get("category") or "general",
                    description=data.get("description") or "",
                    file_path=tool_file,
                )
                previous = self._metadata_cache.get(metadata.name)
                if previous is not None:
                    logger.warning(
                        f"Duplicate tool name {metadata.name!r}: "
                        f"{tool_file} overrides {previous.file_path}"
                    )
                self._metadata_cache[metadata.name] = metadata
                logger.debug(f"Indexed tool: {metadata.name}")
            except Exception as e:
                # Deliberately broad: indexing is best-effort per file.
                logger.error(f"Failed to index tool {tool_file}: {e}")

    def _load_default_tools(self) -> None:
        """Load default tool metadata when no tools directory exists."""
        default_tools = [
            ToolMetadata(
                name="delegate_task",
                category="orchestration",
                description="Delegate a coding task to appropriate AI agent",
                file_path=Path("tools/orchestration/delegate_task.json"),
            ),
            ToolMetadata(
                name="list_orchestrators",
                category="orchestration",
                description="List available orchestrators and their status",
                file_path=Path("tools/orchestration/list_orchestrators.json"),
            ),
            ToolMetadata(
                name="get_statistics",
                category="monitoring",
                description="Get delegation statistics and metrics",
                file_path=Path("tools/monitoring/get_statistics.json"),
            ),
        ]
        for metadata in default_tools:
            self._metadata_cache[metadata.name] = metadata

    def search_tools(
        self,
        query: str | None = None,
        category: str | None = None,
        detail: DetailLevel = "minimal",
    ) -> list[dict[str, Any]]:
        """Search tools with configurable detail level.

        Args:
            query: Search query to match against tool names/descriptions
                (case-insensitive substring match). Empty or None matches all.
            category: Filter by tool category (exact match). Empty or None
                matches all.
            detail: Level of detail to return
                - minimal: name + category only (lowest token cost)
                - standard: + description
                - full: + complete schema (highest token cost)

        Returns:
            List of tool information at requested detail level. With
            ``detail="full"``, tools whose definition cannot be loaded are
            omitted from the result.
        """
        # Lower-case the query once instead of on every iteration.
        needle = query.lower() if query else None
        results: list[dict[str, Any]] = []

        for name, metadata in self._metadata_cache.items():
            # Apply filters
            if category and metadata.category != category:
                continue
            if (
                needle
                and needle not in name.lower()
                and needle not in metadata.description.lower()
            ):
                continue

            # Build result based on detail level
            if detail == "minimal":
                results.append({
                    "name": name,
                    "category": metadata.category,
                })
            elif detail == "standard":
                results.append({
                    "name": name,
                    "category": metadata.category,
                    "description": metadata.description,
                })
            else:  # full
                tool = self.load_tool(name)
                if tool:
                    results.append({
                        "name": name,
                        "category": metadata.category,
                        "description": tool.description,
                        "inputSchema": tool.inputSchema,
                    })

        logger.info(f"Tool search: query={query}, category={category}, detail={detail}, results={len(results)}")
        return results

    def load_tool(self, name: str) -> Tool | None:
        """Load complete tool definition on-demand.

        Successfully loaded tools are cached, so each definition file is read
        at most once per instance.

        Args:
            name: Tool name

        Returns:
            Complete Tool object with schema, or None if the tool is unknown
            or its definition cannot be loaded.
        """
        # Check cache first
        if name in self._tool_cache:
            logger.debug(f"Tool cache hit: {name}")
            return self._tool_cache[name]

        # Load from file
        metadata = self._metadata_cache.get(name)
        if not metadata:
            logger.warning(f"Tool not found: {name}")
            return None

        # If file doesn't exist, create tool from metadata (for default tools)
        if not metadata.file_path.exists():
            tool = self._create_default_tool(name)
            if tool:
                self._tool_cache[name] = tool
            else:
                logger.warning(
                    f"Tool definition file missing for {name}: {metadata.file_path}"
                )
            return tool

        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(metadata.file_path, encoding="utf-8") as f:
                data = json.load(f)
            tool = Tool(
                name=data["name"],
                description=data.get("description", ""),
                inputSchema=data.get("inputSchema", {"type": "object", "properties": {}}),
            )
            self._tool_cache[name] = tool
            logger.debug(f"Loaded tool from file: {name}")
            return tool
        except Exception as e:
            # Deliberately broad: a broken definition file must not crash callers.
            logger.error(f"Failed to load tool {name}: {e}")
            return None

    def _create_default_tool(self, name: str) -> Tool | None:
        """Create default tool definitions for backward compatibility.

        Args:
            name: Tool name

        Returns:
            The built-in Tool for ``delegate_task``, ``list_orchestrators`` or
            ``get_statistics``; None for any other name.
        """
        if name == "delegate_task":
            return Tool(
                name="delegate_task",
                description="Route task to specialist agent or confirm orchestrator should handle directly. Always call BEFORE code work to get routing guidance.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Full user request/task to route",
                        },
                        "orchestrator": {
                            "type": "string",
                            "description": "Override primary orchestrator",
                        },
                        "force_delegate": {
                            "type": "string",
                            "description": "Force delegation to specific agent",
                        },
                        "guidance_only": {
                            "type": "boolean",
                            "description": "Return routing guidance without executing (default: false)",
                            "default": False,
                        },
                    },
                    "required": ["query"],
                },
            )
        elif name == "list_orchestrators":
            return Tool(
                name="list_orchestrators",
                description="List available orchestrators and their status",
                inputSchema={"type": "object", "properties": {}},
            )
        elif name == "get_statistics":
            return Tool(
                name="get_statistics",
                description="Get delegation statistics and metrics",
                inputSchema={"type": "object", "properties": {}},
            )
        return None

    def list_categories(self) -> list[str]:
        """List all available tool categories, sorted alphabetically."""
        categories = {metadata.category for metadata in self._metadata_cache.values()}
        return sorted(categories)

    def get_tool_count(self) -> dict[str, int]:
        """Get tool count by category."""
        counts: dict[str, int] = {}
        for metadata in self._metadata_cache.values():
            counts[metadata.category] = counts.get(metadata.category, 0) + 1
        return counts
|