Spaces:
Sleeping
Sleeping
| """File-based tool discovery system for on-demand loading. | |
| Following Anthropic's MCP architecture recommendations: | |
| - Organize tools in filesystem hierarchy | |
| - Load tool definitions on-demand | |
| - Implement search_tools capability with detail levels | |
| - Reduce token consumption by 98.7% (150,000 → 2,000 tokens) | |
| """ | |
| import logging | |
| from pathlib import Path | |
| from typing import Any, Literal | |
| from dataclasses import dataclass | |
| from mcp.types import Tool | |
| import json | |
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
# Values accepted by the ``detail`` argument of ``ToolDiscoverySystem.search_tools``.
DetailLevel = Literal["minimal", "standard", "full"]
@dataclass
class ToolMetadata:
    """Lightweight tool metadata for search results.

    Instances are built with keyword arguments, so the class must be a
    dataclass: without the decorator the annotated fields below would not
    produce an ``__init__`` and construction would raise ``TypeError``.

    Attributes:
        name: Tool name; used as the key of the metadata index.
        category: Category the tool is filed under.
        description: Short human-readable description of the tool.
        file_path: Path of the JSON file holding the full tool definition.
    """

    name: str
    category: str
    description: str
    file_path: Path
class ToolDiscoverySystem:
    """File-based tool discovery with on-demand loading.

    A lightweight metadata index (name, category, description, file path) is
    built once at construction time. Complete ``Tool`` objects are only built
    when :meth:`load_tool` is called and are cached afterwards.
    """

    def __init__(self, tools_dir: Path | None = None):
        """Initialize tool discovery system.

        Args:
            tools_dir: Directory containing tool definitions (default: ./tools/)
        """
        self.tools_dir = tools_dir or Path("tools")
        self._tool_cache: dict[str, Tool] = {}
        self._metadata_cache: dict[str, ToolMetadata] = {}
        self._initialize_metadata()

    def _initialize_metadata(self) -> None:
        """Initialize lightweight metadata index for all tools.

        Every ``*.json`` file below ``tools_dir`` is indexed. A file that
        cannot be read or parsed is logged and skipped. When ``tools_dir``
        does not exist, the built-in default tools are indexed instead.
        """
        if not self.tools_dir.exists():
            logger.warning("Tools directory not found: %s", self.tools_dir)
            self._load_default_tools()
            return

        # Sorted so that, when two files declare the same tool name, the
        # winner does not depend on filesystem enumeration order.
        for tool_file in sorted(self.tools_dir.rglob("*.json")):
            try:
                with open(tool_file, encoding="utf-8") as f:
                    data = json.load(f)
                metadata = ToolMetadata(
                    name=data["name"],
                    # ``or`` (rather than a ``get`` default) also covers an
                    # explicit JSON ``null``, which would otherwise break
                    # searching and category sorting later on.
                    category=data.get("category") or "general",
                    description=data.get("description") or "",
                    file_path=tool_file,
                )
            except Exception as e:
                # Best effort: one broken file must not prevent indexing
                # of the remaining tools.
                logger.error("Failed to index tool %s: %s", tool_file, e)
                continue
            self._metadata_cache[metadata.name] = metadata
            logger.debug("Indexed tool: %s", metadata.name)

    def _load_default_tools(self) -> None:
        """Load default tool metadata when no tools directory exists."""
        default_tools = [
            ToolMetadata(
                name="delegate_task",
                category="orchestration",
                description="Delegate a coding task to appropriate AI agent",
                file_path=Path("tools/orchestration/delegate_task.json"),
            ),
            ToolMetadata(
                name="list_orchestrators",
                category="orchestration",
                description="List available orchestrators and their status",
                file_path=Path("tools/orchestration/list_orchestrators.json"),
            ),
            ToolMetadata(
                name="get_statistics",
                category="monitoring",
                description="Get delegation statistics and metrics",
                file_path=Path("tools/monitoring/get_statistics.json"),
            ),
        ]
        for metadata in default_tools:
            self._metadata_cache[metadata.name] = metadata

    def search_tools(
        self,
        query: str | None = None,
        category: str | None = None,
        detail: DetailLevel = "minimal",
    ) -> list[dict[str, Any]]:
        """Search tools with configurable detail level.

        Args:
            query: Search query to match against tool names/descriptions
                (case-insensitive substring match). Empty/None matches all.
            category: Filter by tool category (exact match). Empty/None
                matches all.
            detail: Level of detail to return
                - minimal: name + category only (lowest token cost)
                - standard: + description
                - full: + complete schema (highest token cost)
                Any other value is treated like "full".

        Returns:
            List of tool information at requested detail level. In "full"
            mode, tools whose definition cannot be loaded are omitted.
        """
        # Lower-case the query once instead of on every iteration.
        needle = query.lower() if query else None

        results: list[dict[str, Any]] = []
        for name, metadata in self._metadata_cache.items():
            # Apply filters
            if category and metadata.category != category:
                continue
            if needle is not None:
                # Tolerate a missing description instead of raising.
                description = (metadata.description or "").lower()
                if needle not in name.lower() and needle not in description:
                    continue

            # Build result based on detail level
            if detail == "minimal":
                results.append({
                    "name": name,
                    "category": metadata.category,
                })
            elif detail == "standard":
                results.append({
                    "name": name,
                    "category": metadata.category,
                    "description": metadata.description,
                })
            else:  # full
                tool = self.load_tool(name)
                if tool:
                    results.append({
                        "name": name,
                        "category": metadata.category,
                        "description": tool.description,
                        "inputSchema": tool.inputSchema,
                    })

        logger.info(
            "Tool search: query=%s, category=%s, detail=%s, results=%d",
            query,
            category,
            detail,
            len(results),
        )
        return results

    def load_tool(self, name: str) -> Tool | None:
        """Load complete tool definition on-demand.

        Lookup order: in-memory cache, then the tool's JSON file, then (when
        the file does not exist) the built-in default definition.

        Args:
            name: Tool name

        Returns:
            Complete Tool object with schema, or None if not found or if the
            definition file cannot be read/parsed.
        """
        # Check cache first
        if name in self._tool_cache:
            logger.debug("Tool cache hit: %s", name)
            return self._tool_cache[name]

        metadata = self._metadata_cache.get(name)
        if metadata is None:
            logger.warning("Tool not found: %s", name)
            return None

        # If file doesn't exist, create tool from metadata (for default tools)
        if not metadata.file_path.exists():
            tool = self._create_default_tool(name)
            if tool is not None:
                self._tool_cache[name] = tool
            else:
                # Previously this case returned None without any trace.
                logger.warning(
                    "No definition file or built-in default for tool: %s", name
                )
            return tool

        # Load from file; only the steps that can fail live inside the try.
        try:
            with open(metadata.file_path, encoding="utf-8") as f:
                data = json.load(f)
            tool = Tool(
                name=data["name"],
                description=data.get("description", ""),
                inputSchema=data.get("inputSchema", {"type": "object", "properties": {}}),
            )
        except Exception as e:
            logger.error("Failed to load tool %s: %s", name, e)
            return None

        self._tool_cache[name] = tool
        logger.debug("Loaded tool from file: %s", name)
        return tool

    def _create_default_tool(self, name: str) -> Tool | None:
        """Create default tool definitions for backward compatibility.

        Args:
            name: Tool name

        Returns:
            A freshly built Tool for one of the built-in names
            (delegate_task, list_orchestrators, get_statistics), else None.
        """
        if name == "delegate_task":
            return Tool(
                name="delegate_task",
                description="Route task to specialist agent or confirm orchestrator should handle directly. Always call BEFORE code work to get routing guidance.",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Full user request/task to route",
                        },
                        "orchestrator": {
                            "type": "string",
                            "description": "Override primary orchestrator",
                        },
                        "force_delegate": {
                            "type": "string",
                            "description": "Force delegation to specific agent",
                        },
                        "guidance_only": {
                            "type": "boolean",
                            "description": "Return routing guidance without executing (default: false)",
                            "default": False,
                        },
                    },
                    "required": ["query"],
                },
            )
        if name == "list_orchestrators":
            return Tool(
                name="list_orchestrators",
                description="List available orchestrators and their status",
                inputSchema={"type": "object", "properties": {}},
            )
        if name == "get_statistics":
            return Tool(
                name="get_statistics",
                description="Get delegation statistics and metrics",
                inputSchema={"type": "object", "properties": {}},
            )
        return None

    def list_categories(self) -> list[str]:
        """List all available tool categories, sorted alphabetically."""
        return sorted({metadata.category for metadata in self._metadata_cache.values()})

    def get_tool_count(self) -> dict[str, int]:
        """Get tool count by category."""
        counts: dict[str, int] = {}
        for metadata in self._metadata_cache.values():
            counts[metadata.category] = counts.get(metadata.category, 0) + 1
        return counts