From 73638725ee49f5eddc3c989fac089918ab58eb0a Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:20:38 -0800 Subject: [PATCH 1/6] feat(agentenv-mcp): Initialize package structure Add new agentenv-mcp package for MCP (Model Context Protocol) integration: - pyproject.toml: Package configuration with dependencies - README.md: Documentation with architecture overview - __init__.py: Lazy imports for optional dependencies This package provides bidirectional wrappers between AgentGym environments and MCP servers. --- agentenv-mcp/README.md | 91 +++++++++++++++++++++++++++ agentenv-mcp/agentenv_mcp/__init__.py | 42 +++++++++++++ agentenv-mcp/pyproject.toml | 42 +++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 agentenv-mcp/README.md create mode 100644 agentenv-mcp/agentenv_mcp/__init__.py create mode 100644 agentenv-mcp/pyproject.toml diff --git a/agentenv-mcp/README.md b/agentenv-mcp/README.md new file mode 100644 index 00000000..03aeed15 --- /dev/null +++ b/agentenv-mcp/README.md @@ -0,0 +1,91 @@ +# AgentEnv-MCP + +**Bidirectional MCP (Model Context Protocol) wrapper for AgentGym environments.** + +This package provides two complementary wrappers: + +1. **AgentEnvToMCP**: Expose any AgentGym environment as an MCP server +2. 
**MCPToAgentEnv**: Adapt any MCP server into an AgentGym-compatible environment + +## Installation + +```bash +pip install -e ".[dev,sciworld]" +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ AgentEnv-MCP Wrappers │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────┐ ┌─────────────────────┐ │ +│ │ AgentEnvToMCP │ │ MCPToAgentEnv │ │ +│ │ (Export) │ │ (Import) │ │ +│ ├─────────────────────┤ ├─────────────────────┤ │ +│ │ BaseEnvClient ──────┼──► MCP │ MCP Server ─────────┼──► BaseEnvClient│ +│ │ │ Server │ │ │ +│ │ • reset() │ Tools: │ MCP Tools become: │ │ +│ │ • step() │ • reset │ • FUNCTION_DESC │ │ +│ │ • observe() │ • step │ • ActionFormat │ │ +│ │ │ • observe│ • step() mapping │ │ +│ └─────────────────────┘ └─────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Usage + +### Export: AgentEnv as MCP Server + +```python +from agentenv_mcp import AgentEnvMCPServer +from agentenv.envs.sciworld import SciworldEnvClient + +# Create MCP server from any BaseEnvClient +server = AgentEnvMCPServer( + env_client_cls=SciworldEnvClient, + client_args={"env_server_base": "http://localhost:8000", "data_len": 100}, +) + +# Run as MCP server (stdio transport) +server.run() +``` + +### Import: MCP Server as AgentEnv + +```python +from agentenv_mcp import MCPEnvClient, MCPTask + +# Connect to any MCP server and use as AgentEnv +client = MCPEnvClient( + server_command=["python", "-m", "my_mcp_server"], + action_format="function_calling", +) + +# Use with standard AgentGym evaluation +task = MCPTask(client_args={...}) +``` + +## Examples + +See `examples/` for complete demonstrations: + +- `sciworld_mcp_server.py` - SciWorld exposed as MCP server +- `mcp_client_demo.py` - Using an MCP server as AgentEnv + +## Testing + +```bash +pytest tests/ -v +``` + +## Compatibility + +This wrapper 
is designed to be fully compatible with: + +- `BaseEnvClient` interface from `agentenv.controller` +- `BaseTask` for experience generation +- All `ActionFormat` types (REACT, FUNCTION_CALLING, CODE_AS_ACTION) +- `Agent` and `APIAgent` for evaluation diff --git a/agentenv-mcp/agentenv_mcp/__init__.py b/agentenv-mcp/agentenv_mcp/__init__.py new file mode 100644 index 00000000..d94b6b71 --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/__init__.py @@ -0,0 +1,42 @@ +""" +AgentEnv-MCP: Bidirectional MCP wrapper for AgentGym environments. + +This package provides: +- AgentEnvMCPServer: Expose any AgentEnv as an MCP server +- MCPEnvClient: Adapt any MCP server into an AgentEnv client +- MCPTask: Task wrapper for MCP-based environments +""" + +__version__ = "0.1.0" + +# Lazy imports to avoid requiring all dependencies at import time +def __getattr__(name): + if name == "AgentEnvMCPServer": + from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer + return AgentEnvMCPServer + elif name == "MCPEnvClient": + from agentenv_mcp.mcp_to_agentenv import MCPEnvClient + return MCPEnvClient + elif name == "MCPAdapter": + from agentenv_mcp.mcp_to_agentenv import MCPAdapter + return MCPAdapter + elif name == "MCPTask": + from agentenv_mcp.mcp_to_agentenv import MCPTask + return MCPTask + elif name == "function_desc_to_mcp_tool": + from agentenv_mcp.schema_utils import function_desc_to_mcp_tool + return function_desc_to_mcp_tool + elif name == "mcp_tool_to_function_desc": + from agentenv_mcp.schema_utils import mcp_tool_to_function_desc + return mcp_tool_to_function_desc + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +__all__ = [ + "AgentEnvMCPServer", + "MCPEnvClient", + "MCPAdapter", + "MCPTask", + "function_desc_to_mcp_tool", + "mcp_tool_to_function_desc", +] diff --git a/agentenv-mcp/pyproject.toml b/agentenv-mcp/pyproject.toml new file mode 100644 index 00000000..0d131e9f --- /dev/null +++ b/agentenv-mcp/pyproject.toml @@ -0,0 +1,42 @@ +[build-system] 
+requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "agentenv-mcp" +version = "0.1.0" +description = "MCP (Model Context Protocol) wrapper for AgentGym environments" +readme = "README.md" +license = {text = "Apache-2.0"} +requires-python = ">=3.10" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "mcp>=1.0.0", + "httpx>=0.25.0", + "pydantic>=2.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", + "pytest-asyncio>=0.21.0", +] +sciworld = [ + "agentenv-sciworld", +] + +[tool.setuptools.packages.find] +where = ["."] +include = ["agentenv_mcp*"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] From 7460b37e1c35bd4301e2abf4b54d8bc3165a878f Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:20:44 -0800 Subject: [PATCH 2/6] feat(agentenv-mcp): Add schema conversion utilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add schema_utils.py with functions to convert between formats: - function_desc_to_mcp_tool(): AgentEnv → MCP tool schema - mcp_tool_to_function_desc(): MCP tool → AgentEnv function desc - Batch conversion helpers for tool lists These utilities enable seamless conversion between AgentGym's function description format and MCP's tool schema format. 
--- agentenv-mcp/agentenv_mcp/schema_utils.py | 75 +++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 agentenv-mcp/agentenv_mcp/schema_utils.py diff --git a/agentenv-mcp/agentenv_mcp/schema_utils.py b/agentenv-mcp/agentenv_mcp/schema_utils.py new file mode 100644 index 00000000..25f48904 --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/schema_utils.py @@ -0,0 +1,75 @@ +""" +Schema conversion utilities between AgentEnv function descriptions and MCP tool schemas. +""" + +from typing import Any + + +def function_desc_to_mcp_tool(func_desc: dict[str, Any]) -> dict[str, Any]: + """ + Convert an AgentEnv function description to MCP tool schema. + + AgentEnv format: + { + "name": "open", + "description": "Opens a container.", + "parameters": { + "type": "object", + "properties": { + "obj": {"type": "string", "description": "The container to open."} + }, + "required": ["obj"] + } + } + + MCP format: + { + "name": "open", + "description": "Opens a container.", + "inputSchema": { + "type": "object", + "properties": { + "obj": {"type": "string", "description": "The container to open."} + }, + "required": ["obj"] + } + } + """ + return { + "name": func_desc["name"], + "description": func_desc.get("description", ""), + "inputSchema": func_desc.get("parameters", {"type": "object", "properties": {}}), + } + + +def mcp_tool_to_function_desc(mcp_tool: dict[str, Any]) -> dict[str, Any]: + """ + Convert an MCP tool schema to AgentEnv function description. + + This is the inverse of function_desc_to_mcp_tool. + """ + input_schema = mcp_tool.get("inputSchema", {"type": "object", "properties": {}}) + + return { + "name": mcp_tool["name"], + "description": mcp_tool.get("description", ""), + "parameters": input_schema, + } + + +def generate_function_descriptions_from_mcp_tools( + mcp_tools: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """ + Convert a list of MCP tools to AgentEnv function descriptions. 
+ """ + return [mcp_tool_to_function_desc(tool) for tool in mcp_tools] + + +def generate_mcp_tools_from_function_descriptions( + func_descs: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """ + Convert a list of AgentEnv function descriptions to MCP tools. + """ + return [function_desc_to_mcp_tool(desc) for desc in func_descs] From 33ad85999cf759ee00850b5bd87081b32b9a1e90 Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:20:51 -0800 Subject: [PATCH 3/6] feat(agentenv-mcp): Add AgentEnvToMCP wrapper Add AgentEnvMCPServer class that wraps any BaseEnvClient as MCP server: - Exposes environment actions as MCP tools (action_*) - Provides management tools: env_reset, env_step, env_observe, env_info - Supports both ReAct and function_calling action formats - Lazy client creation for efficient resource usage - Async handlers for all MCP tool calls Usage: server = AgentEnvMCPServer( env_client_cls=SciworldEnvClient, client_args={...}, function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION, ) server.run() --- agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py | 310 +++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py diff --git a/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py b/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py new file mode 100644 index 00000000..367b40fa --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py @@ -0,0 +1,310 @@ +""" +AgentEnvToMCP: Wrapper to expose any AgentGym BaseEnvClient as an MCP server. + +This allows external MCP-compatible agents to interact with AgentGym environments +using the standard MCP protocol. 
+""" + +import asyncio +import json +from typing import Any, Callable, Type + +from mcp.server import Server +from mcp.server.stdio import stdio_server +from mcp.types import ( + CallToolResult, + TextContent, + Tool, +) + +from .schema_utils import function_desc_to_mcp_tool + + +class AgentEnvMCPServer: + """ + Wraps an AgentGym BaseEnvClient as an MCP server. + + This server exposes the environment's actions as MCP tools, plus + management tools for reset/observe operations. + + MCP Tools provided: + - env_reset(task_idx: int) -> Reset environment to a specific task + - env_step(action: str) -> Execute an action (raw format) + - env_observe() -> Get current observation + - Plus all environment-specific action tools from FUNCTION_DESCRIPTION + + Example: + >>> from agentenv.envs.sciworld import SciworldEnvClient, SCIWORLD_FUNCTION_DESCRIPTION + >>> server = AgentEnvMCPServer( + ... env_client_cls=SciworldEnvClient, + ... client_args={"env_server_base": "http://localhost:8000", "data_len": 100}, + ... function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION, + ... ) + >>> server.run() + """ + + def __init__( + self, + env_client_cls: Type, + client_args: dict[str, Any], + function_descriptions: list[dict[str, Any]] | None = None, + env_name: str = "agentenv", + action_format: str = "function_calling", + ): + """ + Initialize the MCP server wrapper. + + Args: + env_client_cls: The BaseEnvClient class to wrap + client_args: Arguments to pass to the client constructor + function_descriptions: Optional list of function descriptions for action tools. + If the client has an adapter_cls with these, they'll be used. 
+ env_name: Name for the environment (used in server identification) + action_format: Action format to use ("react", "function_calling", "code_as_action") + """ + self.env_client_cls = env_client_cls + self.client_args = client_args + self.env_name = env_name + self.action_format = action_format + + # Try to get function descriptions from the adapter class if not provided + self.function_descriptions = function_descriptions or [] + + # Lazily created client + self._client = None + self._server = Server(f"agentenv-{env_name}") + + self._setup_handlers() + + @property + def client(self): + """Lazily create the environment client.""" + if self._client is None: + self._client = self.env_client_cls( + **self.client_args, + action_format=self.action_format, + ) + return self._client + + def _setup_handlers(self): + """Set up MCP tool handlers.""" + + @self._server.list_tools() + async def list_tools() -> list[Tool]: + """List all available tools.""" + tools = [ + # Core environment management tools + Tool( + name="env_reset", + description="Reset the environment to a specific task index.", + inputSchema={ + "type": "object", + "properties": { + "task_idx": { + "type": "integer", + "description": "Index of the task to reset to (0 to env_size-1)", + } + }, + "required": ["task_idx"], + }, + ), + Tool( + name="env_step", + description="Execute a raw action string in the environment.", + inputSchema={ + "type": "object", + "properties": { + "action": { + "type": "string", + "description": "The action to execute", + } + }, + "required": ["action"], + }, + ), + Tool( + name="env_observe", + description="Get the current observation from the environment.", + inputSchema={ + "type": "object", + "properties": {}, + }, + ), + Tool( + name="env_info", + description="Get environment information (size, current state).", + inputSchema={ + "type": "object", + "properties": {}, + }, + ), + ] + + # Add environment-specific action tools + for func_desc in self.function_descriptions: + 
mcp_tool = function_desc_to_mcp_tool(func_desc) + tools.append(Tool( + name=f"action_{mcp_tool['name']}", + description=mcp_tool["description"], + inputSchema=mcp_tool["inputSchema"], + )) + + return tools + + @self._server.call_tool() + async def call_tool(name: str, arguments: dict[str, Any]) -> list[CallToolResult]: + """Handle tool calls.""" + try: + if name == "env_reset": + return await self._handle_reset(arguments) + elif name == "env_step": + return await self._handle_step(arguments) + elif name == "env_observe": + return await self._handle_observe() + elif name == "env_info": + return await self._handle_info() + elif name.startswith("action_"): + return await self._handle_action(name[7:], arguments) + else: + return [CallToolResult( + content=[TextContent(type="text", text=f"Unknown tool: {name}")], + isError=True, + )] + except Exception as e: + return [CallToolResult( + content=[TextContent(type="text", text=f"Error: {str(e)}")], + isError=True, + )] + + async def _handle_reset(self, arguments: dict[str, Any]) -> list[CallToolResult]: + """Handle env_reset tool call.""" + task_idx = arguments.get("task_idx", 0) + result = self.client.reset(task_idx) + observation = self.client.observe() + + return [CallToolResult( + content=[TextContent( + type="text", + text=json.dumps({ + "status": "reset", + "task_idx": task_idx, + "observation": observation, + "reset_info": result if isinstance(result, dict) else {}, + }, indent=2) + )], + isError=False, + )] + + async def _handle_step(self, arguments: dict[str, Any]) -> list[CallToolResult]: + """Handle env_step tool call.""" + action = arguments.get("action", "") + step_output = self.client.step(action) + + return [CallToolResult( + content=[TextContent( + type="text", + text=json.dumps({ + "observation": step_output.state, + "reward": step_output.reward, + "done": step_output.done, + }, indent=2) + )], + isError=False, + )] + + async def _handle_observe(self) -> list[CallToolResult]: + """Handle env_observe tool 
call.""" + observation = self.client.observe() + + return [CallToolResult( + content=[TextContent(type="text", text=observation)], + isError=False, + )] + + async def _handle_info(self) -> list[CallToolResult]: + """Handle env_info tool call.""" + return [CallToolResult( + content=[TextContent( + type="text", + text=json.dumps({ + "env_name": self.env_name, + "env_size": len(self.client), + "action_format": self.action_format, + "available_actions": len(self.function_descriptions), + }, indent=2) + )], + isError=False, + )] + + async def _handle_action( + self, action_name: str, arguments: dict[str, Any] + ) -> list[CallToolResult]: + """Handle environment-specific action tool calls.""" + # Find the function description for this action + func_desc = None + for fd in self.function_descriptions: + if fd["name"] == action_name: + func_desc = fd + break + + if func_desc is None: + return [CallToolResult( + content=[TextContent(type="text", text=f"Unknown action: {action_name}")], + isError=True, + )] + + # Format the action based on action_format + if self.action_format == "function_calling": + # Format as JSON function call + action_str = json.dumps({ + "thought": arguments.get("thought", "Executing action"), + "function_name": action_name, + "arguments": {k: v for k, v in arguments.items() if k != "thought"}, + }) + else: + # For react format, try to construct the action string + # This is environment-specific and may need customization + action_str = self._format_react_action(action_name, arguments) + + step_output = self.client.step(action_str) + + return [CallToolResult( + content=[TextContent( + type="text", + text=json.dumps({ + "action": action_name, + "observation": step_output.state, + "reward": step_output.reward, + "done": step_output.done, + }, indent=2) + )], + isError=False, + )] + + def _format_react_action(self, action_name: str, arguments: dict[str, Any]) -> str: + """Format an action in ReAct style. 
Override for environment-specific formatting.""" + args_str = " ".join(str(v) for k, v in arguments.items() if k != "thought" and v) + thought = arguments.get("thought", "") + action = f"{action_name} {args_str}".strip() if args_str else action_name + return f"Thought:\n{thought}\n\nAction:\n{action}" + + def run(self): + """Run the MCP server using stdio transport.""" + async def main(): + async with stdio_server() as (read_stream, write_stream): + await self._server.run( + read_stream, + write_stream, + self._server.create_initialization_options(), + ) + + asyncio.run(main()) + + async def run_async(self): + """Run the MCP server asynchronously.""" + async with stdio_server() as (read_stream, write_stream): + await self._server.run( + read_stream, + write_stream, + self._server.create_initialization_options(), + ) From bf369cb1e5e3b3d8e353d78c22fa3bedb27af262 Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:20:59 -0800 Subject: [PATCH 4/6] feat(agentenv-mcp): Add MCPToAgentEnv wrapper Add classes to wrap MCP servers as AgentGym-compatible clients: MCPAdapter: - Parses actions in REACT, FUNCTION_CALLING, CODE_AS_ACTION formats - Generates conversation_start prompts from function descriptions - Converts parsed actions to MCP tool calls MCPEnvClient (implements BaseEnvClient interface): - Connects to MCP servers via stdio transport - Auto-discovers tools and generates function descriptions - Maps step() calls to MCP tool invocations - Full compatibility with AgentGym evaluation pipeline MCPTask: - Task wrapper for experience generation - Compatible with Agent and APIAgent classes --- agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py | 661 +++++++++++++++++++ 1 file changed, 661 insertions(+) create mode 100644 agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py diff --git a/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py b/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py new file mode 100644 index 00000000..6eb7c410 --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py @@ -0,0 
+1,661 @@ +""" +MCPToAgentEnv: Wrapper to adapt any MCP server into an AgentGym-compatible BaseEnvClient. + +This allows AgentGym agents to train and evaluate against external MCP services +using the standard AgentEnv interface. +""" + +import asyncio +import json +import re +from abc import ABCMeta +from typing import Any, Mapping, Optional, Sequence, TYPE_CHECKING + +from .schema_utils import mcp_tool_to_function_desc, generate_function_descriptions_from_mcp_tools + +# Lazy MCP imports to allow module import without MCP installed +if TYPE_CHECKING: + from mcp import ClientSession + from mcp.client.stdio import stdio_client, StdioServerParameters + from mcp.types import CallToolResult, TextContent + + +def _get_mcp_types(): + """Lazily import MCP types.""" + from mcp import ClientSession + from mcp.client.stdio import stdio_client, StdioServerParameters + from mcp.types import CallToolResult, TextContent + return { + "ClientSession": ClientSession, + "stdio_client": stdio_client, + "StdioServerParameters": StdioServerParameters, + "CallToolResult": CallToolResult, + "TextContent": TextContent, + } + + +# Import AgentEnv types - these are imported at runtime to avoid hard dependency +def _get_agentenv_types(): + """Lazily import AgentEnv types.""" + from agentenv.controller import BaseEnvClient, BaseTask + from agentenv.controller.types import ( + ActionFormat, + ActionWithTought, + ConversationMessage, + StepOutput, + ) + from agentenv.controller.utils import ( + BaseAdapter, + format_function_call_prompt, + format_code_as_action_prompt, + ) + return { + "BaseEnvClient": BaseEnvClient, + "BaseTask": BaseTask, + "ActionFormat": ActionFormat, + "ActionWithTought": ActionWithTought, + "ConversationMessage": ConversationMessage, + "StepOutput": StepOutput, + "BaseAdapter": BaseAdapter, + "format_function_call_prompt": format_function_call_prompt, + "format_code_as_action_prompt": format_code_as_action_prompt, + } + + +class MCPAdapter: + """ + Adapter for parsing 
actions in various formats for MCP-based environments. + + This adapter handles conversion between AgentEnv action formats and MCP tool calls. + """ + + INVOKING_FUNCTION_PROMPT = """ + +If you want to invoke a provided function or tool, please reply in the following *JSON* format: +```json +{ + "thought": "I think ...", + "function_name": "function_name", + "arguments": +} +``` +Only reply the *JSON* object, no other text should be present. +""" + + def __init__(self, function_descriptions: list[dict[str, Any]]): + """ + Initialize the adapter with function descriptions. + + Args: + function_descriptions: List of function descriptions in AgentEnv format + """ + self.function_descriptions = function_descriptions + self._build_conversation_starts() + + def _build_conversation_starts(self): + """Build conversation start prompts for each action format.""" + types = _get_agentenv_types() + ConversationMessage = types["ConversationMessage"] + ActionFormat = types["ActionFormat"] + format_function_call_prompt = types["format_function_call_prompt"] + format_code_as_action_prompt = types["format_code_as_action_prompt"] + + base_instruction = ( + "You are an agent interacting with an environment through tools.\n" + "Each turn you will receive an observation and must respond with an action.\n" + ) + + self.conversation_start_dict = { + ActionFormat.REACT: ( + ConversationMessage({ + "from": "human", + "loss": None, + "value": ( + f"{base_instruction}" + "Your response should use the following format:\n\n" + "Thought:\nI think ... 
\n\nAction:\naction_name arg1 arg2" + ), + }), + ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}), + ), + ActionFormat.FUNCTION_CALLING: ( + ConversationMessage({ + "from": "human", + "loss": None, + "value": ( + f"{base_instruction}" + f"{format_function_call_prompt(self.function_descriptions)}" + ), + }), + ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}), + ), + ActionFormat.CODE_AS_ACTION: ( + ConversationMessage({ + "from": "human", + "loss": None, + "value": ( + f"{base_instruction}" + f"{format_code_as_action_prompt(self.function_descriptions)}" + ), + }), + ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}), + ), + } + + @staticmethod + def parse_react(text: str): + """Parse ReAct format action.""" + types = _get_agentenv_types() + ActionWithTought = types["ActionWithTought"] + + _split = text.rsplit("Action:", 1) + if len(_split) == 2: + _thought, _action = _split + thought = _thought.split("Thought:")[-1].strip() + action = _action.strip() + else: + thought = "" + action = text.strip() + + return ActionWithTought(thought, action) + + @staticmethod + def parse_function_calling(text: str): + """Parse function calling format action.""" + types = _get_agentenv_types() + ActionWithTought = types["ActionWithTought"] + + # Try to extract JSON from the text + try: + _fn_call = json.loads( + "{" + text.split("{", 1)[-1].rsplit("}", 1)[0] + "}", strict=False + ) + thought = _fn_call.get("thought", "") + fn_name = _fn_call.get("function_name", "") + args = _fn_call.get("arguments", {}) + + # Return as structured action that can be converted to MCP tool call + action = json.dumps({"function_name": fn_name, "arguments": args}) + return ActionWithTought(thought, action) + except json.JSONDecodeError: + return ActionWithTought("", text) + + def action_parser(self, action: str, action_format) -> dict[str, Any]: + """ + Parse action text and return MCP tool call parameters. 
+ + Returns: + dict with "tool_name" and "arguments" keys + """ + types = _get_agentenv_types() + ActionFormat = types["ActionFormat"] + + if action_format == ActionFormat.REACT: + parsed = self.parse_react(action) + # For React format, try to parse the action as "tool_name arg1 arg2" + return self._react_to_tool_call(parsed.action) + elif action_format == ActionFormat.FUNCTION_CALLING: + parsed = self.parse_function_calling(action) + try: + call_data = json.loads(parsed.action) + return { + "tool_name": call_data.get("function_name", ""), + "arguments": call_data.get("arguments", {}), + } + except json.JSONDecodeError: + return {"tool_name": "", "arguments": {}} + else: + # CODE_AS_ACTION - extract and parse + return {"tool_name": "", "arguments": {"code": action}} + + def _react_to_tool_call(self, action: str) -> dict[str, Any]: + """Convert a React-style action string to tool call parameters.""" + # Try to match against known function names + for func_desc in self.function_descriptions: + fn_name = func_desc["name"] + if action.lower().startswith(fn_name.lower()): + args_str = action[len(fn_name):].strip() + # Parse simple space-separated arguments + params = func_desc.get("parameters", {}).get("properties", {}) + param_names = list(params.keys()) + arg_values = args_str.split() if args_str else [] + + arguments = {} + for i, param_name in enumerate(param_names): + if i < len(arg_values): + arguments[param_name] = arg_values[i] + + return {"tool_name": fn_name, "arguments": arguments} + + # If no match, return the raw action + return {"tool_name": action.split()[0] if action else "", "arguments": {}} + + +class MCPEnvClient: + """ + AgentEnv-compatible client that wraps an MCP server. + + This allows any MCP server to be used as an AgentGym environment. + The MCP server must provide: + - env_reset tool (or similar reset mechanism) + - env_step tool (or action tools) + - env_observe tool (or observation mechanism) + + Example: + >>> client = MCPEnvClient( + ... 
server_command=["python", "-m", "my_mcp_server"], + ... action_format="function_calling", + ... ) + >>> client.reset(0) + >>> obs = client.observe() + >>> result = client.step("some action") + """ + + def __init__( + self, + server_command: list[str], + server_args: list[str] | None = None, + server_env: dict[str, str] | None = None, + action_format: str = "function_calling", + data_len: int = 1, + timeout: float = 30.0, + reset_tool: str = "env_reset", + step_tool: str = "env_step", + observe_tool: str = "env_observe", + ): + """ + Initialize the MCP environment client. + + Args: + server_command: Command to start the MCP server + server_args: Additional arguments for the server + server_env: Environment variables for the server + action_format: Action format to use + data_len: Number of tasks available in the environment + timeout: Timeout for MCP operations + reset_tool: Name of the reset tool on the MCP server + step_tool: Name of the step tool on the MCP server + observe_tool: Name of the observe tool on the MCP server + """ + mcp_types = _get_mcp_types() + StdioServerParameters = mcp_types["StdioServerParameters"] + + types = _get_agentenv_types() + self.ActionFormat = types["ActionFormat"] + self.StepOutput = types["StepOutput"] + + self.server_params = StdioServerParameters( + command=server_command[0], + args=server_command[1:] + (server_args or []), + env=server_env, + ) + self.action_format = self.ActionFormat(action_format) + self.data_len = data_len + self.timeout = timeout + + self.reset_tool = reset_tool + self.step_tool = step_tool + self.observe_tool = observe_tool + + # State + self._session = None + self._tools: list[dict[str, Any]] = [] + self._function_descriptions: list[dict[str, Any]] = [] + self._adapter: Optional[MCPAdapter] = None + self._current_observation: str = "" + self._loop: Optional[asyncio.AbstractEventLoop] = None + + # Initialize connection and discover tools + self._initialize() + + def _initialize(self): + """Initialize the 
MCP connection and discover tools.""" + self._loop = asyncio.new_event_loop() + self._loop.run_until_complete(self._async_initialize()) + + async def _async_initialize(self): + """Async initialization.""" + mcp_types = _get_mcp_types() + stdio_client = mcp_types["stdio_client"] + ClientSession = mcp_types["ClientSession"] + + async with stdio_client(self.server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + + # Discover tools + tools_result = await session.list_tools() + self._tools = [ + { + "name": tool.name, + "description": tool.description or "", + "inputSchema": tool.inputSchema or {}, + } + for tool in tools_result.tools + ] + + # Convert to function descriptions (excluding management tools) + management_tools = {self.reset_tool, self.step_tool, self.observe_tool, "env_info"} + action_tools = [dict(t) for t in self._tools if t["name"] not in management_tools] + + # Strip "action_" prefix if present (on copies, so self._tools keeps the real MCP tool names that step() looks up) + for tool in action_tools: + if tool["name"].startswith("action_"): + tool["name"] = tool["name"][7:] + + self._function_descriptions = generate_function_descriptions_from_mcp_tools( + action_tools + ) + + # Create adapter + self._adapter = MCPAdapter(self._function_descriptions) + + @property + def conversation_start(self): + """Get conversation start messages for the current action format.""" + if self._adapter is None: + raise RuntimeError("Client not initialized") + return self._adapter.conversation_start_dict[self.action_format] + + @property + def adapter_cls(self): + """Return the adapter for compatibility with existing code.""" + return MCPAdapter + + def __len__(self) -> int: + """Return the number of tasks available.""" + return self.data_len + + def _run_async(self, coro): + """Run an async coroutine synchronously.""" + if self._loop is None: + self._loop = asyncio.new_event_loop() + return self._loop.run_until_complete(coro) + + async def _call_tool(self, tool_name: str, arguments: dict[str, 
Any]) -> str: + """Call an MCP tool and return the result text.""" + mcp_types = _get_mcp_types() + stdio_client = mcp_types["stdio_client"] + ClientSession = mcp_types["ClientSession"] + + async with stdio_client(self.server_params) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + + result = await session.call_tool(tool_name, arguments) + + # Extract text from result + if result.content: + texts = [] + for content in result.content: + if hasattr(content, "text"): + texts.append(content.text) + return "\n".join(texts) + return "" + + def observe(self) -> str: + """Get the current observation.""" + return self._current_observation + + def step(self, action: str) -> "StepOutput": + """ + Execute an action in the environment. + + Args: + action: The action string in the configured action format + + Returns: + StepOutput with state, reward, and done flag + """ + # Clean up action string (drop a trailing "</s>" end-of-sequence marker) + if action.endswith("</s>"): + action = action[:-4] + + try: + # Parse the action based on format + tool_call = self._adapter.action_parser(action, self.action_format) + tool_name = tool_call["tool_name"] + arguments = tool_call["arguments"] + + # Check if this is a known action tool + action_tool_name = f"action_{tool_name}" + available_tool_names = [t["name"] for t in self._tools] + + if action_tool_name in available_tool_names: + # Call the specific action tool + result_text = self._run_async( + self._call_tool(action_tool_name, arguments) + ) + elif self.step_tool in available_tool_names: + # Fall back to generic step tool + result_text = self._run_async( + self._call_tool(self.step_tool, {"action": action}) + ) + else: + return self.StepOutput( + state=f"Error: No suitable tool found for action: {action}", + reward=0.0, + done=False, + ) + + # Parse the result + try: + result_data = json.loads(result_text) + self._current_observation = result_data.get("observation", result_text) + reward = float(result_data.get("reward", 0.0)) + done 
= bool(result_data.get("done", False)) + except json.JSONDecodeError: + self._current_observation = result_text + reward = 0.0 + done = False + + return self.StepOutput( + state=self._current_observation, + reward=reward, + done=done, + ) + + except Exception as e: + return self.StepOutput( + state=f"Error executing action: {str(e)}\n\n{self._current_observation}", + reward=0.0, + done=False, + ) + + def reset(self, idx: int = 0) -> dict[str, Any]: + """ + Reset the environment to a specific task. + + Args: + idx: Task index to reset to + + Returns: + Reset information dictionary + """ + result_text = self._run_async( + self._call_tool(self.reset_tool, {"task_idx": idx}) + ) + + try: + result_data = json.loads(result_text) + self._current_observation = result_data.get("observation", result_text) + return result_data + except json.JSONDecodeError: + self._current_observation = result_text + return {"observation": result_text} + + def close(self): + """Close the MCP connection.""" + if self._loop is not None: + self._loop.close() + self._loop = None + + +class MCPTask: + """ + Task wrapper for MCP-based environments. + + This provides compatibility with AgentGym's BaseTask interface for + experience generation and evaluation. + """ + + env_client_cls = MCPEnvClient + env_name = "mcp" + + def __init__( + self, + client_args: Mapping[str, Any], + n_clients: int = 1, + ): + """ + Initialize the MCP task. + + Args: + client_args: Arguments to pass to MCPEnvClient + n_clients: Number of parallel clients (for batch generation) + """ + self.clients = [self.env_client_cls(**client_args) for _ in range(n_clients)] + self.len = len(self.clients[0]) + + def generate_experience( + self, + agent, + idxs: Sequence[int], + generation_config=None, + max_rounds: Optional[int] = None, + ): + """ + Generate experience by running the agent through the environment. + + This method follows the same pattern as BaseTask._generate_experience. 
+ """ + # Import here to avoid circular dependency + types = _get_agentenv_types() + BaseTask = types["BaseTask"] + + # Use the standard experience generation from BaseTask + # This works because MCPEnvClient implements the BaseEnvClient interface + experiences = [] + for idx in idxs: + exp = self._generate_experience_one( + agent, + self.clients[0], + idx, + generation_config, + max_rounds, + ) + experiences.append(exp) + return experiences + + def _generate_experience_one( + self, + agent, + client: MCPEnvClient, + idx: int, + generation_config=None, + max_rounds: Optional[int] = None, + ): + """Generate experience for a single task.""" + # This follows the same pattern as BaseTask._generate_experience_one + # Importing the actual implementation to reuse it + types = _get_agentenv_types() + ConversationMessage = types["ConversationMessage"] + StepOutput = types["StepOutput"] + + from agentenv.controller.types import ExperienceOutput, APIExperienceOutput + from agentenv.controller.agent import Agent, APIAgent + + client.reset(idx) + reward = 0.0 + done = False + state = client.observe() + + if isinstance(agent, Agent): + tokenizer = agent.tokenizer + conversation = list(client.conversation_start) + conversation.append( + ConversationMessage({"from": "human", "loss": None, "value": state}) + ) + conversation_tokenized = agent.chat_template.tokenize_conversation( + conversation, tokenizer, add_generation_prompt=True + ) + elif isinstance(agent, APIAgent): + from agentenv.controller.types import APIConversationMessage + conversation = [ + APIConversationMessage({"role": "user", "content": client.conversation_start[0]["value"], "reasoning_content": None}), + APIConversationMessage({"role": "assistant", "content": client.conversation_start[1]["value"], "reasoning_content": None}), + APIConversationMessage({"role": "user", "content": state, "reasoning_content": None}) + ] + else: + raise NotImplementedError + + rounds = 0 + + while not done: + if isinstance(agent, 
Agent): + input_length = len(conversation_tokenized["input_ids"]) + if input_length >= (generation_config.max_length if generation_config else 4096): + break + try: + generated_tokens = agent.generate( + [conversation_tokenized["input_ids"]], generation_config + )[0] + except Exception as e: + print(e) + break + + if generated_tokens[-1] != tokenizer.eos_token_id: + generated_tokens += [tokenizer.eos_token_id] + + generated_text = tokenizer.decode(generated_tokens) + conversation_tokenized["text"] += f" {generated_text}" + conversation_tokenized["input_ids"] += generated_tokens + conversation_tokenized["action_mask"] += [1] * len(generated_tokens) + + generated_text = generated_text[:-len(tokenizer.eos_token)] + conversation.append( + ConversationMessage({"from": "gpt", "loss": True, "value": generated_text}) + ) + elif isinstance(agent, APIAgent): + generated_text, generated_reasoning_text = agent.generate(conversation) + from agentenv.controller.types import APIConversationMessage + conversation.append( + APIConversationMessage({"role": "assistant", "content": generated_text, "reasoning_content": generated_reasoning_text}) + ) + + step_output = client.step(generated_text) + state, reward, done = step_output.state, step_output.reward, step_output.done + + if isinstance(agent, Agent): + env_message = ConversationMessage({"from": "human", "loss": None, "value": state}) + env_message_tokenized = agent.chat_template.tokenize_conversation_one( + env_message, tokenizer, add_generation_prompt=True + ) + conversation.append(env_message) + conversation_tokenized["text"] += env_message_tokenized["text"] + conversation_tokenized["input_ids"] += env_message_tokenized["input_ids"] + conversation_tokenized["action_mask"] += env_message_tokenized["action_mask"] + elif isinstance(agent, APIAgent): + from agentenv.controller.types import APIConversationMessage + conversation.append( + APIConversationMessage({"role": "user", "content": state, "reasoning_content": None}) + ) + + 
rounds += 1 + if max_rounds is not None and rounds >= max_rounds: + break + + if isinstance(agent, Agent): + return ExperienceOutput( + conversation=conversation, + reward=reward, + text=conversation_tokenized["text"], + seq_ids=conversation_tokenized["input_ids"], + attention_mask=[1] * len(conversation_tokenized["input_ids"]), + action_mask=conversation_tokenized["action_mask"], + ) + elif isinstance(agent, APIAgent): + return APIExperienceOutput( + conversation=conversation, + reward=reward, + ) From 7f5eebf87e73aac6091e8d85773c61145a167f42 Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:21:07 -0800 Subject: [PATCH 5/6] feat(agentenv-mcp): Add SciWorld MCP examples Add proof-of-concept examples using SciWorld environment: sciworld_mcp_server.py: - Wraps SciWorld as an MCP server - Includes full SCIWORLD_FUNCTION_DESCRIPTION - Demonstrates AgentEnvMCPServer usage mcp_client_demo.py: - Shows how to use MCP server as AgentEnv client - Demonstrates reset, observe, step operations - Example of function_calling format actions Usage: # Start SciWorld env server first uvicorn agentenv_sciworld.server:app --port 8000 # Run MCP server python -m agentenv_mcp.examples.sciworld_mcp_server --- .../agentenv_mcp/examples/__init__.py | 1 + .../agentenv_mcp/examples/mcp_client_demo.py | 94 ++++++ .../examples/sciworld_mcp_server.py | 292 ++++++++++++++++++ 3 files changed, 387 insertions(+) create mode 100644 agentenv-mcp/agentenv_mcp/examples/__init__.py create mode 100644 agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py create mode 100644 agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py diff --git a/agentenv-mcp/agentenv_mcp/examples/__init__.py b/agentenv-mcp/agentenv_mcp/examples/__init__.py new file mode 100644 index 00000000..3df8b26f --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/examples/__init__.py @@ -0,0 +1 @@ +"""Examples for agentenv-mcp package.""" diff --git a/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py 
b/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py new file mode 100644 index 00000000..a37ba463 --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py @@ -0,0 +1,94 @@ +""" +Demo: Using an MCP Server as an AgentEnv client. + +This demonstrates how to connect to any MCP server and use it as an +AgentGym-compatible environment for agent evaluation. + +Usage: + # First, start the SciWorld MCP server + python -m agentenv_mcp.examples.sciworld_mcp_server + + # Then run this demo (in a different terminal) + python -m agentenv_mcp.examples.mcp_client_demo +""" + +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from agentenv_mcp import MCPEnvClient + + +def demo_mcp_client(): + """ + Demonstrate using an MCP server as an AgentEnv client. + """ + print("Creating MCP Environment Client...") + print("=" * 60) + + # Create client that connects to the SciWorld MCP server + client = MCPEnvClient( + server_command=[ + sys.executable, "-m", + "agentenv_mcp.examples.sciworld_mcp_server", + "--env-server", "http://localhost:8000", + ], + action_format="function_calling", + data_len=100, + ) + + print(f"Environment size: {len(client)}") + print(f"Action format: {client.action_format}") + print() + + # Show conversation start + print("Conversation Start:") + print("-" * 40) + for msg in client.conversation_start: + role = msg["from"] + value = msg["value"][:200] + "..." if len(msg["value"]) > 200 else msg["value"] + print(f"[{role}]: {value}") + print() + + # Reset to first task + print("Resetting to task 0...") + print("-" * 40) + reset_result = client.reset(0) + print(f"Reset result: {reset_result}") + print() + + # Get observation + print("Current observation:") + print("-" * 40) + obs = client.observe() + print(obs[:500] + "..." 
if len(obs) > 500 else obs) + print() + + # Try an action (function calling format) + print("Executing action: lookaround") + print("-" * 40) + action = '{"thought": "Let me look around", "function_name": "lookaround", "arguments": {}}' + result = client.step(action) + print(f"State: {result.state[:300]}...") + print(f"Reward: {result.reward}") + print(f"Done: {result.done}") + print() + + # Try another action + print("Executing action: inventory") + print("-" * 40) + action = '{"thought": "Check my inventory", "function_name": "inventory", "arguments": {}}' + result = client.step(action) + print(f"State: {result.state[:300]}...") + print(f"Reward: {result.reward}") + print(f"Done: {result.done}") + + # Clean up + client.close() + print() + print("Demo complete!") + + +if __name__ == "__main__": + demo_mcp_client() diff --git a/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py b/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py new file mode 100644 index 00000000..4f75c806 --- /dev/null +++ b/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py @@ -0,0 +1,292 @@ +""" +SciWorld MCP Server - Exposes SciWorld environment as an MCP server. + +This demonstrates how to wrap an existing AgentGym environment (SciWorld) +as an MCP server that can be accessed by any MCP-compatible agent. + +Usage: + # Start the SciWorld environment server first (agentenv-sciworld) + uvicorn agentenv_sciworld.server:app --host 0.0.0.0 --port 8000 + + # Then run this MCP server + python -m agentenv_mcp.examples.sciworld_mcp_server +""" + +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from agentenv_mcp import AgentEnvMCPServer + +# Import SciWorld-specific components +# These are copied from agentenv/envs/sciworld.py for standalone operation +SCIWORLD_FUNCTION_DESCRIPTION = [ + { + "name": "open", + "description": "Opens a container. 
You may have to give the specific location of the container if necessary(eg.door to kitchen, door to living room).", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The container you want to open." + } + }, + "required": ["obj"] + } + }, + { + "name": "close", + "description": "Closes a container.", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The container you want to close." + }, + }, + "required": ["obj"] + } + }, + { + "name": "activate", + "description": "Activate a device (e.g., turn on a stove to heat something).", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The device you want to activate." + }, + }, + "required": ["obj"] + }, + }, + { + "name": "deactivate", + "description": "Deactivate a device (e.g., turn off a sink).", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The device you want to deactivate." + }, + }, + "required": ["obj"] + }, + }, + { + "name": "lookaround", + "description": "Describe the current room.", + "parameters": { + "type": "object", + "properties": {}, + } + }, + { + "name": "lookat", + "description": "Describe an object in detail.", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The object you want to examine." + } + }, + "required": ["obj"] + } + }, + { + "name": "pickup", + "description": "Move an object to your inventory.", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description": "The object to pick up." + } + }, + "required": ["obj"] + }, + }, + { + "name": "drop", + "description": "Drop an object from your inventory.", + "parameters":{ + "type": "object", + "properties":{ + "obj":{ + "type": "string", + "description":"The object to drop." 
+ }, + }, + "required": ["obj"] + }, + }, + { + "name": "goto", + "description": "Move to a new location.", + "parameters":{ + "type": "object", + "properties":{ + "loc":{ + "type": "string", + "description": "The location to go to." + } + }, + "required": ["loc"] + }, + }, + { + "name": "use", + "description": "Use a tool on an object.", + "parameters":{ + "type": "object", + "properties":{ + "tool":{ + "type": "string", + "description":"The tool to use." + }, + "obj":{ + "type": "string", + "description": "The object to use the tool on (optional)." + } + }, + "required": ["tool"] + } + }, + { + "name": "pour", + "description": "Pour a liquid into a container.", + "parameters":{ + "type": "object", + "properties":{ + "liq":{ + "type": "string", + "description": "The liquid to pour." + }, + "container":{ + "type": "string", + "description": "The container to pour into." + } + }, + "required": ["liq", "container"] + }, + }, + { + "name": "mix", + "description": "Chemically mix the contents of a container.", + "parameters":{ + "type": "object", + "properties":{ + "container":{ + "type": "string", + "description": "The container to mix." + } + }, + "required": ["container"] + }, + }, + { + "name": "inventory", + "description": "List items in your inventory.", + "parameters": { + "type": "object", + "properties": {}, + } + }, + { + "name": "task", + "description": "Describe the current task.", + "parameters": { + "type": "object", + "properties": {}, + } + }, + { + "name": "wait", + "description": "Wait for some time.", + "parameters":{ + "type": "object", + "properties":{ + "duration":{ + "type": "integer", + "description": "Number of time steps to wait." + } + }, + "required": ["duration"] + }, + }, +] + + +def create_sciworld_mcp_server( + env_server_base: str = "http://localhost:8000", + data_len: int = 100, +) -> AgentEnvMCPServer: + """ + Create an MCP server that wraps the SciWorld environment. 
+ + Args: + env_server_base: Base URL of the SciWorld environment server + data_len: Number of tasks available + + Returns: + AgentEnvMCPServer instance + """ + # Import the SciWorld client + try: + from agentenv.envs.sciworld import SciworldEnvClient + except ImportError: + raise ImportError( + "agentenv package not found. Please install it first:\n" + " pip install -e ../agentenv" + ) + + return AgentEnvMCPServer( + env_client_cls=SciworldEnvClient, + client_args={ + "env_server_base": env_server_base, + "data_len": data_len, + }, + function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION, + env_name="sciworld", + action_format="function_calling", + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Run SciWorld as an MCP server" + ) + parser.add_argument( + "--env-server", + default="http://localhost:8000", + help="Base URL of the SciWorld environment server", + ) + parser.add_argument( + "--data-len", + type=int, + default=100, + help="Number of tasks available", + ) + + args = parser.parse_args() + + print(f"Starting SciWorld MCP server...", file=sys.stderr) + print(f" Environment server: {args.env_server}", file=sys.stderr) + print(f" Data length: {args.data_len}", file=sys.stderr) + + server = create_sciworld_mcp_server( + env_server_base=args.env_server, + data_len=args.data_len, + ) + server.run() From da1118934ffe77bf9dc0e0108b72dfff21748e0f Mon Sep 17 00:00:00 2001 From: supmo668 Date: Sun, 18 Jan 2026 20:21:14 -0800 Subject: [PATCH 6/6] test(agentenv-mcp): Add comprehensive test suite Add tests for all wrapper components: test_schema_utils.py (9 tests): - Schema conversion functions - Round-trip conversion verification - SciWorld function description handling test_agentenv_to_mcp.py (9 tests): - AgentEnvMCPServer initialization - Handler methods (reset, step, observe, info, action) - ReAct formatting test_mcp_to_agentenv.py (8 tests): - Action parsing (ReAct, function_calling) - Tool call conversion - 
Interface compatibility checks test_sciworld_integration.py (7 tests): - Integration tests with SciWorld environment - End-to-end workflow verification - Skipped by default (requires running server) Run tests: pytest tests/ -v --- agentenv-mcp/tests/__init__.py | 1 + agentenv-mcp/tests/test_agentenv_to_mcp.py | 217 +++++++++++++++++ agentenv-mcp/tests/test_mcp_to_agentenv.py | 218 +++++++++++++++++ agentenv-mcp/tests/test_schema_utils.py | 226 ++++++++++++++++++ .../tests/test_sciworld_integration.py | 207 ++++++++++++++++ 5 files changed, 869 insertions(+) create mode 100644 agentenv-mcp/tests/__init__.py create mode 100644 agentenv-mcp/tests/test_agentenv_to_mcp.py create mode 100644 agentenv-mcp/tests/test_mcp_to_agentenv.py create mode 100644 agentenv-mcp/tests/test_schema_utils.py create mode 100644 agentenv-mcp/tests/test_sciworld_integration.py diff --git a/agentenv-mcp/tests/__init__.py b/agentenv-mcp/tests/__init__.py new file mode 100644 index 00000000..c5a930de --- /dev/null +++ b/agentenv-mcp/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for agentenv-mcp package.""" diff --git a/agentenv-mcp/tests/test_agentenv_to_mcp.py b/agentenv-mcp/tests/test_agentenv_to_mcp.py new file mode 100644 index 00000000..5355eef2 --- /dev/null +++ b/agentenv-mcp/tests/test_agentenv_to_mcp.py @@ -0,0 +1,217 @@ +""" +Tests for AgentEnvToMCP wrapper. + +These tests verify that the AgentEnvMCPServer correctly wraps +BaseEnvClient instances as MCP servers. +""" + +import pytest +import json +from unittest.mock import Mock, MagicMock, patch, AsyncMock +from dataclasses import dataclass + +# Check if MCP is available +try: + import mcp + MCP_AVAILABLE = True +except ImportError: + MCP_AVAILABLE = False + +pytestmark = pytest.mark.skipif( + not MCP_AVAILABLE, + reason="MCP library not installed. 
Install with: pip install mcp" +) + +# Mock the MCP imports for testing without full MCP installation +@dataclass +class MockStepOutput: + state: str + reward: float + done: bool + + +class MockBaseEnvClient: + """Mock BaseEnvClient for testing.""" + + def __init__(self, env_server_base: str, data_len: int, action_format: str = "function_calling"): + self.env_server_base = env_server_base + self.data_len = data_len + self.action_format = action_format + self._current_obs = "Initial observation" + self._task_idx = 0 + + def __len__(self): + return self.data_len + + def reset(self, idx: int): + self._task_idx = idx + self._current_obs = f"Task {idx} observation" + return {"task_idx": idx, "status": "reset"} + + def observe(self): + return self._current_obs + + def step(self, action: str): + self._current_obs = f"After action: {action}" + return MockStepOutput( + state=self._current_obs, + reward=0.5, + done=False, + ) + + +SAMPLE_FUNCTION_DESCRIPTIONS = [ + { + "name": "open", + "description": "Opens a container.", + "parameters": { + "type": "object", + "properties": { + "obj": {"type": "string", "description": "Container to open"} + }, + "required": ["obj"] + } + }, + { + "name": "lookaround", + "description": "Look around.", + "parameters": {"type": "object", "properties": {}} + }, +] + + +class TestAgentEnvMCPServerInit: + """Tests for AgentEnvMCPServer initialization.""" + + def test_init_basic(self): + """Test basic initialization.""" + from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer + + server = AgentEnvMCPServer( + env_client_cls=MockBaseEnvClient, + client_args={"env_server_base": "http://test", "data_len": 10}, + function_descriptions=SAMPLE_FUNCTION_DESCRIPTIONS, + env_name="test", + ) + + assert server.env_name == "test" + assert server.action_format == "function_calling" + assert len(server.function_descriptions) == 2 + + def test_lazy_client_creation(self): + """Test that client is created lazily.""" + from agentenv_mcp.agentenv_to_mcp import 
AgentEnvMCPServer + + server = AgentEnvMCPServer( + env_client_cls=MockBaseEnvClient, + client_args={"env_server_base": "http://test", "data_len": 10}, + ) + + # Client should not be created yet + assert server._client is None + + # Accessing client property creates it + client = server.client + assert client is not None + assert server._client is client + + +class TestAgentEnvMCPServerHandlers: + """Tests for MCP tool handlers.""" + + @pytest.fixture + def server(self): + """Create a test server.""" + from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer + + return AgentEnvMCPServer( + env_client_cls=MockBaseEnvClient, + client_args={"env_server_base": "http://test", "data_len": 100}, + function_descriptions=SAMPLE_FUNCTION_DESCRIPTIONS, + env_name="test", + ) + + @pytest.mark.asyncio + async def test_handle_reset(self, server): + """Test reset handler.""" + result = await server._handle_reset({"task_idx": 5}) + + assert len(result) == 1 + content = json.loads(result[0].content[0].text) + assert content["status"] == "reset" + assert content["task_idx"] == 5 + + @pytest.mark.asyncio + async def test_handle_observe(self, server): + """Test observe handler.""" + # First reset to set up state + await server._handle_reset({"task_idx": 0}) + + result = await server._handle_observe() + + assert len(result) == 1 + assert "Task 0 observation" in result[0].content[0].text + + @pytest.mark.asyncio + async def test_handle_step(self, server): + """Test step handler.""" + await server._handle_reset({"task_idx": 0}) + + result = await server._handle_step({"action": "test action"}) + + assert len(result) == 1 + content = json.loads(result[0].content[0].text) + assert "observation" in content + assert content["reward"] == 0.5 + assert content["done"] is False + + @pytest.mark.asyncio + async def test_handle_info(self, server): + """Test info handler.""" + result = await server._handle_info() + + assert len(result) == 1 + content = json.loads(result[0].content[0].text) + 
assert content["env_name"] == "test" + assert content["env_size"] == 100 + assert content["available_actions"] == 2 + + @pytest.mark.asyncio + async def test_handle_action(self, server): + """Test environment-specific action handler.""" + await server._handle_reset({"task_idx": 0}) + + result = await server._handle_action("open", {"obj": "door"}) + + assert len(result) == 1 + content = json.loads(result[0].content[0].text) + assert content["action"] == "open" + assert "observation" in content + + @pytest.mark.asyncio + async def test_handle_unknown_action(self, server): + """Test handling of unknown action.""" + result = await server._handle_action("unknown_action", {}) + + assert len(result) == 1 + assert result[0].isError is True + + +class TestReactFormatting: + """Tests for ReAct format action formatting.""" + + def test_format_react_action(self): + """Test React action formatting.""" + from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer + + server = AgentEnvMCPServer( + env_client_cls=MockBaseEnvClient, + client_args={"env_server_base": "http://test", "data_len": 10}, + action_format="react", + ) + + result = server._format_react_action("open", {"obj": "door", "thought": "Opening the door"}) + + assert "Thought:" in result + assert "Action:" in result + assert "open" in result diff --git a/agentenv-mcp/tests/test_mcp_to_agentenv.py b/agentenv-mcp/tests/test_mcp_to_agentenv.py new file mode 100644 index 00000000..68e07af0 --- /dev/null +++ b/agentenv-mcp/tests/test_mcp_to_agentenv.py @@ -0,0 +1,218 @@ +""" +Tests for MCPToAgentEnv wrapper. + +These tests verify that the MCPEnvClient correctly adapts +MCP servers into AgentGym-compatible BaseEnvClient instances. 
+""" + +import pytest +import json +from unittest.mock import Mock, MagicMock, patch, AsyncMock +from dataclasses import dataclass +from enum import Enum + + +# Check if MCP is available +try: + import mcp + MCP_AVAILABLE = True +except ImportError: + MCP_AVAILABLE = False + +# Check if agentenv is available +try: + from agentenv.controller.types import ActionFormat + AGENTENV_AVAILABLE = True +except ImportError: + AGENTENV_AVAILABLE = False + + +class TestMCPAdapterParsing: + """Tests for MCPAdapter parsing methods that don't require full dependencies.""" + + def test_parse_react_format(self): + """Test ReAct format parsing logic.""" + text = "Thought:\nI should open the door.\n\nAction:\nopen door" + + _split = text.rsplit("Action:", 1) + assert len(_split) == 2 + _thought, _action = _split + thought = _thought.split("Thought:")[-1].strip() + action = _action.strip() + + assert thought == "I should open the door." + assert action == "open door" + + def test_parse_react_no_thought(self): + """Test ReAct parsing without explicit thought.""" + text = "open door" + + _split = text.rsplit("Action:", 1) + if len(_split) == 2: + thought = _split[0].split("Thought:")[-1].strip() + action = _split[1].strip() + else: + thought = "" + action = text.strip() + + assert action == "open door" + + def test_parse_function_calling_format(self): + """Test function calling format parsing logic.""" + text = '{"thought": "Opening door", "function_name": "open", "arguments": {"obj": "door"}}' + + _fn_call = json.loads( + "{" + text.split("{", 1)[-1].rsplit("}", 1)[0] + "}", strict=False + ) + + assert _fn_call["thought"] == "Opening door" + assert _fn_call["function_name"] == "open" + assert _fn_call["arguments"]["obj"] == "door" + + def test_parse_function_calling_with_extra_text(self): + """Test parsing function call with surrounding text.""" + text = 'Here is my action: {"thought": "test", "function_name": "look", "arguments": {}}' + + _fn_call = json.loads( + "{" + text.split("{", 
1)[-1].rsplit("}", 1)[0] + "}", strict=False + ) + + assert _fn_call["function_name"] == "look" + + +class TestMCPAdapterToolCallConversion: + """Tests for converting parsed actions to tool calls.""" + + def test_react_to_tool_call_simple(self): + """Test converting simple React action to tool call.""" + function_descriptions = [ + {"name": "open", "parameters": {"properties": {"obj": {}}}}, + {"name": "goto", "parameters": {"properties": {"location": {}}}}, + ] + + action = "open door" + + # Logic for matching action to tool + result = None + for func_desc in function_descriptions: + fn_name = func_desc["name"] + if action.lower().startswith(fn_name.lower()): + args_str = action[len(fn_name):].strip() + params = func_desc.get("parameters", {}).get("properties", {}) + param_names = list(params.keys()) + arg_values = args_str.split() if args_str else [] + + arguments = {} + for i, param_name in enumerate(param_names): + if i < len(arg_values): + arguments[param_name] = arg_values[i] + + result = {"tool_name": fn_name, "arguments": arguments} + break + + assert result is not None + assert result["tool_name"] == "open" + assert result["arguments"]["obj"] == "door" + + def test_react_to_tool_call_no_args(self): + """Test converting React action with no arguments.""" + function_descriptions = [ + {"name": "lookaround", "parameters": {"properties": {}}}, + ] + + action = "lookaround" + + result = None + for func_desc in function_descriptions: + fn_name = func_desc["name"] + if action.lower().startswith(fn_name.lower()): + result = {"tool_name": fn_name, "arguments": {}} + break + + assert result is not None + assert result["tool_name"] == "lookaround" + assert result["arguments"] == {} + + +@pytest.mark.skipif(not MCP_AVAILABLE, reason="MCP library not installed") +class TestMCPEnvClientRequiresMCP: + """Tests that require MCP library.""" + + def test_mcp_env_client_has_interface(self): + """Test that MCPEnvClient has required interface.""" + from 
agentenv_mcp.mcp_to_agentenv import MCPEnvClient + + assert hasattr(MCPEnvClient, 'observe') + assert hasattr(MCPEnvClient, 'step') + assert hasattr(MCPEnvClient, 'reset') + assert hasattr(MCPEnvClient, '__len__') + + +class TestMCPTaskStructure: + """Tests for MCPTask class structure.""" + + @pytest.mark.skipif(not MCP_AVAILABLE, reason="MCP library not installed") + def test_task_has_required_attributes(self): + """Test that MCPTask has required BaseTask attributes.""" + from agentenv_mcp.mcp_to_agentenv import MCPTask + + assert hasattr(MCPTask, 'env_client_cls') + assert hasattr(MCPTask, 'env_name') + assert MCPTask.env_name == "mcp" + + +class TestSchemaConversion: + """Tests that verify schema conversion works correctly.""" + + def test_sciworld_style_function_to_tool_call(self): + """Test converting SciWorld-style functions to tool call format.""" + from agentenv_mcp.schema_utils import function_desc_to_mcp_tool + + sciworld_func = { + "name": "pour", + "description": "Pour liquid into container.", + "parameters": { + "type": "object", + "properties": { + "liq": {"type": "string", "description": "The liquid"}, + "container": {"type": "string", "description": "The container"} + }, + "required": ["liq", "container"] + } + } + + mcp_tool = function_desc_to_mcp_tool(sciworld_func) + + assert mcp_tool["name"] == "pour" + assert "inputSchema" in mcp_tool + assert "liq" in mcp_tool["inputSchema"]["properties"] + assert "container" in mcp_tool["inputSchema"]["properties"] + + +class TestConversationStartGeneration: + """Tests for conversation start prompt generation.""" + + def test_function_prompt_structure(self): + """Test that function prompts have expected structure.""" + function_descriptions = [ + { + "name": "open", + "description": "Opens something.", + "parameters": { + "type": "object", + "properties": { + "obj": {"type": "string", "description": "Object to open"} + }, + "required": ["obj"] + } + } + ] + + # Simulate the prompt format + prompt = "You have 
the following functions available:\n\n" + tool_descs = [{"type": "function", "function": f} for f in function_descriptions] + prompt += "\n".join([json.dumps(f, ensure_ascii=False, indent=2) for f in tool_descs]) + + assert "open" in prompt + assert "Opens something" in prompt + assert "obj" in prompt diff --git a/agentenv-mcp/tests/test_schema_utils.py b/agentenv-mcp/tests/test_schema_utils.py new file mode 100644 index 00000000..99a655f5 --- /dev/null +++ b/agentenv-mcp/tests/test_schema_utils.py @@ -0,0 +1,226 @@ +""" +Tests for schema conversion utilities. +""" + +import pytest +from agentenv_mcp.schema_utils import ( + function_desc_to_mcp_tool, + mcp_tool_to_function_desc, + generate_function_descriptions_from_mcp_tools, + generate_mcp_tools_from_function_descriptions, +) + + +class TestFunctionDescToMCPTool: + """Tests for function_desc_to_mcp_tool conversion.""" + + def test_basic_conversion(self): + """Test basic conversion from AgentEnv to MCP format.""" + func_desc = { + "name": "open", + "description": "Opens a container.", + "parameters": { + "type": "object", + "properties": { + "obj": { + "type": "string", + "description": "The container to open." + } + }, + "required": ["obj"] + } + } + + mcp_tool = function_desc_to_mcp_tool(func_desc) + + assert mcp_tool["name"] == "open" + assert mcp_tool["description"] == "Opens a container." 
+ assert "inputSchema" in mcp_tool + assert mcp_tool["inputSchema"]["type"] == "object" + assert "obj" in mcp_tool["inputSchema"]["properties"] + + def test_empty_parameters(self): + """Test conversion with empty parameters.""" + func_desc = { + "name": "lookaround", + "description": "Look around the room.", + "parameters": { + "type": "object", + "properties": {} + } + } + + mcp_tool = function_desc_to_mcp_tool(func_desc) + + assert mcp_tool["name"] == "lookaround" + assert mcp_tool["inputSchema"]["properties"] == {} + + def test_missing_description(self): + """Test conversion with missing description.""" + func_desc = { + "name": "test", + "parameters": {"type": "object", "properties": {}} + } + + mcp_tool = function_desc_to_mcp_tool(func_desc) + + assert mcp_tool["description"] == "" + + def test_multiple_parameters(self): + """Test conversion with multiple parameters.""" + func_desc = { + "name": "pour", + "description": "Pour liquid into container.", + "parameters": { + "type": "object", + "properties": { + "liquid": {"type": "string", "description": "The liquid"}, + "container": {"type": "string", "description": "The container"} + }, + "required": ["liquid", "container"] + } + } + + mcp_tool = function_desc_to_mcp_tool(func_desc) + + assert len(mcp_tool["inputSchema"]["properties"]) == 2 + assert "liquid" in mcp_tool["inputSchema"]["properties"] + assert "container" in mcp_tool["inputSchema"]["properties"] + + +class TestMCPToolToFunctionDesc: + """Tests for mcp_tool_to_function_desc conversion.""" + + def test_basic_conversion(self): + """Test basic conversion from MCP to AgentEnv format.""" + mcp_tool = { + "name": "open", + "description": "Opens a container.", + "inputSchema": { + "type": "object", + "properties": { + "obj": { + "type": "string", + "description": "The container to open." 
+ } + }, + "required": ["obj"] + } + } + + func_desc = mcp_tool_to_function_desc(mcp_tool) + + assert func_desc["name"] == "open" + assert func_desc["description"] == "Opens a container." + assert "parameters" in func_desc + assert func_desc["parameters"]["type"] == "object" + + def test_missing_input_schema(self): + """Test conversion with missing inputSchema.""" + mcp_tool = { + "name": "test", + "description": "A test tool." + } + + func_desc = mcp_tool_to_function_desc(mcp_tool) + + assert func_desc["parameters"]["type"] == "object" + assert func_desc["parameters"]["properties"] == {} + + +class TestRoundTrip: + """Test round-trip conversions.""" + + def test_agentenv_to_mcp_to_agentenv(self): + """Test AgentEnv -> MCP -> AgentEnv preserves data.""" + original = { + "name": "goto", + "description": "Move to a location.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "Target location" + } + }, + "required": ["location"] + } + } + + mcp_tool = function_desc_to_mcp_tool(original) + restored = mcp_tool_to_function_desc(mcp_tool) + + assert restored["name"] == original["name"] + assert restored["description"] == original["description"] + assert restored["parameters"] == original["parameters"] + + def test_batch_conversion(self): + """Test batch conversion of multiple tools.""" + func_descs = [ + {"name": "open", "description": "Open", "parameters": {"type": "object", "properties": {}}}, + {"name": "close", "description": "Close", "parameters": {"type": "object", "properties": {}}}, + ] + + mcp_tools = generate_mcp_tools_from_function_descriptions(func_descs) + restored = generate_function_descriptions_from_mcp_tools(mcp_tools) + + assert len(restored) == 2 + assert restored[0]["name"] == "open" + assert restored[1]["name"] == "close" + + +class TestSciWorldFunctionDescriptions: + """Test with real SciWorld function descriptions.""" + + SAMPLE_SCIWORLD_FUNCTIONS = [ + { + "name": "open", + "description": 
"Opens a container.", + "parameters": { + "type": "object", + "properties": { + "obj": {"type": "string", "description": "The container to open."} + }, + "required": ["obj"] + } + }, + { + "name": "lookaround", + "description": "Describe the current room.", + "parameters": {"type": "object", "properties": {}} + }, + { + "name": "pour", + "description": "Pour liquid into container.", + "parameters": { + "type": "object", + "properties": { + "liq": {"type": "string", "description": "The liquid"}, + "container": {"type": "string", "description": "The container"} + }, + "required": ["liq", "container"] + } + }, + ] + + def test_sciworld_conversion(self): + """Test conversion of SciWorld-style function descriptions.""" + mcp_tools = generate_mcp_tools_from_function_descriptions( + self.SAMPLE_SCIWORLD_FUNCTIONS + ) + + assert len(mcp_tools) == 3 + + # Check open tool + open_tool = next(t for t in mcp_tools if t["name"] == "open") + assert "inputSchema" in open_tool + assert "obj" in open_tool["inputSchema"]["properties"] + + # Check lookaround tool (no args) + look_tool = next(t for t in mcp_tools if t["name"] == "lookaround") + assert look_tool["inputSchema"]["properties"] == {} + + # Check pour tool (multiple args) + pour_tool = next(t for t in mcp_tools if t["name"] == "pour") + assert len(pour_tool["inputSchema"]["properties"]) == 2 diff --git a/agentenv-mcp/tests/test_sciworld_integration.py b/agentenv-mcp/tests/test_sciworld_integration.py new file mode 100644 index 00000000..2f61c421 --- /dev/null +++ b/agentenv-mcp/tests/test_sciworld_integration.py @@ -0,0 +1,207 @@ +""" +Integration tests for SciWorld MCP wrapper. + +These tests verify the complete integration between: +1. SciWorld environment (agentenv-sciworld) +2. AgentEnv client (agentenv/envs/sciworld.py) +3. 
MCP wrapper (agentenv-mcp) + +NOTE: These tests require: +- The SciWorld environment server running at http://localhost:8000 +- The agentenv package installed + +To run integration tests: + # Start SciWorld server first + cd ../agentenv-sciworld && uvicorn agentenv_sciworld.server:app --port 8000 + + # Run tests + pytest tests/test_sciworld_integration.py -v +""" + +import pytest +import json +import sys +import os + +# Skip all tests if dependencies not available +pytestmark = pytest.mark.skipif( + os.environ.get("SKIP_INTEGRATION_TESTS", "1") == "1", + reason="Integration tests disabled. Set SKIP_INTEGRATION_TESTS=0 to run." +) + + +@pytest.fixture +def sciworld_env_server(): + """Check if SciWorld server is available.""" + import requests + try: + response = requests.get("http://localhost:8000/", timeout=2) + if response.status_code == 200: + return "http://localhost:8000" + except: + pass + pytest.skip("SciWorld server not available at http://localhost:8000") + + +class TestSciWorldMCPServerIntegration: + """Integration tests for SciWorld as MCP server.""" + + def test_create_mcp_server(self, sciworld_env_server): + """Test creating MCP server from SciWorld client.""" + from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server + + server = create_sciworld_mcp_server( + env_server_base=sciworld_env_server, + data_len=10, + ) + + assert server is not None + assert server.env_name == "sciworld" + assert len(server.function_descriptions) > 0 + + @pytest.mark.asyncio + async def test_reset_and_observe(self, sciworld_env_server): + """Test reset and observe through MCP server.""" + from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server + + server = create_sciworld_mcp_server( + env_server_base=sciworld_env_server, + data_len=10, + ) + + # Reset + reset_result = await server._handle_reset({"task_idx": 0}) + assert len(reset_result) == 1 + reset_data = json.loads(reset_result[0].content[0].text) + assert "observation" in 
reset_data + + # Observe + observe_result = await server._handle_observe() + assert len(observe_result) == 1 + assert len(observe_result[0].content[0].text) > 0 + + @pytest.mark.asyncio + async def test_step_action(self, sciworld_env_server): + """Test stepping with an action.""" + from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server + + server = create_sciworld_mcp_server( + env_server_base=sciworld_env_server, + data_len=10, + ) + + # Reset first + await server._handle_reset({"task_idx": 0}) + + # Step with lookaround + step_result = await server._handle_action("lookaround", {}) + assert len(step_result) == 1 + step_data = json.loads(step_result[0].content[0].text) + assert "observation" in step_data + assert "reward" in step_data + assert "done" in step_data + + @pytest.mark.asyncio + async def test_multiple_actions(self, sciworld_env_server): + """Test executing multiple actions in sequence.""" + from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server + + server = create_sciworld_mcp_server( + env_server_base=sciworld_env_server, + data_len=10, + ) + + # Reset + await server._handle_reset({"task_idx": 0}) + + # Execute a sequence of actions + actions = [ + ("lookaround", {}), + ("inventory", {}), + ("task", {}), + ] + + for action_name, args in actions: + result = await server._handle_action(action_name, args) + assert len(result) == 1 + assert result[0].isError is False + + +class TestSciWorldClientCompatibility: + """Test that wrapped SciWorld client is compatible with AgentEnv.""" + + def test_client_interface(self, sciworld_env_server): + """Test that SciWorld client has correct interface.""" + from agentenv.envs.sciworld import SciworldEnvClient + + client = SciworldEnvClient( + env_server_base=sciworld_env_server, + data_len=10, + action_format="function_calling", + ) + + # Test interface + assert hasattr(client, 'reset') + assert hasattr(client, 'step') + assert hasattr(client, 'observe') + assert 
hasattr(client, '__len__') + assert hasattr(client, 'conversation_start') + + # Test length + assert len(client) == 10 + + # Test conversation start + assert len(client.conversation_start) == 2 + + def test_reset_and_step(self, sciworld_env_server): + """Test basic reset and step operations.""" + from agentenv.envs.sciworld import SciworldEnvClient + + client = SciworldEnvClient( + env_server_base=sciworld_env_server, + data_len=10, + action_format="function_calling", + ) + + # Reset + result = client.reset(0) + assert "observation" in result or client.observe() != "" + + # Observe + obs = client.observe() + assert isinstance(obs, str) + assert len(obs) > 0 + + # Step with function calling format + action = json.dumps({ + "thought": "Looking around", + "function_name": "lookaround", + "arguments": {} + }) + step_output = client.step(action) + + assert hasattr(step_output, 'state') + assert hasattr(step_output, 'reward') + assert hasattr(step_output, 'done') + + +class TestFunctionDescriptionAlignment: + """Test that function descriptions are properly aligned.""" + + def test_sciworld_functions_in_wrapper(self): + """Test that SciWorld functions are correctly represented in wrapper.""" + from agentenv_mcp.examples.sciworld_mcp_server import SCIWORLD_FUNCTION_DESCRIPTION + from agentenv_mcp.schema_utils import function_desc_to_mcp_tool + + # Check some key functions exist + function_names = {f["name"] for f in SCIWORLD_FUNCTION_DESCRIPTION} + + required_functions = {"open", "close", "lookaround", "goto", "pickup", "inventory"} + assert required_functions.issubset(function_names) + + # Check conversion to MCP format + for func in SCIWORLD_FUNCTION_DESCRIPTION: + mcp_tool = function_desc_to_mcp_tool(func) + assert "name" in mcp_tool + assert "inputSchema" in mcp_tool + assert mcp_tool["name"] == func["name"]