From 73638725ee49f5eddc3c989fac089918ab58eb0a Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:20:38 -0800
Subject: [PATCH 1/6] feat(agentenv-mcp): Initialize package structure

Add new agentenv-mcp package for MCP (Model Context Protocol) integration:

- pyproject.toml: Package configuration with dependencies
- README.md: Documentation with architecture overview
- __init__.py: Lazy imports for optional dependencies

This package provides bidirectional wrappers between AgentGym
environments and MCP servers.
---
 agentenv-mcp/README.md                | 91 +++++++++++++++++++++++++++
 agentenv-mcp/agentenv_mcp/__init__.py | 42 +++++++++++++
 agentenv-mcp/pyproject.toml           | 42 +++++++++++++
 3 files changed, 175 insertions(+)
 create mode 100644 agentenv-mcp/README.md
 create mode 100644 agentenv-mcp/agentenv_mcp/__init__.py
 create mode 100644 agentenv-mcp/pyproject.toml

diff --git a/agentenv-mcp/README.md b/agentenv-mcp/README.md
new file mode 100644
index 00000000..03aeed15
--- /dev/null
+++ b/agentenv-mcp/README.md
@@ -0,0 +1,91 @@
+# AgentEnv-MCP
+
+**Bidirectional MCP (Model Context Protocol) wrapper for AgentGym environments.**
+
+This package provides two complementary wrappers:
+
+1. **AgentEnvToMCP** (`AgentEnvMCPServer`): Expose any AgentGym environment as an MCP server
+2. **MCPToAgentEnv** (`MCPEnvClient`, `MCPTask`): Adapt any MCP server into an AgentGym-compatible environment
+
+## Installation
+
+```bash
+pip install -e ".[dev,sciworld]"
+```
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           AgentEnv-MCP Wrappers                             │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                             │
+│  ┌─────────────────────┐              ┌─────────────────────┐               │
+│  │   AgentEnvToMCP     │              │    MCPToAgentEnv    │               │
+│  │   (Export)          │              │    (Import)         │               │
+│  ├─────────────────────┤              ├─────────────────────┤               │
+│  │ BaseEnvClient ──────┼──► MCP      │ MCP Server ─────────┼──► BaseEnvClient│
+│  │                     │    Server   │                     │                │
+│  │ • reset()           │    Tools:   │ MCP Tools become:   │                │
+│  │ • step()            │    • reset  │ • FUNCTION_DESC     │                │
+│  │ • observe()         │    • step   │ • ActionFormat      │                │
+│  │                     │    • observe│ • step() mapping    │                │
+│  └─────────────────────┘              └─────────────────────┘               │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## Usage
+
+### Export: AgentEnv as MCP Server
+
+```python
+from agentenv_mcp import AgentEnvMCPServer
+from agentenv.envs.sciworld import SciworldEnvClient
+
+# Create MCP server from any BaseEnvClient (pass function_descriptions=[...] to also expose action_* tools)
+server = AgentEnvMCPServer(
+    env_client_cls=SciworldEnvClient,
+    client_args={"env_server_base": "http://localhost:8000", "data_len": 100},
+)
+
+# Run as MCP server (stdio transport)
+server.run()
+```
+
+### Import: MCP Server as AgentEnv
+
+```python
+from agentenv_mcp import MCPEnvClient, MCPTask
+
+# Connect to any MCP server and use as AgentEnv
+client = MCPEnvClient(
+    mcp_server_command=["python", "-m", "my_mcp_server"],
+    action_format="function_calling",
+)
+
+# Use with standard AgentGym evaluation
+task = MCPTask(client_args={...})
+```
+
+## Examples
+
+See `examples/` for complete demonstrations:
+
+- `sciworld_mcp_server.py` - SciWorld exposed as MCP server
+- `mcp_client_demo.py` - Using an MCP server as AgentEnv
+
+## Testing
+
+```bash
+pytest tests/ -v
+```
+
+## Compatibility
+
+This wrapper is designed to be fully compatible with:
+
+- `BaseEnvClient` interface from `agentenv.controller`
+- `BaseTask` for experience generation
+- All `ActionFormat` types (REACT, FUNCTION_CALLING, CODE_AS_ACTION)
+- `Agent` and `APIAgent` for evaluation
diff --git a/agentenv-mcp/agentenv_mcp/__init__.py b/agentenv-mcp/agentenv_mcp/__init__.py
new file mode 100644
index 00000000..d94b6b71
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/__init__.py
@@ -0,0 +1,42 @@
+"""
+AgentEnv-MCP: Bidirectional MCP wrapper for AgentGym environments.
+
+This package provides:
+- AgentEnvMCPServer: Expose any AgentEnv as an MCP server
+- MCPEnvClient: Adapt any MCP server into an AgentEnv client
+- MCPTask: Task wrapper for MCP-based environments
+"""
+
+__version__ = "0.1.0"
+
+# Lazy imports to avoid requiring all dependencies at import time
+def __getattr__(name):
+    """Import and return a public symbol on first access (PEP 562 module hook)."""
+    match name:
+        case "AgentEnvMCPServer":
+            from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer as symbol
+        case "MCPEnvClient":
+            from agentenv_mcp.mcp_to_agentenv import MCPEnvClient as symbol
+        case "MCPAdapter":
+            from agentenv_mcp.mcp_to_agentenv import MCPAdapter as symbol
+        case "MCPTask":
+            from agentenv_mcp.mcp_to_agentenv import MCPTask as symbol
+        case "function_desc_to_mcp_tool":
+            from agentenv_mcp.schema_utils import function_desc_to_mcp_tool as symbol
+        case "mcp_tool_to_function_desc":
+            from agentenv_mcp.schema_utils import mcp_tool_to_function_desc as symbol
+        case _:
+            # Anything else is genuinely absent; mirror the standard message.
+            raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+    # Only reached when one of the lazy imports above succeeded.
+    return symbol
+
+
+__all__ = [
+    "AgentEnvMCPServer",
+    "MCPEnvClient",
+    "MCPAdapter",
+    "MCPTask",
+    "function_desc_to_mcp_tool",
+    "mcp_tool_to_function_desc",
+]
diff --git a/agentenv-mcp/pyproject.toml b/agentenv-mcp/pyproject.toml
new file mode 100644
index 00000000..0d131e9f
--- /dev/null
+++ b/agentenv-mcp/pyproject.toml
@@ -0,0 +1,42 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "agentenv-mcp"
+version = "0.1.0"
+description = "MCP (Model Context Protocol) wrapper for AgentGym environments"
+readme = "README.md"
+license = {text = "Apache-2.0"}
+requires-python = ">=3.10"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "mcp>=1.0.0",
+    "httpx>=0.25.0",
+    "pydantic>=2.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+]
+sciworld = [
+    "agentenv-sciworld",
+]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["agentenv_mcp*"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]

From 7460b37e1c35bd4301e2abf4b54d8bc3165a878f Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:20:44 -0800
Subject: [PATCH 2/6] feat(agentenv-mcp): Add schema conversion utilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add schema_utils.py with functions to convert between formats:

- function_desc_to_mcp_tool(): AgentEnv → MCP tool schema
- mcp_tool_to_function_desc(): MCP tool → AgentEnv function desc
- Batch conversion helpers for tool lists

These utilities enable seamless conversion between AgentGym's
function description format and MCP's tool schema format.
---
 agentenv-mcp/agentenv_mcp/schema_utils.py | 75 +++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 agentenv-mcp/agentenv_mcp/schema_utils.py

diff --git a/agentenv-mcp/agentenv_mcp/schema_utils.py b/agentenv-mcp/agentenv_mcp/schema_utils.py
new file mode 100644
index 00000000..25f48904
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/schema_utils.py
@@ -0,0 +1,75 @@
+"""
+Schema conversion utilities between AgentEnv function descriptions and MCP tool schemas.
+"""
+
+from typing import Any
+
+
+def function_desc_to_mcp_tool(func_desc: dict[str, Any]) -> dict[str, Any]:
+    """
+    Convert an AgentEnv function description to MCP tool schema.
+
+    The formats differ only in the key holding the argument JSON Schema:
+    AgentEnv uses "parameters", MCP uses "inputSchema". AgentEnv example:
+    {
+        "name": "open",
+        "description": "Opens a container.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "obj": {"type": "string", "description": "The container to open."}
+            },
+            "required": ["obj"]
+        }
+    }
+
+    Args:
+        func_desc: Function description; only "name" is required.
+
+    Returns:
+        Dict with "name", "description" and "inputSchema". A missing, null or
+        empty "parameters" becomes an empty object schema, because MCP
+        expects "inputSchema" to be a JSON Schema of type "object".
+
+    Raises:
+        KeyError: If func_desc has no "name".
+    """
+    return {
+        "name": func_desc["name"],
+        "description": func_desc.get("description") or "",
+        # `or` (not a .get default) so an explicit None/{} also gets the fallback.
+        "inputSchema": func_desc.get("parameters") or {"type": "object", "properties": {}},
+    }
+
+
+def mcp_tool_to_function_desc(mcp_tool: dict[str, Any]) -> dict[str, Any]:
+    """
+    Convert an MCP tool schema (as a plain dict) to AgentEnv function description.
+
+    Inverse of function_desc_to_mcp_tool: "inputSchema" becomes "parameters".
+    A missing or null "description" becomes "" and a missing, null or empty
+    "inputSchema" becomes an empty object schema; "name" is required (KeyError).
+    """
+    return {
+        "name": mcp_tool["name"],
+        "description": mcp_tool.get("description") or "",
+        "parameters": mcp_tool.get("inputSchema") or {"type": "object", "properties": {}},
+    }
+
+
+def generate_function_descriptions_from_mcp_tools(
+    mcp_tools: list[dict[str, Any]]
+) -> list[dict[str, Any]]:
+    """Map each MCP tool dict, in order, to its AgentEnv function description."""
+    converted = map(mcp_tool_to_function_desc, mcp_tools)
+    # Materialize the lazy map so callers still receive a real list.
+    return list(converted)
+
+
+def generate_mcp_tools_from_function_descriptions(
+    func_descs: list[dict[str, Any]]
+) -> list[dict[str, Any]]:
+    """Map each AgentEnv function description, in order, to its MCP tool dict."""
+    converted = map(function_desc_to_mcp_tool, func_descs)
+    # Materialize the lazy map so callers still receive a real list.
+    return list(converted)

From 33ad85999cf759ee00850b5bd87081b32b9a1e90 Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:20:51 -0800
Subject: [PATCH 3/6] feat(agentenv-mcp): Add AgentEnvToMCP wrapper

Add AgentEnvMCPServer class that wraps any BaseEnvClient as MCP server:

- Exposes environment actions as MCP tools (action_*)
- Provides management tools: env_reset, env_step, env_observe, env_info
- Supports both ReAct and function_calling action formats
- Lazy client creation for efficient resource usage
- Async handlers for all MCP tool calls

Usage:
  server = AgentEnvMCPServer(
      env_client_cls=SciworldEnvClient,
      client_args={...},
      function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION,
  )
  server.run()
---
 agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py | 310 +++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100644 agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py

diff --git a/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py b/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py
new file mode 100644
index 00000000..367b40fa
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/agentenv_to_mcp.py
@@ -0,0 +1,310 @@
+"""
+AgentEnvToMCP: Wrapper to expose any AgentGym BaseEnvClient as an MCP server.
+
+This allows external MCP-compatible agents to interact with AgentGym environments
+using the standard MCP protocol.
+"""
+
+import asyncio
+import json
+from typing import Any, Callable, Type
+
+from mcp.server import Server
+from mcp.server.stdio import stdio_server
+from mcp.types import (
+    CallToolResult,
+    TextContent,
+    Tool,
+)
+
+from .schema_utils import function_desc_to_mcp_tool
+
+
+class AgentEnvMCPServer:
+    """
+    Wraps an AgentGym BaseEnvClient as an MCP server.
+    
+    This server exposes the environment's actions as MCP tools, plus
+    management tools for reset/observe operations.
+    
+    MCP Tools provided:
+    - env_reset(task_idx: int) -> Reset environment to a specific task
+    - env_step(action: str) -> Execute an action (raw format)
+    - env_observe() -> Get current observation
+    - Plus all environment-specific action tools from FUNCTION_DESCRIPTION
+    
+    Example:
+        >>> from agentenv.envs.sciworld import SciworldEnvClient, SCIWORLD_FUNCTION_DESCRIPTION
+        >>> server = AgentEnvMCPServer(
+        ...     env_client_cls=SciworldEnvClient,
+        ...     client_args={"env_server_base": "http://localhost:8000", "data_len": 100},
+        ...     function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION,
+        ... )
+        >>> server.run()
+    """
+    
+    def __init__(
+        self,
+        env_client_cls: Type,
+        client_args: dict[str, Any],
+        function_descriptions: list[dict[str, Any]] | None = None,
+        env_name: str = "agentenv",
+        action_format: str = "function_calling",
+    ):
+        """
+        Initialize the MCP server wrapper.
+        
+        Args:
+            env_client_cls: The BaseEnvClient class to wrap
+            client_args: Arguments to pass to the client constructor
+            function_descriptions: Optional list of function descriptions for action tools.
+                                   If omitted, no environment-specific action_* tools are exposed.
+            env_name: Name for the environment (used in server identification)
+            action_format: Action format to use ("react", "function_calling", "code_as_action")
+        """
+        self.env_client_cls = env_client_cls
+        self.client_args = client_args
+        self.env_name = env_name
+        self.action_format = action_format
+        
+        # No lookup on the client/adapter class happens here: None simply becomes an empty list.
+        self.function_descriptions = function_descriptions or []
+        
+        # The client is built on first access of the `client` property; the MCP server object is built now.
+        self._client = None
+        self._server = Server(f"agentenv-{env_name}")
+        
+        self._setup_handlers()  # registers the list_tools / call_tool handlers on self._server
+    
+    @property
+    def client(self):
+        """Return the wrapped environment client, constructing it on first use."""
+        if self._client is not None:
+            return self._client
+        # First access: client_args are expanded as keywords next to action_format.
+        built = self.env_client_cls(**self.client_args, action_format=self.action_format)
+        self._client = built
+        return built
+    
+    def _setup_handlers(self):
+        """Set up MCP tool handlers."""
+        
+        @self._server.list_tools()
+        async def list_tools() -> list[Tool]:
+            """Return the four fixed env_* tools followed by one action_* tool per function description."""
+            tools = [
+                # Fixed management tools, always present regardless of function_descriptions
+                Tool(
+                    name="env_reset",
+                    description="Reset the environment to a specific task index.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "task_idx": {
+                                "type": "integer",
+                                "description": "Index of the task to reset to (0 to env_size-1)",
+                            }
+                        },
+                        "required": ["task_idx"],
+                    },
+                ),
+                Tool(
+                    name="env_step",
+                    description="Execute a raw action string in the environment.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "action": {
+                                "type": "string",
+                                "description": "The action to execute",
+                            }
+                        },
+                        "required": ["action"],
+                    },
+                ),
+                Tool(
+                    name="env_observe",
+                    description="Get the current observation from the environment.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {},
+                    },
+                ),
+                Tool(
+                    name="env_info",
+                    description="Get environment information (size, current state).",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {},
+                    },
+                ),
+            ]
+            
+            # One extra tool per entry of self.function_descriptions (none when that list is empty)
+            for func_desc in self.function_descriptions:
+                mcp_tool = function_desc_to_mcp_tool(func_desc)
+                tools.append(Tool(
+                    name=f"action_{mcp_tool['name']}",  # call_tool strips this prefix again
+                    description=mcp_tool["description"],
+                    inputSchema=mcp_tool["inputSchema"],
+                ))
+            
+            return tools
+        
+        @self._server.call_tool()
+        async def call_tool(name: str, arguments: dict[str, Any]) -> list[TextContent]:
+            """Dispatch a tool call and return plain content blocks.
+
+            The low-level MCP server wraps this return value in a CallToolResult
+            itself and reports a raised exception as an isError result, so the
+            CallToolResult lists built by the _handle_* methods are unwrapped.
+            """
+            if name == "env_reset":
+                results = await self._handle_reset(arguments)
+            elif name == "env_step":
+                results = await self._handle_step(arguments)
+            elif name == "env_observe":
+                results = await self._handle_observe()
+            elif name == "env_info":
+                results = await self._handle_info()
+            elif name.startswith("action_"):
+                results = await self._handle_action(name[7:], arguments)
+            else:
+                raise ValueError(f"Unknown tool: {name}")
+            content = [block for result in results for block in result.content]
+            if any(result.isError for result in results):
+                raise ValueError("\n".join(block.text for block in content))
+            return content
+    
+    async def _handle_reset(self, arguments: dict[str, Any]) -> list[CallToolResult]:
+        """
+        Handle the env_reset tool call.
+
+        Resets the client to "task_idx" (0 when absent), then observes once.
+        """
+        idx = arguments.get("task_idx", 0)
+        reset_result = self.client.reset(idx)
+        # Only a dict return from reset() is forwarded; anything else becomes {}.
+        reset_info = reset_result if isinstance(reset_result, dict) else {}
+        payload = {
+            "status": "reset",
+            "task_idx": idx,
+            "observation": self.client.observe(),
+            "reset_info": reset_info,
+        }
+        body = TextContent(type="text", text=json.dumps(payload, indent=2))
+        return [CallToolResult(content=[body], isError=False)]
+    
+    async def _handle_step(self, arguments: dict[str, Any]) -> list[CallToolResult]:
+        """
+        Handle the env_step tool call.
+
+        Forwards the raw "action" string ("" when absent) to client.step().
+        """
+        outcome = self.client.step(arguments.get("action", ""))
+        # Key order matters for the serialized text: observation, reward, done.
+        summary = {
+            "observation": outcome.state,
+            "reward": outcome.reward,
+            "done": outcome.done,
+        }
+        serialized = json.dumps(summary, indent=2)
+        body = TextContent(type="text", text=serialized)
+        return [CallToolResult(content=[body], isError=False)]
+    
+    async def _handle_observe(self) -> list[CallToolResult]:
+        """
+        Handle the env_observe tool call.
+
+        The observation is returned verbatim as text, without a JSON envelope.
+        """
+        body = TextContent(type="text", text=self.client.observe())
+        return [CallToolResult(content=[body], isError=False)]
+    
+    async def _handle_info(self) -> list[CallToolResult]:
+        """
+        Handle the env_info tool call.
+
+        "env_size" is len() of the client, so this creates the client if needed.
+        """
+        info = {
+            "env_name": self.env_name,
+            "env_size": len(self.client),
+            "action_format": self.action_format,
+            "available_actions": len(self.function_descriptions),
+        }
+        body = TextContent(type="text", text=json.dumps(info, indent=2))
+        return [CallToolResult(content=[body], isError=False)]
+    
+    async def _handle_action(
+        self, action_name: str, arguments: dict[str, Any]
+    ) -> list[CallToolResult]:
+        """
+        Handle an environment-specific action_* tool call.
+
+        Args:
+            action_name: Tool name with the "action_" prefix already removed.
+            arguments: Tool arguments; an optional "thought" entry is treated
+                as the agent's reasoning rather than as an action argument
+                when the function_calling format is used.
+
+        Returns:
+            A one-element list: an error result for an unknown action name,
+            otherwise the step outcome serialized as JSON text.
+        """
+        known = next(
+            (fd for fd in self.function_descriptions if fd["name"] == action_name),
+            None,
+        )
+        if known is None:
+            unknown = TextContent(type="text", text=f"Unknown action: {action_name}")
+            return [CallToolResult(content=[unknown], isError=True)]
+
+        if self.action_format != "function_calling":
+            # Every other format falls back to the ReAct text rendering.
+            action_str = self._format_react_action(action_name, arguments)
+        else:
+            call_args = {k: v for k, v in arguments.items() if k != "thought"}
+            action_str = json.dumps({
+                "thought": arguments.get("thought", "Executing action"),
+                "function_name": action_name,
+                "arguments": call_args,
+            })
+
+        outcome = self.client.step(action_str)
+        report = {
+            "action": action_name,
+            "observation": outcome.state,
+            "reward": outcome.reward,
+            "done": outcome.done,
+        }
+        body = TextContent(type="text", text=json.dumps(report, indent=2))
+        return [CallToolResult(content=[body], isError=False)]
+    
+    def _format_react_action(self, action_name: str, arguments: dict[str, Any]) -> str:
+        """Format an action in ReAct style. Override for environment-specific formatting."""
+        # "thought" is the rationale, not an action argument; keep 0/False, drop None/"".
+        args = [str(v) for k, v in arguments.items() if k != "thought" and v not in (None, "")]
+        action = " ".join([action_name, *args]).strip()
+        return f"Thought:\n{arguments.get('thought', '')}\n\nAction:\n{action}"
+    
+    def run(self):
+        """
+        Run the MCP server using stdio transport.
+
+        Blocking entry point for synchronous callers: drives run_async() to
+        completion on a new event loop created by asyncio.run(). Call
+        run_async() directly when an event loop is already running.
+        """
+        # run_async() opens the stdio streams and serves on them; wrapping it
+        # in asyncio.run() gives the same behaviour from synchronous code.
+        asyncio.run(self.run_async())
+    
+    async def run_async(self):
+        """Run the MCP server over stdio; must be awaited from async code."""
+        async with stdio_server() as streams:
+            # stdio_server() yields the (read, write) stream pair, which is
+            # handed to the server together with its initialization options.
+            incoming, outgoing = streams
+            init_options = self._server.create_initialization_options()
+            await self._server.run(incoming, outgoing, init_options)

From bf369cb1e5e3b3d8e353d78c22fa3bedb27af262 Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:20:59 -0800
Subject: [PATCH 4/6] feat(agentenv-mcp): Add MCPToAgentEnv wrapper

Add classes to wrap MCP servers as AgentGym-compatible clients:

MCPAdapter:
- Parses actions in REACT, FUNCTION_CALLING, CODE_AS_ACTION formats
- Generates conversation_start prompts from function descriptions
- Converts parsed actions to MCP tool calls

MCPEnvClient (implements BaseEnvClient interface):
- Connects to MCP servers via stdio transport
- Auto-discovers tools and generates function descriptions
- Maps step() calls to MCP tool invocations
- Full compatibility with AgentGym evaluation pipeline

MCPTask:
- Task wrapper for experience generation
- Compatible with Agent and APIAgent classes
---
 agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py | 661 +++++++++++++++++++
 1 file changed, 661 insertions(+)
 create mode 100644 agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py

diff --git a/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py b/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py
new file mode 100644
index 00000000..6eb7c410
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/mcp_to_agentenv.py
@@ -0,0 +1,661 @@
+"""
+MCPToAgentEnv: Wrapper to adapt any MCP server into an AgentGym-compatible BaseEnvClient.
+
+This allows AgentGym agents to train and evaluate against external MCP services
+using the standard AgentEnv interface.
+"""
+
+import asyncio
+import json
+import re
+from abc import ABCMeta
+from typing import Any, Mapping, Optional, Sequence, TYPE_CHECKING
+
+from .schema_utils import mcp_tool_to_function_desc, generate_function_descriptions_from_mcp_tools
+
+# Lazy MCP imports to allow module import without MCP installed
+if TYPE_CHECKING:
+    from mcp import ClientSession
+    from mcp.client.stdio import stdio_client, StdioServerParameters
+    from mcp.types import CallToolResult, TextContent
+
+
+def _get_mcp_types():
+    """Import the MCP client names on demand and return them keyed by their own names."""
+    from mcp import ClientSession
+    from mcp.client.stdio import StdioServerParameters, stdio_client
+    from mcp.types import CallToolResult, TextContent
+    return dict(
+        ClientSession=ClientSession,
+        stdio_client=stdio_client,
+        StdioServerParameters=StdioServerParameters,
+        CallToolResult=CallToolResult,
+        TextContent=TextContent,
+    )
+
+
+# Import AgentEnv types - these are imported at runtime to avoid hard dependency
+def _get_agentenv_types():
+    """Import the AgentEnv controller names on demand, keyed by their own names."""
+    from agentenv.controller import BaseEnvClient, BaseTask
+    from agentenv.controller.types import (
+        ActionFormat,
+        ActionWithTought,
+        ConversationMessage,
+        StepOutput,
+    )
+    from agentenv.controller.utils import (
+        BaseAdapter,
+        format_code_as_action_prompt,
+        format_function_call_prompt,
+    )
+    return dict(
+        BaseEnvClient=BaseEnvClient,
+        BaseTask=BaseTask,
+        ActionFormat=ActionFormat,
+        ActionWithTought=ActionWithTought,
+        ConversationMessage=ConversationMessage,
+        StepOutput=StepOutput,
+        BaseAdapter=BaseAdapter,
+        format_function_call_prompt=format_function_call_prompt,
+        format_code_as_action_prompt=format_code_as_action_prompt,
+    )
+
+
+class MCPAdapter:
+    """
+    Adapter for parsing actions in various formats for MCP-based environments.
+    
+    This adapter handles conversion between AgentEnv action formats and MCP tool calls.
+    """
+    
+    INVOKING_FUNCTION_PROMPT = """
+
+If you want to invoke a provided function or tool, please reply in the following *JSON* format:
+```json
+{
+    "thought": "I think ...",
+    "function_name": "function_name",
+    "arguments": <valid json object of args>
+}
+```
+Only reply the *JSON* object, no other text should be present.
+"""
+
+    def __init__(self, function_descriptions: list[dict[str, Any]]):
+        """
+        Store the descriptions and build the per-format conversation starts.
+        
+        Args:
+            function_descriptions: List of function descriptions in AgentEnv format
+        """
+        self.function_descriptions = function_descriptions
+        self._build_conversation_starts()  # imports agentenv lazily, so agentenv must be importable here
+    
+    def _build_conversation_starts(self):
+        """Set self.conversation_start_dict to a (human prompt, "Ok." reply) pair per action format."""
+        types = _get_agentenv_types()
+        ConversationMessage = types["ConversationMessage"]
+        ActionFormat = types["ActionFormat"]
+        format_function_call_prompt = types["format_function_call_prompt"]
+        format_code_as_action_prompt = types["format_code_as_action_prompt"]
+        
+        base_instruction = (  # preamble shared by all three prompts
+            "You are an agent interacting with an environment through tools.\n"
+            "Each turn you will receive an observation and must respond with an action.\n"
+        )
+        
+        self.conversation_start_dict = {
+            ActionFormat.REACT: (  # NOTE(review): unlike the other two formats, this prompt lists no tools
+                ConversationMessage({
+                    "from": "human",
+                    "loss": None,
+                    "value": (
+                        f"{base_instruction}"
+                        "Your response should use the following format:\n\n"
+                        "Thought:\nI think ... \n\nAction:\naction_name arg1 arg2"
+                    ),
+                }),
+                ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}),
+            ),
+            ActionFormat.FUNCTION_CALLING: (
+                ConversationMessage({
+                    "from": "human",
+                    "loss": None,
+                    "value": (
+                        f"{base_instruction}"
+                        f"{format_function_call_prompt(self.function_descriptions)}"
+                    ),
+                }),
+                ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}),
+            ),
+            ActionFormat.CODE_AS_ACTION: (
+                ConversationMessage({
+                    "from": "human",
+                    "loss": None,
+                    "value": (
+                        f"{base_instruction}"
+                        f"{format_code_as_action_prompt(self.function_descriptions)}"
+                    ),
+                }),
+                ConversationMessage({"from": "gpt", "loss": False, "value": "Ok."}),
+            ),
+        }
+    
+    @staticmethod
+    def parse_react(text: str):
+        """
+        Parse ReAct format action.
+
+        Splits on the last "Action:" marker; the thought is whatever follows
+        the last "Thought:" before it. Without a marker the whole text is the
+        action and the thought is empty.
+        """
+        make_action = _get_agentenv_types()["ActionWithTought"]
+        before, marker, after = text.rpartition("Action:")
+        if not marker:
+            # No "Action:" at all: treat the stripped text as the action.
+            return make_action("", text.strip())
+        reasoning = before.rpartition("Thought:")[2].strip()
+        return make_action(reasoning, after.strip())
+    
+    @staticmethod
+    def parse_function_calling(text: str):
+        """
+        Parse function calling format action.
+
+        The JSON object is taken from the first "{" to the last "}" of the
+        text; on a decode failure the raw text is returned with no thought.
+        """
+        make_action = _get_agentenv_types()["ActionWithTought"]
+        # Re-wrap the slice between the outermost braces to drop any prose around it.
+        inner = text.split("{", 1)[-1].rsplit("}", 1)[0]
+        try:
+            call = json.loads("{" + inner + "}", strict=False)
+        except json.JSONDecodeError:
+            return make_action("", text)
+        payload = {
+            "function_name": call.get("function_name", ""),
+            "arguments": call.get("arguments", {}),
+        }
+        return make_action(call.get("thought", ""), json.dumps(payload))
+    
+    def action_parser(self, action: str, action_format) -> dict[str, Any]:
+        """
+        Parse action text and return MCP tool call parameters.
+        
+        Returns:
+            dict with "tool_name" ("" if none was found) and "arguments" keys
+        """
+        ActionFormat = _get_agentenv_types()["ActionFormat"]
+        
+        if action_format == ActionFormat.REACT:
+            # For React format, try to parse the action as "tool_name arg1 arg2"
+            return self._react_to_tool_call(self.parse_react(action).action)
+        if action_format == ActionFormat.FUNCTION_CALLING:
+            parsed = self.parse_function_calling(action)
+            try:
+                call_data = json.loads(parsed.action)
+            except json.JSONDecodeError:
+                call_data = None
+            # Raw replies can be valid non-object JSON ("42", "[1]") with no .get().
+            if not isinstance(call_data, dict):
+                return {"tool_name": "", "arguments": {}}
+            return {
+                "tool_name": call_data.get("function_name", ""),
+                "arguments": call_data.get("arguments") or {},
+            }
+        # CODE_AS_ACTION: the code is passed through unparsed, with no tool name.
+        return {"tool_name": "", "arguments": {"code": action}}
+    
+    def _react_to_tool_call(self, action: str) -> dict[str, Any]:
+        """Convert a React-style action string to tool call parameters.
+
+        The first known function whose name prefixes the action
+        (case-insensitively) wins; the remaining whitespace-separated words are
+        assigned, in order, to that function's declared parameters and surplus
+        words are dropped. Unmatched actions use their first word as tool name.
+        """
+        lowered = action.lower()  # hoisted: invariant across the loop
+        for func_desc in self.function_descriptions:
+            fn_name = func_desc["name"]
+            if not lowered.startswith(fn_name.lower()):
+                continue
+            param_names = func_desc.get("parameters", {}).get("properties", {})
+            arg_values = action[len(fn_name):].split()
+            # zip() stops at the shorter sequence, so missing args stay unset.
+            return {"tool_name": fn_name, "arguments": dict(zip(param_names, arg_values))}
+
+        # No match: split() (unlike indexing) is safe for blank/whitespace input.
+        words = action.split()
+        return {"tool_name": words[0] if words else "", "arguments": {}}
+
+
+class MCPEnvClient:
+    """
+    AgentEnv-compatible client that wraps an MCP server.
+
+    This allows any MCP server to be used as an AgentGym environment.
+    Tools the client looks for on the server (default names, configurable):
+    - env_reset: called by reset() with a "task_idx" argument
+    - "action_<name>" tools, with env_step as the generic fallback in step()
+    - env_observe: never called here; observe() returns the cached observation
+
+    Example:
+        >>> client = MCPEnvClient(
+        ...     server_command=["python", "-m", "my_mcp_server"],
+        ...     action_format="function_calling",
+        ... )
+        >>> client.reset(0)
+        >>> obs = client.observe()
+        >>> result = client.step("some action")
+    """
+
+    def __init__(
+        self,
+        server_command: list[str],
+        server_args: list[str] | None = None,
+        server_env: dict[str, str] | None = None,
+        action_format: str = "function_calling",
+        data_len: int = 1,
+        timeout: float = 30.0,
+        reset_tool: str = "env_reset",
+        step_tool: str = "env_step",
+        observe_tool: str = "env_observe",
+    ):
+        """
+        Initialize the MCP environment client.
+
+        Args:
+            server_command: Command to start the MCP server
+            server_args: Additional arguments for the server
+            server_env: Environment variables for the server
+            action_format: Action format to use
+            data_len: Number of tasks available in the environment
+            timeout: Timeout for MCP operations (stored; not enforced in this class)
+            reset_tool: Name of the reset tool on the MCP server
+            step_tool: Name of the step tool on the MCP server
+            observe_tool: Name of the observe tool on the MCP server
+        """
+        mcp_types = _get_mcp_types()
+        StdioServerParameters = mcp_types["StdioServerParameters"]
+
+        types = _get_agentenv_types()
+        self.ActionFormat = types["ActionFormat"]
+        self.StepOutput = types["StepOutput"]
+
+        self.server_params = StdioServerParameters(
+            command=server_command[0],
+            args=server_command[1:] + (server_args or []),
+            env=server_env,
+        )
+        self.action_format = self.ActionFormat(action_format)
+        self.data_len = data_len
+        self.timeout = timeout
+
+        self.reset_tool = reset_tool
+        self.step_tool = step_tool
+        self.observe_tool = observe_tool
+
+        # State
+        self._session = None
+        self._tools: list[dict[str, Any]] = []
+        self._function_descriptions: list[dict[str, Any]] = []
+        self._adapter: Optional[MCPAdapter] = None
+        self._current_observation: str = ""
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+
+        # Initialize connection and discover tools
+        self._initialize()
+
+    def _initialize(self):
+        """Create the client's event loop and run tool discovery on it."""
+        self._loop = asyncio.new_event_loop()
+        self._loop.run_until_complete(self._async_initialize())
+
+    async def _async_initialize(self):
+        """Connect once to discover the server's tools and build the adapter."""
+        mcp_types = _get_mcp_types()
+        stdio_client = mcp_types["stdio_client"]
+        ClientSession = mcp_types["ClientSession"]
+
+        async with stdio_client(self.server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                await session.initialize()
+
+                # Discover tools
+                tools_result = await session.list_tools()
+                self._tools = [
+                    {
+                        "name": tool.name,
+                        "description": tool.description or "",
+                        "inputSchema": tool.inputSchema or {},
+                    }
+                    for tool in tools_result.tools
+                ]
+
+                # Convert to function descriptions (excluding management tools)
+                management_tools = {self.reset_tool, self.step_tool, self.observe_tool, "env_info"}
+                # Copy each dict: renaming below must not alter self._tools, whose
+                # original "action_*" names step() relies on to find the tool.
+                action_tools = [dict(t) for t in self._tools if t["name"] not in management_tools]
+                for tool in action_tools:
+                    if tool["name"].startswith("action_"):
+                        tool["name"] = tool["name"][len("action_"):]
+
+                self._function_descriptions = generate_function_descriptions_from_mcp_tools(
+                    action_tools
+                )
+
+                # Create adapter
+                self._adapter = MCPAdapter(self._function_descriptions)
+
+    @property
+    def conversation_start(self):
+        """Get conversation start messages for the current action format."""
+        if self._adapter is None:
+            raise RuntimeError("Client not initialized")
+        return self._adapter.conversation_start_dict[self.action_format]
+
+    @property
+    def adapter_cls(self):
+        """Return the adapter class (MCPAdapter) for compatibility with existing code."""
+        return MCPAdapter
+
+    def __len__(self) -> int:
+        """Return the number of tasks available."""
+        return self.data_len
+
+    def _run_async(self, coro):
+        """Run a coroutine to completion on the client's loop (recreated after close())."""
+        if self._loop is None:
+            self._loop = asyncio.new_event_loop()
+        return self._loop.run_until_complete(coro)
+
+    async def _call_tool(self, tool_name: str, arguments: dict[str, Any]) -> str:
+        """
+        Call an MCP tool and return the result text.
+
+        A new stdio client session is opened and closed around every call.
+        NOTE(review): presumably this launches a fresh server process per call,
+        so state kept only inside the server process may not persist between
+        reset() and step() -- confirm against the server implementation.
+        """
+        mcp_types = _get_mcp_types()
+        stdio_client = mcp_types["stdio_client"]
+        ClientSession = mcp_types["ClientSession"]
+        async with stdio_client(self.server_params) as (read, write):
+            async with ClientSession(read, write) as session:
+                await session.initialize()
+                result = await session.call_tool(tool_name, arguments)
+                # Join the text of every content item; non-text items are skipped.
+                return "\n".join(
+                    item.text for item in (result.content or []) if hasattr(item, "text")
+                )
+
+    def observe(self) -> str:
+        """Return the observation cached by the last reset() or step()."""
+        return self._current_observation
+
+    def step(self, action: str) -> "StepOutput":
+        """
+        Execute an action in the environment.
+
+        Args:
+            action: The action string in the configured action format
+
+        Returns:
+            StepOutput with state, reward, and done flag
+        """
+        # Clean up action string
+        if action.endswith("</s>"):
+            action = action[:-4]
+
+        try:
+            # Parse the action based on format
+            tool_call = self._adapter.action_parser(action, self.action_format)
+            tool_name = tool_call["tool_name"]
+            arguments = tool_call["arguments"]
+
+            # Check if this is a known action tool
+            action_tool_name = f"action_{tool_name}"
+            available_tool_names = [t["name"] for t in self._tools]
+
+            if action_tool_name in available_tool_names:
+                # Call the specific action tool
+                result_text = self._run_async(
+                    self._call_tool(action_tool_name, arguments)
+                )
+            elif self.step_tool in available_tool_names:
+                # Fall back to generic step tool
+                result_text = self._run_async(
+                    self._call_tool(self.step_tool, {"action": action})
+                )
+            else:
+                return self.StepOutput(
+                    state=f"Error: No suitable tool found for action: {action}",
+                    reward=0.0,
+                    done=False,
+                )
+
+            # Parse the result; anything but a JSON object is plain observation text
+            try:
+                result_data = json.loads(result_text)
+            except json.JSONDecodeError:
+                result_data = None
+            if not isinstance(result_data, dict):
+                result_data = {}
+            self._current_observation = result_data.get("observation", result_text)
+            reward = float(result_data.get("reward", 0.0))
+            done = bool(result_data.get("done", False))
+
+            return self.StepOutput(
+                state=self._current_observation,
+                reward=reward,
+                done=done,
+            )
+        except Exception as e:
+            # Best-effort by design: failures are reported to the agent as state
+            return self.StepOutput(
+                state=f"Error executing action: {str(e)}\n\n{self._current_observation}",
+                reward=0.0,
+                done=False,
+            )
+
+    def reset(self, idx: int = 0) -> dict[str, Any]:
+        """
+        Reset the environment to a specific task.
+
+        Args:
+            idx: Task index to reset to
+
+        Returns:
+            Reset information dictionary ({"observation": text} for non-object results)
+        """
+        result_text = self._run_async(
+            self._call_tool(self.reset_tool, {"task_idx": idx})
+        )
+        try:
+            result_data = json.loads(result_text)
+        except json.JSONDecodeError:
+            result_data = None
+        if not isinstance(result_data, dict):  # plain text or non-object JSON
+            result_data = {"observation": result_text}
+        self._current_observation = result_data.get("observation", result_text)
+        return result_data
+
+    def close(self):
+        """Close the client's event loop, if one exists."""
+        if self._loop is not None:
+            self._loop.close()
+            self._loop = None
+
+
+class MCPTask:
+    """
+    Task wrapper for MCP-based environments.
+    
+    Creates MCPEnvClient instances and runs agents against them, returning
+    AgentGym experience records (ExperienceOutput / APIExperienceOutput).
+    """
+    
+    env_client_cls = MCPEnvClient
+    env_name = "mcp"
+    
+    def __init__(
+        self,
+        client_args: Mapping[str, Any],
+        n_clients: int = 1,
+    ):
+        """
+        Initialize the MCP task.
+        
+        Args:
+            client_args: Arguments to pass to MCPEnvClient
+            n_clients: Number of clients to create (at least one is required)
+        """
+        self.clients = [self.env_client_cls(**client_args) for _ in range(n_clients)]
+        self.len = len(self.clients[0])
+    
+    def generate_experience(
+        self,
+        agent,
+        idxs: Sequence[int],
+        generation_config=None,
+        max_rounds: Optional[int] = None,
+    ):
+        """
+        Generate experience by running the agent through the environment.
+        
+        Returns a list with one experience record per index in ``idxs``.
+        """
+        # NOTE(review): BaseTask is looked up here but never used below.
+        types = _get_agentenv_types()
+        BaseTask = types["BaseTask"]
+        
+        # Episodes run one after another on the first client only; any further
+        # clients created in __init__ are not used by this method.
+        experiences = []
+        for idx in idxs:
+            exp = self._generate_experience_one(
+                agent,
+                self.clients[0],
+                idx,
+                generation_config,
+                max_rounds,
+            )
+            experiences.append(exp)
+        return experiences
+    
+    def _generate_experience_one(
+        self,
+        agent,
+        client: MCPEnvClient,
+        idx: int,
+        generation_config=None,
+        max_rounds: Optional[int] = None,
+    ):
+        """Run one episode on ``client`` for task ``idx`` and return its experience record."""
+        # The interaction loop is implemented in this method; only the message,
+        # output and agent types are taken from agentenv.
+        types = _get_agentenv_types()
+        ConversationMessage = types["ConversationMessage"]
+        StepOutput = types["StepOutput"]  # NOTE(review): not referenced below
+        
+        from agentenv.controller.types import ExperienceOutput, APIExperienceOutput
+        from agentenv.controller.agent import Agent, APIAgent
+        
+        client.reset(idx)
+        reward = 0.0
+        done = False
+        state = client.observe()
+        
+        if isinstance(agent, Agent):
+            tokenizer = agent.tokenizer
+            conversation = list(client.conversation_start)  # copy; appended to below
+            conversation.append(
+                ConversationMessage({"from": "human", "loss": None, "value": state})
+            )
+            conversation_tokenized = agent.chat_template.tokenize_conversation(
+                conversation, tokenizer, add_generation_prompt=True
+            )
+        elif isinstance(agent, APIAgent):
+            from agentenv.controller.types import APIConversationMessage
+            conversation = [
+                APIConversationMessage({"role": "user", "content": client.conversation_start[0]["value"], "reasoning_content": None}),
+                APIConversationMessage({"role": "assistant", "content": client.conversation_start[1]["value"], "reasoning_content": None}),
+                APIConversationMessage({"role": "user", "content": state, "reasoning_content": None})
+            ]
+        else:
+            raise NotImplementedError  # only Agent and APIAgent are handled
+        
+        rounds = 0
+        
+        while not done:
+            if isinstance(agent, Agent):
+                input_length = len(conversation_tokenized["input_ids"])
+                if input_length >= (generation_config.max_length if generation_config else 4096):  # 4096 when no config is given
+                    break
+                try:
+                    generated_tokens = agent.generate(
+                        [conversation_tokenized["input_ids"]], generation_config
+                    )[0]
+                except Exception as e:
+                    print(e)  # a generation failure ends the episode with the reward so far
+                    break
+                
+                if generated_tokens[-1] != tokenizer.eos_token_id:  # make sure the turn ends with EOS
+                    generated_tokens += [tokenizer.eos_token_id]
+                
+                generated_text = tokenizer.decode(generated_tokens)
+                conversation_tokenized["text"] += f" {generated_text}"
+                conversation_tokenized["input_ids"] += generated_tokens
+                conversation_tokenized["action_mask"] += [1] * len(generated_tokens)  # generated tokens get mask value 1
+                
+                generated_text = generated_text[:-len(tokenizer.eos_token)]  # drop the trailing EOS text before it reaches the env
+                conversation.append(
+                    ConversationMessage({"from": "gpt", "loss": True, "value": generated_text})
+                )
+            elif isinstance(agent, APIAgent):
+                generated_text, generated_reasoning_text = agent.generate(conversation)
+                from agentenv.controller.types import APIConversationMessage
+                conversation.append(
+                    APIConversationMessage({"role": "assistant", "content": generated_text, "reasoning_content": generated_reasoning_text})
+                )
+            
+            step_output = client.step(generated_text)
+            state, reward, done = step_output.state, step_output.reward, step_output.done
+            
+            if isinstance(agent, Agent):
+                env_message = ConversationMessage({"from": "human", "loss": None, "value": state})
+                env_message_tokenized = agent.chat_template.tokenize_conversation_one(
+                    env_message, tokenizer, add_generation_prompt=True
+                )
+                conversation.append(env_message)
+                conversation_tokenized["text"] += env_message_tokenized["text"]
+                conversation_tokenized["input_ids"] += env_message_tokenized["input_ids"]
+                conversation_tokenized["action_mask"] += env_message_tokenized["action_mask"]
+            elif isinstance(agent, APIAgent):
+                from agentenv.controller.types import APIConversationMessage
+                conversation.append(
+                    APIConversationMessage({"role": "user", "content": state, "reasoning_content": None})
+                )
+            
+            rounds += 1
+            if max_rounds is not None and rounds >= max_rounds:  # optional cap on environment steps
+                break
+        
+        if isinstance(agent, Agent):
+            return ExperienceOutput(
+                conversation=conversation,
+                reward=reward,
+                text=conversation_tokenized["text"],
+                seq_ids=conversation_tokenized["input_ids"],
+                attention_mask=[1] * len(conversation_tokenized["input_ids"]),
+                action_mask=conversation_tokenized["action_mask"],
+            )
+        elif isinstance(agent, APIAgent):
+            return APIExperienceOutput(
+                conversation=conversation,
+                reward=reward,
+            )

From 7f5eebf87e73aac6091e8d85773c61145a167f42 Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:21:07 -0800
Subject: [PATCH 5/6] feat(agentenv-mcp): Add SciWorld MCP examples

Add proof-of-concept examples using SciWorld environment:

sciworld_mcp_server.py:
- Wraps SciWorld as an MCP server
- Includes full SCIWORLD_FUNCTION_DESCRIPTION
- Demonstrates AgentEnvMCPServer usage

mcp_client_demo.py:
- Shows how to use MCP server as AgentEnv client
- Demonstrates reset, observe, step operations
- Example of function_calling format actions

Usage:
  # Start SciWorld env server first
  uvicorn agentenv_sciworld.server:app --port 8000

  # Run MCP server
  python -m agentenv_mcp.examples.sciworld_mcp_server
---
 .../agentenv_mcp/examples/__init__.py         |   1 +
 .../agentenv_mcp/examples/mcp_client_demo.py  |  94 ++++++
 .../examples/sciworld_mcp_server.py           | 292 ++++++++++++++++++
 3 files changed, 387 insertions(+)
 create mode 100644 agentenv-mcp/agentenv_mcp/examples/__init__.py
 create mode 100644 agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py
 create mode 100644 agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py

diff --git a/agentenv-mcp/agentenv_mcp/examples/__init__.py b/agentenv-mcp/agentenv_mcp/examples/__init__.py
new file mode 100644
index 00000000..3df8b26f
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/examples/__init__.py
@@ -0,0 +1 @@
+"""Examples for agentenv-mcp package."""
diff --git a/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py b/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py
new file mode 100644
index 00000000..a37ba463
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/examples/mcp_client_demo.py
@@ -0,0 +1,94 @@
+"""
+Demo: Using an MCP Server as an AgentEnv client.
+
+This demonstrates how to connect to any MCP server and use it as an
+AgentGym-compatible environment for agent evaluation.
+
+Usage:
+    # First, start the SciWorld environment server (agentenv-sciworld)
+    uvicorn agentenv_sciworld.server:app --host 0.0.0.0 --port 8000
+    
+    # Then run this demo; it launches the SciWorld MCP server itself via server_command
+    python -m agentenv_mcp.examples.mcp_client_demo
+"""
+
+import sys
+import os
+# Three dirname() calls: examples/ -> agentenv_mcp/ -> project root, where the package is importable
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from agentenv_mcp import MCPEnvClient
+
+
+def demo_mcp_client():
+    """
+    Demonstrate using an MCP server as an AgentEnv client.
+
+    Creates an MCPEnvClient for the SciWorld MCP server, prints the
+    conversation start, resets to task 0 and runs two example actions.
+    The client is closed even when one of these steps raises.
+    """
+    print("Creating MCP Environment Client...")
+    print("=" * 60)
+
+    # Create client that connects to the SciWorld MCP server
+    client = MCPEnvClient(
+        server_command=[
+            sys.executable, "-m",
+            "agentenv_mcp.examples.sciworld_mcp_server",
+            "--env-server", "http://localhost:8000",
+        ],
+        action_format="function_calling",
+        data_len=100,
+    )
+    try:
+        print(f"Environment size: {len(client)}")
+        print(f"Action format: {client.action_format}")
+        print()
+
+        # Show conversation start
+        print("Conversation Start:")
+        print("-" * 40)
+        for msg in client.conversation_start:
+            role = msg["from"]
+            value = msg["value"][:200] + "..." if len(msg["value"]) > 200 else msg["value"]
+            print(f"[{role}]: {value}")
+        print()
+
+        # Reset to first task
+        print("Resetting to task 0...")
+        print("-" * 40)
+        reset_result = client.reset(0)
+        print(f"Reset result: {reset_result}")
+        print()
+
+        # Get observation
+        print("Current observation:")
+        print("-" * 40)
+        obs = client.observe()
+        print(obs[:500] + "..." if len(obs) > 500 else obs)
+        print()
+
+        # Try two actions (function calling format), keyed by the name shown to the user
+        actions = {
+            "lookaround": '{"thought": "Let me look around", "function_name": "lookaround", "arguments": {}}',
+            "inventory": '{"thought": "Check my inventory", "function_name": "inventory", "arguments": {}}',
+        }
+
+        for name, action in actions.items():
+            print(f"Executing action: {name}")
+            print("-" * 40)
+            result = client.step(action)
+            print(f"State: {result.state[:300]}...")
+            print(f"Reward: {result.reward}")
+            print(f"Done: {result.done}")
+            print()
+    finally:
+        # Clean up in all cases: without this, an exception in any step above
+        # would skip close() and leave the client's event loop open.
+        client.close()
+
+    print("Demo complete!")
+
+if __name__ == "__main__":
+    demo_mcp_client()
diff --git a/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py b/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py
new file mode 100644
index 00000000..4f75c806
--- /dev/null
+++ b/agentenv-mcp/agentenv_mcp/examples/sciworld_mcp_server.py
@@ -0,0 +1,292 @@
+"""
+SciWorld MCP Server - Exposes SciWorld environment as an MCP server.
+
+This demonstrates how to wrap an existing AgentGym environment (SciWorld)
+as an MCP server that can be accessed by any MCP-compatible agent.
+
+Usage:
+    # Start the SciWorld environment server first (agentenv-sciworld)
+    uvicorn agentenv_sciworld.server:app --host 0.0.0.0 --port 8000
+    
+    # Then run this MCP server
+    python -m agentenv_mcp.examples.sciworld_mcp_server
+"""
+
+import sys
+import os
+
+# Add the project root (the directory that contains the agentenv_mcp package) to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from agentenv_mcp import AgentEnvMCPServer
+
+# SciWorld-specific function descriptions, defined inline rather than imported.
+# These are copied from agentenv/envs/sciworld.py for standalone operation
+SCIWORLD_FUNCTION_DESCRIPTION = [
+    {
+        "name": "open", 
+        "description": "Opens a container. You may have to give the specific location of the container if necessary(eg.door to kitchen, door to living room).",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The container you want to open."
+                }
+            },
+            "required": ["obj"]
+        }
+    },
+    {
+        "name": "close", 
+        "description": "Closes a container.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The container you want to close."
+                },
+            },
+            "required": ["obj"]
+        }
+    },
+    {
+        "name": "activate",
+        "description": "Activate a device (e.g., turn on a stove to heat something).",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The device you want to activate."
+                },
+            },
+            "required": ["obj"]
+        },
+    },
+    {
+        "name": "deactivate",
+        "description": "Deactivate a device (e.g., turn off a sink).",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The device you want to deactivate."
+                },
+            },
+            "required": ["obj"]
+        },
+    },
+    {
+        "name": "lookaround",
+        "description": "Describe the current room.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+        }
+    },
+    {
+        "name": "lookat",
+        "description": "Describe an object in detail.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The object you want to examine."
+                }
+            },
+            "required": ["obj"]
+        }
+    },
+    {
+        "name": "pickup",
+        "description": "Move an object to your inventory.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description": "The object to pick up."
+                }
+            },
+            "required": ["obj"]
+        },
+    },
+    {
+        "name": "drop",
+        "description": "Drop an object from your inventory.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "obj":{
+                    "type": "string",
+                    "description":"The object to drop."
+                },
+            },
+            "required": ["obj"]
+        },
+    },
+    {
+        "name": "goto",
+        "description": "Move to a new location.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "loc":{
+                    "type": "string",
+                    "description": "The location to go to."
+                }
+            },
+            "required": ["loc"]
+        },
+    },
+    {
+        "name": "use",
+        "description": "Use a tool on an object.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "tool":{
+                    "type": "string",
+                    "description":"The tool to use."
+                },
+                "obj":{
+                    "type": "string",
+                    "description": "The object to use the tool on (optional)."
+                }
+            },
+            "required": ["tool"]
+        }
+    },
+    {
+        "name": "pour",
+        "description": "Pour a liquid into a container.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "liq":{
+                    "type": "string",
+                    "description": "The liquid to pour."
+                },
+                "container":{
+                    "type": "string",
+                    "description": "The container to pour into."
+                }
+            },
+            "required": ["liq", "container"]
+        },
+    },
+    {
+        "name": "mix",
+        "description": "Chemically mix the contents of a container.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "container":{
+                    "type": "string",
+                    "description": "The container to mix."
+                }
+            },
+            "required": ["container"]
+        },
+    },
+    {
+        "name": "inventory",
+        "description": "List items in your inventory.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+        }
+    },
+    {
+        "name": "task",
+        "description": "Describe the current task.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+        }
+    },
+    {
+        "name": "wait",
+        "description": "Wait for some time.",
+        "parameters":{
+            "type": "object",
+            "properties":{
+                "duration":{
+                    "type": "integer",
+                    "description": "Number of time steps to wait."
+                }
+            },
+            "required": ["duration"]
+        },
+    },
+]
+
+
+def create_sciworld_mcp_server(
+    env_server_base: str = "http://localhost:8000",
+    data_len: int = 100,
+) -> AgentEnvMCPServer:
+    """
+    Create an MCP server that wraps the SciWorld environment.
+
+    Args:
+        env_server_base: Base URL of the SciWorld environment server
+        data_len: Number of tasks available
+
+    Returns:
+        AgentEnvMCPServer instance
+
+    Raises:
+        ImportError: If agentenv.envs.sciworld cannot be imported
+    """
+    # Imported inside the function so a failed import surfaces as the install hint below
+    try:
+        from agentenv.envs.sciworld import SciworldEnvClient
+    except ImportError as err:
+        raise ImportError(
+            "agentenv package not found. Please install it first:\n"
+            "  pip install -e ../agentenv"
+        ) from err  # keep the real cause (it may be a different missing module)
+
+    return AgentEnvMCPServer(
+        env_client_cls=SciworldEnvClient,
+        client_args={"env_server_base": env_server_base, "data_len": data_len},
+        function_descriptions=SCIWORLD_FUNCTION_DESCRIPTION,
+        env_name="sciworld",
+        action_format="function_calling",
+    )
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, then run the server.
+    import argparse
+
+    cli = argparse.ArgumentParser(description="Run SciWorld as an MCP server")
+    cli.add_argument(
+        "--env-server",
+        help="Base URL of the SciWorld environment server",
+        default="http://localhost:8000",
+    )
+    cli.add_argument(
+        "--data-len",
+        help="Number of tasks available",
+        default=100,
+        type=int,
+    )
+    options = cli.parse_args()
+
+    # Status output goes to stderr, not stdout.
+    banner = (
+        "Starting SciWorld MCP server...",
+        f"  Environment server: {options.env_server}",
+        f"  Data length: {options.data_len}",
+    )
+    for line in banner:
+        print(line, file=sys.stderr)
+
+    mcp_server = create_sciworld_mcp_server(options.env_server, options.data_len)
+    mcp_server.run()

From da1118934ffe77bf9dc0e0108b72dfff21748e0f Mon Sep 17 00:00:00 2001
From: supmo668 <mymm.psu@gmail.com>
Date: Sun, 18 Jan 2026 20:21:14 -0800
Subject: [PATCH 6/6] test(agentenv-mcp): Add comprehensive test suite

Add tests for all wrapper components:

test_schema_utils.py (9 tests):
- Schema conversion functions
- Round-trip conversion verification
- SciWorld function description handling

test_agentenv_to_mcp.py (9 tests):
- AgentEnvMCPServer initialization
- Handler methods (reset, step, observe, info, action)
- ReAct formatting

test_mcp_to_agentenv.py (8 tests):
- Action parsing (ReAct, function_calling)
- Tool call conversion
- Interface compatibility checks

test_sciworld_integration.py (7 tests):
- Integration tests with SciWorld environment
- End-to-end workflow verification
- Skipped by default (requires running server)

Run tests: pytest tests/ -v
---
 agentenv-mcp/tests/__init__.py                |   1 +
 agentenv-mcp/tests/test_agentenv_to_mcp.py    | 217 +++++++++++++++++
 agentenv-mcp/tests/test_mcp_to_agentenv.py    | 218 +++++++++++++++++
 agentenv-mcp/tests/test_schema_utils.py       | 226 ++++++++++++++++++
 .../tests/test_sciworld_integration.py        | 207 ++++++++++++++++
 5 files changed, 869 insertions(+)
 create mode 100644 agentenv-mcp/tests/__init__.py
 create mode 100644 agentenv-mcp/tests/test_agentenv_to_mcp.py
 create mode 100644 agentenv-mcp/tests/test_mcp_to_agentenv.py
 create mode 100644 agentenv-mcp/tests/test_schema_utils.py
 create mode 100644 agentenv-mcp/tests/test_sciworld_integration.py

diff --git a/agentenv-mcp/tests/__init__.py b/agentenv-mcp/tests/__init__.py
new file mode 100644
index 00000000..c5a930de
--- /dev/null
+++ b/agentenv-mcp/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for agentenv-mcp package."""
diff --git a/agentenv-mcp/tests/test_agentenv_to_mcp.py b/agentenv-mcp/tests/test_agentenv_to_mcp.py
new file mode 100644
index 00000000..5355eef2
--- /dev/null
+++ b/agentenv-mcp/tests/test_agentenv_to_mcp.py
@@ -0,0 +1,217 @@
+"""
+Tests for AgentEnvToMCP wrapper.
+
+These tests verify that the AgentEnvMCPServer correctly wraps
+BaseEnvClient instances as MCP servers.
+"""
+
+import pytest
+import json
+from unittest.mock import Mock, MagicMock, patch, AsyncMock
+from dataclasses import dataclass
+
+# Check if MCP is available
+try:
+    import mcp
+    MCP_AVAILABLE = True
+except ImportError:
+    MCP_AVAILABLE = False
+
+pytestmark = pytest.mark.skipif(
+    not MCP_AVAILABLE,
+    reason="MCP library not installed. Install with: pip install mcp"
+)
+
+# Lightweight stand-ins for the environment client and its step result, so no env server is needed
+@dataclass
+class MockStepOutput:  # value returned by MockBaseEnvClient.step()
+    state: str  # observation text after the step
+    reward: float  # MockBaseEnvClient.step() always sets this to 0.5
+    done: bool  # MockBaseEnvClient.step() always sets this to False
+
+
+class MockBaseEnvClient:
+    """In-memory stand-in for BaseEnvClient: canned observations, no network calls."""
+    
+    def __init__(self, env_server_base: str, data_len: int, action_format: str = "function_calling"):
+        self.env_server_base = env_server_base  # stored only; never contacted
+        self.data_len = data_len  # reported back by __len__
+        self.action_format = action_format
+        self._current_obs = "Initial observation"  # what observe() returns before any reset/step
+        self._task_idx = 0
+    
+    def __len__(self):
+        return self.data_len
+    
+    def reset(self, idx: int):
+        self._task_idx = idx
+        self._current_obs = f"Task {idx} observation"  # tests look for this text via observe()
+        return {"task_idx": idx, "status": "reset"}
+    
+    def observe(self):
+        return self._current_obs
+    
+    def step(self, action: str):
+        self._current_obs = f"After action: {action}"  # echo the action so tests can trace it
+        return MockStepOutput(
+            state=self._current_obs,
+            reward=0.5,  # fixed value asserted by test_handle_step
+            done=False,
+        )
+
+
+SAMPLE_FUNCTION_DESCRIPTIONS = [
+    {
+        "name": "open",
+        "description": "Opens a container.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "obj": {"type": "string", "description": "Container to open"}
+            },
+            "required": ["obj"]
+        }
+    },
+    {
+        "name": "lookaround",
+        "description": "Look around.",
+        "parameters": {"type": "object", "properties": {}}
+    },
+]
+
+
+class TestAgentEnvMCPServerInit:
+    """Tests for AgentEnvMCPServer initialization."""
+    
+    def test_init_basic(self):
+        """Test basic initialization."""
+        from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer
+        
+        server = AgentEnvMCPServer(
+            env_client_cls=MockBaseEnvClient,
+            client_args={"env_server_base": "http://test", "data_len": 10},
+            function_descriptions=SAMPLE_FUNCTION_DESCRIPTIONS,
+            env_name="test",
+        )
+        
+        assert server.env_name == "test"
+        assert server.action_format == "function_calling"
+        assert len(server.function_descriptions) == 2
+    
+    def test_lazy_client_creation(self):
+        """Test that client is created lazily."""
+        from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer
+        
+        server = AgentEnvMCPServer(
+            env_client_cls=MockBaseEnvClient,
+            client_args={"env_server_base": "http://test", "data_len": 10},
+        )
+        
+        # Client should not be created yet
+        assert server._client is None
+        
+        # Accessing client property creates it
+        client = server.client
+        assert client is not None
+        assert server._client is client
+
+
+class TestAgentEnvMCPServerHandlers:
+    """Tests for MCP tool handlers."""
+    
+    @pytest.fixture
+    def server(self):
+        """Create a test server."""
+        from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer
+        
+        return AgentEnvMCPServer(
+            env_client_cls=MockBaseEnvClient,
+            client_args={"env_server_base": "http://test", "data_len": 100},
+            function_descriptions=SAMPLE_FUNCTION_DESCRIPTIONS,
+            env_name="test",
+        )
+    
+    @pytest.mark.asyncio
+    async def test_handle_reset(self, server):
+        """Test reset handler."""
+        result = await server._handle_reset({"task_idx": 5})
+        
+        assert len(result) == 1
+        content = json.loads(result[0].content[0].text)
+        assert content["status"] == "reset"
+        assert content["task_idx"] == 5
+    
+    @pytest.mark.asyncio
+    async def test_handle_observe(self, server):
+        """Test observe handler."""
+        # First reset to set up state
+        await server._handle_reset({"task_idx": 0})
+        
+        result = await server._handle_observe()
+        
+        assert len(result) == 1
+        assert "Task 0 observation" in result[0].content[0].text
+    
+    @pytest.mark.asyncio
+    async def test_handle_step(self, server):
+        """Test step handler."""
+        await server._handle_reset({"task_idx": 0})
+        
+        result = await server._handle_step({"action": "test action"})
+        
+        assert len(result) == 1
+        content = json.loads(result[0].content[0].text)
+        assert "observation" in content
+        assert content["reward"] == 0.5
+        assert content["done"] is False
+    
+    @pytest.mark.asyncio
+    async def test_handle_info(self, server):
+        """Test info handler."""
+        result = await server._handle_info()
+        
+        assert len(result) == 1
+        content = json.loads(result[0].content[0].text)
+        assert content["env_name"] == "test"
+        assert content["env_size"] == 100
+        assert content["available_actions"] == 2
+    
+    @pytest.mark.asyncio
+    async def test_handle_action(self, server):
+        """Test environment-specific action handler."""
+        await server._handle_reset({"task_idx": 0})
+        
+        result = await server._handle_action("open", {"obj": "door"})
+        
+        assert len(result) == 1
+        content = json.loads(result[0].content[0].text)
+        assert content["action"] == "open"
+        assert "observation" in content
+    
+    @pytest.mark.asyncio
+    async def test_handle_unknown_action(self, server):
+        """Test handling of unknown action."""
+        result = await server._handle_action("unknown_action", {})
+        
+        assert len(result) == 1
+        assert result[0].isError is True
+
+
+class TestReactFormatting:
+    """Tests for AgentEnvMCPServer._format_react_action with action_format="react"."""
+    
+    def test_format_react_action(self):
+        """Formatted output contains "Thought:", "Action:" and the action name."""
+        from agentenv_mcp.agentenv_to_mcp import AgentEnvMCPServer
+        
+        server = AgentEnvMCPServer(
+            env_client_cls=MockBaseEnvClient,
+            client_args={"env_server_base": "http://test", "data_len": 10},
+            action_format="react",
+        )
+        
+        result = server._format_react_action("open", {"obj": "door", "thought": "Opening the door"})
+        
+        assert "Thought:" in result
+        assert "Action:" in result
+        assert "open" in result
diff --git a/agentenv-mcp/tests/test_mcp_to_agentenv.py b/agentenv-mcp/tests/test_mcp_to_agentenv.py
new file mode 100644
index 00000000..68e07af0
--- /dev/null
+++ b/agentenv-mcp/tests/test_mcp_to_agentenv.py
@@ -0,0 +1,218 @@
+"""
+Tests for MCPToAgentEnv wrapper.
+
+These tests verify that the MCPEnvClient correctly adapts
+MCP servers into AgentGym-compatible BaseEnvClient instances.
+"""
+
+import pytest
+import json
+from unittest.mock import Mock, MagicMock, patch, AsyncMock
+from dataclasses import dataclass
+from enum import Enum
+
+
+# Check if MCP is available
+try:
+    import mcp
+    MCP_AVAILABLE = True
+except ImportError:
+    MCP_AVAILABLE = False
+
+# Check if agentenv is available
+try:
+    from agentenv.controller.types import ActionFormat
+    AGENTENV_AVAILABLE = True
+except ImportError:
+    AGENTENV_AVAILABLE = False
+
+
+class TestMCPAdapterParsing:
+    """Tests for MCPAdapter parsing methods that don't require full dependencies."""
+    
+    def test_parse_react_format(self):
+        """Test ReAct format parsing logic."""
+        text = "Thought:\nI should open the door.\n\nAction:\nopen door"
+        
+        _split = text.rsplit("Action:", 1)
+        assert len(_split) == 2
+        _thought, _action = _split
+        thought = _thought.split("Thought:")[-1].strip()
+        action = _action.strip()
+        
+        assert thought == "I should open the door."
+        assert action == "open door"
+    
+    def test_parse_react_no_thought(self):
+        """Text without an "Action:" marker becomes the action verbatim, with an empty thought."""
+        text = "open door"
+        
+        _split = text.rsplit("Action:", 1)
+        if len(_split) == 2:
+            thought = _split[0].split("Thought:")[-1].strip()
+            action = _split[1].strip()
+        else:
+            thought = ""
+            action = text.strip()
+        # Verify both halves: `thought` used to be computed but never checked.
+        assert (thought, action) == ("", "open door")
+    
+    def test_parse_function_calling_format(self):
+        """Test function calling format parsing logic."""
+        text = '{"thought": "Opening door", "function_name": "open", "arguments": {"obj": "door"}}'
+        
+        _fn_call = json.loads(
+            "{" + text.split("{", 1)[-1].rsplit("}", 1)[0] + "}", strict=False
+        )
+        
+        assert _fn_call["thought"] == "Opening door"
+        assert _fn_call["function_name"] == "open"
+        assert _fn_call["arguments"]["obj"] == "door"
+    
+    def test_parse_function_calling_with_extra_text(self):
+        """Test parsing function call with surrounding text."""
+        text = 'Here is my action: {"thought": "test", "function_name": "look", "arguments": {}}'
+        
+        _fn_call = json.loads(
+            "{" + text.split("{", 1)[-1].rsplit("}", 1)[0] + "}", strict=False
+        )
+        
+        assert _fn_call["function_name"] == "look"
+
+
+class TestMCPAdapterToolCallConversion:
+    """Tests for converting parsed actions to tool calls."""
+    
+    def test_react_to_tool_call_simple(self):
+        """Test converting simple React action to tool call."""
+        function_descriptions = [
+            {"name": "open", "parameters": {"properties": {"obj": {}}}},
+            {"name": "goto", "parameters": {"properties": {"location": {}}}},
+        ]
+        
+        action = "open door"
+        
+        # Logic for matching action to tool
+        result = None
+        for func_desc in function_descriptions:
+            fn_name = func_desc["name"]
+            if action.lower().startswith(fn_name.lower()):
+                args_str = action[len(fn_name):].strip()
+                params = func_desc.get("parameters", {}).get("properties", {})
+                param_names = list(params.keys())
+                arg_values = args_str.split() if args_str else []
+                
+                arguments = {}
+                for i, param_name in enumerate(param_names):
+                    if i < len(arg_values):
+                        arguments[param_name] = arg_values[i]
+                
+                result = {"tool_name": fn_name, "arguments": arguments}
+                break
+        
+        assert result is not None
+        assert result["tool_name"] == "open"
+        assert result["arguments"]["obj"] == "door"
+    
+    def test_react_to_tool_call_no_args(self):
+        """Test converting React action with no arguments."""
+        function_descriptions = [
+            {"name": "lookaround", "parameters": {"properties": {}}},
+        ]
+        
+        action = "lookaround"
+        
+        result = None
+        for func_desc in function_descriptions:
+            fn_name = func_desc["name"]
+            if action.lower().startswith(fn_name.lower()):
+                result = {"tool_name": fn_name, "arguments": {}}
+                break
+        
+        assert result is not None
+        assert result["tool_name"] == "lookaround"
+        assert result["arguments"] == {}
+
+
+@pytest.mark.skipif(not (MCP_AVAILABLE and AGENTENV_AVAILABLE), reason="MCP library or agentenv not installed")
+class TestMCPEnvClientRequiresMCP:
+    """Tests that require both the MCP library and agentenv to be importable."""
+    # NOTE(review): assumes agentenv_mcp.mcp_to_agentenv imports agentenv at load time - confirm.
+    def test_mcp_env_client_has_interface(self):
+        """MCPEnvClient exposes observe, step, reset and __len__."""
+        from agentenv_mcp.mcp_to_agentenv import MCPEnvClient
+        
+        assert hasattr(MCPEnvClient, 'observe')
+        assert hasattr(MCPEnvClient, 'step')
+        assert hasattr(MCPEnvClient, 'reset')
+        assert hasattr(MCPEnvClient, '__len__')
+
+
+class TestMCPTaskStructure:
+    """Tests for MCPTask class structure (needs mcp and, presumably, agentenv importable)."""
+    
+    @pytest.mark.skipif(not (MCP_AVAILABLE and AGENTENV_AVAILABLE), reason="MCP library or agentenv not installed")
+    def test_task_has_required_attributes(self):
+        """MCPTask defines env_client_cls and env_name, with env_name equal to "mcp"."""
+        from agentenv_mcp.mcp_to_agentenv import MCPTask
+        
+        assert hasattr(MCPTask, 'env_client_cls')
+        assert hasattr(MCPTask, 'env_name')
+        assert MCPTask.env_name == "mcp"
+
+
+class TestSchemaConversion:
+    """Tests that verify schema conversion works correctly."""
+    
+    def test_sciworld_style_function_to_tool_call(self):
+        """Test converting SciWorld-style functions to tool call format."""
+        from agentenv_mcp.schema_utils import function_desc_to_mcp_tool
+        
+        sciworld_func = {
+            "name": "pour",
+            "description": "Pour liquid into container.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "liq": {"type": "string", "description": "The liquid"},
+                    "container": {"type": "string", "description": "The container"}
+                },
+                "required": ["liq", "container"]
+            }
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(sciworld_func)
+        
+        assert mcp_tool["name"] == "pour"
+        assert "inputSchema" in mcp_tool
+        assert "liq" in mcp_tool["inputSchema"]["properties"]
+        assert "container" in mcp_tool["inputSchema"]["properties"]
+
+
+class TestConversationStartGeneration:
+    """Tests for conversation start prompt generation."""
+    
+    def test_function_prompt_structure(self):
+        """Test that function prompts have expected structure."""
+        function_descriptions = [
+            {
+                "name": "open",
+                "description": "Opens something.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "obj": {"type": "string", "description": "Object to open"}
+                    },
+                    "required": ["obj"]
+                }
+            }
+        ]
+        
+        # Simulate the prompt format
+        prompt = "You have the following functions available:\n\n"
+        tool_descs = [{"type": "function", "function": f} for f in function_descriptions]
+        prompt += "\n".join([json.dumps(f, ensure_ascii=False, indent=2) for f in tool_descs])
+        
+        assert "open" in prompt
+        assert "Opens something" in prompt
+        assert "obj" in prompt
diff --git a/agentenv-mcp/tests/test_schema_utils.py b/agentenv-mcp/tests/test_schema_utils.py
new file mode 100644
index 00000000..99a655f5
--- /dev/null
+++ b/agentenv-mcp/tests/test_schema_utils.py
@@ -0,0 +1,226 @@
+"""
+Tests for schema conversion utilities.
+"""
+
+import pytest
+from agentenv_mcp.schema_utils import (
+    function_desc_to_mcp_tool,
+    mcp_tool_to_function_desc,
+    generate_function_descriptions_from_mcp_tools,
+    generate_mcp_tools_from_function_descriptions,
+)
+
+
+class TestFunctionDescToMCPTool:
+    """Tests for function_desc_to_mcp_tool conversion."""
+    
+    def test_basic_conversion(self):
+        """Test basic conversion from AgentEnv to MCP format."""
+        func_desc = {
+            "name": "open",
+            "description": "Opens a container.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "obj": {
+                        "type": "string",
+                        "description": "The container to open."
+                    }
+                },
+                "required": ["obj"]
+            }
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(func_desc)
+        
+        assert mcp_tool["name"] == "open"
+        assert mcp_tool["description"] == "Opens a container."
+        assert "inputSchema" in mcp_tool
+        assert mcp_tool["inputSchema"]["type"] == "object"
+        assert "obj" in mcp_tool["inputSchema"]["properties"]
+    
+    def test_empty_parameters(self):
+        """Test conversion with empty parameters."""
+        func_desc = {
+            "name": "lookaround",
+            "description": "Look around the room.",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(func_desc)
+        
+        assert mcp_tool["name"] == "lookaround"
+        assert mcp_tool["inputSchema"]["properties"] == {}
+    
+    def test_missing_description(self):
+        """Test conversion with missing description."""
+        func_desc = {
+            "name": "test",
+            "parameters": {"type": "object", "properties": {}}
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(func_desc)
+        
+        assert mcp_tool["description"] == ""
+    
+    def test_multiple_parameters(self):
+        """Test conversion with multiple parameters."""
+        func_desc = {
+            "name": "pour",
+            "description": "Pour liquid into container.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "liquid": {"type": "string", "description": "The liquid"},
+                    "container": {"type": "string", "description": "The container"}
+                },
+                "required": ["liquid", "container"]
+            }
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(func_desc)
+        
+        assert len(mcp_tool["inputSchema"]["properties"]) == 2
+        assert "liquid" in mcp_tool["inputSchema"]["properties"]
+        assert "container" in mcp_tool["inputSchema"]["properties"]
+
+
+class TestMCPToolToFunctionDesc:
+    """Tests for mcp_tool_to_function_desc conversion."""
+    
+    def test_basic_conversion(self):
+        """Test basic conversion from MCP to AgentEnv format."""
+        mcp_tool = {
+            "name": "open",
+            "description": "Opens a container.",
+            "inputSchema": {
+                "type": "object",
+                "properties": {
+                    "obj": {
+                        "type": "string",
+                        "description": "The container to open."
+                    }
+                },
+                "required": ["obj"]
+            }
+        }
+        
+        func_desc = mcp_tool_to_function_desc(mcp_tool)
+        
+        assert func_desc["name"] == "open"
+        assert func_desc["description"] == "Opens a container."
+        assert "parameters" in func_desc
+        assert func_desc["parameters"]["type"] == "object"
+    
+    def test_missing_input_schema(self):
+        """Test conversion with missing inputSchema."""
+        mcp_tool = {
+            "name": "test",
+            "description": "A test tool."
+        }
+        
+        func_desc = mcp_tool_to_function_desc(mcp_tool)
+        
+        assert func_desc["parameters"]["type"] == "object"
+        assert func_desc["parameters"]["properties"] == {}
+
+
+class TestRoundTrip:
+    """Test round-trip conversions."""
+    
+    def test_agentenv_to_mcp_to_agentenv(self):
+        """Test AgentEnv -> MCP -> AgentEnv preserves data."""
+        original = {
+            "name": "goto",
+            "description": "Move to a location.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "Target location"
+                    }
+                },
+                "required": ["location"]
+            }
+        }
+        
+        mcp_tool = function_desc_to_mcp_tool(original)
+        restored = mcp_tool_to_function_desc(mcp_tool)
+        
+        assert restored["name"] == original["name"]
+        assert restored["description"] == original["description"]
+        assert restored["parameters"] == original["parameters"]
+    
+    def test_batch_conversion(self):
+        """Test batch conversion of multiple tools."""
+        func_descs = [
+            {"name": "open", "description": "Open", "parameters": {"type": "object", "properties": {}}},
+            {"name": "close", "description": "Close", "parameters": {"type": "object", "properties": {}}},
+        ]
+        
+        mcp_tools = generate_mcp_tools_from_function_descriptions(func_descs)
+        restored = generate_function_descriptions_from_mcp_tools(mcp_tools)
+        
+        assert len(restored) == 2
+        assert restored[0]["name"] == "open"
+        assert restored[1]["name"] == "close"
+
+
+class TestSciWorldFunctionDescriptions:
+    """Test with real SciWorld function descriptions."""
+    
+    SAMPLE_SCIWORLD_FUNCTIONS = [
+        {
+            "name": "open", 
+            "description": "Opens a container.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "obj": {"type": "string", "description": "The container to open."}
+                },
+                "required": ["obj"]
+            }
+        },
+        {
+            "name": "lookaround",
+            "description": "Describe the current room.",
+            "parameters": {"type": "object", "properties": {}}
+        },
+        {
+            "name": "pour",
+            "description": "Pour liquid into container.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "liq": {"type": "string", "description": "The liquid"},
+                    "container": {"type": "string", "description": "The container"}
+                },
+                "required": ["liq", "container"]
+            }
+        },
+    ]
+    
+    def test_sciworld_conversion(self):
+        """Test conversion of SciWorld-style function descriptions."""
+        mcp_tools = generate_mcp_tools_from_function_descriptions(
+            self.SAMPLE_SCIWORLD_FUNCTIONS
+        )
+        
+        assert len(mcp_tools) == 3
+        
+        # Check open tool
+        open_tool = next(t for t in mcp_tools if t["name"] == "open")
+        assert "inputSchema" in open_tool
+        assert "obj" in open_tool["inputSchema"]["properties"]
+        
+        # Check lookaround tool (no args)
+        look_tool = next(t for t in mcp_tools if t["name"] == "lookaround")
+        assert look_tool["inputSchema"]["properties"] == {}
+        
+        # Check pour tool (multiple args)
+        pour_tool = next(t for t in mcp_tools if t["name"] == "pour")
+        assert len(pour_tool["inputSchema"]["properties"]) == 2
diff --git a/agentenv-mcp/tests/test_sciworld_integration.py b/agentenv-mcp/tests/test_sciworld_integration.py
new file mode 100644
index 00000000..2f61c421
--- /dev/null
+++ b/agentenv-mcp/tests/test_sciworld_integration.py
@@ -0,0 +1,207 @@
+"""
+Integration tests for SciWorld MCP wrapper.
+
+These tests verify the complete integration between:
+1. SciWorld environment (agentenv-sciworld)
+2. AgentEnv client (agentenv/envs/sciworld.py)
+3. MCP wrapper (agentenv-mcp)
+
+NOTE: These tests require:
+- The SciWorld environment server running at http://localhost:8000
+- The agentenv package installed
+
+To run integration tests:
+    # Start SciWorld server first
+    cd ../agentenv-sciworld && uvicorn agentenv_sciworld.server:app --port 8000
+    
+    # Run tests
+    pytest tests/test_sciworld_integration.py -v
+"""
+
+import pytest
+import json
+import sys
+import os
+
+# Opt-in: every test here is skipped unless SKIP_INTEGRATION_TESTS is set to something other than "1"
+pytestmark = pytest.mark.skipif(
+    os.environ.get("SKIP_INTEGRATION_TESTS", "1") == "1",
+    reason="Integration tests disabled. Set SKIP_INTEGRATION_TESTS=0 to run."
+)
+
+
+@pytest.fixture
+def sciworld_env_server():
+    """Return the SciWorld server base URL, or skip the test if it is not reachable."""
+    requests = pytest.importorskip("requests")  # skip, not error, when requests is absent
+    try:
+        response = requests.get("http://localhost:8000/", timeout=2)
+        if response.status_code == 200:
+            return "http://localhost:8000"
+    except requests.RequestException:
+        pass  # refused connection / timeout is treated as "server not running"
+    pytest.skip("SciWorld server not available at http://localhost:8000")
+
+
+class TestSciWorldMCPServerIntegration:
+    """Integration tests for SciWorld as MCP server."""
+    
+    def test_create_mcp_server(self, sciworld_env_server):
+        """Test creating MCP server from SciWorld client."""
+        from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server
+        
+        server = create_sciworld_mcp_server(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+        )
+        
+        assert server is not None
+        assert server.env_name == "sciworld"
+        assert len(server.function_descriptions) > 0
+    
+    @pytest.mark.asyncio
+    async def test_reset_and_observe(self, sciworld_env_server):
+        """Test reset and observe through MCP server."""
+        from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server
+        
+        server = create_sciworld_mcp_server(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+        )
+        
+        # Reset
+        reset_result = await server._handle_reset({"task_idx": 0})
+        assert len(reset_result) == 1
+        reset_data = json.loads(reset_result[0].content[0].text)
+        assert "observation" in reset_data
+        
+        # Observe
+        observe_result = await server._handle_observe()
+        assert len(observe_result) == 1
+        assert len(observe_result[0].content[0].text) > 0
+    
+    @pytest.mark.asyncio
+    async def test_step_action(self, sciworld_env_server):
+        """Test stepping with an action."""
+        from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server
+        
+        server = create_sciworld_mcp_server(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+        )
+        
+        # Reset first
+        await server._handle_reset({"task_idx": 0})
+        
+        # Step with lookaround
+        step_result = await server._handle_action("lookaround", {})
+        assert len(step_result) == 1
+        step_data = json.loads(step_result[0].content[0].text)
+        assert "observation" in step_data
+        assert "reward" in step_data
+        assert "done" in step_data
+    
+    @pytest.mark.asyncio
+    async def test_multiple_actions(self, sciworld_env_server):
+        """Test executing multiple actions in sequence."""
+        from agentenv_mcp.examples.sciworld_mcp_server import create_sciworld_mcp_server
+        
+        server = create_sciworld_mcp_server(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+        )
+        
+        # Reset
+        await server._handle_reset({"task_idx": 0})
+        
+        # Execute a sequence of actions
+        actions = [
+            ("lookaround", {}),
+            ("inventory", {}),
+            ("task", {}),
+        ]
+        
+        for action_name, args in actions:
+            result = await server._handle_action(action_name, args)
+            assert len(result) == 1
+            assert result[0].isError is False
+
+
+class TestSciWorldClientCompatibility:
+    """Test that wrapped SciWorld client is compatible with AgentEnv."""
+    
+    def test_client_interface(self, sciworld_env_server):
+        """Test that SciWorld client has correct interface."""
+        from agentenv.envs.sciworld import SciworldEnvClient
+        
+        client = SciworldEnvClient(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+            action_format="function_calling",
+        )
+        
+        # Test interface
+        assert hasattr(client, 'reset')
+        assert hasattr(client, 'step')
+        assert hasattr(client, 'observe')
+        assert hasattr(client, '__len__')
+        assert hasattr(client, 'conversation_start')
+        
+        # Test length
+        assert len(client) == 10
+        
+        # Test conversation start
+        assert len(client.conversation_start) == 2
+    
+    def test_reset_and_step(self, sciworld_env_server):
+        """Test basic reset and step operations."""
+        from agentenv.envs.sciworld import SciworldEnvClient
+        
+        client = SciworldEnvClient(
+            env_server_base=sciworld_env_server,
+            data_len=10,
+            action_format="function_calling",
+        )
+        
+        # Reset
+        result = client.reset(0)
+        assert "observation" in result or client.observe() != ""
+        
+        # Observe
+        obs = client.observe()
+        assert isinstance(obs, str)
+        assert len(obs) > 0
+        
+        # Step with function calling format
+        action = json.dumps({
+            "thought": "Looking around",
+            "function_name": "lookaround",
+            "arguments": {}
+        })
+        step_output = client.step(action)
+        
+        assert hasattr(step_output, 'state')
+        assert hasattr(step_output, 'reward')
+        assert hasattr(step_output, 'done')
+
+
+class TestFunctionDescriptionAlignment:
+    """Checks on SCIWORLD_FUNCTION_DESCRIPTION; uses no server fixture but is still gated by pytestmark."""
+    
+    def test_sciworld_functions_in_wrapper(self):
+        """Core SciWorld actions are listed, and each entry converts to an MCP tool of the same name."""
+        from agentenv_mcp.examples.sciworld_mcp_server import SCIWORLD_FUNCTION_DESCRIPTION
+        from agentenv_mcp.schema_utils import function_desc_to_mcp_tool
+        
+        # Collect the declared action names for a subset check
+        function_names = {f["name"] for f in SCIWORLD_FUNCTION_DESCRIPTION}
+        
+        required_functions = {"open", "close", "lookaround", "goto", "pickup", "inventory"}
+        assert required_functions.issubset(function_names)
+        
+        # Every description must convert, keeping its name and gaining an inputSchema
+        for func in SCIWORLD_FUNCTION_DESCRIPTION:
+            mcp_tool = function_desc_to_mcp_tool(func)
+            assert "name" in mcp_tool
+            assert "inputSchema" in mcp_tool
+            assert mcp_tool["name"] == func["name"]