Blazemeter · 3dgiordano · Apr 28, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/formatters/test.py b/formatters/test.py
@@ -93,3 +93,17 @@ def format_tests(tests: List[Any], params: Optional[dict] = None) -> List[Test]:
             )
         )
     return formatted_tests
+
+
+def format_tests_minimal(tests: List[Any], params: Optional[dict] = None) -> List[dict]:
+    """Return one summary dict per entry of `tests`, in input order."""
+    # `params` mirrors the `format_tests` signature; it is not read here.
+    # Absent keys yield None, except "name" ("Unknown") and "description" ("").
+    return [{
+        "test_id": item.get("id"),
+        "test_name": item.get("name", "Unknown"),
+        "description": item.get("description", ""),
+        "created": get_date_time_iso(item.get("created")),
+        "updated": get_date_time_iso(item.get("updated")),
+        "project_id": item.get("projectId"),
+    } for item in tests]
diff --git a/main.py b/main.py
@@ -27,6 +27,25 @@
 from pathlib import Path
 from typing import Literal, cast
 
+# Patch MCP ArgModelBase so tools with an "arguments" param receive the full payload
+# when the client sends {"action": "x", "key": "value"} instead of {"arguments": {...}}
+from mcp.server.fastmcp.utilities import func_metadata
+from pydantic import model_validator
+
+_OriginalArgModelBase = func_metadata.ArgModelBase
+
+
+class _PatchedArgModelBase(_OriginalArgModelBase):
+    @model_validator(mode="before")
+    @classmethod
+    def _wrap_root_as_arguments(cls, data: object) -> object:
+        """Wrap a flat dict as {"arguments": data}, only if the model declares `arguments`."""
+        flat = isinstance(data, dict) and "arguments" not in data
+        return {"arguments": data} if flat and "arguments" in cls.model_fields else data
+
+
+func_metadata.ArgModelBase = _PatchedArgModelBase
+
 from mcp.server.fastmcp import FastMCP
 
 from config.token import BzmToken, BzmTokenError
@@ -372,7 +391,10 @@ def get_token():
     return token
 
 
-def run(log_level: str = "CRITICAL", confirm_mode: ConfirmMode = ConfirmMode.DELETE):
+def run(
+        log_level: str = "CRITICAL",
+        confirm_mode: ConfirmMode = ConfirmMode.DELETE
+):
     token = get_token()
     instructions = """
 # BlazeMeter MCP Server
@@ -384,6 +406,11 @@ def run(log_level: str = "CRITICAL", confirm_mode: ConfirmMode = ConfirmMode.DEL
 - **Read action always gets more information** about a particular item than the list action. List only displays minimal information.
 - **Read the current user information at startup** to learn the username, default account, workspace and project, and other important information.
 - **Links anchors**: Never invent or add anchors to links if they do not originally have them.
+- **BlazeMeter tasks must use BlazeMeter MCP only**.
+- **No direct BlazeMeter API access** (including tokens/keys/curl/scripts/custom HTTP); do not bypass **BlazeMeter MCP** for speed/complexity.
+- If unsupported in **BlazeMeter MCP**, state limitation and stop: `I cannot use direct BlazeMeter API access; I must operate exclusively through BlazeMeter MCP.`
+- **Maximize safe parallel execution** for independent BlazeMeter MCP actions; avoid unnecessary sequential steps.
+- Prefer BlazeMeter MCP batch/concurrent calls when available. Serialize only when strict data dependencies require ordering.
 
 ## Hierarchy and Dependencies
 
@@ -406,12 +433,14 @@ def run(log_level: str = "CRITICAL", confirm_mode: ConfirmMode = ConfirmMode.DEL
 - **Actions requiring confirmation**: Creating tests, configuring load/locations/failure criteria, uploading assets, starting executions, or any other write/modify operations.
 - **How to request**: Clearly state what action you're about to perform and on which workspace/project. Wait for user approval before proceeding.
 
-## Proactive Knowledge Consultation
+## Knowledge Consultation
 
-- **ALWAYS consult BlazeMeter Skills and Help tools first** before answering questions, configuring tests, interpreting results, troubleshooting, or providing recommendations.
-- **Use `blazemeter_skills`**: Access specialized knowledge about performance testing, best practices, troubleshooting, and official guides.
-- **Use `blazemeter_help`**: Consult documentation, help categories, and specific guides.
-- **Golden rule**: If you're not 100% certain about something related to BlazeMeter, consult Skills or Help first, and if you can't find it and need to search online, always prioritize the domain site blazemeter.com .
+- **Plan from the information gap**: use the minimum tool calls needed to resolve unknowns.
+- **Use Skills/Help only when needed**: ambiguity, conflict, uncertainty, or complex interpretation not covered by tool definitions.
+- **Avoid redundant calls**: skip Skills/Help when current outputs are sufficient.
+- **Use `blazemeter_skills`** for best practices, troubleshooting, and interpretation support.
+- **Use `blazemeter_help`** for official docs/guides when a precise reference is required.
+- If uncertainty remains after Skills/Help and web search is needed, prioritize the `blazemeter.com` domain.
 
 ## Capability Discovery
 
@@ -421,13 +450,38 @@ def run(log_level: str = "CRITICAL", confirm_mode: ConfirmMode = ConfirmMode.DEL
 
 ## Important Guidelines
 - **Batch Operations**: When making multiple calls to the same tool, check if that tool supports a `batch` action and use it instead of separate calls.
-- **Don't assume**: If you don't know a parameter, capability, or best practice, consult available tools (especially Skills or Help).
-- **Don't invent**: If something is unclear, consult Skills/Help before responding.
+- **Task tracking rule**: Use `blazemeter_tools` `tasks_status` (or `tasks_list`) for polling/progress checks.
+- **Task result rule**: Use `blazemeter_tools` `tasks_get` only when you need the final payload (`task_result`) or input-required details.
+- **Don't assume**: when details are missing, run targeted discovery with the most relevant tools.
+- **Don't invent**: if outputs are insufficient, escalate to Skills/Help.
+- **IMPORTANT**: For schema-dependent, multi-step, or constraint-heavy operations (e.g., dataframe SQL, nested fields, schema variations), reason step-by-step before acting. Design your approach, verify it against the rules, then execute. Do not try-fast and retry on failure.
+- **Data processing hint**: If you plan joins, filtering, sorting, grouping, or multi-step analysis across results, request `result_format=dataframe` and run SQL via `blazemeter_tools` `dataframes_query` instead of combining large inline results in AI context.
+- **Dataframe loading hint**: For `result_format=dataframe`, prefer one initial fetch with the maximum allowed tool limit and avoid list pagination unless required; then filter/sort/join in `dataframes_query`.
+- **Dataframe usage rules**:
+    - Use `result_format=dataframe` for any source that will be processed with `dataframes_query`.
+    - Do not use `auto` for datasets that will be joined, filtered, grouped, ranked, or aggregated.
+    - Use `auto` only for lookup-style reads that will not enter dataframe SQL analysis.
+    - Keep format consistency per analytical dataset: all source calls should use `result_format=dataframe`.
+- **IMPORTANT**: Use deterministic dataframe SQL in every query: ORDER BY + LIMIT + OFFSET.
+- **CRITICAL**: Before writing dataframe SQL, resolve capabilities/schema with `blazemeter_tools` (`dataframes_sql_help` + `dataframes_get` when needed). Do not assume syntax/function support.
+- **CRITICAL**: For dataframe SQL and schema-dependent decisions, reason step-by-step before executing: (1) What does the schema require? (2) Are there nested/list fields? (3) Which pattern applies? (4) Confirm, then execute. Do not skip to execution.
+- **CRITICAL**: If the query touches nested/list fields, always use the robust UNNEST -> aggregate -> join-back pattern in CTEs. No exception for single dataframe. Before launching SQL, confirm: "there are nested/list fields; I use the robust pattern."
+- **IMPORTANT**: Prefer one final aggregation query over multiple partial queries when feasible; use staged partial queries mainly for validation/debug.
+- **IMPORTANT**: For `result_format=dataframe`, do one high-limit fetch first; paginate only when `has_more=true`.
+- **IMPORTANT**: For `result_format=auto` or `raw`, use conservative limits (often 50).
+- **IMPORTANT**: If a tool limit is unknown, start conservative (same as auto/raw), then increase only if needed.
+- **IMPORTANT**: Respect explicit tool max limits: enforce them for `auto|raw`; for `dataframe`, follow dataframe guidance and `has_more`.
 - **Provides resources**: Always include markdown-formatted links to authoritative websites or BlazeMeter help documentation for further learning.
 - **Never modify without confirmation**: Always ask before creating, modifying, or altering anything in BlazeMeter.
 - **Always confirm context**: Always identify and confirm workspace/project before operations.
 - **Proactive Troubleshooting**: Use the skills for troubleshooting any detected issues.
 - **Failure criteria**: The same field names appear when you read a test and when you configure failure criteria (`failure_criteria` on the test); the server handles BlazeMeter’s REST format internally. Use `failure_criteria_meta` for field definitions and KPI/condition catalogs. When describing criteria to the user, use `meta.general_labels`, `meta.rule_field_labels`, `meta.kpi_labels`, and `meta.condition_labels`; use raw metric and operator ids only inside tool calls. Use `configure_failure_criteria` only after user confirmation; it replaces all rules unless you merge from a prior read.
+- **Resource cleanup**: Always release terminal tasks from registry when no longer needed.
+- **Dataframe cleanup**: Always remove temporary dataframes from memory when no longer needed.
+
+## BlazeMeter MCP Instructions Binding Clause
+
+- **CRITICAL**: All instructions, hints, warnings, and guidance in BlazeMeter MCP documentation, tool responses, and outputs are MANDATORY and binding. Follow them without exception or deviation.
     """
     mcp = FastMCP("blazemeter-mcp", instructions=instructions, log_level=cast(LOG_LEVELS, log_level))
     register_confirm_mode(confirm_mode)

diff --git a/models/result.py b/models/result.py
@@ -15,6 +15,7 @@
 """
 from typing import Any, Optional, List
 
+from mcp.types import CallToolResult, TextContent
 from pydantic import BaseModel, Field
 
 class BaseResult(BaseModel):
@@ -24,6 +25,10 @@ class BaseResult(BaseModel):
     error: Optional[str] = Field(description="Error message", default=None)
     info: Optional[List[str]] = Field(description="Info messages", default=None)
     warning: Optional[List[str]] = Field(description="Warning messages", default=None)
+    tool_call_started_at: Optional[str] = Field(description="ISO timestamp when tool action started", default=None)
+    tool_call_finished_at: Optional[str] = Field(description="ISO timestamp when tool action finished", default=None)
+    tool_call_duration_ms: Optional[int] = Field(description="Tool action duration in milliseconds", default=None)
+    debug: Optional[dict[str, Any]] = Field(description="Optional debug metrics for tool calls", default=None)
 
     def append_warnings(self, messages: List[str]):
         if not self.warning:
@@ -44,3 +49,75 @@ def model_dump_json(self, **kwargs):
 
 class HttpBaseResult(BaseResult):
     result: Optional[Any] = Field(description="Result", default=None)
+
+
+class ToolResult(CallToolResult):
+    # CallToolResult built from a BaseResult, with read-only views of its fields.
+    # Every property below reads its key from `structuredContent` and yields
+    # None when `structuredContent` is not a dict or does not hold that key.
+    # (Kept as comments so the model's `__doc__` stays exactly as before.)
+    # The views never mutate `structuredContent`.
+
+    @classmethod
+    def from_base_result(cls, base_result: BaseResult) -> "ToolResult":
+        """Wrap `base_result` as indented JSON text plus a JSON-mode structured dump."""
+        return cls(
+            content=[TextContent(type="text", text=base_result.model_dump_json(indent=2))],
+            structuredContent=base_result.model_dump(mode="json"),
+            isError=bool(base_result.error),
+        )
+
+    def _structured_value(self, key: str) -> Any:
+        """Look up `key` in `structuredContent`; None unless it is a dict."""
+        payload = self.structuredContent
+        return payload.get(key) if isinstance(payload, dict) else None
+
+    @property
+    def result(self) -> Optional[List[Any]]:
+        """Value stored under "result", or None."""
+        return self._structured_value("result")
+
+    @property
+    def total(self) -> Optional[int]:
+        """Value stored under "total", or None."""
+        return self._structured_value("total")
+
+    @property
+    def has_more(self) -> Optional[bool]:
+        """Value stored under "has_more", or None."""
+        return self._structured_value("has_more")
+
+    @property
+    def error(self) -> Optional[str]:
+        """Value stored under "error", or None."""
+        return self._structured_value("error")
+
+    @property
+    def info(self) -> Optional[List[str]]:
+        """Value stored under "info", or None."""
+        return self._structured_value("info")
+
+    @property
+    def warning(self) -> Optional[List[str]]:
+        """Value stored under "warning", or None."""
+        return self._structured_value("warning")
+
+    @property
+    def tool_call_started_at(self) -> Optional[str]:
+        """Value stored under "tool_call_started_at", or None."""
+        return self._structured_value("tool_call_started_at")
+
+    @property
+    def tool_call_finished_at(self) -> Optional[str]:
+        """Value stored under "tool_call_finished_at", or None."""
+        return self._structured_value("tool_call_finished_at")
+
+    @property
+    def tool_call_duration_ms(self) -> Optional[int]:
+        """Value stored under "tool_call_duration_ms", or None."""
+        return self._structured_value("tool_call_duration_ms")
+
+    @property
+    def debug(self) -> Optional[dict[str, Any]]:
+        """Value stored under "debug", or None."""
+        return self._structured_value("debug")
diff --git a/pyproject.toml b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "pydantic-core>=2.33.2",
     "pydantic-settings>=2.10.1",
     "lxml>=5.3.0",
+    "polars>=1.40.1",
 ]
 
 [project.scripts]

diff --git a/server.py b/server.py
@@ -23,6 +23,7 @@
 from tools.project_manager import register as register_project_manager
 from tools.skills_manager import register as register_skills_manager
 from tools.test_manager import register as register_test_manager
+from tools.tools_manager import register as register_tools_manager
 from tools.user_manager import register as register_user_manager
 from tools.workspace_manager import register as register_workspace_manager
 
@@ -44,3 +45,4 @@ def register_tools(mcp, token: Optional[BzmToken]):
     register_billing_manager(mcp, token)
     register_help_manager(mcp, token)
     register_skills_manager(mcp, token)
+    register_tools_manager(mcp, token)
diff --git a/tests/test_async_task_manager_ids.py b/tests/test_async_task_manager_ids.py
@@ -0,0 +1,75 @@
+import asyncio
+
+import pytest
+
+import tools.async_task_manager as task_manager
+from models.result import BaseResult
+
+
+def _clear_tasks():
+    task_manager._tasks.clear()  # empty the module-level task registry in place
+
+
+def test_submit_task_uses_crockford_base32_id():
+    """A submitted task gets an 8-char id drawn from TASK_ID_ALPHABET."""
+    _clear_tasks()
+    async def scenario():
+        async def action():
+            return BaseResult(result=[{"ok": True}])
+
+        task_id = task_manager.submit_task(
+            action={"manager": "TestManager", "method": "read"}, coro_factory=action
+        )
+        assert task_manager.get_task_record(task_id) is not None
+        assert len(task_id) == 8
+        assert all(ch in task_manager.TASK_ID_ALPHABET for ch in task_id)
+
+        # Bounded poll (500 x 10 ms) so a task that never finishes fails instead of hanging.
+        for _ in range(500):
+            record = task_manager.get_task_record(task_id)
+            if record and record.status in {"completed", "failed", "cancelled"}:
+                break
+            await asyncio.sleep(0.01)
+        else:
+            raise AssertionError(f"task {task_id} did not reach a terminal status")
+        assert task_manager.remove_task(task_id) is True
+
+    asyncio.run(scenario())
+
+
+def test_collision_policy_fails_after_ten_attempts(monkeypatch):
+    """Allocation raises once every generated id collides with a stored task."""
+    _clear_tasks()
+    taken_id = "deadbeef"
+    seeded = task_manager.TaskRecord(
+        task_id=taken_id, action={"manager": "TestManager", "method": "read"},
+        created_at=0.0, last_updated_at=0.0, time_to_live_ms=None,
+        status=task_manager.STATUS_PARKING, status_message="seed", status_info="seed",
+    )
+    task_manager._tasks[taken_id] = seeded
+
+    # Force the generator to hand back the occupied id on every attempt.
+    monkeypatch.setattr(task_manager, "_generate_task_id", lambda: taken_id)
+
+    expected = "Unable to allocate unique 8-char task id after 10 attempts."
+    with pytest.raises(RuntimeError, match=expected):
+        task_manager._allocate_task_id()
+
+
+def test_task_lookup_is_case_insensitive():
+    """Lookup and removal accept the upper-case spelling of a lower-case stored id."""
+    _clear_tasks()
+    stored_id = "7k2p9m4q"
+    shouted_id = stored_id.upper()
+    task_manager._tasks[stored_id] = task_manager.TaskRecord(
+        task_id=stored_id,
+        action={"manager": "ExecutionManager", "method": "list"},
+        created_at=0.0, last_updated_at=0.0, time_to_live_ms=None,
+        status=task_manager.STATUS_WORKING,
+        status_message="running", status_info="running",
+    )
+
+    assert task_manager.get_task_record(shouted_id) is not None
+    assert task_manager.remove_task(shouted_id) is True
+    # After removal the original lower-case key must be gone as well.
+    assert task_manager.get_task_record(stored_id) is None
diff --git a/tests/test_batch_controls.py b/tests/test_batch_controls.py
@@ -40,11 +40,11 @@ def decorator(func):
 
 class TestBatchControls:
     def test_help_batch_respects_concurrency_limit(self, monkeypatch):
+        monkeypatch.setattr("tools.utils.MAX_BATCH_CONCURRENCY", 2)
         mcp = FakeMcp()
         register_help_tool(mcp, token=None)
         help_tool = mcp.tools[f"{TOOLS_PREFIX}_help"]
         HelpManager.help_tree = {}
-        monkeypatch.setattr(HelpManager, "MAX_BATCH_CONCURRENCY", 2)
 
         active_calls = {"current": 0, "max": 0}
 
@@ -60,20 +60,22 @@ async def slow_list_help_categories(self):
         monkeypatch.setattr(HelpManager, "list_help_categories", slow_list_help_categories)
 
         batch_calls = [{"action": "list_help_categories", "args": {}} for _ in range(6)]
-        result = asyncio.run(help_tool("batch", {"batch_calls": batch_calls}, ctx=None))
+        result = asyncio.run(
+            help_tool({"action": "batch", "batch_calls": batch_calls}, ctx=None)
+        )
 
         assert result.error is None
         assert active_calls["max"] <= 2
 
     def test_skills_batch_respects_concurrency_limit(self, monkeypatch):
+        monkeypatch.setattr("tools.utils.MAX_BATCH_CONCURRENCY", 2)
         mcp = FakeMcp()
         register_skills_tool(mcp, token=None)
         skills_tool = mcp.tools[f"{TOOLS_PREFIX}_skills"]
-        monkeypatch.setattr(SkillsManager, "MAX_BATCH_CONCURRENCY", 2)
 
         active_calls = {"current": 0, "max": 0}
 
-        async def slow_list_skills():
+        async def slow_list_skills(self):
             active_calls["current"] += 1
             active_calls["max"] = max(active_calls["max"], active_calls["current"])
             try:
@@ -82,10 +84,12 @@ async def slow_list_skills():
             finally:
                 active_calls["current"] -= 1
 
-        monkeypatch.setattr(SkillsManager, "list_skills", staticmethod(slow_list_skills))
+        monkeypatch.setattr(SkillsManager, "list_skills", slow_list_skills)
 
         batch_calls = [{"action": "list_skills", "args": {}} for _ in range(6)]
-        result = asyncio.run(skills_tool("batch", {"batch_calls": batch_calls}, ctx=None))
+        result = asyncio.run(
+            skills_tool({"action": "batch", "batch_calls": batch_calls}, ctx=None)
+        )
 
         assert result.error is None
         assert active_calls["max"] <= 2