OpenDCAI · Dingxingdi · May 3, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/.gitignore b/.gitignore
@@ -92,3 +92,10 @@ fordiff/
 
 # Local SQL workspace
 sql/
+
+sandbox/server/backends/resources/mcp/mock_runtime/.env
+sandbox/server/backends/resources/mcp/mock_runtime/certs/
+sandbox/server/backends/resources/mcp/mock_runtime/logs/
+sandbox/server/backends/resources/mcp/mock_runtime/run/
+
+docs/superpowers
diff --git a/configs/sandbox-server/coding_config.json b/configs/sandbox-server/coding_config.json
@@ -0,0 +1,17 @@
+{
+  "server": {
+    "url": "http://127.0.0.1:18890",
+    "port": 18890,
+    "session_ttl": 900
+  },
+  "resources": {
+    "code": {
+      "enabled": true,
+      "description": "Local coding backend for symbolic checks, Lean/Coq validation, bib processing, and plotting",
+      "backend_class": "sandbox.server.backends.resources.code.CodeBackend",
+      "config": {
+        "workspace_root": "${CODE_WORKSPACE_ROOT}"
+      }
+    }
+  }
+}
diff --git a/configs/sandbox-server/mcp_config.json b/configs/sandbox-server/mcp_config.json
@@ -10,14 +10,12 @@
       "description": "Toolathlon-GYM MCP backend",
       "backend_class": "sandbox.server.backends.resources.mcp.toolathlon_gym.ToolathlonGymBackend",
       "config": {
-        "enabled_mcp_servers": ["filesystem", "terminal", "snowflake"],
+        "enabled_mcp_servers": ["excel", "filesystem", "memory", "pdf-tools", "playwright_with_chunk", "pptx", "terminal", "word", "canvas", "notion", "woocommerce"],
         "workspace_root": "${TOOLATHLON_WORKSPACE_ROOT:-/tmp/agentflow_mcp}",
         "env_overrides": {
-          "PGHOST": "${PGHOST:-toolathlon_pg}",
-          "PGPORT": "${PGPORT:-5432}",
-          "PGUSER": "${PGUSER:-eigent}",
-          "PGPASSWORD": "${PGPASSWORD:-camel}",
-          "PGDATABASE": "${PGDATABASE:-toolathlon_gym}"
+          "CANVAS_DOMAIN": "${AGENTFLOW_MCP_CANVAS_ENDPOINT:-127.0.0.1:38080}",
+          "BASE_URL": "${AGENTFLOW_MCP_NOTION_ENDPOINT:-http://127.0.0.1:38081}",
+          "WORDPRESS_SITE_URL": "${AGENTFLOW_MCP_WOOCOMMERCE_ENDPOINT:-http://127.0.0.1:38082}"
         }
       }
     }

diff --git a/configs/synthesis/coding.json b/configs/synthesis/coding.json
@@ -0,0 +1,45 @@
+{
+  "model_name": "deepseek/deepseek-v4-flash",
+  "api_key": "${OPENROUTER_API_KEY}",
+  "base_url": "https://openrouter.ai/api/v1",
+  "max_depth": 12,
+  "branching_factor": 2,
+  "depth_threshold": 2,
+  "min_depth": 4,
+  "max_selected_traj": 3,
+  "path_similarity_threshold": 0.72,
+  "number_of_seed": null,
+  "sandbox_server_url": "http://127.0.0.1:18890",
+  "sandbox_auto_start": true,
+  "sandbox_config_path": "configs/sandbox-server/coding_config.json",
+  "sandbox_timeout": 300,
+  "available_tools": [
+    "code-*"
+  ],
+  "sampling_tips": [
+    "You are exploring a local code repository, not a knowledge base.",
+    "Use only code tools to inspect files, search symbols, and run lightweight shell commands inside the workspace.",
+    "Prioritize repository structure, entrypoints, dependency files, configuration files, scripts, and tests.",
+    "Ground every conclusion in concrete evidence from files or command output.",
+    "Do not rely on outside knowledge or invent project behavior that is not supported by the repository."
+  ],
+  "synthesis_tips": [
+    "We are training an assistant for repository-level coding tasks.",
+    "Generate realistic engineering questions that can be answered strictly from the explored repository.",
+    "Prefer questions about entrypoints, commands, configs, dependencies, file locations, and module relationships.",
+    "Answers should be short, factual, and grounded in trajectory evidence.",
+    "Avoid generic software trivia and avoid questions that require external documentation."
+  ],
+  "seeds_file": "seeds/coding/coding.jsonl",
+  "output_dir": "results/coding",
+  "resource_types": [
+    "code"
+  ],
+  "resource_init_configs": {
+    "code": {
+      "content": {
+        "source_dir": "${SOURCE_DIR}"
+      }
+    }
+  }
+}
diff --git a/configs/synthesis/mcp.json b/configs/synthesis/mcp.json
@@ -0,0 +1,44 @@
+{
+  "model_name": "deepseek/deepseek-v4-flash",
+  "api_key": "${OPENROUTER_API_KEY}",
+  "base_url": "https://openrouter.ai/api/v1",
+  "max_depth": 12,
+  "branching_factor": 2,
+  "depth_threshold": 2,
+  "min_depth": 4,
+  "max_selected_traj": 3,
+  "path_similarity_threshold": 0.72,
+  "number_of_seed": null,
+  "sandbox_server_url": "http://127.0.0.1:18890",
+  "sandbox_auto_start": true,
+  "sandbox_config_path": "configs/sandbox-server/mcp_config.json",
+  "sandbox_timeout": 300,
+  "available_tools": [
+    "mcp:canvas.*",
+    "mcp:filesystem.*",
+    "mcp:memory.*",
+    "mcp:pdf-tools.*",
+    "mcp:playwright_with_chunk.*",
+    "mcp:pptx.*",
+    "mcp:terminal.*",
+    "mcp:word.*",
+    "mcp:excel.*"
+  ],
+  "sampling_tips": [
+    "Canvas communication for large courses is scattered across inbox conversations, announcements, discussion topics, course context, grades, submissions, and due-date signals. Teachers often need to answer repeated student questions, identify which messages require attention, and prepare targeted follow-up without manually scanning every course artifact or gradebook row.",
+    "They expect a context-aware communication assistant that summarizes Canvas inbox conversations and course discussion topics, drafts replies to common student questions using relevant course context, identifies students or groups who may need follow-up based on grades, late submissions, missing grading status, or upcoming deadlines, and prepares instructor-approved Canvas messages or announcements for targeted communication.",
+    "You have access to Canvas, a Learning Management System, which records 22 courses, 28,865 users, 32,663 enrollments, 206 assignments, 173,912 submissions and 77 quizzes. You can also use other tools provided.",
+    "We need mountains of training data that is diverse and challenging to train agents that help teachers meet their expectations."
+  ],
+  "synthesis_tips": [
+    "Canvas communication for large courses is scattered across inbox conversations, announcements, discussion topics, course context, grades, submissions, and due-date signals. Teachers often need to answer repeated student questions, identify which messages require attention, and prepare targeted follow-up without manually scanning every course artifact or gradebook row.",
+    "They expect a context-aware communication assistant that summarizes Canvas inbox conversations and course discussion topics, drafts replies to common student questions using relevant course context, identifies students or groups who may need follow-up based on grades, late submissions, missing grading status, or upcoming deadlines, and prepares instructor-approved Canvas messages or announcements for targeted communication.",
+    "Please adopt the perspective of a teacher or a professor, and think about what kinds of questions you would ask an agent in the specific scenario, as well as what kind of answers would truly meet your needs.",
+    "We need mountains of training data that is diverse and challenging to train agents that help teachers meet their expectations."
+  ],
+  "seeds_file": "seeds/mcp/seeds.jsonl",
+  "output_dir": "results/communication",
+  "resource_types": [
+    "mcp"
+  ]
+}