Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,10 @@ fordiff/

# Local SQL workspace
sql/

sandbox/server/backends/resources/mcp/mock_runtime/.env
sandbox/server/backends/resources/mcp/mock_runtime/certs/
sandbox/server/backends/resources/mcp/mock_runtime/logs/
sandbox/server/backends/resources/mcp/mock_runtime/run/

docs/superpowers
17 changes: 17 additions & 0 deletions configs/sandbox-server/coding_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"server": {
"url": "http://127.0.0.1:18890",
"port": 18890,
"session_ttl": 900
},
"resources": {
"code": {
"enabled": true,
"description": "Local coding backend for symbolic checks, Lean/Coq validation, bib processing, and plotting",
"backend_class": "sandbox.server.backends.resources.code.CodeBackend",
"config": {
"workspace_root": "${CODE_WORKSPACE_ROOT}"
}
}
}
}
10 changes: 4 additions & 6 deletions configs/sandbox-server/mcp_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@
"description": "Toolathlon-GYM MCP backend",
"backend_class": "sandbox.server.backends.resources.mcp.toolathlon_gym.ToolathlonGymBackend",
"config": {
-"enabled_mcp_servers": ["filesystem", "terminal", "snowflake"],
+"enabled_mcp_servers": ["excel", "filesystem", "memory", "pdf-tools", "playwright_with_chunk", "pptx", "terminal", "word", "canvas", "notion", "woocommerce"],
"workspace_root": "${TOOLATHLON_WORKSPACE_ROOT:-/tmp/agentflow_mcp}",
"env_overrides": {
-"PGHOST": "${PGHOST:-toolathlon_pg}",
-"PGPORT": "${PGPORT:-5432}",
-"PGUSER": "${PGUSER:-eigent}",
-"PGPASSWORD": "${PGPASSWORD:-camel}",
-"PGDATABASE": "${PGDATABASE:-toolathlon_gym}"
+"CANVAS_DOMAIN": "${AGENTFLOW_MCP_CANVAS_ENDPOINT:-127.0.0.1:38080}",
+"BASE_URL": "${AGENTFLOW_MCP_NOTION_ENDPOINT:-http://127.0.0.1:38081}",
+"WORDPRESS_SITE_URL": "${AGENTFLOW_MCP_WOOCOMMERCE_ENDPOINT:-http://127.0.0.1:38082}"
}
}
}
Expand Down
45 changes: 45 additions & 0 deletions configs/synthesis/coding.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"model_name": "deepseek/deepseek-v4-flash",
"api_key": "${OPENROUTER_API_KEY}",
"base_url": "https://openrouter.ai/api/v1",
"max_depth": 12,
"branching_factor": 2,
"depth_threshold": 2,
"min_depth": 4,
"max_selected_traj": 3,
"path_similarity_threshold": 0.72,
"number_of_seed": null,
"sandbox_server_url": "http://127.0.0.1:18890",
"sandbox_auto_start": true,
"sandbox_config_path": "configs/sandbox-server/coding_config.json",
"sandbox_timeout": 300,
"available_tools": [
"code-*"
],
"sampling_tips": [
"You are exploring a local code repository, not a knowledge base.",
"Use only code tools to inspect files, search symbols, and run lightweight shell commands inside the workspace.",
"Prioritize repository structure, entrypoints, dependency files, configuration files, scripts, and tests.",
"Ground every conclusion in concrete evidence from files or command output.",
"Do not rely on outside knowledge or invent project behavior that is not supported by the repository."
],
"synthesis_tips": [
"We are training an assistant for repository-level coding tasks.",
"Generate realistic engineering questions that can be answered strictly from the explored repository.",
"Prefer questions about entrypoints, commands, configs, dependencies, file locations, and module relationships.",
"Answers should be short, factual, and grounded in trajectory evidence.",
"Avoid generic software trivia and avoid questions that require external documentation."
],
"seeds_file": "seeds/coding/coding.jsonl",
"output_dir": "results/coding",
"resource_types": [
"code"
],
"resource_init_configs": {
"code": {
"content": {
"source_dir": "${SOURCE_DIR}"
}
}
}
}
44 changes: 44 additions & 0 deletions configs/synthesis/mcp.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"model_name": "deepseek/deepseek-v4-flash",
"api_key": "${OPENROUTER_API_KEY}",
"base_url": "https://openrouter.ai/api/v1",
"max_depth": 12,
"branching_factor": 2,
"depth_threshold": 2,
"min_depth": 4,
"max_selected_traj": 3,
"path_similarity_threshold": 0.72,
"number_of_seed": null,
"sandbox_server_url": "http://127.0.0.1:18890",
"sandbox_auto_start": true,
"sandbox_config_path": "configs/sandbox-server/mcp_config.json",
"sandbox_timeout": 300,
"available_tools": [
"mcp:canvas.*",
"mcp:filesystem.*",
"mcp:memory.*",
"mcp:pdf-tools.*",
"mcp:playwright_with_chunk.*",
"mcp:pptx.*",
"mcp:terminal.*",
"mcp:word.*",
"mcp:excel.*"
],
"sampling_tips": [
"Canvas communication for large courses is scattered across inbox conversations, announcements, discussion topics, course context, grades, submissions, and due-date signals. Teachers often need to answer repeated student questions, identify which messages require attention, and prepare targeted follow-up without manually scanning every course artifact or gradebook row.",
"They expect a context-aware communication assistant that summarizes Canvas inbox conversations and course discussion topics, drafts replies to common student questions using relevant course context, identifies students or groups who may need follow-up based on grades, late submissions, missing grading status, or upcoming deadlines, and prepares instructor-approved Canvas messages or announcements for targeted communication.",
"You have access to Canvas, a Learning Management System, which records 22 courses, 28,865 users, 32,663 enrollments, 206 assignments, 173,912 submissions and 77 quizzes. You can also use other tools provided.",
"We need mountains of training data that is diverse and challenging to train the agents to help teachers meet their expectations."
],
"synthesis_tips": [
"Canvas communication for large courses is scattered across inbox conversations, announcements, discussion topics, course context, grades, submissions, and due-date signals. Teachers often need to answer repeated student questions, identify which messages require attention, and prepare targeted follow-up without manually scanning every course artifact or gradebook row.",
"They expect a context-aware communication assistant that summarizes Canvas inbox conversations and course discussion topics, drafts replies to common student questions using relevant course context, identifies students or groups who may need follow-up based on grades, late submissions, missing grading status, or upcoming deadlines, and prepares instructor-approved Canvas messages or announcements for targeted communication.",
"Please adopt the perspective of a teacher or a professor, and think about what kinds of questions you would ask an agent in the specific scenario, as well as what kind of answers would truly meet your needs.",
"We need mountains of training data that is diverse and challenging to train the agents to help teachers meet their expectations."
],
"seeds_file": "seeds/mcp/seeds.jsonl",
"output_dir": "results/communication",
"resource_types": [
"mcp"
]
}
Loading