5 changes: 5 additions & 0 deletions README.md
@@ -116,6 +116,7 @@ The framework exposes machine-readable docs in Next.js, with sitemap routes available
- `/docs/<slug>.md`
- `/docs/<slug>` with `Accept: text/markdown`
- `/docs/<slug>` with `Signature-Agent`
- generated `robots.txt` via `docs robots generate`

The canonical API routes remain available under `/api/docs`, including `/api/docs?format=skill`,
`/api/docs/mcp`, and `/api/docs/agent/spec`.
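
For example, an agent can fetch a page as markdown through either surface (the host and slug below are placeholders):

```ts
// Direct markdown route (hypothetical host and slug).
const direct = await fetch("https://docs.example.com/docs/getting-started.md");
console.log(await direct.text());

// Same page via content negotiation on the canonical route.
const negotiated = await fetch("https://docs.example.com/docs/getting-started", {
  headers: { Accept: "text/markdown" },
});
console.log(await negotiated.text());
```
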
@@ -138,6 +139,10 @@ For static hosting, run `pnpm exec docs sitemap generate` before your framework
`public/sitemap.xml`, `public/sitemap.md`, `public/.well-known/sitemap.md`, and the internal
`.farming-labs/sitemap-manifest.json` stay fresh.

Run `pnpm exec docs robots generate` when you also want a committed `robots.txt` policy. Existing
files are preserved by default; use `--append` to add the generated block or `--force` to replace
the file.
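
A minimal config sketch, assuming the option names the doctor's static parser recognizes (`enabled`, `path`, `baseUrl`, `ai`; values below are placeholders):

```ts
// docs.config.ts (illustrative; values are placeholders)
export default {
  entry: "docs",
  sitemap: { enabled: true, baseUrl: "https://docs.example.com" },
  robots: {
    enabled: true,
    path: "public/robots.txt", // where the generated policy is written and checked
    baseUrl: "https://docs.example.com",
    ai: "allow", // or "disallow", or a boolean
  },
};
```
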

## Agent Compaction

Use `docs agent compact` when you want to generate or refresh sibling `agent.md` files from
100 changes: 98 additions & 2 deletions packages/docs/src/cli/doctor.test.ts
@@ -256,6 +256,75 @@ Use this docs site through markdown routes and MCP.
    expect(report.checks.find((check) => check.id === "compact")?.status).toBe("pass");
  });

it("checks the local robots.txt agent policy", async () => {
writePackageJson(tmpDir, "doctor-robots", { next: "16.0.0" });

writeFileSync(
path.join(tmpDir, "docs.config.ts"),
`export default {
entry: "docs",
llmsTxt: { enabled: true, baseUrl: "https://docs.example.com" },
sitemap: { enabled: true, baseUrl: "https://docs.example.com" },
robots: { enabled: true },
};`,
"utf-8",
);

writeFileSync(
path.join(tmpDir, "next.config.ts"),
`import { withDocs } from "@farming-labs/next/config";

export default withDocs({});
`,
"utf-8",
);

mkdirSync(path.join(tmpDir, "app", "api", "docs"), { recursive: true });
writeFileSync(
path.join(tmpDir, "app", "api", "docs", "route.ts"),
`import { createDocsAPI } from "@farming-labs/next/api";

export const { GET, POST } = createDocsAPI({});
`,
"utf-8",
);
mkdirSync(path.join(tmpDir, "public"), { recursive: true });
writeFileSync(
path.join(tmpDir, "public", "robots.txt"),
`User-agent: *
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /sitemap.xml
Allow: /sitemap.md
Allow: /.well-known/sitemap.md
Allow: /.well-known/agent.json
Allow: /.well-known/agent
Allow: /skill.md
Allow: /mcp

User-agent: GPTBot
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: CCBot
Allow: /
`,
"utf-8",
);
writeDocsPage(tmpDir);
process.chdir(tmpDir);

const report = await inspectAgentReadiness();

expect(report.checks.find((check) => check.id === "robots")?.status).toBe("pass");
expect(report.checks.find((check) => check.id === "robots")?.detail).toContain(
"public/robots.txt",
);
});

it("probes hosted agent surfaces when --url is provided", async () => {
writePackageJson(tmpDir, "doctor-hosted", { next: "16.0.0" });

@@ -318,6 +387,32 @@ export const { GET, POST } = createDocsAPI({});
        return;
      }

if (url.pathname === "/robots.txt") {
res.writeHead(200, { "Content-Type": "text/plain" });
res.end(`User-agent: *
Allow: /
Allow: /llms.txt
Allow: /llms-full.txt
Allow: /sitemap.xml
Allow: /sitemap.md
Allow: /.well-known/sitemap.md
Allow: /.well-known/agent.json
Allow: /.well-known/agent
Allow: /skill.md
Allow: /mcp

User-agent: GPTBot
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: CCBot
Allow: /
`);
return;
}

if (url.pathname === "/skill.md" || url.pathname === "/.well-known/skill.md") {
res.writeHead(200, { "Content-Type": "text/markdown" });
res.end("# Skill\n\nUse MCP and markdown routes.");
@@ -424,12 +519,13 @@ export const { GET, POST } = createDocsAPI({});
    const report = await inspectAgentReadiness({ url: `http://127.0.0.1:${port}` });

    expect(report.url).toBe(`http://127.0.0.1:${port}`);
-    expect(report.maxScore).toBe(135);
+    expect(report.maxScore).toBe(145);
    expect(report.checks.find((check) => check.id === "hosted-agent-discovery")?.status).toBe(
      "pass",
    );
    expect(report.checks.find((check) => check.id === "hosted-llms")?.status).toBe("pass");
    expect(report.checks.find((check) => check.id === "hosted-sitemap")?.status).toBe("pass");
    expect(report.checks.find((check) => check.id === "hosted-robots")?.status).toBe("pass");
    expect(report.checks.find((check) => check.id === "hosted-skill")?.status).toBe("pass");
    expect(report.checks.find((check) => check.id === "hosted-markdown")?.status).toBe("pass");
    expect(report.checks.find((check) => check.id === "hosted-mcp")?.status).toBe("pass");
@@ -529,7 +625,7 @@ Use this docs site through markdown routes and MCP.
    process.chdir(tmpDir);
    const report = await inspectAgentReadiness({ url: `http://127.0.0.1:${port}` });

-    expect(report.maxScore).toBe(135);
+    expect(report.maxScore).toBe(145);
    expect(report.score).toBeGreaterThanOrEqual(90);
    expect(report.grade).not.toBe("Agent-optimized");
    expect(report.checks.find((check) => check.id === "hosted-agent-discovery")?.status).toBe(
196 changes: 194 additions & 2 deletions packages/docs/src/cli/doctor.ts
@@ -1,6 +1,6 @@
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
import path from "node:path";
-import { LATEST_PROTOCOL_VERSION } from "@modelcontextprotocol/sdk/types";
+import { LATEST_PROTOCOL_VERSION } from "@modelcontextprotocol/sdk/types.js";
import pc from "picocolors";
import {
  DEFAULT_AGENT_FEEDBACK_ROUTE,
@@ -20,7 +20,12 @@ import {
  DEFAULT_SITEMAP_XML_ROUTE,
  resolveDocsSitemapConfig,
} from "../sitemap.js";
-import type { DocsConfig, DocsMcpConfig, DocsSitemapConfig } from "../types.js";
+import {
+  DEFAULT_ROBOTS_TXT_ROUTE,
+  analyzeDocsRobotsTxt,
+  resolveDocsRobotsConfig,
+} from "../robots.js";
+import type { DocsConfig, DocsMcpConfig, DocsRobotsConfig, DocsSitemapConfig } from "../types.js";
import {
  extractNestedObjectLiteral,
  extractTopLevelConfigObject,
@@ -393,6 +398,49 @@ function readSitemapConfigFromStatic(content: string): boolean | DocsSitemapConfig
  return config;
}

function readRobotsConfigFromStatic(content: string): boolean | DocsRobotsConfig | undefined {
  const topLevelBoolean = readTopLevelBooleanProperty(content, "robots");
  if (typeof topLevelBoolean === "boolean") return topLevelBoolean;

  const block = extractNestedObjectLiteral(content, ["robots"]);
  if (!block) return undefined;

  const config: DocsRobotsConfig = {};
  const enabled = readObjectBooleanProperty(block, "enabled");
  const pathValue = block.match(/\bpath\s*:\s*["'`]([^"'`]+)["'`]/)?.[1];
  const baseUrl = block.match(/\bbaseUrl\s*:\s*["'`]([^"'`]+)["'`]/)?.[1];
  const aiString = block.match(/\bai\s*:\s*["'`](allow|disallow)["'`]/)?.[1] as
    | "allow"
    | "disallow"
    | undefined;
  const aiBoolean = readObjectBooleanProperty(block, "ai");

  if (typeof enabled === "boolean") config.enabled = enabled;
  if (pathValue) config.path = pathValue;
  if (baseUrl) config.baseUrl = baseUrl;
  if (aiString) config.ai = aiString;
  else if (typeof aiBoolean === "boolean") config.ai = aiBoolean;

  return config;
}
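
For instance, given a static config source like the one below, the parser would produce the matching `DocsRobotsConfig` (illustrative, assuming the regex helpers behave as named):

```ts
const source = `export default {
  robots: { enabled: true, path: "public/robots.txt", ai: "disallow" },
};`;

readRobotsConfigFromStatic(source);
// → { enabled: true, path: "public/robots.txt", ai: "disallow" }
```
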

function resolvePublicDir(rootDir: string, framework: Framework | "unknown"): string {
  if (framework === "sveltekit") return path.join(rootDir, "static");
  return path.join(rootDir, "public");
}

function resolveRobotsFilePath(
  rootDir: string,
  framework: Framework | "unknown",
  robots: DocsRobotsConfig | undefined,
): string {
  if (robots?.path) {
    return path.isAbsolute(robots.path) ? robots.path : path.resolve(rootDir, robots.path);
  }

  return path.join(resolvePublicDir(rootDir, framework), "robots.txt");
}
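
A quick illustration of the resolution order (hypothetical POSIX paths; `"sveltekit"` and `"unknown"` are the two framework cases visible above):

```ts
// An explicit robots.path wins and is resolved against the project root.
resolveRobotsFilePath("/repo", "unknown", { path: "config/robots.txt" });
// → "/repo/config/robots.txt"

// Otherwise the framework's static-asset directory is used.
resolveRobotsFilePath("/repo", "sveltekit", undefined); // → "/repo/static/robots.txt"
resolveRobotsFilePath("/repo", "unknown", undefined); // → "/repo/public/robots.txt"
```
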

function resolveStaticExport(config: DocsConfig | undefined, content: string): boolean {
  if (typeof config?.staticExport === "boolean") return config.staticExport;
  return readTopLevelBooleanProperty(content, "staticExport") ?? false;
@@ -1117,6 +1165,53 @@ async function probeTextRoute(
  }
}

async function probeRobotsRoute(baseUrl: string): Promise<{
  ok: boolean;
  status?: number;
  detail: string;
  body?: string;
}> {
  const route = DEFAULT_ROBOTS_TXT_ROUTE;
  const url = joinDoctorUrl(baseUrl, route);

  try {
    const response = await fetchWithTimeout(url, {
      headers: {
        Accept: "text/plain, */*",
      },
    });
    const body = await response.text().catch(() => "");

    if (!response.ok) {
      return {
        ok: false,
        status: response.status,
        detail: `${route} returned HTTP ${response.status}.`,
      };
    }

    if (body.trim().length === 0) {
      return {
        ok: false,
        status: response.status,
        detail: `${route} returned an empty body.`,
      };
    }

    return {
      ok: true,
      status: response.status,
      body,
      detail: `${route} returned HTTP ${response.status} with ${body.length} characters.`,
    };
  } catch (error) {
    return {
      ok: false,
      detail: `${route} failed: ${error instanceof Error ? error.message : String(error)}.`,
    };
  }
}

async function probeJsonRoute(
  baseUrl: string,
  route: string,
@@ -1447,6 +1542,34 @@ async function buildHostedAgentChecks(
    );
  }

  const robots = await probeRobotsRoute(baseUrl);
  const robotsAnalysis = robots.body ? analyzeDocsRobotsTxt(robots.body) : undefined;
  const robotsBlocked = robotsAnalysis?.blocksAgentRoutes || robotsAnalysis?.blocksAiAgents;
  const robotsComplete = robotsAnalysis?.hasAgentRoutes && robotsAnalysis?.hasAiPolicy;
  checks.push(
    makeCheck(
      "hosted-robots",
      "Hosted robots.txt",
      robots.ok && !robotsBlocked && robotsComplete
        ? "pass"
        : robots.ok && !robotsBlocked
          ? "warn"
          : "fail",
      robots.ok && !robotsBlocked && robotsComplete ? 5 : robots.ok && !robotsBlocked ? 3 : 0,
      5,
      robots.ok
        ? robotsBlocked
          ? `${DEFAULT_ROBOTS_TXT_ROUTE} is reachable but blocks ${robotsAnalysis?.blocksAiAgents ? "common AI crawlers" : "agent-readable docs routes"}.`
          : robotsComplete
            ? `${robots.detail} It advertises agent-readable routes and common AI crawler policy.`
            : `${robots.detail} It is missing ${robotsAnalysis?.missingRoutes.length ? `agent routes (${robotsAnalysis.missingRoutes.join(", ")})` : "common AI crawler policy"}.`
        : robots.detail,
      robots.ok && !robotsBlocked && robotsComplete
        ? undefined
        : "Publish an agent-friendly robots.txt with `docs robots generate`, or append the generated block to the existing file.",
    ),
  );
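
The nested ternaries encode a three-tier rubric; restated here for readability (an editorial summary, not part of the diff):

```ts
// Hosted robots.txt scoring, 5 points max:
//   reachable, nothing blocked, agent routes + AI policy present → "pass", 5/5
//   reachable, nothing blocked, routes or AI policy missing      → "warn", 3/5
//   blocked or unreachable                                       → "fail", 0/5
```
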

  const skill = await Promise.all([
    probeTextRoute(baseUrl, DEFAULT_SKILL_MD_ROUTE),
    probeTextRoute(baseUrl, DEFAULT_SKILL_MD_WELL_KNOWN_ROUTE),
@@ -1658,6 +1781,20 @@ export async function inspectAgentReadiness(
  const sitemapConfig = resolveDocsSitemapConfig(
    config?.sitemap ?? readSitemapConfigFromStatic(configContent) ?? false,
  );
  const robotsInput = config?.robots ?? readRobotsConfigFromStatic(configContent) ?? true;
  const robotsConfig =
    robotsInput === false
      ? resolveDocsRobotsConfig(false)
      : resolveDocsRobotsConfig(robotsInput, {
          baseUrl:
            (typeof robotsInput === "object" ? robotsInput.baseUrl : undefined) ??
            sitemapConfig.baseUrl,
        });
  const robotsPath = resolveRobotsFilePath(
    rootDir,
    framework,
    typeof robotsInput === "object" ? robotsInput : undefined,
  );
  const feedbackRoute = DEFAULT_AGENT_FEEDBACK_ROUTE;
  const feedbackSchemaRoute = `${feedbackRoute}/schema`;

@@ -1784,6 +1921,61 @@
    ),
  );

  const relativeRobotsPath = path.relative(rootDir, robotsPath).replace(/\\/g, "/");
  if (!robotsConfig.enabled) {
    checks.push(
      makeCheck(
        "robots",
        "Robots agent policy",
        "warn",
        0,
        5,
        "Robots generation is disabled in docs config.",
        "Enable robots and run `docs robots generate` so crawlers can discover agent-readable docs routes.",
      ),
    );
  } else if (!existsSync(robotsPath)) {
    checks.push(
      makeCheck(
        "robots",
        "Robots agent policy",
        "warn",
        0,
        5,
        `No robots.txt found at ${relativeRobotsPath}.`,
        `Run docs robots generate --path ${relativeRobotsPath} to publish an agent-friendly crawl policy.`,
      ),
    );
  } else {
    const robots = readFileSync(robotsPath, "utf-8");
    const analysis = analyzeDocsRobotsTxt(robots, {
      entry,
      sitemap: sitemapConfig,
      baseUrl: robotsConfig.baseUrl,
      robots: robotsConfig,
    });
    const blocked = analysis.blocksAgentRoutes || analysis.blocksAiAgents;
    const complete = analysis.hasAgentRoutes && analysis.hasAiPolicy;

    checks.push(
      makeCheck(
        "robots",
        "Robots agent policy",
        blocked ? "fail" : complete ? "pass" : "warn",
        blocked ? 0 : complete ? 5 : 3,
        5,
        blocked
          ? `${relativeRobotsPath} blocks ${analysis.blocksAiAgents ? "common AI crawlers" : "agent-readable docs routes"}.`
          : complete
            ? `${relativeRobotsPath} advertises agent-readable routes and common AI crawler policy.`
            : `${relativeRobotsPath} exists, but is missing ${analysis.missingRoutes.length > 0 ? `agent routes (${analysis.missingRoutes.join(", ")})` : "common AI crawler policy"}.`,
        blocked || !complete
          ? `Run docs robots generate --append --path ${relativeRobotsPath} to add the generated agent policy without replacing the existing file.`
          : undefined,
      ),
    );
  }
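
The local check applies the same rubric, plus two warn branches for a disabled config and a missing file (editorial summary, not part of the diff):

```ts
// Local "robots" check scoring, 5 points max:
//   robots disabled in docs config                  → "warn", 0/5
//   no robots.txt at the resolved path              → "warn", 0/5
//   file blocks agent routes or common AI crawlers  → "fail", 0/5
//   file present but incomplete                     → "warn", 3/5
//   file advertises agent routes and AI policy      → "pass", 5/5
```
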

  checks.push(
    skillFileExists
      ? makeCheck(