Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .changeset/geo-surface-checks.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
"agentimization": patch
"@agentimization/core": patch
---

add six GEO surface checks and tighten meta/image parsing:

- new `https-enabled` check (url-stability, network-only) — flags non-HTTPS deployments
- new `meta-description` check (geo-signals) — requires `<meta name="description">` between 50 and 160 chars; treats whitespace-only content as missing
- new `open-graph-tags` check (geo-signals) — checks `og:title`, `og:description`, `og:image`, `og:url`; surfaces partial/none coverage in the message
- new `external-citations` check (geo-signals) — looks for at least two outbound links per page (remote mode only)
- new `substantial-text-content` check (page-size) — requires at least 100 words of body text per page
- new `image-alt-text` check (content-structure) — measures descriptive alt-text coverage and excludes decorative `alt=""` images from the denominator
- `extractMetaTags` lowercases keys so capitalized meta names (`<meta name="Description">`) are matched
- `extractImages` quote-aware attribute reader preserves apostrophes inside double-quoted alt values
- CLI now derives `networkSkipped` from `ALL_CHECKS.length` instead of the hard-coded constant `35`, so the count stays correct as checks are added
4 changes: 2 additions & 2 deletions apps/cli/src/ui/app.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { useState, useEffect } from "react"
import { Box, Text } from "ink"
import { audit, auditLocal } from "@agentimization/core"
import { audit, auditLocal, ALL_CHECKS } from "@agentimization/core"
import type { AuditResult, AuditEvent, CheckCategory } from "@agentimization/shared"
import { HeroCard, dim, FRAME_INNER_FACTOR } from "./hero-card.js"
import { ResultCard, RESULT_CARD_WIDTH } from "./result-card.js"
Expand Down Expand Up @@ -67,7 +67,7 @@ export const App = ({
: await audit(target, config)

if (isLocal) {
setNetworkSkipped(35 - res.summary.total)
setNetworkSkipped(ALL_CHECKS.length - res.summary.total)
}

setResult(res)
Expand Down
29 changes: 29 additions & 0 deletions packages/core/src/__tests__/audit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,35 @@ describe("auditLocal", () => {
expect(agentsMd!.status).toBe("fail")
})

it("runs the new GEO surface checks on good-site", async () => {
  const { checks } = await auditLocal(resolve(FIXTURES, "good-site"))

  // Status reported for a check id, or undefined when the audit produced no such check.
  const statusOf = (id: string) => checks.find((check) => check.id === id)?.status

  // Every GEO surface check that runs in local mode should pass on the good fixture.
  const expectedToPass = [
    "meta-description",
    "open-graph-tags",
    "substantial-text-content",
    "image-alt-text",
  ]
  for (const id of expectedToPass) {
    expect(statusOf(id)).toBe("pass")
  }
})

it("flags missing GEO surface signals on bad-site", async () => {
  const { checks } = await auditLocal(resolve(FIXTURES, "bad-site"))

  // Status reported for a check id, or undefined when the audit produced no such check.
  const statusOf = (id: string) => checks.find((check) => check.id === id)?.status

  // The bad fixture is expected to fail each of these checks.
  const expectedToFail = ["meta-description", "open-graph-tags", "substantial-text-content"]
  for (const id of expectedToFail) {
    expect(statusOf(id)).toBe("fail")
  }
})

it("emits events when onEvent is provided", async () => {
const events: string[] = []

Expand Down
12 changes: 10 additions & 2 deletions packages/core/src/__tests__/fixtures/good-site/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
<html>
<head>
<title>Good Site</title>
<meta name="description" content="A well-structured documentation site designed for both humans and AI agents, with comprehensive guides, API references, and code examples.">
<meta property="og:title" content="Good Site">
<meta property="og:description" content="A well-structured documentation site designed for both humans and AI agents.">
<meta property="og:image" content="https://good-site.com/og.png">
<meta property="og:url" content="https://good-site.com/">
<link rel="canonical" href="https://good-site.com/">
<script type="application/ld+json">{"@context":"https://schema.org","@type":"WebSite","name":"Good Site","url":"https://good-site.com"}</script>
</head>
Expand All @@ -10,11 +15,14 @@
<h1>Welcome to Good Site</h1>
<p>This is a well-structured documentation site designed for both humans and AI agents. It includes comprehensive guides, API references, and code examples that make it easy to understand and integrate with our platform.</p>
<h2>Getting Started</h2>
<p>Follow our quickstart guide to get up and running in minutes. We support multiple programming languages and frameworks.</p>
<p>Follow our quickstart guide to get up and running in minutes. We support multiple programming languages and frameworks including TypeScript, Python, Ruby, and Go. Our documentation is written in plain English and avoids unnecessary jargon so that both new developers and seasoned engineers can navigate it without friction.</p>
<h2>API Reference</h2>
<p>Complete API documentation with examples for every endpoint.</p>
<p>Complete API documentation with examples for every endpoint. We follow REST conventions and return predictable JSON responses. Every error includes a stable error code, a human-readable message, and a link to the relevant section of the documentation.</p>
<img src="/diagram.png" alt="Architecture diagram showing how the Good Site SDK communicates with the API">
<a href="/docs/getting-started">Getting Started</a>
<a href="/docs/api">API Reference</a>
<a href="https://schema.org">schema.org</a>
<a href="https://llmstxt.org">llms.txt spec</a>
</main>
</body>
</html>
26 changes: 26 additions & 0 deletions packages/core/src/__tests__/html.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { describe, it, expect } from "vitest"
import {
stripHtml,
extractHeadings,
extractImages,
extractJsonLd,
extractCodeFences,
hasServerRenderedContent,
Expand Down Expand Up @@ -41,6 +42,31 @@ describe("extractHeadings", () => {
})
})

describe("extractImages", () => {
  it("extracts src and alt", () => {
    // Two images: one with a descriptive alt, one with an explicitly empty alt.
    const images = extractImages(`<img src="/a.png" alt="Alpha"><img src="/b.png" alt="">`)
    const expected = [
      { src: "/a.png", alt: "Alpha" },
      { src: "/b.png", alt: "" },
    ]
    expect(images).toEqual(expected)
  })

  it("returns undefined alt when missing", () => {
    // No alt attribute at all is reported as undefined, not as an empty string.
    const images = extractImages(`<img src="/x.png">`)
    expect(images).toEqual([{ src: "/x.png", alt: undefined }])
  })

  it("skips img tags with no src", () => {
    // The first tag has no src and must not appear in the result.
    const images = extractImages(`<img alt="no source"><img src="/y.png" alt="ok">`)
    expect(images).toEqual([{ src: "/y.png", alt: "ok" }])
  })

  it("preserves apostrophes inside double-quoted alt", () => {
    // An apostrophe inside a double-quoted value must not terminate the attribute.
    const images = extractImages(`<img src="/x.png" alt="don't stop">`)
    expect(images).toEqual([{ src: "/x.png", alt: "don't stop" }])
  })
})

describe("extractJsonLd", () => {
it("parses valid JSON-LD", () => {
const html = `<script type="application/ld+json">{"@type":"WebSite","name":"Test"}</script>`
Expand Down
76 changes: 75 additions & 1 deletion packages/core/src/checks/content-structure.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { CheckDefinition } from "@agentimization/shared"
import { extractHeadings, extractCodeFences } from "../utils/html.js"
import { extractHeadings, extractCodeFences, extractImages } from "../utils/html.js"

/** Check for properly closed code fences in markdown */
const markdownCodeFenceValidity: CheckDefinition = {
Expand Down Expand Up @@ -197,8 +197,82 @@ const tabbedContentSerialization: CheckDefinition = {
},
}

/**
 * Image alt-text coverage check.
 *
 * Looks at the images on the first 10 sampled pages and sorts each one into:
 *   - described:  alt is present and non-blank
 *   - missing:    no alt attribute at all
 *   - decorative: alt is present but empty/blank (the WCAG marker for decorative imagery)
 *
 * Decorative images are neither a pass nor a fail, so they are left out of the
 * ratio. The check passes when at least 50% of the remaining "content" images
 * are described, warns from 25% up to 50%, and fails below 25%.
 */
const imageAltText: CheckDefinition = {
  id: "image-alt-text",
  name: "Image Alt Text Coverage",
  category: "content-structure",
  description: "Checks that at least 50% of images have descriptive alt text",
  weight: 0.5,
  run: async (ctx) => {
    // Identity fields repeated on every result this check returns.
    const base = {
      id: "image-alt-text",
      name: "Image Alt Text Coverage",
      category: "content-structure",
    } as const

    const sampled = ctx.sampledPages.slice(0, 10)
    if (sampled.length === 0) {
      return { ...base, status: "skip", message: "No pages sampled" }
    }

    // Classify every image in a single pass.
    let described = 0
    let missing = 0
    let decorative = 0
    for (const page of sampled) {
      for (const { alt } of extractImages(page.html)) {
        if (alt === undefined) {
          missing += 1
        } else if (alt.trim().length === 0) {
          decorative += 1
        } else {
          described += 1
        }
      }
    }
    const totalImages = described + missing + decorative
    const contentCount = described + missing

    if (totalImages === 0) {
      return {
        ...base,
        status: "info",
        message: `No images found across ${sampled.length} sampled pages`,
      }
    }

    // Only decorative images were found: there is nothing to score.
    if (contentCount === 0) {
      return {
        ...base,
        status: "info",
        message: `All ${totalImages} sampled images are decorative (alt="")`,
        metadata: { decorativeImages: decorative, totalImages },
      }
    }

    const ratio = described / contentCount
    const pct = Math.round(ratio * 100)
    const decorativeNote = decorative > 0 ? `; ${decorative} decorative skipped` : ""
    const summary = `${described}/${contentCount} content images have descriptive alt text (${pct}%)${decorativeNote}`
    const metadata = {
      withAlt: described,
      contentImages: contentCount,
      decorativeImages: decorative,
      pct,
    }

    if (ratio < 0.5) {
      return {
        ...base,
        status: ratio >= 0.25 ? "warn" : "fail",
        message: summary,
        suggestion: "Add descriptive alt text to at least 50% of content images. AI agents and screen readers rely on alt text to understand visual content. Mark purely decorative images with alt=\"\" so they don't dilute the ratio.",
        metadata,
      }
    }

    return { ...base, status: "pass", message: summary, metadata }
  },
}

/** The content-structure check definitions this module exports, collected into one array. */
export const contentStructureChecks: CheckDefinition[] = [
markdownCodeFenceValidity,
sectionHeaderQuality,
tabbedContentSerialization,
imageAltText,
]
Loading
Loading