// Patch context: diff --git a/scripts/fetch-benchmarks.ts b/scripts/fetch-benchmarks.ts
// (index 7aaa77b7a..3440bf201 100644, hunk @@ -187,6 +187,149 @@). The definitions below are
// the lines this hunk adds after the end of formatBenchmark().
// NOTE(review): this view appears to have lost generic type arguments and the markup inside
// template literals; every reconstructed spot is flagged below — confirm against the real file.

/**
 * Formats a duration in seconds with precision that shrinks as the value grows:
 * `0.123s`, `1.23s`, `12.3s`, `1m 15.3s`.
 *
 * The tier is chosen from the value as it will be printed, so a rounding carry can
 * never yield an out-of-range field such as `1m 60.0s`, `60.0s`, `10.00s` or `1.000s`.
 *
 * @param seconds Duration in seconds.
 * @returns The formatted duration string.
 */
function formatSeconds(seconds: number): string {
  const roundedToTenths = Number(seconds.toFixed(1));
  if (roundedToTenths >= 60) {
    // Work in whole tenths so the carry into minutes is exact.
    const totalTenths = Math.round(roundedToTenths * 10);
    const minutes = Math.floor(totalTenths / 600);
    const restSeconds = (totalTenths % 600) / 10;
    return `${minutes}m ${restSeconds.toFixed(1)}s`;
  }
  if (Number(seconds.toFixed(2)) >= 10) return `${seconds.toFixed(1)}s`;
  if (Number(seconds.toFixed(3)) >= 1) return `${seconds.toFixed(2)}s`;
  return `${seconds.toFixed(3)}s`;
}

/** The single most notable old-vs-new change found for one benchmark category. */
interface CategoryHighlight {
  label: string;
  // NOTE(review): the type arguments of `Omit` are not visible in this view;
  // `<BenchmarkData, 'repo'>` is a reconstruction — TODO confirm.
  key: keyof Omit<BenchmarkData, 'repo'>;
  repo: string;
  oldStr: string;
  newStr: string;
  oldSec: number | null;
  newSec: number | null;
  absDelta: number | null;
  pctDelta: number | null;
}

/**
 * Scans every repository's entry for one benchmark category and returns the change
 * that stands out most.
 *
 * The entry with the largest absolute time difference wins when that difference is at
 * least 0.1s; otherwise the entry with the largest relative difference is returned.
 * Entries that are missing, use a dash placeholder, fail to parse, or have no positive
 * finite baseline are ignored (a zero baseline has no meaningful percentage change).
 *
 * @param data Per-repository benchmark results (iterated as `[repo, benchData]`).
 * @param key Benchmark category to read from each repository's results.
 * @param label Human-readable category name copied into the result.
 * @returns The selected highlight, or `null` when no entry is usable.
 */
function pickMostImpactful(
  // NOTE(review): the type arguments of `Map` are not visible in this view;
  // `<string, BenchmarkData>` is a reconstruction — TODO confirm.
  data: Map<string, BenchmarkData>,
  key: keyof Omit<BenchmarkData, 'repo'>,
  label: string
): CategoryHighlight | null {
  const isPlaceholder = (value: string): boolean => value === '–' || value === '-';
  const unitSpacing = /\s+s/g;
  const meaningfulDeltaSec = 0.1;

  let bestByWallclock: CategoryHighlight | null = null;
  let bestByRelative: CategoryHighlight | null = null;
  let largestAbsDelta = 0;
  let largestPctDelta = 0;

  for (const [repo, benchData] of data) {
    const bench = benchData[key];
    if (!bench || isPlaceholder(bench.old) || isPlaceholder(bench.new)) continue;

    const oldSec = parseTimeToSeconds(bench.old);
    const newSec = parseTimeToSeconds(bench.new);
    if (oldSec === null || newSec === null) continue;
    // Dividing by a zero or non-finite baseline would give NaN/Infinity, which can never
    // be out-ranked by a later candidate and would be rendered as "NaN%".
    if (!Number.isFinite(oldSec) || !Number.isFinite(newSec) || oldSec <= 0) continue;

    const absDelta = oldSec - newSec; // positive => improvement
    const pctDelta = (absDelta / oldSec) * 100; // positive => improvement

    const candidate: CategoryHighlight = {
      label,
      key,
      repo,
      oldStr: bench.old.replace(unitSpacing, 's'),
      newStr: bench.new.replace(unitSpacing, 's'),
      oldSec,
      newSec,
      absDelta,
      pctDelta,
    };

    // The first usable candidate always seeds both slots; later ones must be strictly larger.
    if (bestByWallclock === null || Math.abs(absDelta) > largestAbsDelta) {
      bestByWallclock = candidate;
      largestAbsDelta = Math.abs(absDelta);
    }
    if (bestByRelative === null || Math.abs(pctDelta) > largestPctDelta) {
      bestByRelative = candidate;
      largestPctDelta = Math.abs(pctDelta);
    }
  }

  // Prefer wallclock if any candidate had a meaningful (>= 0.1s) absolute change.
  if (bestByWallclock !== null && largestAbsDelta >= meaningfulDeltaSec) return bestByWallclock;
  return bestByRelative;
}

/**
 * Builds the "Highlights" section: one row per benchmark category showing the most
 * impactful change, with a baseline bar and a latest bar.
 *
 * @param data Per-repository benchmark results.
 * @returns The generated section, or an empty string when no category has a usable entry.
 */
function generateBenchmarkBarGraph(data: Map<string, BenchmarkData>): string {
  const categories: { key: keyof Omit<BenchmarkData, 'repo'>; label: string }[] = [
    { key: 'forgeTest', label: 'Forge Test' },
    { key: 'forgeFuzzTest', label: 'Forge Fuzz Test' },
    { key: 'forgeTestIsolated', label: 'Forge Test (Isolated)' },
    { key: 'forgeBuildNoCache', label: 'Forge Build (No Cache)' },
    { key: 'forgeBuildWithCache', label: 'Forge Build (With Cache)' },
    { key: 'forgeCoverage', label: 'Forge Coverage' },
  ];

  const highlights: CategoryHighlight[] = [];
  for (const { key, label } of categories) {
    const highlight = pickMostImpactful(data, key, label);
    if (highlight) highlights.push(highlight);
  }
  if (highlights.length === 0) return '';

  // Scale all bars relative to the largest "old" wallclock so widths are comparable across rows.
  // NOTE(review): a "latest" time above the largest baseline yields a width over 100%;
  // whether the markup clips it cannot be seen from here — TODO confirm.
  const maxOldSec = Math.max(...highlights.map(h => h.oldSec ?? 0));

  // NOTE(review): the element markup inside the template literals below is not visible in
  // this view; only their text content survives and is reproduced as-is. The colour, width
  // and URL values computed per row were presumably consumed by that markup — restore the
  // tags from the real file before relying on this output.
  let mdx = '';
  mdx += `\n`;
  mdx += `Highlights\n`;
  mdx += `Most impactful change per benchmark category (by wallclock time, falling back to relative change).\n`;
  mdx += `\n`;

  for (const h of highlights) {
    const absDelta = h.absDelta ?? 0;
    const pctDelta = h.pctDelta ?? 0;
    const improved = absDelta > 0;
    const regressed = absDelta < 0;

    // Green for faster, red for slower, neutral grey when unchanged.
    const newColor = improved ? '#22c55e' : regressed ? '#ef4444' : 'var(--vocs-color_text3)';
    const newBg = improved ? 'rgba(34, 197, 94, 0.85)' : regressed ? 'rgba(239, 68, 68, 0.85)' : 'rgba(148, 163, 184, 0.6)';
    const oldBg = 'rgba(148, 163, 184, 0.35)';

    const oldWidthPct = maxOldSec > 0 ? ((h.oldSec ?? 0) / maxOldSec) * 100 : 0;
    const newWidthPct = maxOldSec > 0 ? ((h.newSec ?? 0) / maxOldSec) * 100 : 0;

    const arrow = improved ? '↓' : regressed ? '↑' : '';
    const pctDisplay = `${arrow}${Math.abs(pctDelta).toFixed(1)}%`;
    const deltaSign = improved ? '−' : regressed ? '+' : '';
    const deltaDisplay = `${deltaSign}${formatSeconds(Math.abs(absDelta))}`;
    const repoUrl = getRepositoryUrl(h.repo);

    mdx += `\n`;
    mdx += `\n`;
    mdx += `${h.label} · ${h.repo}\n`;
    mdx += `\n`;
    mdx += ` ${deltaDisplay}\n`;
    mdx += ` ${pctDisplay}\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += `\n`;
    // Old bar
    mdx += `\n`;
    mdx += ` baseline\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += ` ${h.oldStr}\n`;
    mdx += `\n`;
    // New bar
    mdx += `\n`;
    mdx += ` latest\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += ` ${h.newStr}\n`;
    mdx += `\n`;
    mdx += `\n`;
    mdx += `\n`;
  }

  mdx += `\n`;
  mdx += `\n`;
  return mdx;
}

// Remaining patch context (definitions only partially visible in this view, left untouched):
//   function generateBenchmarkCards(data: Map): string { const benchmarkTypes = [ { key: 'forgeTest', label: 'Test' },
//   @@ -293,6 +436,8 @@ async function main() {
//     let output = `{/* Auto-generated from ${benchmarkDate}. Do not edit manually. */}\n\n`;
//     output += `# Benchmarks\n\n`;
//     output += `Performance comparison between Foundry releases.\n\n`;
//   + output += generateBenchmarkBarGraph(benchmarkData);
//   + output += `\n`;
//     output += `\n`; output += `\n`; output += ` Baseline\n`;