diff --git a/.env.example b/.env.example
index 8d4b4d9..e6033e3 100644
--- a/.env.example
+++ b/.env.example
@@ -14,3 +14,8 @@ COPYSCAPE_KEY=
# Enables passage-level evidence — shows exactly which sentences were copied.
# Get your key at: https://platform.parallel.ai
PARALLEL_API_KEY=
+
+# ── OpenAlex — academic citations provider (default, recommended) ────────────
+# Free service; the mailto identifies your client for the polite pool
+# (100k requests/day). No API key required. Replace the placeholder below with your own address.
+OPENALEX_MAILTO=your-email@example.com
diff --git a/README.md b/README.md
index 1d3b957..7689286 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Every flagged issue ships with evidence + rewrite + citation:
- **Fact-check** now carries `sources[]` (Exa highlights with url/title/quote) on every finding. Upgrade to deep-reasoning with `--deep-fact-check`.
- **Grammar & Style** (LanguageTool + LLM fallback) produces a `rewrite` per finding. LLM-fallback rewrites are grammar-checked a second time to catch mechanical errors.
-- **Academic Citations** (Semantic Scholar) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key.
+- **Academic Citations** (OpenAlex recommended, Semantic Scholar legacy) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key — see [Academic Citations](#academic-citations) below.
- **Self-Plagiarism** (Cloudflare Vectorize + OpenRouter embeddings) flags overlap with your past articles. Run `checkapp index
` once to ingest your archive.
Pick a provider per skill from the Settings → Providers dashboard. CheckApp never holds API tokens — users bring their own keys.
@@ -44,7 +44,7 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.
| **AI Detection** | Copyscape | ~$0.09 | ✅ |
| **SEO** | Offline (no API) | free | ✅ |
| **Grammar & Style** | LanguageTool + LLM fallback | free tier / ~$0.002 | ✅ (free tier) |
-| **Academic Citations** | Semantic Scholar | free | ✅ |
+| **Academic Citations** | OpenAlex (default) / Semantic Scholar (legacy) | free | ✅ |
| **Self-Plagiarism** | Cloudflare Vectorize + OpenRouter embeddings | ~$0.0001 | ❌ requires index (`checkapp index `) |
| **Fact Check** | Tiered: Basic = Exa + LLM; Standard = Gemini + Google Search; Deep Audit = Gemini Deep Research | varies | Basic is available by default; Standard is opt-in; Deep Audit is async |
| **Tone of Voice** | Claude/MiniMax | ~$0.002 | ❌ requires LLM key + tone guide file |
@@ -55,6 +55,16 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.
All enabled skills run in parallel. Adding more skills does not increase total time significantly.
+### Academic Citations
+
+CheckApp finds peer-reviewed supporting papers for scientific, medical, and financial claims.
+
+**Recommended provider: OpenAlex.** Free, ~250M indexed works, no API key required. Set `OPENALEX_MAILTO=your@email.com` in your `.env` to enable it. This setting does two things: it activates OpenAlex routing (the skill is skipped when the variable is unset and no explicit provider is configured), and it places your requests in OpenAlex's polite pool (100k requests/day).
+
+**Legacy provider: Semantic Scholar.** Users with an explicit `providers.academic = { provider: "semantic-scholar" }` config continue to use Semantic Scholar (SS). Note: the SS free tier applies aggressive per-IP rate limiting and is effectively unusable on shared IPs, which is why OpenAlex is the new default. The client does not currently send authenticated (paid) SS requests; support for a paid SS API key is a separate workstream.
+
+See `poc-replacement/03-academic-citations/RESULTS.md` for the comparison data that drove this decision.
+
---
## Fact-Check Tiers
diff --git a/dashboard/src/lib/providers.ts b/dashboard/src/lib/providers.ts
index 0ecfe5f..ede0422 100644
--- a/dashboard/src/lib/providers.ts
+++ b/dashboard/src/lib/providers.ts
@@ -12,7 +12,7 @@ export type ProviderId =
| "gemini-grounded" | "gemini-deep-research"
| "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback"
| "copyscape" | "originality"
- | "semantic-scholar"
+ | "semantic-scholar" | "openalex"
| "cloudflare-vectorize" | "pinecone" | "upstash-vector";
export interface SkillProviderConfig {
@@ -49,6 +49,7 @@ export const PROVIDER_REGISTRY: Partial> = {
],
academic: [
{ id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" },
+ { id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" },
],
"self-plagiarism": [
{ id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true },
diff --git a/package.json b/package.json
index d3809b1..3c58468 100644
--- a/package.json
+++ b/package.json
@@ -42,7 +42,7 @@
"setup": "bun src/index.tsx --setup",
"build": "bash build.sh",
"dev": "bun --watch src/index.tsx",
- "test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts",
+ "test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts tests/golden/*.test.ts",
"test:dashboard": "cd dashboard && bunx vitest run",
"test:e2e:browser": "bun test tests/e2e/browser/*.test.ts",
"test:e2e:live": "bun test tests/e2e/live/*.test.ts",
diff --git a/src/config.test.ts b/src/config.test.ts
index 9ee94db..eb0bdcb 100644
--- a/src/config.test.ts
+++ b/src/config.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it } from "bun:test";
+import { describe, expect, it, test } from "bun:test";
import { readConfig } from "./config.ts";
describe("readConfig", () => {
@@ -8,4 +8,27 @@ describe("readConfig", () => {
expect(cfg.skills).toHaveProperty("academic", false);
expect(cfg.skills).toHaveProperty("selfPlagiarism", false);
});
+
+ test("loads OPENALEX_MAILTO from env", () => {
+ const saved = process.env.OPENALEX_MAILTO;
+ process.env.OPENALEX_MAILTO = "research@example.com";
+ try {
+ const config = readConfig();
+ expect(config.openalexMailto).toBe("research@example.com");
+ } finally {
+ if (saved === undefined) delete process.env.OPENALEX_MAILTO;
+ else process.env.OPENALEX_MAILTO = saved;
+ }
+ });
+
+ test("openalexMailto is undefined when env unset", () => {
+ const saved = process.env.OPENALEX_MAILTO;
+ delete process.env.OPENALEX_MAILTO;
+ try {
+ const config = readConfig();
+ expect(config.openalexMailto).toBeUndefined();
+ } finally {
+ if (saved !== undefined) process.env.OPENALEX_MAILTO = saved;
+ }
+ });
});
diff --git a/src/config.ts b/src/config.ts
index 13abbe5..e5167ab 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -28,6 +28,7 @@ export interface Config {
minimaxApiKey?: string;
openrouterApiKey?: string;
geminiApiKey?: string;
+ openalexMailto?: string;
llmProvider?: "minimax" | "anthropic" | "openrouter" | "gemini";
factCheckTier?: "basic" | "standard" | "premium";
factCheckTierFlag?: boolean;
@@ -109,6 +110,7 @@ export function readConfig(): Config {
minimaxApiKey: process.env.MINIMAX_API_KEY ?? file.minimaxApiKey,
openrouterApiKey: process.env.OPENROUTER_API_KEY ?? file.openrouterApiKey,
geminiApiKey: process.env.GEMINI_API_KEY ?? file.geminiApiKey,
+ openalexMailto: process.env.OPENALEX_MAILTO ?? file.openalexMailto,
llmProvider: (() => {
const validProviders = ["minimax", "anthropic", "openrouter", "gemini"];
const rawProvider = process.env.LLM_PROVIDER ?? file.llmProvider;
diff --git a/src/providers/openalex.test.ts b/src/providers/openalex.test.ts
new file mode 100644
index 0000000..038e9e9
--- /dev/null
+++ b/src/providers/openalex.test.ts
@@ -0,0 +1,100 @@
+import { describe, test, expect } from "bun:test";
+import { oaSearch } from "./openalex.ts";
+import { mockFetch, jsonResponse } from "../testing/mock-fetch.ts";
+
+// Unit tests for `oaSearch`. Each test hands `mockFetch` a handler that
+// produces the response the call should see (helper from ../testing/mock-fetch.ts).
+describe("oaSearch", () => {
+ // Happy path: one OpenAlex work is mapped to title, year, authors, DOI and url.
+ test("returns papers in SSPaper shape", async () => {
+ mockFetch(async () => jsonResponse({
+ results: [
+ {
+ id: "https://openalex.org/W123",
+ doi: "https://doi.org/10.1136/bmj.i6583",
+ title: "Vitamin D supplementation to prevent acute respiratory tract infections",
+ publication_year: 2017,
+ authorships: [{ author: { display_name: "Martineau AR" } }, { author: { display_name: "Jolliffe DA" } }],
+ primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" },
+ },
+ ],
+ }));
+ const papers = await oaSearch("vitamin d respiratory", 5, { mailto: "me@example.com" });
+ expect(papers).toHaveLength(1);
+ expect(papers[0].title).toContain("Vitamin D supplementation");
+ expect(papers[0].year).toBe(2017);
+ expect(papers[0].authors).toHaveLength(2);
+ expect(papers[0].authors[0].name).toBe("Martineau AR");
+ // The DOI is expected without its https://doi.org/ prefix.
+ expect(papers[0].externalIds?.DOI).toBe("10.1136/bmj.i6583");
+ expect(papers[0].url).toBe("https://www.bmj.com/content/356/bmj.i6583");
+ });
+
+ test("returns empty array when API returns no results", async () => {
+ mockFetch(async () => jsonResponse({ results: [] }));
+ const papers = await oaSearch("no match", 5);
+ expect(papers).toEqual([]);
+ });
+
+ // Failure paths: a non-OK status, a 429, a thrown fetch error and a non-JSON
+ // body must each resolve to [] instead of rejecting.
+ test("returns empty array on non-OK response", async () => {
+ mockFetch(async () => new Response("server error", { status: 500 }));
+ const papers = await oaSearch("anything", 5);
+ expect(papers).toEqual([]);
+ });
+
+ test("returns empty array on 429 rate-limit response", async () => {
+ mockFetch(async () => new Response("rate limited", { status: 429 }));
+ const papers = await oaSearch("anything", 5);
+ expect(papers).toEqual([]);
+ });
+
+ test("returns empty array on network throw", async () => {
+ mockFetch(async () => { throw new Error("ECONNRESET"); });
+ const papers = await oaSearch("anything", 5);
+ expect(papers).toEqual([]);
+ });
+
+ test("returns empty array on malformed JSON response", async () => {
+ mockFetch(async () => new Response("not json", { status: 200 }));
+ const papers = await oaSearch("anything", 5);
+ expect(papers).toEqual([]);
+ });
+
+ // The request URL is captured to check the `mailto` query parameter; "@" is
+ // expected in its percent-encoded form (%40).
+ test("includes mailto in URL when provided", async () => {
+ let capturedUrl = "";
+ mockFetch(async (req) => {
+ capturedUrl = req.url;
+ return jsonResponse({ results: [] });
+ });
+ await oaSearch("q", 3, { mailto: "x@y.com" });
+ expect(capturedUrl).toContain("mailto=x%40y.com");
+ });
+
+ test("omits mailto when not provided", async () => {
+ let capturedUrl = "";
+ mockFetch(async (req) => {
+ capturedUrl = req.url;
+ return jsonResponse({ results: [] });
+ });
+ await oaSearch("q", 3);
+ expect(capturedUrl).not.toContain("mailto");
+ });
+
+ // DOI normalisation: both the doi.org and dx.doi.org URL forms reduce to the bare DOI.
+ test("strips https://doi.org/ prefix from DOI field", async () => {
+ mockFetch(async () => jsonResponse({
+ results: [{
+ id: "W1", doi: "https://doi.org/10.1000/xyz",
+ title: "t", publication_year: 2020, authorships: [], primary_location: {},
+ }],
+ }));
+ const papers = await oaSearch("q", 1);
+ expect(papers[0].externalIds?.DOI).toBe("10.1000/xyz");
+ });
+
+ test("strips https://dx.doi.org/ prefix from DOI field", async () => {
+ mockFetch(async () => jsonResponse({
+ results: [{
+ id: "W1", doi: "https://dx.doi.org/10.1000/xyz",
+ title: "t", publication_year: 2020, authorships: [], primary_location: {},
+ }],
+ }));
+ const papers = await oaSearch("q", 1);
+ expect(papers[0].externalIds?.DOI).toBe("10.1000/xyz");
+ });
+});
diff --git a/src/providers/openalex.ts b/src/providers/openalex.ts
new file mode 100644
index 0000000..c59ca2c
--- /dev/null
+++ b/src/providers/openalex.ts
@@ -0,0 +1,63 @@
+import type { SSPaper } from "./semanticscholar.ts";
+
+/** Optional settings accepted by `oaSearch`. */
+export interface OaSearchOptions {
+ /** Contact address; when set, `oaSearch` sends it as the `mailto` query parameter. */
+ mailto?: string;
+}
+
+/** One authorship entry on a work; only the author's display name is read. */
+interface OaAuthorship { author?: { display_name?: string } }
+
+/** Subset of an OpenAlex work record that `oaSearch` maps into a paper. */
+interface OaWork {
+ /** Work identifier; copied to the paper's `paperId`. */
+ id: string;
+ /** DOI, possibly given as a doi.org / dx.doi.org URL; that URL prefix is stripped before use. */
+ doi?: string | null;
+ title: string | null;
+ publication_year?: number;
+ authorships?: OaAuthorship[];
+ /** Supplies the paper's landing-page URL; the DOI is the fallback when it is absent. */
+ primary_location?: { landing_page_url?: string };
+}
+
+/** Response envelope; matching works are read from `results`. */
+interface OaResponse { results?: OaWork[] }
+
+/**
+ * Search OpenAlex for articles and reviews matching `query`.
+ *
+ * No API key is needed. When `opts.mailto` is set it is sent as the `mailto`
+ * query parameter, which identifies the client for the polite pool.
+ *
+ * @param query - Free-text search; a blank query returns `[]` without a request.
+ * @param limit - Maximum number of papers to return (floored to an integer).
+ *   A value below 1, or a non-finite one, returns `[]` without a request.
+ * @param opts - Optional settings; only `mailto` is read.
+ * @returns Papers in the same `SSPaper` shape as `ssSearch`, with untitled
+ *   works dropped. Resolves to `[]` on any HTTP, network, or parse failure, so
+ *   callers can treat "no results" as a warning rather than a hard error.
+ */
+export async function oaSearch(
+  query: string,
+  limit = 3,
+  opts: OaSearchOptions = {},
+): Promise<SSPaper[]> {
+  const perPage = Math.floor(limit);
+  // Nothing useful can come back for these inputs, so skip the round-trip.
+  if (!Number.isFinite(perPage) || perPage < 1 || query.trim() === "") return [];
+
+  const url = new URL("https://api.openalex.org/works");
+  url.searchParams.set("search", query);
+  url.searchParams.set("per-page", String(perPage));
+  url.searchParams.set("select", "id,doi,title,publication_year,authorships,primary_location,type");
+  url.searchParams.set("filter", "type:article|review");
+  if (opts.mailto) url.searchParams.set("mailto", opts.mailto);
+
+  try {
+    const res = await fetch(url.toString());
+    if (!res.ok) return [];
+    const json = (await res.json()) as OaResponse;
+    // A citation without a title is unusable downstream, so untitled works are skipped.
+    const works = (json.results ?? []).filter((w) => (w.title ?? "").trim() !== "");
+    return works.map((w) => ({
+      paperId: w.id,
+      title: w.title ?? "",
+      // Treat a null year in the payload the same as a missing one.
+      year: w.publication_year ?? undefined,
+      authors: (w.authorships ?? [])
+        .map((a) => ({ name: a.author?.display_name ?? "" }))
+        .filter((a) => a.name.length > 0),
+      // Keep only the bare DOI; OpenAlex may return it as a doi.org URL.
+      externalIds: w.doi ? { DOI: w.doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, "") } : undefined,
+      url: w.primary_location?.landing_page_url ?? (w.doi ?? undefined),
+    }));
+  } catch {
+    // Deliberate best-effort: network errors and malformed JSON both mean "no papers".
+    return [];
+  }
+}
diff --git a/src/providers/registry.test.ts b/src/providers/registry.test.ts
new file mode 100644
index 0000000..2bf115f
--- /dev/null
+++ b/src/providers/registry.test.ts
@@ -0,0 +1,13 @@
+import { describe, test, expect } from "bun:test";
+import { getProvider } from "./registry.ts";
+
+// Confirms the registry exposes an `openalex` entry under the `academic` skill
+// with free-tier / no-key metadata and an api.openalex.org endpoint.
+describe("getProvider", () => {
+ test("returns metadata for academic + openalex", () => {
+ const meta = getProvider("academic", "openalex");
+ expect(meta).toBeDefined();
+ expect(meta?.id).toBe("openalex");
+ expect(meta?.freeTier).toBe(true);
+ expect(meta?.requiresKey).toBe(false);
+ expect(meta?.endpoint).toContain("api.openalex.org");
+ });
+});
diff --git a/src/providers/registry.ts b/src/providers/registry.ts
index 273b251..a381a71 100644
--- a/src/providers/registry.ts
+++ b/src/providers/registry.ts
@@ -16,6 +16,7 @@ export const PROVIDER_REGISTRY: Partial> = {
],
academic: [
{ id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" },
+ { id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" },
],
"self-plagiarism": [
{ id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true },
diff --git a/src/providers/resolve.test.ts b/src/providers/resolve.test.ts
index 128335c..24fdd24 100644
--- a/src/providers/resolve.test.ts
+++ b/src/providers/resolve.test.ts
@@ -88,6 +88,33 @@ describe("resolveProvider", () => {
expect(resolveProvider({ ...base, copyscapeKey: "" }, "plagiarism")).toBeNull();
});
+ test("routes academic → openalex when openalexMailto is set", () => {
+ const r = resolveProvider(
+ { ...base, openalexMailto: "me@example.com" },
+ "academic"
+ );
+ expect(r?.provider).toBe("openalex");
+ expect(r?.apiKey).toBe("me@example.com");
+ });
+
+ test("academic falls back to null when no openalexMailto and no explicit provider", () => {
+ const r = resolveProvider({ ...base }, "academic");
+ expect(r).toBeNull();
+ });
+
+ test("explicit providers[academic] still wins over openalexMailto", () => {
+ const r = resolveProvider(
+ {
+ ...base,
+ openalexMailto: "me@example.com",
+ providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } },
+ } as Config,
+ "academic"
+ );
+ expect(r?.provider).toBe("semantic-scholar");
+ expect(r?.apiKey).toBe("ss-key");
+ });
+
test("metadata is undefined when provider id is not in registry", () => {
const r = resolveProvider(
{ ...base, providers: { "fact-check": { provider: "nonexistent" as never, apiKey: "k" } } } as Config,
diff --git a/src/providers/resolve.ts b/src/providers/resolve.ts
index 2665c72..0af6827 100644
--- a/src/providers/resolve.ts
+++ b/src/providers/resolve.ts
@@ -9,6 +9,7 @@ import { getProvider } from "./registry.ts";
const LEGACY_MAP: Partial string | undefined }>> = {
"fact-check": { provider: "exa-search", keyOf: (c) => c.exaApiKey },
plagiarism: { provider: "copyscape", keyOf: (c) => c.copyscapeKey || undefined },
+  // Like copyscape above, coerce an empty or blank value to undefined so that
+  // `OPENALEX_MAILTO=` in .env counts as "not configured" instead of an empty key.
+  academic: { provider: "openalex", keyOf: (c) => c.openalexMailto?.trim() || undefined },
};
const GEMINI_FACT_CHECK_PROVIDERS = new Set([
diff --git a/src/providers/types.test.ts b/src/providers/types.test.ts
new file mode 100644
index 0000000..bf51b5b
--- /dev/null
+++ b/src/providers/types.test.ts
@@ -0,0 +1,9 @@
+import { describe, test, expect } from "bun:test";
+import type { ProviderId } from "./types.ts";
+
+// Compile-time guard: the typed assignment below only type-checks while
+// "openalex" is a member of `ProviderId`. The runtime assertion is trivially true.
+describe("ProviderId", () => {
+ test("accepts openalex as a provider", () => {
+ const p: ProviderId = "openalex";
+ expect(p).toBe("openalex");
+ });
+});
diff --git a/src/providers/types.ts b/src/providers/types.ts
index 2406fbf..9030f17 100644
--- a/src/providers/types.ts
+++ b/src/providers/types.ts
@@ -8,7 +8,7 @@ export type ProviderId =
| "gemini-grounded" | "gemini-deep-research"
| "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback"
| "copyscape" | "originality"
- | "semantic-scholar"
+ | "semantic-scholar" | "openalex"
| "cloudflare-vectorize" | "pinecone" | "upstash-vector";
export interface SkillProviderConfig {
diff --git a/src/skills/academic.test.ts b/src/skills/academic.test.ts
index 4b1c798..1d323a3 100644
--- a/src/skills/academic.test.ts
+++ b/src/skills/academic.test.ts
@@ -83,3 +83,76 @@ describe("AcademicSkill — enricher", () => {
expect(r.findings.length).toBe(0);
});
});
+
+// Minimal config for the routing tests: every skill flag is off except `academic`.
+// No provider or `openalexMailto` is set here; each test adds what it needs.
+const baseConfig: Config = {
+ copyscapeUser: "", copyscapeKey: "",
+ skills: {
+ plagiarism: false, aiDetection: false, seo: false,
+ factCheck: false, tone: false, legal: false,
+ summary: false, brief: false, purpose: false,
+ academic: true,
+ },
+};
+
+// Article text passed to `enrich` in both routing tests.
+const sampleText = "A 2017 study found that vitamin D supplementation reduces the risk of acute respiratory tract infections.";
+
+// Verifies which upstream API `AcademicSkill.enrich` calls. Handlers are
+// registered per API host through `urlRouter`, and each one bumps a counter.
+describe("AcademicSkill provider routing", () => {
+ // With `openalexMailto` set and no explicit provider, only OpenAlex should be called.
+ test("routes to OpenAlex when openalexMailto is configured", async () => {
+ let openalexCalls = 0;
+ let ssCalls = 0;
+ mockFetch(urlRouter({
+ "api.openalex.org": async () => {
+ openalexCalls++;
+ return jsonResponse({
+ results: [{
+ id: "https://openalex.org/W1",
+ doi: "https://doi.org/10.1136/bmj.i6583",
+ title: "Vitamin D supplementation to prevent acute respiratory tract infections",
+ publication_year: 2017,
+ authorships: [{ author: { display_name: "Martineau AR" } }],
+ primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" },
+ }],
+ });
+ },
+ "api.semanticscholar.org": async () => {
+ ssCalls++;
+ return jsonResponse({ data: [] });
+ },
+ }));
+
+ const skill = new AcademicSkill();
+ const result = await skill.enrich(sampleText, { ...baseConfig, openalexMailto: "test@example.com" }, []);
+
+ expect(openalexCalls).toBeGreaterThan(0);
+ expect(ssCalls).toBe(0);
+ expect(result.findings.length).toBeGreaterThan(0);
+ });
+
+ // An explicit `providers.academic` entry selects Semantic Scholar; OpenAlex must stay untouched.
+ test("routes to Semantic Scholar via legacy explicit providers config (no openalexMailto)", async () => {
+ let openalexCalls = 0;
+ let ssCalls = 0;
+ mockFetch(urlRouter({
+ "api.openalex.org": async () => {
+ openalexCalls++;
+ return jsonResponse({ results: [] });
+ },
+ "api.semanticscholar.org": async () => {
+ ssCalls++;
+ return jsonResponse({
+ data: [{
+ paperId: "S1", title: "Some SS paper", year: 2019, authors: [{ name: "X" }],
+ }],
+ });
+ },
+ }));
+
+ const skill = new AcademicSkill();
+ await skill.enrich(sampleText, {
+ ...baseConfig,
+ providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } },
+ } as Config, []);
+
+ expect(ssCalls).toBeGreaterThan(0);
+ expect(openalexCalls).toBe(0);
+ });
+});
diff --git a/src/skills/academic.ts b/src/skills/academic.ts
index 271c6ea..5a2cf02 100644
--- a/src/skills/academic.ts
+++ b/src/skills/academic.ts
@@ -2,6 +2,7 @@ import type { SkillResult, Finding, Citation, EnricherSkill, ClaimType } from ".
import type { Config } from "../config.ts";
import { resolveProvider } from "../providers/resolve.ts";
import { ssSearch } from "../providers/semanticscholar.ts";
+import { oaSearch } from "../providers/openalex.ts";
const MAX_ENRICH_TARGETS = 5;
const TARGET_CLAIM_TYPES = new Set(["scientific", "medical", "financial"]);
@@ -52,7 +53,9 @@ export class AcademicSkill implements EnricherSkill {
const findings: Finding[] = [];
for (const target of targets.slice(0, MAX_ENRICH_TARGETS)) {
- const papers = await ssSearch(target.claim, 3);
+ const papers = resolved.provider === "openalex"
+ ? await oaSearch(target.claim, 3, { mailto: resolved.apiKey })
+ : await ssSearch(target.claim, 3);
const citations: Citation[] = papers.map((p) => ({
title: p.title,
authors: p.authors?.map(a => a.name),
diff --git a/tests/golden/academic-ss-baseline.json b/tests/golden/academic-ss-baseline.json
new file mode 100644
index 0000000..c18cbdf
--- /dev/null
+++ b/tests/golden/academic-ss-baseline.json
@@ -0,0 +1,22 @@
+{
+ "skillId": "academic",
+ "name": "Academic Citations",
+ "verdict": "pass",
+ "findings": [
+ {
+ "text": "Suggested citations for: \"A 2017 meta-analysis in BMJ confirmed this.\"",
+ "claimType": "scientific",
+ "citations": [
+ {
+ "title": "Vitamin D supplementation",
+ "authors": [
+ "Martineau AR"
+ ],
+ "year": 2017,
+ "doi": "10.1136/bmj.i6583",
+ "url": "https://www.bmj.com/content/356/bmj.i6583"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tests/golden/academic-ss-baseline.test.ts b/tests/golden/academic-ss-baseline.test.ts
new file mode 100644
index 0000000..e34b1d4
--- /dev/null
+++ b/tests/golden/academic-ss-baseline.test.ts
@@ -0,0 +1,65 @@
+import { describe, test, expect } from "bun:test";
+import { AcademicSkill } from "../../src/skills/academic.ts";
+import { mockFetch, urlRouter, jsonResponse } from "../../src/testing/mock-fetch.ts";
+import type { Config } from "../../src/config.ts";
+import { readFileSync, existsSync, writeFileSync } from "fs";
+import { join } from "path";
+
+// Config that pins the academic skill to Semantic Scholar through an explicit
+// `providers.academic` entry; every other skill flag is off.
+const config: Config = {
+ copyscapeUser: "", copyscapeKey: "",
+ providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } },
+ skills: {
+ plagiarism: false, aiDetection: false, seo: false,
+ factCheck: false, tone: false, legal: false,
+ summary: false, brief: false, purpose: false,
+ academic: true,
+ },
+};
+
+// The baseline JSON sits next to this test file.
+const fixturePath = join(import.meta.dir, "academic-ss-baseline.json");
+// Article text passed to `enrich`.
+const sampleText = "Vitamin D reduces the risk of acute respiratory infections. A 2017 meta-analysis in BMJ confirmed this.";
+
+// Golden-file regression test for the Semantic Scholar path of AcademicSkill.
+// Run with UPDATE_GOLDEN=1 to write (or refresh) the fixture from the current
+// output, then re-run without it to compare.
+describe("academic skill — SS path regression", () => {
+  test("output shape matches captured baseline when using SS", async () => {
+    mockFetch(urlRouter({
+      "api.semanticscholar.org": async () => jsonResponse({
+        data: [{
+          paperId: "S-fixed",
+          title: "Vitamin D supplementation",
+          year: 2017,
+          authors: [{ name: "Martineau AR" }],
+          externalIds: { DOI: "10.1136/bmj.i6583" },
+          url: "https://www.bmj.com/content/356/bmj.i6583",
+        }],
+      }),
+    }));
+
+    const skill = new AcademicSkill();
+    const result = await skill.enrich(sampleText, config, []);
+
+    // Only these projected fields are compared against the baseline.
+    const canonical = JSON.stringify({
+      skillId: result.skillId,
+      name: result.name,
+      verdict: result.verdict,
+      findings: result.findings.map((f) => ({
+        text: f.text,
+        claimType: f.claimType,
+        citations: f.citations ?? [],
+      })),
+    }, null, 2);
+
+    // UPDATE_GOLDEN=1 always rewrites the fixture, so an intentional output
+    // change can be re-baselined without deleting the file first.
+    if (process.env.UPDATE_GOLDEN === "1") {
+      writeFileSync(fixturePath, canonical + "\n");
+      console.log(`Wrote baseline → ${fixturePath}. Re-run to compare.`);
+      return;
+    }
+    if (!existsSync(fixturePath)) {
+      throw new Error(
+        `Missing golden fixture: ${fixturePath}. ` +
+        `Run with UPDATE_GOLDEN=1 to generate, then commit the file.`
+      );
+    }
+    // Compare parsed values, not raw text, so a trailing newline or a CRLF
+    // checkout of the fixture cannot fail the test.
+    const baseline: unknown = JSON.parse(readFileSync(fixturePath, "utf8"));
+    expect(JSON.parse(canonical)).toEqual(baseline);
+  });
+});
diff --git a/tests/integration/academic-openalex.test.ts b/tests/integration/academic-openalex.test.ts
new file mode 100644
index 0000000..dfca0d2
--- /dev/null
+++ b/tests/integration/academic-openalex.test.ts
@@ -0,0 +1,32 @@
+import { describe, test, expect } from "bun:test";
+import { oaSearch } from "../../src/providers/openalex.ts";
+
+// Live-API smoke tests: they run only when OPENALEX_INTEGRATION=1.
+const RUN = process.env.OPENALEX_INTEGRATION === "1";
+// Forwarded to `oaSearch` as the `mailto` option; may be undefined.
+const mailto = process.env.OPENALEX_MAILTO;
+
+describe.skipIf(!RUN)("OpenAlex live API", () => {
+ // Checks the shape of every returned paper and that at least one title is on topic.
+ // The trailing 15000 is passed as the per-test timeout (ms).
+ test("returns a well-formed paper list for a well-known medical query", async () => {
+ const papers = await oaSearch(
+ "Vitamin D supplementation acute respiratory tract infections",
+ 5,
+ { mailto }
+ );
+ expect(papers.length).toBeGreaterThan(0);
+ for (const p of papers) {
+ expect(typeof p.paperId).toBe("string");
+ expect(p.paperId.length).toBeGreaterThan(0);
+ expect(typeof p.title).toBe("string");
+ expect(p.title.length).toBeGreaterThan(0);
+ expect(Array.isArray(p.authors)).toBe(true);
+ }
+ const topical = papers.some((p) =>
+ /vitamin|respiratory|infection/i.test(p.title)
+ );
+ expect(topical).toBe(true);
+ }, 15000);
+
+ // A nonsense query must still resolve to an array rather than reject.
+ test("does not throw for a low-signal query", async () => {
+ const papers = await oaSearch("zxqvwypqxcvbnmasdfgklqwerty", 3, { mailto });
+ expect(Array.isArray(papers)).toBe(true);
+ }, 15000);
+});