diff --git a/.env.example b/.env.example index 8d4b4d9..e6033e3 100644 --- a/.env.example +++ b/.env.example @@ -14,3 +14,8 @@ COPYSCAPE_KEY= # Enables passage-level evidence — shows exactly which sentences were copied. # Get your key at: https://platform.parallel.ai PARALLEL_API_KEY= + +# ── OpenAlex — academic citations provider (default, recommended) ──────────── +# Free service; the mailto identifies your client for the polite pool +# (100k requests/day). No API key required. +OPENALEX_MAILTO=your-email@example.com diff --git a/README.md b/README.md index 1d3b957..7689286 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Every flagged issue ships with evidence + rewrite + citation: - **Fact-check** now carries `sources[]` (Exa highlights with url/title/quote) on every finding. Upgrade to deep-reasoning with `--deep-fact-check`. - **Grammar & Style** (LanguageTool + LLM fallback) produces a `rewrite` per finding. LLM-fallback rewrites are grammar-checked a second time to catch mechanical errors. -- **Academic Citations** (Semantic Scholar) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key. +- **Academic Citations** (OpenAlex recommended, Semantic Scholar legacy) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key — see [Academic Citations](#academic-citations) below. - **Self-Plagiarism** (Cloudflare Vectorize + OpenRouter embeddings) flags overlap with your past articles. Run `checkapp index ` once to ingest your archive. Pick a provider per skill from the Settings → Providers dashboard. CheckApp never holds API tokens — users bring their own keys. @@ -44,7 +44,7 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model. | **AI Detection** | Copyscape | ~$0.09 | ✅ | | **SEO** | Offline (no API) | free | ✅ | | **Grammar & Style** | LanguageTool + LLM fallback | free tier / ~$0.002 | ✅ (free tier) | -| **Academic Citations** | Semantic Scholar | free | ✅ | +| **Academic Citations** | OpenAlex (default) / Semantic Scholar (legacy) | free | ✅ | | **Self-Plagiarism** | Cloudflare Vectorize + OpenRouter embeddings | ~$0.0001 | ❌ requires index (`checkapp index `) | | **Fact Check** | Tiered: Basic = Exa + LLM; Standard = Gemini + Google Search; Deep Audit = Gemini Deep Research | varies | Basic is available by default; Standard is opt-in; Deep Audit is async | | **Tone of Voice** | Claude/MiniMax | ~$0.002 | ❌ requires LLM key + tone guide file | @@ -55,6 +55,16 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model. All enabled skills run in parallel. Adding more skills does not increase total time significantly. +### Academic Citations + +CheckApp finds peer-reviewed supporting papers for scientific, medical, and financial claims. + +**Recommended provider: OpenAlex.** Free, ~250M indexed works, no API key required. Set `OPENALEX_MAILTO=your@email.com` in your `.env` to enable it — this both activates the routing (skill is skipped if unset and no explicit provider is configured) and joins the polite pool (100k req/day). + +**Legacy provider: Semantic Scholar.** Users with an explicit `providers.academic = { provider: "semantic-scholar" }` config continue to hit SS. Note: the free tier of SS has aggressive per-IP rate limiting and is effectively unusable on shared IPs — that's why OpenAlex is the new default. Authenticated (paid) SS requests are not currently wired in the client; support for a paid SS API key is a separate workstream. + +See `poc-replacement/03-academic-citations/RESULTS.md` for the comparison data that drove this decision. + --- ## Fact-Check Tiers diff --git a/dashboard/src/lib/providers.ts b/dashboard/src/lib/providers.ts index 0ecfe5f..ede0422 100644 --- a/dashboard/src/lib/providers.ts +++ b/dashboard/src/lib/providers.ts @@ -12,7 +12,7 @@ export type ProviderId = | "gemini-grounded" | "gemini-deep-research" | "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback" | "copyscape" | "originality" - | "semantic-scholar" + | "semantic-scholar" | "openalex" | "cloudflare-vectorize" | "pinecone" | "upstash-vector"; export interface SkillProviderConfig { @@ -49,6 +49,7 @@ export const PROVIDER_REGISTRY: Partial> = { ], academic: [ { id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" }, + { id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" }, ], "self-plagiarism": [ { id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true }, diff --git a/package.json b/package.json index d3809b1..3c58468 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,7 @@ "setup": "bun src/index.tsx --setup", "build": "bash build.sh", "dev": "bun --watch src/index.tsx", - "test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts", + "test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts tests/golden/*.test.ts", "test:dashboard": "cd dashboard && bunx vitest run", "test:e2e:browser": "bun test tests/e2e/browser/*.test.ts", "test:e2e:live": "bun test tests/e2e/live/*.test.ts", diff --git a/src/config.test.ts b/src/config.test.ts index 9ee94db..eb0bdcb 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "bun:test"; +import { describe, expect, it, test } from "bun:test"; import { readConfig } from "./config.ts"; describe("readConfig", () => { @@ -8,4 +8,27 @@ describe("readConfig", () => { expect(cfg.skills).toHaveProperty("academic", false); expect(cfg.skills).toHaveProperty("selfPlagiarism", false); }); + + test("loads OPENALEX_MAILTO from env", () => { + const saved = process.env.OPENALEX_MAILTO; + process.env.OPENALEX_MAILTO = "research@example.com"; + try { + const config = readConfig(); + expect(config.openalexMailto).toBe("research@example.com"); + } finally { + if (saved === undefined) delete process.env.OPENALEX_MAILTO; + else process.env.OPENALEX_MAILTO = saved; + } + }); + + test("openalexMailto is undefined when env unset", () => { + const saved = process.env.OPENALEX_MAILTO; + delete process.env.OPENALEX_MAILTO; + try { + const config = readConfig(); + expect(config.openalexMailto).toBeUndefined(); + } finally { + if (saved !== undefined) process.env.OPENALEX_MAILTO = saved; + } + }); }); diff --git a/src/config.ts b/src/config.ts index 13abbe5..e5167ab 100644 --- a/src/config.ts +++ b/src/config.ts @@ -28,6 +28,7 @@ export interface Config { minimaxApiKey?: string; openrouterApiKey?: string; geminiApiKey?: string; + openalexMailto?: string; llmProvider?: "minimax" | "anthropic" | "openrouter" | "gemini"; factCheckTier?: "basic" | "standard" | "premium"; factCheckTierFlag?: boolean; @@ -109,6 +110,7 @@ export function readConfig(): Config { minimaxApiKey: process.env.MINIMAX_API_KEY ?? file.minimaxApiKey, openrouterApiKey: process.env.OPENROUTER_API_KEY ?? file.openrouterApiKey, geminiApiKey: process.env.GEMINI_API_KEY ?? file.geminiApiKey, + openalexMailto: process.env.OPENALEX_MAILTO ?? file.openalexMailto, llmProvider: (() => { const validProviders = ["minimax", "anthropic", "openrouter", "gemini"]; const rawProvider = process.env.LLM_PROVIDER ?? file.llmProvider; diff --git a/src/providers/openalex.test.ts b/src/providers/openalex.test.ts new file mode 100644 index 0000000..038e9e9 --- /dev/null +++ b/src/providers/openalex.test.ts @@ -0,0 +1,100 @@ +import { describe, test, expect } from "bun:test"; +import { oaSearch } from "./openalex.ts"; +import { mockFetch, jsonResponse } from "../testing/mock-fetch.ts"; + +describe("oaSearch", () => { + test("returns papers in SSPaper shape", async () => { + mockFetch(async () => jsonResponse({ + results: [ + { + id: "https://openalex.org/W123", + doi: "https://doi.org/10.1136/bmj.i6583", + title: "Vitamin D supplementation to prevent acute respiratory tract infections", + publication_year: 2017, + authorships: [{ author: { display_name: "Martineau AR" } }, { author: { display_name: "Jolliffe DA" } }], + primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" }, + }, + ], + })); + const papers = await oaSearch("vitamin d respiratory", 5, { mailto: "me@example.com" }); + expect(papers).toHaveLength(1); + expect(papers[0].title).toContain("Vitamin D supplementation"); + expect(papers[0].year).toBe(2017); + expect(papers[0].authors).toHaveLength(2); + expect(papers[0].authors[0].name).toBe("Martineau AR"); + expect(papers[0].externalIds?.DOI).toBe("10.1136/bmj.i6583"); + expect(papers[0].url).toBe("https://www.bmj.com/content/356/bmj.i6583"); + }); + + test("returns empty array when API returns no results", async () => { + mockFetch(async () => jsonResponse({ results: [] })); + const papers = await oaSearch("no match", 5); + expect(papers).toEqual([]); + }); + + test("returns empty array on non-OK response", async () => { + mockFetch(async () => new Response("server error", { status: 500 })); + const papers = await oaSearch("anything", 5); + expect(papers).toEqual([]); + }); + + test("returns empty array on 429 rate-limit response", async () => { + mockFetch(async () => new Response("rate limited", { status: 429 })); + const papers = await oaSearch("anything", 5); + expect(papers).toEqual([]); + }); + + test("returns empty array on network throw", async () => { + mockFetch(async () => { throw new Error("ECONNRESET"); }); + const papers = await oaSearch("anything", 5); + expect(papers).toEqual([]); + }); + + test("returns empty array on malformed JSON response", async () => { + mockFetch(async () => new Response("not json", { status: 200 })); + const papers = await oaSearch("anything", 5); + expect(papers).toEqual([]); + }); + + test("includes mailto in URL when provided", async () => { + let capturedUrl = ""; + mockFetch(async (req) => { + capturedUrl = req.url; + return jsonResponse({ results: [] }); + }); + await oaSearch("q", 3, { mailto: "x@y.com" }); + expect(capturedUrl).toContain("mailto=x%40y.com"); + }); + + test("omits mailto when not provided", async () => { + let capturedUrl = ""; + mockFetch(async (req) => { + capturedUrl = req.url; + return jsonResponse({ results: [] }); + }); + await oaSearch("q", 3); + expect(capturedUrl).not.toContain("mailto"); + }); + + test("strips https://doi.org/ prefix from DOI field", async () => { + mockFetch(async () => jsonResponse({ + results: [{ + id: "W1", doi: "https://doi.org/10.1000/xyz", + title: "t", publication_year: 2020, authorships: [], primary_location: {}, + }], + })); + const papers = await oaSearch("q", 1); + expect(papers[0].externalIds?.DOI).toBe("10.1000/xyz"); + }); + + test("strips https://dx.doi.org/ prefix from DOI field", async () => { + mockFetch(async () => jsonResponse({ + results: [{ + id: "W1", doi: "https://dx.doi.org/10.1000/xyz", + title: "t", publication_year: 2020, authorships: [], primary_location: {}, + }], + })); + const papers = await oaSearch("q", 1); + expect(papers[0].externalIds?.DOI).toBe("10.1000/xyz"); + }); +}); diff --git a/src/providers/openalex.ts b/src/providers/openalex.ts new file mode 100644 index 0000000..c59ca2c --- /dev/null +++ b/src/providers/openalex.ts @@ -0,0 +1,63 @@ +import type { SSPaper } from "./semanticscholar.ts"; + +export interface OaSearchOptions { + mailto?: string; +} + +interface OaAuthorship { author?: { display_name?: string } } + +interface OaWork { + id: string; + doi?: string | null; + title: string | null; + publication_year?: number; + authorships?: OaAuthorship[]; + primary_location?: { landing_page_url?: string }; +} + +interface OaResponse { results?: OaWork[] } + +/** + * Search OpenAlex for papers matching a query. + * + * OpenAlex is a free, open academic-metadata service with ~250M indexed works. + * Using the polite pool (via `mailto` param) grants 100k requests/day. No API + * key is required for the polite pool; the `mailto` identifies the client for + * soft rate limiting. + * + * Returns up to `limit` papers in the same `SSPaper` shape as `ssSearch` so + * callers can swap providers without changing downstream logic. Returns an + * empty array on any error — caller treats zero results as a warn (no academic + * support for this claim), not a hard failure. + */ +export async function oaSearch( + query: string, + limit = 3, + opts: OaSearchOptions = {}, +): Promise { + const url = new URL("https://api.openalex.org/works"); + url.searchParams.set("search", query); + url.searchParams.set("per-page", String(limit)); + url.searchParams.set("select", "id,doi,title,publication_year,authorships,primary_location,type"); + url.searchParams.set("filter", "type:article|review"); + if (opts.mailto) url.searchParams.set("mailto", opts.mailto); + + try { + const res = await fetch(url.toString()); + if (!res.ok) return []; + const json = (await res.json()) as OaResponse; + const works = json.results ?? []; + return works.map((w) => ({ + paperId: w.id, + title: w.title ?? "", + year: w.publication_year, + authors: (w.authorships ?? []) + .map((a) => ({ name: a.author?.display_name ?? "" })) + .filter((a) => a.name.length > 0), + externalIds: w.doi ? { DOI: w.doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, "") } : undefined, + url: w.primary_location?.landing_page_url ?? (w.doi ?? undefined), + })); + } catch { + return []; + } +} diff --git a/src/providers/registry.test.ts b/src/providers/registry.test.ts new file mode 100644 index 0000000..2bf115f --- /dev/null +++ b/src/providers/registry.test.ts @@ -0,0 +1,13 @@ +import { describe, test, expect } from "bun:test"; +import { getProvider } from "./registry.ts"; + +describe("getProvider", () => { + test("returns metadata for academic + openalex", () => { + const meta = getProvider("academic", "openalex"); + expect(meta).toBeDefined(); + expect(meta?.id).toBe("openalex"); + expect(meta?.freeTier).toBe(true); + expect(meta?.requiresKey).toBe(false); + expect(meta?.endpoint).toContain("api.openalex.org"); + }); +}); diff --git a/src/providers/registry.ts b/src/providers/registry.ts index 273b251..a381a71 100644 --- a/src/providers/registry.ts +++ b/src/providers/registry.ts @@ -16,6 +16,7 @@ export const PROVIDER_REGISTRY: Partial> = { ], academic: [ { id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" }, + { id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" }, ], "self-plagiarism": [ { id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true }, diff --git a/src/providers/resolve.test.ts b/src/providers/resolve.test.ts index 128335c..24fdd24 100644 --- a/src/providers/resolve.test.ts +++ b/src/providers/resolve.test.ts @@ -88,6 +88,33 @@ describe("resolveProvider", () => { expect(resolveProvider({ ...base, copyscapeKey: "" }, "plagiarism")).toBeNull(); }); + test("routes academic → openalex when openalexMailto is set", () => { + const r = resolveProvider( + { ...base, openalexMailto: "me@example.com" }, + "academic" + ); + expect(r?.provider).toBe("openalex"); + expect(r?.apiKey).toBe("me@example.com"); + }); + + test("academic falls back to null when no openalexMailto and no explicit provider", () => { + const r = resolveProvider({ ...base }, "academic"); + expect(r).toBeNull(); + }); + + test("explicit providers[academic] still wins over openalexMailto", () => { + const r = resolveProvider( + { + ...base, + openalexMailto: "me@example.com", + providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } }, + } as Config, + "academic" + ); + expect(r?.provider).toBe("semantic-scholar"); + expect(r?.apiKey).toBe("ss-key"); + }); + test("metadata is undefined when provider id is not in registry", () => { const r = resolveProvider( { ...base, providers: { "fact-check": { provider: "nonexistent" as never, apiKey: "k" } } } as Config, diff --git a/src/providers/resolve.ts b/src/providers/resolve.ts index 2665c72..0af6827 100644 --- a/src/providers/resolve.ts +++ b/src/providers/resolve.ts @@ -9,6 +9,7 @@ import { getProvider } from "./registry.ts"; const LEGACY_MAP: Partial string | undefined }>> = { "fact-check": { provider: "exa-search", keyOf: (c) => c.exaApiKey }, plagiarism: { provider: "copyscape", keyOf: (c) => c.copyscapeKey || undefined }, + academic: { provider: "openalex", keyOf: (c) => c.openalexMailto }, }; const GEMINI_FACT_CHECK_PROVIDERS = new Set([ diff --git a/src/providers/types.test.ts b/src/providers/types.test.ts new file mode 100644 index 0000000..bf51b5b --- /dev/null +++ b/src/providers/types.test.ts @@ -0,0 +1,9 @@ +import { describe, test, expect } from "bun:test"; +import type { ProviderId } from "./types.ts"; + +describe("ProviderId", () => { + test("accepts openalex as a provider", () => { + const p: ProviderId = "openalex"; + expect(p).toBe("openalex"); + }); +}); diff --git a/src/providers/types.ts b/src/providers/types.ts index 2406fbf..9030f17 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -8,7 +8,7 @@ export type ProviderId = | "gemini-grounded" | "gemini-deep-research" | "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback" | "copyscape" | "originality" - | "semantic-scholar" + | "semantic-scholar" | "openalex" | "cloudflare-vectorize" | "pinecone" | "upstash-vector"; export interface SkillProviderConfig { diff --git a/src/skills/academic.test.ts b/src/skills/academic.test.ts index 4b1c798..1d323a3 100644 --- a/src/skills/academic.test.ts +++ b/src/skills/academic.test.ts @@ -83,3 +83,76 @@ describe("AcademicSkill — enricher", () => { expect(r.findings.length).toBe(0); }); }); + +const baseConfig: Config = { + copyscapeUser: "", copyscapeKey: "", + skills: { + plagiarism: false, aiDetection: false, seo: false, + factCheck: false, tone: false, legal: false, + summary: false, brief: false, purpose: false, + academic: true, + }, +}; + +const sampleText = "A 2017 study found that vitamin D supplementation reduces the risk of acute respiratory tract infections."; + +describe("AcademicSkill provider routing", () => { + test("routes to OpenAlex when openalexMailto is configured", async () => { + let openalexCalls = 0; + let ssCalls = 0; + mockFetch(urlRouter({ + "api.openalex.org": async () => { + openalexCalls++; + return jsonResponse({ + results: [{ + id: "https://openalex.org/W1", + doi: "https://doi.org/10.1136/bmj.i6583", + title: "Vitamin D supplementation to prevent acute respiratory tract infections", + publication_year: 2017, + authorships: [{ author: { display_name: "Martineau AR" } }], + primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" }, + }], + }); + }, + "api.semanticscholar.org": async () => { + ssCalls++; + return jsonResponse({ data: [] }); + }, + })); + + const skill = new AcademicSkill(); + const result = await skill.enrich(sampleText, { ...baseConfig, openalexMailto: "test@example.com" }, []); + + expect(openalexCalls).toBeGreaterThan(0); + expect(ssCalls).toBe(0); + expect(result.findings.length).toBeGreaterThan(0); + }); + + test("routes to Semantic Scholar via legacy explicit providers config (no openalexMailto)", async () => { + let openalexCalls = 0; + let ssCalls = 0; + mockFetch(urlRouter({ + "api.openalex.org": async () => { + openalexCalls++; + return jsonResponse({ results: [] }); + }, + "api.semanticscholar.org": async () => { + ssCalls++; + return jsonResponse({ + data: [{ + paperId: "S1", title: "Some SS paper", year: 2019, authors: [{ name: "X" }], + }], + }); + }, + })); + + const skill = new AcademicSkill(); + await skill.enrich(sampleText, { + ...baseConfig, + providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } }, + } as Config, []); + + expect(ssCalls).toBeGreaterThan(0); + expect(openalexCalls).toBe(0); + }); +}); diff --git a/src/skills/academic.ts b/src/skills/academic.ts index 271c6ea..5a2cf02 100644 --- a/src/skills/academic.ts +++ b/src/skills/academic.ts @@ -2,6 +2,7 @@ import type { SkillResult, Finding, Citation, EnricherSkill, ClaimType } from ". import type { Config } from "../config.ts"; import { resolveProvider } from "../providers/resolve.ts"; import { ssSearch } from "../providers/semanticscholar.ts"; +import { oaSearch } from "../providers/openalex.ts"; const MAX_ENRICH_TARGETS = 5; const TARGET_CLAIM_TYPES = new Set(["scientific", "medical", "financial"]); @@ -52,7 +53,9 @@ export class AcademicSkill implements EnricherSkill { const findings: Finding[] = []; for (const target of targets.slice(0, MAX_ENRICH_TARGETS)) { - const papers = await ssSearch(target.claim, 3); + const papers = resolved.provider === "openalex" + ? await oaSearch(target.claim, 3, { mailto: resolved.apiKey }) + : await ssSearch(target.claim, 3); const citations: Citation[] = papers.map((p) => ({ title: p.title, authors: p.authors?.map(a => a.name), diff --git a/tests/golden/academic-ss-baseline.json b/tests/golden/academic-ss-baseline.json new file mode 100644 index 0000000..c18cbdf --- /dev/null +++ b/tests/golden/academic-ss-baseline.json @@ -0,0 +1,22 @@ +{ + "skillId": "academic", + "name": "Academic Citations", + "verdict": "pass", + "findings": [ + { + "text": "Suggested citations for: \"A 2017 meta-analysis in BMJ confirmed this.\"", + "claimType": "scientific", + "citations": [ + { + "title": "Vitamin D supplementation", + "authors": [ + "Martineau AR" + ], + "year": 2017, + "doi": "10.1136/bmj.i6583", + "url": "https://www.bmj.com/content/356/bmj.i6583" + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/golden/academic-ss-baseline.test.ts b/tests/golden/academic-ss-baseline.test.ts new file mode 100644 index 0000000..e34b1d4 --- /dev/null +++ b/tests/golden/academic-ss-baseline.test.ts @@ -0,0 +1,65 @@ +import { describe, test, expect } from "bun:test"; +import { AcademicSkill } from "../../src/skills/academic.ts"; +import { mockFetch, urlRouter, jsonResponse } from "../../src/testing/mock-fetch.ts"; +import type { Config } from "../../src/config.ts"; +import { readFileSync, existsSync, writeFileSync } from "fs"; +import { join } from "path"; + +const config: Config = { + copyscapeUser: "", copyscapeKey: "", + providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } }, + skills: { + plagiarism: false, aiDetection: false, seo: false, + factCheck: false, tone: false, legal: false, + summary: false, brief: false, purpose: false, + academic: true, + }, +}; + +const fixturePath = join(import.meta.dir, "academic-ss-baseline.json"); +const sampleText = "Vitamin D reduces the risk of acute respiratory infections. A 2017 meta-analysis in BMJ confirmed this."; + +describe("academic skill — SS path regression", () => { + test("output shape matches captured baseline when using SS", async () => { + mockFetch(urlRouter({ + "api.semanticscholar.org": async () => jsonResponse({ + data: [{ + paperId: "S-fixed", + title: "Vitamin D supplementation", + year: 2017, + authors: [{ name: "Martineau AR" }], + externalIds: { DOI: "10.1136/bmj.i6583" }, + url: "https://www.bmj.com/content/356/bmj.i6583", + }], + }), + })); + + const skill = new AcademicSkill(); + const result = await skill.enrich(sampleText, config, []); + + const canonical = JSON.stringify({ + skillId: result.skillId, + name: result.name, + verdict: result.verdict, + findings: result.findings.map((f) => ({ + text: f.text, + claimType: f.claimType, + citations: f.citations ?? [], + })), + }, null, 2); + + if (!existsSync(fixturePath)) { + if (process.env.UPDATE_GOLDEN === "1") { + writeFileSync(fixturePath, canonical); + console.log(`Wrote initial baseline → ${fixturePath}. Re-run to compare.`); + return; + } + throw new Error( + `Missing golden fixture: ${fixturePath}. ` + + `Run with UPDATE_GOLDEN=1 to generate, then commit the file.` + ); + } + const baseline = readFileSync(fixturePath, "utf8"); + expect(canonical).toBe(baseline); + }); +}); diff --git a/tests/integration/academic-openalex.test.ts b/tests/integration/academic-openalex.test.ts new file mode 100644 index 0000000..dfca0d2 --- /dev/null +++ b/tests/integration/academic-openalex.test.ts @@ -0,0 +1,32 @@ +import { describe, test, expect } from "bun:test"; +import { oaSearch } from "../../src/providers/openalex.ts"; + +const RUN = process.env.OPENALEX_INTEGRATION === "1"; +const mailto = process.env.OPENALEX_MAILTO; + +describe.skipIf(!RUN)("OpenAlex live API", () => { + test("returns a well-formed paper list for a well-known medical query", async () => { + const papers = await oaSearch( + "Vitamin D supplementation acute respiratory tract infections", + 5, + { mailto } + ); + expect(papers.length).toBeGreaterThan(0); + for (const p of papers) { + expect(typeof p.paperId).toBe("string"); + expect(p.paperId.length).toBeGreaterThan(0); + expect(typeof p.title).toBe("string"); + expect(p.title.length).toBeGreaterThan(0); + expect(Array.isArray(p.authors)).toBe(true); + } + const topical = papers.some((p) => + /vitamin|respiratory|infection/i.test(p.title) + ); + expect(topical).toBe(true); + }, 15000); + + test("does not throw for a low-signal query", async () => { + const papers = await oaSearch("zxqvwypqxcvbnmasdfgklqwerty", 3, { mailto }); + expect(Array.isArray(papers)).toBe(true); + }, 15000); +});