Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ COPYSCAPE_KEY=
# Enables passage-level evidence — shows exactly which sentences were copied.
# Get your key at: https://platform.parallel.ai
PARALLEL_API_KEY=

# ── OpenAlex — academic citations provider (default, recommended) ────────────
# Free service; the mailto identifies your client for the polite pool
# (100k requests/day). No API key required.
OPENALEX_MAILTO=your-email@example.com
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Every flagged issue ships with evidence + rewrite + citation:

- **Fact-check** now carries `sources[]` (Exa highlights with url/title/quote) on every finding. Upgrade to deep-reasoning with `--deep-fact-check`.
- **Grammar & Style** (LanguageTool + LLM fallback) produces a `rewrite` per finding. LLM-fallback rewrites are grammar-checked a second time to catch mechanical errors.
- **Academic Citations** (Semantic Scholar) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key.
- **Academic Citations** (OpenAlex by default) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key — see [Academic Citations](#academic-citations) below.
- **Self-Plagiarism** (Cloudflare Vectorize + OpenRouter embeddings) flags overlap with your past articles. Run `checkapp index <dir>` once to ingest your archive.

Pick a provider per skill from the Settings → Providers dashboard. CheckApp never holds API tokens — users bring their own keys.
Expand All @@ -44,7 +44,7 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.
| **AI Detection** | Copyscape | ~$0.09 | ✅ |
| **SEO** | Offline (no API) | free | ✅ |
| **Grammar & Style** | LanguageTool + LLM fallback | free tier / ~$0.002 | ✅ (free tier) |
| **Academic Citations** | Semantic Scholar | free | ✅ |
| **Academic Citations** | OpenAlex (default) / Semantic Scholar (legacy) | free | ✅ |
| **Self-Plagiarism** | Cloudflare Vectorize + OpenRouter embeddings | ~$0.0001 | ❌ requires index (`checkapp index <dir>`) |
| **Fact Check** | Tiered: Basic = Exa + LLM; Standard = Gemini + Google Search; Deep Audit = Gemini Deep Research | varies | Basic is available by default; Standard is opt-in; Deep Audit is async |
| **Tone of Voice** | Claude/MiniMax | ~$0.002 | ❌ requires LLM key + tone guide file |
Expand All @@ -55,6 +55,16 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.

All enabled skills run in parallel. Adding more skills does not increase total time significantly.

### Academic Citations

CheckApp finds peer-reviewed supporting papers for scientific, medical, and financial claims.

**Default provider: OpenAlex.** Free, ~250M indexed works, no API key required. Set `OPENALEX_MAILTO=your@email.com` in your `.env` to use the polite pool (100k req/day).

**Legacy provider: Semantic Scholar.** Users with an explicit `providers.academic = { provider: "semantic-scholar" }` config continue to use Semantic Scholar. Note: Semantic Scholar's free tier has aggressive per-IP rate limiting and is effectively unusable on shared IPs — that's why OpenAlex is the new default. Authenticated (paid) Semantic Scholar requests are not currently wired into the client; support for a paid Semantic Scholar API key is a separate workstream.

See `poc-replacement/03-academic-citations/RESULTS.md` for the comparison data that drove this decision.

---

## Fact-Check Tiers
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"setup": "bun src/index.tsx --setup",
"build": "bash build.sh",
"dev": "bun --watch src/index.tsx",
"test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts",
"test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts tests/golden/*.test.ts",
"test:dashboard": "cd dashboard && bunx vitest run",
"test:e2e:browser": "bun test tests/e2e/browser/*.test.ts",
"test:e2e:live": "bun test tests/e2e/live/*.test.ts",
Expand Down
25 changes: 24 additions & 1 deletion src/config.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, expect, it } from "bun:test";
import { describe, expect, it, test } from "bun:test";
import { readConfig } from "./config.ts";

describe("readConfig", () => {
Expand All @@ -8,4 +8,27 @@ describe("readConfig", () => {
expect(cfg.skills).toHaveProperty("academic", false);
expect(cfg.skills).toHaveProperty("selfPlagiarism", false);
});

// The env var must surface on the parsed config. Whatever value the process
// had beforehand is put back (or removed again) once the assertion has run.
test("loads OPENALEX_MAILTO from env", () => {
  const previous = process.env.OPENALEX_MAILTO;
  process.env.OPENALEX_MAILTO = "research@example.com";
  try {
    expect(readConfig().openalexMailto).toBe("research@example.com");
  } finally {
    if (previous !== undefined) {
      process.env.OPENALEX_MAILTO = previous;
    } else {
      delete process.env.OPENALEX_MAILTO;
    }
  }
});

// With the env var removed, the parsed config should carry no mailto.
// NOTE(review): readConfig falls back to the config file when the env var is
// absent, so this presumably fails on a machine whose config file sets
// openalexMailto — confirm the test environment is isolated from it.
test("openalexMailto is undefined when env unset", () => {
  const previous = process.env.OPENALEX_MAILTO;
  delete process.env.OPENALEX_MAILTO;
  try {
    const { openalexMailto } = readConfig();
    expect(openalexMailto).toBeUndefined();
  } finally {
    // Nothing to restore when the variable was not set to begin with.
    if (previous !== undefined) process.env.OPENALEX_MAILTO = previous;
  }
});
});
2 changes: 2 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export interface Config {
minimaxApiKey?: string;
openrouterApiKey?: string;
geminiApiKey?: string;
openalexMailto?: string;
llmProvider?: "minimax" | "anthropic" | "openrouter" | "gemini";
factCheckTier?: "basic" | "standard" | "premium";
factCheckTierFlag?: boolean;
Expand Down Expand Up @@ -109,6 +110,7 @@ export function readConfig(): Config {
minimaxApiKey: process.env.MINIMAX_API_KEY ?? file.minimaxApiKey,
openrouterApiKey: process.env.OPENROUTER_API_KEY ?? file.openrouterApiKey,
geminiApiKey: process.env.GEMINI_API_KEY ?? file.geminiApiKey,
openalexMailto: process.env.OPENALEX_MAILTO ?? file.openalexMailto,
llmProvider: (() => {
const validProviders = ["minimax", "anthropic", "openrouter", "gemini"];
const rawProvider = process.env.LLM_PROVIDER ?? file.llmProvider;
Expand Down
94 changes: 94 additions & 0 deletions src/providers/openalex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { describe, test, expect, mock, afterEach } from "bun:test";
import { oaSearch } from "./openalex.ts";

// Handle on the real fetch, captured once so every test can stub freely.
const originalFetch = globalThis.fetch;

/**
 * Replaces the global `fetch` with a bun mock that resolves to `response`
 * on every call, regardless of the arguments it receives.
 */
function mockFetch(response: Response | Promise<Response>) {
  const stub = mock(() => Promise.resolve(response));
  globalThis.fetch = stub as unknown as typeof fetch;
}

// Undo whatever stub the previous test installed.
afterEach(() => {
  globalThis.fetch = originalFetch;
});

Comment thread
sharonds marked this conversation as resolved.
Outdated
describe("oaSearch", () => {
  /** Wraps a payload in a 200 response whose body is its JSON encoding. */
  const okJson = (payload: unknown): Response =>
    new Response(JSON.stringify(payload), { status: 200 });

  /**
   * Installs a fetch stub that answers with an empty result set and records
   * the URL it was called with on the returned holder object.
   */
  const captureRequestUrl = (): { url: string } => {
    const seen = { url: "" };
    globalThis.fetch = mock((url: string) => {
      seen.url = url;
      return Promise.resolve(okJson({ results: [] }));
    }) as unknown as typeof fetch;
    return seen;
  };

  test("returns papers in SSPaper shape", async () => {
    const work = {
      id: "https://openalex.org/W123",
      doi: "https://doi.org/10.1136/bmj.i6583",
      title: "Vitamin D supplementation to prevent acute respiratory tract infections",
      publication_year: 2017,
      authorships: [
        { author: { display_name: "Martineau AR" } },
        { author: { display_name: "Jolliffe DA" } },
      ],
      primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" },
    };
    mockFetch(okJson({ results: [work] }));

    const papers = await oaSearch("vitamin d respiratory", 5, { mailto: "me@example.com" });

    expect(papers).toHaveLength(1);
    expect(papers[0].title).toContain("Vitamin D supplementation");
    expect(papers[0].year).toBe(2017);
    expect(papers[0].authors).toHaveLength(2);
    expect(papers[0].authors[0].name).toBe("Martineau AR");
    expect(papers[0].externalIds?.DOI).toBe("10.1136/bmj.i6583");
    expect(papers[0].url).toBe("https://www.bmj.com/content/356/bmj.i6583");
  });

  test("returns empty array when API returns no results", async () => {
    mockFetch(okJson({ results: [] }));
    expect(await oaSearch("no match", 5)).toEqual([]);
  });

  test("returns empty array on non-OK response", async () => {
    mockFetch(new Response("server error", { status: 500 }));
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("returns empty array on network throw", async () => {
    const failing = mock(() => Promise.reject(new Error("ECONNRESET")));
    globalThis.fetch = failing as unknown as typeof fetch;
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("returns empty array on malformed JSON response", async () => {
    // A 200 whose body is HTML: parsing the body as JSON fails.
    mockFetch(new Response("<html>not json</html>", { status: 200 }));
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("includes mailto in URL when provided", async () => {
    const seen = captureRequestUrl();
    await oaSearch("q", 3, { mailto: "x@y.com" });
    // "@" is percent-encoded by URLSearchParams.
    expect(seen.url).toContain("mailto=x%40y.com");
  });

  test("omits mailto when not provided", async () => {
    const seen = captureRequestUrl();
    await oaSearch("q", 3);
    expect(seen.url).not.toContain("mailto");
  });

  test("strips https://doi.org/ prefix from DOI field", async () => {
    const work = {
      id: "W1",
      doi: "https://doi.org/10.1000/xyz",
      title: "t",
      publication_year: 2020,
      authorships: [],
      primary_location: {},
    };
    mockFetch(okJson({ results: [work] }));
    const papers = await oaSearch("q", 1);
    expect(papers[0].externalIds?.DOI).toBe("10.1000/xyz");
  });
});
63 changes: 63 additions & 0 deletions src/providers/openalex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import type { SSPaper } from "./semanticscholar.ts";

/** Optional settings for {@link oaSearch}. */
export interface OaSearchOptions {
  /** Contact email forwarded as the `mailto` query parameter (polite pool). */
  mailto?: string;
}

/** One entry of a work's `authorships` array; only the display name is read. */
interface OaAuthorship { author?: { display_name?: string | null } | null }

/**
 * Subset of an OpenAlex work that the mapper reads.
 *
 * Fields the mapper already guards with `?.` / `??` are typed as nullable so
 * the declared shape agrees with how the value is actually handled.
 */
interface OaWork {
  /** OpenAlex identifier, e.g. `https://openalex.org/W123`. */
  id: string;
  /** DOI in URL form (`https://doi.org/...`) when the work has one. */
  doi?: string | null;
  title: string | null;
  publication_year?: number;
  authorships?: OaAuthorship[] | null;
  primary_location?: { landing_page_url?: string | null } | null;
}

/** Envelope of the `/works` list endpoint; only `results` is consumed. */
interface OaResponse { results?: OaWork[] }

/**
 * Search OpenAlex for papers matching a query.
 *
 * OpenAlex is a free, open academic-metadata service with ~250M indexed works.
 * Using the polite pool (via `mailto` param) grants 100k requests/day. No API
 * key is required for the polite pool; the `mailto` identifies the client for
 * soft rate limiting.
 *
 * Results are restricted to works of type `article` or `review` and returned
 * in the same `SSPaper` shape as `ssSearch`, so callers can swap providers
 * without changing downstream logic.
 *
 * @param query Free-text search terms. A blank query returns `[]` without
 *   contacting the API, so an empty claim never picks up unrelated papers.
 * @param limit Maximum number of papers to return. Fractions are truncated,
 *   values above the OpenAlex page-size cap are clamped, and values below 1
 *   or non-finite values return `[]` without contacting the API.
 * @param opts Optional settings; `mailto` is forwarded as a query parameter.
 * @returns Up to `limit` papers. An empty array on any error (network failure,
 *   non-2xx status, unparsable body) — the caller treats zero results as a
 *   warn (no academic support for this claim), not a hard failure.
 */
export async function oaSearch(
  query: string,
  limit = 3,
  opts: OaSearchOptions = {},
): Promise<SSPaper[]> {
  // Largest `per-page` value OpenAlex documents as valid; sending more makes
  // the API reject the request, which would surface as a silent empty result.
  const maxPerPage = 200;

  const searchTerms = query.trim();
  if (searchTerms.length === 0) return [];
  if (!Number.isFinite(limit) || limit < 1) return [];
  const perPage = Math.min(Math.trunc(limit), maxPerPage);

  const url = new URL("https://api.openalex.org/works");
  url.searchParams.set("search", searchTerms);
  url.searchParams.set("per-page", String(perPage));
  url.searchParams.set("select", "id,doi,title,publication_year,authorships,primary_location,type");
  url.searchParams.set("filter", "type:article|review");
  if (opts.mailto) url.searchParams.set("mailto", opts.mailto);

  try {
    const res = await fetch(url.toString());
    if (!res.ok) return [];
    const json = (await res.json()) as OaResponse | null;
    const results = json?.results;
    const works = Array.isArray(results) ? results : [];
    // slice() keeps the "up to `limit`" promise even if the server over-delivers.
    return works.slice(0, perPage).map((w) => ({
      paperId: w.id,
      title: w.title ?? "",
      // A JSON null year is normalised to undefined rather than leaked to callers.
      year: w.publication_year ?? undefined,
      authors: (w.authorships ?? [])
        .map((a) => ({ name: a.author?.display_name ?? "" }))
        .filter((a) => a.name.length > 0),
      // OpenAlex reports DOIs as full URLs; callers expect the bare identifier.
      externalIds: w.doi ? { DOI: w.doi.replace(/^https?:\/\/(dx\.)?doi\.org\//i, "") } : undefined,
      // Prefer the publisher landing page; the DOI URL is the fallback link.
      url: w.primary_location?.landing_page_url ?? (w.doi ?? undefined),
    }));
  } catch {
    // Deliberate best-effort: any failure is reported as "no papers found".
    return [];
  }
}
13 changes: 13 additions & 0 deletions src/providers/registry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { describe, test, expect } from "bun:test";
import { getProvider } from "./registry.ts";

// Registry lookup for the OpenAlex entry under the academic skill.
describe("getProvider", () => {
  test("returns metadata for academic + openalex", () => {
    const openalex = getProvider("academic", "openalex");

    expect(openalex).toBeDefined();
    expect(openalex?.id).toBe("openalex");
    // OpenAlex is registered as a free, key-less provider.
    expect(openalex?.freeTier).toBe(true);
    expect(openalex?.requiresKey).toBe(false);
    expect(openalex?.endpoint).toContain("api.openalex.org");
  });
});
1 change: 1 addition & 0 deletions src/providers/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const PROVIDER_REGISTRY: Partial<Record<SkillId, ProviderMetadata[]>> = {
],
academic: [
{ id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" },
{ id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" },
Comment thread
sharonds marked this conversation as resolved.
],
Comment thread
sharonds marked this conversation as resolved.
"self-plagiarism": [
{ id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true },
Expand Down
27 changes: 27 additions & 0 deletions src/providers/resolve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,33 @@ describe("resolveProvider", () => {
expect(resolveProvider({ ...base, copyscapeKey: "" }, "plagiarism")).toBeNull();
});

// With only the mailto configured, the academic skill resolves to OpenAlex
// and the mailto is what comes back in the `apiKey` slot.
test("routes academic → openalex when openalexMailto is set", () => {
  const cfg = { ...base, openalexMailto: "me@example.com" };
  const resolved = resolveProvider(cfg, "academic");
  expect(resolved?.provider).toBe("openalex");
  expect(resolved?.apiKey).toBe("me@example.com");
});

// Neither a mailto nor an explicit providers entry: nothing is resolved.
test("academic falls back to null when no openalexMailto and no explicit provider", () => {
  const cfg = { ...base };
  expect(resolveProvider(cfg, "academic")).toBeNull();
});

// An explicit providers.academic entry takes precedence over the mailto route.
test("explicit providers[academic] still wins over openalexMailto", () => {
  const cfg = {
    ...base,
    openalexMailto: "me@example.com",
    providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } },
  } as Config;
  const resolved = resolveProvider(cfg, "academic");
  expect(resolved?.provider).toBe("semantic-scholar");
  expect(resolved?.apiKey).toBe("ss-key");
});

test("metadata is undefined when provider id is not in registry", () => {
const r = resolveProvider(
{ ...base, providers: { "fact-check": { provider: "nonexistent" as never, apiKey: "k" } } } as Config,
Expand Down
1 change: 1 addition & 0 deletions src/providers/resolve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { getProvider } from "./registry.ts";
// Per-skill fallback: which provider a skill maps to and which flat Config
// field supplies the value handed back as that provider's key.
// NOTE(review): presumably consulted only when `providers[skill]` is absent —
// confirm against resolveProvider.
const LEGACY_MAP: Partial<Record<SkillId, { provider: SkillProviderConfig["provider"]; keyOf: (c: Config) => string | undefined }>> = {
  "fact-check": { provider: "exa-search", keyOf: (c) => c.exaApiKey },
  plagiarism: { provider: "copyscape", keyOf: (c) => c.copyscapeKey || undefined },
  // OpenAlex has no API key; the polite-pool mailto travels in the key slot.
  // A blank value (e.g. `OPENALEX_MAILTO=` left empty) is normalised to
  // undefined, matching how the copyscape entry treats an empty key.
  academic: { provider: "openalex", keyOf: (c) => c.openalexMailto || undefined },
};

const GEMINI_FACT_CHECK_PROVIDERS = new Set<SkillProviderConfig["provider"]>([
Expand Down
9 changes: 9 additions & 0 deletions src/providers/types.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { describe, test, expect } from "bun:test";
import type { ProviderId } from "./types.ts";

// The real check here happens at compile time: the annotated assignment only
// type-checks if "openalex" is a member of ProviderId. The runtime
// expectation merely gives the runner something to execute.
describe("ProviderId", () => {
  test("accepts openalex as a provider", () => {
    const providerId: ProviderId = "openalex";
    expect(providerId).toBe("openalex");
  });
});
2 changes: 1 addition & 1 deletion src/providers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export type ProviderId =
| "gemini-grounded" | "gemini-deep-research"
| "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback"
| "copyscape" | "originality"
| "semantic-scholar"
| "semantic-scholar" | "openalex"
| "cloudflare-vectorize" | "pinecone" | "upstash-vector";
Comment thread
sharonds marked this conversation as resolved.

export interface SkillProviderConfig {
Expand Down
Loading
Loading