Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ COPYSCAPE_KEY=
# Enables passage-level evidence — shows exactly which sentences were copied.
# Get your key at: https://platform.parallel.ai
PARALLEL_API_KEY=

# ── OpenAlex — academic citations provider (default, recommended) ────────────
# Free service; the mailto identifies your client for the polite pool
# (100k requests/day). No API key required.
OPENALEX_MAILTO=your-email@example.com
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Every flagged issue ships with evidence + rewrite + citation:

- **Fact-check** now carries `sources[]` (Exa highlights with url/title/quote) on every finding. Upgrade to deep-reasoning with `--deep-fact-check`.
- **Grammar & Style** (LanguageTool + LLM fallback) produces a `rewrite` per finding. LLM-fallback rewrites are grammar-checked a second time to catch mechanical errors.
- **Academic Citations** (Semantic Scholar) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key.
- **Academic Citations** (OpenAlex recommended, Semantic Scholar legacy) merges citations onto matching fact-check findings with scientific/medical/financial claim types. Free, no API key — see [Academic Citations](#academic-citations) below.
- **Self-Plagiarism** (Cloudflare Vectorize + OpenRouter embeddings) flags overlap with your past articles. Run `checkapp index <dir>` once to ingest your archive.

Pick a provider per skill from the Settings → Providers dashboard. CheckApp never holds API tokens — users bring their own keys.
Expand All @@ -44,7 +44,7 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.
| **AI Detection** | Copyscape | ~$0.09 | ✅ |
| **SEO** | Offline (no API) | free | ✅ |
| **Grammar & Style** | LanguageTool + LLM fallback | free tier / ~$0.002 | ✅ (free tier) |
| **Academic Citations** | Semantic Scholar | free | ✅ |
| **Academic Citations** | OpenAlex (default) / Semantic Scholar (legacy) | free | ✅ |
| **Self-Plagiarism** | Cloudflare Vectorize + OpenRouter embeddings | ~$0.0001 | ❌ requires index (`checkapp index <dir>`) |
| **Fact Check** | Tiered: Basic = Exa + LLM; Standard = Gemini + Google Search; Deep Audit = Gemini Deep Research | varies | Basic is available by default; Standard is opt-in; Deep Audit is async |
| **Tone of Voice** | Claude/MiniMax | ~$0.002 | ❌ requires LLM key + tone guide file |
Expand All @@ -55,6 +55,16 @@ See [docs/security.md](docs/security.md) for the BYOK-alpha threat model.

All enabled skills run in parallel. Adding more skills does not increase total time significantly.

### Academic Citations

CheckApp finds peer-reviewed supporting papers for scientific, medical, and financial claims.

**Recommended provider: OpenAlex.** Free, ~250M indexed works, no API key required. Set `OPENALEX_MAILTO=your@email.com` in your `.env` to enable it. This does two things: it activates routing to OpenAlex (the skill is skipped if the variable is unset and no explicit provider is configured), and it joins the polite pool (100k requests/day).

**Legacy provider: Semantic Scholar.** Users with an explicit `providers.academic = { provider: "semantic-scholar" }` config continue to hit SS. Note: the free tier of SS has aggressive per-IP rate limiting and is effectively unusable on shared IPs — that's why OpenAlex is the new default. Authenticated (paid) SS requests are not currently wired in the client; support for a paid SS API key is a separate workstream.

See `poc-replacement/03-academic-citations/RESULTS.md` for the comparison data that drove this decision.

---

## Fact-Check Tiers
Expand Down
3 changes: 2 additions & 1 deletion dashboard/src/lib/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export type ProviderId =
| "gemini-grounded" | "gemini-deep-research"
| "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback"
| "copyscape" | "originality"
| "semantic-scholar"
| "semantic-scholar" | "openalex"
| "cloudflare-vectorize" | "pinecone" | "upstash-vector";

export interface SkillProviderConfig {
Expand Down Expand Up @@ -49,6 +49,7 @@ export const PROVIDER_REGISTRY: Partial<Record<SkillId, ProviderMetadata[]>> = {
],
academic: [
{ id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" },
{ id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" },
],
"self-plagiarism": [
{ id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true },
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"setup": "bun src/index.tsx --setup",
"build": "bash build.sh",
"dev": "bun --watch src/index.tsx",
"test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts",
"test": "bun test src/*.test.ts src/skills/*.test.ts src/utils/*.test.ts src/providers/*.test.ts src/cli/*.test.ts src/cost/*.test.ts tests/e2e/*.test.ts tests/golden/*.test.ts",
"test:dashboard": "cd dashboard && bunx vitest run",
"test:e2e:browser": "bun test tests/e2e/browser/*.test.ts",
"test:e2e:live": "bun test tests/e2e/live/*.test.ts",
Expand Down
25 changes: 24 additions & 1 deletion src/config.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, expect, it } from "bun:test";
import { describe, expect, it, test } from "bun:test";
import { readConfig } from "./config.ts";

describe("readConfig", () => {
Expand All @@ -8,4 +8,27 @@ describe("readConfig", () => {
expect(cfg.skills).toHaveProperty("academic", false);
expect(cfg.skills).toHaveProperty("selfPlagiarism", false);
});

// OPENALEX_MAILTO from the environment must surface as `config.openalexMailto`.
test("loads OPENALEX_MAILTO from env", () => {
  const previous = process.env.OPENALEX_MAILTO;
  // Put the variable back exactly as it was found (absent stays absent).
  const restore = (): void => {
    if (previous !== undefined) {
      process.env.OPENALEX_MAILTO = previous;
      return;
    }
    delete process.env.OPENALEX_MAILTO;
  };

  process.env.OPENALEX_MAILTO = "research@example.com";
  try {
    expect(readConfig().openalexMailto).toBe("research@example.com");
  } finally {
    restore();
  }
});

// With the variable removed, the field is expected to be undefined.
// NOTE(review): presumably relies on the config file not defining openalexMailto — confirm.
test("openalexMailto is undefined when env unset", () => {
  const previous = process.env.OPENALEX_MAILTO;
  const restore = (): void => {
    // Only a previously defined value needs to be written back.
    if (previous === undefined) return;
    process.env.OPENALEX_MAILTO = previous;
  };

  delete process.env.OPENALEX_MAILTO;
  try {
    expect(readConfig().openalexMailto).toBeUndefined();
  } finally {
    restore();
  }
});
});
2 changes: 2 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export interface Config {
minimaxApiKey?: string;
openrouterApiKey?: string;
geminiApiKey?: string;
openalexMailto?: string;
llmProvider?: "minimax" | "anthropic" | "openrouter" | "gemini";
factCheckTier?: "basic" | "standard" | "premium";
factCheckTierFlag?: boolean;
Expand Down Expand Up @@ -109,6 +110,7 @@ export function readConfig(): Config {
minimaxApiKey: process.env.MINIMAX_API_KEY ?? file.minimaxApiKey,
openrouterApiKey: process.env.OPENROUTER_API_KEY ?? file.openrouterApiKey,
geminiApiKey: process.env.GEMINI_API_KEY ?? file.geminiApiKey,
openalexMailto: process.env.OPENALEX_MAILTO ?? file.openalexMailto,
llmProvider: (() => {
const validProviders = ["minimax", "anthropic", "openrouter", "gemini"];
const rawProvider = process.env.LLM_PROVIDER ?? file.llmProvider;
Expand Down
100 changes: 100 additions & 0 deletions src/providers/openalex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { describe, test, expect } from "bun:test";
import { oaSearch } from "./openalex.ts";
import { mockFetch, jsonResponse } from "../testing/mock-fetch.ts";

describe("oaSearch", () => {
  /** Builds the mocked reply for a search that matched nothing. */
  const emptyPage = () => jsonResponse({ results: [] });

  /** Mocks an empty result page, runs `search`, and hands back the URL the mock received. */
  const urlRequestedBy = async (search: () => Promise<unknown>): Promise<string> => {
    let seenUrl = "";
    mockFetch(async (req) => {
      seenUrl = req.url;
      return emptyPage();
    });
    await search();
    return seenUrl;
  };

  /** Mocks a single work carrying `doi` and returns the DOI that oaSearch exposes for it. */
  const normalizedDoi = async (doi: string) => {
    mockFetch(async () => jsonResponse({
      results: [{
        id: "W1", doi,
        title: "t", publication_year: 2020, authorships: [], primary_location: {},
      }],
    }));
    const [paper] = await oaSearch("q", 1);
    return paper.externalIds?.DOI;
  };

  test("returns papers in SSPaper shape", async () => {
    const bmjWork = {
      id: "https://openalex.org/W123",
      doi: "https://doi.org/10.1136/bmj.i6583",
      title: "Vitamin D supplementation to prevent acute respiratory tract infections",
      publication_year: 2017,
      authorships: [{ author: { display_name: "Martineau AR" } }, { author: { display_name: "Jolliffe DA" } }],
      primary_location: { landing_page_url: "https://www.bmj.com/content/356/bmj.i6583" },
    };
    mockFetch(async () => jsonResponse({ results: [bmjWork] }));

    const papers = await oaSearch("vitamin d respiratory", 5, { mailto: "me@example.com" });

    expect(papers).toHaveLength(1);
    const [paper] = papers;
    expect(paper.title).toContain("Vitamin D supplementation");
    expect(paper.year).toBe(2017);
    expect(paper.authors).toHaveLength(2);
    expect(paper.authors[0].name).toBe("Martineau AR");
    expect(paper.externalIds?.DOI).toBe("10.1136/bmj.i6583");
    expect(paper.url).toBe("https://www.bmj.com/content/356/bmj.i6583");
  });

  test("returns empty array when API returns no results", async () => {
    mockFetch(async () => emptyPage());
    expect(await oaSearch("no match", 5)).toEqual([]);
  });

  // Every failure mode below must collapse to "no papers" rather than throw.
  test("returns empty array on non-OK response", async () => {
    mockFetch(async () => new Response("server error", { status: 500 }));
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("returns empty array on 429 rate-limit response", async () => {
    mockFetch(async () => new Response("rate limited", { status: 429 }));
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("returns empty array on network throw", async () => {
    mockFetch(async () => { throw new Error("ECONNRESET"); });
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("returns empty array on malformed JSON response", async () => {
    mockFetch(async () => new Response("<html>not json</html>", { status: 200 }));
    expect(await oaSearch("anything", 5)).toEqual([]);
  });

  test("includes mailto in URL when provided", async () => {
    const requested = await urlRequestedBy(() => oaSearch("q", 3, { mailto: "x@y.com" }));
    // "@" is percent-encoded in the query string.
    expect(requested).toContain("mailto=x%40y.com");
  });

  test("omits mailto when not provided", async () => {
    const requested = await urlRequestedBy(() => oaSearch("q", 3));
    expect(requested).not.toContain("mailto");
  });

  test("strips https://doi.org/ prefix from DOI field", async () => {
    expect(await normalizedDoi("https://doi.org/10.1000/xyz")).toBe("10.1000/xyz");
  });

  test("strips https://dx.doi.org/ prefix from DOI field", async () => {
    expect(await normalizedDoi("https://dx.doi.org/10.1000/xyz")).toBe("10.1000/xyz");
  });
});
63 changes: 63 additions & 0 deletions src/providers/openalex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import type { SSPaper } from "./semanticscholar.ts";

/** Optional settings for {@link oaSearch}. */
export interface OaSearchOptions {
  /** Contact email; when non-empty it is sent as the `mailto` query parameter. */
  mailto?: string;
}

/** One entry of a work's `authorships` list; only the author's display name is read. */
interface OaAuthorship { author?: { display_name?: string | null } | null }

/**
 * The subset of an OpenAlex work that `oaSearch` selects and maps.
 *
 * The payload is external JSON, so every field the mapper guards with `?.` /
 * `??` is typed as nullable instead of pretending the API always fills it.
 */
interface OaWork {
  id: string;
  doi?: string | null;
  title: string | null;
  publication_year?: number | null;
  authorships?: OaAuthorship[] | null;
  primary_location?: { landing_page_url?: string | null } | null;
}

/** Envelope of the `/works` list response; only `results` is consumed. */
interface OaResponse { results?: OaWork[] | null }

/** Upper bound applied to the `per-page` parameter (OpenAlex documents 1–200). */
const OA_MAX_PER_PAGE = 200;

/** Resolver prefix (`https://doi.org/`, `http://dx.doi.org/`, …) in front of a bare DOI. */
const DOI_URL_PREFIX = /^https?:\/\/(dx\.)?doi\.org\//i;

/**
 * Search OpenAlex for papers matching a query.
 *
 * OpenAlex is a free, open academic-metadata service with ~250M indexed works.
 * Using the polite pool (via `mailto` param) grants 100k requests/day. No API
 * key is required for the polite pool; the `mailto` identifies the client for
 * soft rate limiting.
 *
 * Results come back in the same `SSPaper` shape as `ssSearch` so callers can
 * swap providers without changing downstream logic.
 *
 * @param query Free-text search terms. A blank query returns `[]` without a
 *   network call, since it cannot identify papers supporting a claim.
 * @param limit Maximum number of papers. Values below 1 (or NaN) return `[]`;
 *   larger values are floored and capped at 200 before being sent as `per-page`.
 * @param opts  Optional settings; `opts.mailto` is forwarded when non-empty.
 * @returns The mapped papers, or an empty array on any failure (non-OK status,
 *   network error, unparsable or unexpected body). Callers treat zero results
 *   as a warning, not a hard failure, so this function never throws.
 */
export async function oaSearch(
  query: string,
  limit = 3,
  opts: OaSearchOptions = {},
): Promise<SSPaper[]> {
  const search = query.trim();
  // `!(limit >= 1)` is also true for NaN, which a plain `limit < 1` would let through.
  if (search.length === 0 || !(limit >= 1)) return [];

  const perPage = Math.min(OA_MAX_PER_PAGE, Math.floor(limit));

  const url = new URL("https://api.openalex.org/works");
  url.searchParams.set("search", search);
  url.searchParams.set("per-page", String(perPage));
  url.searchParams.set("select", "id,doi,title,publication_year,authorships,primary_location,type");
  url.searchParams.set("filter", "type:article|review");
  if (opts.mailto) url.searchParams.set("mailto", opts.mailto);

  try {
    const res = await fetch(url.toString());
    if (!res.ok) return [];

    const json = (await res.json()) as OaResponse | null;
    const results = json?.results;
    // Anything other than a list of works is treated as "no results".
    const works = Array.isArray(results) ? results : [];

    return works.map((w) => {
      const doiUrl = w.doi || undefined;
      return {
        paperId: w.id,
        title: w.title ?? "",
        // Normalise a JSON null to undefined so `year` is either a number or absent.
        year: w.publication_year ?? undefined,
        authors: (w.authorships ?? [])
          .map((a) => ({ name: a.author?.display_name ?? "" }))
          .filter((a) => a.name.length > 0),
        externalIds: doiUrl ? { DOI: doiUrl.replace(DOI_URL_PREFIX, "") } : undefined,
        // Prefer the publisher landing page; fall back to the DOI resolver URL.
        url: w.primary_location?.landing_page_url ?? doiUrl,
      };
    });
  } catch {
    // Deliberate best-effort: network and JSON-parse failures mean "no academic support found".
    return [];
  }
}
13 changes: 13 additions & 0 deletions src/providers/registry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { describe, test, expect } from "bun:test";
import { getProvider } from "./registry.ts";

describe("getProvider", () => {
  // The registry must expose OpenAlex under the academic skill as a keyless, free provider.
  test("returns metadata for academic + openalex", () => {
    const openalex = getProvider("academic", "openalex");

    expect(openalex).toBeDefined();
    expect(openalex?.id).toBe("openalex");
    expect(openalex?.endpoint).toContain("api.openalex.org");
    expect(openalex?.requiresKey).toBe(false);
    expect(openalex?.freeTier).toBe(true);
  });
});
1 change: 1 addition & 0 deletions src/providers/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const PROVIDER_REGISTRY: Partial<Record<SkillId, ProviderMetadata[]>> = {
],
academic: [
{ id: "semantic-scholar", label: "Semantic Scholar", speed: "medium", costPerCheckUsd: 0, costLabel: "free", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.semanticscholar.org/graph/v1/paper/search" },
{ id: "openalex", label: "OpenAlex", speed: "fast", costPerCheckUsd: 0, costLabel: "Free (polite pool with mailto)", depth: "standard", freeTier: true, requiresKey: false, endpoint: "https://api.openalex.org/works" },
Comment thread
sharonds marked this conversation as resolved.
],
Comment thread
sharonds marked this conversation as resolved.
"self-plagiarism": [
{ id: "cloudflare-vectorize", label: "Cloudflare Vectorize", speed: "fast", costPerCheckUsd: 0.0001, costLabel: "$0.01/1M vectors", depth: "standard", freeTier: true, requiresKey: true },
Expand Down
27 changes: 27 additions & 0 deletions src/providers/resolve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,33 @@ describe("resolveProvider", () => {
expect(resolveProvider({ ...base, copyscapeKey: "" }, "plagiarism")).toBeNull();
});

test("routes academic → openalex when openalexMailto is set", () => {
  const cfg = { ...base, openalexMailto: "me@example.com" };

  const resolved = resolveProvider(cfg, "academic");

  expect(resolved?.provider).toBe("openalex");
  // The mailto address is what comes back in the `apiKey` field for this provider.
  expect(resolved?.apiKey).toBe("me@example.com");
});

test("academic falls back to null when no openalexMailto and no explicit provider", () => {
  expect(resolveProvider({ ...base }, "academic")).toBeNull();
});

test("explicit providers[academic] still wins over openalexMailto", () => {
  // Both an explicit provider entry and the mailto are present; the explicit entry must be used.
  const cfg = {
    ...base,
    openalexMailto: "me@example.com",
    providers: { academic: { provider: "semantic-scholar", apiKey: "ss-key" } },
  } as Config;

  const resolved = resolveProvider(cfg, "academic");

  expect(resolved?.provider).toBe("semantic-scholar");
  expect(resolved?.apiKey).toBe("ss-key");
});

test("metadata is undefined when provider id is not in registry", () => {
const r = resolveProvider(
{ ...base, providers: { "fact-check": { provider: "nonexistent" as never, apiKey: "k" } } } as Config,
Expand Down
1 change: 1 addition & 0 deletions src/providers/resolve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { getProvider } from "./registry.ts";
/**
 * Per-skill default provider plus the accessor that pulls its credential out of
 * the top-level `Config` fields.
 *
 * NOTE(review): presumably consulted when no explicit `providers[skill]` entry
 * exists, with an `undefined` credential meaning "not configured" — confirm
 * against `resolveProvider`.
 */
const LEGACY_MAP: Partial<Record<SkillId, { provider: SkillProviderConfig["provider"]; keyOf: (c: Config) => string | undefined }>> = {
  "fact-check": { provider: "exa-search", keyOf: (c) => c.exaApiKey },
  plagiarism: { provider: "copyscape", keyOf: (c) => c.copyscapeKey || undefined },
  // A blank or whitespace-only OPENALEX_MAILTO (e.g. `OPENALEX_MAILTO=` in .env) counts as
  // unset, mirroring how a blank copyscape key is normalised above.
  academic: { provider: "openalex", keyOf: (c) => c.openalexMailto?.trim() || undefined },
};

const GEMINI_FACT_CHECK_PROVIDERS = new Set<SkillProviderConfig["provider"]>([
Expand Down
9 changes: 9 additions & 0 deletions src/providers/types.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { describe, test, expect } from "bun:test";
import type { ProviderId } from "./types.ts";

describe("ProviderId", () => {
  test("accepts openalex as a provider", () => {
    // The annotation is the real check: it stops type-checking if "openalex" leaves the union.
    const candidate: ProviderId = "openalex";

    expect(candidate).toBe("openalex");
  });
});
2 changes: 1 addition & 1 deletion src/providers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ export type ProviderId =
| "gemini-grounded" | "gemini-deep-research"
| "languagetool" | "languagetool-selfhosted" | "sapling" | "llm-fallback"
| "copyscape" | "originality"
| "semantic-scholar"
| "semantic-scholar" | "openalex"
| "cloudflare-vectorize" | "pinecone" | "upstash-vector";

export interface SkillProviderConfig {
Expand Down
Loading
Loading