11 changes: 11 additions & 0 deletions .env-example
@@ -1,5 +1,16 @@
 USER_AGENT="AletheiaFact.org/Agencia/v1"
 
+# LLM provider configuration
+# LLM_PROVIDER: "openai" (default) or "anthropic"
+LLM_PROVIDER="openai"
+# LLM_MODEL="" # Override default model (e.g., "gpt-5.2-2025-12-11", "claude-sonnet-4-6")
+# LLM_MODEL_MINI="" # Override cheap/fast model (e.g., "gpt-5-mini-2025-08-07", "claude-haiku-4-5-20251001")
+# LLM_TEMPERATURE="1" # Default temperature for all LLM calls
+
+# Provider API keys (only the active provider's key is required)
+OPENAI_API_KEY=""
+ANTHROPIC_API_KEY=""
+
 TAVILY_API_KEY=""
 SERPAPI_API_KEY="" # Deprecated — Tavily is preferred. Remove after migration confirmed.
 
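For local development, these variables can be loaded from .env before the app starts. A minimal sketch, assuming the python-dotenv package (the repository may wire this up differently):

# Minimal sketch: load .env locally and check the active provider's key.
# Assumes python-dotenv; not part of this PR.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory into os.environ

provider = os.environ.get("LLM_PROVIDER", "openai").lower().strip()
key_var = "OPENAI_API_KEY" if provider == "openai" else "ANTHROPIC_API_KEY"
if not os.environ.get(key_var):
    raise RuntimeError(f"{key_var} must be set when LLM_PROVIDER='{provider}'")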
8 changes: 8 additions & 0 deletions .github/workflows/aws.yml
@@ -14,7 +14,11 @@ env:
   KUBE_CONFIG: ${{ secrets.KUBE_CONFIG }}
   IMAGE: agencia-production
   ENVIRONMENT: development
+  LLM_PROVIDER: ${{ secrets.LLM_PROVIDER }}
+  LLM_MODEL: ${{ secrets.LLM_MODEL }}
+  LLM_MODEL_MINI: ${{ secrets.LLM_MODEL_MINI }}
   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
   SERPAPI_API_KEY: ${{ secrets.SERPAPI_API_KEY }}
   TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
   GOOGLE_FACTCHECK_API_KEY: ${{ secrets.GOOGLE_FACTCHECK_API_KEY }}
@@ -78,7 +82,11 @@ jobs:
       run: |
         sed -i "s/TAG/$GITHUB_SHA/" deployment/app.yml
         sed -i 's%ENVIRONMENT%${{ env.ENVIRONMENT }}%g' deployment/app.yml
+        sed -i "s%LLM_PROVIDER_STUB%${{ env.LLM_PROVIDER }}%g" deployment/app.yml
+        sed -i "s%LLM_MODEL_STUB%${{ env.LLM_MODEL }}%g" deployment/app.yml
+        sed -i "s%LLM_MODEL_MINI_STUB%${{ env.LLM_MODEL_MINI }}%g" deployment/app.yml
         sed -i "s%OPENAI_API_KEY_STUB%${{ env.OPENAI_API_KEY }}%g" deployment/app.yml
+        sed -i "s%ANTHROPIC_API_KEY_STUB%${{ env.ANTHROPIC_API_KEY }}%g" deployment/app.yml
         sed -i "s%SERPAPI_API_KEY_STUB%${{ env.SERPAPI_API_KEY }}%g" deployment/app.yml
         sed -i "s%TAVILY_API_KEY_STUB%${{ env.TAVILY_API_KEY }}%g" deployment/app.yml
         sed -i "s%GOOGLE_FACTCHECK_API_KEY_STUB%${{ env.GOOGLE_FACTCHECK_API_KEY }}%g" deployment/app.yml
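These sed commands use % as the delimiter so substituted values containing / (dated model names, URLs) cannot terminate the expression. To test the rendered manifest outside CI, a hypothetical local equivalent of the stub substitution (the stub names come from the workflow above; the script itself is not part of this PR):

# Hypothetical local stand-in for the workflow's sed substitutions.
# Renders deployment/app.yml by replacing *_STUB placeholders with env values.
import os
from pathlib import Path

STUB_TO_ENV = {
    "LLM_PROVIDER_STUB": "LLM_PROVIDER",
    "LLM_MODEL_STUB": "LLM_MODEL",
    "LLM_MODEL_MINI_STUB": "LLM_MODEL_MINI",
    "OPENAI_API_KEY_STUB": "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY_STUB": "ANTHROPIC_API_KEY",
    "SERPAPI_API_KEY_STUB": "SERPAPI_API_KEY",
    "TAVILY_API_KEY_STUB": "TAVILY_API_KEY",
    "GOOGLE_FACTCHECK_API_KEY_STUB": "GOOGLE_FACTCHECK_API_KEY",
}

manifest = Path("deployment/app.yml").read_text()
for stub, env_name in STUB_TO_ENV.items():
    manifest = manifest.replace(stub, os.environ.get(env_name, ""))
print(manifest)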
83 changes: 83 additions & 0 deletions app/llm.py
@@ -0,0 +1,83 @@
"""Centralized LLM factory with provider abstraction.

Supports OpenAI and Anthropic (Claude) models via environment-driven configuration.
All nodes should use get_llm() instead of instantiating ChatOpenAI directly.

Configuration via environment variables:
LLM_PROVIDER: "openai" (default) or "anthropic"
LLM_MODEL: Model name override (e.g., "gpt-5.2-2025-12-11", "claude-sonnet-4-6")
LLM_MODEL_MINI: Model name override for cheap/fast tasks
LLM_TEMPERATURE: Temperature override (default: 1)

Provider-specific keys:
OPENAI_API_KEY: Required when LLM_PROVIDER=openai
ANTHROPIC_API_KEY: Required when LLM_PROVIDER=anthropic
"""

import os
import logging

from langchain_core.language_models.chat_models import BaseChatModel

logger = logging.getLogger(__name__)

_PROVIDER_DEFAULTS = {
"openai": {
"model": "gpt-5.2-2025-12-11",
"model_mini": "gpt-5-mini-2025-08-07",
},
"anthropic": {
"model": "claude-sonnet-4-6",
"model_mini": "claude-haiku-4-5-20251001",
},
}

_VALID_PROVIDERS = set(_PROVIDER_DEFAULTS.keys())


def _get_provider() -> str:
provider = os.environ.get("LLM_PROVIDER", "openai").lower().strip()
if provider not in _VALID_PROVIDERS:
raise ValueError(
f"Invalid LLM_PROVIDER='{provider}'. Must be one of: {_VALID_PROVIDERS}"
)
return provider


def _get_temperature() -> float:
return float(os.environ.get("LLM_TEMPERATURE", "1"))


def get_llm(*, mini: bool = False, temperature: float | None = None) -> BaseChatModel:
"""Create an LLM instance using the configured provider.

Args:
mini: If True, use the cheaper/faster model variant.
Used for scoring, classification, and report formatting.
temperature: Override the default temperature. If None, uses
LLM_TEMPERATURE env var (default: 1).

Returns:
A LangChain chat model instance (ChatOpenAI or ChatAnthropic).
"""
provider = _get_provider()
defaults = _PROVIDER_DEFAULTS[provider]
temp = temperature if temperature is not None else _get_temperature()

if mini:
model = os.environ.get("LLM_MODEL_MINI", defaults["model_mini"])
else:
model = os.environ.get("LLM_MODEL", defaults["model"])

if provider == "openai":
from langchain_openai import ChatOpenAI

return ChatOpenAI(model=model, temperature=temp)

if provider == "anthropic":
from langchain_anthropic import ChatAnthropic

return ChatAnthropic(model=model, temperature=temp)

# Unreachable due to validation in _get_provider, but satisfies type checkers
raise ValueError(f"Unsupported provider: {provider}")
61 changes: 44 additions & 17 deletions app/nodes/gazette/cross_checker.py
@@ -6,10 +6,10 @@

 import logging
 
-from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 
+from llm import get_llm
 from state import AgentState
 from utils.llm_retry import invoke_with_retry

@@ -18,24 +18,32 @@
 _prompt = ChatPromptTemplate.from_messages([
     (
         "system",
-        """You are an expert data analyst specialized in creating fact-checking reports.
+        """You are an independent fact-checking evaluator. You receive evidence from multiple sources
+and must cross-reference them to reach an impartial classification.
 
-Your goal: Perform a deep and thorough analysis to derive an informed conclusion by comparing
-collected gazette evidence with the claim. Use critical thinking and analytical skills to assess
-the accuracy and relevance of the data.
+CRITICAL: You receive three types of evidence below. The AI-generated analysis may contain bias.
+You MUST cross-reference it against the raw gazette passages. If the analysis claims something
+not supported by the raw passages, note the discrepancy. If the raw passages contain relevant
+information the analysis omitted, factor it into your classification.
 
 Claim: {claim}
 
-Gazette deep analysis (from multiple documents):
+--- RAW GAZETTE PASSAGES (not filtered by AI — review independently) ---
+{raw_passages}
+
+--- AI-GENERATED ANALYSIS (may contain bias — cross-reference with raw passages above) ---
 {gazette_analysis}
 
-Evidence summary (key excerpts from search):
+--- CONTRADICTORY EVIDENCE (independently extracted — weigh carefully) ---
+{contradictory_evidence}
+
+--- EVIDENCE SUMMARY (key search excerpts) ---
 {evidence_summary}
 
-Gazettes analyzed:
+--- GAZETTES ANALYZED ---
 {gazette_metadata}
 
-CLASSIFICATION — Assign one of the following labels based on your analysis:
+CLASSIFICATION — Assign one of the following labels:
 
 - Not Fact: The information lacks evidence or a factual basis.
 - Trustworthy: The information is reliable, backed by evidence or reputable sources.
@@ -47,23 +55,35 @@
 - Exaggerated: Contains elements of truth but is overstated or embellished.
 - Unverifiable: Cannot be substantiated through the gazette sources searched.
 
-IMPORTANT REASONING GUIDELINES:
+REASONING GUIDELINES:
 - Pay careful attention to dates, names, and specific details in both the claim and evidence
 - Distinguish between similar but different events (e.g., "conducted exam" vs. "called approved candidates")
 - If evidence partially supports the claim, use "Trustworthy, but" or "Arguable" rather than "Trustworthy"
 - If no relevant evidence was found after searching, use "Unverifiable"
 - Cite specific gazette data (dates, contract numbers, names) to support your classification
+- Note any discrepancies between the AI analysis and the raw passages
 
+CONFIDENCE ASSESSMENT — After your classification, provide:
+- Evidence quality: high/medium/low (how relevant are the raw passages to the claim?)
+- Analysis accuracy: high/medium/low (does the AI analysis faithfully represent the raw passages?)
+- Overall confidence: high/medium/low (how confident are you in this classification?)
+
 Compile a comprehensive report detailing your investigative process, key findings, and evidence.""",
     ),
 ])


 def cross_check(state: AgentState) -> dict:
-    """Cross-check gazette evidence against the claim using gpt-5."""
+    """Cross-check gazette evidence against the claim with independent verification.
+
+    Receives raw passages alongside LLM summaries so the classifier can
+    cross-reference and detect bias in the AI-generated analysis.
+    """
     claim = state["claim"]
     gazette_analysis = state.get("gazette_analysis", "")
     evidence_summary = state.get("evidence_summary", "")
+    raw_passages = state.get("raw_gazette_passages", "")
+    contradictory_evidence = state.get("contradictory_evidence", "")
     selected = state.get("selected_gazettes", [])
 
     # Format gazette metadata
@@ -79,22 +99,29 @@ def cross_check(state: AgentState) -> dict:
     else:
         gazette_metadata = "No gazette documents were analyzed."
 
+    # Truncate inputs for context window management
+    raw_passages_truncated = raw_passages[:4000] if raw_passages else "No raw passages available."
+    contra_truncated = contradictory_evidence[:2000] if contradictory_evidence else "No contradictory evidence extracted."
+
     logger.info(
-        "[cross_check] Starting — claim='%s' analysis_len=%d gazettes=%d",
-        claim[:80], len(gazette_analysis), len(selected),
+        "[cross_check] Starting — claim='%s' analysis_len=%d raw_len=%d contra_len=%d gazettes=%d",
+        claim[:80], len(gazette_analysis), len(raw_passages_truncated),
+        len(contra_truncated), len(selected),
     )
 
-    llm = ChatOpenAI(model="gpt-5.2-2025-12-11", temperature=1)
+    llm = get_llm()
     chain = _prompt | llm | StrOutputParser()
     result = invoke_with_retry(
         chain,
         params={
             "claim": claim,
-            "gazette_analysis": gazette_analysis[:6000],
-            "evidence_summary": evidence_summary[:3000],
+            "gazette_analysis": gazette_analysis[:5000],
+            "evidence_summary": evidence_summary[:2000],
             "gazette_metadata": gazette_metadata,
+            "raw_passages": raw_passages_truncated,
+            "contradictory_evidence": contra_truncated,
         },
-        truncatable_keys=["gazette_analysis", "evidence_summary"],
+        truncatable_keys=["gazette_analysis", "evidence_summary", "raw_passages"],
     )
 
     logger.info("[cross_check] Completed — result length=%d chars", len(result))
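utils/llm_retry.py is not shown in this diff. From the call site above, invoke_with_retry takes a chain, its params, and the keys it may shorten when retrying after context-window errors. A rough sketch of that assumed contract, as an assumption rather than the actual implementation:

# Assumed shape of utils.llm_retry.invoke_with_retry, inferred from the call
# site above. The real implementation is not part of this diff.
def invoke_with_retry(chain, params: dict, truncatable_keys: list[str],
                      max_attempts: int = 3) -> str:
    for attempt in range(max_attempts):
        try:
            return chain.invoke(params)
        except Exception:  # the real code likely catches context-length errors only
            if attempt == max_attempts - 1:
                raise
            # Halve the truncatable inputs before retrying.
            for key in truncatable_keys:
                value = params.get(key, "")
                params[key] = value[: max(1, len(value) // 2)]
    raise RuntimeError("unreachable")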