diff --git a/docs/ops/runbooks/osint-hallucination-mitigation.md b/docs/ops/runbooks/osint-hallucination-mitigation.md new file mode 100644 index 00000000000..e244fbc3c49 --- /dev/null +++ b/docs/ops/runbooks/osint-hallucination-mitigation.md @@ -0,0 +1,30 @@ +# OSINT Hallucination Mitigation Runbook + +## Objective + +Execute OSINT runs with provenance-required facts, deterministic evidence IDs, and +verifier gates that flag unsupported claims. + +## How to Run + +```bash +python scripts/osint_run.py --case <case_id> --out artifacts/osint/<run_id>/ +``` + +## Interpreting Verification Output + +- `needs_human_review: true` means unsupported claims or missing provenance were + detected. +- `unsupported_claims[]` lists claims without Evidence ID support. +- `missing_provenance_facts[]` lists fact IDs missing required fields. + +## Triage Checklist + +1. Confirm provenance fields are complete for every fact. +2. Validate two-source promotion for any `confirmed` fact. +3. Remove or downgrade unsupported narrative claims. +4. Re-run verification and ensure `needs_human_review` is cleared. + +## Operational SLO (Initial) + +- 95% of runs complete under the agreed CI runner time budget. diff --git a/docs/roadmap/STATUS.json b/docs/roadmap/STATUS.json index 281d973af71..603a935acd6 100644 --- a/docs/roadmap/STATUS.json +++ b/docs/roadmap/STATUS.json @@ -1,7 +1,13 @@ { "last_updated": "2026-02-07T00:00:00Z", - "revision_note": "Added Summit PR Stack Sequencer skill scaffolding.", + "revision_note": "Added OSINT hallucination mitigation standards, policies, and scaffolding.", "initiatives": [ + { + "id": "osint-hallucination-mitigation", + "status": "in_progress", + "owner": "codex", + "notes": "Established provenance-required facts, deterministic evidence IDs, verifier policy gates, and OSINT runbook guidance." 
+ }, { "id": "adenhq-hive-subsumption-lane1", "status": "in_progress", diff --git a/docs/security/data-handling/osint-hallucination-mitigation.md b/docs/security/data-handling/osint-hallucination-mitigation.md new file mode 100644 index 00000000000..b20d07d0895 --- /dev/null +++ b/docs/security/data-handling/osint-hallucination-mitigation.md @@ -0,0 +1,30 @@ +# OSINT Hallucination Mitigation Data Handling + +## Scope + +Applies to OSINT collection, retrieval, summarization, and verification artifacts. + +## Never-Log List + +Never log or persist the following without explicit approval and redaction: + +- Auth tokens, session cookies, API keys +- Private keys or signing materials +- Emails or phone numbers unless explicitly required and redacted + +## Retention + +- Raw pages and blobs: short-lived retention, delete after extraction and + verification windows close. +- Extracted facts: longer-lived retention to preserve auditability. + +## Determinism Rules + +- Deterministic artifacts must not embed wall-clock timestamps. +- `collected_at` is permitted in provenance fields but stored outside + deterministic bundles when possible. + +## Verification Requirements + +- Missing provenance must downgrade facts to `unknown`. +- Unsupported narrative claims trigger `needs_human_review`. diff --git a/docs/standards/osint-hallucination-mitigation.md b/docs/standards/osint-hallucination-mitigation.md new file mode 100644 index 00000000000..a2d4c60fb36 --- /dev/null +++ b/docs/standards/osint-hallucination-mitigation.md @@ -0,0 +1,80 @@ +# OSINT Hallucination Mitigation Standard + +## Purpose + +Make hallucination resistance a first-class OSINT design goal by enforcing +traceable, checkable, degradable-to-unknown facts with deterministic evidence +artifacts. + +## Non-Negotiables + +1. **Provenance mandatory**: every assertion carries explicit source metadata. +2. **Degradable-to-unknown**: missing provenance downgrades facts to `unknown`. +3. 
**Retrieval-first**: collect raw → retrieval selects → summarizer references + retrieved evidence IDs only. +4. **Extractive-first**: key fields (names, dates, IPs, IOCs) must prefer + extractive resolution prior to LLM paraphrase. +5. **Two-source promotion**: `confirmed` requires ≥2 independent sources. +6. **Verifier required**: final report is audited for unsupported claims. +7. **Human sign-off**: final assessment requires human approval. + +## Required Fact & Evidence Fields + +Each fact MUST include provenance fields: + +- `source_url` +- `source_type` +- `collected_at` +- `collector_tool` +- `verdict_confidence` + +Evidence IDs are deterministic: + +``` +EVID:<source_type>:<sha256(normalized_source_url)>:<sha256(canonical_snippet)> +``` + +## Deterministic Artifacts + +Artifacts must be produced per run with no unstable timestamps inside the +deterministic files: + +- `artifacts/osint/<run_id>/raw/…` +- `artifacts/osint/<run_id>/retrieved.json` +- `artifacts/osint/<run_id>/facts.jsonl` +- `artifacts/osint/<run_id>/report.md` +- `artifacts/osint/<run_id>/verification.json` +- `artifacts/osint/<run_id>/metrics.json` + +## Import / Export Matrix + +**Imports** + +- Collector raw blobs (JSON/HTML/text) +- External tool ID + version in `collector_tool` + +**Exports** + +- `retrieved.json`: evidence selection list +- `facts.jsonl`: fact records with provenance +- `verification.json`: verifier outputs +- `report.md`: narrative with inline Evidence IDs + +**Non-goals** + +- No automatic truth adjudication without provenance +- No single-source confirmation +- No silent backfilling of missing fields + +## MAESTRO Security Alignment + +**MAESTRO Layers:** Data, Agents, Tools, Observability, Security. +**Threats Considered:** prompt injection, unsupported claims, single-source +misinformation, evidence tampering. +**Mitigations:** provenance-required facts, deterministic Evidence IDs, two-source +promotion gate, verifier audit, human approval. 
TRACKING_QUERY_KEYS = {
    "fbclid",
    "gclid",
    "igshid",
    "mc_cid",
    "mc_eid",
}

# Ports implied by the scheme. An explicit default port addresses the same
# origin as an omitted one, so it is dropped to keep evidence IDs stable.
_DEFAULT_PORTS = {"http": "80", "https": "443"}


def _drop_tracking_params(params: Iterable[tuple[str, str]]) -> list[tuple[str, str]]:
    """Return *params* without campaign / click-tracking query pairs.

    A pair is dropped when its key starts with ``utm_`` or is listed in
    ``TRACKING_QUERY_KEYS``. Keys are compared case-insensitively so that
    variants such as ``UTM_Source`` or ``GCLID`` are removed as well; the
    surviving pairs keep their original spelling and order.
    """
    kept: list[tuple[str, str]] = []
    for key, value in params:
        folded = key.lower()
        if folded.startswith("utm_") or folded in TRACKING_QUERY_KEYS:
            continue
        kept.append((key, value))
    return kept


def _normalize_netloc(scheme: str, netloc: str) -> str:
    """Lower-case the host part of *netloc* and drop the scheme's default port.

    Any ``user:password@`` prefix is passed through untouched because userinfo
    is case-sensitive; only the host (and port) portion is normalized.
    """
    userinfo, at_sign, hostport = netloc.rpartition("@")
    hostport = hostport.lower()
    default_port = _DEFAULT_PORTS.get(scheme)
    # A bracketed IPv6 literal without a port ends in "]", so this suffix test
    # can only ever match a real ":<port>" component.
    if default_port and hostport.endswith(f":{default_port}"):
        hostport = hostport[: -(len(default_port) + 1)]
    return f"{userinfo}{at_sign}{hostport}"


def normalize_source_url(source_url: str) -> str:
    """Normalize a URL so equivalent spellings hash to the same evidence ID.

    Normalization steps:

    * surrounding whitespace is stripped;
    * scheme and host are lower-cased (userinfo keeps its case);
    * an explicit default port (``:80`` for http, ``:443`` for https) is removed;
    * tracking query parameters are removed and the rest are sorted;
    * the fragment is discarded.

    Args:
        source_url: URL as collected.

    Returns:
        The normalized URL string.
    """
    parsed = urlparse(source_url.strip())
    scheme = parsed.scheme.lower()
    query_params = sorted(
        _drop_tracking_params(parse_qsl(parsed.query, keep_blank_values=True))
    )
    normalized = parsed._replace(
        scheme=scheme,
        netloc=_normalize_netloc(scheme, parsed.netloc),
        query=urlencode(query_params, doseq=True),
        fragment="",
    )
    return urlunparse(normalized)


def canonicalize_snippet(snippet: str) -> str:
    """Canonicalize a snippet to reduce noise before hashing.

    Applies Unicode NFKC normalization, collapses every whitespace run to a
    single space, and strips leading/trailing whitespace.
    """
    folded = unicodedata.normalize("NFKC", snippet)
    return re.sub(r"\s+", " ", folded).strip()
Verdict = Literal["confirmed", "unconfirmed", "unknown", "rejected"]

# Provenance attributes that must be populated for a fact to keep its verdict.
REQUIRED_PROVENANCE_FIELDS = [
    "source_url",
    "source_type",
    "collected_at",
    "collector_tool",
    "verdict_confidence",
]


@dataclass(frozen=True)
class Provenance:
    """Source metadata attached to a single piece of evidence for a fact."""

    source_url: str
    source_type: str
    collected_at: str
    collector_tool: str
    verdict_confidence: float
    snippet: Optional[str] = None
    evidence_id: Optional[str] = None


@dataclass(frozen=True)
class Fact:
    """An assertion together with its verdict and supporting provenance."""

    fact_id: str
    predicate: str
    value: str
    verdict: Verdict
    confidence: float
    provenance: List[Provenance]
    notes: Optional[str] = None
    labels: Optional[Dict[str, str]] = None


def _is_blank(value: object) -> bool:
    """Return True for ``None``, an empty list, or an empty/whitespace-only string.

    Numeric zero is deliberately NOT blank: ``verdict_confidence=0.0`` is a
    legitimate, populated value.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return value == []


def _distinct_source_urls(fact: Fact) -> set:
    """Return the set of non-empty ``source_url`` values across the fact's provenance."""
    # NOTE(review): URLs are compared verbatim, so two spellings of the same
    # page count as two sources. Confirm whether they should be normalized first.
    return {prov.source_url for prov in fact.provenance if prov.source_url}


def _merge_notes(existing: Optional[str], reasons: List[str]) -> str:
    """Append each reason to *existing* notes, skipping reasons already present.

    Existing text is preserved verbatim, and re-applying a policy to an
    already-downgraded fact does not duplicate its reason.
    """
    merged = existing or ""
    for reason in reasons:
        if reason in merged:
            continue
        merged = f"{merged}; {reason}" if merged else reason
    return merged


def missing_provenance_fields(fact: Fact) -> List[str]:
    """List the provenance problems of *fact*.

    Returns:
        ``["missing_provenance"]`` when the fact has no provenance at all;
        otherwise one ``"provenance_missing:<field>"`` entry per required field
        that is blank in at least one provenance record (each field reported
        once, in first-seen order). An empty list means provenance is complete.
    """
    if not fact.provenance:
        return ["missing_provenance"]
    reasons = (
        f"provenance_missing:{field}"
        for prov in fact.provenance
        for field in REQUIRED_PROVENANCE_FIELDS
        if _is_blank(getattr(prov, field, None))
    )
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(reasons))


def validate_fact(fact: Fact) -> List[str]:
    """Return every policy violation for *fact* without modifying it.

    Combines the provenance check with the two-source rule: a ``confirmed``
    fact needs at least two distinct source URLs.
    """
    errors = missing_provenance_fields(fact)
    if fact.verdict == "confirmed" and len(_distinct_source_urls(fact)) < 2:
        errors.append("confirmed_requires_two_sources")
    return errors


def apply_provenance_policy(fact: Fact) -> Fact:
    """Downgrade *fact* to ``unknown`` when required provenance is missing.

    Returns the same object when provenance is complete. Otherwise returns a
    copy with ``verdict="unknown"`` and the missing-field reasons appended to
    any notes the fact already carried.
    """
    missing = missing_provenance_fields(fact)
    if not missing:
        return fact
    return replace(fact, verdict="unknown", notes=_merge_notes(fact.notes, missing))


def apply_two_source_policy(fact: Fact) -> Fact:
    """Downgrade a ``confirmed`` fact backed by fewer than two distinct sources.

    Facts with any other verdict, or with two or more distinct source URLs,
    are returned unchanged. Otherwise a copy with ``verdict="unconfirmed"`` is
    returned and ``confirmed_requires_two_sources`` is appended to its notes.
    """
    if fact.verdict != "confirmed":
        return fact
    if len(_distinct_source_urls(fact)) >= 2:
        return fact
    notes = _merge_notes(fact.notes, ["confirmed_requires_two_sources"])
    return replace(fact, verdict="unconfirmed", notes=notes)
def _collect_evidence_ids(facts: Iterable[Fact]) -> List[str]:
    """Return every non-empty ``evidence_id`` found in the provenance of *facts*."""
    return [
        prov.evidence_id
        for fact in facts
        for prov in fact.provenance
        if prov.evidence_id
    ]


def verify_report(report_text: str, facts: List[Fact]) -> Dict[str, Any]:
    """Audit a narrative report against the facts that are supposed to back it.

    A claim is supported only when it cites an evidence ID that belongs to one
    of *facts*. A claim that merely contains a well-formed ``EVID:...`` string
    which matches no known fact is reported too, so a fabricated citation
    cannot pass the gate.

    Args:
        report_text: Narrative report; claims are taken from ``extract_claims``.
        facts: Facts whose provenance supplies the known evidence IDs.

    Returns:
        A dict with:

        * ``needs_human_review`` - True if any claim is unsupported or any
          fact has missing provenance;
        * ``unsupported_claims`` - ``{"claim", "reason"}`` dicts, where reason
          is ``"missing_evidence_id"`` (no citation at all) or
          ``"unknown_evidence_id"`` (citation matches no known fact);
        * ``missing_provenance_facts`` - IDs of facts failing the provenance check;
        * ``summary`` - the corresponding counts.
    """
    # Materialize once: facts is traversed twice and measured with len().
    facts = list(facts)
    known_ids = _collect_evidence_ids(facts)

    unsupported_claims: List[Dict[str, str]] = []
    for claim in extract_claims(report_text):
        if any(evidence_id in claim for evidence_id in known_ids):
            continue
        # A syntactically valid ID that is not attached to any fact is treated
        # as unsupported rather than trusted on format alone.
        if EVIDENCE_ID_PATTERN.search(claim):
            reason = "unknown_evidence_id"
        else:
            reason = "missing_evidence_id"
        unsupported_claims.append({"claim": claim, "reason": reason})

    missing_provenance = [
        fact.fact_id for fact in facts if missing_provenance_fields(fact)
    ]

    return {
        "needs_human_review": bool(unsupported_claims or missing_provenance),
        "unsupported_claims": unsupported_claims,
        "missing_provenance_facts": missing_provenance,
        "summary": {
            "facts_total": len(facts),
            "facts_missing_provenance": len(missing_provenance),
            "unsupported_claims_total": len(unsupported_claims),
        },
    }
def test_deterministic_evidence_ids() -> None:
    """Tracking parameters and query ordering must not change the evidence ID."""
    shared = {"source_type": "web", "snippet": "Some text."}
    urls = (
        "https://example.com/article?utm_source=foo&b=2&a=1",
        "https://example.com/article?a=1&b=2",
    )
    evidence_a, evidence_b = (
        compute_evidence_id(source_url=url, **shared) for url in urls
    )
    assert evidence_a == evidence_b
def test_verifier_flags_unsupported() -> None:
    """An uncited claim is flagged, while an explicit gap statement is not."""
    provenance = Provenance(
        source_url="https://example.com/post",
        source_type="web",
        collected_at="2025-01-01T00:00:00Z",
        collector_tool="collector@1.0",
        verdict_confidence=0.7,
        evidence_id="EVID:web:abc:def",
    )
    fact = Fact(
        fact_id="fact-3",
        predicate="indicator.hash",
        value="deadbeef",
        verdict="confirmed",
        confidence=0.9,
        provenance=[provenance],
    )
    report = (
        "Claim: deadbeef appeared in the malware sample without citation. "
        "Unknown: attribution remains unresolved."
    )
    result = verify_report(report, [fact])
    assert result["needs_human_review"] is True
    assert result["unsupported_claims"]

    # Exactly the uncited claim is reported; the "Unknown:" sentence is a gap
    # statement and must not be counted as an unsupported claim.
    assert len(result["unsupported_claims"]) == 1
    flagged = result["unsupported_claims"][0]
    assert flagged["claim"].startswith("Claim:")
    assert flagged["reason"] == "missing_evidence_id"

    # Provenance on the fact is complete, so the review flag comes from the
    # report text alone.
    assert result["missing_provenance_facts"] == []
    assert result["summary"]["unsupported_claims_total"] == 1