diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..b69992f
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,7 @@
+{
+ "permissions": {
+ "allow": [
+ "mcp__crypto-intelligence__research_crypto_project"
+ ]
+ }
+}
diff --git a/.cursor/skills/example-readme-writer/SKILL.md b/.cursor/skills/example-readme-writer/SKILL.md
index 9616ada..a6ee6b2 100644
--- a/.cursor/skills/example-readme-writer/SKILL.md
+++ b/.cursor/skills/example-readme-writer/SKILL.md
@@ -57,8 +57,9 @@ Short positioning paragraph:
## Quick Start
-Fastest runnable path: .env setup, docker compose, curl. Include the
-verification request here so there's no need for a separate Verification section.
+Fastest runnable path: .env setup, docker compose, curl. Include the health
+check and the main success-path request here, and mention `endpoints.http`
+when prebuilt requests already exist.
## What You Get Back
@@ -85,10 +86,16 @@ copy, rewrite it shorter.
## Implementation Walkthrough
-Link to source files so the reader can jump directly. Show code only when it's
-the actual working snippet that teaches the architecture (e.g. the MCP tool
-definition). For everything else, reference the file and explain the idea in
-prose. Never show pseudo-code or comment-only code blocks.
+Use numbered steps so the flow is easy to follow:
+1. define the shared state
+2. define the nodes
+3. wire the graph / runtime
+4. expose the public entry points
+
+Link to source files so the reader can jump directly. Prefer short prose plus
+file references over long code excerpts. Inline code only when it teaches the
+architecture and the real working snippet is genuinely more helpful than prose.
+Never show pseudo-code or comment-only code blocks.
## Connect Your MCP Client / Integration
(if applicable -- combine all client tools into one section, CLI first, GUI last)
@@ -97,13 +104,11 @@ prose. Never show pseudo-code or comment-only code blocks.
uv sync, test, lint commands.
-## Exercises
-
-2 items max. One simple extension, one architectural extension. One sentence each.
-
-## Trade-offs
+## What You Have Learned
-Table of advantages vs. limitations. End with the bridge to the next pattern.
+Short takeaway bullets, then:
+- bridge to the next pattern with one sentence explaining what it adds
+- call to action to star the GitHub repository
## Further Reading
@@ -112,8 +117,13 @@ Only link docs for technologies introduced in this pattern.
**Sections to skip:**
-- **What You Should See** -- skip if Quick Start already shows expected output
+- **When to Use / When Not to Use** -- skip unless the pattern needs a short,
+ non-obvious note that materially helps the reader choose the pattern
+- **What You Should See** -- skip; console and Docker logs add noise unless they
+ teach something essential
- **Verification** -- never duplicate Quick Start with the same curl commands
+- **Exercises** -- skip for example READMEs in this repo
+- **Trade-offs** -- replace with `What You Have Learned`
## README Quality Rules
@@ -132,7 +142,10 @@ Only link docs for technologies introduced in this pattern.
- **Architecture explanation helps, not just describes**: when mentioning infrastructure (containers, ports, networks), explain WHY it's structured that way, not just WHAT exists.
- **Key Concepts are tight**: 4 bullets max. One line each with em-dash separators. Cut any bullet that restates the architecture diagram.
- **The Problem is concise**: 2-4 sentences stating the limitation. No comparison tables unless truly needed.
-- **Exercises are short**: 2 items max. One sentence each. One simple extension, one architectural.
+- **Implementation Walkthrough stays structural**: prefer numbered steps and file references over detailed tutorial prose. The goal is to show how things fit together, not restate the code line by line.
+- **Quick Start does the heavy lifting**: put the runnable path there and point to `endpoints.http` instead of adding a separate verification section.
+- **MCP setup stays practical**: if the example exposes MCP, include Claude Code, Cursor, and Claude Desktop setup in one section.
+- **End strong**: use `What You Have Learned` for takeaways, then bridge to the next pattern and add the GitHub star CTA.
- **Further Reading is scoped**: only link docs for technologies introduced by this specific pattern.
- **Integration guides are combined**: don't split Claude Code / Cursor / Claude Desktop into separate sections. One section, multiple examples, developer-workflow order (CLI tools first, GUI apps last).
- **No AI tone**: avoid marketing-speak, over-explanation, and restating the obvious. If a sentence doesn't add information, delete it.
@@ -141,6 +154,7 @@ Only link docs for technologies introduced in this pattern.
Before finalizing an example README, verify:
- the documented quick start matches the actual `docker compose` flow
+- `endpoints.http` is mentioned when it exists and is useful
- repo-root `.env` dependencies are stated explicitly when present
- optional shortcuts are labeled as optional
- provider selection instructions match shared config defaults
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b54bae..49f6a08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,36 @@
All notable changes to this project are documented here.
+## [2026-04-13] Pattern 03: Checkpoint Recovery and Resilience
+
+### Added
+- PostgreSQL-backed checkpointer via `agent_common.persistence` (shared `create_postgres_pool`, `setup_checkpointer`, `close_checkpointer`)
+- Project Verifier and Project Selector nodes -- CoinGecko match validation with `interrupt()` for ambiguous results
+- `POST /run/resume` REST endpoint for human-in-the-loop resume after interrupt
+- MCP tools: `get_research_status`, `list_research_threads`, `delete_research_thread` for thread inspection
+- Service layer (`src/service.py`) separating retry-after-failure from resume-after-interrupt semantics
+- Docker Compose with PostgreSQL container and health checks
+- Full test suite: checkpoint recovery e2e, interrupt/resume e2e, MCP tool unit tests, API tests
+
+### Changed
+- Graph extends P02 fan-out/fan-in with verifier → selector before parallel branches
+- `libs/common`: added `postgres_uri` to Settings, new `persistence` module, updated `__init__.py` exports
+- Pattern progression revised: P03 renamed from "Persistent Memory" to "Checkpoint Recovery", P04 from "Memory Lifecycle" to "Agent Memory" (now on main path)
+- P01 and P02 READMEs streamlined with walkthrough structure and "What You Have Learned" takeaways
+- README skill template updated with new section conventions
+
+### Architecture Decisions
+- **Checkpointing is resilience, not memory**: `thread_id` resumes a failed or interrupted workflow but does not create cross-session knowledge -- that is P04's concern
+- **Verifier + selector over silent best-guess**: ambiguous CoinGecko matches become explicit `interrupt()` calls, keeping the human in the loop rather than silently choosing the wrong coin
+- **Thread status derived from checkpoints**: MCP tools inspect LangGraph state directly instead of maintaining a parallel status table
+- **Two entry points, one service layer**: REST and MCP both delegate to `service.run_pipeline` / `service.resume_pipeline`, keeping execution semantics in one place
+
+### Dependencies
+- langgraph-checkpoint-postgres (PostgreSQL checkpointer)
+- psycopg[binary,pool] (async PostgreSQL driver with connection pooling)
+
+---
+
## [2026-03-30] Pattern 02: MCP Tool Integration -- Architecture Redesign
### Added
diff --git a/README.md b/README.md
index ae2ab01..f30c6e7 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ Teams that emerge as complexity demands them:
### Act 1 — One Team, Growing Capabilities
Patterns 01-04
-You are **Team 1: Intelligence**. Three agents research crypto projects inside a single LangGraph pipeline. It works -- until you realize tools are hardcoded, every request starts from scratch, and memory grows unbounded. Each limitation drives the next pattern: MCP for standardized tools, PostgreSQL-backed checkpointers for persistence, a Memory Refiner for lifecycle management.
+You are **Team 1: Intelligence**. Three agents research crypto projects inside a single LangGraph pipeline. It works -- until you realize tools are hardcoded, long-running workflows are fragile, and the agent forgets what users cared about across sessions. Each limitation drives the next pattern: MCP for standardized tools, PostgreSQL-backed checkpoint recovery for resilience, and real long-term memory for user and project knowledge.
### Act 2 — Teams Multiply, Protocols Emerge
Patterns 05-06
@@ -93,15 +93,15 @@ Team 2 moves to an external partner. Implicit trust is gone -- JWT authenticatio
| 03 |
-Persistent Memory |
-Remembering across conversations |
-Checkpointer, PostgreSQL, thread management |
+Checkpoint Recovery |
+Recovering long-running workflows without restarting from scratch |
+PostgresSaver, thread_id, interrupts, resume semantics |
| 04 |
-Memory Lifecycle optional |
-Managing growing knowledge bases |
-Memory refiner, fact TTL, hierarchical memory |
+Agent Memory |
+Remembering user interests and prior research across sessions |
+PostgresStore, Honcho, user preferences, incremental research |
| Distribution Tier · Multi-service, multi-team, real distributed systems |
@@ -144,9 +144,9 @@ Every pattern exists because the previous one creates a real limitation:
```
P01 ─── Hardcoded tools can't be shared ──────────────── P02
-P02 ─── Every request starts from scratch ────────────── P03
-P03 ─┬─ Memory grows unbounded ──────────────────────── P04 (optional)
- └─ A second team arrives, can't import their code ─ P05
+P02 ─── Long runs fail and lose completed work ──────── P03
+P03 ─── Resilient threads still forget across sessions ─ P04
+P04 ─── A second team arrives, can't import their code ─ P05
P05 ─── Third team needs both, sequential is too slow ── P06
P06 ─── Team 2 moves to external partner, no trust ──── P07
P07 ─── New agents appear, consumers need code changes ─ P08
diff --git a/docs/curriculum.md b/docs/curriculum.md
index f4741b8..1ff6f22 100644
--- a/docs/curriculum.md
+++ b/docs/curriculum.md
@@ -48,8 +48,8 @@ graph TD
subgraph foundation ["Foundation Tier"]
P01["P01: Orchestrator Pipeline"]
P02["P02: MCP Tool Integration"]
- P03["P03: Persistent Memory"]
- P04["P04: Memory Lifecycle\n(enrichment)"]
+ P03["P03: Checkpoint Recovery\nand Resilience"]
+ P04["P04: Agent Memory\nand Knowledge"]
end
subgraph distribution ["Distribution Tier"]
P05["P05: Distributed A2A"]
@@ -63,17 +63,14 @@ graph TD
P01 --> P02
P02 --> P03
P03 --> P04
- P03 --> P05
- P04 -.-> P05
+ P04 --> P05
P05 --> P06
P06 --> P07
P07 --> P08
P08 --> P09
```
-**Main path**: P01 -> P02 -> P03 -> P05 -> P06 -> P07 -> P08 -> P09
-
-**Optional enrichment**: P04 branches off P03 (can be skipped without breaking the progression)
+**Main path**: P01 -> P02 -> P03 -> P04 -> P05 -> P06 -> P07 -> P08 -> P09
**Team introduction timeline**:
@@ -195,15 +192,15 @@ graph TD
---
-### Pattern 03: Persistent Memory
+### Pattern 03: Checkpoint Recovery and Resilience
-**Folder:** `examples/03-persistent-memory/`
+**Folder:** `examples/03-checkpoint-recovery/`
-**Goal:** Add persistent state across conversations using LangGraph's checkpointer backed by PostgreSQL. When a user asks about a crypto project a second time, the system remembers previous research and provides incremental updates instead of starting from scratch.
+**Goal:** Add durable execution to the Pattern 02 pipeline using LangGraph's PostgreSQL-backed checkpointer. When a long-running research run fails midway, the system resumes from the last successful checkpoint instead of starting over.
-**What it solves:** In Patterns 01-02, every request starts fresh. For a research platform, this wastes tokens and time -- if you researched Arbitrum yesterday, you should build on that knowledge, not repeat it.
+**What it solves:** Pattern 02 already has a realistic failure surface: three external API calls, multiple LLM invocations, and a fan-out/fan-in graph. If `project_profiler` times out after `news_scanner` and `community_analyst` succeed, you currently lose completed work and repay the token and latency cost on retry. Checkpointing adds resilience, not memory.
-**Team focus:** Team 1 (Intelligence) -- same 5 agents, now with persistent memory.
+**Team focus:** Team 1 (Intelligence) -- same 5 agents, now with durable execution, thread continuity, and human checkpoints.
**Architecture:**
@@ -211,49 +208,61 @@ graph TD
graph TD
User --> FastAPI["Agent Service\n(FastAPI :8000)"]
FastAPI --> Pipeline["LangGraph Pipeline\n+ Checkpointer"]
- Pipeline --> PG["PostgreSQL\n(conversation state + research cache)"]
- Pipeline --> CoinGecko["CoinGecko API"]
- Pipeline --> DDG["DuckDuckGo\n(web search)"]
ClaudeDesktop["Claude Desktop"] -->|MCP| MCP["crypto-intelligence\nMCP (:8001)"]
MCP --> Pipeline
+ Pipeline --> PG["PostgreSQL\n(checkpoints)"]
+ Pipeline --> CoinGecko["CoinGecko API"]
+ Pipeline --> DDG["DuckDuckGo\n(web search)"]
+ Pipeline --> HITL["Human checkpoint\ninterrupt()/resume"]
```
**Key concepts:**
-- LangGraph checkpointer with PostgreSQL backend
-- Thread-based conversation management (each project = a thread)
-- Research result caching and incremental updates
-- State persistence across agent restarts
-- Docker Compose with PostgreSQL container
+- LangGraph `PostgresSaver` for durable checkpoints
+- Stable `thread_id` as the resume handle for a research workflow
+- Resume-after-failure semantics: retry only the failed node, not the full graph
+- Human-in-the-loop with `interrupt()` and `Command(resume=...)`
+- Idempotent node design and graceful degradation around external API failures
+- Docker Compose with PostgreSQL as durable workflow state
-**libs/common additions:** `agent_common.memory` -- checkpointer setup utilities
+**libs/common additions:** `agent_common.persistence` -- PostgreSQL pool and checkpointer helpers
**Builds on:** Pattern 02
---
-### Pattern 04: Memory Lifecycle Management (Enrichment)
+### Pattern 04: Agent Memory and Knowledge
-**Folder:** `examples/04-memory-lifecycle/`
+**Folder:** `examples/04-agent-memory/`
-**Goal:** Manage growing agent memory with consolidation, expiration, and hierarchical organization. Introduce a Memory Refiner agent that runs periodically to keep the knowledge base accurate and compact.
+**Goal:** Add actual cross-session memory using LangGraph `PostgresStore` plus a memory layer such as Honcho for richer user and project understanding. The system should remember which coins a user tracks, what they care about, and what was learned in previous research threads.
-**What it solves:** After many research sessions, memory grows unbounded. Stale facts ("BTC price is $67k") pollute new analyses. The system needs to distinguish between ephemeral data (prices, news) and durable knowledge (project launch date, team composition).
+**What it solves:** Pattern 03 makes the workflow resilient, but it is still amnesiac. A resumed thread is not the same thing as long-term memory. Users expect the agent to remember repeated interests ("I keep tracking Arbitrum and Base"), preferences ("focus on developer traction"), and prior research findings across separate sessions.
-**Note:** This is an enrichment pattern. The main progression continues from Pattern 03 to Pattern 05. Skip this if your priority is distributed architecture.
+**Team focus:** Team 1 (Intelligence) -- same 5 agents, now augmented with episodic and semantic memory.
+
+**Architecture:**
+
+```mermaid
+graph TD
+ User --> FastAPI["Agent Service\n(FastAPI :8000)"]
+ FastAPI --> Pipeline["LangGraph Pipeline\n+ Checkpointer + Store"]
+ ClaudeDesktop["Claude Desktop"] -->|MCP| MCP["crypto-intelligence\nMCP (:8001)"]
+ MCP --> Pipeline
+ Pipeline --> PG["PostgreSQL\n(checkpoints + BaseStore)"]
+ Pipeline --> Honcho["Honcho\n(memory service)"]
+ Pipeline --> CoinGecko["CoinGecko API"]
+ Pipeline --> DDG["DuckDuckGo\n(web search)"]
+```
**Key concepts:**
-- Memory Refiner agent (consolidates and prunes the knowledge base)
-- Fact TTL: timestamped facts with expiration policies
- - Price data: 1-hour TTL
- - News: 7-day TTL
- - Project fundamentals: no expiration
-- Hierarchical memory tiers:
- - Working memory (current conversation context)
- - Episodic memory (past research sessions)
- - Semantic memory (consolidated, long-term knowledge)
-- Memory compaction strategies
+- `PostgresStore` / `BaseStore` for cross-thread memory
+- User memory namespaces such as tracked coins, watchlists, and research preferences
+- Project memory namespaces such as prior summaries, open risks, and last-reviewed timestamps
+- Incremental research: query planning informed by previous findings
+- Honcho as a production-oriented memory service for agent and user representations
+- Memory freshness policies: separate stable facts from volatile market data
**Builds on:** Pattern 03
@@ -310,7 +319,7 @@ graph TD
**libs/common additions:** `agent_common.a2a` -- A2A protocol client/server helpers
-**Builds on:** Pattern 03
+**Builds on:** Pattern 04
---
diff --git a/docs/vision.md b/docs/vision.md
index acaf8b0..26a3466 100644
--- a/docs/vision.md
+++ b/docs/vision.md
@@ -53,13 +53,13 @@ But the News Scanner's web search is a hardcoded Python function call. What if y
**Pattern 02** introduces MCP -- but not the way you might expect. Instead of wrapping raw APIs as MCP tools, you expose the agent pipeline itself. A `crypto-intelligence` MCP server wraps the full 5-agent research pipeline as a single `research_crypto_project` tool. Claude Desktop calls one MCP tool and gets a complete intelligence report -- the internal orchestration (five agents, CoinGecko data, DuckDuckGo search) is hidden behind the protocol. This is the real Software 3.0 lesson: expose capabilities, not plumbing. The team expands to five agents, the architecture moves to multi-container Docker Compose, and the agent now has two entry points -- REST (`POST /run`) and MCP -- serving the same graph.
-Now the team works well, but every request starts from scratch. You researched Arbitrum yesterday -- why are you re-scanning the same news today?
+Now the team works well, but it's fragile. A single CoinGecko timeout or container restart can waste 30 seconds of completed work because the whole pipeline must start over.
-**Pattern 03** adds persistent memory. LangGraph's checkpointer, backed by PostgreSQL, remembers previous research sessions. When you ask about Arbitrum again, the system provides incremental updates, not a full repeat. Each project becomes a "thread" with accumulated knowledge.
+**Pattern 03** adds checkpoint recovery and resilience. LangGraph's checkpointer, backed by PostgreSQL, persists execution state after each node. If one branch fails, you resume from the last successful checkpoint instead of replaying the whole graph. This is also the first place where human-in-the-loop becomes natural: if the project verifier finds multiple plausible CoinGecko matches for a project name, it can pause with an interrupt and resume once the user clarifies.
-Memory grows. After 50 research sessions, the knowledge base is bloated with stale price data, outdated news, and redundant facts.
+The system is now durable, but still amnesiac. Resume after failure is not the same as remembering what the user cared about last week.
-**Pattern 04** (optional enrichment) introduces memory lifecycle management. A Memory Refiner agent consolidates knowledge, expires stale facts (price data after 1 hour, news after 7 days), and organizes memory into tiers: working memory for the current conversation, episodic memory for past sessions, semantic memory for durable knowledge.
+**Pattern 04** adds real agent memory. Long-term knowledge moves into a store: tracked projects, user preferences, previous summaries, and open questions. Honcho or a LangGraph store can keep user and project memory across separate sessions, while the graph uses that memory to produce incremental research instead of repeating old work.
At this point, Team 1 is a mature, memory-backed research engine. But it only knows about fundamentals. For investment decisions, you also need technical analysis -- price trends, indicators, support/resistance levels.
@@ -102,9 +102,9 @@ Every pattern exists because the previous one creates a real limitation:
| Transition | The problem that forces the next pattern |
|------------|------------------------------------------|
| P01 → P02 | Hardcoded tools don't scale. You can't share tools with Claude Code or other teams. |
-| P02 → P03 | Every request starts fresh. Repeated research wastes tokens and time. |
-| P03 → P04 | Memory grows unbounded. Stale facts pollute analysis. |
-| P03 → P05 | A second team arrives. You can't import their code. You need a protocol. |
+| P02 → P03 | Long-running workflows fail mid-run. Restarting from scratch wastes completed work, and there is no way to resume from the point of failure. |
+| P03 → P04 | Durable execution is not memory. The agent can resume a thread, but it still forgets user interests and prior research across sessions. |
+| P04 → P05 | A second team arrives. You can't import their code. You need a protocol. |
| P05 → P06 | A third team needs data from both others. Sequential calls are too slow. |
| P06 → P07 | Team 2 moves to a partner org. Implicit trust is gone. |
| P07 → P08 | New agents appear. Consumers shouldn't need code changes to use them. |
@@ -156,7 +156,7 @@ Two teams would leave half these problems unaddressed.
### Current: Foundation Tier
-Building Patterns 01-04 -- the single-team foundation with orchestration, MCP tools, and persistent memory.
+Building Patterns 01-04 -- the single-team foundation with orchestration, MCP tools, checkpoint recovery, and agent memory.
### Next: Distribution Tier
diff --git a/examples/01-orchestrator-pipeline/README.md b/examples/01-orchestrator-pipeline/README.md
index edeee90..7c5a2c5 100644
--- a/examples/01-orchestrator-pipeline/README.md
+++ b/examples/01-orchestrator-pipeline/README.md
@@ -43,7 +43,7 @@ The primary UX is running from inside the example folder. A repo-root shortcut a
make example EX=01-orchestrator-pipeline
```
-If you prefer an HTTP client, use [`endpoints.http`](endpoints.http).
+If you prefer prebuilt requests, use [`endpoints.http`](endpoints.http).
## What You Get Back
@@ -105,152 +105,29 @@ graph TD
stateGraph -.->|"optional traces"| langsmith["LangSmith"]
```
-## When to Use / When Not to Use
-
-**Use this pattern when:**
-- You want the simplest real multi-agent architecture that still shows clear orchestration boundaries.
-- You need one service with a few specialized steps and shared typed state.
-- You want a strong teaching or debugging story before adding more infrastructure.
-
-**Avoid this pattern when:**
-- Tools need to be shared across multiple agents, services, or AI clients. That is the motivation for [Pattern 02](../02-mcp-tool-integration/README.md).
-- You need persistence across requests. This pattern starts fresh every time.
-- You need independent deployment, scaling, or trust boundaries between agent groups.
+The architecture is intentionally linear. Pattern 01 is meant to make the orchestration boundaries obvious before the series adds more infrastructure, more protocols, or more runtime concerns.
## Key Concepts
-- **StateGraph**: a typed state object flows through explicit graph nodes and edges.
-- **Focused agents**: each node does one job well instead of carrying one giant all-in-one prompt.
-- **Observable execution**: verbose logs and optional LangSmith traces make every handoff visible.
-- **Graceful degradation**: node-level failures fall back to partial outputs instead of crashing immediately.
+- **Typed state** -- one shared `AgentState` object is the contract between all nodes.
+- **Focused agents** -- each node does one job instead of carrying one giant prompt.
+- **Explicit graph wiring** -- the orchestration is visible in LangGraph edges, not hidden in prompt text.
+- **Graceful degradation** -- weak dependencies can produce partial output instead of collapsing the whole run.
## Implementation Walkthrough
-### Step 1: Define the shared state
-
-The state is the contract between agents. Every node reads what it needs and writes one focused output.
-
-```python
-class AgentState(TypedDict, total=False):
- input: Required[str]
- plan: str
- news: str
- report: str
-```
-
-### Step 2: Build focused async nodes
-
-Every agent follows the same shape: read state, do one job, return a partial update.
-
-The News Scanner is the most interesting node because it combines tool use with an LLM pass and degrades gracefully if search or model calls fail:
-
-```python
-async def news_scanner_node(state: AgentState) -> dict[str, str]:
- try:
- search = DuckDuckGoSearchResults(max_results=8, output_format="list")
- current_year = datetime.now(UTC).year
- raw_results = await search.ainvoke(
- f"{state['input']} crypto project news {current_year}"
- )
- except Exception as exc:
- raw_results = f"[Search unavailable: {type(exc).__name__}]"
-
- llm = get_chat_model()
- response = await llm.ainvoke([...])
- return {"news": str(response.content)}
-```
-
-That design keeps the pipeline educational and resilient: a weak dependency produces degraded output, not an unreadable black box failure.
-
-### Step 3: Wire the graph explicitly
-
-Pattern 01 is a straight-line orchestrator pipeline:
-
-```python
-graph = StateGraph(AgentState)
-graph.add_node("research_planner", research_planner_node)
-graph.add_node("news_scanner", news_scanner_node)
-graph.add_node("intelligence_compiler", intelligence_compiler_node)
-
-graph.set_entry_point("research_planner")
-graph.add_edge("research_planner", "news_scanner")
-graph.add_edge("news_scanner", "intelligence_compiler")
-graph.add_edge("intelligence_compiler", END)
-
-compiled_graph = graph.compile()
-```
-
-### Step 4: Expose the graph via FastAPI
-
-The FastAPI app builds the graph at startup, keeps it on `app.state`, and invokes it from `POST /run`.
-
-```python
-@asynccontextmanager
-async def lifespan(fastapi_app: FastAPI) -> AsyncIterator[None]:
- setup_tracing()
- fastapi_app.state.graph = build_graph()
- yield
-
-@app.post("/run", response_model=RunResponse)
-async def run(request: RunRequest) -> RunResponse | JSONResponse:
- result = await app.state.graph.ainvoke({"input": request.input})
- return RunResponse(
- report=result.get("report", ""),
- plan=result.get("plan", ""),
- news=result.get("news", ""),
- )
-```
+1. Define the shared state in [`src/agents/state.py`](src/agents/state.py). It keeps the contract deliberately small: request input, planner output, research output, and final report.
+2. Define the three focused nodes in [`src/agents/research_planner.py`](src/agents/research_planner.py), [`src/agents/news_scanner.py`](src/agents/news_scanner.py), and [`src/agents/intelligence_compiler.py`](src/agents/intelligence_compiler.py). Each node reads only the fields it needs and returns one partial state update.
+3. Wire the straight-line graph in [`src/agents/graph.py`](src/agents/graph.py). This is the core orchestrator pattern: planner -> scanner -> compiler.
+4. Expose the graph through FastAPI in [`src/app.py`](src/app.py). Startup initializes tracing and the compiled graph, while `POST /run` validates input, invokes the graph, and returns the intermediate artifacts together with the final report.
-Two API details are worth noticing:
+Two API details matter for the developer experience:
- `input` is validated by Pydantic and must be between 3 and 500 characters.
-- If graph execution raises an exception, the endpoint returns `502` with `{"error": "pipeline_failed", "detail": "..."}`.
-
-## What You Should See
-
-With `VERBOSE=true`, container logs show each handoff clearly:
-
-```text
-[14:32:01.234] [ResearchPlanner] Planning research for: Research Arbitrum
-[14:32:03.891] [ResearchPlanner] Plan created (245 chars)
-[14:32:03.892] [NewsScanner] Searching for: Research Arbitrum
-[14:32:06.445] [NewsScanner] Got 8 search results
-[14:32:08.901] [NewsScanner] Analysis complete (523 chars)
-[14:32:08.902] [IntelligenceCompiler] Compiling intelligence report
-[14:32:11.678] [IntelligenceCompiler] Report generated (847 chars)
-```
-
-If `LANGSMITH_API_KEY` is set, startup also enables hosted LangSmith tracing under the shared `agent-patterns-lab` project. Public runs add per-example tags and metadata automatically so one project can still be filtered by example, environment, runtime, and provider. If tracing is requested but no key is set, the app logs a clear warning, disables tracing, and keeps running.
-
-## Verification
-
-Use these checks to confirm the example behaves the way the code and tests expect:
-
-```bash
-# Healthy service
-curl http://localhost:8000/health
-
-# Valid request
-curl -X POST http://localhost:8000/run \
- -H "Content-Type: application/json" \
- -d '{"input": "Research the Solana crypto project"}'
-
-# Validation failure (too short)
-curl -X POST http://localhost:8000/run \
- -H "Content-Type: application/json" \
- -d '{"input": "ab"}'
-```
-
-Expected behavior:
-- `GET /health` returns `{"status": "ok"}`
-- Valid `POST /run` returns `report`, `plan`, and `news`
-- Invalid input returns `422`
-- Unhandled graph failure returns `502`
+- Graph execution failures return `502` with `{"error": "pipeline_failed", "detail": "..."}` instead of a silent container error.
## Local Development
-Docker is the fastest way to try this example. If you want to work on the code
-locally, `uv` is the workspace tool for syncing dependencies, running tests, and
-checking types.
+Docker is the fastest way to try this example. If you want to work on the code locally, `uv` is the workspace tool for syncing dependencies, running tests, and checking types.
```bash
# From the repository root
@@ -263,25 +140,18 @@ uv run python scripts/testing/run_test_suite.py
uv run python scripts/linting/run_mypy.py
```
-Use this path when you want to iterate on the codebase itself rather than just
-run the example container.
-
-## Exercises
+Use this path when you want to iterate on the codebase itself rather than just run the example container.
-1. Add a fourth agent between the News Scanner and Intelligence Compiler to fact-check claims before report generation.
-2. Introduce conditional routing so well-known projects take a shorter research path.
-3. Split research into two parallel branches and compare the trade-off against this simple sequential flow.
+## What You Have Learned
-## Trade-offs
+- How to express a simple multi-agent workflow as a typed LangGraph `StateGraph`.
+- How to split planning, tool use, and synthesis into separate async nodes with clear handoffs.
+- How to expose a LangGraph pipeline through a minimal FastAPI boundary with useful intermediate artifacts.
+- Why a simple orchestrator pattern is a good teaching baseline before adding more protocols or runtime complexity.
-| Advantage | Limitation |
-|-----------|-----------|
-| Very easy to understand and run | Sequential execution adds latency |
-| Clear boundaries between responsibilities | All agents live in one process |
-| Great observability for learning and debugging | Every request starts from scratch |
-| Tool use is easy to add inside a node | Tools are hardcoded, not standardized |
+**Next:** [Pattern 02: MCP Tool Integration](../02-mcp-tool-integration/README.md) extends this pipeline with a second entry point and turns the full capability into an MCP tool so AI clients can call it directly.
-This last limitation is the reason [Pattern 02](../02-mcp-tool-integration/README.md) exists. Once tools need to be reused by multiple agents or external AI clients, direct Python tool calls stop scaling.
+If this project helps you, consider giving it a [star on GitHub](https://github.com/ksopyla/agent-patterns-lab).
## Further Reading
diff --git a/examples/02-mcp-tool-integration/README.md b/examples/02-mcp-tool-integration/README.md
index 974d037..7a5bf04 100644
--- a/examples/02-mcp-tool-integration/README.md
+++ b/examples/02-mcp-tool-integration/README.md
@@ -8,7 +8,7 @@ Useful context:
- [Curriculum](../../docs/curriculum.md)
- [Vision & Roadmap](../../docs/vision.md)
- [Previous pattern: Orchestrator Pipeline](../01-orchestrator-pipeline/README.md)
-- [Next pattern: Persistent Memory](../03-persistent-memory/README.md)
+- [Next pattern: Checkpoint Recovery and Resilience](../03-checkpoint-recovery/README.md)
## Quick Start
@@ -34,6 +34,8 @@ curl -X POST http://localhost:8000/run \
The MCP entry point is at `localhost:8001/sse` -- connect Claude Code, Cursor, or Claude Desktop (see [Connect Your MCP Client](#connect-your-mcp-client) below).
+If you prefer prebuilt REST requests, use [`endpoints.http`](endpoints.http).
+
## What You Get Back
Both entry points (REST and MCP) run the same 5-agent pipeline and produce the same final intelligence report. The REST API returns the full intermediate artifact set for debugging, while the MCP tool returns the final `report` only because MCP tools should expose outcomes rather than internal pipeline state:
@@ -101,48 +103,18 @@ The MCP server and REST API share the same Docker image but run as **separate co
## Key Concepts
-- **Outcome-oriented MCP tool** -- expose `research_crypto_project` (the full pipeline), not raw API wrappers like `get_coin_price`
-- **Parallel fan-out/fan-in** -- three research nodes run concurrently via LangGraph `add_edge`; compiler waits for all three
-- **Data source ownership** -- each node owns one external source (DuckDuckGo or CoinGecko), no duplication
-- **Graceful degradation** -- CoinGecko retry with backoff; search and LLM failures produce partial output, not crashes
-- **Synchronous execution boundary** -- REST and MCP both wait for the full pipeline result and fail fast after 120 seconds instead of hanging indefinitely
+- **Outcome-oriented MCP tool** -- expose `research_crypto_project`, not raw API wrappers like `get_coin_price`.
+- **Parallel fan-out/fan-in** -- three research nodes run concurrently and the compiler waits for all three.
+- **Data source ownership** -- each node owns one external source, which keeps responsibilities clean.
+- **Synchronous execution boundary** -- REST and MCP both wait for the full result and fail fast after 120 seconds.
## Implementation Walkthrough
-### MCP Server
-
-The full implementation is in [`src/mcp_servers/crypto_intelligence.py`](src/mcp_servers/crypto_intelligence.py) -- ~30 lines. It builds the graph once and wraps it as a single MCP tool:
-
-```python
-mcp = FastMCP("crypto-intelligence", host="0.0.0.0", port=8000)
-_graph = build_graph()
-
-@mcp.tool()
-async def research_crypto_project(query: str) -> str:
- result = await _graph.ainvoke({"input": query})
- return result.get("report", "")
-```
-
-### Parallel Graph
-
-The graph uses LangGraph's native fan-out: after `research_planner`, three edges fire simultaneously to `news_scanner`, `project_profiler`, and `community_analyst`. The compiler waits for all three via fan-in. See [`src/agents/graph.py`](src/agents/graph.py) for the wiring and the [LangGraph branching docs](https://langchain-ai.github.io/langgraph/how-tos/branching/) for the pattern.
-
-The planner extracts `project_name` and `coin_ticker` via LLM and generates search queries so downstream nodes don't pass raw user input to external APIs. See [`src/agents/research_planner.py`](src/agents/research_planner.py).
-
-### Docker Compose
-
-Both containers use the same image -- only the command differs. See [`docker-compose.yml`](docker-compose.yml):
-
-```yaml
-services:
- crypto-intelligence-mcp:
- command: ["uvicorn", "src.mcp_servers.crypto_intelligence:app", ...]
- ports: ["8001:8000"]
-
- agent:
- # default CMD: uvicorn src.app:app
- ports: ["8000:8000"]
-```
+1. Define the expanded Team 1 state in [`src/agents/state.py`](src/agents/state.py). Pattern 02 grows beyond Pattern 01 by adding planner-generated identifiers and branch-specific outputs such as `project_name`, `coin_ticker`, `news_queries`, `community_queries`, `profile`, and `community`.
+2. Define the five agent nodes in [`src/agents/research_planner.py`](src/agents/research_planner.py), [`src/agents/news_scanner.py`](src/agents/news_scanner.py), [`src/agents/project_profiler.py`](src/agents/project_profiler.py), [`src/agents/community_analyst.py`](src/agents/community_analyst.py), and [`src/agents/intelligence_compiler.py`](src/agents/intelligence_compiler.py). The important design choice is data-source ownership: DuckDuckGo stays in the search nodes and CoinGecko stays in the profiler.
+3. Wire the parallel fan-out / fan-in graph in [`src/agents/graph.py`](src/agents/graph.py). After `research_planner`, LangGraph launches the three research branches in parallel and waits until all of them finish before the compiler runs.
+4. Expose the same graph through two entry points: REST in [`src/app.py`](src/app.py) and MCP in [`src/mcp_servers/crypto_intelligence.py`](src/mcp_servers/crypto_intelligence.py). Shared timeout and tracing metadata live in [`src/runtime.py`](src/runtime.py), which keeps both transports aligned.
+5. Package both transports with one Docker image in [`docker-compose.yml`](docker-compose.yml). The agent and MCP server are separate containers with different commands, but they serve the same capability.
## Connect Your MCP Client
@@ -186,26 +158,16 @@ uv run python scripts/testing/run_test_suite.py
uv run python scripts/linting/run_mypy.py
```
-## Exercises
-
-1. **Add a lightweight MCP tool**: Expose `get_crypto_price(project_name)` that skips the full pipeline and returns just the current price via CoinGecko.
-2. **Add a fourth parallel branch**: Create a `tokenomics_analyst` node that fans out alongside the other three research nodes.
-
-## Trade-offs
+## What You Have Learned
-| Advantage | Limitation |
-|-----------|-----------|
-| Any MCP client gets the full agent capability | MCP server runs the full pipeline per call (cost/latency) |
-| Parallel execution cuts wall-clock time vs. sequential | Three concurrent DuckDuckGo/CoinGecko calls may hit rate limits faster |
-| Claude Desktop is the "UI" -- no custom frontend | Streaming partial results is not supported (Pattern 06 adds this) |
-| Same graph, two entry points -- no code duplication | Two containers for the same image |
-| REST exposes intermediate artifacts for debugging; MCP exposes only the final report | Entry points are intentionally asymmetric, so clients see different response shapes |
-| Internal data sources are hidden from clients | CoinGecko rate limits apply (30 req/min free tier; retry with backoff mitigates) |
-| Timeout prevents hung requests from running forever | Background jobs / `202 Accepted` polling are not implemented in this pattern to keep focus on MCP integration |
+- How to expose an agent capability through MCP instead of only through a REST endpoint.
+- How to use LangGraph fan-out / fan-in to parallelize independent research branches.
+- How to keep one graph behind two transports without duplicating business logic.
+- Why outcome-oriented tools are a better MCP interface than exposing raw API plumbing.
-Both entry points currently execute the pipeline synchronously. In a production system with longer-running research, a background-task or job-queue design such as `POST /run -> 202 Accepted -> GET /tasks/{id}` would be reasonable, but that extra lifecycle machinery would distract from the MCP lesson here.
+**Next:** [Pattern 03: Checkpoint Recovery and Resilience](../03-checkpoint-recovery/README.md) keeps the same Team 1 graph but adds durable execution, retry-after-failure, and human-in-the-loop interrupts for ambiguous project selection.
-This last limitation -- every request starts from scratch -- is the reason [Pattern 03](../03-persistent-memory/README.md) exists.
+If this project helps you, consider giving it a [star on GitHub](https://github.com/ksopyla/agent-patterns-lab).
## Further Reading
diff --git a/examples/02-mcp-tool-integration/src/agents/intelligence_compiler.py b/examples/02-mcp-tool-integration/src/agents/intelligence_compiler.py
index 4f92098..b5a55f4 100644
--- a/examples/02-mcp-tool-integration/src/agents/intelligence_compiler.py
+++ b/examples/02-mcp-tool-integration/src/agents/intelligence_compiler.py
@@ -1,7 +1,7 @@
"""Intelligence Compiler agent -- synthesizes all research into a structured report.
Reads: state["input"], state["project_name"], state["coin_ticker"],
- state["plan"], state["news"], state["profile"], state["community"]
+ state["news"], state["profile"], state["community"]
Writes: state["report"]
This is the fan-in node that waits for all parallel research branches to
diff --git a/examples/03-checkpoint-recovery/README.md b/examples/03-checkpoint-recovery/README.md
new file mode 100644
index 0000000..5b96e9e
--- /dev/null
+++ b/examples/03-checkpoint-recovery/README.md
@@ -0,0 +1,233 @@
+# Pattern 03: Checkpoint Recovery and Resilience
+
+> Recover long-running agent workflows from the last successful checkpoint, and pause safely for human clarification when the planner is unsure which crypto project the user meant.
+
+`Pattern 03 of 9`. Keeps Team 1's five-agent intelligence pipeline from [Pattern 02](../02-mcp-tool-integration/README.md), adds two nodes in front of the research branches (`project_verifier` and `project_selector`), and changes the operational model: the graph is now checkpointed in PostgreSQL, failures can be retried with the same `thread_id`, and ambiguous CoinGecko matches trigger `interrupt()` instead of silently choosing the wrong project.
+
+Thread inspection is exposed as MCP tools, not REST endpoints -- because the consumer is an AI agent (Claude, Cursor), not a human clicking a dashboard.
+
+Useful context:
+- [Curriculum](../../docs/curriculum.md)
+- [Vision & Roadmap](../../docs/vision.md)
+- [Previous pattern: MCP Tool Integration](../02-mcp-tool-integration/README.md)
+
+## Quick Start
+
+```bash
+# From the repository root
+cp .env.example .env
+
+# Required:
+# - Azure OpenAI (AZURE_OPENAI_*) or Anthropic (ANTHROPIC_API_KEY + LLM_PROVIDER=anthropic)
+#
+# Optional but recommended:
+# - LANGSMITH_API_KEY for hosted LangSmith traces
+
+cd examples/03-checkpoint-recovery
+docker compose up --build
+
+# Health
+curl http://localhost:8000/health
+
+# Start a checkpointed run
+curl -X POST http://localhost:8000/run \
+ -H "Content-Type: application/json" \
+ -d "{\"input\": \"Research the Arbitrum crypto project\", \"thread_id\": \"arb-demo-thread\"}"
+
+# Resume the run -- only needed when the call above returned "status": "interrupted" and asked you to choose a CoinGecko project
+curl -X POST http://localhost:8000/run/resume \
+ -H "Content-Type: application/json" \
+ -d "{\"thread_id\": \"arb-demo-thread\", \"selected_coin_id\": \"arbitrum\"}"
+```
+
+The example depends on the repo-root `.env` file and starts three containers: PostgreSQL, the REST API on `:8000`, and the MCP server on `:8001`.
+
+If you prefer prebuilt REST requests, use [`endpoints.http`](endpoints.http).
+
+## What You Get Back
+
+`POST /run` returns one of two outcomes.
+
+Completed run:
+
+```json
+{
+ "status": "completed",
+ "thread_id": "arb-demo-thread",
+ "report": "## Executive Summary\nArbitrum is a leading Layer 2...",
+ "plan": "1. Recent news and partnerships\n2. Project fundamentals...",
+ "news": "Key findings from web search...",
+ "profile": "Technology: Optimistic rollup on Ethereum...",
+ "community": "Community Health: Strong...",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "coin_id": "arbitrum"
+}
+```
+
+Interrupted run:
+
+```json
+{
+ "status": "interrupted",
+ "thread_id": "mercury-demo-thread",
+ "interrupt_type": "ambiguous_project",
+ "message": "Multiple CoinGecko matches found for Mercury. Choose the correct project to continue.",
+ "project_name": "Mercury",
+ "coin_ticker": "",
+ "matches": [
+ {
+ "coin_id": "mercury",
+ "name": "Mercury",
+ "symbol": "MER",
+ "market_cap_rank": 999
+ }
+ ]
+}
+```
+
+For failure recovery, call `POST /run` again with the same `thread_id`. For human-in-the-loop interrupts, call `POST /run/resume` with that `thread_id` and the `selected_coin_id` you picked from `matches`.
+
+## At a Glance
+
+| Item | Details |
+|------|---------|
+| Pattern role | Introduces durable execution and human checkpoints |
+| Team | Team 1: Intelligence |
+| Agents | Research Planner, Project Verifier, Project Selector, News Scanner, Project Profiler, Community Analyst, Intelligence Compiler |
+| Graph | Extends P02 fan-out/fan-in with verifier + selector before parallel branches |
+| New runtime behavior | PostgreSQL-backed checkpoints, retry-after-failure, interrupt/resume |
+| REST endpoints | `GET /health`, `POST /run`, `POST /run/resume` (minimal -- thread inspection is MCP) |
+| MCP tools | `research_crypto_project`, `get_research_status`, `list_research_threads`, `delete_research_thread` |
+| Storage | PostgreSQL for LangGraph checkpoints (no separate thread metadata table) |
+| External data | CoinGecko, DuckDuckGo |
+| Observability | `VERBOSE=true` logs and LangSmith metadata tagged with `thread_id` |
+
+## The Problem
+
+Pattern 02 has a realistic failure surface: three external API calls, multiple LLM calls, and a parallel graph. If `project_profiler` fails after `news_scanner` and `community_analyst` succeed, the whole workflow has to be replayed unless the graph is checkpointed.
+
+There is also a correctness problem. If the planner extracts a project name like "Mercury", blindly taking the first CoinGecko search result is risky. The graph should pause and ask the human to choose the intended project instead of continuing with the wrong coin.
+
+## Architecture
+
+```mermaid
+graph TD
+ User["Developer / API client"] -->|"POST /run"| RestApi["FastAPI REST :8000"]
+ Claude["Claude Code / Cursor / MCP client"] -->|"MCP SSE"| Mcp["crypto-intelligence MCP :8001"]
+ RestApi --> Service["Checkpointed execution service"]
+ Mcp --> Service
+ Mcp -->|"get_research_status\nlist_research_threads\ndelete_research_thread"| Checkpoint["LangGraph checkpoint state"]
+ Service --> Graph["LangGraph graph\nplanner -> verifier -> selector -> [news | profile | community] -> compiler"]
+ Graph --> Postgres["PostgreSQL\ncheckpoints"]
+ Graph --> Hitl["interrupt() / resume\nambiguous project resolution"]
+ Graph --> CoinGecko["CoinGecko API"]
+ Graph --> DuckDuckGo["DuckDuckGo search"]
+```
+
+The graph extends Pattern 02's fan-out/fan-in with two new nodes before the parallel branches: `project_verifier` checks whether CoinGecko returns a single unambiguous match, and `project_selector` calls `interrupt()` when multiple matches exist so the human can choose the right project before research continues. Around the graph sit the new operational pieces: a durable PostgreSQL checkpointer and a service layer that knows the difference between retrying a failed run and resuming an interrupted one. Thread inspection is exposed as MCP tools that derive status from LangGraph's checkpoint state directly -- no separate status table.
+
+## Key Concepts
+
+- **Checkpointing is resilience, not memory** -- the same `thread_id` resumes a failed workflow, but it does not create cross-session knowledge.
+- **Retry and resume are different** -- retry a failed run with `POST /run`; resume a human pause with `POST /run/resume`.
+- **Interrupts are graph behavior** -- ambiguous CoinGecko matches become `interrupt()` calls instead of a hidden best guess.
+- **Thread status is derived from checkpoints** -- the MCP tools inspect LangGraph state instead of maintaining a parallel status table.
+
+## Implementation Walkthrough
+
+1. Build the durable runtime in [`src/runtime.py`](src/runtime.py). It opens the PostgreSQL pool, initializes the LangGraph checkpointer, and compiles the graph once so both transports share the same checkpoint-backed execution engine.
+2. Keep run semantics in [`src/service.py`](src/service.py). That module decides whether a call is a fresh run, a retry of a failed thread, or a resume of a human interruption by inspecting checkpoint state rather than maintaining a separate thread table.
+3. Keep the human-in-the-loop logic in [`src/agents/research_planner.py`](src/agents/research_planner.py). Pattern 03 splits planning, project verification, and project selection into three nodes defined in that module (`research_planner_node`, `project_verifier_node`, `project_selector_node`) so the graph can pause safely when CoinGecko returns multiple plausible matches.
+4. Expose the graph through two transport boundaries: REST in [`src/app.py`](src/app.py) and MCP in [`src/mcp_servers/crypto_intelligence.py`](src/mcp_servers/crypto_intelligence.py). REST stays minimal with `/health`, `POST /run`, and `POST /run/resume`, while MCP becomes the richer agent-facing interface for thread inspection.
+
+The MCP server exposes four tools:
+
+| Tool | Purpose |
+|------|---------|
+| `research_crypto_project` | Run or resume a crypto research pipeline |
+| `get_research_status` | Inspect a thread's checkpoint state (completed / interrupted / resumable) |
+| `list_research_threads` | List all known threads with their derived status |
+| `delete_research_thread` | Delete a thread and its checkpoint data |
+
+## Connect Your MCP Client
+
+The MCP server is exposed at `http://localhost:8001/sse`.
+
+With `docker compose up` running, connect from your tool of choice:
+
+**Claude Code:**
+```bash
+claude mcp add --transport sse crypto-intelligence http://localhost:8001/sse
+```
+
+**Cursor** -- add to your project's `.cursor/mcp.json`:
+```json
+{
+ "mcpServers": {
+ "crypto-intelligence": {
+ "url": "http://localhost:8001/sse"
+ }
+ }
+}
+```
+
+**Claude Desktop** -- add to `%APPDATA%\Claude\claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
+```json
+{
+ "mcpServers": {
+ "crypto-intelligence": {
+ "url": "http://localhost:8001/sse"
+ }
+ }
+}
+```
+
+Once connected, the tool surface is:
+
+```text
+research_crypto_project(query, thread_id?, selected_coin_id?)
+get_research_status(thread_id)
+list_research_threads()
+delete_research_thread(thread_id)
+```
+
+Typical flow:
+
+1. Call `research_crypto_project(query="Research Mercury", thread_id="mercury-demo-thread")`.
+2. If the graph interrupts at `project_selector`, the tool returns a message listing the candidate CoinGecko IDs.
+3. Call `research_crypto_project(query="Research Mercury", thread_id="mercury-demo-thread", selected_coin_id="mercury")`.
+4. Optionally call `get_research_status("mercury-demo-thread")` to inspect the thread later.
+
+## Local Development
+
+```bash
+# Install workspace dependencies
+uv sync --all-packages
+
+# Run just this example's tests
+uv run pytest examples/03-checkpoint-recovery/tests
+
+# Run repo checks
+uv run python scripts/testing/run_test_suite.py
+uv run ruff check .
+uv run ruff format --check .
+uv run python scripts/linting/run_mypy.py
+```
+
+## What You Have Learned
+
+- How to add durable execution to an existing LangGraph pipeline with minimal graph changes.
+- How to model retry-after-failure and resume-after-interrupt as separate execution paths around one shared graph.
+- How to use `interrupt()` and `Command(resume=...)` to make ambiguous external matches explicit and safe.
+- How to expose checkpoint inspection as MCP tools instead of adding REST CRUD around workflow state.
+
+**Next:** [Pattern 04: Agent Memory and Knowledge](../04-agent-memory/README.md) builds on this durable runtime by adding real cross-session memory so the system remembers users, projects, and prior research beyond a single thread.
+
+If this project helps you, consider giving it a [star on GitHub](https://github.com/ksopyla/agent-patterns-lab).
+
+## Further Reading
+
+- [LangGraph persistence](https://docs.langchain.com/oss/python/langgraph/persistence)
+- [LangGraph interrupts](https://docs.langchain.com/oss/python/langgraph/interrupts)
+- [PostgreSQL checkpointers in LangGraph](https://docs.langchain.com/oss/python/langgraph/add-memory)
diff --git a/examples/03-checkpoint-recovery/docker-compose.yml b/examples/03-checkpoint-recovery/docker-compose.yml
new file mode 100644
index 0000000..6f41aee
--- /dev/null
+++ b/examples/03-checkpoint-recovery/docker-compose.yml
@@ -0,0 +1,63 @@
+services:
+ postgres:
+ image: postgres:17
+ environment:
+ POSTGRES_DB: agent_patterns
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: postgres # pragma: allowlist secret
+ ports:
+ - "5432:5432"
+ volumes:
+ - pattern03-pgdata:/var/lib/postgresql/data
+ healthcheck:
+ test: ["CMD-SHELL", "pg_isready -U postgres -d agent_patterns"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+
+ crypto-intelligence-mcp:
+ build:
+ context: ../..
+ dockerfile: infra/docker/base/Dockerfile.agent
+ args:
+ PACKAGE_NAME: example-03-checkpoint-recovery
+ EXAMPLE_PYPROJECT: examples/03-checkpoint-recovery/pyproject.toml
+ EXAMPLE_SRC: examples/03-checkpoint-recovery/src/
+ command: ["uvicorn", "src.mcp_servers.crypto_intelligence:app", "--host", "0.0.0.0", "--port", "8000"]
+ ports:
+ - "8001:8000"
+ env_file:
+ - ../../.env
+ environment:
+ - VERBOSE=${VERBOSE:-true}
+ - POSTGRES_URI=postgresql://postgres:postgres@postgres:5432/agent_patterns?sslmode=disable
+ depends_on:
+ postgres:
+ condition: service_healthy
+
+ agent:
+ build:
+ context: ../..
+ dockerfile: infra/docker/base/Dockerfile.agent
+ args:
+ PACKAGE_NAME: example-03-checkpoint-recovery
+ EXAMPLE_PYPROJECT: examples/03-checkpoint-recovery/pyproject.toml
+ EXAMPLE_SRC: examples/03-checkpoint-recovery/src/
+ ports:
+ - "8000:8000"
+ env_file:
+ - ../../.env
+ environment:
+ - VERBOSE=${VERBOSE:-true}
+ - POSTGRES_URI=postgresql://postgres:postgres@postgres:5432/agent_patterns?sslmode=disable
+ depends_on:
+ postgres:
+ condition: service_healthy
+ healthcheck:
+ test: ["CMD", "python", "-c", "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"]
+ interval: 10s
+ timeout: 5s
+ retries: 3
+
+volumes:
+ pattern03-pgdata:
diff --git a/examples/03-checkpoint-recovery/endpoints.http b/examples/03-checkpoint-recovery/endpoints.http
new file mode 100644
index 0000000..3c5ce8a
--- /dev/null
+++ b/examples/03-checkpoint-recovery/endpoints.http
@@ -0,0 +1,28 @@
+### Health
+GET http://localhost:8000/health
+
+### Start a new thread
+POST http://localhost:8000/run
+Content-Type: application/json
+
+{
+ "input": "Research the Arbitrum crypto project"
+}
+
+### Start with an explicit thread ID
+POST http://localhost:8000/run
+Content-Type: application/json
+
+{
+ "input": "Research the Arbitrum crypto project",
+ "thread_id": "arb-demo-thread"
+}
+
+### Resume an interrupted thread
+POST http://localhost:8000/run/resume
+Content-Type: application/json
+
+{
+ "thread_id": "arb-demo-thread",
+ "selected_coin_id": "arbitrum"
+}
diff --git a/examples/03-checkpoint-recovery/pyproject.toml b/examples/03-checkpoint-recovery/pyproject.toml
new file mode 100644
index 0000000..6696895
--- /dev/null
+++ b/examples/03-checkpoint-recovery/pyproject.toml
@@ -0,0 +1,26 @@
+[project]
+name = "example-03-checkpoint-recovery"
+version = "0.1.0"
+description = "Pattern 03: Checkpoint Recovery and Resilience -- durable execution with LangGraph checkpoints"
+requires-python = ">=3.14"
+dependencies = [
+ "langgraph>=0.4",
+ "langchain-core>=0.3",
+ "langchain-community>=0.3",
+ "langchain-openai>=0.3",
+ "langchain-anthropic>=0.3",
+ "langgraph-checkpoint-postgres",  # NOTE(review): no lower bound, unlike the neighbouring entries
+ "langsmith>=0.3",
+ "mcp>=1.0",
+ "duckduckgo-search>=8.0",
+ "ddgs>=7.0",  # NOTE(review): listed alongside duckduckgo-search; confirm both are still required
+ "fastapi>=0.115",
+ "uvicorn>=0.34",
+ "pydantic>=2.0",
+ "psycopg[binary,pool]",  # NOTE(review): no lower bound; presumably "pool" backs the runtime's PostgreSQL pool
+ "httpx>=0.28",
+ "agent-common",  # resolved from the workspace via [tool.uv.sources] below
+]
+
+[tool.uv.sources]
+agent-common = { workspace = true }
diff --git a/examples/03-checkpoint-recovery/src/__init__.py b/examples/03-checkpoint-recovery/src/__init__.py
new file mode 100644
index 0000000..ecf07d7
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/__init__.py
@@ -0,0 +1 @@
+"""Pattern 03 example package."""
diff --git a/examples/03-checkpoint-recovery/src/agents/__init__.py b/examples/03-checkpoint-recovery/src/agents/__init__.py
new file mode 100644
index 0000000..2c5314e
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/__init__.py
@@ -0,0 +1 @@
+"""Agent nodes for Pattern 03."""
diff --git a/examples/03-checkpoint-recovery/src/agents/community_analyst.py b/examples/03-checkpoint-recovery/src/agents/community_analyst.py
new file mode 100644
index 0000000..4f1c5ab
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/community_analyst.py
@@ -0,0 +1,85 @@
+"""Community Analyst agent -- analyzes social media sentiment and community activity.
+
+Reads: state["project_name"], state["coin_ticker"], state["community_queries"]
+Writes: state["community"]
+
+Uses DuckDuckGo with site-restricted queries (reddit.com, twitter/X keywords)
+to gauge community sentiment. Does NOT call CoinGecko -- that data source is
+owned exclusively by project_profiler. Shared search mechanics live in
+src.agents.web_search.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from agent_common.llm import get_chat_model
+from agent_common.tracing import verbose_log
+from langchain_core.messages import HumanMessage, SystemMessage
+
+from src.agents.state import AgentState
+from src.agents.web_search import format_search_results, run_search_queries
+
+SYSTEM_PROMPT = """\
+You are a crypto community and sentiment analyst. You receive web search results \
+focused on social media discussions about a crypto project.
+
+Assess the project's community health:
+1. **Reddit sentiment** — what are people saying? Bullish, bearish, skeptical?
+2. **X/Twitter buzz** — influencer mentions, trending topics, community debates
+3. **Overall retail mood** — is the community growing, stable, or declining?
+4. **Red flags** — scam warnings, rug-pull concerns, team complaints, abandonment signals
+
+For each finding, cite the source. Be factual — distinguish between verified \
+community activity and speculation.
+
+End with a Community Health Rating: Strong / Moderate / Weak — with a one-sentence \
+justification. If data is insufficient, rate as "Insufficient Data" and explain why."""
+
+
+def _build_queries(project_name: str, ticker: str, community_queries: list[str]) -> list[str]:
+ """Return up to four planner-supplied queries, or three templated social queries as a fallback."""
+ if community_queries:
+ return community_queries[:4]  # planner output wins; the slice caps it at four queries
+
+ current_year = datetime.now(UTC).year  # year is read in UTC, not the host's local timezone
+ return [
+ f"{project_name} {ticker} site:reddit.com",  # an empty ticker leaves a double space in the query
+ f"{project_name} crypto twitter sentiment {current_year}",
+ f"{project_name} {ticker} community discussion {current_year}",
+ ]
+
+
+async def community_analyst_node(state: AgentState) -> dict[str, str]:
+ """Run social-focused searches, summarize them with the chat model, and return the text under "community"."""
+ project_name = state.get("project_name", state["input"])  # falls back to the raw user input
+ ticker = state.get("coin_ticker", "")
+ community_queries = state.get("community_queries", [])
+ verbose_log("CommunityAnalyst", f"Analyzing community for: {project_name} ({ticker})")
+
+ queries = _build_queries(project_name, ticker, community_queries)
+ verbose_log("CommunityAnalyst", f"Running {len(queries)} social search queries")
+
+ search_results = await run_search_queries(queries, "CommunityAnalyst")
+ results_text = format_search_results(
+ search_results,
+ empty_message="[No social media results found]",
+ )
+
+ try:
+ llm = get_chat_model()
+ response = await llm.ainvoke(
+ [
+ SystemMessage(content=SYSTEM_PROMPT),
+ HumanMessage(
+ content=(f"Project: {project_name} ({ticker})\n\nSocial media search results:\n{results_text}")
+ ),
+ ]
+ )
+ community = str(response.content)  # coerce the response content to plain text
+ except Exception as exc:  # best-effort: any LLM failure degrades to the raw search text
+ verbose_log("CommunityAnalyst", f"LLM call failed: {exc}")
+ community = f"[Community analysis failed: {type(exc).__name__}]\nRaw data:\n{results_text}"
+
+ verbose_log("CommunityAnalyst", f"Community analysis complete ({len(community)} chars)")
+ return {"community": community}
diff --git a/examples/03-checkpoint-recovery/src/agents/graph.py b/examples/03-checkpoint-recovery/src/agents/graph.py
new file mode 100644
index 0000000..9ddf9dc
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/graph.py
@@ -0,0 +1,75 @@
+"""LangGraph StateGraph wiring for the Pattern 03 intelligence pipeline.
+
+Architecture (fan-out / fan-in):
+
+ research_planner -> project_verifier -> project_selector
+ |
+ +-> news_scanner ---------\
+ +-> project_profiler ------> intelligence_compiler
+ +-> community_analyst ----/
+
+The three research nodes run in parallel after the project has either been
+verified automatically or selected by the human.
+
+Pattern 03 adds a durable checkpointer at compile time so runs can resume after
+failure or interrupt.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from agent_common.tracing import verbose_log
+from langgraph.graph import END, START, StateGraph
+
+from src.agents.community_analyst import community_analyst_node
+from src.agents.intelligence_compiler import intelligence_compiler_node
+from src.agents.news_scanner import news_scanner_node
+from src.agents.project_profiler import project_profiler_node
+from src.agents.research_planner import project_selector_node, project_verifier_node, research_planner_node
+from src.agents.state import AgentState
+
+if TYPE_CHECKING:
+ from langgraph.graph.state import CompiledStateGraph
+
+
+def build_graph(*, checkpointer: Any | None = None) -> CompiledStateGraph: # type: ignore[type-arg]
+ """Wire the seven-node research graph and compile it, passing `checkpointer` through when one is given."""
+ verbose_log(
+ "System",
+ (
+ "Building graph: research_planner → project_verifier → project_selector "
+ "→ [news_scanner | project_profiler | community_analyst] → compiler"
+ ),
+ )
+
+ graph = StateGraph(AgentState)
+
+ graph.add_node("research_planner", research_planner_node)
+ graph.add_node("project_verifier", project_verifier_node)
+ graph.add_node("project_selector", project_selector_node)
+ graph.add_node("news_scanner", news_scanner_node)
+ graph.add_node("project_profiler", project_profiler_node)
+ graph.add_node("community_analyst", community_analyst_node)
+ graph.add_node("intelligence_compiler", intelligence_compiler_node)
+
+ graph.add_edge(START, "research_planner")
+ graph.add_edge("research_planner", "project_verifier")
+ graph.add_edge("project_verifier", "project_selector")  # linear prefix: plan, verify, then select
+
+ # Fan-out: project selection → three parallel research branches
+ graph.add_edge("project_selector", "news_scanner")
+ graph.add_edge("project_selector", "project_profiler")
+ graph.add_edge("project_selector", "community_analyst")
+
+ # Fan-in: all branches → compiler
+ graph.add_edge("news_scanner", "intelligence_compiler")
+ graph.add_edge("project_profiler", "intelligence_compiler")
+ graph.add_edge("community_analyst", "intelligence_compiler")
+
+ graph.add_edge("intelligence_compiler", END)
+
+ if checkpointer is None:
+ return graph.compile()  # no checkpointer supplied: compile without one
+
+ return graph.compile(checkpointer=checkpointer)
diff --git a/examples/03-checkpoint-recovery/src/agents/intelligence_compiler.py b/examples/03-checkpoint-recovery/src/agents/intelligence_compiler.py
new file mode 100644
index 0000000..b5a55f4
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/intelligence_compiler.py
@@ -0,0 +1,82 @@
+"""Intelligence Compiler agent -- synthesizes all research into a structured report.
+
+Reads: state["input"], state["project_name"], state["coin_ticker"],
+ state["news"], state["profile"], state["community"]
+Writes: state["report"]
+
+This is the fan-in node that waits for all parallel research branches to
+complete, then produces the final intelligence report.
+"""
+
+from __future__ import annotations
+
+from agent_common.llm import get_chat_model
+from agent_common.tracing import verbose_log
+from langchain_core.messages import HumanMessage, SystemMessage
+
+from src.agents.state import AgentState
+
+# System prompt for the compiler LLM call: lists the seven report sections and the output rules.
+SYSTEM_PROMPT = """\
+You are a senior crypto intelligence analyst producing a client-facing report.
+You receive three independent research outputs: news analysis, project profile \
+(with market and developer data), and community sentiment assessment.
+
+Produce a comprehensive intelligence report with these sections:
+
+1. **Executive Summary** — 2-3 sentence overview: what the project is, its current \
+market position, and the overall signal (bullish/bearish/neutral).
+
+2. **Market Snapshot** — Current price, market cap, 24h volume, 24h change. \
+Use exact numbers from the profile data. If unavailable, state "Data not available".
+
+3. **Key Findings** — Top 5 most important discoveries across all research. \
+Prioritize facts that would affect an investment decision.
+
+4. **Recent Developments** — Notable news, partnerships, milestones from the \
+news analysis. Include dates and sources where available.
+
+5. **Developer & Community Health** — GitHub activity metrics, community size, \
+social sentiment. Cite specific numbers (stars, forks, commits, followers).
+
+6. **Risk Factors** — Concrete concerns: declining metrics, regulatory threats, \
+team issues, competitive pressure. No generic boilerplate.
+
+7. **Outlook** — Forward-looking assessment with a confidence level \
+(High / Medium / Low) and 1-2 specific catalysts or risks to watch.
+
+Rules:
+- Under 600 words total.
+- Clearly distinguish verified facts from speculation.
+- If an entire section has no data, write "Insufficient data for this section."
+- Do NOT fabricate numbers, team members, or partnerships."""
+
+
+async def intelligence_compiler_node(state: AgentState) -> dict[str, str]:
+    """Fan-in node: turn the three research outputs into the final report.
+
+    Reads ``project_name`` (falling back to ``input``), ``coin_ticker``, ``news``,
+    ``profile`` and ``community`` from ``state``. A research section that is
+    absent from the state is sent to the model as ``"N/A"``.
+
+    Returns:
+        A state update of the form ``{"report": <model output as str>}``.
+    """
+    subject = state.get("project_name", state["input"])
+    symbol = state.get("coin_ticker", "")
+    verbose_log("IntelligenceCompiler", f"Compiling report for {subject} ({symbol})")
+
+    news_section = state.get("news", "N/A")
+    profile_section = state.get("profile", "N/A")
+    community_section = state.get("community", "N/A")
+
+    model = get_chat_model()
+    user_prompt = (
+        f"Project: {subject} ({symbol})\n\n"
+        f"--- NEWS ANALYSIS ---\n{news_section}\n\n"
+        f"--- PROJECT PROFILE (market data + developer stats) ---\n{profile_section}\n\n"
+        f"--- COMMUNITY SENTIMENT ---\n{community_section}"
+    )
+    reply = await model.ainvoke(
+        [
+            SystemMessage(content=SYSTEM_PROMPT),
+            HumanMessage(content=user_prompt),
+        ]
+    )
+
+    compiled = str(reply.content)
+    verbose_log("IntelligenceCompiler", f"Report generated ({len(compiled)} chars)")
+    return {"report": compiled}
diff --git a/examples/03-checkpoint-recovery/src/agents/news_scanner.py b/examples/03-checkpoint-recovery/src/agents/news_scanner.py
new file mode 100644
index 0000000..36ef710
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/news_scanner.py
@@ -0,0 +1,85 @@
+"""News Scanner agent -- searches the web for recent news about a crypto project.
+
+Reads: state["project_name"], state["coin_ticker"], state["news_queries"]
+Writes: state["news"]
+
+Uses DuckDuckGo web search directly (not through MCP). It receives typed search
+queries from research_planner and falls back to simple templates when those are
+missing. Shared search mechanics live in src.agents.web_search.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from agent_common.llm import get_chat_model
+from agent_common.tracing import verbose_log
+from langchain_core.messages import HumanMessage, SystemMessage
+
+from src.agents.state import AgentState
+from src.agents.web_search import format_search_results, run_search_queries
+
+# System prompt for the news-analysis LLM call: focus areas, per-finding format, closing sentiment summary.
+SYSTEM_PROMPT = """\
+You are a crypto news analyst. You receive raw web search results about a crypto project.
+
+Your focus areas:
+- Partnerships and strategic announcements
+- Recent events — positive and negative
+- Overall sentiment on finance portals and crypto media
+- Regulatory or exchange-related developments
+- Any red flags (hacks, lawsuits, team departures)
+
+For each finding provide:
+- The fact or claim (one sentence)
+- Source attribution (site name or URL)
+- How recent it appears
+
+End with a 2-sentence "News Sentiment" summary (bullish / bearish / neutral with reasoning).
+If search results are thin, say so explicitly — do NOT fabricate information."""
+
+
+def _build_queries(project_name: str, ticker: str, news_queries: list[str]) -> list[str]:
+    """Choose the web-search queries for the news scan.
+
+    Args:
+        project_name: Project name used in the fallback templates.
+        ticker: Ticker symbol; may be empty when no ticker is known.
+        news_queries: Planner-generated queries. When non-empty, the first four
+            are returned as-is and the templates are not used.
+
+    Returns:
+        At most four query strings.
+    """
+    if news_queries:
+        return news_queries[:4]
+
+    current_year = datetime.now(UTC).year
+    # With an empty ticker the templates would otherwise contain a double space
+    # and a query that names no project at all, so substitute the project name
+    # wherever the ticker would have stood alone.
+    name_and_ticker = " ".join(part for part in (project_name, ticker) if part)
+    regulatory_subject = ticker or project_name
+    return [
+        f"{project_name} latest news {current_year}",
+        f"{project_name} partnership announcement",
+        f"{name_and_ticker} crypto update",
+        f"{regulatory_subject} crypto regulatory news {current_year}",
+    ]
+
+
+async def news_scanner_node(state: AgentState) -> dict[str, str]:
+    """Run the news web searches and have the LLM summarize what they found.
+
+    Queries come from ``state["news_queries"]`` when present and non-empty,
+    otherwise from the fallback templates in ``_build_queries``. If the LLM call
+    raises, the node still returns: ``news`` then carries a failure marker
+    followed by the raw formatted search results.
+
+    Returns:
+        A state update of the form ``{"news": <analysis text>}``.
+    """
+    subject = state.get("project_name", state["input"])
+    symbol = state.get("coin_ticker", "")
+    planned_queries = state.get("news_queries", [])
+    verbose_log("NewsScanner", f"Searching news for: {subject} ({symbol})")
+
+    search_terms = _build_queries(subject, symbol, planned_queries)
+    verbose_log("NewsScanner", f"Running {len(search_terms)} search queries")
+
+    hits = await run_search_queries(search_terms, "NewsScanner")
+    results_text = format_search_results(hits, empty_message="[No search results found]")
+
+    try:
+        model = get_chat_model()
+        user_prompt = f"Project: {subject} ({symbol})\n\nWeb search results:\n{results_text}"
+        reply = await model.ainvoke(
+            [
+                SystemMessage(content=SYSTEM_PROMPT),
+                HumanMessage(content=user_prompt),
+            ]
+        )
+        analysis = str(reply.content)
+    except Exception as exc:
+        # Best effort: keep the raw search text so downstream nodes still have data.
+        verbose_log("NewsScanner", f"LLM call failed: {exc}")
+        analysis = f"[News analysis failed: {type(exc).__name__}] Raw data:\n{results_text}"
+
+    verbose_log("NewsScanner", f"Analysis complete ({len(analysis)} chars)")
+    return {"news": analysis}
diff --git a/examples/03-checkpoint-recovery/src/agents/project_profiler.py b/examples/03-checkpoint-recovery/src/agents/project_profiler.py
new file mode 100644
index 0000000..70909d2
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/project_profiler.py
@@ -0,0 +1,110 @@
+"""Project Profiler agent -- gathers project fundamentals from CoinGecko.
+
+Reads: state["project_name"], state["coin_ticker"], state["coin_id"]
+Writes: state["profile"]
+
+Owns ALL CoinGecko data: market stats, price, categories, description,
+genesis date, homepage, AND developer_data (GitHub stats from CoinGecko).
+Pattern 03 prefers the planner-verified `coin_id` when available so resumed
+threads do not need to repeat ambiguous project resolution.
+"""
+
+from __future__ import annotations
+
+import json
+
+from agent_common.llm import get_chat_model
+from agent_common.tracing import verbose_log
+from langchain_core.messages import HumanMessage, SystemMessage
+
+from src.agents.state import AgentState
+from src.coingecko import get_coin_info, get_coin_price, search_coins
+
+# System prompt for the profiler LLM call: the five profile sections to derive from CoinGecko data.
+SYSTEM_PROMPT = """\
+You are a crypto project profiler. You receive structured data from CoinGecko.
+
+Create a concise project profile covering:
+1. **Project overview** — what it does, technology, use case
+2. **Market data** — current price, market cap, 24h volume, 24h change percentage
+3. **Project maturity** — genesis date, categories, notable links (homepage, GitHub)
+4. **Developer activity** — GitHub stars, forks, contributors, recent commits, \
+merged PRs (from developer_data)
+5. **Exchanges & liquidity** — where it trades (if available)
+
+Be factual and quantitative. If a data field is missing or unavailable, \
+state "Data not available" — do NOT guess or hallucinate numbers."""
+
+
+async def _resolve_coin_id(project_name: str, ticker: str, preferred_coin_id: str = "") -> str:
+    """Work out which CoinGecko coin ID to profile.
+
+    Resolution order:
+    1. ``preferred_coin_id`` when it is non-empty (returned untouched).
+    2. The first hit of a CoinGecko search for ``project_name``, then for
+       ``ticker``. Empty search terms are skipped; a search that raises is
+       logged and the next term is tried.
+    3. A slug guess: ``project_name`` lower-cased with spaces turned into dashes.
+    """
+    if preferred_coin_id:
+        verbose_log("ProjectProfiler", f"Using planner-verified coin_id={preferred_coin_id!r}")
+        return preferred_coin_id
+
+    for term in filter(None, (project_name, ticker)):
+        try:
+            raw_hits = await search_coins(term)
+            hits = json.loads(raw_hits) if raw_hits else []
+            if not hits:
+                continue
+            resolved: str = str(hits[0]["id"])
+            verbose_log("ProjectProfiler", f"Resolved {term!r} → coin_id={resolved!r}")
+            return resolved
+        except Exception as exc:
+            verbose_log("ProjectProfiler", f"Search for {term!r} failed: {exc}")
+
+    slug = project_name.lower().replace(" ", "-")
+    verbose_log("ProjectProfiler", f"Using fallback coin_id={slug!r}")
+    return slug
+
+
+async def project_profiler_node(state: AgentState) -> dict[str, str]:
+    """Build the project profile from CoinGecko data plus an LLM summary.
+
+    Each stage degrades instead of raising: a failed info or price fetch is
+    replaced by a bracketed placeholder string, and a failed LLM call yields a
+    failure marker followed by the raw info and price text.
+
+    Returns:
+        A state update of the form ``{"profile": <profile text>}``.
+    """
+    subject = state.get("project_name", state["input"])
+    symbol = state.get("coin_ticker", "")
+    verified_id = state.get("coin_id", "")
+    verbose_log("ProjectProfiler", f"Profiling: {subject} ({symbol})")
+
+    coin_id = await _resolve_coin_id(subject, symbol, verified_id)
+
+    # Project info and price are fetched independently so one failure does not
+    # discard the other.
+    try:
+        info_text: str = await get_coin_info(coin_id)
+        verbose_log("ProjectProfiler", "Got coin info (includes developer_data)")
+    except Exception as exc:
+        verbose_log("ProjectProfiler", f"get_coin_info failed: {exc}")
+        info_text = f"[Project data unavailable: {type(exc).__name__}]"
+
+    try:
+        price_text: str = await get_coin_price(coin_id)
+        verbose_log("ProjectProfiler", "Got price data")
+    except Exception as exc:
+        verbose_log("ProjectProfiler", f"get_coin_price failed: {exc}")
+        price_text = "[Price data unavailable]"
+
+    try:
+        model = get_chat_model()
+        user_prompt = (
+            f"Project: {subject} ({symbol})\n\n"
+            f"CoinGecko project info (includes developer_data):\n{info_text}\n\n"
+            f"Current price data:\n{price_text}"
+        )
+        reply = await model.ainvoke(
+            [
+                SystemMessage(content=SYSTEM_PROMPT),
+                HumanMessage(content=user_prompt),
+            ]
+        )
+        summary = str(reply.content)
+    except Exception as exc:
+        verbose_log("ProjectProfiler", f"LLM call failed: {exc}")
+        summary = f"[Profile generation failed: {type(exc).__name__}]\nRaw info: {info_text}\nRaw price: {price_text}"
+
+    verbose_log("ProjectProfiler", f"Profile complete ({len(summary)} chars)")
+    return {"profile": summary}
diff --git a/examples/03-checkpoint-recovery/src/agents/research_planner.py b/examples/03-checkpoint-recovery/src/agents/research_planner.py
new file mode 100644
index 0000000..064e75e
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/research_planner.py
@@ -0,0 +1,253 @@
+"""Planner and project-selection nodes for Pattern 03.
+
+`research_planner_node` makes the LLM planning call and is intentionally limited
+to state updates that can be checkpointed before any human interaction:
+- Reads: state["input"]
+- Writes: state["plan"], state["project_name"], state["coin_ticker"],
+ state["news_queries"], state["community_queries"]
+
+`project_verifier_node` resolves the CoinGecko project using the planner's
+structured output and stores either a verified `coin_id` or an
+`ambiguous_matches` shortlist.
+
+`project_selector_node` owns the actual `interrupt()` call. Because it reads
+only persisted state instead of re-running the LLM planner, a resumed thread
+can reliably apply the user's selected `coin_id`.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from agent_common.llm import get_chat_model
+from agent_common.tracing import verbose_log
+from langchain_core.messages import HumanMessage, SystemMessage
+from langgraph.types import interrupt
+from pydantic import BaseModel, Field
+
+from src.agents.state import AgentState, CoinMatch
+from src.coingecko import search_coins
+
+# System prompt for the planner LLM call: identify the project, outline the plan, generate search queries.
+SYSTEM_PROMPT = """\
+You are a crypto project research planner. Given a user query, do three things:
+
+1. **Identify the project** with its official project name and ticker symbol.
+2. **Create a focused research plan** (numbered list, one sentence per area):
+ a. Recent news, announcements, partnerships, events — positive and negative signals.
+ b. Project fundamentals via CoinGecko: market cap, price, volume, exchanges, \
+team, genesis date, categories.
+ c. Community and social sentiment: X/Twitter buzz, Reddit discussions, \
+Telegram activity, overall retail mood.
+3. **Generate tailored search queries** for downstream research agents:
+ - 3-4 news queries optimized for recent partnerships, announcements, and project updates
+ - 3-4 community queries optimized for Reddit, X/Twitter, and social sentiment
+
+Keep the plan concise and actionable. Do NOT include price predictions."""
+
+
+class ResearchPlan(BaseModel):
+    """Structured planner output used by downstream research nodes."""
+
+    # Project identification.
+    project_name: str = Field(
+        description="Official project name",
+    )
+    coin_ticker: str = Field(
+        description="Ticker symbol in uppercase, for example ETH or SOL",
+    )
+
+    # Free-text plan.
+    plan: str = Field(
+        description="Concise numbered research plan covering news, fundamentals, and community",
+    )
+
+    # Search queries handed to the web-search based research nodes.
+    news_queries: list[str] = Field(
+        description="Three to four targeted web search queries for recent project news",
+    )
+    community_queries: list[str] = Field(
+        description="Three to four targeted web search queries for community and social sentiment",
+    )
+
+
+def _normalize(value: str) -> str:
+    """Return ``value`` with surrounding whitespace removed and lower-cased, for comparisons."""
+    trimmed = value.strip()
+    return trimmed.lower()
+
+
+def _parse_coin_matches(raw_results: str) -> list[CoinMatch]:
+    """Convert CoinGecko search JSON into typed, compact match objects.
+
+    Only the first five entries of the decoded list are considered.
+
+    Args:
+        raw_results: JSON text expected to hold a list of objects with ``id``,
+            ``name``, ``symbol`` and ``market_cap_rank`` keys.
+
+    Returns:
+        The usable matches in input order. Malformed input never raises:
+        invalid JSON or a non-list top level gives ``[]``, and entries that are
+        not objects or lack a non-empty ``id`` or ``name`` are skipped.
+    """
+    try:
+        parsed = json.loads(raw_results) if raw_results else []
+    except json.JSONDecodeError:
+        return []
+    if not isinstance(parsed, list):
+        return []
+
+    def _text(value: object) -> str:
+        # A JSON null must not turn into the literal string "None".
+        return "" if value is None else str(value).strip()
+
+    matches: list[CoinMatch] = []
+    for item in parsed[:5]:
+        if not isinstance(item, dict):
+            continue
+        coin_id = _text(item.get("id"))
+        name = _text(item.get("name"))
+        if not (coin_id and name):
+            continue
+        rank = item.get("market_cap_rank")
+        # bool is a subclass of int; a JSON true/false is not a rank.
+        rank_is_int = isinstance(rank, int) and not isinstance(rank, bool)
+        matches.append(
+            {
+                "coin_id": coin_id,
+                "name": name,
+                "symbol": _text(item.get("symbol")).upper(),
+                "market_cap_rank": rank if rank_is_int else None,
+            }
+        )
+    return matches
+
+
+def _deduplicate_matches(matches: list[CoinMatch]) -> list[CoinMatch]:
+    """Keep the first match seen for each ``coin_id``, preserving input order."""
+    first_by_id: dict[str, CoinMatch] = {}
+    for candidate in matches:
+        # setdefault leaves an already-stored (earlier) match in place.
+        first_by_id.setdefault(candidate["coin_id"], candidate)
+    return list(first_by_id.values())
+
+
+def _select_matches(
+    project_name: str, coin_ticker: str, matches: list[CoinMatch]
+) -> tuple[CoinMatch | None, list[CoinMatch]]:
+    """Decide between auto-selecting one CoinGecko match and asking a human.
+
+    Name and symbol comparisons are exact after ``_normalize``; an empty ticker
+    never matches a symbol.
+
+    Returns:
+        ``(match, [])`` when one match can be chosen automatically,
+        ``(None, shortlist)`` when several candidates remain, and ``(None, [])``
+        when there is nothing to choose from.
+    """
+    wanted_name = _normalize(project_name)
+    wanted_symbol = _normalize(coin_ticker)
+
+    by_name = [m for m in matches if _normalize(m["name"]) == wanted_name]
+    by_symbol = [m for m in matches if wanted_symbol and _normalize(m["symbol"]) == wanted_symbol]
+
+    if by_name and by_symbol:
+        # Exactly one coin satisfying both criteria is an unambiguous hit.
+        name_ids = {m["coin_id"] for m in by_name}
+        both_ids = {m["coin_id"] for m in by_symbol if m["coin_id"] in name_ids}
+        if len(both_ids) == 1:
+            (only_id,) = both_ids
+            return next(m for m in matches if m["coin_id"] == only_id), []
+    elif len(by_name) == 1:
+        # Reaching here means by_symbol is empty: a lone exact-name hit wins.
+        return by_name[0], []
+    elif len(by_symbol) == 1:
+        # Reaching here means by_name is empty: a lone exact-symbol hit wins.
+        return by_symbol[0], []
+
+    # No automatic winner: shortlist the exact hits, or failing that the top
+    # three raw search results.
+    shortlist = _deduplicate_matches(by_name + by_symbol) or matches[:3]
+    if len(shortlist) == 1:
+        return shortlist[0], []
+    return None, shortlist
+
+
+async def _verify_project(project_name: str, coin_ticker: str) -> tuple[str, list[CoinMatch]]:
+    """Look the project up on CoinGecko and classify the outcome.
+
+    The search uses ``project_name``; ``coin_ticker`` is only used when ranking
+    the results. This function never calls ``interrupt()``.
+
+    Returns:
+        ``(coin_id, [])`` when a match was selected automatically,
+        ``("", shortlist)`` when a human has to choose, and ``("", [])`` when
+        the search failed or produced nothing usable.
+    """
+    try:
+        raw_results = await search_coins(project_name)
+    except Exception as exc:
+        verbose_log("ProjectVerifier", f"CoinGecko verification failed: {exc}")
+        return "", []
+
+    found = _parse_coin_matches(raw_results)
+    if not found:
+        verbose_log("ProjectVerifier", "CoinGecko returned no matches; continuing without verified coin_id")
+        return "", []
+
+    chosen, shortlist = _select_matches(project_name, coin_ticker, found)
+    if chosen is not None:
+        verified_id = chosen["coin_id"]
+        verbose_log("ProjectVerifier", f"Verified coin_id={verified_id!r} automatically")
+        return verified_id, []
+
+    if not shortlist:
+        return "", []
+
+    verbose_log(
+        "ProjectVerifier",
+        f"Project is ambiguous; waiting for human selection across {len(shortlist)} matches",
+    )
+    return "", shortlist
+
+
+async def research_planner_node(state: AgentState) -> dict[str, Any]:
+    """Ask the LLM for a structured ``ResearchPlan`` and normalize its fields.
+
+    The project name is stripped and falls back to the stripped user input when
+    empty; the ticker is stripped and upper-cased; blank queries are dropped and
+    the rest are stripped. No ``interrupt()`` happens here.
+
+    Returns:
+        A state update with ``plan``, ``project_name``, ``coin_ticker``,
+        ``news_queries`` and ``community_queries``.
+    """
+    user_input = state["input"]
+    verbose_log("ResearchPlanner", f"Planning research for: {user_input[:100]}")
+
+    planner = get_chat_model().with_structured_output(ResearchPlan)
+    messages = [
+        SystemMessage(content=SYSTEM_PROMPT),
+        HumanMessage(content=user_input),
+    ]
+    plan_result = await planner.ainvoke(messages)
+
+    def _clean(queries: list[str]) -> list[str]:
+        # Strip each query and drop the ones that were only whitespace.
+        return [query.strip() for query in queries if query.strip()]
+
+    project_name = plan_result.project_name.strip() or user_input.strip()
+    coin_ticker = plan_result.coin_ticker.strip().upper()
+    news_queries = _clean(plan_result.news_queries)
+    community_queries = _clean(plan_result.community_queries)
+
+    summary = (
+        f"Identified project={project_name!r}, ticker={coin_ticker!r}, "
+        f"news_queries={len(news_queries)}, community_queries={len(community_queries)}"
+    )
+    verbose_log("ResearchPlanner", summary)
+
+    return {
+        "plan": plan_result.plan,
+        "project_name": project_name,
+        "coin_ticker": coin_ticker,
+        "news_queries": news_queries,
+        "community_queries": community_queries,
+    }
+
+
+async def project_verifier_node(state: AgentState) -> dict[str, Any]:
+    """Resolve the CoinGecko project from the planner's stored name and ticker.
+
+    Returns:
+        A state update with ``coin_id`` (empty when unresolved) and
+        ``ambiguous_matches`` (empty unless a human choice is needed).
+    """
+    name = state.get("project_name", state["input"]).strip()
+    symbol = state.get("coin_ticker", "").strip().upper()
+    verbose_log("ProjectVerifier", f"Verifying project={name!r}, ticker={symbol!r}")
+
+    resolved_id, shortlist = await _verify_project(name, symbol)
+    return {"coin_id": resolved_id, "ambiguous_matches": shortlist}
+
+
+async def project_selector_node(state: AgentState) -> dict[str, Any]:
+    """Ask a human to disambiguate the project when a shortlist is stored.
+
+    With no ``ambiguous_matches`` in state this returns an empty update.
+    Otherwise it calls ``interrupt()`` with the shortlist and loops until the
+    value returned by ``interrupt()`` is a dict whose ``selected_coin_id``
+    equals one of the shortlisted coin IDs. Every rejected answer leads to
+    another ``interrupt()`` carrying an error message.
+
+    Returns:
+        ``{}`` or ``{"coin_id": <chosen id>, "ambiguous_matches": []}``.
+    """
+    shortlist = state.get("ambiguous_matches", [])
+    if not shortlist:
+        verbose_log("ProjectSelector", "No human selection required")
+        return {}
+
+    name = state.get("project_name", state["input"]).strip()
+    symbol = state.get("coin_ticker", "").strip().upper()
+    message = f"Multiple CoinGecko matches found for {name}. Choose the correct project to continue."
+
+    while True:
+        request_payload = {
+            "interrupt_type": "ambiguous_project",
+            "message": message,
+            "project_name": name,
+            "coin_ticker": symbol,
+            "matches": shortlist,
+        }
+        answer = interrupt(request_payload)
+
+        # Anything that is not a dict counts as "no selection".
+        chosen_id = str(answer.get("selected_coin_id", "")).strip() if isinstance(answer, dict) else ""
+
+        if any(entry["coin_id"] == chosen_id for entry in shortlist):
+            verbose_log("ProjectSelector", f"Human selected coin_id={chosen_id!r}")
+            return {"coin_id": chosen_id, "ambiguous_matches": []}
+
+        message = f"Selection {chosen_id!r} is not valid. Choose one of the provided CoinGecko coin IDs."
+        verbose_log("ProjectSelector", message)
diff --git a/examples/03-checkpoint-recovery/src/agents/state.py b/examples/03-checkpoint-recovery/src/agents/state.py
new file mode 100644
index 0000000..5fc7e54
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/state.py
@@ -0,0 +1,48 @@
+"""Typed state shared across all Pattern 03 agent nodes.
+
+Data flow:
+ research_planner -> project_verifier -> project_selector
+ -> [news_scanner, project_profiler, community_analyst] (parallel)
+ -> intelligence_compiler
+
+research_planner populates: plan, project_name, coin_ticker,
+ news_queries, community_queries
+project_verifier populates: coin_id, ambiguous_matches
+project_selector populates: coin_id (after resume), clears ambiguous_matches
+news_scanner populates: news
+project_profiler populates: profile
+community_analyst populates: community
+intelligence_compiler populates: report
+"""
+
+from __future__ import annotations
+
+from typing import Required, TypedDict
+
+
+class CoinMatch(TypedDict):
+    """One CoinGecko search hit, reduced to four fields."""
+
+    coin_id: str
+    name: str
+    symbol: str
+    market_cap_rank: int | None
+
+
+class AgentState(TypedDict, total=False):
+    """Graph state; only ``input`` is required, every other key is optional."""
+
+    input: Required[str]
+
+    # Planning and project-resolution outputs
+    plan: str
+    project_name: str
+    coin_ticker: str
+    coin_id: str
+    news_queries: list[str]
+    community_queries: list[str]
+    ambiguous_matches: list[CoinMatch]
+
+    # One key per parallel research branch
+    news: str
+    profile: str
+    community: str
+
+    # Output of the final synthesis step
+    report: str
diff --git a/examples/03-checkpoint-recovery/src/agents/web_search.py b/examples/03-checkpoint-recovery/src/agents/web_search.py
new file mode 100644
index 0000000..6c16d7a
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/agents/web_search.py
@@ -0,0 +1,62 @@
+"""Shared DuckDuckGo search helpers for research agents."""
+
+from __future__ import annotations
+
+from agent_common.tracing import verbose_log
+from langchain_community.tools import DuckDuckGoSearchResults
+
+
+def _deduplicate_results(all_results: list[dict[str, str]]) -> list[dict[str, str]]:
+    """Keep the first result for each distinct ``link``, in input order.
+
+    Results whose ``link`` is missing or empty are dropped.
+    """
+    first_by_link: dict[str, dict[str, str]] = {}
+    for hit in all_results:
+        link = hit.get("link", "")
+        if link:
+            # setdefault keeps the earlier hit when the link repeats.
+            first_by_link.setdefault(link, hit)
+    return list(first_by_link.values())
+
+
+async def run_search_queries(
+    queries: list[str],
+    agent_name: str,
+    max_results: int = 5,
+) -> list[dict[str, str]]:
+    """Run each query through DuckDuckGo and merge the hits, deduplicated by URL.
+
+    Queries run one after another. A query that raises is logged and skipped so
+    the remaining queries still run.
+
+    Args:
+        queries: Search strings to execute, in order.
+        agent_name: Label used for the ``verbose_log`` lines.
+        max_results: ``max_results`` value passed to ``DuckDuckGoSearchResults``.
+
+    Returns:
+        The unique dict results across all queries (possibly empty).
+    """
+    all_results: list[dict[str, str]] = []
+    search = DuckDuckGoSearchResults(
+        max_results=max_results,  # type: ignore[call-arg]
+        output_format="list",
+    )
+
+    for query in queries:
+        try:
+            raw = await search.ainvoke(query)
+        except Exception as exc:
+            verbose_log(agent_name, f" [{query[:50]}] search failed: {exc}")
+            continue
+
+        if not isinstance(raw, list):
+            # A list was requested via output_format; anything else cannot be
+            # merged, so say so instead of dropping it silently.
+            verbose_log(agent_name, f" [{query[:50]}] ignored non-list result ({type(raw).__name__})")
+            continue
+
+        # The dedup/format helpers call .get() on every entry; a non-dict entry
+        # would raise there, outside this per-query error handling.
+        hits = [hit for hit in raw if isinstance(hit, dict)]
+        all_results.extend(hits)
+        verbose_log(agent_name, f" [{query[:50]}] -> {len(hits)} results")
+
+    unique_results = _deduplicate_results(all_results)
+    verbose_log(
+        agent_name,
+        f"Total: {len(all_results)} raw -> {len(unique_results)} unique results",
+    )
+    return unique_results
+
+
+def format_search_results(
+    results: list[dict[str, str]],
+    empty_message: str = "[No results found]",
+) -> str:
+    """Render search hits as a markdown bullet list for the LLM prompt.
+
+    Each hit becomes ``- [title](link): snippet``. A missing title renders as
+    ``N/A``; a missing link or snippet renders as an empty string.
+    ``empty_message`` is returned when the rendered text is empty.
+    """
+    bullets: list[str] = []
+    for hit in results:
+        title = hit.get("title", "N/A")
+        link = hit.get("link", "")
+        snippet = hit.get("snippet", "")
+        bullets.append(f"- [{title}]({link}): {snippet}")
+
+    rendered = "\n".join(bullets)
+    return rendered if rendered else empty_message
diff --git a/examples/03-checkpoint-recovery/src/app.py b/examples/03-checkpoint-recovery/src/app.py
new file mode 100644
index 0000000..6cec7ce
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/app.py
@@ -0,0 +1,167 @@
+"""FastAPI application exposing the checkpointed crypto intelligence pipeline."""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from typing import Literal
+
+from agent_common.tracing import setup_tracing, verbose_log
+from fastapi import FastAPI
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+
+from src.runtime import PipelineRuntime, close_runtime, create_runtime
+from src.service import CompletedRun, FailedRun, InterruptedRun, resume_pipeline, run_pipeline
+
+
+class RunRequest(BaseModel):
+    # Request body accepted by POST /run.
+    input: str = Field(
+        min_length=3,
+        max_length=500,
+        description="Crypto project query to research",
+    )
+    # Optional; defaults to None when the field is left out.
+    thread_id: str | None = Field(
+        default=None,
+        description="Optional thread ID for retries after failure",
+    )
+
+
+class ResumeRequest(BaseModel):
+    # Request body accepted by POST /run/resume; both fields must be non-empty.
+    thread_id: str = Field(
+        min_length=1,
+        description="Interrupted thread ID to resume",
+    )
+    selected_coin_id: str = Field(
+        min_length=1,
+        description="CoinGecko coin ID selected by the user",
+    )
+
+
+class CoinMatchResponse(BaseModel):
+    # One selectable candidate inside an "interrupted" response.
+    coin_id: str
+    name: str
+    symbol: str
+    # None when no rank is supplied.
+    market_cap_rank: int | None = None
+
+
+class RunCompletedResponse(BaseModel):
+    # Response for a run that finished; `status` is fixed to "completed".
+    status: Literal["completed"] = "completed"
+    thread_id: str
+
+    # Final report plus the plan and the three research outputs.
+    report: str
+    plan: str
+    news: str
+    profile: str
+    community: str
+
+    # Project identification; each defaults to an empty string.
+    project_name: str = ""
+    coin_ticker: str = ""
+    coin_id: str = ""
+
+
+class RunInterruptedResponse(BaseModel):
+    # Response for a run that paused; `status` is fixed to "interrupted".
+    status: Literal["interrupted"] = "interrupted"
+    thread_id: str
+
+    # What kind of interrupt occurred and the message to show.
+    interrupt_type: str
+    message: str
+
+    # Project identification; each defaults to an empty string.
+    project_name: str = ""
+    coin_ticker: str = ""
+
+    # Candidates offered for selection.
+    matches: list[CoinMatchResponse]
+
+
+class ErrorResponse(BaseModel):
+    # JSON body used for failed runs.
+    error: str  # error code
+    detail: str  # error detail text
+    thread_id: str  # thread the failure belongs to
+
+
+def _completed_response(outcome: CompletedRun) -> RunCompletedResponse:
+    """Map a finished run onto the REST response model.
+
+    Every text field is read from ``outcome.result`` with ``""`` as the default
+    and coerced with ``str()``.
+    """
+    final_state = outcome.result
+    field_names = (
+        "report",
+        "plan",
+        "news",
+        "profile",
+        "community",
+        "project_name",
+        "coin_ticker",
+        "coin_id",
+    )
+    text_fields = {field: str(final_state.get(field, "")) for field in field_names}
+    return RunCompletedResponse(thread_id=outcome.thread_id, **text_fields)
+
+
+def _interrupted_response(outcome: InterruptedRun) -> RunInterruptedResponse:
+    """Map an interrupted run onto the REST response model.
+
+    ``matches`` entries that are not dicts are ignored; the remaining ones are
+    validated as ``CoinMatchResponse``. Missing payload keys fall back to
+    ``"interrupt"``, ``"Workflow interrupted"`` or an empty string.
+    """
+    payload = outcome.payload
+
+    validated_matches = []
+    for entry in payload.get("matches", []):
+        if isinstance(entry, dict):
+            validated_matches.append(CoinMatchResponse.model_validate(entry))
+
+    return RunInterruptedResponse(
+        thread_id=outcome.thread_id,
+        interrupt_type=str(payload.get("interrupt_type", "interrupt")),
+        message=str(payload.get("message", "Workflow interrupted")),
+        project_name=str(payload.get("project_name", "")),
+        coin_ticker=str(payload.get("coin_ticker", "")),
+        matches=validated_matches,
+    )
+
+
+def _error_response(outcome: FailedRun) -> JSONResponse:
+    """Turn a failed run into a JSON error response using the outcome's HTTP status."""
+    body = ErrorResponse(
+        error=outcome.error_code,
+        detail=outcome.detail,
+        thread_id=outcome.thread_id,
+    )
+    return JSONResponse(status_code=outcome.http_status, content=body.model_dump())
+
+
+@asynccontextmanager
+async def lifespan(fastapi_app: FastAPI) -> AsyncIterator[None]:
+    """Create the pipeline runtime for the app's lifetime and always release it.
+
+    Startup configures tracing, creates the runtime and stores it on
+    ``fastapi_app.state.runtime``. The ``finally`` block guarantees that
+    ``close_runtime`` runs even when the context body exits with an exception
+    or cancellation, so the runtime is not leaked.
+    """
+    setup_tracing()
+    runtime = await create_runtime()
+    fastapi_app.state.runtime = runtime
+    verbose_log("System", "FastAPI application started")
+    try:
+        yield
+    finally:
+        # Log first so the message is emitted even if closing the runtime raises.
+        verbose_log("System", "FastAPI application shutting down")
+        await close_runtime(runtime)
+
+
+# ASGI application object; startup and shutdown work is delegated to `lifespan`.
+app = FastAPI(
+ title="Pattern 03: Checkpoint Recovery and Resilience",
+ description=(
+ "Checkpoint-capable crypto intelligence pipeline. REST entry points support "
+ "retry-after-failure and resume-after-interrupt using stable thread IDs. "
+ "Thread inspection is exposed via MCP tools, not REST endpoints."
+ ),
+ version="0.1.0",
+ lifespan=lifespan,
+)
+
+
+def _runtime() -> PipelineRuntime:
+    """Return the runtime object stored on ``app.state.runtime``."""
+    stored = app.state.runtime
+    return stored  # type: ignore[no-any-return]
+
+
+@app.get("/health")
+async def health() -> dict[str, str]:
+    # Liveness probe: always answers with a fixed payload.
+    payload = {"status": "ok"}
+    return payload
+
+
+@app.post("/run", response_model=RunCompletedResponse | RunInterruptedResponse)
+async def run(request: RunRequest) -> RunCompletedResponse | RunInterruptedResponse | JSONResponse:
+    # Run the pipeline for the given input (and optional thread_id) and map
+    # the service outcome onto the matching response.
+    verbose_log("System", f"Received request: {request.input[:100]}")
+
+    outcome = await run_pipeline(_runtime(), input_text=request.input, thread_id=request.thread_id)
+
+    match outcome:
+        case CompletedRun():
+            return _completed_response(outcome)
+        case InterruptedRun():
+            return _interrupted_response(outcome)
+        case _:
+            # Anything else is treated as a failed run.
+            return _error_response(outcome)
+
+
+@app.post("/run/resume", response_model=RunCompletedResponse | RunInterruptedResponse)
+async def resume(request: ResumeRequest) -> RunCompletedResponse | RunInterruptedResponse | JSONResponse:
+    # Resume the given thread with the user's coin selection and map the
+    # service outcome onto the matching response.
+    verbose_log("System", f"Resuming thread {request.thread_id!r} with selected_coin_id={request.selected_coin_id!r}")
+
+    selection = {"selected_coin_id": request.selected_coin_id}
+    outcome = await resume_pipeline(_runtime(), thread_id=request.thread_id, resume_payload=selection)
+
+    match outcome:
+        case CompletedRun():
+            return _completed_response(outcome)
+        case InterruptedRun():
+            return _interrupted_response(outcome)
+        case _:
+            # Anything else is treated as a failed run.
+            return _error_response(outcome)
diff --git a/examples/03-checkpoint-recovery/src/coingecko.py b/examples/03-checkpoint-recovery/src/coingecko.py
new file mode 100644
index 0000000..f4eab9d
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/coingecko.py
@@ -0,0 +1,116 @@
+"""CoinGecko API client for crypto project data.
+
+Direct httpx calls to the free CoinGecko API (no API key required, ~30 req/min).
+This is an internal data layer -- not exposed via MCP. The MCP server exposes
+the agent pipeline capability, not raw API wrappers.
+
+Includes retry with exponential backoff for transient failures (rate limits,
+server errors). Max 3 attempts per call.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+
+import httpx
+from agent_common.tracing import verbose_log
+
+# Base URL that every request path is appended to.
+COINGECKO_BASE = "https://api.coingecko.com/api/v3"
+# Total attempts per request, the first try included.
+_MAX_RETRIES = 3
+# Back-off in seconds after the first failed attempt; doubled for each later one.
+_BASE_DELAY = 1.0
+
+
+async def _get(path: str, params: dict[str, str] | None = None) -> dict:  # type: ignore[type-arg]
+    """GET ``COINGECKO_BASE + path`` and return the decoded JSON body.
+
+    Transient failures -- HTTP 429, HTTP 5xx, connection errors and timeouts --
+    are retried, for at most ``_MAX_RETRIES`` attempts in total, sleeping
+    ``_BASE_DELAY * 2**n`` seconds after the n-th failure (n starting at 0).
+    Any other HTTP error status is raised immediately.
+
+    Args:
+        path: API path appended to the base URL.
+        params: Optional query-string parameters.
+
+    Raises:
+        httpx.HTTPStatusError: On a non-retryable status, or when the final
+            attempt still returns an error status.
+        httpx.ConnectError, httpx.TimeoutException: When the final attempt
+            fails at the transport level.
+        RuntimeError: If ``_MAX_RETRIES`` is below 1, so no request was made.
+    """
+    url = f"{COINGECKO_BASE}{path}"
+
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        for attempt in range(1, _MAX_RETRIES + 1):
+            try:
+                resp = await client.get(url, params=params or {})
+                resp.raise_for_status()
+                return resp.json()  # type: ignore[no-any-return]
+            except (httpx.HTTPStatusError, httpx.ConnectError, httpx.TimeoutException) as exc:
+                if isinstance(exc, httpx.HTTPStatusError):
+                    status = exc.response.status_code
+                    retryable = status >= 500 or status == 429
+                else:
+                    retryable = True
+                # Raise right away when a retry cannot help or no attempts are
+                # left; sleeping before giving up would only delay the error.
+                if not retryable or attempt == _MAX_RETRIES:
+                    raise
+                delay = _BASE_DELAY * (2 ** (attempt - 1))
+                verbose_log(
+                    "CoinGecko",
+                    (
+                        f"Request to {path} failed (attempt {attempt}/{_MAX_RETRIES}): "
+                        f"{exc!r} — retrying in {delay:.1f}s"
+                    ),
+                )
+                await asyncio.sleep(delay)
+
+    # Only reachable when the loop body never ran.
+    raise RuntimeError("CoinGecko request was not attempted: _MAX_RETRIES must be >= 1")
+
+
+async def search_coins(query: str) -> str:
+    """Query CoinGecko ``/search`` and return the top hits as JSON text.
+
+    Returns:
+        A JSON array (2-space indent) of at most eight objects with ``id``,
+        ``name``, ``symbol`` and ``market_cap_rank`` keys.
+    """
+    payload = await _get("/search", {"query": query})
+    top_coins = payload.get("coins", [])[:8]
+
+    summaries = []
+    for coin in top_coins:
+        summaries.append(
+            {
+                "id": coin["id"],
+                "name": coin["name"],
+                "symbol": coin["symbol"],
+                "market_cap_rank": coin.get("market_cap_rank"),
+            }
+        )
+
+    verbose_log("CoinGecko", f"search_coins({query!r}) → {len(summaries)} results")
+    return json.dumps(summaries, indent=2)
+
+
+async def get_coin_info(coin_id: str) -> str:
+    """Fetch ``/coins/{coin_id}`` and return a trimmed project summary as JSON.
+
+    The summary holds name, symbol, the English description (first 1500
+    characters), categories, genesis date, the first homepage URL, GitHub repo
+    URLs, the Twitter handle, ``community_data`` and the positive numeric
+    entries of ``developer_data``. Sections that are missing, ``null`` or empty
+    in the response fall back to empty values instead of raising.
+
+    Returns:
+        The summary serialized with a 2-space indent.
+    """
+    data = await _get(
+        f"/coins/{coin_id}",
+        {"localization": "false", "tickers": "false", "market_data": "false", "community_data": "true"},
+    )
+
+    # `or` also covers keys that are present with a null value, which a
+    # .get() default alone does not.
+    description = data.get("description") or {}
+    links = data.get("links") or {}
+    # An empty homepage list would make a plain [0] lookup raise IndexError.
+    homepages = links.get("homepage") or [None]
+    repos = links.get("repos_url") or {}
+    developer_data = data.get("developer_data") or {}
+
+    info = {
+        "name": data.get("name"),
+        "symbol": data.get("symbol"),
+        "description": (description.get("en") or "")[:1500],
+        "categories": data.get("categories", []),
+        "genesis_date": data.get("genesis_date"),
+        "homepage": homepages[0],
+        "github": repos.get("github", []),
+        "twitter": links.get("twitter_screen_name"),
+        "community_data": data.get("community_data", {}),
+        "developer_data": {k: v for k, v in developer_data.items() if isinstance(v, (int, float)) and v > 0},
+    }
+    verbose_log("CoinGecko", f"get_coin_info({coin_id!r}) → {info.get('name')}")
+    return json.dumps(info, indent=2, default=str)
+
+
+async def get_coin_price(coin_id: str, vs_currency: str = "usd") -> str:
+    """Fetch ``/simple/price`` for one coin and return the figures as JSON text.
+
+    Returns:
+        A JSON object (2-space indent) with ``coin_id``, ``currency``,
+        ``price``, ``market_cap``, ``volume_24h`` and ``change_24h_pct``.
+        Figures absent from the response are ``null``.
+    """
+    query = {
+        "ids": coin_id,
+        "vs_currencies": vs_currency,
+        "include_market_cap": "true",
+        "include_24hr_vol": "true",
+        "include_24hr_change": "true",
+    }
+    payload = await _get("/simple/price", query)
+    figures = payload.get(coin_id, {})
+
+    # Output key -> key used in the API response for the requested currency.
+    api_keys = {
+        "price": vs_currency,
+        "market_cap": f"{vs_currency}_market_cap",
+        "volume_24h": f"{vs_currency}_24h_vol",
+        "change_24h_pct": f"{vs_currency}_24h_change",
+    }
+    price_info = {"coin_id": coin_id, "currency": vs_currency}
+    for out_key, api_key in api_keys.items():
+        price_info[out_key] = figures.get(api_key)
+
+    verbose_log("CoinGecko", f"get_coin_price({coin_id!r}) → ${price_info.get('price')}")
+    return json.dumps(price_info, indent=2)
diff --git a/examples/03-checkpoint-recovery/src/mcp_servers/__init__.py b/examples/03-checkpoint-recovery/src/mcp_servers/__init__.py
new file mode 100644
index 0000000..9317d71
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/mcp_servers/__init__.py
@@ -0,0 +1 @@
+"""MCP transport modules for Pattern 03."""
diff --git a/examples/03-checkpoint-recovery/src/mcp_servers/crypto_intelligence.py b/examples/03-checkpoint-recovery/src/mcp_servers/crypto_intelligence.py
new file mode 100644
index 0000000..1757e47
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/mcp_servers/crypto_intelligence.py
@@ -0,0 +1,328 @@
+"""MCP server exposing the checkpointed crypto intelligence pipeline as tools.
+
+The tool surface is the agentic interface to Pattern 03. An AI client
+(Claude Code, Cursor, ...) discovers these tools via MCP and can:
+
+- run or resume a crypto research pipeline,
+- inspect checkpoint status of any thread, and
+- list / delete threads.
+
+Thread status is derived from LangGraph checkpoint state -- there is no
+separate status table.
+"""
+
+from __future__ import annotations
+
+from collections.abc import AsyncIterator
+from contextlib import asynccontextmanager
+from typing import Any
+
+from agent_common.tracing import setup_tracing, verbose_log
+from mcp.server.fastmcp import FastMCP
+
+from src.runtime import PipelineRuntime, close_runtime, create_runtime
+from src.service import CompletedRun, InterruptedRun, resume_pipeline, run_pipeline
+
+# FastMCP server instance; the tool functions below register on it via @mcp.tool().
+mcp = FastMCP(
+    "crypto-intelligence",
+    host="0.0.0.0",
+    port=8000,
+)
+# Shared pipeline runtime: assigned by mcp_lifespan on startup and reset to None on shutdown.
+_runtime: PipelineRuntime | None = None
+
+
+# ---------------------------------------------------------------------------
+# Helpers for deriving thread status from LangGraph checkpoint state
+# ---------------------------------------------------------------------------
+
+
+def _extract_interrupt_info(state: Any) -> dict[str, Any] | None:
+    """Return the first pending interrupt payload found in a state snapshot.
+
+    Dict payloads are returned unchanged. Any other non-``None`` payload is
+    wrapped as ``{"message": str(payload)}``, mirroring
+    ``src.service._extract_interrupt_payload``, so a thread paused on a
+    non-dict interrupt is still reported as interrupted instead of resumable.
+
+    Args:
+        state: Object exposing an optional ``tasks`` iterable whose items expose
+            an optional ``interrupts`` iterable. ``None`` is accepted.
+
+    Returns:
+        The interrupt payload as a dict, or ``None`` when nothing is pending.
+    """
+    for task in getattr(state, "tasks", None) or ():
+        # ``interrupts`` may be missing or None on a task; treat both as empty.
+        for intr in getattr(task, "interrupts", None) or ():
+            value = getattr(intr, "value", None)
+            if isinstance(value, dict):
+                return value
+            if value is not None:
+                return {"message": str(value)}
+    return None
+
+
+def _format_completed_status(thread_id: str, values: dict[str, Any]) -> str:
+    """Render the COMPLETED status text for a finished research thread.
+
+    Args:
+        thread_id: Thread identifier shown in the header line.
+        values: Checkpoint state values; reads ``project_name``, ``coin_ticker``,
+            ``coin_id`` and ``report``.
+
+    Returns:
+        A header line, an optional ``Project:`` line, and an optional report
+        preview capped at 300 characters (with an ellipsis when truncated).
+    """
+    report = str(values.get("report", ""))
+    project_name = values.get("project_name", "")
+    coin_ticker = values.get("coin_ticker", "")
+    coin_id = values.get("coin_id", "")
+
+    # Ticker and coin id are optional decorations appended after the project name.
+    suffix = ""
+    if coin_ticker:
+        suffix += f" ({coin_ticker})"
+    if coin_id:
+        suffix += f" [coin_id={coin_id}]"
+    identity = project_name + suffix if suffix else project_name
+
+    out = [f'Research thread "{thread_id}" — COMPLETED']
+    if identity:
+        out.append(f"Project: {identity}")
+    if report:
+        preview = report[:300] + ("…" if len(report) > 300 else "")
+        out.append(f"\nReport preview:\n{preview}")
+    return "\n".join(out)
+
+
+def _format_interrupted_status(thread_id: str, values: dict[str, Any], interrupt: dict[str, Any]) -> str:
+    """Render the INTERRUPTED status text for a thread waiting on human input.
+
+    Args:
+        thread_id: Thread identifier shown in the header line.
+        values: Checkpoint state values; only ``project_name`` is read.
+        interrupt: Interrupt payload; reads ``message`` and ``matches``.
+
+    Returns:
+        The header, an optional project line, the interrupt message and, when
+        ``matches`` is a non-empty list, one bullet per dict entry.
+    """
+    out = [f'Research thread "{thread_id}" — INTERRUPTED']
+
+    project_name = values.get("project_name", "")
+    if project_name:
+        out.append(f"Project: {project_name}")
+
+    message = interrupt.get("message", "Workflow interrupted")
+    out.append(f"Waiting for human input: {message}")
+
+    candidates = interrupt.get("matches", [])
+    if isinstance(candidates, list) and candidates:
+        out.append(
+            "\nChoose one of these CoinGecko IDs and call "
+            "research_crypto_project with the same thread_id and "
+            "selected_coin_id:"
+        )
+        # Entries that are not dicts are skipped rather than rendered.
+        out.extend(
+            f" - {item.get('coin_id')}: {item.get('name')} ({item.get('symbol')}) rank={item.get('market_cap_rank')}"
+            for item in candidates
+            if isinstance(item, dict)
+        )
+    return "\n".join(out)
+
+
+def _format_resumable_status(thread_id: str, values: dict[str, Any], next_nodes: tuple[str, ...]) -> str:
+    """Render the RESUMABLE status text for a thread that stopped mid-pipeline.
+
+    Args:
+        thread_id: Thread identifier shown in the header and the resume hint.
+        values: Checkpoint state values; only ``project_name`` is read.
+        next_nodes: Names of the nodes the pipeline stopped before.
+
+    Returns:
+        The header, an optional project line, the pending node names, and a
+        hint describing how to resume the thread.
+    """
+    out = [f'Research thread "{thread_id}" — RESUMABLE']
+
+    project_name = values.get("project_name", "")
+    if project_name:
+        out.append(f"Project: {project_name}")
+
+    pending = ", ".join(next_nodes)
+    out.append(f"Pipeline stopped before: {pending}")
+    out.append(f'\nResume by calling research_crypto_project with thread_id="{thread_id}".')
+    return "\n".join(out)
+
+
+def _format_thread_summary(thread_id: str, state: Any) -> str:
+    """One-line summary of a thread for the listing tool.
+
+    Args:
+        thread_id: Thread identifier shown at the start of the line.
+        state: State snapshot for the thread, or ``None`` when none was loaded.
+
+    Returns:
+        ``" <thread_id> <STATUS> <input preview>"`` where STATUS is INTERRUPTED,
+        RESUMABLE or COMPLETED, or ``" <thread_id> UNKNOWN"`` when ``state`` is
+        ``None``.
+    """
+    if state is None:
+        # The original guarded ``state`` when reading values but then dereferenced
+        # ``state.next`` unconditionally; report a missing snapshot explicitly instead.
+        return f" {thread_id} UNKNOWN"
+
+    values = getattr(state, "values", None) or {}
+    # Cap the echoed request at 80 characters to keep the listing to one line per thread.
+    input_text = str(values.get("input", ""))[:80]
+
+    if _extract_interrupt_info(state):
+        status = "INTERRUPTED"
+    elif getattr(state, "next", None):
+        status = "RESUMABLE"
+    else:
+        status = "COMPLETED"
+    return f" {thread_id} {status} {input_text!r}"
+
+
+async def _list_thread_ids(pool: Any, *, limit: int = 50) -> list[str]:
+    """Read distinct thread IDs from the ``checkpoints`` table.
+
+    Args:
+        pool: Connection pool exposing ``connection()`` as an async context manager.
+        limit: Maximum number of thread IDs to return.
+
+    Returns:
+        Thread IDs as strings, ordered by ``thread_id``. Any failure is logged
+        and yields an empty list.
+    """
+    query = "SELECT DISTINCT thread_id FROM checkpoints WHERE checkpoint_ns = '' ORDER BY thread_id LIMIT %s"
+    thread_ids: list[str] = []
+    try:
+        async with pool.connection() as conn:
+            async with conn.cursor() as cur:
+                await cur.execute(query, (limit,))
+                # Rows are indexed by column name.
+                for row in await cur.fetchall():
+                    thread_ids.append(str(row["thread_id"]))
+    except Exception as exc:
+        verbose_log("MCP", f"Unable to list threads: {exc}")
+        return []
+    return thread_ids
+
+
+# ---------------------------------------------------------------------------
+# MCP lifespan
+# ---------------------------------------------------------------------------
+
+
+@asynccontextmanager
+async def mcp_lifespan(_: object) -> AsyncIterator[None]:
+    """Create the shared pipeline runtime on startup and close it on shutdown.
+
+    Args:
+        _: The application object passed by the ASGI lifespan protocol (unused).
+
+    Yields:
+        ``None`` while the server is running; ``_runtime`` is set for that time.
+    """
+    global _runtime
+    setup_tracing()
+    _runtime = await create_runtime()
+    verbose_log("MCP", "MCP server started")
+    try:
+        yield
+    finally:
+        # Run cleanup even when the app exits through an exception or cancellation;
+        # without try/finally the runtime's resources would leak in that case.
+        verbose_log("MCP", "MCP server shutting down")
+        # Clear the global first so tools fail fast rather than use a closing runtime.
+        runtime, _runtime = _runtime, None
+        if runtime is not None:
+            await close_runtime(runtime)
+
+
+# ASGI application built from the MCP server's SSE transport. Its router lifespan is
+# replaced with mcp_lifespan so the shared runtime is created and closed with the app.
+app = mcp.sse_app()
+app.router.lifespan_context = mcp_lifespan
+
+
+# ---------------------------------------------------------------------------
+# MCP tools
+# ---------------------------------------------------------------------------
+
+
+def _format_interrupt_message(outcome: InterruptedRun) -> str:
+    """Build the text returned to the MCP client when a run pauses on an interrupt.
+
+    Args:
+        outcome: Interrupted run; reads ``thread_id`` and the ``payload`` dict.
+
+    Returns:
+        A header with the thread ID, the interrupt message and, when
+        ``payload["matches"]`` is a non-empty list, one bullet per dict entry.
+    """
+    payload = outcome.payload
+    message = str(payload.get("message", "Workflow interrupted"))
+    out = [f"[Workflow interrupted] thread_id={outcome.thread_id}", message]
+
+    candidates = payload.get("matches", [])
+    if isinstance(candidates, list) and candidates:
+        out.append(
+            "Choose one of these CoinGecko IDs and call the tool again with the same thread_id and selected_coin_id:"
+        )
+        # Entries that are not dicts are skipped rather than rendered.
+        out.extend(
+            f"- {item.get('coin_id')}: {item.get('name')} ({item.get('symbol')}) rank={item.get('market_cap_rank')}"
+            for item in candidates
+            if isinstance(item, dict)
+        )
+    return "\n".join(out)
+
+
+@mcp.tool()
+async def research_crypto_project(
+    query: str,
+    thread_id: str | None = None,
+    selected_coin_id: str | None = None,
+) -> str:
+    """Research a cryptocurrency project with checkpoint recovery.
+
+    Args:
+        query: Natural-language research request about a crypto project.
+        thread_id: Optional stable thread ID. Reuse this to retry failed runs or
+            resume an interrupted workflow.
+        selected_coin_id: Optional CoinGecko coin ID used to resume an interrupted
+            workflow after the planner asked the user to disambiguate a project.
+            Must be sent together with the thread_id of the interrupted run.
+
+    Returns:
+        The final intelligence report on success, or a human-readable interrupt /
+        error message that includes the resumable thread ID.
+    """
+    if _runtime is None:
+        raise RuntimeError("MCP runtime is not initialized")
+
+    preview = repr(query[:80])
+    verbose_log("MCP", f"research_crypto_project({preview})")
+
+    if selected_coin_id and not thread_id:
+        # A selection only makes sense for an existing interrupted thread. Starting a
+        # fresh run here would silently discard the caller's choice.
+        detail = "selected_coin_id requires the thread_id of the interrupted run"
+        verbose_log("MCP", f"research_crypto_project rejected: {detail}")
+        return f"[invalid_request] {detail}"
+
+    if selected_coin_id and thread_id:
+        outcome = await resume_pipeline(
+            _runtime,
+            thread_id=thread_id,
+            resume_payload={"selected_coin_id": selected_coin_id},
+        )
+    else:
+        outcome = await run_pipeline(
+            _runtime,
+            input_text=query,
+            thread_id=thread_id,
+        )
+
+    if isinstance(outcome, CompletedRun):
+        report = str(outcome.result.get("report", ""))
+        verbose_log("MCP", f"research_crypto_project -- complete ({len(report)} chars)")
+        return report
+
+    if isinstance(outcome, InterruptedRun):
+        message = _format_interrupt_message(outcome)
+        verbose_log("MCP", message)
+        return message
+
+    # Anything else is a failed run; surface the thread ID so the caller can retry it.
+    verbose_log("MCP", f"research_crypto_project failed: {outcome.detail}")
+    return f"[{outcome.error_code}] thread_id={outcome.thread_id} {outcome.detail}"
+
+
+@mcp.tool()
+async def get_research_status(thread_id: str) -> str:
+    """Check the current status of a crypto research thread.
+
+    Use this to see whether a research run completed, was interrupted
+    (waiting for your input), or can be resumed after a failure.
+
+    Args:
+        thread_id: The thread ID returned by research_crypto_project.
+    """
+    if _runtime is None:
+        raise RuntimeError("MCP runtime is not initialized")
+
+    try:
+        snapshot = await _runtime.graph.aget_state({"configurable": {"thread_id": thread_id}})
+    except Exception as exc:
+        return f"Unable to load state for thread {thread_id!r}: {exc}"
+
+    if not snapshot or not snapshot.values:
+        return f"No research found for thread {thread_id!r}."
+
+    raw_values = snapshot.values
+    values: dict[str, Any] = dict(raw_values) if isinstance(raw_values, dict) else {}
+
+    # Status precedence: a pending interrupt wins, then pending nodes, else completed.
+    pending_interrupt = _extract_interrupt_info(snapshot)
+    if pending_interrupt:
+        return _format_interrupted_status(thread_id, values, pending_interrupt)
+    if snapshot.next:
+        return _format_resumable_status(thread_id, values, snapshot.next)
+    return _format_completed_status(thread_id, values)
+
+
+@mcp.tool()
+async def list_research_threads() -> str:
+    """List all known crypto research threads and their status.
+
+    Returns a summary of each thread including its current state
+    (completed, interrupted, or resumable).
+    """
+    if _runtime is None:
+        raise RuntimeError("MCP runtime is not initialized")
+
+    thread_ids = await _list_thread_ids(_runtime.pool)
+    if not thread_ids:
+        return "No research threads found."
+
+    rows: list[str] = []
+    for tid in thread_ids:
+        try:
+            snapshot = await _runtime.graph.aget_state({"configurable": {"thread_id": tid}})
+            if snapshot and snapshot.values:
+                row = _format_thread_summary(tid, snapshot)
+            else:
+                row = f" {tid} UNKNOWN"
+        except Exception:
+            # One unreadable thread must not hide the rest of the listing.
+            row = f" {tid} ERROR (unable to load state)"
+        rows.append(row)
+
+    return f"Found {len(rows)} research thread(s):\n" + "\n".join(rows)
+
+
+@mcp.tool()
+async def delete_research_thread(thread_id: str) -> str:
+    """Delete a research thread and all its checkpoint data.
+
+    Args:
+        thread_id: The thread ID to delete.
+    """
+    if _runtime is None:
+        raise RuntimeError("MCP runtime is not initialized")
+
+    config = {"configurable": {"thread_id": thread_id}}
+    try:
+        state = await _runtime.graph.aget_state(config)
+    except Exception as exc:
+        # A failed lookup is not a missing thread: report it the same way
+        # get_research_status does instead of claiming nothing exists.
+        verbose_log("MCP", f"Unable to load state for thread {thread_id!r}: {exc}")
+        return f"Unable to load state for thread {thread_id!r}: {exc}"
+
+    if not state or not state.values:
+        return f"No research found for thread {thread_id!r}."
+
+    try:
+        await _runtime.checkpointer.adelete_thread(thread_id)
+    except Exception as exc:
+        return f"Failed to delete thread {thread_id!r}: {exc}"
+
+    verbose_log("MCP", f"Deleted thread {thread_id!r}")
+    return f"Thread {thread_id!r} and its checkpoints have been deleted."
+
+
+if __name__ == "__main__":
+    # NOTE(review): the runtime is only created by mcp_lifespan, which is attached to
+    # `app` above. Presumably mcp.run() builds and serves its own SSE app, in which
+    # case `_runtime` stays None on this path and every tool raises — confirm, or
+    # serve `app` directly.
+    mcp.run(transport="sse")
diff --git a/examples/03-checkpoint-recovery/src/runtime.py b/examples/03-checkpoint-recovery/src/runtime.py
new file mode 100644
index 0000000..e6716c8
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/runtime.py
@@ -0,0 +1,51 @@
+"""Shared runtime configuration and startup for Pattern 03 entry points."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, cast
+
+from agent_common.persistence import close_checkpointer, create_postgres_pool, setup_checkpointer
+from agent_common.tracing import build_langsmith_run_config, verbose_log
+from langchain_core.runnables import RunnableConfig
+
+from src.agents.graph import build_graph
+
+# Timeout in seconds applied to each graph invocation by src.service (asyncio.wait_for).
+PIPELINE_TIMEOUT_SECONDS = 120.0
+
+
+@dataclass(slots=True)
+class PipelineRuntime:
+    """Bundle of the shared resources built by ``create_runtime``."""
+
+    # Graph object returned by build_graph(checkpointer=...).
+    graph: Any
+    # Checkpointer returned by setup_checkpointer(); closed via close_checkpointer().
+    checkpointer: Any
+    # Pool returned by create_postgres_pool(); closed in close_runtime().
+    pool: Any
+
+
+def build_pipeline_run_config(thread_id: str) -> RunnableConfig:
+    """Assemble the run config used for every public invocation of the pipeline.
+
+    Args:
+        thread_id: Thread ID recorded in the trace metadata and set as
+            ``configurable["thread_id"]``.
+
+    Returns:
+        The tracing config from ``build_langsmith_run_config`` with its
+        ``configurable`` entry set to ``{"thread_id": thread_id}``.
+    """
+    tracing_options = {
+        "example_name": "03-checkpoint-recovery",
+        "pattern_slug": "checkpoint-recovery",
+        "run_name": "pattern-03-checkpoint-recovery",
+        "extra_tags": ["checkpointed"],
+        "metadata": {"thread_id": thread_id},
+    }
+    run_config = build_langsmith_run_config(**tracing_options)
+    run_config["configurable"] = {"thread_id": thread_id}
+    return cast(RunnableConfig, run_config)
+
+
+async def create_runtime() -> PipelineRuntime:
+    """Create shared runtime resources for FastAPI and MCP entry points.
+
+    Returns:
+        A ``PipelineRuntime`` holding the graph, its checkpointer and the pool.
+
+    Raises:
+        Exception: Whatever a startup step raised, re-raised after the
+            resources created so far have been closed.
+    """
+    pool = await create_postgres_pool()
+    checkpointer = None
+    try:
+        checkpointer = await setup_checkpointer()
+        graph = build_graph(checkpointer=checkpointer)
+    except BaseException:
+        # Startup failed part-way: release what was already opened so a failed
+        # boot does not leak the pool or the checkpointer.
+        try:
+            if checkpointer is not None:
+                await close_checkpointer(checkpointer)
+        finally:
+            await pool.close()
+        raise
+
+    verbose_log("System", "Pattern 03 runtime initialized")
+    return PipelineRuntime(graph=graph, checkpointer=checkpointer, pool=pool)
+
+
+async def close_runtime(runtime: PipelineRuntime) -> None:
+    """Close shared runtime resources.
+
+    Args:
+        runtime: The runtime returned by ``create_runtime``.
+
+    Raises:
+        Exception: Whatever closing the checkpointer or the pool raised; the
+            pool is closed even when closing the checkpointer fails.
+    """
+    try:
+        await close_checkpointer(runtime.checkpointer)
+    finally:
+        # Always release the pool, even if closing the checkpointer raised.
+        await runtime.pool.close()
+    verbose_log("System", "Pattern 03 runtime closed")
diff --git a/examples/03-checkpoint-recovery/src/service.py b/examples/03-checkpoint-recovery/src/service.py
new file mode 100644
index 0000000..5f28b44
--- /dev/null
+++ b/examples/03-checkpoint-recovery/src/service.py
@@ -0,0 +1,169 @@
+"""Shared execution flow for Pattern 03 REST and MCP entry points."""
+
+from __future__ import annotations
+
+import asyncio
+from dataclasses import dataclass
+from typing import Any
+from uuid import uuid4
+
+from agent_common.tracing import verbose_log
+from langgraph.types import Command
+
+from src.runtime import PIPELINE_TIMEOUT_SECONDS, PipelineRuntime, build_pipeline_run_config
+
+
+@dataclass(slots=True)
+class CompletedRun:
+    """Outcome of a graph invocation that finished without a pending interrupt."""
+
+    # Thread the run executed on.
+    thread_id: str
+    # Dict returned by graph.ainvoke().
+    result: dict[str, Any]
+    # Fixed discriminator for this outcome type.
+    status: str = "completed"
+
+
+@dataclass(slots=True)
+class InterruptedRun:
+    """Outcome of a graph invocation that paused on an interrupt."""
+
+    # Thread to reuse when resuming.
+    thread_id: str
+    # First interrupt payload, as extracted by _extract_interrupt_payload().
+    payload: dict[str, Any]
+    # Fixed discriminator for this outcome type.
+    status: str = "interrupted"
+
+
+@dataclass(slots=True)
+class FailedRun:
+    """Outcome of a graph invocation that timed out, raised, or had no thread."""
+
+    # Thread the failure belongs to.
+    thread_id: str
+    # Machine-readable code, e.g. "pipeline_timeout", "pipeline_failed", "thread_not_found".
+    error_code: str
+    # Human-readable description of the failure.
+    detail: str
+    # HTTP status paired with the error code (504, 502 or 404 in this module).
+    http_status: int
+    # Fixed discriminator for this outcome type.
+    status: str = "failed"
+
+
+# Union of the three result types returned by run_pipeline() and resume_pipeline().
+PipelineOutcome = CompletedRun | InterruptedRun | FailedRun
+
+
+def _new_thread_id() -> str:
+    """Return a freshly generated random UUID (version 4) as a string."""
+    return f"{uuid4()}"
+
+
+def _extract_interrupt_payload(result: dict[str, Any]) -> dict[str, Any] | None:
+    """Extract the payload of the first interrupt recorded in a graph result.
+
+    Args:
+        result: Dict returned by the graph; ``"__interrupt__"`` is read from it.
+
+    Returns:
+        ``None`` when ``"__interrupt__"`` is not a non-empty list or the first
+        entry has no ``value``. A dict value is returned as-is; any other value
+        is wrapped as ``{"message": str(value)}``.
+    """
+    pending = result.get("__interrupt__")
+    if isinstance(pending, list) and pending:
+        value = getattr(pending[0], "value", None)
+        if value is None:
+            return None
+        return value if isinstance(value, dict) else {"message": str(value)}
+    return None
+
+
+async def run_pipeline(
+    runtime: PipelineRuntime,
+    *,
+    input_text: str,
+    thread_id: str | None = None,
+) -> PipelineOutcome:
+    """Start or retry a checkpointed workflow.
+
+    If *thread_id* points to a thread with pending checkpoint work (failed or
+    interrupted), the graph resumes from the last checkpoint instead of
+    starting fresh.
+
+    Args:
+        runtime: Shared runtime whose ``graph`` is invoked.
+        input_text: Request text passed to the graph as ``{"input": ...}``.
+        thread_id: Existing thread to continue; a new UUID is generated when omitted.
+
+    Returns:
+        ``CompletedRun``, ``InterruptedRun`` or ``FailedRun``. Timeouts and
+        exceptions from the graph are reported as ``FailedRun``, not raised.
+    """
+    resolved_thread_id = thread_id or _new_thread_id()
+    config = build_pipeline_run_config(resolved_thread_id)
+
+    graph_input: dict[str, str] | None = {"input": input_text}
+    has_pending_work = False
+    if thread_id:
+        try:
+            state_snapshot = await runtime.graph.aget_state(config)
+            has_pending_work = bool(state_snapshot and state_snapshot.values and state_snapshot.next)
+        except Exception as exc:
+            # Best effort: if the checkpoint cannot be inspected, run from the input.
+            verbose_log("System", f"Unable to inspect thread_id={resolved_thread_id!r}: {exc!r}")
+
+    if has_pending_work:
+        # A None input makes the graph continue from the last checkpoint.
+        graph_input = None
+        verbose_log("System", f"Retrying thread_id={resolved_thread_id!r} from last checkpoint")
+    else:
+        verbose_log("System", f"Running thread_id={resolved_thread_id!r}")
+
+    try:
+        result = await asyncio.wait_for(
+            runtime.graph.ainvoke(graph_input, config=config),
+            timeout=PIPELINE_TIMEOUT_SECONDS,
+        )
+    except asyncio.TimeoutError:
+        # asyncio.TimeoutError is only an alias of the builtin TimeoutError from
+        # Python 3.11; naming it explicitly keeps the 504 path working on 3.10.
+        detail = f"Pipeline timed out after {PIPELINE_TIMEOUT_SECONDS:.0f}s"
+        return FailedRun(
+            thread_id=resolved_thread_id,
+            error_code="pipeline_timeout",
+            detail=detail,
+            http_status=504,
+        )
+    except Exception as exc:
+        # str(exc) is empty for exceptions raised without a message; fall back to
+        # the type name so the caller never receives a blank detail.
+        detail = str(exc) or type(exc).__name__
+        return FailedRun(
+            thread_id=resolved_thread_id,
+            error_code="pipeline_failed",
+            detail=detail,
+            http_status=502,
+        )
+
+    interrupt_payload = _extract_interrupt_payload(result)
+    if interrupt_payload is not None:
+        return InterruptedRun(thread_id=resolved_thread_id, payload=interrupt_payload)
+
+    return CompletedRun(thread_id=resolved_thread_id, result=result)
+
+
+async def resume_pipeline(
+    runtime: PipelineRuntime,
+    *,
+    thread_id: str,
+    resume_payload: dict[str, Any],
+) -> PipelineOutcome:
+    """Resume an interrupted workflow using ``Command(resume=...)``.
+
+    Args:
+        runtime: Shared runtime whose ``graph`` is invoked.
+        thread_id: Thread that previously paused on an interrupt.
+        resume_payload: Value handed to the paused node via ``Command(resume=...)``.
+
+    Returns:
+        ``CompletedRun``, ``InterruptedRun`` or ``FailedRun``. A thread with no
+        stored state yields ``FailedRun`` with ``thread_not_found``; timeouts and
+        exceptions from the graph are reported as ``FailedRun``, not raised.
+    """
+    config = build_pipeline_run_config(thread_id)
+
+    try:
+        state_snapshot = await runtime.graph.aget_state(config)
+    except Exception as exc:
+        # Keep the existing "not found" outcome, but leave a trace of the real cause.
+        verbose_log("System", f"Unable to load state for thread_id={thread_id!r}: {exc!r}")
+        state_snapshot = None
+
+    if not state_snapshot or not state_snapshot.values:
+        return FailedRun(
+            thread_id=thread_id,
+            error_code="thread_not_found",
+            detail=f"Thread {thread_id!r} was not found",
+            http_status=404,
+        )
+
+    verbose_log("System", f"Resuming thread_id={thread_id!r}")
+
+    try:
+        result = await asyncio.wait_for(
+            runtime.graph.ainvoke(Command(resume=resume_payload), config=config),
+            timeout=PIPELINE_TIMEOUT_SECONDS,
+        )
+    except asyncio.TimeoutError:
+        # asyncio.TimeoutError is only an alias of the builtin TimeoutError from
+        # Python 3.11; naming it explicitly keeps the 504 path working on 3.10.
+        detail = f"Pipeline timed out after {PIPELINE_TIMEOUT_SECONDS:.0f}s"
+        return FailedRun(
+            thread_id=thread_id,
+            error_code="pipeline_timeout",
+            detail=detail,
+            http_status=504,
+        )
+    except Exception as exc:
+        # str(exc) is empty for exceptions raised without a message; fall back to
+        # the type name so the caller never receives a blank detail.
+        detail = str(exc) or type(exc).__name__
+        return FailedRun(
+            thread_id=thread_id,
+            error_code="pipeline_failed",
+            detail=detail,
+            http_status=502,
+        )
+
+    interrupt_payload = _extract_interrupt_payload(result)
+    if interrupt_payload is not None:
+        return InterruptedRun(thread_id=thread_id, payload=interrupt_payload)
+
+    return CompletedRun(thread_id=thread_id, result=result)
diff --git a/examples/03-checkpoint-recovery/tests/api/test_app_api.py b/examples/03-checkpoint-recovery/tests/api/test_app_api.py
new file mode 100644
index 0000000..91f72b8
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/api/test_app_api.py
@@ -0,0 +1,166 @@
+"""API tests for Pattern 03 FastAPI endpoints."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+import pytest
+from fastapi.testclient import TestClient
+from src import app as app_module
+from src.service import CompletedRun, FailedRun, InterruptedRun
+
+
+# Empty stand-in for the graph held by the runtime; these tests never invoke it.
+@dataclass
+class _FakeGraph:
+    pass
+
+
+# Empty stand-in for the checkpointer held by the runtime.
+@dataclass
+class _FakeCheckpointer:
+    pass
+
+
+# Test double with the same three fields as src.runtime.PipelineRuntime.
+@dataclass
+class _FakeRuntime:
+    graph: _FakeGraph
+    checkpointer: _FakeCheckpointer
+    # No database pool is created in these tests.
+    pool: object | None = None
+
+
+def _make_client(monkeypatch: pytest.MonkeyPatch) -> TestClient:
+    """Return a TestClient for the app with runtime creation and teardown stubbed out."""
+    fake_runtime = _FakeRuntime(graph=_FakeGraph(), checkpointer=_FakeCheckpointer())
+
+    async def fake_create_runtime() -> _FakeRuntime:
+        return fake_runtime
+
+    async def fake_close_runtime(runtime_to_close: _FakeRuntime) -> None:
+        return None
+
+    # Swap both lifecycle hooks on the app module with the stubs above.
+    replacements = {"create_runtime": fake_create_runtime, "close_runtime": fake_close_runtime}
+    for attr_name, stub in replacements.items():
+        monkeypatch.setattr(app_module, attr_name, stub)
+    return TestClient(app_module.app)
+
+
+def test_health_endpoint_returns_ok(monkeypatch: pytest.MonkeyPatch) -> None:
+    """GET /health answers 200 with ``{"status": "ok"}``."""
+    client = _make_client(monkeypatch)
+    with client:
+        response = client.get("/health")
+
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+def test_run_endpoint_returns_completed_response(monkeypatch: pytest.MonkeyPatch) -> None:
+    """POST /run answers 200 with status, thread_id and coin_id for a completed run."""
+    pipeline_result = {
+        "report": "## Executive Summary\nArbitrum report.",
+        "plan": "1. News\n2. Profile",
+        "news": "Orbit chains launched.",
+        "profile": "L2 rollup, $1.23",
+        "community": "Strong community health.",
+        "project_name": "Arbitrum",
+        "coin_ticker": "ARB",
+        "coin_id": "arbitrum",
+    }
+
+    async def fake_run_pipeline(runtime: object, *, input_text: str, thread_id: str | None = None) -> CompletedRun:
+        return CompletedRun(thread_id=thread_id or "thread-1", result=pipeline_result)
+
+    monkeypatch.setattr(app_module, "run_pipeline", fake_run_pipeline)
+
+    with _make_client(monkeypatch) as client:
+        response = client.post("/run", json={"input": "Research Arbitrum", "thread_id": "thread-1"})
+
+    body = response.json()
+    assert response.status_code == 200
+    assert body["status"] == "completed"
+    assert body["thread_id"] == "thread-1"
+    assert body["coin_id"] == "arbitrum"
+
+
+def test_run_endpoint_returns_interrupted_response(monkeypatch: pytest.MonkeyPatch) -> None:
+    """POST /run answers 200 with status "interrupted" and the candidate matches."""
+    interrupt_payload = {
+        "interrupt_type": "ambiguous_project",
+        "message": "Multiple matches found.",
+        "project_name": "Mercury",
+        "coin_ticker": "",
+        "matches": [
+            {"coin_id": "mercury", "name": "Mercury", "symbol": "MER", "market_cap_rank": 999},
+        ],
+    }
+
+    async def fake_run_pipeline(runtime: object, *, input_text: str, thread_id: str | None = None) -> InterruptedRun:
+        return InterruptedRun(thread_id=thread_id or "thread-2", payload=interrupt_payload)
+
+    monkeypatch.setattr(app_module, "run_pipeline", fake_run_pipeline)
+
+    with _make_client(monkeypatch) as client:
+        response = client.post("/run", json={"input": "Research Mercury"})
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["status"] == "interrupted"
+    assert body["thread_id"] == "thread-2"
+    first_match = body["matches"][0]
+    assert first_match["coin_id"] == "mercury"
+
+
+def test_run_endpoint_returns_error_response(monkeypatch: pytest.MonkeyPatch) -> None:
+    """POST /run surfaces a FailedRun as its http_status with thread_id and error code."""
+    failure = FailedRun(
+        thread_id="thread-3",
+        error_code="pipeline_failed",
+        detail="CoinGecko down",
+        http_status=502,
+    )
+
+    async def fake_run_pipeline(runtime: object, *, input_text: str, thread_id: str | None = None) -> FailedRun:
+        return failure
+
+    monkeypatch.setattr(app_module, "run_pipeline", fake_run_pipeline)
+
+    with _make_client(monkeypatch) as client:
+        response = client.post("/run", json={"input": "Research Arbitrum"})
+
+    body = response.json()
+    assert response.status_code == 502
+    assert body["thread_id"] == "thread-3"
+    assert body["error"] == "pipeline_failed"
+
+
+def test_run_endpoint_validates_input(monkeypatch: pytest.MonkeyPatch) -> None:
+    """POST /run with the input "ab" is rejected with 422."""
+    request_body = {"input": "ab"}
+    with _make_client(monkeypatch) as client:
+        status = client.post("/run", json=request_body).status_code
+
+    assert status == 422
+
+
+def test_resume_endpoint_uses_selected_coin_id(monkeypatch: pytest.MonkeyPatch) -> None:
+    """POST /run/resume forwards thread_id and selected_coin_id to resume_pipeline."""
+    recovered_result = {
+        "report": "## Executive Summary\nRecovered report.",
+        "plan": "1. News",
+        "news": "News",
+        "profile": "Profile",
+        "community": "Community",
+        "project_name": "Arbitrum",
+        "coin_ticker": "ARB",
+        "coin_id": "arbitrum",
+    }
+
+    async def fake_resume_pipeline(runtime: object, *, thread_id: str, resume_payload: dict[str, Any]) -> CompletedRun:
+        # The endpoint must pass the request fields through unchanged.
+        assert thread_id == "thread-4"
+        assert resume_payload == {"selected_coin_id": "arbitrum"}
+        return CompletedRun(thread_id=thread_id, result=recovered_result)
+
+    monkeypatch.setattr(app_module, "resume_pipeline", fake_resume_pipeline)
+
+    with _make_client(monkeypatch) as client:
+        response = client.post("/run/resume", json={"thread_id": "thread-4", "selected_coin_id": "arbitrum"})
+
+    body = response.json()
+    assert response.status_code == 200
+    assert body["status"] == "completed"
+    assert body["thread_id"] == "thread-4"
+
+
+def test_thread_endpoints_are_removed(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Thread inspection is exposed via MCP tools, not REST endpoints."""
+    with _make_client(monkeypatch) as client:
+        removed_routes = (
+            (client.get, "/threads"),
+            (client.get, "/threads/some-id"),
+            (client.delete, "/threads/some-id"),
+        )
+        for send, path in removed_routes:
+            assert send(path).status_code == 404
diff --git a/examples/03-checkpoint-recovery/tests/conftest.py b/examples/03-checkpoint-recovery/tests/conftest.py
new file mode 100644
index 0000000..dde0937
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/conftest.py
@@ -0,0 +1,34 @@
+"""Pytest configuration for example 03 tests."""
+
+from __future__ import annotations
+
+import sys
+from collections.abc import Iterator
+from pathlib import Path
+
+import pytest
+from agent_common.config import get_settings
+
+# Root directory of this example (the parent of tests/).
+EXAMPLE_ROOT = Path(__file__).resolve().parents[1]
+
+# Drop already-imported `src` / `src.*` modules whose file lies outside this example,
+# so the next `import src...` is resolved again instead of reusing the cached module.
+for key in list(sys.modules.keys()):
+    if key == "src" or key.startswith("src."):
+        mod = sys.modules[key]
+        mod_file = getattr(mod, "__file__", None) or ""
+        if mod_file and str(EXAMPLE_ROOT) not in mod_file:
+            del sys.modules[key]
+
+# Put this example's root first on sys.path (when absent) so `src` resolves here.
+if str(EXAMPLE_ROOT) not in sys.path:
+    sys.path.insert(0, str(EXAMPLE_ROOT))
+
+
+@pytest.fixture(autouse=True)
+def _disable_langsmith_tracing(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]:
+    """Force tracing off and remove API keys for every test, regardless of host settings."""
+    for flag in ("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2"):
+        monkeypatch.setenv(flag, "false")
+    for secret in ("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"):
+        monkeypatch.delenv(secret, raising=False)
+
+    # Clear the settings cache before and after the test so the patched environment
+    # is picked up and does not carry over to the next test.
+    get_settings.cache_clear()
+    yield
+    get_settings.cache_clear()
diff --git a/examples/03-checkpoint-recovery/tests/e2e/test_pipeline_graph.py b/examples/03-checkpoint-recovery/tests/e2e/test_pipeline_graph.py
new file mode 100644
index 0000000..18fa606
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/e2e/test_pipeline_graph.py
@@ -0,0 +1,177 @@
+"""End-to-end tests for the Pattern 03 checkpointed graph."""
+
+from __future__ import annotations
+
+import pytest
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.types import Command, interrupt
+from src.agents import graph as graph_module
+
+
+@pytest.mark.asyncio
+async def test_graph_resumes_only_failed_branch_after_checkpoint(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Re-invoking a failed thread re-runs only the node that raised."""
+    # Number of times each fake node body has executed across both invocations.
+    call_counts = {
+        "research_planner": 0,
+        "project_verifier": 0,
+        "project_selector": 0,
+        "news_scanner": 0,
+        "project_profiler": 0,
+        "community_analyst": 0,
+        "intelligence_compiler": 0,
+    }
+
+    async def fake_research_planner(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["research_planner"] += 1
+        return {
+            "plan": "1. News\n2. Profile\n3. Community",
+            "project_name": "Arbitrum",
+            "coin_ticker": "ARB",
+            "news_queries": ["Arbitrum news"],
+            "community_queries": ["Arbitrum reddit"],
+        }
+
+    async def fake_project_verifier(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["project_verifier"] += 1
+        return {
+            "coin_id": "arbitrum",
+            "ambiguous_matches": [],
+        }
+
+    async def fake_project_selector(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["project_selector"] += 1
+        return {}
+
+    async def fake_news_scanner(state: dict[str, str]) -> dict[str, str]:
+        call_counts["news_scanner"] += 1
+        return {"news": "Orbit chains launched. TVL over $10B."}
+
+    async def fake_project_profiler(state: dict[str, str]) -> dict[str, str]:
+        call_counts["project_profiler"] += 1
+        # Fail on the first call only, simulating a transient outage.
+        if call_counts["project_profiler"] == 1:
+            raise RuntimeError("CoinGecko temporarily unavailable")
+        return {"profile": "L2 optimistic rollup. Price $1.23, Market cap $4.5B."}
+
+    async def fake_community_analyst(state: dict[str, str]) -> dict[str, str]:
+        call_counts["community_analyst"] += 1
+        return {"community": "Strong: active Reddit, positive Twitter sentiment."}
+
+    async def fake_intelligence_compiler(state: dict[str, str]) -> dict[str, str]:
+        call_counts["intelligence_compiler"] += 1
+        # The compiler must see the output of all three research branches.
+        assert state["news"] is not None
+        assert state["profile"] is not None
+        assert state["community"] is not None
+        return {"report": "## Executive Summary\nArbitrum comprehensive intelligence report."}
+
+    # Patch the node callables on the graph module before the graph is built.
+    monkeypatch.setattr(graph_module, "research_planner_node", fake_research_planner)
+    monkeypatch.setattr(graph_module, "project_verifier_node", fake_project_verifier)
+    monkeypatch.setattr(graph_module, "project_selector_node", fake_project_selector)
+    monkeypatch.setattr(graph_module, "news_scanner_node", fake_news_scanner)
+    monkeypatch.setattr(graph_module, "project_profiler_node", fake_project_profiler)
+    monkeypatch.setattr(graph_module, "community_analyst_node", fake_community_analyst)
+    monkeypatch.setattr(graph_module, "intelligence_compiler_node", fake_intelligence_compiler)
+
+    graph = graph_module.build_graph(checkpointer=MemorySaver())
+    config = {"configurable": {"thread_id": "recovery-thread"}}
+
+    # First invocation: the profiler raises and the error propagates to the caller.
+    with pytest.raises(RuntimeError, match="CoinGecko temporarily unavailable"):
+        await graph.ainvoke({"input": "Research Arbitrum"}, config=config)
+
+    # Second invocation on the same thread with no new input.
+    result = await graph.ainvoke(None, config=config)
+
+    # Only the profiler ran twice; every other node ran once across both invocations.
+    assert call_counts == {
+        "research_planner": 1,
+        "project_verifier": 1,
+        "project_selector": 1,
+        "news_scanner": 1,
+        "project_profiler": 2,
+        "community_analyst": 1,
+        "intelligence_compiler": 1,
+    }
+    assert "Executive Summary" in result["report"]
+    assert result["coin_id"] == "arbitrum"
+
+
+@pytest.mark.asyncio
+async def test_graph_interrupts_and_resumes_with_same_thread(monkeypatch: pytest.MonkeyPatch) -> None:
+    """An ambiguous project pauses the graph; resuming with a selection finishes the run."""
+    # Only the nodes up to and including the interrupting selector are counted.
+    call_counts = {
+        "research_planner": 0,
+        "project_verifier": 0,
+        "project_selector": 0,
+    }
+
+    async def planning_node(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["research_planner"] += 1
+        return {
+            "plan": "1. News\n2. Profile\n3. Community",
+            "project_name": "Mercury",
+            "coin_ticker": "",
+            "news_queries": ["Mercury crypto news"],
+            "community_queries": ["Mercury crypto reddit"],
+        }
+
+    async def verifying_node(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["project_verifier"] += 1
+        # No coin_id and two candidates: the project is ambiguous.
+        return {
+            "coin_id": "",
+            "ambiguous_matches": [
+                {"coin_id": "mercury", "name": "Mercury", "symbol": "MER", "market_cap_rank": 999},
+                {
+                    "coin_id": "mercury-protocol",
+                    "name": "Mercury Protocol",
+                    "symbol": "GMT",
+                    "market_cap_rank": 650,
+                },
+            ],
+        }
+
+    async def selecting_node(state: dict[str, str]) -> dict[str, str | list[str]]:
+        call_counts["project_selector"] += 1
+        # interrupt() pauses the run; on resume, `selected` is the dict passed to Command(resume=...).
+        selected = interrupt(
+            {
+                "interrupt_type": "ambiguous_project",
+                "message": "Multiple CoinGecko matches found.",
+                "project_name": state["project_name"],
+                "coin_ticker": state["coin_ticker"],
+                "matches": state["ambiguous_matches"],
+            }
+        )
+        selected_coin_id = str(selected["selected_coin_id"])
+        return {
+            "coin_id": selected_coin_id,
+            "ambiguous_matches": [],
+        }
+
+    async def fake_news_scanner(state: dict[str, str]) -> dict[str, str]:
+        return {"news": "Mercury news"}
+
+    async def fake_project_profiler(state: dict[str, str]) -> dict[str, str]:
+        return {"profile": f"Profile for {state['coin_id']}"}
+
+    async def fake_community_analyst(state: dict[str, str]) -> dict[str, str]:
+        return {"community": "Community sentiment"}
+
+    async def fake_intelligence_compiler(state: dict[str, str]) -> dict[str, str]:
+        return {"report": f"Report for {state['coin_id']}"}
+
+    # Patch the node callables on the graph module before the graph is built.
+    monkeypatch.setattr(graph_module, "research_planner_node", planning_node)
+    monkeypatch.setattr(graph_module, "project_verifier_node", verifying_node)
+    monkeypatch.setattr(graph_module, "project_selector_node", selecting_node)
+    monkeypatch.setattr(graph_module, "news_scanner_node", fake_news_scanner)
+    monkeypatch.setattr(graph_module, "project_profiler_node", fake_project_profiler)
+    monkeypatch.setattr(graph_module, "community_analyst_node", fake_community_analyst)
+    monkeypatch.setattr(graph_module, "intelligence_compiler_node", fake_intelligence_compiler)
+
+    graph = graph_module.build_graph(checkpointer=MemorySaver())
+    config = {"configurable": {"thread_id": "interrupt-thread"}}
+
+    # First invocation pauses in the selector and reports the interrupt in the result.
+    first = await graph.ainvoke({"input": "Research Mercury"}, config=config)
+    assert "__interrupt__" in first
+
+    resumed = await graph.ainvoke(Command(resume={"selected_coin_id": "mercury-protocol"}), config=config)
+    # The selector body is expected to run twice (once before the pause, once on resume);
+    # the planner and verifier are not re-run.
+    assert call_counts == {
+        "research_planner": 1,
+        "project_verifier": 1,
+        "project_selector": 2,
+    }
+    assert resumed["coin_id"] == "mercury-protocol"
+    assert resumed["report"] == "Report for mercury-protocol"
diff --git a/examples/03-checkpoint-recovery/tests/unit/test_agent_nodes.py b/examples/03-checkpoint-recovery/tests/unit/test_agent_nodes.py
new file mode 100644
index 0000000..8373f04
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/unit/test_agent_nodes.py
@@ -0,0 +1,304 @@
+"""Unit tests for Pattern 03 agent nodes."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from unittest.mock import AsyncMock
+
+import pytest
+from src.agents import (
+ community_analyst,
+ intelligence_compiler,
+ news_scanner,
+ project_profiler,
+ research_planner,
+)
+
+
+@dataclass
+class _DummyResponse:
+ # Test double for a chat-model reply; `_DummyTextModel.ainvoke` returns it with the canned text.
+ content: str
+
+
+class _DummyTextModel:
+ def __init__(self, response_text: str) -> None:
+ self._response_text = response_text
+ self.calls: list[list[object]] = []
+
+ async def ainvoke(self, messages: list[object]) -> _DummyResponse:
+ self.calls.append(messages)
+ return _DummyResponse(content=self._response_text)
+
+
+class _DummyStructuredModel:
+ def __init__(self, response: research_planner.ResearchPlan) -> None:
+ self._response = response
+ self.calls: list[list[object]] = []
+
+ async def ainvoke(self, messages: list[object]) -> research_planner.ResearchPlan:
+ self.calls.append(messages)
+ return self._response
+
+
+class _DummyPlannerModel:
+ def __init__(self, response: research_planner.ResearchPlan) -> None:
+ self.schemas: list[type[research_planner.ResearchPlan]] = []
+ self.structured_model = _DummyStructuredModel(response)
+
+ def with_structured_output(
+ self,
+ schema: type[research_planner.ResearchPlan],
+ ) -> _DummyStructuredModel:
+ self.schemas.append(schema)
+ return self.structured_model
+
+
+@pytest.mark.asyncio
+async def test_research_planner_returns_structured_output(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ planner_output = research_planner.ResearchPlan(
+ project_name="Arbitrum",
+ coin_ticker="arb",
+ plan="1. Recent news\n2. Project fundamentals\n3. Community activity",
+ news_queries=["Arbitrum latest news 2026", "Arbitrum partnership announcement"],
+ community_queries=["Arbitrum site:reddit.com", "Arbitrum twitter sentiment"],
+ )
+ model = _DummyPlannerModel(planner_output)
+ monkeypatch.setattr(research_planner, "get_chat_model", lambda: model)
+ search_coins = AsyncMock()
+ monkeypatch.setattr(research_planner, "search_coins", search_coins)
+
+ result = await research_planner.research_planner_node({"input": "Research Arbitrum"})
+
+ assert result["plan"] == planner_output.plan
+ assert result["project_name"] == "Arbitrum"
+ assert result["coin_ticker"] == "ARB"
+ assert result["news_queries"] == planner_output.news_queries
+ assert result["community_queries"] == planner_output.community_queries
+ search_coins.assert_not_awaited()
+ assert model.schemas == [research_planner.ResearchPlan]
+ assert len(model.structured_model.calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_project_verifier_returns_verified_coin_id(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setattr(
+ research_planner,
+ "search_coins",
+ AsyncMock(
+ return_value=json.dumps([{"id": "arbitrum", "name": "Arbitrum", "symbol": "ARB", "market_cap_rank": 40}])
+ ),
+ )
+
+ result = await research_planner.project_verifier_node(
+ {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ }
+ )
+
+ assert result["coin_id"] == "arbitrum"
+ assert result["ambiguous_matches"] == []
+
+
+@pytest.mark.asyncio
+async def test_project_verifier_returns_ambiguous_matches_without_interrupting(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ monkeypatch.setattr(
+ research_planner,
+ "search_coins",
+ AsyncMock(
+ return_value=json.dumps(
+ [
+ {"id": "mercury", "name": "Mercury", "symbol": "MER", "market_cap_rank": 999},
+ {"id": "mercury-wrapped", "name": "Mercury", "symbol": "WRAP", "market_cap_rank": 650},
+ ]
+ )
+ ),
+ )
+
+ result = await research_planner.project_verifier_node(
+ {
+ "input": "Research Mercury",
+ "project_name": "Mercury",
+ "coin_ticker": "",
+ }
+ )
+
+ assert result["coin_id"] == ""
+ assert len(result["ambiguous_matches"]) == 2
+
+
+@pytest.mark.asyncio
+async def test_project_selector_interrupts_until_valid_coin_is_selected(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ interrupt_payloads: list[dict[str, object]] = []
+ responses = iter(
+ [
+ {"selected_coin_id": "not-a-real-coin"},
+ {"selected_coin_id": "mercury"},
+ ]
+ )
+
+ def fake_interrupt(payload: dict[str, object]) -> dict[str, str]:
+ interrupt_payloads.append(payload)
+ return next(responses)
+
+ monkeypatch.setattr(research_planner, "interrupt", fake_interrupt)
+
+ result = await research_planner.project_selector_node(
+ {
+ "input": "Research Mercury",
+ "project_name": "Mercury",
+ "coin_ticker": "",
+ "ambiguous_matches": [
+ {"coin_id": "mercury", "name": "Mercury", "symbol": "MER", "market_cap_rank": 999},
+ {"coin_id": "mercury-wrapped", "name": "Mercury", "symbol": "WRAP", "market_cap_rank": 650},
+ ],
+ }
+ )
+
+ assert result["coin_id"] == "mercury"
+ assert result["ambiguous_matches"] == []
+ assert len(interrupt_payloads) == 2
+ assert interrupt_payloads[0]["interrupt_type"] == "ambiguous_project"
+ assert "not valid" in str(interrupt_payloads[1]["message"])
+
+
+@pytest.mark.asyncio
+async def test_news_scanner_uses_planner_queries(monkeypatch: pytest.MonkeyPatch) -> None:
+ model = _DummyTextModel("Arbitrum announced Orbit chains. TVL exceeded $10B.")
+ monkeypatch.setattr(news_scanner, "get_chat_model", lambda: model)
+ run_search_queries = AsyncMock(
+ return_value=[
+ {"title": "Arbitrum news", "snippet": "Orbit chains launched", "link": "https://example.com/1"},
+ ]
+ )
+ monkeypatch.setattr(news_scanner, "run_search_queries", run_search_queries)
+
+ result = await news_scanner.news_scanner_node(
+ {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "news_queries": ["Arbitrum latest news 2026"],
+ }
+ )
+
+ assert "Orbit chains" in result["news"]
+ run_search_queries.assert_awaited_once_with(["Arbitrum latest news 2026"], "NewsScanner")
+ assert len(model.calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_project_profiler_prefers_verified_coin_id(monkeypatch: pytest.MonkeyPatch) -> None:
+ model = _DummyTextModel("Arbitrum is an L2 optimistic rollup. Price: $1.23, Market cap: $4.5B")
+ monkeypatch.setattr(project_profiler, "get_chat_model", lambda: model)
+ monkeypatch.setattr(project_profiler, "search_coins", AsyncMock())
+ monkeypatch.setattr(
+ project_profiler,
+ "get_coin_info",
+ AsyncMock(return_value='{"name": "Arbitrum", "description": "L2 rollup", "developer_data": {"stars": 8000}}'),
+ )
+ monkeypatch.setattr(
+ project_profiler,
+ "get_coin_price",
+ AsyncMock(return_value='{"price": 1.23, "market_cap": 4500000000}'),
+ )
+
+ result = await project_profiler.project_profiler_node(
+ {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "coin_id": "arbitrum",
+ }
+ )
+
+ assert "L2 optimistic rollup" in result["profile"]
+ project_profiler.search_coins.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_project_profiler_degrades_on_api_failure(monkeypatch: pytest.MonkeyPatch) -> None:
+ model = _DummyTextModel("Limited profile available due to data source issues.")
+ monkeypatch.setattr(project_profiler, "get_chat_model", lambda: model)
+
+ monkeypatch.setattr(
+ project_profiler,
+ "search_coins",
+ AsyncMock(side_effect=RuntimeError("CoinGecko down")),
+ )
+ monkeypatch.setattr(
+ project_profiler,
+ "get_coin_info",
+ AsyncMock(side_effect=RuntimeError("CoinGecko down")),
+ )
+ monkeypatch.setattr(
+ project_profiler,
+ "get_coin_price",
+ AsyncMock(side_effect=RuntimeError("CoinGecko down")),
+ )
+
+ result = await project_profiler.project_profiler_node(
+ {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ }
+ )
+
+ assert "profile" in result
+ assert len(model.calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_community_analyst_uses_social_search(monkeypatch: pytest.MonkeyPatch) -> None:
+ model = _DummyTextModel("Community Health: Strong. Active Reddit discussions and positive Twitter sentiment.")
+ monkeypatch.setattr(community_analyst, "get_chat_model", lambda: model)
+ run_search_queries = AsyncMock(
+ return_value=[
+ {"title": "Arbitrum Reddit", "snippet": "Great community", "link": "https://reddit.com/r/arbitrum/1"},
+ ]
+ )
+ monkeypatch.setattr(community_analyst, "run_search_queries", run_search_queries)
+
+ result = await community_analyst.community_analyst_node(
+ {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "community_queries": ["Arbitrum site:reddit.com"],
+ }
+ )
+
+ assert "Strong" in result["community"]
+ run_search_queries.assert_awaited_once_with(["Arbitrum site:reddit.com"], "CommunityAnalyst")
+ assert len(model.calls) == 1
+
+
+@pytest.mark.asyncio
+async def test_intelligence_compiler_produces_report(monkeypatch: pytest.MonkeyPatch) -> None:
+ model = _DummyTextModel("## Executive Summary\nArbitrum is a leading L2 scaling solution.")
+ monkeypatch.setattr(intelligence_compiler, "get_chat_model", lambda: model)
+
+ state = {
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "plan": "1. News\n2. Profile\n3. Community",
+ "news": "Orbit chains launched",
+ "profile": "L2 rollup, $1.23, $4.5B mcap",
+ "community": "Strong community health",
+ }
+ result = await intelligence_compiler.intelligence_compiler_node(state)
+
+ assert "Executive Summary" in result["report"]
+ assert "L2 scaling solution" in result["report"]
diff --git a/examples/03-checkpoint-recovery/tests/unit/test_coingecko.py b/examples/03-checkpoint-recovery/tests/unit/test_coingecko.py
new file mode 100644
index 0000000..df649ee
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/unit/test_coingecko.py
@@ -0,0 +1,72 @@
+"""Unit tests for the CoinGecko API client."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from src.coingecko import get_coin_info, get_coin_price, search_coins
+
+
+@pytest.mark.asyncio
+async def test_search_coins_returns_formatted_results() -> None:
+ mock_response = {
+ "coins": [
+ {"id": "arbitrum", "name": "Arbitrum", "symbol": "ARB", "market_cap_rank": 35},
+ {"id": "ethereum", "name": "Ethereum", "symbol": "ETH", "market_cap_rank": 2},
+ ]
+ }
+ with patch("src.coingecko._get", new_callable=AsyncMock, return_value=mock_response):
+ result = await search_coins("arbitrum")
+
+ data = json.loads(result)
+ assert len(data) == 2
+ assert data[0]["id"] == "arbitrum"
+ assert data[0]["symbol"] == "ARB"
+
+
+@pytest.mark.asyncio
+async def test_get_coin_info_extracts_key_fields() -> None:
+ mock_response = {
+ "name": "Arbitrum",
+ "symbol": "arb",
+ "description": {"en": "Arbitrum is a Layer 2 optimistic rollup."},
+ "categories": ["Layer 2", "Ethereum Ecosystem"],
+ "genesis_date": "2023-03-23",
+ "links": {
+ "homepage": ["https://arbitrum.io"],
+ "repos_url": {"github": ["https://github.com/OffchainLabs/nitro"]},
+ "twitter_screen_name": "arbitrum",
+ },
+ "community_data": {"twitter_followers": 500000},
+ "developer_data": {"commits_4_weeks": 120, "forks": 350, "stars": 8000, "pull_request_contributors": 0},
+ }
+ with patch("src.coingecko._get", new_callable=AsyncMock, return_value=mock_response):
+ result = await get_coin_info("arbitrum")
+
+ data = json.loads(result)
+ assert data["name"] == "Arbitrum"
+ assert "Layer 2" in data["categories"]
+ assert data["twitter"] == "arbitrum"
+ assert data["developer_data"]["commits_4_weeks"] == 120
+ assert "pull_request_contributors" not in data["developer_data"]
+
+
+@pytest.mark.asyncio
+async def test_get_coin_price_returns_market_data() -> None:
+ mock_response = {
+ "arbitrum": {
+ "usd": 1.23,
+ "usd_market_cap": 4500000000,
+ "usd_24h_vol": 350000000,
+ "usd_24h_change": 5.67,
+ }
+ }
+ with patch("src.coingecko._get", new_callable=AsyncMock, return_value=mock_response):
+ result = await get_coin_price("arbitrum")
+
+ data = json.loads(result)
+ assert data["price"] == 1.23
+ assert data["market_cap"] == 4500000000
+ assert data["change_24h_pct"] == 5.67
diff --git a/examples/03-checkpoint-recovery/tests/unit/test_mcp_tools.py b/examples/03-checkpoint-recovery/tests/unit/test_mcp_tools.py
new file mode 100644
index 0000000..391e06e
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/unit/test_mcp_tools.py
@@ -0,0 +1,244 @@
+"""Unit tests for the Pattern 03 MCP tools (thread inspection via checkpoints)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+from src.mcp_servers import crypto_intelligence as mcp_module
+
+
+@dataclass
+class _FakeInterrupt:
+ # Test double carrying one interrupt payload dict; attached to `_FakeTask.interrupts`.
+ value: dict[str, Any]
+
+
+@dataclass
+class _FakeTask:
+ # Test double for a pending task on a snapshot; defaults name the `project_selector` step.
+ id: str = "task-1"
+ name: str = "project_selector"
+ # Fresh list per instance so tests never share interrupt state.
+ interrupts: list[_FakeInterrupt] = field(default_factory=list)
+
+
+@dataclass
+class _FakeStateSnapshot:
+ # Test double for the snapshot object that `_FakeGraph.aget_state` returns.
+ values: dict[str, Any]
+ # The tests below use `()` for finished threads and node names for pending ones.
+ next: tuple[str, ...]
+ config: dict[str, Any] = field(default_factory=lambda: {"configurable": {"checkpoint_id": "cp-1"}})
+ tasks: tuple[_FakeTask, ...] = ()
+ metadata: dict[str, Any] = field(default_factory=dict)
+
+
+class _FakeGraph:
+ def __init__(self, snapshots: dict[str, _FakeStateSnapshot | None] | None = None) -> None:
+ self._snapshots = snapshots or {}
+
+ async def aget_state(self, config: dict[str, Any]) -> _FakeStateSnapshot | None:
+ tid = config.get("configurable", {}).get("thread_id", "")
+ return self._snapshots.get(tid)
+
+
+class _FakeCheckpointer:
+ def __init__(self) -> None:
+ self.deleted: list[str] = []
+
+ async def adelete_thread(self, thread_id: str) -> None:
+ self.deleted.append(thread_id)
+
+
+@dataclass
+class _FakeRuntime:
+ # Bundle installed as `mcp_module._runtime` by `_install_runtime`.
+ graph: _FakeGraph
+ checkpointer: _FakeCheckpointer
+ # Left as None unless a test supplies a pool mock.
+ pool: Any = None
+
+
+def _install_runtime(monkeypatch: pytest.MonkeyPatch, runtime: _FakeRuntime) -> None:
+ monkeypatch.setattr(mcp_module, "_runtime", runtime)
+
+
+# ---------------------------------------------------------------------------
+# get_research_status
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_get_status_completed(monkeypatch: pytest.MonkeyPatch) -> None:
+ snapshot = _FakeStateSnapshot(
+ values={
+ "input": "Research Arbitrum",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "coin_id": "arbitrum",
+ "report": "## Executive Summary\nArbitrum is a leading L2.",
+ },
+ next=(),
+ )
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({"arb-thread": snapshot}),
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.get_research_status("arb-thread")
+
+ assert "COMPLETED" in result
+ assert "Arbitrum" in result
+ assert "ARB" in result
+ assert "Executive Summary" in result
+
+
+@pytest.mark.asyncio
+async def test_get_status_interrupted(monkeypatch: pytest.MonkeyPatch) -> None:
+ interrupt_payload = {
+ "interrupt_type": "ambiguous_project",
+ "message": "Multiple CoinGecko matches found for Mercury.",
+ "project_name": "Mercury",
+ "coin_ticker": "",
+ "matches": [
+ {"coin_id": "mercury", "name": "Mercury", "symbol": "MER", "market_cap_rank": 999},
+ {"coin_id": "mercury-protocol", "name": "Mercury Protocol", "symbol": "GMT", "market_cap_rank": 650},
+ ],
+ }
+ snapshot = _FakeStateSnapshot(
+ values={"input": "Research Mercury", "project_name": "Mercury"},
+ next=("project_selector",),
+ tasks=(_FakeTask(interrupts=[_FakeInterrupt(value=interrupt_payload)]),),
+ )
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({"merc-thread": snapshot}),
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.get_research_status("merc-thread")
+
+ assert "INTERRUPTED" in result
+ assert "Mercury" in result
+ assert "mercury-protocol" in result
+ assert "selected_coin_id" in result
+
+
+@pytest.mark.asyncio
+async def test_get_status_resumable(monkeypatch: pytest.MonkeyPatch) -> None:
+ snapshot = _FakeStateSnapshot(
+ values={"input": "Research Solana", "project_name": "Solana"},
+ next=("project_profiler",),
+ )
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({"sol-thread": snapshot}),
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.get_research_status("sol-thread")
+
+ assert "RESUMABLE" in result
+ assert "project_profiler" in result
+ assert "sol-thread" in result
+
+
+@pytest.mark.asyncio
+async def test_get_status_not_found(monkeypatch: pytest.MonkeyPatch) -> None:
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({}),
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.get_research_status("no-such-thread")
+
+ assert "No research found" in result
+
+
+# ---------------------------------------------------------------------------
+# list_research_threads
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_list_threads_empty(monkeypatch: pytest.MonkeyPatch) -> None:
+ pool = AsyncMock()
+ pool.connection.return_value.__aenter__ = AsyncMock(side_effect=Exception("no table"))
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({}),
+ checkpointer=_FakeCheckpointer(),
+ pool=pool,
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.list_research_threads()
+
+ assert "No research threads found" in result
+
+
+@pytest.mark.asyncio
+async def test_list_threads_with_data(monkeypatch: pytest.MonkeyPatch) -> None:
+ completed = _FakeStateSnapshot(
+ values={"input": "Research Arbitrum", "project_name": "Arbitrum"},
+ next=(),
+ )
+ interrupted = _FakeStateSnapshot(
+ values={"input": "Research Mercury", "project_name": "Mercury"},
+ next=("project_selector",),
+ tasks=(_FakeTask(interrupts=[_FakeInterrupt(value={"message": "pick"})]),),
+ )
+ graph = _FakeGraph({"arb-thread": completed, "merc-thread": interrupted})
+
+ monkeypatch.setattr(
+ mcp_module,
+ "_list_thread_ids",
+ AsyncMock(return_value=["arb-thread", "merc-thread"]),
+ )
+ runtime = _FakeRuntime(
+ graph=graph,
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.list_research_threads()
+
+ assert "2 research thread" in result
+ assert "arb-thread" in result
+ assert "COMPLETED" in result
+ assert "merc-thread" in result
+ assert "INTERRUPTED" in result
+
+
+# ---------------------------------------------------------------------------
+# delete_research_thread
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_delete_thread_success(monkeypatch: pytest.MonkeyPatch) -> None:
+ snapshot = _FakeStateSnapshot(
+ values={"input": "Research Arbitrum"},
+ next=(),
+ )
+ checkpointer = _FakeCheckpointer()
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({"arb-thread": snapshot}),
+ checkpointer=checkpointer,
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.delete_research_thread("arb-thread")
+
+ assert "deleted" in result.lower()
+ assert checkpointer.deleted == ["arb-thread"]
+
+
+@pytest.mark.asyncio
+async def test_delete_thread_not_found(monkeypatch: pytest.MonkeyPatch) -> None:
+ runtime = _FakeRuntime(
+ graph=_FakeGraph({}),
+ checkpointer=_FakeCheckpointer(),
+ )
+ _install_runtime(monkeypatch, runtime)
+
+ result = await mcp_module.delete_research_thread("no-such-thread")
+
+ assert "No research found" in result
diff --git a/examples/03-checkpoint-recovery/tests/unit/test_state.py b/examples/03-checkpoint-recovery/tests/unit/test_state.py
new file mode 100644
index 0000000..36ec24e
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/unit/test_state.py
@@ -0,0 +1,42 @@
+"""Unit tests for Pattern 03 AgentState."""
+
+from __future__ import annotations
+
+from src.agents.state import AgentState
+
+
+def test_state_requires_input() -> None:
+ state: AgentState = {"input": "Research Arbitrum"}
+ assert state["input"] == "Research Arbitrum"
+
+
+def test_state_all_fields() -> None:
+ state: AgentState = {
+ "input": "Research Arbitrum",
+ "plan": "1. News\n2. Team",
+ "project_name": "Arbitrum",
+ "coin_ticker": "ARB",
+ "coin_id": "arbitrum",
+ "news_queries": ["Arbitrum latest news 2026"],
+ "community_queries": ["Arbitrum site:reddit.com"],
+ "ambiguous_matches": [
+ {
+ "coin_id": "arbitrum",
+ "name": "Arbitrum",
+ "symbol": "ARB",
+ "market_cap_rank": 40,
+ }
+ ],
+ "news": "Partnership announced",
+ "profile": "L2 scaling solution",
+ "community": "Strong community health",
+ "report": "## Executive Summary",
+ }
+ assert state["project_name"] == "Arbitrum"
+ assert state["coin_ticker"] == "ARB"
+ assert state["coin_id"] == "arbitrum"
+ assert state["news_queries"] == ["Arbitrum latest news 2026"]
+ assert state["community_queries"] == ["Arbitrum site:reddit.com"]
+ assert state["ambiguous_matches"][0]["coin_id"] == "arbitrum"
+ assert state["profile"] == "L2 scaling solution"
+ assert state["community"] == "Strong community health"
diff --git a/examples/03-checkpoint-recovery/tests/unit/test_web_search.py b/examples/03-checkpoint-recovery/tests/unit/test_web_search.py
new file mode 100644
index 0000000..2e1cc96
--- /dev/null
+++ b/examples/03-checkpoint-recovery/tests/unit/test_web_search.py
@@ -0,0 +1,64 @@
+"""Unit tests for the shared DuckDuckGo helper logic."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock
+
+import pytest
+from src.agents import web_search
+
+
+def test_deduplicate_results_removes_duplicate_urls() -> None:
+ raw_results = [
+ {"title": "A", "snippet": "one", "link": "https://example.com/a"},
+ {"title": "B", "snippet": "two", "link": "https://example.com/b"},
+ {"title": "A duplicate", "snippet": "three", "link": "https://example.com/a"},
+ ]
+
+ unique = web_search._deduplicate_results(raw_results)
+
+ assert len(unique) == 2
+ assert [item["link"] for item in unique] == [
+ "https://example.com/a",
+ "https://example.com/b",
+ ]
+
+
+@pytest.mark.asyncio
+async def test_run_search_queries_ignores_failed_query(monkeypatch: pytest.MonkeyPatch) -> None:
+ search = AsyncMock()
+ search.ainvoke = AsyncMock(
+ side_effect=[
+ [{"title": "A", "snippet": "one", "link": "https://example.com/a"}],
+ RuntimeError("search down"),
+ [{"title": "B", "snippet": "two", "link": "https://example.com/b"}],
+ ]
+ )
+ monkeypatch.setattr(web_search, "DuckDuckGoSearchResults", lambda **kwargs: search)
+
+ results = await web_search.run_search_queries(
+ ["query one", "query two", "query three"],
+ "NewsScanner",
+ )
+
+ assert len(results) == 2
+ assert {item["link"] for item in results} == {
+ "https://example.com/a",
+ "https://example.com/b",
+ }
+
+
+def test_format_search_results_returns_markdown_list() -> None:
+ formatted = web_search.format_search_results(
+ [
+ {"title": "A", "snippet": "one", "link": "https://example.com/a"},
+ {"title": "B", "snippet": "two", "link": "https://example.com/b"},
+ ]
+ )
+
+ assert "- [A](https://example.com/a): one" in formatted
+ assert "- [B](https://example.com/b): two" in formatted
+
+
+def test_format_search_results_handles_empty_results() -> None:
+ assert web_search.format_search_results([]) == "[No results found]"
diff --git a/libs/common/pyproject.toml b/libs/common/pyproject.toml
index 252fc38..8e24932 100644
--- a/libs/common/pyproject.toml
+++ b/libs/common/pyproject.toml
@@ -7,8 +7,10 @@ dependencies = [
"langsmith>=0.3",
"langchain-openai>=0.3",
"langchain-anthropic>=0.3",
+ "langgraph-checkpoint-postgres",
"pydantic>=2.0",
"pydantic-settings>=2.0",
+ "psycopg[binary,pool]",
]
[build-system]
diff --git a/libs/common/src/agent_common/__init__.py b/libs/common/src/agent_common/__init__.py
index 493cde1..da74112 100644
--- a/libs/common/src/agent_common/__init__.py
+++ b/libs/common/src/agent_common/__init__.py
@@ -2,12 +2,16 @@
from agent_common.config import Settings, get_settings
from agent_common.llm import get_chat_model
+from agent_common.persistence import close_checkpointer, create_postgres_pool, setup_checkpointer
from agent_common.tracing import build_langsmith_run_config, setup_tracing, verbose_log
__all__ = [
"Settings",
"get_settings",
"get_chat_model",
+ "create_postgres_pool",
+ "setup_checkpointer",
+ "close_checkpointer",
"setup_tracing",
"build_langsmith_run_config",
"verbose_log",
diff --git a/libs/common/src/agent_common/config.py b/libs/common/src/agent_common/config.py
index fae29b8..38dc930 100644
--- a/libs/common/src/agent_common/config.py
+++ b/libs/common/src/agent_common/config.py
@@ -27,6 +27,9 @@ class Settings(BaseSettings):
langsmith_project: str = "agent-patterns-lab"
langsmith_tracing: bool = True
+ # PostgreSQL persistence (Pattern 03+)
+ postgres_uri: str = ""
+
# Auth0 (Pattern 07+)
auth0_domain: str = ""
auth0_client_id: str = ""
diff --git a/libs/common/src/agent_common/persistence.py b/libs/common/src/agent_common/persistence.py
new file mode 100644
index 0000000..639a1d1
--- /dev/null
+++ b/libs/common/src/agent_common/persistence.py
@@ -0,0 +1,55 @@
+"""PostgreSQL helpers for LangGraph persistence patterns."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
+from psycopg.rows import dict_row
+from psycopg_pool import AsyncConnectionPool
+
+from agent_common.config import get_settings
+from agent_common.tracing import verbose_log
+
+_CHECKPOINTER_CONTEXTS: dict[int, Any] = {}
+
+
+async def create_postgres_pool(postgres_uri: str | None = None) -> AsyncConnectionPool[Any]:
+ """Create and open a PostgreSQL connection pool with dict rows."""
+ resolved_uri = postgres_uri or get_settings().postgres_uri
+ if not resolved_uri:
+ raise ValueError("POSTGRES_URI is required for PostgreSQL-backed persistence")
+
+ pool: AsyncConnectionPool[Any] = AsyncConnectionPool(
+ conninfo=resolved_uri,
+ kwargs={"autocommit": True, "row_factory": dict_row},
+ open=False,
+ )
+ await pool.open()
+ verbose_log("System", "PostgreSQL connection pool opened")
+ return pool
+
+
+async def setup_checkpointer(postgres_uri: str | None = None) -> AsyncPostgresSaver:
+ """Create and initialize the LangGraph PostgreSQL checkpointer."""
+ resolved_uri = postgres_uri or get_settings().postgres_uri
+ if not resolved_uri:
+ raise ValueError("POSTGRES_URI is required for PostgreSQL-backed persistence")
+
+ context_manager = AsyncPostgresSaver.from_conn_string(resolved_uri)
+ checkpointer = await context_manager.__aenter__()
+ _CHECKPOINTER_CONTEXTS[id(checkpointer)] = context_manager
+ await checkpointer.setup()
+ verbose_log("System", "LangGraph PostgreSQL checkpointer initialized")
+ return checkpointer
+
+
+async def close_checkpointer(checkpointer: AsyncPostgresSaver | None) -> None:
+ """Close a checkpointer created by `setup_checkpointer`."""
+ if checkpointer is None:
+ return
+
+ context_manager = _CHECKPOINTER_CONTEXTS.pop(id(checkpointer), None)
+ if context_manager is not None:
+ await context_manager.__aexit__(None, None, None)
+ verbose_log("System", "LangGraph PostgreSQL checkpointer closed")
diff --git a/uv.lock b/uv.lock
index 16eb12a..6a7dfde 100644
--- a/uv.lock
+++ b/uv.lock
@@ -8,6 +8,7 @@ members = [
"agent-patterns-lab",
"example-01-orchestrator-pipeline",
"example-02-mcp-tool-integration",
+ "example-03-checkpoint-recovery",
]
[[package]]
@@ -17,7 +18,9 @@ source = { editable = "libs/common" }
dependencies = [
{ name = "langchain-anthropic" },
{ name = "langchain-openai" },
+ { name = "langgraph-checkpoint-postgres" },
{ name = "langsmith" },
+ { name = "psycopg", extra = ["binary", "pool"] },
{ name = "pydantic" },
{ name = "pydantic-settings" },
]
@@ -26,7 +29,9 @@ dependencies = [
requires-dist = [
{ name = "langchain-anthropic", specifier = ">=0.3" },
{ name = "langchain-openai", specifier = ">=0.3" },
+ { name = "langgraph-checkpoint-postgres" },
{ name = "langsmith", specifier = ">=0.3" },
+ { name = "psycopg", extras = ["binary", "pool"] },
{ name = "pydantic", specifier = ">=2.0" },
{ name = "pydantic-settings", specifier = ">=2.0" },
]
@@ -536,6 +541,49 @@ requires-dist = [
{ name = "uvicorn", specifier = ">=0.34" },
]
+[[package]]
+name = "example-03-checkpoint-recovery"
+version = "0.1.0"
+source = { virtual = "examples/03-checkpoint-recovery" }
+dependencies = [
+ { name = "agent-common" },
+ { name = "ddgs" },
+ { name = "duckduckgo-search" },
+ { name = "fastapi" },
+ { name = "httpx" },
+ { name = "langchain-anthropic" },
+ { name = "langchain-community" },
+ { name = "langchain-core" },
+ { name = "langchain-openai" },
+ { name = "langgraph" },
+ { name = "langgraph-checkpoint-postgres" },
+ { name = "langsmith" },
+ { name = "mcp" },
+ { name = "psycopg", extra = ["binary", "pool"] },
+ { name = "pydantic" },
+ { name = "uvicorn" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "agent-common", editable = "libs/common" },
+ { name = "ddgs", specifier = ">=7.0" },
+ { name = "duckduckgo-search", specifier = ">=8.0" },
+ { name = "fastapi", specifier = ">=0.115" },
+ { name = "httpx", specifier = ">=0.28" },
+ { name = "langchain-anthropic", specifier = ">=0.3" },
+ { name = "langchain-community", specifier = ">=0.3" },
+ { name = "langchain-core", specifier = ">=0.3" },
+ { name = "langchain-openai", specifier = ">=0.3" },
+ { name = "langgraph", specifier = ">=0.4" },
+ { name = "langgraph-checkpoint-postgres" },
+ { name = "langsmith", specifier = ">=0.3" },
+ { name = "mcp", specifier = ">=1.0" },
+ { name = "psycopg", extras = ["binary", "pool"] },
+ { name = "pydantic", specifier = ">=2.0" },
+ { name = "uvicorn", specifier = ">=0.34" },
+]
+
[[package]]
name = "fastapi"
version = "0.135.1"
@@ -909,6 +957,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/65/4c/09a4a0c42f5d2fc38d6c4d67884788eff7fd2cfdf367fdf7033de908b4c0/langgraph_checkpoint-4.0.1-py3-none-any.whl", hash = "sha256:e3adcd7a0e0166f3b48b8cf508ce0ea366e7420b5a73aa81289888727769b034", size = 50453, upload-time = "2026-02-27T21:06:14.293Z" },
]
+[[package]]
+name = "langgraph-checkpoint-postgres"
+version = "3.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "langgraph-checkpoint" },
+ { name = "orjson" },
+ { name = "psycopg" },
+ { name = "psycopg-pool" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/7a/8f439966643d32111248a225e6cb33a182d07c90de780c4dbfc1e0377832/langgraph_checkpoint_postgres-3.0.5.tar.gz", hash = "sha256:a8fd7278a63f4f849b5cbc7884a15ca8f41e7d5f7467d0a66b31e8c24492f7eb", size = 127856, upload-time = "2026-03-18T21:25:29.785Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e8/87/b0f98b33a67204bca9d5619bcd9574222f6b025cf3c125eedcec9a50ecbc/langgraph_checkpoint_postgres-3.0.5-py3-none-any.whl", hash = "sha256:86d7040a88fd70087eaafb72251d796696a0a2d856168f5c11ef620771411552", size = 42907, upload-time = "2026-03-18T21:25:28.75Z" },
+]
+
[[package]]
name = "langgraph-prebuilt"
version = "1.0.8"
@@ -1375,6 +1438,56 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
]
+[[package]]
+name = "psycopg"
+version = "3.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "tzdata", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" },
+]
+
+[package.optional-dependencies]
+binary = [
+ { name = "psycopg-binary", marker = "implementation_name != 'pypy'" },
+]
+pool = [
+ { name = "psycopg-pool" },
+]
+
+[[package]]
+name = "psycopg-binary"
+version = "3.3.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" },
+ { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" },
+ { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" },
+ { url = "https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" },
+ { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" },
+ { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" },
+ { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" },
+ { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" },
+]
+
+[[package]]
+name = "psycopg-pool"
+version = "3.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size = 31606, upload-time = "2025-12-01T11:34:33.11Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, upload-time = "2025-12-01T11:34:29.761Z" },
+]
+
[[package]]
name = "pycparser"
version = "3.0"
@@ -1867,6 +1980,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
]
+[[package]]
+name = "tzdata"
+version = "2025.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5e/a7/c202b344c5ca7daf398f3b8a477eeb205cf3b6f32e7ec3a6bac0629ca975/tzdata-2025.3.tar.gz", hash = "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7", size = 196772, upload-time = "2025-12-13T17:45:35.667Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c7/b0/003792df09decd6849a5e39c28b513c06e84436a54440380862b5aeff25d/tzdata-2025.3-py2.py3-none-any.whl", hash = "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", size = 348521, upload-time = "2025-12-13T17:45:33.889Z" },
+]
+
[[package]]
name = "urllib3"
version = "2.6.3"