OHDSI · azimov · May 4, 2026 · May 7, 2026 · May 13, 2026 · May 14, 2026
diff --git a/.github/workflows/basic_tests.yml b/.github/workflows/basic_tests.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13", "3.14" ]
+        python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
 
     steps:
       - name: Check out repository

diff --git a/.gitignore b/.gitignore
@@ -103,6 +103,7 @@ celerybeat.pid
 
 # Environments
 .env
+.Renviron
 .venv
 env/
 venv/
@@ -178,4 +179,12 @@ debug_app/user_overrides.json
 debug_app/test_results.json
 
 .test_baseline.json
-.test_final.json
+.test_final.json
+
+# Benchmark outputs (generated by running benchmarks)
+benchmark_output/
+circepy_benchmarks/
+renv.lock
+eunomia_data/
+renv/
+.Rprofile
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -32,6 +32,35 @@ git pre-commit run --all-files
 
 If pre-commit checks fail, fix the issues and re-run until they pass.
 
+## Ibis Execution Layer: NEVER use Python in-memory operations
+
+The datasets this software processes are large (often 100M+ rows). Operations that pull data into Python memory will crash the process. All data processing MUST remain as lazy ibis expressions executed on the database backend.
+
+### Forbidden patterns in production code (`circe/execution/` and `circe/cohort_definition_set/`):
+
+| Pattern | Example (NEVER do this) | Instead |
+|---|---|---|
+| `.execute()` | `table.execute()` loads entire table into a pandas DataFrame in memory | Compose ibis expressions; let the backend execute the full query |
+| `.to_pandas()` | `table.to_pandas()` pulls result set into Python | Use ibis expressions; only call `.execute()` for small scalars (e.g., `table.limit(1).count().execute()`) |
+| Python iteration over results | `for row in table.select(...).distinct().to_pandas().itertuples()` | Push aggregation/distinct into ibis; use window functions or joins |
+| `ibis.memtable()` with large DataFrames | Constructing a large `pd.DataFrame` and passing to `ibis.memtable()` | Read directly from the database table (passed tables already exist in the backend) |
+| Loading files into Python | `pd.read_csv(...)`, reading Parquet into memory | Use ibis to read files: `ibis.read_csv()`, `ibis.read_parquet()` |
+
+### Existing violations in production code (DO NOT FIX — examples for reference):
+
+1. **`circe/cohort_definition_set/_checksum_store.py`** — uses `pandas`, `.execute()`, `pd.DataFrame()`, row iteration — should use ibis expressions end-to-end
+2. **`circe/execution/engine/custom_era.py:86`** — `.execute().iloc[:, 0]` to pull concept IDs into a Python tuple
+3. **`circe/execution/engine/group_demographics.py:97`** — `.to_pandas().itertuples()` to iterate over distinct concept IDs
+4. **`circe/execution/ibis/operations.py:86`** — `.execute()` to check if rows exist (use `table.limit(1).count().execute()` instead)
+5. **`benchmarks/compare_cohort_outputs.py`** — full table `.execute()`, pandas row iteration, set comparison in memory (outside the production paths listed above, but the same anti-pattern)
+
+### Allowed uses of `.execute()`:
+
+- **Tests only** — tests run against small in-memory DuckDB databases with tiny fixtures. Assertions on small result sets are fine.
+- **Scalar or single-row results** — getting a single count or checking existence: `table.count().execute()`, `table.limit(1).execute()` (returns at most 1 row)
+
+When writing new production code, if you find yourself reaching for `.execute()`, `.to_pandas()`, or Python iteration over ibis results, **stop** — the query can be rewritten as a lazy ibis expression.
+
 ## Git Workflow
 - Do not run `git commit` — the user will handle commits
 - Run pre-commit checks to validate code quality before marking tasks complete
diff --git a/circe/api.py b/circe/api.py
@@ -9,8 +9,16 @@
 - cohort_print_friendly(): Generate Markdown from cohort expression
 """
 
-from typing import TYPE_CHECKING, Any, Literal, Optional
-
+from typing import TYPE_CHECKING, Any, Literal
+
+from .cohort_definition_set import (  # noqa: F401
+    CohortDefinition,
+    CohortDefinitionSet,
+    CohortGenerationResult,
+    async_generate_cohort_set,
+    generate_cohort_set,
+    summarise_generation_results,
+)
 from .cohortdefinition import (
     BuildExpressionQueryOptions,
     CohortExpression,
@@ -114,7 +122,7 @@ def cohort_expression_from_yaml(yaml_str: str) -> CohortExpression:
 
 def build_cohort_query(
     expression: CohortExpression,
-    options: Optional[BuildExpressionQueryOptions] = None,
+    options: BuildExpressionQueryOptions | None = None,
 ) -> str:
     """Generate SQL query from a cohort expression.
 
@@ -147,8 +155,8 @@ def build_cohort(
     *,
     backend: IbisBackendLike,
     cdm_schema: str,
-    vocabulary_schema: Optional[str] = None,
-    results_schema: Optional[str] = None,
+    vocabulary_schema: str | None = None,
+    results_schema: str | None = None,
 ) -> Table:
     """Build a cohort as a relational table expression.
 
@@ -199,8 +207,8 @@ def write_cohort(
     cdm_schema: str,
     cohort_table: str,
     cohort_id: int,
-    vocabulary_schema: Optional[str] = None,
-    results_schema: Optional[str] = None,
+    vocabulary_schema: str | None = None,
+    results_schema: str | None = None,
     if_exists: Literal["fail", "replace"] = "fail",
 ) -> None:
     """Build and write an OHDSI cohort table.
@@ -260,14 +268,12 @@ def write_cohort(
 
 def cohort_print_friendly(
     expression: CohortExpression,
-    concept_sets: Optional[list[ConceptSet]] = None,
-    title: Optional[str] = None,
+    concept_sets: list[ConceptSet] | None = None,
+    title: str | None = None,
     include_concept_sets: bool = False,
 ) -> str:
     """Generate human-readable Markdown from a cohort expression.
 
-    This is equivalent to R CirceR's `cohortPrintFriendly()` function.
-
     Args:
         expression: CohortExpression instance
         concept_sets: Optional list of concept sets (uses expression.concept_sets if None)

diff --git a/circe/chat.py b/circe/chat.py