From dec99908f81fe57567cc71bdec30efe2dce8beda Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:09:54 -0400 Subject: [PATCH 1/8] feat(api): add nullable prompt_addendum column to Dataset Operator-curated free-prose note attached to each Dataset row. Threaded into the chat agent's system prompt in subsequent tasks so its answers are grounded in dataset-specific biological context. Migration is purely additive (nullable, default NULL); existing rows are unaffected and continue to behave identically. --- ...7f37d3cf8d2_add_dataset_prompt_addendum.py | 29 +++++++++++++++++++ .../api/src/cell_explorer_api/db/models.py | 1 + 2 files changed, 30 insertions(+) create mode 100644 packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py diff --git a/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py b/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py new file mode 100644 index 0000000..05cc52f --- /dev/null +++ b/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py @@ -0,0 +1,29 @@ +"""add dataset prompt_addendum + +Revision ID: 27f37d3cf8d2 +Revises: 092e2aa153ce +Create Date: 2026-05-21 10:09:30.488887 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import sqlmodel + + +# revision identifiers, used by Alembic. 
+revision: str = '27f37d3cf8d2' +down_revision: Union[str, Sequence[str], None] = '092e2aa153ce' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column('datasets', sa.Column('prompt_addendum', sa.String(), nullable=True)) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_column('datasets', 'prompt_addendum') diff --git a/packages/api/src/cell_explorer_api/db/models.py b/packages/api/src/cell_explorer_api/db/models.py index 2e31a51..8cd2d3a 100644 --- a/packages/api/src/cell_explorer_api/db/models.py +++ b/packages/api/src/cell_explorer_api/db/models.py @@ -55,6 +55,7 @@ class Dataset(SQLModel, table=True): description: str | None = None is_public: bool = Field(default=False) required_roles: list[str] = Field(default_factory=list, sa_column=Column(JSON)) + prompt_addendum: str | None = Field(default=None) chat_enabled: bool = Field(default=False) created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) From 65af09faabcf32d84d1e85d40540da7f7ede4047 Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:15:07 -0400 Subject: [PATCH 2/8] fix(api): prompt_addendum column uses sa.Text per code review - sa.String -> sa.Text: prose addendum is open-ended free-form text; Text is the semantically correct type (PostgreSQL distinguishes, SQLite is storage-equivalent). - Drop unused 'import sqlmodel' from migration top (cargo-culted from template, never referenced). 
--- .../versions/27f37d3cf8d2_add_dataset_prompt_addendum.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py b/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py index 05cc52f..79d65ad 100644 --- a/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py +++ b/packages/api/alembic/versions/27f37d3cf8d2_add_dataset_prompt_addendum.py @@ -9,7 +9,6 @@ from alembic import op import sqlalchemy as sa -import sqlmodel # revision identifiers, used by Alembic. @@ -21,7 +20,7 @@ def upgrade() -> None: """Upgrade schema.""" - op.add_column('datasets', sa.Column('prompt_addendum', sa.String(), nullable=True)) + op.add_column('datasets', sa.Column('prompt_addendum', sa.Text(), nullable=True)) def downgrade() -> None: From 6458b2130529f9541fe4182528e34ec77a16a00f Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:17:32 -0400 Subject: [PATCH 3/8] feat(api): admin endpoints accept + return prompt_addendum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DatasetCreate / DatasetUpdate / DatasetAdminResponse gain a new optional prompt_addendum field. POST stores it, PUT updates it (preserving other fields via the existing exclude_unset behavior), GET surfaces it. No frontend UI changes — operators edit via the admin API for now. 
--- .../api/src/cell_explorer_api/routes/admin.py | 6 ++ packages/api/tests/test_admin_routes.py | 80 +++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/packages/api/src/cell_explorer_api/routes/admin.py b/packages/api/src/cell_explorer_api/routes/admin.py index 308503d..f38fe1a 100644 --- a/packages/api/src/cell_explorer_api/routes/admin.py +++ b/packages/api/src/cell_explorer_api/routes/admin.py @@ -59,6 +59,7 @@ class DatasetCreate(BaseModel): is_public: bool = False required_roles: list[str] = [] chat_enabled: bool = False + prompt_addendum: str | None = None class DatasetUpdate(BaseModel): @@ -68,6 +69,7 @@ class DatasetUpdate(BaseModel): is_public: bool | None = None required_roles: list[str] | None = None chat_enabled: bool | None = None + prompt_addendum: str | None = None class DatasetAdminResponse(BaseModel): @@ -80,6 +82,7 @@ class DatasetAdminResponse(BaseModel): is_public: bool required_roles: list[str] chat_enabled: bool + prompt_addendum: str | None class DatasetAdminListResponse(BaseModel): @@ -176,6 +179,7 @@ async def list_datasets_admin( is_public=dataset.is_public, required_roles=dataset.required_roles, chat_enabled=dataset.chat_enabled, + prompt_addendum=dataset.prompt_addendum, ) for dataset in result.all() ] @@ -210,6 +214,7 @@ async def create_dataset( is_public=dataset.is_public, required_roles=dataset.required_roles, chat_enabled=dataset.chat_enabled, + prompt_addendum=dataset.prompt_addendum, ) @@ -240,6 +245,7 @@ async def update_dataset( is_public=dataset.is_public, required_roles=dataset.required_roles, chat_enabled=dataset.chat_enabled, + prompt_addendum=dataset.prompt_addendum, ) diff --git a/packages/api/tests/test_admin_routes.py b/packages/api/tests/test_admin_routes.py index 3a6ed28..9d5c52f 100644 --- a/packages/api/tests/test_admin_routes.py +++ b/packages/api/tests/test_admin_routes.py @@ -302,3 +302,83 @@ def test_admin_create_dataset_chat_enabled_defaults_false(seeded_app): ) assert response.status_code == 
201 assert response.json()["chat_enabled"] is False + + +# --- prompt_addendum field --- + + +def test_create_dataset_with_prompt_addendum(seeded_app): + """POST /admin/datasets stores prompt_addendum and surfaces it on response.""" + ds_id = seeded_app.state.test_datasource_id + client = TestClient(seeded_app) + response = client.post( + "/api/admin/datasets", + json={ + "datasource_id": ds_id, + "name": "Test", + "slug": "test-with-addendum", + "path": "test.zarr", + "prompt_addendum": "Important: cells were sorted on CD45 first.", + }, + headers=AUTH_HEADER, + ) + assert response.status_code == 201, response.text + body = response.json() + assert body["prompt_addendum"] == "Important: cells were sorted on CD45 first." + + +def test_update_dataset_prompt_addendum(seeded_app): + """PUT updates only prompt_addendum; other fields unchanged.""" + ds_id = seeded_app.state.test_datasource_id + client = TestClient(seeded_app) + # Create a dataset with no addendum first + create_resp = client.post( + "/api/admin/datasets", + json={ + "datasource_id": ds_id, + "name": "No-Addendum", + "slug": "no-addendum", + "path": "no-addendum.zarr", + }, + headers=AUTH_HEADER, + ) + assert create_resp.status_code == 201 + created = create_resp.json() + # Update only prompt_addendum + response = client.put( + "/api/admin/datasets/no-addendum", + json={"prompt_addendum": "Curator notes here."}, + headers=AUTH_HEADER, + ) + assert response.status_code == 200, response.text + body = response.json() + assert body["prompt_addendum"] == "Curator notes here." 
+ # Other fields kept their previous values + assert body["name"] == created["name"] + assert body["path"] == created["path"] + + +def test_get_datasets_includes_prompt_addendum(seeded_app): + """GET /admin/datasets surfaces prompt_addendum on each row.""" + ds_id = seeded_app.state.test_datasource_id + client = TestClient(seeded_app) + # Create a dataset with an addendum + create_resp = client.post( + "/api/admin/datasets", + json={ + "datasource_id": ds_id, + "name": "Has-Addendum", + "slug": "has-addendum", + "path": "has-addendum.zarr", + "prompt_addendum": "Sample curator note.", + }, + headers=AUTH_HEADER, + ) + assert create_resp.status_code == 201 + response = client.get("/api/admin/datasets", headers=AUTH_HEADER) + assert response.status_code == 200 + body = response.json() + rows = {d["slug"]: d for d in body["datasets"]} + assert "has-addendum" in rows + assert "prompt_addendum" in rows["has-addendum"] + assert rows["has-addendum"]["prompt_addendum"] == "Sample curator note." From 507531a286ebf12a71d9795d54eaffead96f62b5 Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:22:46 -0400 Subject: [PATCH 4/8] fix(api): align prompt_addendum order with ORM + tighten test coverage - Reorder prompt_addendum above chat_enabled in DatasetCreate / DatasetUpdate / DatasetAdminResponse and the three response- construction call sites, matching the column order in db/models.py. - Add test_create_dataset_prompt_addendum_defaults_null asserting omitted prompt_addendum produces a null in the response. - Add baseline 'created prompt_addendum is None' assertion in test_update_dataset_prompt_addendum so the test fails closed if the create path ever started defaulting non-null. 
--- .../api/src/cell_explorer_api/routes/admin.py | 12 ++++++------ packages/api/tests/test_admin_routes.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/packages/api/src/cell_explorer_api/routes/admin.py b/packages/api/src/cell_explorer_api/routes/admin.py index f38fe1a..bb3cc6e 100644 --- a/packages/api/src/cell_explorer_api/routes/admin.py +++ b/packages/api/src/cell_explorer_api/routes/admin.py @@ -58,8 +58,8 @@ class DatasetCreate(BaseModel): description: str | None = None is_public: bool = False required_roles: list[str] = [] - chat_enabled: bool = False prompt_addendum: str | None = None + chat_enabled: bool = False class DatasetUpdate(BaseModel): @@ -68,8 +68,8 @@ class DatasetUpdate(BaseModel): description: str | None = None is_public: bool | None = None required_roles: list[str] | None = None - chat_enabled: bool | None = None prompt_addendum: str | None = None + chat_enabled: bool | None = None class DatasetAdminResponse(BaseModel): @@ -81,8 +81,8 @@ class DatasetAdminResponse(BaseModel): description: str | None is_public: bool required_roles: list[str] - chat_enabled: bool prompt_addendum: str | None + chat_enabled: bool class DatasetAdminListResponse(BaseModel): @@ -178,8 +178,8 @@ async def list_datasets_admin( description=dataset.description, is_public=dataset.is_public, required_roles=dataset.required_roles, - chat_enabled=dataset.chat_enabled, prompt_addendum=dataset.prompt_addendum, + chat_enabled=dataset.chat_enabled, ) for dataset in result.all() ] @@ -213,8 +213,8 @@ async def create_dataset( description=dataset.description, is_public=dataset.is_public, required_roles=dataset.required_roles, - chat_enabled=dataset.chat_enabled, prompt_addendum=dataset.prompt_addendum, + chat_enabled=dataset.chat_enabled, ) @@ -244,8 +244,8 @@ async def update_dataset( description=dataset.description, is_public=dataset.is_public, required_roles=dataset.required_roles, - chat_enabled=dataset.chat_enabled, 
prompt_addendum=dataset.prompt_addendum, + chat_enabled=dataset.chat_enabled, ) diff --git a/packages/api/tests/test_admin_routes.py b/packages/api/tests/test_admin_routes.py index 9d5c52f..d34675c 100644 --- a/packages/api/tests/test_admin_routes.py +++ b/packages/api/tests/test_admin_routes.py @@ -327,6 +327,24 @@ def test_create_dataset_with_prompt_addendum(seeded_app): assert body["prompt_addendum"] == "Important: cells were sorted on CD45 first." +def test_create_dataset_prompt_addendum_defaults_null(seeded_app): + """prompt_addendum is null when not supplied on create.""" + ds_id = seeded_app.state.test_datasource_id + client = TestClient(seeded_app) + response = client.post( + "/api/admin/datasets", + json={ + "datasource_id": ds_id, + "name": "No-Addendum-Default", + "slug": "no-addendum-default", + "path": "no-addendum.zarr", + }, + headers=AUTH_HEADER, + ) + assert response.status_code == 201, response.text + assert response.json()["prompt_addendum"] is None + + def test_update_dataset_prompt_addendum(seeded_app): """PUT updates only prompt_addendum; other fields unchanged.""" ds_id = seeded_app.state.test_datasource_id @@ -344,6 +362,7 @@ def test_update_dataset_prompt_addendum(seeded_app): ) assert create_resp.status_code == 201 created = create_resp.json() + assert created["prompt_addendum"] is None # Update only prompt_addendum response = client.put( "/api/admin/datasets/no-addendum", From 099a8ecb740a321d9bbe2112f6a61230360f70e5 Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:25:08 -0400 Subject: [PATCH 5/8] feat(agent): thread prompt_addendum through DatasetContext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DatasetContext gains an optional prompt_addendum field. build_dataset_context accepts it as a keyword arg; _build_dataset_context_cached (from #101) forwards it from the Dataset row. 
The cache key (slug, updated_at) already invalidates when admin PUT bumps updated_at, so changes to prompt_addendum take effect on the next request. Default is None for backward compatibility — datasets without curator notes produce a context byte-identical to before. --- .../services/chat_session.py | 1 + .../api/tests/services/test_chat_session.py | 37 +++++++++++++++++++ .../prompt/dataset_context.py | 9 ++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/packages/api/src/cell_explorer_api/services/chat_session.py b/packages/api/src/cell_explorer_api/services/chat_session.py index e737157..e223dae 100644 --- a/packages/api/src/cell_explorer_api/services/chat_session.py +++ b/packages/api/src/cell_explorer_api/services/chat_session.py @@ -62,6 +62,7 @@ async def _build_dataset_context_cached( slug=dataset.slug, name=dataset.name, description=dataset.description or "", + prompt_addendum=dataset.prompt_addendum, ) return _dataset_ctx_cache[key] diff --git a/packages/api/tests/services/test_chat_session.py b/packages/api/tests/services/test_chat_session.py index 41772a7..f0f8801 100644 --- a/packages/api/tests/services/test_chat_session.py +++ b/packages/api/tests/services/test_chat_session.py @@ -358,6 +358,43 @@ async def _spy_build(*args, **kwargs): assert call_count == 2, f"build_dataset_context called {call_count}x; expected 2 after updated_at bump" +@pytest.mark.asyncio +async def test_make_chat_agent_forwards_prompt_addendum_to_dataset_context(): + """When the Dataset row has prompt_addendum set, make_chat_agent forwards + it to build_dataset_context (via _build_dataset_context_cached).""" + dataset = _public_dataset() + dataset.prompt_addendum = "Test curator note: cells were sorted on CD45." 
+ datasource = MagicMock(base_url="https://example.com", type="HTTP_TOKEN", credential_ref=None) + db = await _mk_db_session(_make_db_row(dataset, datasource)) + + fake_anndata = MagicMock(n_obs=10, n_vars=20, obsm_keys=[], obs_columns=[]) + + captured: dict = {} + from cell_explorer_agent import build_dataset_context as _real + + async def _spy_build(*args, **kwargs): + captured.update(kwargs) + return await _real(*args, **kwargs) + + with patch("cell_explorer_api.services.chat_session.ZarrStore") as MockZS, \ + patch("cell_explorer_api.services.chat_session.AnnDataStore") as MockADS, \ + patch("cell_explorer_api.services.chat_session.StrataStore") as MockSS, \ + patch("cell_explorer_api.services.chat_session.build_dataset_context", _spy_build): + MockZS.open = AsyncMock(return_value=MagicMock()) + MockADS.open = AsyncMock(return_value=fake_anndata) + MockSS.open = AsyncMock(return_value=MagicMock()) + + user = _FakeUser(roles=[]) + llm = FakeLLMClient(scripts=[]) + settings = MagicMock() + agent = await make_chat_agent( + user=user, dataset_slug="pbmc3k", db=db, settings=settings, llm=llm, + ) + + assert captured.get("prompt_addendum") == "Test curator note: cells were sorted on CD45." + assert agent.dataset_ctx.prompt_addendum == "Test curator note: cells were sorted on CD45." 
+ + @pytest.mark.asyncio async def test_dataset_ctx_cache_is_independent_per_slug(): """Two datasets with different slugs cache independently.""" diff --git a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/dataset_context.py b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/dataset_context.py index ca945e5..0ad12be 100644 --- a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/dataset_context.py +++ b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/dataset_context.py @@ -23,10 +23,16 @@ class DatasetContext: n_var: int obs_columns: list[ObsColumnInfo] embedding_keys: list[str] + prompt_addendum: str | None = None async def build_dataset_context( - z: ZarrAccess, *, slug: str, name: str, description: str + z: ZarrAccess, + *, + slug: str, + name: str, + description: str, + prompt_addendum: str | None = None, ) -> DatasetContext: n_obs, n_var = await z.shape() obs = await z.obs_columns() @@ -47,4 +53,5 @@ async def build_dataset_context( for c in obs ], embedding_keys=list(emb), + prompt_addendum=prompt_addendum, ) From 11eb1e07748c464e13c9c68a71bb6cd291283e43 Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:30:34 -0400 Subject: [PATCH 6/8] docs(api): clarify cache-key invariant covers prompt_addendum --- .../src/cell_explorer_api/services/chat_session.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/api/src/cell_explorer_api/services/chat_session.py b/packages/api/src/cell_explorer_api/services/chat_session.py index e223dae..4be5f58 100644 --- a/packages/api/src/cell_explorer_api/services/chat_session.py +++ b/packages/api/src/cell_explorer_api/services/chat_session.py @@ -32,11 +32,13 @@ # In-process cache of DatasetContext keyed by (slug, updated_at). 
The Dataset -# row's updated_at bumps on every admin PUT (see routes/admin.py), so this -# self-invalidates without an explicit hook: admin edits change the key, -# subsequent requests miss the cache and rebuild. Process restart clears the -# cache (uvicorn --reload covers dev). Stale entries accumulate on edits but -# the leak is bounded by edit frequency and dataset count. +# row's updated_at bumps unconditionally on every admin PUT (see +# update_dataset in routes/admin.py), so this self-invalidates for any +# DB-sourced DatasetContext field — including prompt_addendum — without an +# explicit hook: admin edits change the key, subsequent requests miss the +# cache and rebuild. Process restart clears the cache (uvicorn --reload covers +# dev). Stale entries accumulate on edits but the leak is bounded by edit +# frequency and dataset count. # # See issue #101. _dataset_ctx_cache: dict[tuple[str, datetime], DatasetContext] = {} From 987f308f20ea65f85f9a4744b467f226660ae143 Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:32:55 -0400 Subject: [PATCH 7/8] feat(agent): inject curator-notes block into system prompt When DatasetContext.prompt_addendum is non-empty, build_system_prompt inserts a fenced === Curator notes === block between the description and shape lines so the agent treats operator-supplied context as authoritative. Whitespace-only and None values produce no output. 
--- .../src/cell_explorer_agent/prompt/system.py | 5 ++ .../tests/test_prompt_system.py | 56 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py index bda9f15..4fe185a 100644 --- a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py +++ b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py @@ -24,6 +24,11 @@ def build_system_prompt(ctx: DatasetContext) -> str: lines.append(f"Dataset: {ctx.slug} — {ctx.name}") if ctx.description: lines.append(f"Description: {ctx.description}") + if ctx.prompt_addendum and ctx.prompt_addendum.strip(): + lines.append("") + lines.append("=== Curator notes (authoritative dataset context) ===") + lines.append(ctx.prompt_addendum) + lines.append("=== end curator notes ===") lines.append(f"Shape: {ctx.n_obs} cells × {ctx.n_var} genes.") lines.append("") lines.append("Obs columns:") diff --git a/packages/cell-explorer-agent/tests/test_prompt_system.py b/packages/cell-explorer-agent/tests/test_prompt_system.py index 20f3b57..e4ae144 100644 --- a/packages/cell-explorer-agent/tests/test_prompt_system.py +++ b/packages/cell-explorer-agent/tests/test_prompt_system.py @@ -1,3 +1,6 @@ +import pytest + + def test_system_prompt_mentions_chart_awareness(): """The agent is told to write concise summaries when a chart is present in the tool result, instead of re-enumerating every row.""" @@ -13,3 +16,56 @@ def test_system_prompt_mentions_chart_awareness(): assert "chart" in prompt.lower() # gene_panel_by_obs is registered in the tool-use policy. 
assert "gene_panel_by_obs" in prompt + + +def _minimal_ctx(**kwargs): + from cell_explorer_agent.prompt.dataset_context import DatasetContext, ObsColumnInfo + return DatasetContext( + slug="test-ds", + name="Test Dataset", + description="A test description.", + n_obs=1000, + n_var=200, + obs_columns=[ObsColumnInfo(name="cell_type", dtype="categorical", cardinality=5)], + embedding_keys=["X_umap"], + **kwargs, + ) + + +def test_build_system_prompt_includes_curator_notes_when_present(): + from cell_explorer_agent.prompt.system import build_system_prompt + addendum = "There is a subtle but important consideration: cells were sorted on CD45." + ctx = _minimal_ctx(prompt_addendum=addendum) + prompt = build_system_prompt(ctx) + + assert "=== Curator notes (authoritative dataset context) ===" in prompt + assert addendum in prompt + assert "=== end curator notes ===" in prompt + + # Opening fence appears before the shape line. + idx_fence = prompt.index("=== Curator notes (authoritative dataset context) ===") + idx_shape = prompt.index("Shape:") + assert idx_fence < idx_shape + + # Opening fence appears after description. + idx_desc = prompt.index("A test description.") + assert idx_desc < idx_fence + + +def test_build_system_prompt_omits_curator_notes_when_none(): + from cell_explorer_agent.prompt.system import build_system_prompt + ctx = _minimal_ctx(prompt_addendum=None) + prompt = build_system_prompt(ctx) + assert "Curator notes" not in prompt + + # Byte-identical to prompt built from a context without the field set (default None). 
+ ctx2 = _minimal_ctx() + assert prompt == build_system_prompt(ctx2) + + +@pytest.mark.parametrize("addendum", ["", " \n \t "]) +def test_build_system_prompt_omits_curator_notes_when_empty(addendum): + from cell_explorer_agent.prompt.system import build_system_prompt + ctx = _minimal_ctx(prompt_addendum=addendum) + prompt = build_system_prompt(ctx) + assert "Curator notes" not in prompt From 702510462e94f23c81357cbcc2efcfd4fa2956ae Mon Sep 17 00:00:00 2001 From: Jason Hwee <1216418+hweej@users.noreply.github.com> Date: Thu, 21 May 2026 10:34:53 -0400 Subject: [PATCH 8/8] fix(agent): blank line after curator-notes closing fence --- .../src/cell_explorer_agent/prompt/system.py | 1 + packages/cell-explorer-agent/tests/test_prompt_system.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py index 4fe185a..d812ea5 100644 --- a/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py +++ b/packages/cell-explorer-agent/src/cell_explorer_agent/prompt/system.py @@ -29,6 +29,7 @@ def build_system_prompt(ctx: DatasetContext) -> str: lines.append("=== Curator notes (authoritative dataset context) ===") lines.append(ctx.prompt_addendum) lines.append("=== end curator notes ===") + lines.append("") lines.append(f"Shape: {ctx.n_obs} cells × {ctx.n_var} genes.") lines.append("") lines.append("Obs columns:") diff --git a/packages/cell-explorer-agent/tests/test_prompt_system.py b/packages/cell-explorer-agent/tests/test_prompt_system.py index e4ae144..c4df9cd 100644 --- a/packages/cell-explorer-agent/tests/test_prompt_system.py +++ b/packages/cell-explorer-agent/tests/test_prompt_system.py @@ -51,6 +51,10 @@ def test_build_system_prompt_includes_curator_notes_when_present(): idx_desc = prompt.index("A test description.") assert idx_desc < idx_fence + # The block is paragraph-isolated: a blank line follows the closing + # fence 
so it doesn't visually bleed into the shape metadata. + assert "=== end curator notes ===\n\nShape:" in prompt + def test_build_system_prompt_omits_curator_notes_when_none(): from cell_explorer_agent.prompt.system import build_system_prompt