Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ STUDY_AGENT_REWRITE_CONTAINER_HOSTS=0
#VOCAB_SEARCH_URL="http://127.0.0.1:30080/search"
#VOCAB_SEARCH_QUERY_PREFIX="Instruction: retrieve the concepts most related to the query. Query: "
#PHOEBE_PROVIDER=
#PHOEBE_URL_TEMPLATE=
#PHOEBE_BULK_URL=

PHOEBE_RELATIONSHIP_IDS="Lexical via source,Patient context"
PHOEBE_MAX_CONCEPTS_PER_RELATIONSHIP=100
Expand All @@ -54,7 +54,7 @@ VOCAB_SEARCH_URL="https://hecate.pantheon-hds.com/api/search_standard"

# If you want to use hecate for phoebe
PHOEBE_PROVIDER=hecate_api
PHOEBE_URL_TEMPLATE="https://hecate.pantheon-hds.com/api/concepts/{concept_id}/phoebe"
PHOEBE_BULK_URL="https://hecate.pantheon-hds.com/api/concepts/phoebe/bulk"

# if you have concept_recommended -- see https://forums.ohdsi.org/t/phoebe-2-0/17410
#PHOEBE_PROVIDER=db
Expand Down
4 changes: 3 additions & 1 deletion docs/TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -824,9 +824,11 @@ Important:
export VOCAB_SEARCH_PROVIDER=hecate_api
export VOCAB_SEARCH_URL="https://hecate.pantheon-hds.com/api/search_standard"
export PHOEBE_PROVIDER=hecate_api
export PHOEBE_URL_TEMPLATE="https://hecate.pantheon-hds.com/api/concepts/{concept_id}/phoebe"
export PHOEBE_BULK_URL="https://hecate.pantheon-hds.com/api/concepts/phoebe/bulk"
```

The Hecate PHOEBE provider always uses the bulk endpoint and sends concept IDs in chunks of 100.

Run the flow:

```bash
Expand Down
73 changes: 55 additions & 18 deletions mcp_server/study_agent_mcp/tools/keeper_concept_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,33 +369,69 @@ def _search_standard_via_generic_api(
}


def _phoebe_hecate_bulk_url() -> str:
    """Resolve the Hecate PHOEBE bulk endpoint URL from the environment.

    Resolution order:
      1. ``PHOEBE_BULK_URL`` when it is set to a non-blank value.
      2. Otherwise, if the legacy ``PHOEBE_URL_TEMPLATE`` is set to a
         non-blank value, fail loudly instead of silently ignoring it.
      3. Otherwise the built-in Hecate bulk endpoint.

    The chosen URL is passed through ``rewrite_container_host_url`` before
    being returned.

    Raises:
        RuntimeError: only the legacy ``PHOEBE_URL_TEMPLATE`` is configured.
    """
    bulk_url = (os.getenv("PHOEBE_BULK_URL") or "").strip()
    if not bulk_url:
        # No explicit bulk URL: reject the retired per-concept template so a
        # stale configuration is noticed rather than quietly overridden.
        legacy_template = (os.getenv("PHOEBE_URL_TEMPLATE") or "").strip()
        if legacy_template:
            raise RuntimeError("PHOEBE_URL_TEMPLATE is no longer supported for hecate_api; set PHOEBE_BULK_URL")
        bulk_url = "https://hecate.pantheon-hds.com/api/concepts/phoebe/bulk"
    return rewrite_container_host_url(bulk_url)


def _iter_phoebe_bulk_concepts(payload: Any) -> List[Dict[str, Any]]:
    """Flatten a bulk PHOEBE response into one list of related concepts.

    Args:
        payload: Decoded JSON response. Either a list of per-source rows, or
            a dict whose ``"results"`` entry holds those rows. Any other
            value yields an empty list.

    Returns:
        The related concepts of every row, in row order. Each row's nested
        list (``"concepts"``, falling back to ``"results"``) is run through
        ``_dedupe_concepts``; concepts whose ``sourceConceptId`` is missing
        or empty are stamped with the row's own id.
    """
    if isinstance(payload, list):
        entries = payload
    elif isinstance(payload, dict):
        entries = payload.get("results")
    else:
        entries = []

    flattened: List[Dict[str, Any]] = []
    for entry in entries or []:
        if not isinstance(entry, dict):
            continue

        nested = entry.get("concepts")
        if nested is None:
            nested = entry.get("results")
        if not isinstance(nested, list):
            continue

        # The first key that is present wins, even when its value is None.
        origin_id = None
        for key in ("conceptId", "concept_id", "id"):
            if key in entry:
                origin_id = entry[key]
                break

        for item in _dedupe_concepts(nested):
            if item.get("sourceConceptId") in (None, ""):
                item["sourceConceptId"] = origin_id
            flattened.append(item)
    return flattened


def _phoebe_via_hecate_bulk(
concept_ids: List[int],
relationship_ids: List[str] | None,
timeout: int,
endpoint: str,
) -> List[Dict[str, Any]]:
relationships = set(relationship_ids or [])
related: List[Dict[str, Any]] = []
for offset in range(0, len(concept_ids), 100):
chunk = concept_ids[offset : offset + 100]
payload = _post_http_json(endpoint, {"ids": chunk}, timeout=timeout)
for concept in _iter_phoebe_bulk_concepts(payload):
if relationships and concept.get("relationshipId") not in relationships:
continue
related.append(concept)
return related


def _phoebe_via_hecate(concept_ids: List[int], relationship_ids: List[str] | None) -> Dict[str, Any]:
started = time.perf_counter()
timeout = int(os.getenv("PHOEBE_TIMEOUT", "30"))
endpoint_template = os.getenv(
"PHOEBE_URL_TEMPLATE",
"https://hecate.pantheon-hds.com/api/concepts/{concept_id}/phoebe",
)
endpoint_template = rewrite_container_host_url(endpoint_template)
relationships = set(relationship_ids or [])
related: List[Dict[str, Any]] = []
endpoint = _phoebe_hecate_bulk_url()
logger.debug(
"phoebe provider=hecate_api concept_ids=%s relationship_ids=%s timeout=%s",
len(concept_ids),
relationship_ids,
timeout,
)
for concept_id in concept_ids:
url = endpoint_template.format(concept_id=concept_id)
payload = _load_http_json(url, timeout=timeout)
if payload in (None, [], {}):
continue
concepts = payload if isinstance(payload, list) else payload.get("concepts") or []
for concept in _dedupe_concepts(concepts):
concept["sourceConceptId"] = concept_id
if relationships and concept.get("relationshipId") not in relationships:
continue
related.append(concept)
related = _phoebe_via_hecate_bulk(concept_ids, relationship_ids, timeout, endpoint)
raw_deduped = _dedupe_concepts(related)
filtered, controls = _apply_phoebe_expansion_controls(raw_deduped, relationship_ids)
logger.debug(
Expand All @@ -412,6 +448,7 @@ def _phoebe_via_hecate(concept_ids: List[int], relationship_ids: List[str] | Non
"concepts": filtered,
"count": len(filtered),
"provider": "hecate_api",
"url": endpoint,
"controls": controls,
"raw_count": len(raw_deduped),
}
Expand Down
146 changes: 135 additions & 11 deletions tests/test_keeper_concept_sets_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,22 @@ def test_phoebe_related_concepts_reports_unconfigured_provider(monkeypatch) -> N
assert result["count"] == 0


@pytest.mark.mcp
def test_phoebe_related_concepts_rejects_legacy_hecate_url_template(monkeypatch) -> None:
    """A legacy PHOEBE_URL_TEMPLATE without PHOEBE_BULK_URL must surface as a provider failure."""
    tools = _registered_tools()
    monkeypatch.setenv("PHOEBE_PROVIDER", "hecate_api")
    # Only the retired per-concept template is configured; the bulk URL is absent.
    monkeypatch.delenv("PHOEBE_BULK_URL", raising=False)
    monkeypatch.setenv("PHOEBE_URL_TEMPLATE", "https://example.test/api/concepts/{concept_id}/phoebe")

    phoebe_tool = tools["phoebe_related_concepts"]
    response = phoebe_tool(concept_ids=[1])

    expected_details = "PHOEBE_URL_TEMPLATE is no longer supported for hecate_api; set PHOEBE_BULK_URL"
    assert response["error"] == "phoebe_provider_failed"
    assert response["count"] == 0
    assert response["details"] == expected_details


@pytest.mark.mcp
def test_vocab_search_standard_hecate_provider(monkeypatch) -> None:
tools = _registered_tools()
Expand Down Expand Up @@ -186,25 +202,133 @@ def read(self):
return self._body

def fake_urlopen(request, timeout=30):
assert "/api/concepts/100/phoebe" in request.full_url
body = (
b'[{"concept_id":201,"concept_name":"Upper GI endoscopy","vocabulary_id":"SNOMED",'
b'"domain_id":"Procedure","concept_class_id":"Procedure","standard_concept":"S",'
b'"relationship_id":"Patient context"}]'
)
assert request.get_method() == "POST"
assert "/api/concepts/phoebe/bulk" in request.full_url
assert json.loads(request.data.decode("utf-8")) == {"ids": [4247297, 4116092]}
body = json.dumps(
[
{
"concept_id": 4116092,
"results": [
{
"relationship_id": "Lexical via standard",
"concept_id": 2110451,
"concept_name": (
"Craniectomy for excision of brain tumor, infratentorial or posterior fossa; "
"cerebellopontine angle tumor"
),
"vocabulary_id": "CPT4",
"record_count": 13380,
},
{
"relationship_id": "Ontology-parent",
"concept_id": 4311115,
"concept_name": "Intracranial mass",
"vocabulary_id": "SNOMED",
"record_count": 40790,
},
],
},
{
"concept_id": 4247297,
"results": [
{
"relationship_id": "Lexical via standard",
"concept_id": 435509,
"concept_name": "Intracranial abscess",
"vocabulary_id": "SNOMED",
"record_count": 366720,
},
{
"relationship_id": "Ontology-parent",
"concept_id": 40480849,
"concept_name": "Abscess of brain",
"vocabulary_id": "SNOMED",
"record_count": 1490,
},
],
},
]
).encode("utf-8")
return FakeResponse(body)

monkeypatch.setattr(keeper_concept_sets.urllib.request, "urlopen", fake_urlopen)
monkeypatch.setenv("PHOEBE_PROVIDER", "hecate_api")
result = tools["phoebe_related_concepts"](
concept_ids=[100],
relationship_ids=["Patient context"],
concept_ids=[4247297, 4116092],
relationship_ids=["Ontology-parent"],
)

assert result["count"] == 1
assert result["count"] == 2
assert result["provider"] == "hecate_api"
assert result["concepts"][0]["conceptId"] == 201
assert result["concepts"][0]["sourceConceptId"] == 100
assert result["concepts"][0]["conceptId"] == 4311115
assert result["concepts"][0]["sourceConceptId"] == 4116092
assert result["concepts"][1]["conceptId"] == 40480849
assert result["concepts"][1]["sourceConceptId"] == 4247297


@pytest.mark.mcp
def test_phoebe_related_concepts_hecate_provider_chunks_bulk_requests(monkeypatch) -> None:
    """101 concept ids must be sent to the bulk endpoint as a chunk of 100 followed by a chunk of 1."""
    tools = _registered_tools()
    sent_bodies = []

    # Canned related concepts: one for the first source id, one for any other.
    abscess = {
        "concept_id": 435509,
        "concept_name": "Intracranial abscess",
        "vocabulary_id": "SNOMED",
        "record_count": 366720,
        "relationship_id": "Lexical via standard",
    }
    neoplasm = {
        "concept_id": 380055,
        "concept_name": "Primary malignant neoplasm of brain",
        "vocabulary_id": "SNOMED",
        "record_count": 10257990,
        "relationship_id": "Lexical via standard",
    }

    class FakeResponse:
        """Minimal context-manager stand-in for the object returned by urlopen."""

        def __init__(self, body: bytes):
            self._payload = body

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def read(self):
            return self._payload

    def fake_urlopen(request, timeout=30):
        # Record every request body so the chunking can be asserted afterwards.
        request_body = json.loads(request.data.decode("utf-8"))
        sent_bodies.append(request_body)
        first_id = request_body["ids"][0]
        related = abscess if first_id == 4247297 else neoplasm
        rows = [{"concept_id": first_id, "results": [related]}]
        return FakeResponse(json.dumps(rows).encode("utf-8"))

    monkeypatch.setattr(keeper_concept_sets.urllib.request, "urlopen", fake_urlopen)
    monkeypatch.setenv("PHOEBE_PROVIDER", "hecate_api")

    requested_ids = [4247297] * 100 + [4116092]
    response = tools["phoebe_related_concepts"](
        concept_ids=requested_ids,
        relationship_ids=["Lexical via standard"],
    )

    assert sent_bodies == [{"ids": [4247297] * 100}, {"ids": [4116092]}]
    assert response["count"] == 2
    first, second = response["concepts"][0], response["concepts"][1]
    assert first["conceptId"] == 435509
    assert first["sourceConceptId"] == 4247297
    assert second["conceptId"] == 380055
    assert second["sourceConceptId"] == 4116092


@pytest.mark.mcp
Expand Down