Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions ddtrace/llmobs/_integrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@

logger = get_logger(__name__)

# The openai SDK uses `Omit`/`NotGiven` sentinels as defaults for unset request parameters.
# Callers (e.g. PydanticAI) may forward these sentinels explicitly, so filter them out of span
# metadata rather than serializing them to noisy repr strings. Identify them by class name instead
# of importing openai: this shared utils module is provider-agnostic, so it must not depend on a
# specific vendor SDK (which also avoids a circular import while ddtrace is patching openai).
_OPENAI_SENTINEL_TYPE_NAMES = ("Omit", "NotGiven")


def _is_openai_sentinel(value: Any) -> bool:
return type(value).__name__ in _OPENAI_SENTINEL_TYPE_NAMES


COMMON_METADATA_KEYS = (
"stream",
Expand Down Expand Up @@ -572,7 +583,7 @@ def get_metadata_from_kwargs(
keys_to_include += OPENAI_METADATA_CHAT_KEYS if operation == "chat" else OPENAI_METADATA_COMPLETION_KEYS
elif integration_name == "litellm":
keys_to_include += LITELLM_METADATA_CHAT_KEYS if operation == "chat" else LITELLM_METADATA_COMPLETION_KEYS
metadata = {k: load_data_value(v) for k, v in kwargs.items() if k in keys_to_include}
metadata = {k: load_data_value(v) for k, v in kwargs.items() if k in keys_to_include and not _is_openai_sentinel(v)}
return metadata


Expand Down Expand Up @@ -822,7 +833,13 @@ def openai_get_metadata_from_response(
metadata = {}

if kwargs:
metadata.update({k: v for k, v in kwargs.items() if k in OPENAI_METADATA_RESPONSE_KEYS + COMMON_METADATA_KEYS})
metadata.update(
{
k: v
for k, v in kwargs.items()
if k in OPENAI_METADATA_RESPONSE_KEYS + COMMON_METADATA_KEYS and not _is_openai_sentinel(v)
}
)

if not response:
return metadata
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
LLM Observability: Resolves an issue where the OpenAI integration recorded unset request parameters (OpenAI SDK's ``Omit``/``NotGiven`` sentinel values) as noise in LLM span metadata.
38 changes: 38 additions & 0 deletions tests/contrib/openai/test_openai_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,44 @@ def test_chat_completion_unknown_provider(self, mock_completions_post, openai, o
assert len(spans) == 1
assert get_llmobs_model_provider(spans[0]) == "unknown"

@mock.patch("openai._base_client.SyncAPIClient.post")
def test_chat_completion_filters_openai_sentinel_metadata(
    self, mock_completions_post, openai, openai_llmobs, test_spans
):
    """openai.Omit / openai.NotGiven sentinels for unset params must not pollute span metadata.

    Frameworks like PydanticAI forward every chat-completion parameter explicitly, defaulting
    any the caller didn't set to ``openai.omit``. Without filtering, these sentinels are
    serialized into span metadata as noisy repr strings (e.g. "<openai.Omit object at 0x...>"),
    making the field unqueryable. Regression test for MLOS-693.
    """
    mock_completions_post.return_value = mock_openai_chat_completions_response
    # Sentinel availability varies by openai version: NotGiven was exported starting in ~1.30,
    # Omit only in openai>=2, and neither in very old clients (e.g. 1.0.0). Only construct and
    # exercise the sentinels this installed version actually exposes; skip if it has none.
    not_given_cls = getattr(openai, "NotGiven", None)
    omit_cls = getattr(openai, "Omit", None)
    sentinel_kwargs = {}
    if not_given_cls is not None:
        sentinel_kwargs["presence_penalty"] = not_given_cls()
        sentinel_kwargs["seed"] = not_given_cls()
    if omit_cls is not None:
        sentinel_kwargs["temperature"] = omit_cls()
        sentinel_kwargs["frequency_penalty"] = omit_cls()
    if not sentinel_kwargs:
        pytest.skip("installed openai exposes no Omit/NotGiven sentinel types")
    client = openai.OpenAI(base_url="http://localhost:8000")
    # top_p is the only real value passed; every other optional parameter is a sentinel.
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=multi_message_input,
        top_p=0.9,
        **sentinel_kwargs,
    )
    spans = [s for trace in test_spans.pop_traces() for s in trace]
    assert len(spans) == 1
    # Only the explicitly-set value should survive; the Omit/NotGiven sentinels are dropped.
    assert get_llmobs_metadata(spans[0]) == {"top_p": 0.9}

@mock.patch("openai._base_client.SyncAPIClient.post")
def test_provider_attribution_with_concurrent_openai_and_azure_clients(
self, mock_completions_post, openai, azure_openai_config, openai_llmobs, test_spans
Expand Down
Loading