diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py index 6c689fd26ff..1f4c8ad8955 100644 --- a/ddtrace/llmobs/_integrations/utils.py +++ b/ddtrace/llmobs/_integrations/utils.py @@ -39,6 +39,17 @@ logger = get_logger(__name__) +# The openai SDK uses `Omit`/`NotGiven` sentinels as defaults for unset request parameters. +# Callers (e.g. PydanticAI) may forward these sentinels explicitly, so filter them out of span +# metadata rather than serializing them to noisy repr strings. Identify them by class name instead +# of importing openai: this shared utils module is provider-agnostic, so it must not depend on a +# specific vendor SDK (which also avoids a circular import while ddtrace is patching openai). +_OPENAI_SENTINEL_TYPE_NAMES = ("Omit", "NotGiven") + + +def _is_openai_sentinel(value: Any) -> bool: + return type(value).__name__ in _OPENAI_SENTINEL_TYPE_NAMES + COMMON_METADATA_KEYS = ( "stream", @@ -572,7 +583,7 @@ def get_metadata_from_kwargs( keys_to_include += OPENAI_METADATA_CHAT_KEYS if operation == "chat" else OPENAI_METADATA_COMPLETION_KEYS elif integration_name == "litellm": keys_to_include += LITELLM_METADATA_CHAT_KEYS if operation == "chat" else LITELLM_METADATA_COMPLETION_KEYS - metadata = {k: load_data_value(v) for k, v in kwargs.items() if k in keys_to_include} + metadata = {k: load_data_value(v) for k, v in kwargs.items() if k in keys_to_include and not _is_openai_sentinel(v)} return metadata @@ -822,7 +833,13 @@ def openai_get_metadata_from_response( metadata = {} if kwargs: - metadata.update({k: v for k, v in kwargs.items() if k in OPENAI_METADATA_RESPONSE_KEYS + COMMON_METADATA_KEYS}) + metadata.update( + { + k: v + for k, v in kwargs.items() + if k in OPENAI_METADATA_RESPONSE_KEYS + COMMON_METADATA_KEYS and not _is_openai_sentinel(v) + } + ) if not response: return metadata diff --git a/releasenotes/notes/llmobs-filter-openai-omit-metadata-15533386303440c7.yaml 
b/releasenotes/notes/llmobs-filter-openai-omit-metadata-15533386303440c7.yaml new file mode 100644 index 00000000000..a8176b51488 --- /dev/null +++ b/releasenotes/notes/llmobs-filter-openai-omit-metadata-15533386303440c7.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + LLM Observability: Resolves an issue where the OpenAI integration recorded unset request parameters (OpenAI SDK's ``Omit``/``NotGiven`` sentinel values) as noise in LLM span metadata. diff --git a/tests/contrib/openai/test_openai_llmobs.py b/tests/contrib/openai/test_openai_llmobs.py index 381f6298c2d..862ab8f840a 100644 --- a/tests/contrib/openai/test_openai_llmobs.py +++ b/tests/contrib/openai/test_openai_llmobs.py @@ -118,6 +118,44 @@ def test_chat_completion_unknown_provider(self, mock_completions_post, openai, o assert len(spans) == 1 assert get_llmobs_model_provider(spans[0]) == "unknown" + @mock.patch("openai._base_client.SyncAPIClient.post") + def test_chat_completion_filters_openai_sentinel_metadata( + self, mock_completions_post, openai, openai_llmobs, test_spans + ): + """openai.Omit / openai.NotGiven sentinels for unset params must not pollute span metadata. + + Frameworks like PydanticAI forward every chat-completion parameter explicitly, defaulting + any the caller didn't set to ``openai.omit``. Without filtering, these sentinels are + serialized into span metadata as noisy repr strings (e.g. "<openai.Omit object at 0x...>"), + making the field unqueryable. Regression test for MLOS-693. + """ + mock_completions_post.return_value = mock_openai_chat_completions_response + # Sentinel availability varies by openai version: NotGiven was exported starting in ~1.30, + # Omit only in openai>=2, and neither in very old clients (e.g. 1.0.0). Only construct and + # exercise the sentinels this installed version actually exposes; skip if it has none. 
+ not_given_cls = getattr(openai, "NotGiven", None) + omit_cls = getattr(openai, "Omit", None) + sentinel_kwargs = {} + if not_given_cls is not None: + sentinel_kwargs["presence_penalty"] = not_given_cls() + sentinel_kwargs["seed"] = not_given_cls() + if omit_cls is not None: + sentinel_kwargs["temperature"] = omit_cls() + sentinel_kwargs["frequency_penalty"] = omit_cls() + if not sentinel_kwargs: + pytest.skip("installed openai exposes no Omit/NotGiven sentinel types") + client = openai.OpenAI(base_url="http://localhost:8000") + client.chat.completions.create( + model="gpt-3.5-turbo", + messages=multi_message_input, + top_p=0.9, + **sentinel_kwargs, + ) + spans = [s for trace in test_spans.pop_traces() for s in trace] + assert len(spans) == 1 + # Only the explicitly-set value should survive; the Omit/NotGiven sentinels are dropped. + assert get_llmobs_metadata(spans[0]) == {"top_p": 0.9} + @mock.patch("openai._base_client.SyncAPIClient.post") def test_provider_attribution_with_concurrent_openai_and_azure_clients( self, mock_completions_post, openai, azure_openai_config, openai_llmobs, test_spans