diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_message_utils.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_message_utils.py
new file mode 100644
index 0000000000..d4fb0e6224
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_message_utils.py
@@ -0,0 +1,174 @@
+"""Pure functions for building OTel GenAI semconv-compliant message JSON."""
+
+import json
+from typing import Any, Dict, List, Optional
+
+# Finish reason mapping: covers OpenAI, Cohere, Anthropic, Google Gemini.
+# OTel spec uses "tool_call" (singular) — OpenAI's "tool_calls" (plural) must be mapped.
+_FINISH_REASON_MAP = {
+    # OpenAI
+    "tool_calls": "tool_call",
+    "function_call": "tool_call",
+    # Cohere
+    "COMPLETE": "stop",
+    "MAX_TOKENS": "length",
+    "ERROR": "error",
+    "ERROR_TOXIC": "content_filter",
+    # Anthropic
+    "end_turn": "stop",
+    "stop_sequence": "stop",
+    "tool_use": "tool_call",
+    "max_tokens": "length",
+    # Google Gemini
+    "STOP": "stop",
+    "SAFETY": "content_filter",
+    "RECITATION": "content_filter",
+    "BLOCKLIST": "content_filter",
+    "PROHIBITED_CONTENT": "content_filter",
+    "SPII": "content_filter",
+    "FINISH_REASON_UNSPECIFIED": "error",
+    "OTHER": "error",
+}
+
+
+def map_finish_reason(reason: Optional[str]) -> Optional[str]:
+    """Map a provider finish_reason to the OTel enum value.
+
+    Returns None if reason is None or empty (callers setting the top-level
+    span attribute should omit it). Returns the mapped OTel value, or passes
+    unmapped values through unchanged.
+    For per-message finish_reason, callers MUST apply the fallback:
+    ``map_finish_reason(r) or ""``.
+    """
+    if not reason:
+        return None
+    return _FINISH_REASON_MAP.get(reason, reason)
+
+
+def _parse_arguments(arguments: Any) -> Any:
+    """Parse tool call arguments to an object. Best-effort json.loads with fallback."""
+    if arguments is None:
+        return None
+    if isinstance(arguments, dict):
+        return arguments
+    if isinstance(arguments, str):
+        try:
+            return json.loads(arguments)
+        except (json.JSONDecodeError, ValueError):
+            return arguments
+    return arguments
+
+
+def _content_to_parts(content: Any) -> List[Dict]:
+    """Convert LlamaIndex message content to an OTel parts array.
+
+    Handles: str/None → single TextPart or empty; list of content blocks → mapped by type.
+    """
+    if content is None:
+        return []
+    if isinstance(content, str):
+        return [{"type": "text", "content": content}] if content else []
+    if isinstance(content, list):
+        return [_block_to_part(block) for block in content]
+    return [{"type": "text", "content": str(content)}]
+
+
+def _block_to_part(block: Any) -> Dict:
+    """Convert a single content block to an OTel part dict."""
+    if isinstance(block, str):
+        return {"type": "text", "content": block}
+    if not isinstance(block, dict):
+        return {"type": "text", "content": str(block)}
+
+    block_type = block.get("type", "")
+    if block_type == "text":
+        return {"type": "text", "content": block.get("content", block.get("text", ""))}
+    if block_type in ("thinking", "reasoning"):
+        return {"type": "reasoning", "content": block.get("thinking", block.get("content", block.get("text", "")))}
+    if block_type == "image_url":
+        url = block.get("image_url", {}).get("url", "")
+        return {"type": "uri", "modality": "image", "uri": url}
+    if block_type == "image":
+        return _image_block_to_part(block)
+
+    # Fallback: treat as text if it has recognizable content
+    if "text" in block:
+        return {"type": "text", "content": block["text"]}
+    if "content" in block:
+        return {"type": "text", "content": str(block["content"])}
+    return {"type": "text", "content": str(block)}
+
+
+def _image_block_to_part(block: Dict) -> Dict:
+    """Convert an image content block to BlobPart or UriPart."""
+    source = block.get("source", {})
+    if source.get("type") == "base64":
+        return {
+            "type": "blob",
+            "modality": "image",
+            "mime_type": source.get("media_type", ""),
+            "content": source.get("data", ""),
+        }
+    if source.get("type") == "url":
+        return {"type": "uri", "modality": "image", "uri": source.get("url", "")}
+    return {"type": "text", "content": str(block)}
+
+
+def _extract_tool_calls(msg: Any) -> List[Dict]:
+    """Extract tool_call parts from a LlamaIndex ChatMessage's additional_kwargs."""
+    tool_calls = getattr(msg, "additional_kwargs", {}).get("tool_calls") or []
+    parts = []
+    for tc in tool_calls:
+        if not isinstance(tc, dict):
+            continue
+        func = tc.get("function", {})
+        parts.append({
+            "type": "tool_call",
+            "id": tc.get("id"),
+            "name": func.get("name", ""),
+            "arguments": _parse_arguments(func.get("arguments")),
+        })
+    return parts
+
+
+def build_input_messages(messages: Any) -> List[Dict]:
+    """Build OTel-compliant input messages from a LlamaIndex ChatMessage list."""
+    if not messages:
+        return []
+    result = []
+    for msg in messages:
+        role = msg.role.value if hasattr(msg.role, "value") else str(msg.role)
+        parts = _content_to_parts(msg.content)
+
+        if role == "assistant":
+            parts.extend(_extract_tool_calls(msg))
+
+        if role == "tool":
+            parts = _maybe_wrap_tool_response(msg, parts)
+
+        result.append({"role": role, "parts": parts})
+    return result
+
+
+def _maybe_wrap_tool_response(msg: Any, parts: List[Dict]) -> List[Dict]:
+    """Wrap content as tool_call_response for tool-role messages if tool_call_id is present."""
+    tool_call_id = getattr(msg, "additional_kwargs", {}).get("tool_call_id")
+    if not tool_call_id or not parts:
+        return parts
+    response_content = parts[0].get("content", "") if parts else ""
+    return [{"type": "tool_call_response", "id": tool_call_id, "response": response_content}]
+
+
+def build_output_message(response_message: Any, finish_reason: Optional[str] = None) -> Dict:
+    """Build a single OTel-compliant output message from a LlamaIndex response message."""
+    role = response_message.role.value if hasattr(response_message.role, "value") else "assistant"
+    parts = _content_to_parts(response_message.content)
+    parts.extend(_extract_tool_calls(response_message))
+    fr = map_finish_reason(finish_reason) or ""
+    return {"role": role, "parts": parts, "finish_reason": fr}
+
+
+def build_completion_output_message(text: str, finish_reason: Optional[str] = None) -> Dict:
+    """Build the output message for text completion responses."""
+    fr = map_finish_reason(finish_reason) or ""
+    parts = [{"type": "text", "content": text}] if text else []
+    return {"role": "assistant", "parts": parts, "finish_reason": fr}
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_response_utils.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_response_utils.py
new file mode 100644
index 0000000000..0a114e6af0
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/_response_utils.py
@@ -0,0 +1,220 @@
+"""Utilities for extracting structured data from LlamaIndex raw responses."""
+
+from dataclasses import dataclass
+from typing import Any, List, Optional
+
+from ._message_utils import map_finish_reason
+
+# Map LlamaIndex LLM class names to OTel well-known provider values.
+_PROVIDER_MAP = {
+    "OpenAI": "openai",
+    "AzureOpenAI": "azure.ai.openai",
+    "Anthropic": "anthropic",
+    "Cohere": "cohere",
+    "Groq": "groq",
+    "MistralAI": "mistral_ai",
+    "Bedrock": "aws.bedrock",
+    "Gemini": "gcp.gemini",
+    "VertexAI": "gcp.vertex_ai",
+    "DeepSeek": "deepseek",
+    "Perplexity": "perplexity",
+}
+
+
+@dataclass
+class TokenUsage:
+    input_tokens: Optional[int] = None
+    output_tokens: Optional[int] = None
+    total_tokens: Optional[int] = None
+
+
+def detect_provider_name(instance_or_class_name: Any) -> Optional[str]:
+    """Detect the OTel provider name from a LlamaIndex LLM instance or class name string.
+
+    Returns the OTel well-known value if available, otherwise the lowercase class name.
+    Returns None if the input is None.
+    """
+    if instance_or_class_name is None:
+        return None
+    class_name = (
+        instance_or_class_name
+        if isinstance(instance_or_class_name, str)
+        else instance_or_class_name.__class__.__name__
+    )
+    return _PROVIDER_MAP.get(class_name, class_name.lower())
+
+
+def extract_model_from_raw(raw: Any) -> Optional[str]:
+    """Extract the model name from a raw LLM response (object or dict)."""
+    if hasattr(raw, "model"):
+        return raw.model
+    if isinstance(raw, dict):
+        return raw.get("model")
+    return None
+
+
+def extract_response_id(raw: Any) -> Optional[str]:
+    """Extract the response ID from a raw LLM response (object or dict)."""
+    if hasattr(raw, "id"):
+        return raw.id
+    if isinstance(raw, dict):
+        return raw.get("id")
+    return None
+
+
+def extract_token_usage(raw: Any) -> TokenUsage:
+    """Extract token usage from the raw response. Handles OpenAI, Cohere, and dict formats."""
+    usage = _get_nested(raw, "usage")
+    if usage:
+        result = _extract_openai_usage(usage)
+        if result.input_tokens is not None:
+            return result
+
+    meta = _get_nested(raw, "meta")
+    if meta:
+        return _extract_cohere_usage(meta)
+
+    return TokenUsage()
+
+
+def _get_nested(obj: Any, key: str) -> Any:
+    """Get a nested attribute or dict key from obj."""
+    val = getattr(obj, key, None)
+    if val is not None:
+        return val
+    if isinstance(obj, dict):
+        return obj.get(key)
+    return None
+
+
+def _extract_openai_usage(usage: Any) -> TokenUsage:
+    """Extract tokens from an OpenAI-style usage object/dict."""
+    if hasattr(usage, "completion_tokens"):
+        return TokenUsage(
+            input_tokens=usage.prompt_tokens,
+            output_tokens=usage.completion_tokens,
+            total_tokens=usage.total_tokens,
+        )
+    if isinstance(usage, dict):
+        return TokenUsage(
+            input_tokens=usage.get("prompt_tokens"),
+            output_tokens=usage.get("completion_tokens"),
+            total_tokens=usage.get("total_tokens"),
+        )
+    return TokenUsage()
+
+
+def _extract_cohere_usage(meta: Any) -> TokenUsage:
+    """Extract tokens from Cohere-style meta.tokens or meta.billed_units."""
+    tokens = _get_nested(meta, "tokens")
+    if tokens:
+        inp = _get_int(tokens, "input_tokens")
+        out = _get_int(tokens, "output_tokens")
+        if inp is not None:
+            return TokenUsage(input_tokens=inp, output_tokens=out, total_tokens=_safe_sum(inp, out))
+
+    billed = _get_nested(meta, "billed_units")
+    if billed:
+        inp = _get_int(billed, "input_tokens")
+        out = _get_int(billed, "output_tokens")
+        if inp is not None:
+            return TokenUsage(input_tokens=inp, output_tokens=out, total_tokens=_safe_sum(inp, out))
+
+    return TokenUsage()
+
+
+def _get_int(obj: Any, key: str) -> Optional[int]:
+    """Get an integer attribute or dict key from obj."""
+    val = getattr(obj, key, None)
+    if val is None and isinstance(obj, dict):
+        val = obj.get(key)
+    return int(val) if val is not None else None
+
+
+def _safe_sum(a: Optional[int], b: Optional[int]) -> Optional[int]:
+    if a is not None and b is not None:
+        return a + b
+    return None
+
+
+def extract_finish_reasons(raw: Any) -> List[str]:
+    """Extract and map finish reasons from a raw LLM response.
+
+    Handles OpenAI choices[], Google Gemini candidates[], Anthropic stop_reason,
+    Cohere finish_reason, and Ollama done_reason.
+    Returns an empty list if no finish reason is found.
+    """
+    if raw is None:
+        return []
+
+    # OpenAI format: choices[].finish_reason
+    choices = _get_nested(raw, "choices")
+    if choices and isinstance(choices, (list, tuple)):
+        reasons = _collect_finish_reasons_from_choices(choices)
+        if reasons:
+            return reasons
+
+    # Google Gemini format: candidates[].finish_reason
+    candidates = _get_nested(raw, "candidates")
+    if candidates and isinstance(candidates, (list, tuple)):
+        reasons = _collect_finish_reasons_from_candidates(candidates)
+        if reasons:
+            return reasons
+
+    # Anthropic format: stop_reason
+    stop_reason = _get_nested(raw, "stop_reason")
+    if stop_reason and isinstance(stop_reason, str):
+        mapped = map_finish_reason(stop_reason)
+        if mapped:
+            return [mapped]
+
+    # Cohere / generic: finish_reason (direct attr or in meta)
+    fr = _get_nested(raw, "finish_reason")
+    if fr and isinstance(fr, str):
+        mapped = map_finish_reason(fr)
+        if mapped:
+            return [mapped]
+
+    # Ollama format: done_reason
+    done_reason = _get_nested(raw, "done_reason")
+    if done_reason and isinstance(done_reason, str):
+        mapped = map_finish_reason(done_reason)
+        if mapped:
+            return [mapped]
+
+    return []
+
+
+def _collect_finish_reasons_from_choices(choices: Any) -> List[str]:
+    """Collect mapped finish reasons from an OpenAI-style choices array."""
+    reasons = []
+    try:
+        for choice in choices:
+            fr = getattr(choice, "finish_reason", None)
+            if fr is None and isinstance(choice, dict):
+                fr = choice.get("finish_reason")
+            mapped = map_finish_reason(fr)
+            if mapped:
+                reasons.append(mapped)
+    except (TypeError, StopIteration):
+        pass
+    return reasons
+
+
+def _collect_finish_reasons_from_candidates(candidates: Any) -> List[str]:
+    """Collect mapped finish reasons from a Google Gemini-style candidates array."""
+    reasons = []
+    try:
+        for candidate in candidates:
+            fr = getattr(candidate, "finish_reason", None)
+            if fr is None and isinstance(candidate, dict):
+                fr = candidate.get("finish_reason")
+            # Gemini finish_reason may be an enum; convert to string name
+            if fr is not None and not isinstance(fr, str):
+                fr = fr.name if hasattr(fr, "name") else str(fr)
+            mapped = map_finish_reason(fr)
+            if mapped:
+                reasons.append(mapped)
+    except (TypeError, StopIteration):
+        pass
+    return reasons
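Reviewer note (illustrative sketch, not part of the diff): the extractors accept plain dicts, so a Cohere-shaped payload exercises the meta/billed_units and finish_reason fallbacks directly.

from opentelemetry.instrumentation.llamaindex._response_utils import (
    detect_provider_name,
    extract_finish_reasons,
    extract_token_usage,
)

raw = {
    "model": "command-r",
    "meta": {"billed_units": {"input_tokens": 12, "output_tokens": 3}},
    "finish_reason": "COMPLETE",
}
extract_finish_reasons(raw)     # ["stop"] (COMPLETE is mapped)
extract_token_usage(raw)        # TokenUsage(input_tokens=12, output_tokens=3, total_tokens=15)
detect_provider_name("Cohere")  # "cohere" (well-known value); unknown classes lowercase through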
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/custom_llm_instrumentor.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/custom_llm_instrumentor.py
index 00a68a2e44..a3a812efd3 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/custom_llm_instrumentor.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/custom_llm_instrumentor.py
@@ -1,4 +1,5 @@
 import importlib
+import json
 import pkgutil
 
 from wrapt import wrap_function_wrapper
@@ -7,10 +8,22 @@
 from opentelemetry import context as context_api
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.llamaindex._message_utils import (
+    build_completion_output_message,
+    build_input_messages,
+    build_output_message,
+)
+from opentelemetry.instrumentation.llamaindex._response_utils import (
+    detect_provider_name,
+    extract_finish_reasons,
+    extract_model_from_raw,
+    extract_response_id,
+    extract_token_usage,
+)
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
-from opentelemetry.semconv_ai import SpanAttributes, LLMRequestTypeValues
+from opentelemetry.semconv_ai import LLMRequestTypeValues, SpanAttributes
 from opentelemetry.instrumentation.llamaindex.utils import (
     _with_tracer_wrapper,
     dont_throw,
@@ -145,45 +158,61 @@ async def acomplete_wrapper(tracer, wrapped, instance: CustomLLM, args, kwargs):
 
 @dont_throw
 def _handle_request(span, llm_request_type, args, kwargs, instance: CustomLLM):
-    _set_span_attribute(span, GenAIAttributes.GEN_AI_SYSTEM, instance.__class__.__name__)
-    _set_span_attribute(span, SpanAttributes.LLM_REQUEST_TYPE, llm_request_type.value)
-    _set_span_attribute(
-        span, GenAIAttributes.GEN_AI_REQUEST_MODEL, instance.metadata.model_name
-    )
-    _set_span_attribute(
-        span, GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS, instance.metadata.context_window
-    )
-    _set_span_attribute(
-        span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, instance.metadata.num_output
-    )
+    op_name = "chat" if llm_request_type == LLMRequestTypeValues.CHAT else "text_completion"
+    _set_span_attribute(span, GenAIAttributes.GEN_AI_OPERATION_NAME, op_name)
+    _set_span_attribute(span, GenAIAttributes.GEN_AI_PROVIDER_NAME, detect_provider_name(instance))
+    _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MODEL, instance.metadata.model_name)
+    _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS, instance.metadata.num_output)
 
     if should_send_prompts():
-        # TODO: add support for chat
-        if llm_request_type == LLMRequestTypeValues.COMPLETION:
-            if len(args) > 0:
-                prompt = args[0]
-                _set_span_attribute(
-                    span,
-                    f"{GenAIAttributes.GEN_AI_PROMPT}.0.user",
-                    prompt[0] if isinstance(prompt, list) else prompt,
-                )
-
-    return
+        if llm_request_type == LLMRequestTypeValues.CHAT and args:
+            messages = args[0]
+            if messages:
+                msgs = build_input_messages(messages)
+                span.set_attribute(GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(msgs))
+        elif llm_request_type == LLMRequestTypeValues.COMPLETION and args:
+            prompt = args[0]
+            text = prompt[0] if isinstance(prompt, list) else prompt
+            msg = [{"role": "user", "parts": [{"type": "text", "content": text}]}]
+            span.set_attribute(GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(msg))
+
+        tools = kwargs.get("tools")
+        if tools:
+            span.set_attribute(GenAIAttributes.GEN_AI_TOOL_DEFINITIONS, json.dumps(tools))
 
 
 @dont_throw
 def _handle_response(span, llm_request_type, instance, response):
-    _set_span_attribute(
-        span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, instance.metadata.model_name
-    )
+    raw = getattr(response, "raw", None)
 
-    if should_send_prompts():
-        if llm_request_type == LLMRequestTypeValues.COMPLETION:
-            _set_span_attribute(
-                span, f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", response.text
-            )
+    response_model = extract_model_from_raw(raw) if raw else None
+    _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL, response_model or instance.metadata.model_name)
 
-    return
+    if raw:
+        response_id = extract_response_id(raw)
+        if response_id:
+            _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, response_id)
+
+        usage = extract_token_usage(raw)
+        if usage.input_tokens is not None:
+            span.set_attribute(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, int(usage.input_tokens))
+        if usage.output_tokens is not None:
+            span.set_attribute(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, int(usage.output_tokens))
+        if usage.total_tokens is not None:
+            span.set_attribute(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, int(usage.total_tokens))
+
+    # CRITICAL: finish_reasons is NOT gated by should_send_prompts()
+    reasons = extract_finish_reasons(raw) if raw else []
+    if reasons:
+        span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, reasons)
+
+    if should_send_prompts():
+        fr = reasons[0] if reasons else None
+        if llm_request_type == LLMRequestTypeValues.CHAT and hasattr(response, "message"):
+            output_msg = build_output_message(response.message, finish_reason=fr)
+        else:
+            output_msg = build_completion_output_message(response.text, finish_reason=fr)
+        span.set_attribute(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps([output_msg]))
 
 
 def snake_case_class_name(instance):
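Reviewer note (illustrative sketch, not part of the diff): for an Ollama-style raw dict, the handler above would record roughly the following. Attribute names are the semconv constants used in the hunk; only gen_ai.output.messages is gated by should_send_prompts().

raw = {"model": "llama3", "id": "resp-1", "done_reason": "stop"}
# _handle_response would set:
#   gen_ai.response.model          = "llama3"   (falls back to instance.metadata.model_name without raw)
#   gen_ai.response.id             = "resp-1"
#   gen_ai.response.finish_reasons = ["stop"]   (emitted even when prompt capture is off)
#   gen_ai.output.messages         = '[{"role": "assistant", ...}]'  (only if should_send_prompts())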
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/dispatcher_wrapper.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/dispatcher_wrapper.py
index 2e80cafd52..9b72bac630 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/dispatcher_wrapper.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/dispatcher_wrapper.py
@@ -30,6 +30,9 @@
 from llama_index.core.instrumentation.span_handlers import BaseSpanHandler
 from llama_index.core.workflow import Workflow
 from opentelemetry import context as context_api
+from opentelemetry.instrumentation.llamaindex._response_utils import (
+    detect_provider_name,
+)
 from opentelemetry.instrumentation.llamaindex.event_emitter import (
     emit_chat_message_events,
     emit_chat_response_events,
@@ -85,6 +88,7 @@ class SpanHolder:
     token: Optional[Any] = None
     context: Optional[context_api.context.Context] = None
     waiting_for_streaming: bool = field(init=False, default=False)
+    provider_name: Optional[str] = field(init=False, default=None)
     _active: bool = field(init=False, default=True)
 
@@ -120,8 +124,9 @@ def update_span_for_event(self, event: BaseEvent):
     @update_span_for_event.register
     def _(self, event: LLMChatStartEvent):
         set_llm_chat_request_model_attributes(event, self.otel_span)
+        self.provider_name = detect_provider_name(event.model_dict.get("class_name"))
         if should_emit_events():
-            emit_chat_message_events(event)
+            emit_chat_message_events(event, provider_name=self.provider_name)
         else:
             set_llm_chat_request(event, self.otel_span)
 
@@ -129,7 +134,7 @@ def _(self, event: LLMChatStartEvent):
     def _(self, event: LLMChatEndEvent):
         set_llm_chat_response_model_attributes(event, self.otel_span)
         if should_emit_events():
-            emit_chat_response_events(event)
+            emit_chat_response_events(event, provider_name=self.provider_name)
         else:
             set_llm_chat_response(event, self.otel_span)  # noqa: F821
 
@@ -144,9 +149,11 @@ def _(self, event: EmbeddingStartEvent):
 
     @update_span_for_event.register
     def _(self, event: ReRankStartEvent):
+        if self.provider_name is None and self.parent is not None:
+            self.provider_name = self.parent.provider_name
         set_rerank_model_attributes(event, self.otel_span)
         if should_emit_events():
-            emit_rerank_message_event(event)
+            emit_rerank_message_event(event, provider_name=self.provider_name)
         else:
             set_rerank(event, self.otel_span)
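Reviewer note: ReRankStartEvent carries no model class, so the holder inherits the provider detected when the parent LLM span started. Condensed, the added logic is equivalent to this sketch (holder/parent are SpanHolder instances from this file; the names are illustrative):

if holder.provider_name is None and holder.parent is not None:
    holder.provider_name = holder.parent.provider_name  # e.g. "cohere" detected at LLMChatStartEvent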
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_emitter.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_emitter.py
index 3e8a7c29ff..24a4d2b799 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_emitter.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_emitter.py
@@ -1,6 +1,6 @@
 from dataclasses import asdict
 from enum import Enum
-from typing import Union
+from typing import Optional, Union
 
 from llama_index.core.instrumentation.events.llm import (
     LLMChatEndEvent,
@@ -17,6 +17,7 @@
     should_emit_events,
     should_send_prompts,
 )
+from opentelemetry.instrumentation.llamaindex._response_utils import extract_finish_reasons
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
@@ -34,41 +35,40 @@ class Roles(Enum):
 VALID_MESSAGE_ROLES = {role.value for role in Roles}
 """The valid roles for naming the message event."""
 
-EVENT_ATTRIBUTES = {GenAIAttributes.GEN_AI_SYSTEM: "llamaindex"}
-"""The attributes to be used for the event."""
 
+def _event_attributes(provider_name: Optional[str] = None) -> dict:
+    """Build event attributes with the actual LLM provider name."""
+    return {GenAIAttributes.GEN_AI_PROVIDER_NAME: provider_name or "llamaindex"}
 
-def emit_chat_message_events(event: LLMChatStartEvent):
+
+def emit_chat_message_events(event: LLMChatStartEvent, provider_name: Optional[str] = None):
     for message in event.messages:
-        emit_event(MessageEvent(content=message.content, role=message.role.value))
+        emit_event(MessageEvent(content=message.content, role=message.role.value), provider_name=provider_name)
 
 
-def emit_chat_response_events(event: LLMChatEndEvent):
+def emit_chat_response_events(event: LLMChatEndEvent, provider_name: Optional[str] = None):
     if event.response:
-        try:
-            finish_reason = event.response.raw.get("choices", [{}])[0].get(
-                "finish_reason", "unknown"
-            )
-        except (AttributeError, ValueError):
-            finish_reason = "unknown"
+        reasons = extract_finish_reasons(event.response.raw) if event.response.raw else []
+        finish_reason = reasons[0] if reasons else ""
 
         emit_choice_event(
             index=0,
             content=event.response.message.content,
             role=event.response.message.role.value,
             finish_reason=finish_reason,
+            provider_name=provider_name,
         )
 
 
-def emit_rerank_message_event(event: ReRankStartEvent):
+def emit_rerank_message_event(event: ReRankStartEvent, provider_name: Optional[str] = None):
     if event.query:
         if isinstance(event.query, str):
-            emit_message_event(content=event.query, role="user")
+            emit_message_event(content=event.query, role="user", provider_name=provider_name)
         else:
-            emit_message_event(content=event.query.query_str, role="user")
+            emit_message_event(content=event.query.query_str, role="user", provider_name=provider_name)
 
 
-def emit_message_event(*, content, role: str):
-    emit_event(MessageEvent(content=content, role=role))
+def emit_message_event(*, content, role: str, provider_name: Optional[str] = None):
+    emit_event(MessageEvent(content=content, role=role), provider_name=provider_name)
 
 
 def emit_choice_event(
@@ -77,36 +77,40 @@
     content,
     role: str,
     finish_reason: str,
+    provider_name: Optional[str] = None,
 ):
     emit_event(
         ChoiceEvent(
             index=index,
             message={"content": content, "role": role},
             finish_reason=finish_reason,
-        )
+        ),
+        provider_name=provider_name,
     )
 
 
-def emit_event(event: Union[MessageEvent, ChoiceEvent]) -> None:
+def emit_event(event: Union[MessageEvent, ChoiceEvent], provider_name: Optional[str] = None) -> None:
     """
     Emit an event to the OpenTelemetry SDK.
 
     Args:
         event: The event to emit.
+        provider_name: The actual LLM provider name (e.g. "openai", "anthropic").
     """
     if not should_emit_events():
         return
 
     if isinstance(event, MessageEvent):
-        _emit_message_event(event)
+        _emit_message_event(event, provider_name=provider_name)
     elif isinstance(event, ChoiceEvent):
-        _emit_choice_event(event)
+        _emit_choice_event(event, provider_name=provider_name)
     else:
         raise TypeError("Unsupported event type")
 
 
-def _emit_message_event(event: MessageEvent) -> None:
+def _emit_message_event(event: MessageEvent, provider_name: Optional[str] = None) -> None:
     body = asdict(event)
+    attrs = _event_attributes(provider_name)
 
     if event.role in VALID_MESSAGE_ROLES:
         name = "gen_ai.{}.message".format(event.role)
@@ -131,14 +135,16 @@ def _emit_message_event(event: MessageEvent) -> None:
 
     log_record = LogRecord(
         body=body,
-        attributes=EVENT_ATTRIBUTES,
+        attributes=attrs,
         event_name=name
     )
     Config.event_logger.emit(log_record)
 
 
-def _emit_choice_event(event: ChoiceEvent) -> None:
+def _emit_choice_event(event: ChoiceEvent, provider_name: Optional[str] = None) -> None:
     body = asdict(event)
+    attrs = _event_attributes(provider_name)
+
     if event.message["role"] == Roles.ASSISTANT.value:
         # According to the semantic conventions, the role is conditionally required if available
         # and not equal to "assistant", so remove the role from the body if it is "assistant".
@@ -155,7 +161,7 @@ def _emit_choice_event(event: ChoiceEvent) -> None:
 
     log_record = LogRecord(
         body=body,
-        attributes=EVENT_ATTRIBUTES,
+        attributes=attrs,
         event_name="gen_ai.choice"
     )
     Config.event_logger.emit(log_record)
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_models.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_models.py
index e3b5f3cc60..0759aeeb64 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_models.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/event_models.py
@@ -37,5 +37,5 @@ class ChoiceEvent:
 
     index: int
     message: CompletionMessage
-    finish_reason: str = "unknown"
+    finish_reason: str = ""
     tool_calls: Optional[List[ToolCall]] = None
diff --git a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/span_utils.py b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/span_utils.py
index 7ae07c21af..348e7086a0 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/span_utils.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/opentelemetry/instrumentation/llamaindex/span_utils.py
@@ -1,4 +1,17 @@
-from llama_index.core.base.llms.types import MessageRole
+import json
+
+from opentelemetry.instrumentation.llamaindex._message_utils import (
+    build_completion_output_message,
+    build_input_messages,
+    build_output_message,
+)
+from opentelemetry.instrumentation.llamaindex._response_utils import (
+    detect_provider_name,
+    extract_finish_reasons,
+    extract_model_from_raw,
+    extract_response_id,
+    extract_token_usage,
+)
 from opentelemetry.instrumentation.llamaindex.utils import (
     dont_throw,
     should_send_prompts,
@@ -6,10 +19,7 @@
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
-from opentelemetry.semconv_ai import (
-    LLMRequestTypeValues,
-    SpanAttributes,
-)
+from opentelemetry.semconv_ai import SpanAttributes
 
 
 def _set_span_attribute(span, name, value):
@@ -24,13 +34,8 @@ def set_llm_chat_request(event, span) -> None:
         return
 
     if should_send_prompts():
-        for idx, message in enumerate(event.messages):
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", message.role.value
-            )
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.content", message.content
-            )
+        msgs = build_input_messages(event.messages)
+        span.set_attribute(GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(msgs))
 
 
 @dont_throw
@@ -39,7 +44,12 @@ def set_llm_chat_request_model_attributes(event, span):
         return
 
     model_dict = event.model_dict
-    span.set_attribute(SpanAttributes.LLM_REQUEST_TYPE, LLMRequestTypeValues.CHAT.value)
+    span.set_attribute(GenAIAttributes.GEN_AI_OPERATION_NAME, "chat")
+
+    class_name = model_dict.get("class_name")
+    provider = detect_provider_name(class_name)
+    if provider:
+        span.set_attribute(GenAIAttributes.GEN_AI_PROVIDER_NAME, provider)
 
     # For StructuredLLM, the model and temperature are nested under model_dict.llm
     if "llm" in model_dict:
@@ -57,23 +67,16 @@ def set_llm_chat_response(event, span) -> None:
         return
 
     response = event.response
+    finish_reasons = extract_finish_reasons(response.raw) if response.raw else []
+
+    # finish_reasons is NOT gated by should_send_prompts() — it's metadata, not content
+    if finish_reasons:
+        span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons)
+
     if should_send_prompts():
-        for idx, message in enumerate(event.messages):
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", message.role.value
-            )
-            span.set_attribute(
-                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.content", message.content
-            )
-        span.set_attribute(
-            f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role",
-            response.message.role.value,
-        )
-        _set_span_attribute(
-            span,
-            f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content",
-            response.message.content,
-        )
+        fr = finish_reasons[0] if finish_reasons else None
+        output_msg = build_output_message(response.message, finish_reason=fr)
+        span.set_attribute(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps([output_msg]))
 
 
 @dont_throw
@@ -86,99 +89,40 @@ def set_llm_chat_response_model_attributes(event, span):
     if not (raw := response.raw):
         return
 
-    # Get model name - handle both dict and object formats
-    model = None
-    if hasattr(raw, "model"):
-        model = raw.model
-    elif isinstance(raw, dict) and "model" in raw:
-        model = raw.get("model")
+    model = extract_model_from_raw(raw)
     if model:
         span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_MODEL, model)
 
-    # Handle token usage - support multiple formats
-    input_tokens = None
-    output_tokens = None
-    total_tokens = None
-
-    # Try OpenAI format first: raw.usage with completion_tokens, prompt_tokens
-    usage = getattr(raw, "usage", None) or (raw.get("usage") if isinstance(raw, dict) else None)
-    if usage:
-        if hasattr(usage, "completion_tokens"):
-            output_tokens = usage.completion_tokens
-            input_tokens = usage.prompt_tokens
-            total_tokens = usage.total_tokens
-        elif isinstance(usage, dict):
-            output_tokens = usage.get("completion_tokens")
-            input_tokens = usage.get("prompt_tokens")
-            total_tokens = usage.get("total_tokens")
-
-    # Try Cohere format: raw.meta.tokens or raw.meta.billed_units
-    if input_tokens is None or output_tokens is None:
-        meta = getattr(raw, "meta", None) or (raw.get("meta") if isinstance(raw, dict) else None)
-        if meta:
-            # Try meta.tokens first (actual token counts)
-            tokens = getattr(meta, "tokens", None) or (meta.get("tokens") if isinstance(meta, dict) else None)
-            if tokens:
-                if hasattr(tokens, "input_tokens"):
-                    input_tokens = tokens.input_tokens
-                    output_tokens = tokens.output_tokens
-                elif isinstance(tokens, dict):
-                    input_tokens = tokens.get("input_tokens")
-                    output_tokens = tokens.get("output_tokens")
-
-            # Fallback to meta.billed_units if tokens not found
-            if input_tokens is None or output_tokens is None:
-                billed = getattr(meta, "billed_units", None) or (
-                    meta.get("billed_units") if isinstance(meta, dict) else None
-                )
-                if billed:
-                    if hasattr(billed, "input_tokens"):
-                        input_tokens = int(billed.input_tokens)
-                        output_tokens = int(billed.output_tokens)
-                    elif isinstance(billed, dict):
-                        input_tokens = int(billed.get("input_tokens", 0))
-                        output_tokens = int(billed.get("output_tokens", 0))
-
-    # Set token attributes if found
-    if output_tokens is not None:
-        span.set_attribute(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, int(output_tokens))
-    if input_tokens is not None:
-        span.set_attribute(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, int(input_tokens))
-    if total_tokens is not None:
-        span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, int(total_tokens))
-    elif input_tokens is not None and output_tokens is not None:
-        # Calculate total if not provided (e.g., for Cohere)
-        span.set_attribute(SpanAttributes.LLM_USAGE_TOTAL_TOKENS, int(input_tokens) + int(output_tokens))
-
-    # Handle finish reason for OpenAI-style responses
-    choices = getattr(raw, "choices", None)
-    if choices:
-        span.set_attribute(
-            SpanAttributes.LLM_RESPONSE_FINISH_REASON, choices[0].finish_reason
-        )
+    response_id = extract_response_id(raw)
+    if response_id:
+        span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_ID, response_id)
+
+    usage = extract_token_usage(raw)
+    if usage.output_tokens is not None:
+        span.set_attribute(GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, int(usage.output_tokens))
+    if usage.input_tokens is not None:
+        span.set_attribute(GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, int(usage.input_tokens))
+    if usage.total_tokens is not None:
+        span.set_attribute(SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS, int(usage.total_tokens))
+
+    # CRITICAL: finish_reasons is NOT gated by should_send_prompts()
+    finish_reasons = extract_finish_reasons(raw)
+    if finish_reasons:
+        span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, finish_reasons)
 
 
 @dont_throw
 def set_llm_predict_response(event, span) -> None:
     if should_send_prompts():
-        span.set_attribute(
-            f"{GenAIAttributes.GEN_AI_COMPLETION}.role",
-            MessageRole.ASSISTANT.value,
-        )
-        _set_span_attribute(
-            span,
-            f"{GenAIAttributes.GEN_AI_COMPLETION}.content",
-            event.output,
-        )
+        output_msg = build_completion_output_message(event.output or "")
+        span.set_attribute(GenAIAttributes.GEN_AI_OUTPUT_MESSAGES, json.dumps([output_msg]))
 
 
 @dont_throw
 def set_embedding(event, span) -> None:
     model_dict = event.model_dict
-    span.set_attribute(
-        f"{LLMRequestTypeValues.EMBEDDING.value}.model_name",
-        model_dict.get("model_name"),
-    )
+    span.set_attribute(GenAIAttributes.GEN_AI_OPERATION_NAME, "embeddings")
+    span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, model_dict.get("model_name"))
 
 
 @dont_throw
@@ -186,24 +130,17 @@ def set_rerank(event, span) -> None:
     if not span.is_recording():
         return
     if should_send_prompts():
-        span.set_attribute(
-            f"{LLMRequestTypeValues.RERANK.value}.query",
-            event.query.query_str,
-        )
+        msg = [{"role": "user", "parts": [{"type": "text", "content": event.query.query_str}]}]
+        span.set_attribute(GenAIAttributes.GEN_AI_INPUT_MESSAGES, json.dumps(msg))
 
 
 @dont_throw
 def set_rerank_model_attributes(event, span):
     if not span.is_recording():
         return
-    span.set_attribute(
-        f"{LLMRequestTypeValues.RERANK.value}.model_name",
-        event.model_name,
-    )
-    span.set_attribute(
-        f"{LLMRequestTypeValues.RERANK.value}.top_n",
-        event.top_n,
-    )
+    span.set_attribute(GenAIAttributes.GEN_AI_OPERATION_NAME, "rerank")
+    span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, event.model_name)
+    span.set_attribute("rerank.top_n", event.top_n)
 
 
 @dont_throw
diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_agents.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_agents.py
index f6f569b3bf..eea4891b85 100644
--- a/packages/opentelemetry-instrumentation-llamaindex/tests/test_agents.py
+++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_agents.py
@@ -185,15 +185,11 @@ def calculate_years_to_target_population(
     cohere_spans = [span for span in spans if span.name == "Cohere.task"]
     assert len(cohere_spans) >= 1, "Expected at least one Cohere LLM span"
 
-    # In llama-index 0.14.x, there are two types of Cohere.task spans:
-    # 1. LLM call spans with gen_ai.request.model, gen_ai.prompt.X.content, gen_ai.completion.X.content
-    # 2. Text processing spans with gen_ai.completion.content only
     # We verify that at least one span has the full set of LLM attributes
     llm_spans_with_model = [
         span
         for span in cohere_spans
         if GenAIAttributes.GEN_AI_REQUEST_MODEL in span.attributes
-        or "gen_ai.request.model" in span.attributes
     ]
     assert (
         len(llm_spans_with_model) >= 1
@@ -201,34 +197,24 @@
     # Check that LLM spans with gen_ai.request.model have the expected attributes
     for cohere_span in llm_spans_with_model:
-        # Check for gen_ai.request.model attribute
         assert (
             GenAIAttributes.GEN_AI_REQUEST_MODEL in cohere_span.attributes
-            or "gen_ai.request.model" in cohere_span.attributes
         ), f"Expected gen_ai.request.model in {cohere_span.name}"
 
-        # Check for prompt content attributes (gen_ai.prompt.X.content)
-        prompt_keys = [
-            k for k in cohere_span.attributes if k.startswith("gen_ai.prompt.")
-        ]
-        assert len(prompt_keys) > 0, f"Expected prompt attributes in {cohere_span.name}"
-
-        # Check for completion content attributes (gen_ai.completion.X.content)
-        completion_keys = [
-            k for k in cohere_span.attributes if k.startswith("gen_ai.completion")
-        ]
+        # Check for JSON input/output messages (new semconv format)
+        assert (
+            GenAIAttributes.GEN_AI_INPUT_MESSAGES in cohere_span.attributes
+        ), f"Expected gen_ai.input.messages in {cohere_span.name}"
         assert (
-            len(completion_keys) > 0
-        ), f"Expected completion attributes in {cohere_span.name}"
+            GenAIAttributes.GEN_AI_OUTPUT_MESSAGES in cohere_span.attributes
+        ), f"Expected gen_ai.output.messages in {cohere_span.name}"
 
-        # Check that operation name exists (gen_ai.operation.name or legacy llm.request.type)
+        # Check that operation name exists
         assert (
             GenAIAttributes.GEN_AI_OPERATION_NAME in cohere_span.attributes
-            or SpanAttributes.LLM_REQUEST_TYPE in cohere_span.attributes
-            or "llm.request.type" in cohere_span.attributes
-        ), f"Expected gen_ai.operation.name or llm.request.type in {cohere_span.name}"
+        ), f"Expected gen_ai.operation.name in {cohere_span.name}"
 
-        # Check for token usage attributes (restored with Cohere format support)
+        # Check for token usage attributes
         assert (
             GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS in cohere_span.attributes
         ), f"Expected gen_ai.usage.output_tokens in {cohere_span.name}"
@@ -238,9 +224,9 @@
         ), f"Expected gen_ai.usage.input_tokens in {cohere_span.name}"
         assert cohere_span.attributes[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] > 0
         assert (
-            SpanAttributes.LLM_USAGE_TOTAL_TOKENS in cohere_span.attributes
-        ), f"Expected llm.usage.total_tokens in {cohere_span.name}"
-        assert cohere_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS] > 0
+            SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS in cohere_span.attributes
+        ), f"Expected gen_ai.usage.total_tokens in {cohere_span.name}"
+        assert cohere_span.attributes[SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS] > 0
 
     # Verify tool-related spans exist (FunctionTool.task and QueryEngineTool.task)
     function_tool_spans = [span for span in spans if span.name == "FunctionTool.task"]
diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_custom_llm_semconv.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_custom_llm_semconv.py
new file mode 100644
index 0000000000..0bcf857fc0
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_custom_llm_semconv.py
@@ -0,0 +1,312 @@
+"""Unit tests for custom_llm_instrumentor semconv migration."""
+
+import json
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+from opentelemetry.semconv_ai import SpanAttributes
+
+from opentelemetry.instrumentation.llamaindex.custom_llm_instrumentor import (
+    _handle_request,
+    _handle_response,
+)
+
+PATCH_SHOULD_SEND = "opentelemetry.instrumentation.llamaindex.custom_llm_instrumentor.should_send_prompts"
+
+
+def _span():
+    return MagicMock()
+
+
+def _instance(class_name="Ollama", model_name="llama3", context_window=4096, num_output=512):
+    inst = type(class_name, (), {})()
+    inst.metadata = SimpleNamespace(model_name=model_name, context_window=context_window, num_output=num_output)
+    return inst
+
+
+def _attr(span, name):
+    for call in span.set_attribute.call_args_list:
+        if call.args[0] == name:
+            return call.args[1]
+    return None
+
+
+def _has_attr(span, name):
+    return any(c.args[0] == name for c in span.set_attribute.call_args_list)
+
+
+# ===========================================================================
+# _handle_request
+# ===========================================================================
+
+class TestCustomLLMHandleRequest:
+    def test_sets_operation_name_chat(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert _attr(span, GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+
+    def test_sets_operation_name_completion(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        _handle_request(span, LLMRequestTypeValues.COMPLETION, (), {}, inst)
+        assert _attr(span, GenAIAttributes.GEN_AI_OPERATION_NAME) == "text_completion"
+
+    def test_sets_provider_name(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance("Cohere")
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert _attr(span, GenAIAttributes.GEN_AI_PROVIDER_NAME) == "cohere"
+
+    def test_no_legacy_gen_ai_system(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_SYSTEM)
+
+    def test_no_legacy_llm_request_type(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert not _has_attr(span, SpanAttributes.LLM_REQUEST_TYPE)
+
+    def test_sets_input_messages_json_for_completion(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_request(span, LLMRequestTypeValues.COMPLETION, ("hello world",), {}, inst)
+        raw = _attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+        assert raw is not None
+        msgs = json.loads(raw)
+        assert msgs[0]["role"] == "user"
+        assert msgs[0]["parts"][0]["content"] == "hello world"
+
+    def test_gated_by_should_send_prompts(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        with patch(PATCH_SHOULD_SEND, return_value=False):
+            _handle_request(span, LLMRequestTypeValues.COMPLETION, ("hello",), {}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+
+    def test_sets_model(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(model_name="llama3-70b")
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MODEL) == "llama3-70b"
+
+    def test_sets_max_tokens_from_num_output(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(num_output=1024)
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS) == 1024
+
+    def test_no_top_p_from_num_output(self):
+        """num_output should NOT be set as top_p — it maps to max_tokens."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(num_output=512)
+        _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_REQUEST_TOP_P)
+
+    def test_sets_input_messages_json_for_chat(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        msgs = [SimpleNamespace(role=SimpleNamespace(value="user"), content="Hello", additional_kwargs={})]
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_request(span, LLMRequestTypeValues.CHAT, (msgs,), {}, inst)
+        raw = _attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+        assert raw is not None
+        parsed = json.loads(raw)
+        assert parsed[0]["role"] == "user"
+        assert parsed[0]["parts"][0]["content"] == "Hello"
+
+    def test_chat_input_gated_by_should_send_prompts(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        msgs = [SimpleNamespace(role=SimpleNamespace(value="user"), content="Hello", additional_kwargs={})]
+        with patch(PATCH_SHOULD_SEND, return_value=False):
+            _handle_request(span, LLMRequestTypeValues.CHAT, (msgs,), {}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES)
+
+
+# ===========================================================================
+# _handle_response
+# ===========================================================================
+
+class TestCustomLLMHandleResponse:
+    def test_sets_output_messages_json_for_completion(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="The answer is 42.", raw=None)
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        raw = _attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+        assert raw is not None
+        msgs = json.loads(raw)
+        assert msgs[0]["parts"][0]["content"] == "The answer is 42."
+
+    def test_sets_output_messages_json_for_chat(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        msg = SimpleNamespace(role=SimpleNamespace(value="assistant"), content="Reply", additional_kwargs={})
+        resp = SimpleNamespace(text="Reply", message=msg, raw=None)
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.CHAT, inst, resp)
+        raw = _attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+        assert raw is not None
+        msgs = json.loads(raw)
+        assert msgs[0]["role"] == "assistant"
+        assert msgs[0]["parts"][0]["content"] == "Reply"
+
+    def test_sets_response_model(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(model_name="llama3")
+        resp = SimpleNamespace(text="ok", raw=None)
+        _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "llama3"
+
+    def test_response_model_prefers_raw_over_instance(self):
+        """gen_ai.response.model should use the resolved model from the raw response, not the request alias."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(model_name="gpt-4o")
+        raw = SimpleNamespace(model="gpt-4o-2024-05-13", id=None)
+        resp = SimpleNamespace(text="ok", raw=raw)
+        _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o-2024-05-13"
+
+    def test_response_model_falls_back_to_instance_when_no_raw(self):
+        """When raw is None, fall back to instance.metadata.model_name."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(model_name="llama3")
+        resp = SimpleNamespace(text="ok", raw=None)
+        _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "llama3"
+
+    def test_response_model_falls_back_to_instance_when_raw_has_no_model(self):
+        """When raw exists but has no model field, fall back to instance.metadata.model_name."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance(model_name="ollama-llama3")
+        resp = SimpleNamespace(text="ok", raw=SimpleNamespace(id="resp-1"))
+        _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "ollama-llama3"
+
+    def test_output_gated_by_should_send_prompts(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="ok", raw=None)
+        with patch(PATCH_SHOULD_SEND, return_value=False):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+
+    def test_sets_response_id_from_raw(self):
+        """gen_ai.response.id should be extracted from the raw response."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="ok", raw=SimpleNamespace(id="chatcmpl-abc123", model="llama3"))
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_ID) == "chatcmpl-abc123"
+
+    def test_sets_response_id_from_raw_dict(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="ok", raw={"id": "resp-456", "model": "llama3"})
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_ID) == "resp-456"
+
+    def test_no_response_id_when_missing(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="ok", raw={})
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_RESPONSE_ID)
+
+    def test_sets_token_usage_from_raw(self):
+        """gen_ai.usage.* tokens should be extracted from the raw response."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        raw = SimpleNamespace(
+            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30),
+            id=None,
+        )
+        resp = SimpleNamespace(text="ok", raw=raw)
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 10
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS) == 20
+        assert _attr(span, SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) == 30
+
+    def test_no_token_usage_when_missing(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        resp = SimpleNamespace(text="ok", raw={})
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS)
+
+    def test_sets_tool_definitions_from_kwargs(self):
+        """gen_ai.tool.definitions should be captured from kwargs when tools are passed."""
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        tools = [{"type": "function", "function": {"name": "get_weather", "parameters": {"type": "object"}}}]
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_request(span, LLMRequestTypeValues.CHAT, (), {"tools": tools}, inst)
+        raw = _attr(span, GenAIAttributes.GEN_AI_TOOL_DEFINITIONS)
+        assert raw is not None
+        parsed = json.loads(raw)
+        assert len(parsed) == 1
+        assert parsed[0]["function"]["name"] == "get_weather"
+
+    def test_no_tool_definitions_when_not_present(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            _handle_request(span, LLMRequestTypeValues.CHAT, (), {}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_TOOL_DEFINITIONS)
+
+    def test_tool_definitions_gated_by_should_send_prompts(self):
+        from opentelemetry.semconv_ai import LLMRequestTypeValues
+        span = _span()
+        inst = _instance()
+        tools = [{"type": "function", "function": {"name": "get_weather"}}]
+        with patch(PATCH_SHOULD_SEND, return_value=False):
+            _handle_request(span, LLMRequestTypeValues.CHAT, (), {"tools": tools}, inst)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_TOOL_DEFINITIONS)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_event_emitter_semconv.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_event_emitter_semconv.py
new file mode 100644
index 0000000000..12963a8671
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_event_emitter_semconv.py
@@ -0,0 +1,111 @@
+"""Unit tests for event_emitter semconv migration."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as GenAIAttributes,
+)
+
+from opentelemetry.instrumentation.llamaindex.event_emitter import (
+    _event_attributes,
+    emit_chat_message_events,
+    emit_rerank_message_event,
+)
+
+
+# ===========================================================================
+# _event_attributes — dynamic provider name
+# ===========================================================================
+
+class TestEventAttributes:
+    def test_with_provider_name(self):
+        attrs = _event_attributes("openai")
+        assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai"
+
+    def test_with_none_provider_name_falls_back_to_llamaindex(self):
+        """When the provider is unknown, fall back to 'llamaindex' so events always have a provider."""
+        attrs = _event_attributes(None)
+        assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "llamaindex"
+
+    def test_with_empty_provider_name_falls_back_to_llamaindex(self):
+        attrs = _event_attributes("")
+        assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "llamaindex"
+
+    def test_uses_actual_provider_not_llamaindex(self):
+        """The provider name should be the actual LLM provider, not the framework."""
+        attrs = _event_attributes("anthropic")
+        assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "anthropic"
+
+
+# ===========================================================================
+# emit_chat_message_events — provider_name
+# ===========================================================================
+
+class TestEmitChatMessageEventsProviderName:
+    def test_provider_name_passed_to_message_events(self):
+        """emit_chat_message_events should forward provider_name to emit_event."""
+        event = MagicMock()
+        msg = MagicMock()
+        msg.content = "hello"
+        msg.role.value = "user"
+        event.messages = [msg]
+
+        with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_event") as mock_emit:
+            with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True):
+                emit_chat_message_events(event, provider_name="openai")
+        mock_emit.assert_called_once()
+        assert mock_emit.call_args[1]["provider_name"] == "openai"
+
+    def test_none_provider_name_passed_through(self):
+        event = MagicMock()
+        msg = MagicMock()
+        msg.content = "hello"
+        msg.role.value = "user"
+        event.messages = [msg]
+
+        with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_event") as mock_emit:
+            with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True):
+                emit_chat_message_events(event)
+        mock_emit.assert_called_once()
+        assert mock_emit.call_args[1]["provider_name"] is None
+
+    def test_backward_compat_no_provider_arg(self):
+        """Calling without provider_name should still work (defaults to None)."""
+        event = MagicMock()
+        event.messages = []
+        # Should not raise
+        emit_chat_message_events(event)
+
+
+# ===========================================================================
+# emit_rerank_message_event — provider_name
+# ===========================================================================
+
+class TestEmitRerankMessageEvent:
+    def test_provider_name_passed_to_rerank_event(self):
+        """emit_rerank_message_event should forward provider_name."""
+        event = MagicMock()
+        event.query = "search query"
+
+        with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_event") as mock_emit:
+            with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True):
+                emit_rerank_message_event(event, provider_name="cohere")
+        mock_emit.assert_called_once()
+        assert mock_emit.call_args[1]["provider_name"] == "cohere"
+
+    def test_rerank_without_provider_name(self):
+        """Rerank events without provider_name should still work."""
+        event = MagicMock()
+        event.query = "search query"
+
+        with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_event") as mock_emit:
+            with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True):
+                emit_rerank_message_event(event)
+        mock_emit.assert_called_once()
+        assert mock_emit.call_args[1]["provider_name"] is None
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
_recording_span(): + span = MagicMock() + span.is_recording.return_value = True + return span + + +def _attr(span, name): + for call in span.set_attribute.call_args_list: + if call.args[0] == name: + return call.args[1] + return None + + +def _has_attr(span, name): + return any(c.args[0] == name for c in span.set_attribute.call_args_list) + + +def _custom_llm_instance(class_name="Ollama", model_name="llama3"): + inst = type(class_name, (), {})() + inst.metadata = SimpleNamespace( + model_name=model_name, context_window=4096, num_output=512 + ) + return inst + + +PATCH_SHOULD_SEND_SPAN = "opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts" +PATCH_SHOULD_SEND_CUSTOM = "opentelemetry.instrumentation.llamaindex.custom_llm_instrumentor.should_send_prompts" + + +# =========================================================================== +# map_finish_reason — provider value → OTel enum +# =========================================================================== + + +class TestMapFinishReason: + # --- OpenAI --- + def test_openai_stop(self): + assert map_finish_reason("stop") == "stop" + + def test_openai_tool_calls_mapped_to_singular(self): + """OTel JSON schema uses 'tool_call' (singular). OpenAI 'tool_calls' (plural) must be mapped.""" + assert map_finish_reason("tool_calls") == "tool_call" + + def test_openai_function_call(self): + assert map_finish_reason("function_call") == "tool_call" + + def test_openai_length(self): + assert map_finish_reason("length") == "length" + + def test_openai_content_filter(self): + assert map_finish_reason("content_filter") == "content_filter" + + # --- Cohere --- + def test_cohere_complete(self): + assert map_finish_reason("COMPLETE") == "stop" + + def test_cohere_max_tokens(self): + assert map_finish_reason("MAX_TOKENS") == "length" + + def test_cohere_error(self): + assert map_finish_reason("ERROR") == "error" + + def test_cohere_error_toxic(self): + assert map_finish_reason("ERROR_TOXIC") == "content_filter" + + # --- Anthropic --- + def test_anthropic_end_turn(self): + assert map_finish_reason("end_turn") == "stop" + + def test_anthropic_stop_sequence(self): + assert map_finish_reason("stop_sequence") == "stop" + + def test_anthropic_tool_use(self): + assert map_finish_reason("tool_use") == "tool_call" + + def test_anthropic_max_tokens(self): + assert map_finish_reason("max_tokens") == "length" + + # --- Google Gemini --- + def test_gemini_stop(self): + assert map_finish_reason("STOP") == "stop" + + def test_gemini_safety(self): + assert map_finish_reason("SAFETY") == "content_filter" + + def test_gemini_recitation(self): + assert map_finish_reason("RECITATION") == "content_filter" + + def test_gemini_blocklist(self): + assert map_finish_reason("BLOCKLIST") == "content_filter" + + def test_gemini_prohibited_content(self): + assert map_finish_reason("PROHIBITED_CONTENT") == "content_filter" + + def test_gemini_spii(self): + assert map_finish_reason("SPII") == "content_filter" + + def test_gemini_unspecified(self): + assert map_finish_reason("FINISH_REASON_UNSPECIFIED") == "error" + + def test_gemini_other(self): + assert map_finish_reason("OTHER") == "error" + + def test_gemini_max_tokens_reuses_cohere_mapping(self): + """Gemini MAX_TOKENS is handled by the same mapping as Cohere.""" + assert map_finish_reason("MAX_TOKENS") == "length" + + # --- Edge cases --- + def test_none_returns_none(self): + assert map_finish_reason(None) is None + + def test_empty_string_returns_none(self): + assert map_finish_reason("") is None + + def 
test_unknown_passes_through(self): + assert map_finish_reason("custom_reason") == "custom_reason" + + def test_all_mapped_values_are_valid_otel(self): + """Every value in _FINISH_REASON_MAP must produce a valid OTel finish reason.""" + from opentelemetry.instrumentation.llamaindex._message_utils import _FINISH_REASON_MAP + for provider_val, otel_val in _FINISH_REASON_MAP.items(): + assert otel_val in VALID_OTEL_FINISH_REASONS, ( + f"Mapping '{provider_val}' -> '{otel_val}' is not a valid OTel finish reason" + ) + + +# =========================================================================== +# extract_finish_reasons — raw response → List[str] +# =========================================================================== + + +class TestExtractFinishReasons: + # --- OpenAI choices[] --- + def test_openai_choices_object(self): + choice = SimpleNamespace(finish_reason="stop") + raw = SimpleNamespace(choices=[choice]) + assert extract_finish_reasons(raw) == ["stop"] + + def test_openai_choices_dict(self): + raw = {"choices": [{"finish_reason": "stop"}]} + assert extract_finish_reasons(raw) == ["stop"] + + def test_openai_tool_calls_mapped_to_singular(self): + raw = {"choices": [{"finish_reason": "tool_calls"}]} + assert extract_finish_reasons(raw) == ["tool_call"] + + def test_multiple_choices(self): + raw = {"choices": [{"finish_reason": "stop"}, {"finish_reason": "length"}]} + assert extract_finish_reasons(raw) == ["stop", "length"] + + def test_none_finish_reason_in_choices(self): + raw = {"choices": [{"finish_reason": None}]} + assert extract_finish_reasons(raw) == [] + + def test_empty_choices(self): + raw = {"choices": []} + assert extract_finish_reasons(raw) == [] + + # --- Anthropic stop_reason --- + def test_anthropic_stop_reason(self): + raw = SimpleNamespace(stop_reason="end_turn") + assert extract_finish_reasons(raw) == ["stop"] + + def test_anthropic_stop_reason_dict(self): + raw = {"stop_reason": "end_turn"} + assert extract_finish_reasons(raw) == ["stop"] + + # --- Cohere finish_reason --- + def test_cohere_finish_reason(self): + raw = SimpleNamespace(finish_reason="COMPLETE") + assert extract_finish_reasons(raw) == ["stop"] + + def test_cohere_finish_reason_dict(self): + raw = {"finish_reason": "MAX_TOKENS"} + assert extract_finish_reasons(raw) == ["length"] + + # --- Google Gemini candidates[] --- + def test_gemini_candidates_object(self): + candidate = SimpleNamespace(finish_reason="STOP") + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == ["stop"] + + def test_gemini_candidates_dict(self): + raw = {"candidates": [{"finish_reason": "STOP"}]} + assert extract_finish_reasons(raw) == ["stop"] + + def test_gemini_candidates_max_tokens(self): + candidate = SimpleNamespace(finish_reason="MAX_TOKENS") + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == ["length"] + + def test_gemini_candidates_safety(self): + candidate = SimpleNamespace(finish_reason="SAFETY") + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == ["content_filter"] + + def test_gemini_candidates_enum_object(self): + """Gemini finish_reason may be an enum with a .name attribute.""" + enum_val = SimpleNamespace(name="STOP") + candidate = SimpleNamespace(finish_reason=enum_val) + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == ["stop"] + + def test_gemini_candidates_enum_unspecified(self): + enum_val = SimpleNamespace(name="FINISH_REASON_UNSPECIFIED") + candidate = 
SimpleNamespace(finish_reason=enum_val) + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == ["error"] + + def test_gemini_candidates_none_finish_reason(self): + candidate = SimpleNamespace(finish_reason=None) + raw = SimpleNamespace(candidates=[candidate]) + assert extract_finish_reasons(raw) == [] + + def test_gemini_multiple_candidates(self): + raw = {"candidates": [{"finish_reason": "STOP"}, {"finish_reason": "SAFETY"}]} + assert extract_finish_reasons(raw) == ["stop", "content_filter"] + + def test_gemini_empty_candidates(self): + raw = {"candidates": []} + assert extract_finish_reasons(raw) == [] + + # --- Ollama done_reason --- + def test_ollama_done_reason(self): + raw = {"done_reason": "stop"} + assert extract_finish_reasons(raw) == ["stop"] + + def test_ollama_done_reason_object(self): + raw = SimpleNamespace(done_reason="stop") + assert extract_finish_reasons(raw) == ["stop"] + + def test_ollama_done_reason_length(self): + raw = {"done_reason": "length"} + assert extract_finish_reasons(raw) == ["length"] + + def test_ollama_done_reason_none(self): + raw = {"done_reason": None} + assert extract_finish_reasons(raw) == [] + + # --- Priority / precedence --- + def test_choices_takes_precedence_over_candidates(self): + raw = SimpleNamespace( + choices=[SimpleNamespace(finish_reason="stop")], + candidates=[SimpleNamespace(finish_reason="SAFETY")], + ) + assert extract_finish_reasons(raw) == ["stop"] + + def test_candidates_takes_precedence_over_stop_reason(self): + raw = SimpleNamespace( + candidates=[SimpleNamespace(finish_reason="STOP")], + stop_reason="end_turn", + ) + assert extract_finish_reasons(raw) == ["stop"] + + def test_stop_reason_takes_precedence_over_done_reason(self): + raw = {"stop_reason": "end_turn", "done_reason": "length"} + assert extract_finish_reasons(raw) == ["stop"] + + # --- Edge cases --- + def test_none_raw(self): + assert extract_finish_reasons(None) == [] + + def test_no_finish_reason(self): + assert extract_finish_reasons(SimpleNamespace()) == [] + + def test_empty_dict_raw(self): + assert extract_finish_reasons({}) == [] + + +# =========================================================================== +# Span attribute: gen_ai.response.finish_reasons +# =========================================================================== + + +class TestSpanFinishReasonsViaSpanUtils: + """Tests for gen_ai.response.finish_reasons set by set_llm_chat_response_model_attributes.""" + + def _event_with_raw(self, raw): + event = MagicMock() + event.response = MagicMock(raw=raw) + return event + + def test_sets_finish_reasons_array(self): + span = _recording_span() + raw = SimpleNamespace( + model="gpt-4", + choices=[SimpleNamespace(finish_reason="stop")], + usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30), + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_finish_reasons_tool_calls_mapped_to_singular(self): + span = _recording_span() + raw = SimpleNamespace( + model="gpt-4", + choices=[SimpleNamespace(finish_reason="tool_calls")], + usage=SimpleNamespace(prompt_tokens=5, completion_tokens=5, total_tokens=10), + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["tool_call"] + + def test_finish_reasons_omitted_when_not_available(self): + span = _recording_span() + raw = 
SimpleNamespace(model="gpt-4") + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + + def test_finish_reasons_not_gated_by_should_send_prompts(self): + span = _recording_span() + raw = SimpleNamespace( + model="gpt-4", + choices=[SimpleNamespace(finish_reason="stop")], + usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30), + ) + with patch(PATCH_SHOULD_SEND_SPAN, return_value=False): + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_no_legacy_finish_reason_attr(self): + span = _recording_span() + raw = SimpleNamespace( + model="gpt-4", + choices=[SimpleNamespace(finish_reason="stop")], + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert not _has_attr(span, SpanAttributes.LLM_RESPONSE_FINISH_REASON) + + def test_cohere_finish_reason_mapped(self): + span = _recording_span() + raw = SimpleNamespace(model="command-r", finish_reason="COMPLETE") + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_gemini_candidates_through_span_utils(self): + span = _recording_span() + raw = SimpleNamespace( + model="gemini-pro", + candidates=[SimpleNamespace(finish_reason="STOP")], + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_gemini_safety_through_span_utils(self): + span = _recording_span() + raw = SimpleNamespace( + model="gemini-pro", + candidates=[SimpleNamespace(finish_reason="SAFETY")], + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["content_filter"] + + def test_gemini_enum_object_through_span_utils(self): + """Gemini finish_reason may be a protobuf enum object, not a string.""" + span = _recording_span() + enum_val = SimpleNamespace(name="STOP") + raw = SimpleNamespace( + model="gemini-pro", + candidates=[SimpleNamespace(finish_reason=enum_val)], + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_ollama_done_reason_through_span_utils(self): + span = _recording_span() + raw = SimpleNamespace(model="llama3", done_reason="stop") + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_none_raw_omits_finish_reasons(self): + span = _recording_span() + event = MagicMock() + event.response = MagicMock(raw=None) + set_llm_chat_response_model_attributes(event, span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + + def test_multiple_choices_mixed_reasons(self): + span = _recording_span() + raw = SimpleNamespace( + model="gpt-4", + choices=[ + SimpleNamespace(finish_reason="stop"), + SimpleNamespace(finish_reason="tool_calls"), + SimpleNamespace(finish_reason="length"), + ], + ) + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == [ + "stop", "tool_call", "length" + ] + + +class TestSpanFinishReasonsViaCustomLLM: + """Tests for 
gen_ai.response.finish_reasons set by custom_llm_instrumentor._handle_response.""" + + def test_sets_finish_reasons_from_raw(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="ok", raw={"choices": [{"finish_reason": "stop"}]}) + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_finish_reasons_not_gated(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="ok", raw={"choices": [{"finish_reason": "stop"}]}) + with patch(PATCH_SHOULD_SEND_CUSTOM, return_value=False): + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_gemini_candidates_through_custom_llm(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance("Gemini", "gemini-pro") + raw = SimpleNamespace(candidates=[SimpleNamespace(finish_reason="STOP")]) + resp = SimpleNamespace(text="ok", raw=raw) + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_ollama_done_reason_through_custom_llm(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="ok", raw=SimpleNamespace(done_reason="stop")) + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_no_raw_omits_finish_reasons(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="ok", raw=None) + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + assert not _has_attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + + +# =========================================================================== +# Output messages: finish_reason in gen_ai.output.messages JSON +# =========================================================================== + + +class TestOutputMessageFinishReason: + """Tests for finish_reason field inside output message JSON.""" + + # --- build_output_message --- + def test_stop(self): + resp = _msg("assistant", "Done.") + result = build_output_message(resp, finish_reason="stop") + assert result["finish_reason"] == "stop" + + def test_tool_calls_mapped_to_singular(self): + resp = _msg("assistant", "") + result = build_output_message(resp, finish_reason="tool_calls") + assert result["finish_reason"] == "tool_call" + + def test_length(self): + resp = _msg("assistant", "truncated...") + result = build_output_message(resp, finish_reason="length") + assert result["finish_reason"] == "length" + + def test_none_defaults_to_empty_string(self): + """Missing finish_reason defaults to '' (required field, non-nullable per schema).""" + resp = _msg("assistant", "ok") + result = build_output_message(resp, finish_reason=None) + assert result["finish_reason"] == "" + + def test_unknown_passes_through(self): + resp = _msg("assistant", "ok") + result = build_output_message(resp, finish_reason="custom_stop") + assert result["finish_reason"] == "custom_stop" + + def 
test_always_string_type(self): + resp = _msg("assistant", "ok") + result = build_output_message(resp) + assert isinstance(result["finish_reason"], str) + + # --- build_completion_output_message --- + def test_completion_with_finish_reason(self): + result = build_completion_output_message("done", finish_reason="stop") + assert result["finish_reason"] == "stop" + + def test_completion_with_mapped_finish_reason(self): + result = build_completion_output_message("done", finish_reason="COMPLETE") + assert result["finish_reason"] == "stop" + + def test_completion_none_defaults_to_empty_string(self): + result = build_completion_output_message("done", finish_reason=None) + assert result["finish_reason"] == "" + + # --- Through span_utils --- + def test_output_message_includes_finish_reason_via_span_utils(self): + span = _recording_span() + msg = _msg("assistant", "The answer is 42.") + event = MagicMock() + event.response = MagicMock( + message=msg, + raw={"choices": [{"finish_reason": "stop"}]}, + ) + with patch(PATCH_SHOULD_SEND_SPAN, return_value=True): + set_llm_chat_response(event, span) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["finish_reason"] == "stop" + + def test_output_message_finish_reason_empty_when_no_raw(self): + span = _recording_span() + msg = _msg("assistant", "ok") + event = MagicMock() + event.response = MagicMock(message=msg, raw=None) + with patch(PATCH_SHOULD_SEND_SPAN, return_value=True): + set_llm_chat_response(event, span) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["finish_reason"] == "" + + def test_output_message_gemini_finish_reason_via_span_utils(self): + span = _recording_span() + msg = _msg("assistant", "ok") + event = MagicMock() + event.response = MagicMock( + message=msg, + raw=SimpleNamespace(candidates=[SimpleNamespace(finish_reason="STOP")]), + ) + with patch(PATCH_SHOULD_SEND_SPAN, return_value=True): + set_llm_chat_response(event, span) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["finish_reason"] == "stop" + + # --- Through custom_llm_instrumentor --- + def test_output_message_finish_reason_via_custom_llm_chat(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + msg = _msg("assistant", "Reply") + resp = SimpleNamespace( + text="Reply", + message=msg, + raw={"choices": [{"finish_reason": "stop"}]}, + ) + with patch(PATCH_SHOULD_SEND_CUSTOM, return_value=True): + _handle_response(span, LLMRequestTypeValues.CHAT, inst, resp) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["finish_reason"] == "stop" + + def test_output_message_finish_reason_via_custom_llm_completion(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="done", raw={"choices": [{"finish_reason": "length"}]}) + with patch(PATCH_SHOULD_SEND_CUSTOM, return_value=True): + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["finish_reason"] == "length" + + +# =========================================================================== +# Events: finish_reason in ChoiceEvent emission +# =========================================================================== + + +class TestChoiceEventFinishReason: + def 
test_default_finish_reason_is_empty_string(self): + event = ChoiceEvent(index=0, message={"content": "ok", "role": "assistant"}) + assert event.finish_reason == "" + + +class TestEmitChatResponseEventsFinishReason: + def test_tool_calls_mapped_to_singular(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = {"choices": [{"finish_reason": "tool_calls"}]} + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "tool_call" + + def test_fallback_empty_string_when_no_raw_reasons(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = {} + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "" + + def test_handles_none_raw(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = None + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "" + + def test_provider_name_passed_to_choice_event(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = {"choices": [{"finish_reason": "stop"}]} + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event, provider_name="anthropic") + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["provider_name"] == "anthropic" + + def test_cohere_finish_reason_mapped(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = SimpleNamespace(finish_reason="COMPLETE") + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "stop" + + def test_anthropic_finish_reason_mapped(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = SimpleNamespace(stop_reason="end_turn") + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with 
patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "stop" + + def test_gemini_finish_reason_mapped(self): + event = MagicMock() + event.response = MagicMock() + event.response.raw = SimpleNamespace(candidates=[SimpleNamespace(finish_reason="STOP")]) + event.response.message.content = "hi" + event.response.message.role.value = "assistant" + + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.emit_choice_event") as mock_emit: + with patch("opentelemetry.instrumentation.llamaindex.event_emitter.should_emit_events", return_value=True): + emit_chat_response_events(event) + call_kwargs = mock_emit.call_args[1] + assert call_kwargs["finish_reason"] == "stop" + + +# =========================================================================== +# Cross-cutting invariants +# =========================================================================== + + +class TestFinishReasonInvariants: + """Invariants that must hold across all code paths.""" + + def test_output_message_finish_reason_key_always_present(self): + """finish_reason is required per OutputMessage JSON schema — key must always exist.""" + resp = _msg("assistant", "ok") + for fr in [None, "", "stop", "length", "tool_calls", "custom"]: + result = build_output_message(resp, finish_reason=fr) + assert "finish_reason" in result, f"finish_reason key missing for input {fr!r}" + + def test_output_message_finish_reason_never_none(self): + """finish_reason must always be a string, never None (schema is non-nullable).""" + resp = _msg("assistant", "ok") + for fr in [None, "", "stop"]: + result = build_output_message(resp, finish_reason=fr) + assert result["finish_reason"] is not None, f"finish_reason is None for input {fr!r}" + assert isinstance(result["finish_reason"], str), f"finish_reason not str for input {fr!r}" + + def test_completion_output_message_finish_reason_never_none(self): + for fr in [None, "", "stop"]: + result = build_completion_output_message("text", finish_reason=fr) + assert result["finish_reason"] is not None + assert isinstance(result["finish_reason"], str) + + def test_span_attr_never_set_as_empty_array(self): + """gen_ai.response.finish_reasons must be omitted, not set as [].""" + span = _recording_span() + # Raw with no finish_reason at all + raw = SimpleNamespace(model="gpt-4") + event = MagicMock() + event.response = MagicMock(raw=raw) + set_llm_chat_response_model_attributes(event, span) + + # Attribute should not be set at all — never as [] + fr = _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert fr is None, "finish_reasons must be omitted when unavailable, not set as empty" + + def test_span_attr_never_set_as_empty_array_custom_llm(self): + from opentelemetry.semconv_ai import LLMRequestTypeValues + span = _recording_span() + inst = _custom_llm_instance() + resp = SimpleNamespace(text="ok", raw=SimpleNamespace(model="llama3")) + _handle_response(span, LLMRequestTypeValues.COMPLETION, inst, resp) + fr = _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) + assert fr is None, "finish_reasons must be omitted when unavailable, not set as empty" + + def test_all_otel_enum_values_pass_through_mapper(self): + """The five canonical OTel values should pass through unchanged.""" + for val in VALID_OTEL_FINISH_REASONS: + assert map_finish_reason(val) == val, f"OTel value '{val}' was altered by mapper" + + +if __name__ == 
"__main__": + pytest.main([__file__, "-v"]) diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_message_utils.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_message_utils.py new file mode 100644 index 0000000000..d19fbe9c5c --- /dev/null +++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_message_utils.py @@ -0,0 +1,305 @@ +"""Unit tests for _message_utils — pure message-building functions.""" + +from types import SimpleNamespace + +import pytest + +from opentelemetry.instrumentation.llamaindex._message_utils import ( + _content_to_parts, + _extract_tool_calls, + _parse_arguments, + build_completion_output_message, + build_input_messages, + build_output_message, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _msg(role, content, **additional_kwargs): + """Create a fake ChatMessage-like object.""" + m = SimpleNamespace(role=SimpleNamespace(value=role), content=content, additional_kwargs=additional_kwargs) + return m + + +# =========================================================================== +# _parse_arguments +# =========================================================================== + +class TestParseArguments: + def test_none(self): + assert _parse_arguments(None) is None + + def test_dict_passthrough(self): + d = {"key": "val"} + assert _parse_arguments(d) is d + + def test_json_string(self): + assert _parse_arguments('{"a": 1}') == {"a": 1} + + def test_invalid_json_string_passthrough(self): + assert _parse_arguments("not json") == "not json" + + def test_other_type_passthrough(self): + assert _parse_arguments(42) == 42 + + +# =========================================================================== +# _content_to_parts +# =========================================================================== + +class TestContentToParts: + def test_none_returns_empty(self): + assert _content_to_parts(None) == [] + + def test_empty_string_returns_empty(self): + assert _content_to_parts("") == [] + + def test_string_returns_text_part(self): + assert _content_to_parts("hello") == [{"type": "text", "content": "hello"}] + + def test_list_of_strings(self): + parts = _content_to_parts(["a", "b"]) + assert parts == [ + {"type": "text", "content": "a"}, + {"type": "text", "content": "b"}, + ] + + def test_list_with_text_block(self): + parts = _content_to_parts([{"type": "text", "text": "hi"}]) + assert parts == [{"type": "text", "content": "hi"}] + + def test_image_url_block(self): + block = {"type": "image_url", "image_url": {"url": "https://img.png"}} + parts = _content_to_parts([block]) + assert parts == [{"type": "uri", "modality": "image", "uri": "https://img.png"}] + + def test_image_base64_block(self): + block = { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "abc123"}, + } + parts = _content_to_parts([block]) + assert parts == [ + {"type": "blob", "modality": "image", "mime_type": "image/png", "content": "abc123"} + ] + + def test_blob_part_uses_content_key_not_data(self): + """OTel BlobPart schema requires 'content' for base64 data, not 'data'.""" + block = { + "type": "image", + "source": {"type": "base64", "media_type": "image/jpeg", "data": "base64data"}, + } + parts = _content_to_parts([block]) + assert "content" in parts[0], "BlobPart must use 'content' key per OTel spec" + assert "data" not in parts[0], "BlobPart must NOT use 'data' key" + assert 
parts[0]["content"] == "base64data" + + def test_image_url_source_block(self): + block = {"type": "image", "source": {"type": "url", "url": "https://img.png"}} + parts = _content_to_parts([block]) + assert parts == [{"type": "uri", "modality": "image", "uri": "https://img.png"}] + + def test_mixed_text_and_image(self): + blocks = [ + {"type": "text", "text": "Look at this:"}, + {"type": "image_url", "image_url": {"url": "https://img.png"}}, + ] + parts = _content_to_parts(blocks) + assert len(parts) == 2 + assert parts[0] == {"type": "text", "content": "Look at this:"} + assert parts[1]["type"] == "uri" + + def test_thinking_block_maps_to_reasoning_part(self): + """Anthropic-style thinking blocks must emit ReasoningPart, not TextPart.""" + block = {"type": "thinking", "thinking": "Let me think step by step..."} + parts = _content_to_parts([block]) + assert parts == [{"type": "reasoning", "content": "Let me think step by step..."}] + + def test_reasoning_block_maps_to_reasoning_part(self): + """Generic reasoning blocks must emit ReasoningPart.""" + block = {"type": "reasoning", "content": "Step 1: analyze the problem"} + parts = _content_to_parts([block]) + assert parts == [{"type": "reasoning", "content": "Step 1: analyze the problem"}] + + def test_thinking_block_with_content_key(self): + """Thinking block using 'content' key instead of 'thinking' key.""" + block = {"type": "thinking", "content": "Deep thought..."} + parts = _content_to_parts([block]) + assert parts == [{"type": "reasoning", "content": "Deep thought..."}] + + def test_thinking_block_with_text_key(self): + """Thinking block using 'text' key as fallback.""" + block = {"type": "thinking", "text": "Reasoning text"} + parts = _content_to_parts([block]) + assert parts == [{"type": "reasoning", "content": "Reasoning text"}] + + def test_otel_shaped_text_block_preserves_content(self): + """Block already in OTel form {"type": "text", "content": "…"} must not drop text.""" + parts = _content_to_parts([{"type": "text", "content": "hello"}]) + assert parts == [{"type": "text", "content": "hello"}] + + def test_fallback_dict_with_text_key(self): + parts = _content_to_parts([{"type": "custom", "text": "fallback"}]) + assert parts == [{"type": "text", "content": "fallback"}] + + def test_fallback_dict_with_content_key(self): + parts = _content_to_parts([{"type": "custom", "content": 42}]) + assert parts == [{"type": "text", "content": "42"}] + + def test_non_str_non_list_stringified(self): + parts = _content_to_parts(12345) + assert parts == [{"type": "text", "content": "12345"}] + + +# =========================================================================== +# _extract_tool_calls +# =========================================================================== + +class TestExtractToolCalls: + def test_no_tool_calls(self): + msg = _msg("assistant", "hi") + assert _extract_tool_calls(msg) == [] + + def test_single_tool_call(self): + tc = {"id": "tc1", "function": {"name": "get_weather", "arguments": '{"city": "NYC"}'}} + msg = _msg("assistant", "hi", tool_calls=[tc]) + parts = _extract_tool_calls(msg) + assert len(parts) == 1 + assert parts[0]["type"] == "tool_call" + assert parts[0]["id"] == "tc1" + assert parts[0]["name"] == "get_weather" + assert parts[0]["arguments"] == {"city": "NYC"} + + def test_skips_non_dict_tool_calls(self): + msg = _msg("assistant", "hi", tool_calls=["not_a_dict"]) + assert _extract_tool_calls(msg) == [] + + +# =========================================================================== +# build_input_messages 
+# =========================================================================== + +class TestBuildInputMessages: + def test_single_user_message(self): + msgs = [_msg("user", "Hello")] + result = build_input_messages(msgs) + assert result == [{"role": "user", "parts": [{"type": "text", "content": "Hello"}]}] + + def test_multiple_messages_with_roles(self): + msgs = [ + _msg("system", "You are helpful"), + _msg("user", "Hi"), + _msg("assistant", "Hello!"), + ] + result = build_input_messages(msgs) + assert len(result) == 3 + assert result[0]["role"] == "system" + assert result[1]["role"] == "user" + assert result[2]["role"] == "assistant" + + def test_system_message_inline(self): + msgs = [_msg("system", "Be concise")] + result = build_input_messages(msgs) + assert result[0]["parts"][0]["content"] == "Be concise" + + def test_message_with_none_content(self): + msgs = [_msg("assistant", None)] + result = build_input_messages(msgs) + assert result == [{"role": "assistant", "parts": []}] + + def test_message_with_empty_content(self): + msgs = [_msg("user", "")] + result = build_input_messages(msgs) + assert result == [{"role": "user", "parts": []}] + + def test_message_order_preserved(self): + msgs = [_msg("user", f"msg{i}") for i in range(5)] + result = build_input_messages(msgs) + for i, m in enumerate(result): + assert m["parts"][0]["content"] == f"msg{i}" + + def test_assistant_message_with_tool_calls(self): + tc = {"id": "tc1", "function": {"name": "search", "arguments": '{"q": "test"}'}} + msg = _msg("assistant", "Let me search", tool_calls=[tc]) + result = build_input_messages([msg]) + parts = result[0]["parts"] + assert len(parts) == 2 + assert parts[0]["type"] == "text" + assert parts[1]["type"] == "tool_call" + + def test_tool_role_message(self): + msg = _msg("tool", "result data", tool_call_id="tc1") + result = build_input_messages([msg]) + parts = result[0]["parts"] + assert len(parts) == 1 + assert parts[0]["type"] == "tool_call_response" + assert parts[0]["id"] == "tc1" + assert parts[0]["response"] == "result data" + + def test_tool_role_without_call_id_keeps_text(self): + msg = _msg("tool", "result data") + result = build_input_messages([msg]) + assert result[0]["parts"][0]["type"] == "text" + + def test_empty_messages_list(self): + assert build_input_messages([]) == [] + + def test_none_messages(self): + assert build_input_messages(None) == [] + + def test_multimodal_content_list(self): + msg = _msg("user", [{"type": "text", "text": "Describe this"}, {"type": "image_url", "image_url": {"url": "https://img.png"}}]) + result = build_input_messages([msg]) + parts = result[0]["parts"] + assert len(parts) == 2 + assert parts[0]["type"] == "text" + assert parts[1]["type"] == "uri" + + +# =========================================================================== +# build_output_message +# =========================================================================== + +class TestBuildOutputMessage: + def test_single_assistant_response(self): + resp = _msg("assistant", "The answer is 42.") + result = build_output_message(resp) + assert result["role"] == "assistant" + assert result["parts"] == [{"type": "text", "content": "The answer is 42."}] + assert result["finish_reason"] == "" + + def test_response_with_none_content(self): + resp = _msg("assistant", None) + result = build_output_message(resp) + assert result["parts"] == [] + + def test_response_with_tool_call_parts(self): + tc = {"id": "tc1", "function": {"name": "calc", "arguments": '{"x": 1}'}} + resp = _msg("assistant", "Calling 
tool", tool_calls=[tc]) + result = build_output_message(resp, finish_reason="tool_calls") + assert any(p["type"] == "tool_call" for p in result["parts"]) + + +# =========================================================================== +# build_completion_output_message +# =========================================================================== + +class TestBuildCompletionOutputMessage: + def test_basic(self): + result = build_completion_output_message("Hello world") + assert result == { + "role": "assistant", + "parts": [{"type": "text", "content": "Hello world"}], + "finish_reason": "", + } + + def test_empty_text(self): + result = build_completion_output_message("") + assert result["parts"] == [] + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_none_content_fix.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_none_content_fix.py index 410bdf1695..401caea462 100644 --- a/packages/opentelemetry-instrumentation-llamaindex/tests/test_none_content_fix.py +++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_none_content_fix.py @@ -1,5 +1,7 @@ """Test for None content handling in span_utils - Issue #3513""" +import json + import pytest from unittest.mock import MagicMock, patch from llama_index.core.base.llms.types import MessageRole @@ -43,8 +45,7 @@ class TestNoneContentHandling: def test_set_llm_chat_response_with_none_content(self): """ - Test that set_llm_chat_response doesn't set gen_ai.completion.0.content - when response.message.content is None (StructuredLLM case). + Test that output message has empty parts when content is None (StructuredLLM case). """ mock_span = MagicMock() mock_span.is_recording.return_value = True @@ -52,9 +53,11 @@ def test_set_llm_chat_response_with_none_content(self): mock_message = MagicMock() mock_message.role = MessageRole.ASSISTANT mock_message.content = None + mock_message.additional_kwargs = {} mock_response = MagicMock() mock_response.message = mock_message + mock_response.raw = {} mock_event = MagicMock() mock_event.response = mock_response @@ -63,21 +66,18 @@ def test_set_llm_chat_response_with_none_content(self): with patch('opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts', return_value=True): set_llm_chat_response(mock_event, mock_span) - # Verify role was set - mock_span.set_attribute.assert_any_call( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role", - MessageRole.ASSISTANT.value - ) - - # Verify content was NOT set (no call with the content attribute key) - content_key = f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content" - assert not any( - c.args[0] == content_key for c in mock_span.set_attribute.call_args_list - ), "Content attribute should NOT be set when value is None" + # Find the gen_ai.output.messages call and verify empty parts for None content + raw = None + for call in mock_span.set_attribute.call_args_list: + if call.args[0] == GenAIAttributes.GEN_AI_OUTPUT_MESSAGES: + raw = call.args[1] + assert raw is not None + msgs = json.loads(raw) + assert msgs[0]["parts"] == [], "Parts should be empty when content is None" def test_set_llm_chat_response_with_valid_content(self): """ - Test that set_llm_chat_response correctly sets content when it's not None. + Test that set_llm_chat_response correctly sets content as JSON when it's not None. 
""" mock_span = MagicMock() mock_span.is_recording.return_value = True @@ -85,9 +85,11 @@ def test_set_llm_chat_response_with_valid_content(self): mock_message = MagicMock() mock_message.role = MessageRole.ASSISTANT mock_message.content = "This is a valid response" + mock_message.additional_kwargs = {} mock_response = MagicMock() mock_response.message = mock_message + mock_response.raw = {} mock_event = MagicMock() mock_event.response = mock_response @@ -96,16 +98,18 @@ def test_set_llm_chat_response_with_valid_content(self): with patch('opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts', return_value=True): set_llm_chat_response(mock_event, mock_span) - # Verify content was set - mock_span.set_attribute.assert_any_call( - f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content", - "This is a valid response" - ) + # Verify content was set in JSON output messages + raw = None + for call in mock_span.set_attribute.call_args_list: + if call.args[0] == GenAIAttributes.GEN_AI_OUTPUT_MESSAGES: + raw = call.args[1] + assert raw is not None + msgs = json.loads(raw) + assert msgs[0]["parts"][0]["content"] == "This is a valid response" def test_set_llm_predict_response_with_none_output(self): """ - Test that set_llm_predict_response doesn't set gen_ai.completion.content - when event.output is None. + Test that predict response handles None output gracefully (empty text). """ mock_span = MagicMock() @@ -115,21 +119,20 @@ def test_set_llm_predict_response_with_none_output(self): with patch('opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts', return_value=True): set_llm_predict_response(mock_event, mock_span) - # Verify role was set - mock_span.set_attribute.assert_any_call( - f"{GenAIAttributes.GEN_AI_COMPLETION}.role", - MessageRole.ASSISTANT.value - ) - - # Verify content was NOT set - content_key = f"{GenAIAttributes.GEN_AI_COMPLETION}.content" - assert not any( - c.args[0] == content_key for c in mock_span.set_attribute.call_args_list - ), "Content attribute should NOT be set when value is None" + raw = None + for call in mock_span.set_attribute.call_args_list: + if call.args[0] == GenAIAttributes.GEN_AI_OUTPUT_MESSAGES: + raw = call.args[1] + assert raw is not None + msgs = json.loads(raw) + assert msgs[0]["role"] == "assistant" + # None output → empty string → empty parts (build_completion_output_message) + # Actually: event.output or "" → "", which gives empty parts + assert msgs[0]["parts"] == [] def test_set_llm_predict_response_with_valid_output(self): """ - Test that set_llm_predict_response correctly sets content when output is not None. + Test that set_llm_predict_response correctly sets content as JSON. 
""" mock_span = MagicMock() @@ -139,11 +142,13 @@ def test_set_llm_predict_response_with_valid_output(self): with patch('opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts', return_value=True): set_llm_predict_response(mock_event, mock_span) - # Verify content was set - mock_span.set_attribute.assert_any_call( - f"{GenAIAttributes.GEN_AI_COMPLETION}.content", - "Valid output text" - ) + raw = None + for call in mock_span.set_attribute.call_args_list: + if call.args[0] == GenAIAttributes.GEN_AI_OUTPUT_MESSAGES: + raw = call.args[1] + assert raw is not None + msgs = json.loads(raw) + assert msgs[0]["parts"][0]["content"] == "Valid output text" if __name__ == "__main__": diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_response_utils.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_response_utils.py new file mode 100644 index 0000000000..b0e113c3d7 --- /dev/null +++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_response_utils.py @@ -0,0 +1,173 @@ +"""Unit tests for _response_utils — response extraction utilities.""" + +from types import SimpleNamespace + +import pytest + +from opentelemetry.instrumentation.llamaindex._response_utils import ( + TokenUsage, + detect_provider_name, + extract_model_from_raw, + extract_response_id, + extract_token_usage, +) + + +# =========================================================================== +# detect_provider_name +# =========================================================================== + +class TestDetectProviderName: + def test_openai_class(self): + inst = type("OpenAI", (), {})() + assert detect_provider_name(inst) == "openai" + + def test_cohere_class(self): + inst = type("Cohere", (), {})() + assert detect_provider_name(inst) == "cohere" + + def test_anthropic_class(self): + inst = type("Anthropic", (), {})() + assert detect_provider_name(inst) == "anthropic" + + def test_groq_class(self): + inst = type("Groq", (), {})() + assert detect_provider_name(inst) == "groq" + + def test_mistralai_class(self): + inst = type("MistralAI", (), {})() + assert detect_provider_name(inst) == "mistral_ai" + + def test_bedrock_class(self): + inst = type("Bedrock", (), {})() + assert detect_provider_name(inst) == "aws.bedrock" + + def test_gemini_class(self): + inst = type("Gemini", (), {})() + assert detect_provider_name(inst) == "gcp.gemini" + + def test_ollama_class(self): + inst = type("Ollama", (), {})() + assert detect_provider_name(inst) == "ollama" + + def test_custom_llm_class(self): + inst = type("MyCustomLLM", (), {})() + assert detect_provider_name(inst) == "mycustomllm" + + def test_none_instance(self): + assert detect_provider_name(None) is None + + def test_from_string_class_name(self): + assert detect_provider_name("OpenAI") == "openai" + + def test_from_string_unknown(self): + assert detect_provider_name("SomeProvider") == "someprovider" + + def test_azure_openai(self): + assert detect_provider_name("AzureOpenAI") == "azure.ai.openai" + + def test_deepseek(self): + assert detect_provider_name("DeepSeek") == "deepseek" + + +# =========================================================================== +# extract_model_from_raw +# =========================================================================== + +class TestExtractModelFromRaw: + def test_object_with_model_attr(self): + raw = SimpleNamespace(model="gpt-4") + assert extract_model_from_raw(raw) == "gpt-4" + + def test_dict_with_model_key(self): + assert extract_model_from_raw({"model": "gpt-4"}) == "gpt-4" 
+ + def test_no_model_returns_none(self): + assert extract_model_from_raw(SimpleNamespace()) is None + + def test_none_in_dict(self): + assert extract_model_from_raw({"model": None}) is None + + +# =========================================================================== +# extract_response_id +# =========================================================================== + +class TestExtractResponseId: + def test_object_with_id_attr(self): + raw = SimpleNamespace(id="chatcmpl-abc123") + assert extract_response_id(raw) == "chatcmpl-abc123" + + def test_dict_with_id_key(self): + assert extract_response_id({"id": "resp-1"}) == "resp-1" + + def test_no_id_returns_none(self): + assert extract_response_id({}) is None + + +# =========================================================================== +# extract_token_usage +# =========================================================================== + +class TestExtractTokenUsage: + def test_openai_format_object(self): + raw = SimpleNamespace( + usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30) + ) + result = extract_token_usage(raw) + assert result == TokenUsage(input_tokens=10, output_tokens=20, total_tokens=30) + + def test_openai_format_dict(self): + raw = { + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30} + } + result = extract_token_usage(raw) + assert result == TokenUsage(input_tokens=10, output_tokens=20, total_tokens=30) + + def test_cohere_meta_tokens_format(self): + raw = SimpleNamespace( + meta=SimpleNamespace(tokens=SimpleNamespace(input_tokens=5, output_tokens=15)) + ) + result = extract_token_usage(raw) + assert result.input_tokens == 5 + assert result.output_tokens == 15 + assert result.total_tokens == 20 + + def test_cohere_meta_tokens_dict(self): + raw = {"meta": {"tokens": {"input_tokens": 5, "output_tokens": 15}}} + result = extract_token_usage(raw) + assert result.input_tokens == 5 + assert result.output_tokens == 15 + + def test_cohere_meta_billed_units_format(self): + raw = SimpleNamespace( + meta=SimpleNamespace( + tokens=None, + billed_units=SimpleNamespace(input_tokens=3, output_tokens=7), + ) + ) + result = extract_token_usage(raw) + assert result.input_tokens == 3 + assert result.output_tokens == 7 + assert result.total_tokens == 10 + + def test_cohere_meta_billed_units_dict(self): + raw = {"meta": {"billed_units": {"input_tokens": 3, "output_tokens": 7}}} + result = extract_token_usage(raw) + assert result.input_tokens == 3 + assert result.output_tokens == 7 + + def test_no_usage_returns_empty(self): + result = extract_token_usage(SimpleNamespace()) + assert result == TokenUsage() + + def test_partial_usage(self): + raw = {"usage": {"prompt_tokens": 10}} + result = extract_token_usage(raw) + assert result.input_tokens == 10 + assert result.output_tokens is None + + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_semconv_migration.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_semconv_migration.py new file mode 100644 index 0000000000..1d7bcc3bc8 --- /dev/null +++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_semconv_migration.py @@ -0,0 +1,463 @@ +"""Span-level semconv migration tests — verifies span_utils functions set correct attributes.""" + +import json +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from llama_index.core.base.llms.types import MessageRole + +from 
opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAIAttributes, +) +from opentelemetry.semconv_ai import SpanAttributes + +from opentelemetry.instrumentation.llamaindex.span_utils import ( + set_embedding, + set_llm_chat_request, + set_llm_chat_request_model_attributes, + set_llm_chat_response, + set_llm_chat_response_model_attributes, + set_llm_predict_response, + set_rerank, + set_rerank_model_attributes, +) + +PATCH_SHOULD_SEND = "opentelemetry.instrumentation.llamaindex.span_utils.should_send_prompts" + + +def _recording_span(): + span = MagicMock() + span.is_recording.return_value = True + return span + + +def _chat_message(role, content, **additional_kwargs): + m = MagicMock() + m.role = MessageRole(role) + m.content = content + m.additional_kwargs = additional_kwargs + return m + + +def _attr(span, name): + """Get the value set_attribute was called with for a given attribute name.""" + for call in span.set_attribute.call_args_list: + if call.args[0] == name: + return call.args[1] + return None + + +def _has_attr(span, name): + return any(c.args[0] == name for c in span.set_attribute.call_args_list) + + +# =========================================================================== +# set_llm_chat_request — input messages as JSON +# =========================================================================== + +class TestSetLlmChatRequest: + def test_sets_gen_ai_input_messages_json(self): + span = _recording_span() + event = MagicMock() + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_request(event, span) + raw = _attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert len(msgs) == 1 + assert msgs[0]["role"] == "user" + assert msgs[0]["parts"][0]["content"] == "Hello" + + def test_gated_by_should_send_prompts(self): + span = _recording_span() + event = MagicMock() + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=False): + set_llm_chat_request(event, span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES) + + def test_skips_when_not_recording(self): + span = MagicMock() + span.is_recording.return_value = False + event = MagicMock() + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_request(event, span) + span.set_attribute.assert_not_called() + + def test_multiple_messages_preserved(self): + span = _recording_span() + event = MagicMock() + event.messages = [ + _chat_message("system", "Be helpful"), + _chat_message("user", "Hi"), + ] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_request(event, span) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES)) + assert len(msgs) == 2 + assert msgs[0]["role"] == "system" + assert msgs[1]["role"] == "user" + + def test_no_legacy_indexed_attributes(self): + span = _recording_span() + event = MagicMock() + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_request(event, span) + for call in span.set_attribute.call_args_list: + assert not call.args[0].startswith(f"{GenAIAttributes.GEN_AI_PROMPT}.") + + +# =========================================================================== +# set_llm_chat_request_model_attributes — operation.name + provider.name +# =========================================================================== + +class 
+class TestSetLlmChatRequestModelAttributes:
+    def test_sets_operation_name_chat(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model": "gpt-4", "temperature": 0.7}
+        set_llm_chat_request_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_OPERATION_NAME) == "chat"
+
+    def test_sets_model(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model": "gpt-4"}
+        set_llm_chat_request_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MODEL) == "gpt-4"
+
+    def test_sets_temperature(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model": "gpt-4", "temperature": 0.5}
+        set_llm_chat_request_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE) == 0.5
+
+    def test_structured_llm_nested_model(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"llm": {"model": "gpt-4", "temperature": 0.3}}
+        set_llm_chat_request_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MODEL) == "gpt-4"
+
+    def test_sets_provider_name_from_class_name(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model": "gpt-4", "class_name": "OpenAI"}
+        set_llm_chat_request_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_PROVIDER_NAME) == "openai"
+
+    def test_no_legacy_llm_request_type(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model": "gpt-4"}
+        set_llm_chat_request_model_attributes(event, span)
+        assert not _has_attr(span, SpanAttributes.LLM_REQUEST_TYPE)
+
+
+# ===========================================================================
+# set_llm_chat_response — output messages as JSON
+# ===========================================================================
+
+class TestSetLlmChatResponse:
+    def test_sets_gen_ai_output_messages_json(self):
+        span = _recording_span()
+        msg = _chat_message("assistant", "The answer is 42.")
+        event = MagicMock()
+        event.response = MagicMock(message=msg, raw={"choices": [{"finish_reason": "stop"}]})
+        event.messages = [_chat_message("user", "What is 42?")]
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            set_llm_chat_response(event, span)
+        raw_out = _attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+        assert raw_out is not None
+        msgs = json.loads(raw_out)
+        assert msgs[0]["role"] == "assistant"
+        assert msgs[0]["parts"][0]["content"] == "The answer is 42."
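+        # Expected serialized shape (a sketch inferred from the asserts in this
+        # test, not from the implementation):
+        # [{"role": "assistant",
+        #   "parts": [{"type": "text", "content": "The answer is 42."}],
+        #   "finish_reason": "stop"}]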
+ assert "finish_reason" in msgs[0] + + def test_does_not_set_input_messages(self): + """Input messages are set by set_llm_chat_request, not set_llm_chat_response.""" + span = _recording_span() + msg = _chat_message("assistant", "Reply") + event = MagicMock() + event.response = MagicMock(message=msg, raw={}) + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_response(event, span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES) + + def test_gated_by_should_send_prompts(self): + span = _recording_span() + msg = _chat_message("assistant", "Reply") + event = MagicMock() + event.response = MagicMock(message=msg, raw={}) + event.messages = [_chat_message("user", "Hello")] + with patch(PATCH_SHOULD_SEND, return_value=False): + set_llm_chat_response(event, span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES) + + def test_none_content_in_output(self): + span = _recording_span() + msg = _chat_message("assistant", None) + event = MagicMock() + event.response = MagicMock(message=msg, raw={}) + event.messages = [] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_response(event, span) + msgs = json.loads(_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)) + assert msgs[0]["parts"] == [] + + def test_no_legacy_indexed_attributes(self): + span = _recording_span() + msg = _chat_message("assistant", "Reply") + event = MagicMock() + event.response = MagicMock(message=msg, raw={}) + event.messages = [_chat_message("user", "Hi")] + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_response(event, span) + for call in span.set_attribute.call_args_list: + key = call.args[0] + assert not key.startswith(f"{GenAIAttributes.GEN_AI_PROMPT}.") + assert not key.startswith(f"{GenAIAttributes.GEN_AI_COMPLETION}.") + + def test_sets_finish_reasons_span_attr_independently(self): + """set_llm_chat_response must set gen_ai.response.finish_reasons on its own, + without relying on set_llm_chat_response_model_attributes.""" + span = _recording_span() + msg = _chat_message("assistant", "Done.") + event = MagicMock() + event.response = MagicMock( + message=msg, + raw={"choices": [{"finish_reason": "stop"}]}, + ) + with patch(PATCH_SHOULD_SEND, return_value=True): + set_llm_chat_response(event, span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + def test_finish_reasons_span_attr_not_gated_by_should_send_prompts(self): + """gen_ai.response.finish_reasons is metadata, not content — never gated.""" + span = _recording_span() + msg = _chat_message("assistant", "Done.") + event = MagicMock() + event.response = MagicMock( + message=msg, + raw={"choices": [{"finish_reason": "stop"}]}, + ) + with patch(PATCH_SHOULD_SEND, return_value=False): + set_llm_chat_response(event, span) + assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS) == ["stop"] + + +# =========================================================================== +# set_llm_chat_response_model_attributes — finish_reasons, tokens, model +# =========================================================================== + +class TestSetLlmChatResponseModelAttributes: + def _event_with_raw(self, raw): + event = MagicMock() + event.response = MagicMock(raw=raw) + return event + + def test_sets_model(self): + span = _recording_span() + raw = SimpleNamespace(model="gpt-4o") + set_llm_chat_response_model_attributes(self._event_with_raw(raw), span) + assert _attr(span, 
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "gpt-4o"
+
+    def test_sets_token_usage_openai(self):
+        span = _recording_span()
+        raw = SimpleNamespace(
+            model="gpt-4",
+            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30),
+        )
+        set_llm_chat_response_model_attributes(self._event_with_raw(raw), span)
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 10
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS) == 20
+        assert _attr(span, SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) == 30
+
+    def test_no_legacy_total_tokens_attr(self):
+        span = _recording_span()
+        raw = SimpleNamespace(
+            model="gpt-4",
+            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30),
+        )
+        set_llm_chat_response_model_attributes(self._event_with_raw(raw), span)
+        assert not _has_attr(span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS)
+
+    def test_cohere_token_usage(self):
+        span = _recording_span()
+        raw = SimpleNamespace(
+            model="command-r",
+            meta=SimpleNamespace(tokens=SimpleNamespace(input_tokens=5, output_tokens=15)),
+            finish_reason="COMPLETE",
+        )
+        set_llm_chat_response_model_attributes(self._event_with_raw(raw), span)
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS) == 5
+        assert _attr(span, GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS) == 15
+        assert _attr(span, SpanAttributes.GEN_AI_USAGE_TOTAL_TOKENS) == 20
+
+    def test_response_id_set(self):
+        span = _recording_span()
+        raw = SimpleNamespace(model="gpt-4", id="chatcmpl-abc123")
+        set_llm_chat_response_model_attributes(self._event_with_raw(raw), span)
+        assert _attr(span, GenAIAttributes.GEN_AI_RESPONSE_ID) == "chatcmpl-abc123"
+
+    def test_none_raw_returns_early(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.response = MagicMock(raw=None)
+        set_llm_chat_response_model_attributes(event, span)
+        # Only the is_recording check runs; no attributes are set.
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_RESPONSE_MODEL)
+
+
+# ===========================================================================
+# set_llm_predict_response — completion output messages
+# ===========================================================================
+
+class TestSetLlmPredictResponse:
+    def test_sets_output_messages_json(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.output = "The answer is 42."
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            set_llm_predict_response(event, span)
+        raw_out = _attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+        assert raw_out is not None
+        msgs = json.loads(raw_out)
+        assert msgs[0]["role"] == "assistant"
+        assert msgs[0]["parts"][0]["content"] == "The answer is 42."
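+        # Sketch of the assumed attribute value for a plain-string predict()
+        # output (inferred from the asserts above): one assistant message with
+        # a single text part:
+        # [{"role": "assistant", "parts": [{"type": "text", "content": "The answer is 42."}]}]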
+
+    def test_gated_by_should_send_prompts(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.output = "text"
+        with patch(PATCH_SHOULD_SEND, return_value=False):
+            set_llm_predict_response(event, span)
+        assert not _has_attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+
+    def test_none_output(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.output = None
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            set_llm_predict_response(event, span)
+        raw_out = _attr(span, GenAIAttributes.GEN_AI_OUTPUT_MESSAGES)
+        msgs = json.loads(raw_out)
+        assert msgs[0]["parts"] == []
+
+    def test_no_legacy_indexed_attributes(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.output = "Reply"
+        with patch(PATCH_SHOULD_SEND, return_value=True):
+            set_llm_predict_response(event, span)
+        for call in span.set_attribute.call_args_list:
+            assert not call.args[0].startswith(f"{GenAIAttributes.GEN_AI_COMPLETION}.")
+
+
+# ===========================================================================
+# set_embedding — semconv attributes
+# ===========================================================================
+
+class TestSetEmbedding:
+    def test_sets_operation_name_embeddings(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model_name": "text-embedding-3-small"}
+        set_embedding(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_OPERATION_NAME) == "embeddings"
+
+    def test_sets_request_model(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model_name": "text-embedding-3-small"}
+        set_embedding(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MODEL) == "text-embedding-3-small"
+
+    def test_no_legacy_embedding_model_name(self):
+        """Must NOT emit legacy 'embedding.model_name' attribute."""
+        span = _recording_span()
+        event = MagicMock()
+        event.model_dict = {"model_name": "text-embedding-3-small"}
+        set_embedding(event, span)
+        assert not _has_attr(span, "embedding.model_name")
+
+
+# ===========================================================================
+# set_rerank / set_rerank_model_attributes — semconv attributes
+# ===========================================================================
+
+class TestSetRerankModelAttributes:
+    def test_sets_operation_name(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_name = "rerank-v3.5"
+        event.top_n = 5
+        set_rerank_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_OPERATION_NAME) == "rerank"
+
+    def test_sets_request_model(self):
+        span = _recording_span()
+        event = MagicMock()
+        event.model_name = "rerank-v3.5"
+        event.top_n = 5
+        set_rerank_model_attributes(event, span)
+        assert _attr(span, GenAIAttributes.GEN_AI_REQUEST_MODEL) == "rerank-v3.5"
+
+    def test_sets_top_n(self):
+        """top_n is a rerank-specific param — kept as rerank.top_n (no semconv equivalent)."""
+        span = _recording_span()
+        event = MagicMock()
+        event.model_name = "rerank-v3.5"
+        event.top_n = 3
+        set_rerank_model_attributes(event, span)
+        assert _attr(span, "rerank.top_n") == 3
+
+    def test_no_legacy_rerank_model_name(self):
+        """Must NOT emit legacy 'rerank.model_name' attribute."""
+        span = _recording_span()
+        event = MagicMock()
+        event.model_name = "rerank-v3.5"
+        event.top_n = 5
+        set_rerank_model_attributes(event, span)
+        assert not _has_attr(span, "rerank.model_name")
+
+
+class TestSetRerank:
+    def test_sets_input_messages_with_query(self):
+        span = _recording_span()
+        event = MagicMock()
"what is the meaning of life?" + with patch(PATCH_SHOULD_SEND, return_value=True): + set_rerank(event, span) + raw = _attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES) + assert raw is not None + msgs = json.loads(raw) + assert msgs[0]["role"] == "user" + assert msgs[0]["parts"][0]["content"] == "what is the meaning of life?" + + def test_gated_by_should_send_prompts(self): + span = _recording_span() + event = MagicMock() + event.query.query_str = "query" + with patch(PATCH_SHOULD_SEND, return_value=False): + set_rerank(event, span) + assert not _has_attr(span, GenAIAttributes.GEN_AI_INPUT_MESSAGES) + + def test_no_legacy_rerank_query(self): + """Must NOT emit legacy 'rerank.query' attribute.""" + span = _recording_span() + event = MagicMock() + event.query.query_str = "query" + with patch(PATCH_SHOULD_SEND, return_value=True): + set_rerank(event, span) + assert not _has_attr(span, "rerank.query") + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/packages/opentelemetry-instrumentation-llamaindex/tests/test_structured_llm.py b/packages/opentelemetry-instrumentation-llamaindex/tests/test_structured_llm.py index 208883c07a..f913793d00 100644 --- a/packages/opentelemetry-instrumentation-llamaindex/tests/test_structured_llm.py +++ b/packages/opentelemetry-instrumentation-llamaindex/tests/test_structured_llm.py @@ -2,7 +2,6 @@ from llama_index.core.llms import ChatMessage from llama_index.llms.openai import OpenAI from pydantic import BaseModel, Field -from opentelemetry.semconv_ai import SpanAttributes, LLMRequestTypeValues class Invoice(BaseModel): @@ -42,11 +41,7 @@ def test_structured_llm_model_attributes(instrument_with_content, span_exporter) llm_span = None for span in spans: - if ( - span.attributes.get("gen_ai.operation.name") == "chat" - or span.attributes.get(SpanAttributes.LLM_REQUEST_TYPE) - == LLMRequestTypeValues.CHAT.value - ): + if span.attributes.get("gen_ai.operation.name") == "chat": llm_span = span break @@ -89,11 +84,7 @@ async def test_structured_llm_achat_model_attributes( llm_span = None for span in spans: - if ( - span.attributes.get("gen_ai.operation.name") == "chat" - or span.attributes.get(SpanAttributes.LLM_REQUEST_TYPE) - == LLMRequestTypeValues.CHAT.value - ): + if span.attributes.get("gen_ai.operation.name") == "chat": llm_span = span break diff --git a/packages/opentelemetry-instrumentation-llamaindex/uv.lock b/packages/opentelemetry-instrumentation-llamaindex/uv.lock index 2b31bd2794..c49e960171 100644 --- a/packages/opentelemetry-instrumentation-llamaindex/uv.lock +++ b/packages/opentelemetry-instrumentation-llamaindex/uv.lock @@ -2322,7 +2322,7 @@ wheels = [ [[package]] name = "opentelemetry-instrumentation-chromadb" -version = "0.53.3" +version = "0.58.1" source = { editable = "../opentelemetry-instrumentation-chromadb" } dependencies = [ { name = "opentelemetry-api" }, @@ -2357,7 +2357,7 @@ test = [ [[package]] name = "opentelemetry-instrumentation-cohere" -version = "0.53.3" +version = "0.58.1" source = { editable = "../opentelemetry-instrumentation-cohere" } dependencies = [ { name = "opentelemetry-api" }, @@ -2411,7 +2411,7 @@ wheels = [ [[package]] name = "opentelemetry-instrumentation-llamaindex" -version = "0.53.3" +version = "0.58.1" source = { editable = "." 
 source = { editable = "." }
 dependencies = [
     { name = "inflection" },
@@ -2495,7 +2495,7 @@ test = [
 
 [[package]]
 name = "opentelemetry-instrumentation-openai"
-version = "0.53.3"
+version = "0.58.1"
 source = { editable = "../opentelemetry-instrumentation-openai" }
 dependencies = [
     { name = "opentelemetry-api" },