From 231ba223c65a1c94861ec2ffbd629e962b78641c Mon Sep 17 00:00:00 2001 From: Jake LoRocco Date: Mon, 11 May 2026 14:34:12 -0400 Subject: [PATCH 1/2] fix: change hallucination detection tests to accept differences in explanation Signed-off-by: Jake LoRocco --- test/stdlib/components/intrinsic/test_rag.py | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/test/stdlib/components/intrinsic/test_rag.py b/test/stdlib/components/intrinsic/test_rag.py index bc76fa650..fb472414c 100644 --- a/test/stdlib/components/intrinsic/test_rag.py +++ b/test/stdlib/components/intrinsic/test_rag.py @@ -187,6 +187,21 @@ def test_context_relevance(backend_4_0): assert result == "irrelevant" +def _compare_hallucination(result: list[dict], expected: list[dict]): + """Special function to compare the result and expected output for hallucination detection. + + There are slight differences in explanations depending on where the test is run. + """ + for r, e in zip(result, expected, strict=True): + assert r["response_begin"] == e["response_begin"] + assert r["response_end"] == e["response_end"] + assert r["response_text"] == e["response_text"] + assert r["faithfulness"] == e["faithfulness"] + + # Specifically don't check the explanation due to mentioned differences. 
+ # assert r["explanation"] == e["explanation"] + + @pytest.mark.qualitative def test_hallucination_detection(backend): """Verify that the hallucination detection intrinsic functions properly.""" @@ -196,11 +211,11 @@ def test_hallucination_detection(backend): # First call triggers adapter loading result = rag.flag_hallucinated_content(assistant_response, docs, context, backend) _dump_output_json("hallucination_detection.json", result) - assert result == expected + _compare_hallucination(result, expected) # Second call hits a different code path from the first one result = rag.flag_hallucinated_content(assistant_response, docs, context, backend) - assert result == expected + _compare_hallucination(result, expected) @pytest.mark.qualitative @@ -303,7 +318,7 @@ def test_hallucination_detection_resolve(backend): expected = _read_output_json("hallucination_detection.json") result = rag.flag_hallucinated_content(None, docs, context, backend) - assert result == expected + _compare_hallucination(result, expected) @pytest.mark.qualitative From 75b27eb060effb7a64f71b61e81ee6c4553eaf60 Mon Sep 17 00:00:00 2001 From: Jake LoRocco Date: Mon, 11 May 2026 15:33:31 -0400 Subject: [PATCH 2/2] fix: remove unnecessary warning from openai standard generation Signed-off-by: Jake LoRocco --- mellea/backends/openai.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index 1eea93511..f9c24b7d3 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -832,14 +832,7 @@ async def _generate_from_chat_context_standard( ) # Convert our linearized context into a sequence of chat messages. Template formatters have a standard way of doing this. messages: list[Message] = self.formatter.to_chat_messages(linearized_context) - # Add the final message. - match action: - case ALoraRequirement(): - raise Exception( - "The OpenAI backend does not currently support activated LoRAs." 
- ) - case _: - messages.extend(self.formatter.to_chat_messages([action])) + messages.extend(self.formatter.to_chat_messages([action])) conversation: list[dict] = [] system_prompt = model_opts.get(ModelOption.SYSTEM_PROMPT, "")