Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions src/agents/interviewer/interviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,24 @@ def _get_prompt(self):
else:
main_prompt = get_prompt("normal")

# Remove STRATEGIC_QUESTIONS section from template if stale
# Remove the strategic_questions block when EP suggestions are not yet
# available or have become stale.
#
# NOTE: get_prompt("normal") performs a first-stage format_prompt that
# expands the uppercase {STRATEGIC_QUESTIONS} token into the full
# <strategic_questions>…{strategic_questions}…</strategic_questions>
# block. By the time we reach this branch the uppercase token is gone,
# so the previous string-replace on "\n{STRATEGIC_QUESTIONS}\n" was a
# no-op, leaving the raw "{strategic_questions}" placeholder in the
# prompt. We now strip the already-expanded block with regex instead.
if not self.use_baseline and not self._should_include_strategic_questions():
# Remove the {STRATEGIC_QUESTIONS} line to exclude the section entirely
main_prompt = main_prompt.replace("\n{STRATEGIC_QUESTIONS}\n", "\n")
# Don't provide strategic_questions key in format_params (already omitted above)
main_prompt = re.sub(
r"\n<strategic_questions>.*?</strategic_questions>",
"",
main_prompt,
flags=re.DOTALL,
)
# strategic_questions is intentionally absent from format_params

return format_prompt(main_prompt, format_params)

Expand Down
49 changes: 40 additions & 9 deletions src/utils/llm/xml_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,59 @@ def format_tool_as_xml_v2(tool: Type[BaseTool]) -> str:
return "\n".join(lines)

def parse_tool_calls(xml_string: str) -> Dict[str, Any]:
"""Parse XML tool calls with proper XML entity handling.

If the LLM returns structurally malformed XML (mismatched tags, stray
angle brackets inside field values, etc.) ``ET.fromstring`` raises
``ET.ParseError``. We attempt one recovery pass using
``clean_malformed_xml`` before giving up. This prevents the background
``AgendaManager._process_qa_pair`` tasks from dying silently and leaving
memory / subtopic-coverage state partially un-updated.

Returns an empty list rather than raising when both parse attempts fail,
so the interview that triggered the call can continue unimpeded.
"""
Parse XML tool calls with proper XML entity handling
"""
import logging as _logging
_log = _logging.getLogger(__name__)

# First, escape ampersands and quote characters everywhere in the string;
# < and > are escaped further below, and only inside <response> tags.
xml_string = xml_string.replace('&', '&amp;')
# xml_string = xml_string.replace('<', '&lt;')
# xml_string = xml_string.replace('>', '&gt;')
xml_string = xml_string.replace('"', '&quot;')
xml_string = xml_string.replace("'", '&apos;')

# Find content between <response> tags and escape < and > within it
def escape_response_content(match):
content = match.group(1)
escaped_content = content.replace('<', '&lt;').replace('>', '&gt;')
return f"<response>{escaped_content}</response>"
xml_string = re.sub(r'<response>(.*?)</response>',
escape_response_content,
xml_string,

xml_string = re.sub(r'<response>(.*?)</response>',
escape_response_content,
xml_string,
flags=re.DOTALL)

root = ET.fromstring(xml_string)

# Attempt 1: parse as-is.
try:
root = ET.fromstring(xml_string)
except ET.ParseError as first_err:
# Attempt 2: clean mismatched tags then retry.
_log.warning(
"parse_tool_calls: malformed XML (%s); attempting clean_malformed_xml recovery",
first_err,
)
try:
cleaned = clean_malformed_xml(xml_string)
root = ET.fromstring(cleaned)
_log.info("parse_tool_calls: recovery succeeded after clean_malformed_xml")
except ET.ParseError as second_err:
_log.error(
"parse_tool_calls: XML unrecoverable after clean attempt (%s); "
"returning empty tool-call list. Raw (first 300): %.300s",
second_err, xml_string,
)
return []
result = []

def parse_value(text: str) -> Any:
Expand Down