diff --git a/agents/developer/prompts/agent.system.main.communication.md b/agents/developer/prompts/agent.system.main.communication.md index 18251a64bb..ece64f4a2a 100644 --- a/agents/developer/prompts/agent.system.main.communication.md +++ b/agents/developer/prompts/agent.system.main.communication.md @@ -15,6 +15,10 @@ The agent SHALL conduct a structured interview process to establish: The agent must utilize the 'response' tool iteratively until achieving complete clarity on all dimensions. Only when the agent can execute the entire development lifecycle without further clarification should autonomous work commence. This front-loaded investment in requirements understanding prevents costly refactoring and ensures alignment with user expectations. +### Direct Time Handling + +If the user asks for the current system date or time, answer directly from the injected `current_datetime` prompt content. Do not call CLI, RFC, browser, or code execution tools for simple time/date queries, and do not say the system is unavailable when `current_datetime` is present. + ### Thinking (thoughts) Every Agent Zero reply must contain a "thoughts" JSON field serving as the cognitive workspace for systematic architectural processing. diff --git a/usr/plugins/headroom/README.md b/usr/plugins/headroom/README.md new file mode 100644 index 0000000000..a43e64bad0 --- /dev/null +++ b/usr/plugins/headroom/README.md @@ -0,0 +1,26 @@ +# Headroom Plugin + +Optional Agent Zero integration for `chopratejas/headroom`. + +This plugin hooks `chat_model_call_before` and compresses the LangChain message +list before Agent Zero calls LiteLLM. It is intentionally optional: if +`headroom-ai` is not installed in the Agent Zero framework runtime, the plugin +logs a warning once and leaves messages unchanged. + +Install dependency in the framework runtime when you want to enable compression. 
class HeadroomCompressBeforeChatModel(Extension):
    """Compress the outgoing chat message list with Headroom before a model call.

    The extension replaces ``call_data["messages"]`` in place when compression
    produced a different list. Any failure leaves the messages untouched.
    """

    async def execute(self, call_data: dict[str, Any] | None = None, **kwargs):
        """Run Headroom on ``call_data["messages"]`` when the plugin is enabled.

        Args:
            call_data: Mutable call description; the ``"messages"`` and
                ``"model"`` keys are read, ``"messages"`` may be replaced.
            **kwargs: Accepted for hook compatibility; not used here.
        """
        if not self.agent or not call_data:
            return

        config = plugins.get_plugin_config("headroom", agent=self.agent) or {}
        if not config.get("enabled", True):
            return

        messages = call_data.get("messages")
        if not isinstance(messages, list):
            return

        # An explicit override wins over the model object's own name.
        model = call_data.get("model")
        model_name = str(
            config.get("model_name_override") or getattr(model, "model_name", "") or ""
        )
        if not model_name:
            return

        try:
            compressed, stats = compress_langchain_messages(
                messages,
                model_name=model_name,
                min_total_tokens=int(config.get("min_total_tokens", 4000) or 0),
                compress_user_messages=bool(config.get("compress_user_messages", True)),
                protect_recent=int(config.get("protect_recent", 4) or 0),
                target_ratio=_optional_float(config.get("target_ratio", 0.35)),
                min_tokens_to_compress=int(config.get("min_tokens_to_compress", 250) or 0),
                kompress_model=str(config.get("kompress_model", "disabled") or ""),
                clear_proxy_for_headroom=bool(config.get("clear_proxy_for_headroom", True)),
                skip_non_text_messages=bool(config.get("skip_non_text_messages", True)),
            )
        except HeadroomUnavailable as exc:
            self._warn_once(str(exc), config)
            return
        except Exception as exc:
            # Compression is optional: never let it break the model call.
            PrintStyle.warning(f"Headroom compression skipped: {exc}")
            return

        # The helper hands back the very same list when it decided not to compress.
        if compressed is messages:
            return

        call_data["messages"] = compressed
        if stats and stats.tokens_saved > 0:
            self._record_stats(stats)
            if config.get("log_savings", True):
                # Coerce entries to str: this runs outside the try block, so a
                # non-string transform entry must not raise TypeError here.
                transforms = ", ".join(str(name) for name in (stats.transforms or [])) or "none"
                self.agent.context.log.log(
                    type="info",
                    heading="icon://compress Headroom compressed context",
                    content=(
                        f"Saved {stats.tokens_saved} tokens "
                        f"({stats.tokens_before} -> {stats.tokens_after}). "
                        f"Transforms: {transforms}"
                    ),
                )

    def _warn_once(self, message: str, config: dict[str, Any]) -> None:
        """Log a missing-dependency warning at most once per agent context.

        Args:
            message: Text describing why Headroom is unavailable.
            config: Plugin config; ``warn_missing_dependency`` can silence the warning.
        """
        if not config.get("warn_missing_dependency", True):
            return
        key = "_headroom_missing_dependency_warned"
        if self.agent and not self.agent.context.get_data(key):
            # Set the flag first so repeated calls do not log again.
            self.agent.context.set_data(key, True)
            self.agent.context.log.log(
                type="warning",
                heading="icon://warning Headroom is not installed",
                content=message + ". Install 'headroom-ai' in the Agent Zero framework runtime.",
            )

    def _record_stats(self, stats) -> None:
        """Add one compression result to the running totals stored on the context.

        Args:
            stats: Object exposing ``tokens_saved``, ``tokens_before`` and
                ``tokens_after`` for the call that just completed.
        """
        if not self.agent:
            return
        key = "_headroom_stats"
        current = self.agent.context.get_data(key) or {
            "calls": 0,
            "tokens_saved": 0,
            "tokens_before": 0,
            "tokens_after": 0,
        }
        current["calls"] += 1
        current["tokens_saved"] += stats.tokens_saved
        current["tokens_before"] += stats.tokens_before
        current["tokens_after"] += stats.tokens_after
        self.agent.context.set_data(key, current)


def _optional_float(value: Any) -> float | None:
    """Convert a config value to a finite float, or ``None`` when unusable.

    Args:
        value: Raw config value (number, numeric string, ``""`` or ``None``).

    Returns:
        The parsed float, or ``None`` for empty, unparsable or non-finite input.
    """
    import math

    if value in ("", None):
        return None
    try:
        parsed = float(value)
    except (TypeError, ValueError):
        return None
    # "nan" / "inf" parse successfully but are meaningless as a ratio.
    return parsed if math.isfinite(parsed) else None
| None = None + + +class HeadroomUnavailable(RuntimeError): + pass + + +def compress_langchain_messages( + messages: list[BaseMessage], + *, + model_name: str, + min_total_tokens: int, + compress_user_messages: bool = True, + protect_recent: int = 4, + target_ratio: float | None = 0.35, + min_tokens_to_compress: int = 250, + kompress_model: str | None = "disabled", + clear_proxy_for_headroom: bool = True, + skip_non_text_messages: bool = True, +) -> tuple[list[BaseMessage], HeadroomStats | None]: + if not messages: + return messages, None + + total_tokens = tokens.approximate_tokens(_messages_text(messages)) + if total_tokens < max(0, int(min_total_tokens or 0)): + return messages, None + + payload = _to_headroom_messages(messages, skip_non_text_messages=skip_non_text_messages) + if payload is None: + return messages, None + + try: + from headroom import compress + except ImportError as exc: + raise HeadroomUnavailable("Python package 'headroom-ai' is not installed") from exc + + compress_config = None + try: + from headroom import CompressConfig + + config_kwargs = { + "compress_user_messages": compress_user_messages, + "compress_system_messages": True, + "protect_recent": max(0, int(protect_recent or 0)), + "target_ratio": target_ratio, + "min_tokens_to_compress": max(0, int(min_tokens_to_compress or 0)), + "kompress_model": kompress_model or None, + } + supported = set(inspect.signature(CompressConfig).parameters) + compress_config = CompressConfig( + **{key: value for key, value in config_kwargs.items() if key in supported} + ) + except Exception: + compress_config = None + + previous_proxy_env = _clear_proxy_env() if clear_proxy_for_headroom else {} + try: + result = compress(payload, model=model_name, config=compress_config) + finally: + _restore_env(previous_proxy_env) + compressed_payload = getattr(result, "messages", None) + if not isinstance(compressed_payload, list): + return messages, None + + compressed_messages = 
_from_headroom_messages(compressed_payload, fallback=messages) + stats = HeadroomStats( + tokens_before=int(getattr(result, "tokens_before", total_tokens) or total_tokens), + tokens_after=int( + getattr( + result, + "tokens_after", + tokens.approximate_tokens(_messages_text(compressed_messages)), + ) + or 0 + ), + tokens_saved=int(getattr(result, "tokens_saved", 0) or 0), + compression_ratio=float(getattr(result, "compression_ratio", 0.0) or 0.0), + transforms=list(getattr(result, "transforms_applied", []) or []), + ) + return compressed_messages, stats + + +def _messages_text(messages: list[BaseMessage]) -> str: + return "\n\n".join(str(message.content) for message in messages) + + +def _to_headroom_messages( + messages: list[BaseMessage], + *, + skip_non_text_messages: bool, +) -> list[dict[str, str]] | None: + payload: list[dict[str, str]] = [] + role_map = { + "system": "system", + "human": "user", + "ai": "assistant", + } + + for message in messages: + if not isinstance(message.content, str): + if skip_non_text_messages: + return None + content = str(message.content) + else: + content = message.content + + role = role_map.get(message.type) + if not role: + return None + payload.append({"role": role, "content": content}) + + return payload + + +def _from_headroom_messages( + payload: list[dict[str, Any]], + *, + fallback: list[BaseMessage], +) -> list[BaseMessage]: + result: list[BaseMessage] = [] + for index, item in enumerate(payload): + if not isinstance(item, dict): + return fallback + role = str(item.get("role") or "") + content = item.get("content", "") + if not isinstance(content, str): + content = str(content) + + original = fallback[index] if index < len(fallback) else None + if role == "system": + result.append(SystemMessage(content=content)) + elif role == "assistant": + result.append(AIMessage(content=content)) + elif role == "user": + result.append(HumanMessage(content=content)) + elif isinstance(original, BaseMessage): + 
result.append(type(original)(content=content)) + else: + return fallback + + return result or fallback + + +def _clear_proxy_env() -> dict[str, str | None]: + names = ( + "HTTP_PROXY", + "HTTPS_PROXY", + "ALL_PROXY", + "http_proxy", + "https_proxy", + "all_proxy", + ) + previous = {name: os.environ.get(name) for name in names} + for name in names: + os.environ.pop(name, None) + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") + return previous + + +def _restore_env(previous: dict[str, str | None]) -> None: + for name, value in previous.items(): + if value is None: + os.environ.pop(name, None) + else: + os.environ[name] = value diff --git a/usr/plugins/headroom/plugin.yaml b/usr/plugins/headroom/plugin.yaml new file mode 100644 index 0000000000..f7083f0cae --- /dev/null +++ b/usr/plugins/headroom/plugin.yaml @@ -0,0 +1,10 @@ +name: headroom +title: Headroom +description: Compress Agent Zero chat context with Headroom before LLM calls. +version: 0.1.0 +settings_sections: + - agent + - developer +per_project_config: true +per_agent_config: true +always_enabled: false diff --git a/usr/plugins/headroom/webui/config.html b/usr/plugins/headroom/webui/config.html new file mode 100644 index 0000000000..ae703832c5 --- /dev/null +++ b/usr/plugins/headroom/webui/config.html @@ -0,0 +1,179 @@ + + + Headroom + + +
+ +
+ +