Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions agents/developer/prompts/agent.system.main.communication.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ The agent SHALL conduct a structured interview process to establish:

The agent must utilize the 'response' tool iteratively until achieving complete clarity on all dimensions. Only when the agent can execute the entire development lifecycle without further clarification should autonomous work commence. This front-loaded investment in requirements understanding prevents costly refactoring and ensures alignment with user expectations.

### Direct Time Handling

If the user asks for the current system date or time, answer directly from the injected `current_datetime` prompt content. Do not call CLI, RFC, browser, or code execution tools for simple time/date queries, and do not say the system is unavailable when `current_datetime` is present.

### Thinking (thoughts)

Every Agent Zero reply must contain a "thoughts" JSON field serving as the cognitive workspace for systematic architectural processing.
Expand Down
26 changes: 26 additions & 0 deletions usr/plugins/headroom/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Headroom Plugin

Optional Agent Zero integration for `chopratejas/headroom`.

This plugin hooks `chat_model_call_before` and compresses the LangChain message
list before Agent Zero calls LiteLLM. It is intentionally optional: if
`headroom-ai` is not installed in the Agent Zero framework runtime, the plugin
logs a warning once and leaves messages unchanged.

Install the dependency in the framework runtime when you want to enable compression.
On Windows, current `headroom-ai` releases may try to build a Rust extension
from source and fail without Visual Studio Build Tools. The tested workaround
for this Agent Zero environment is:

```bash
pip install --force-reinstall --no-deps "headroom-ai==0.5.25"
pip install --no-deps "ast-grep-cli==0.42.3"
```

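This keeps Agent Zero's pinned LiteLLM version in place. When
`clear_proxy_for_headroom` is enabled (the default), the plugin temporarily
removes the `HTTP_PROXY`, `HTTPS_PROXY`, and `ALL_PROXY` environment variables
(upper- and lower-case) while Headroom loads tokenizer/model files and restores
them afterwards, because this environment sets the proxy variables to the
broken address `127.0.0.1:9`.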

Use the latest `headroom-ai[proxy]`, `headroom-ai[mcp]`, or `headroom-ai[all]`
only after installing Visual Studio Build Tools with the Visual C++ workload, or
when a compatible wheel is available for your Python/Windows target.
12 changes: 12 additions & 0 deletions usr/plugins/headroom/default_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Default settings for the Headroom plugin. The extension falls back to these
# same values when a key is missing from the loaded config.

# Master switch: when false the extension returns without touching messages.
enabled: true
# Compression is skipped when the approximate token count of all message
# contents is below this threshold.
min_total_tokens: 4000
# When non-empty, used as the model name passed to Headroom instead of the
# chat model's own `model_name`.
model_name_override: ""
# Forwarded to Headroom's CompressConfig (only if that class accepts the key).
compress_user_messages: true
# Forwarded to CompressConfig; negative values are clamped to 0.
# NOTE(review): presumably the number of most recent messages left
# uncompressed — confirm against the Headroom documentation.
protect_recent: 4
# Forwarded to CompressConfig; a blank or non-numeric value becomes None.
target_ratio: 0.35
# Forwarded to CompressConfig; negative values are clamped to 0.
min_tokens_to_compress: 250
# Forwarded to CompressConfig; an empty string is passed as None.
# NOTE(review): the meaning of "disabled" is defined by Headroom — confirm.
kompress_model: "disabled"
# When true, the HTTP(S)/ALL proxy environment variables are removed for the
# duration of the Headroom call and restored afterwards.
clear_proxy_for_headroom: true
# When true, a log entry with the token savings is written after a
# compression that saved at least one token.
log_savings: true
# When true, a one-time warning is logged if the `headroom` package cannot be
# imported.
warn_missing_dependency: true
# When true, any message whose content is not a plain string causes the whole
# compression to be skipped; when false such content is converted with str().
skip_non_text_messages: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from __future__ import annotations

from typing import Any

from helpers import plugins
from helpers.extension import Extension
from helpers.print_style import PrintStyle

from usr.plugins.headroom.helpers.compression import (
HeadroomUnavailable,
compress_langchain_messages,
)


class HeadroomCompressBeforeChatModel(Extension):
    """Compress the outgoing chat messages with Headroom before the model call.

    The extension reads the ``headroom`` plugin config, passes
    ``call_data["messages"]`` through ``compress_langchain_messages`` and, when
    a different list comes back, stores it in ``call_data["messages"]``. Every
    failure path leaves ``call_data`` untouched.
    """

    async def execute(self, call_data: dict[str, Any] | None = None, **kwargs):
        """Replace ``call_data["messages"]`` with a compressed copy when possible.

        Args:
            call_data: Mutable call description. ``"messages"`` (must be a
                list) and ``"model"`` are read; ``"messages"`` may be
                overwritten.
            **kwargs: Accepted and ignored.
        """
        if not self.agent or not call_data:
            return

        settings = plugins.get_plugin_config("headroom", agent=self.agent) or {}
        if not settings.get("enabled", True):
            return

        original_messages = call_data.get("messages")
        if not isinstance(original_messages, list):
            return

        # An explicit override wins over the model object's own name.
        chat_model = call_data.get("model")
        resolved_name = settings.get("model_name_override") or getattr(chat_model, "model_name", "")
        model_name = str(resolved_name or "")
        if not model_name:
            return

        try:
            # Built inside the try so a malformed setting (e.g. a non-numeric
            # string) is reported as a skipped compression, not a crash.
            options = {
                "model_name": model_name,
                "min_total_tokens": int(settings.get("min_total_tokens", 4000) or 0),
                "compress_user_messages": bool(settings.get("compress_user_messages", True)),
                "protect_recent": int(settings.get("protect_recent", 4) or 0),
                "target_ratio": _optional_float(settings.get("target_ratio", 0.35)),
                "min_tokens_to_compress": int(settings.get("min_tokens_to_compress", 250) or 0),
                "kompress_model": str(settings.get("kompress_model", "disabled") or ""),
                "clear_proxy_for_headroom": bool(settings.get("clear_proxy_for_headroom", True)),
                "skip_non_text_messages": bool(settings.get("skip_non_text_messages", True)),
            }
            compressed, stats = compress_langchain_messages(original_messages, **options)
        except HeadroomUnavailable as exc:
            self._warn_once(str(exc), settings)
            return
        except Exception as exc:
            PrintStyle.warning(f"Headroom compression skipped: {exc}")
            return

        # The helper hands back the very same list object when it did nothing.
        if compressed is original_messages:
            return

        call_data["messages"] = compressed

        if not (stats and stats.tokens_saved > 0):
            return
        self._record_stats(stats)
        if settings.get("log_savings", True):
            self._log_savings(stats)

    def _log_savings(self, stats) -> None:
        """Write an info entry describing the tokens saved by one compression."""
        applied = ", ".join(stats.transforms or []) or "none"
        summary = (
            f"Saved {stats.tokens_saved} tokens "
            f"({stats.tokens_before} -> {stats.tokens_after}). "
            f"Transforms: {applied}"
        )
        self.agent.context.log.log(
            type="info",
            heading="icon://compress Headroom compressed context",
            content=summary,
        )

    def _warn_once(self, message: str, config: dict[str, Any]) -> None:
        """Log the missing-dependency warning at most once per agent context.

        Does nothing when ``warn_missing_dependency`` is disabled in ``config``.
        """
        if not config.get("warn_missing_dependency", True):
            return
        if not self.agent:
            return

        flag = "_headroom_missing_dependency_warned"
        context = self.agent.context
        if context.get_data(flag):
            return

        context.set_data(flag, True)
        context.log.log(
            type="warning",
            heading="icon://warning Headroom is not installed",
            content=message + ". Install 'headroom-ai' in the Agent Zero framework runtime.",
        )

    def _record_stats(self, stats) -> None:
        """Add one compression's token counts to the running totals in the context."""
        if not self.agent:
            return

        slot = "_headroom_stats"
        context = self.agent.context
        totals = context.get_data(slot) or {
            "calls": 0,
            "tokens_saved": 0,
            "tokens_before": 0,
            "tokens_after": 0,
        }
        totals["calls"] += 1
        for counter in ("tokens_saved", "tokens_before", "tokens_after"):
            totals[counter] += getattr(stats, counter)
        context.set_data(slot, totals)


def _optional_float(value: Any) -> float | None:
    """Convert a config value to ``float``, or ``None`` when blank or unusable.

    Args:
        value: Raw setting, e.g. a number, a numeric string, ``""`` or ``None``.

    Returns:
        ``float(value)``, or ``None`` when ``value`` is ``""``/``None`` or
        cannot be represented as a float.
    """
    if value in ("", None):
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an int too large for a float (e.g. 10**400) is just
        # another unusable setting and must not escape as an exception.
        return None
1 change: 1 addition & 0 deletions usr/plugins/headroom/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

183 changes: 183 additions & 0 deletions usr/plugins/headroom/helpers/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
from __future__ import annotations

from dataclasses import dataclass
import inspect
import os
from typing import Any

from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage

from helpers import tokens


@dataclass
class HeadroomStats:
    """Token accounting reported for a single compression call."""

    # Token count before compression.
    tokens_before: int = 0
    # Token count after compression.
    tokens_after: int = 0
    # Number of tokens removed by compression.
    tokens_saved: int = 0
    # Compression ratio as reported by the compressor.
    compression_ratio: float = 0.0
    # Names of the transforms that were applied, if any were reported.
    transforms: list[str] | None = None


class HeadroomUnavailable(RuntimeError):
    """Raised when the optional ``headroom`` package cannot be imported."""


def compress_langchain_messages(
    messages: list[BaseMessage],
    *,
    model_name: str,
    min_total_tokens: int,
    compress_user_messages: bool = True,
    protect_recent: int = 4,
    target_ratio: float | None = 0.35,
    min_tokens_to_compress: int = 250,
    kompress_model: str | None = "disabled",
    clear_proxy_for_headroom: bool = True,
    skip_non_text_messages: bool = True,
) -> tuple[list[BaseMessage], HeadroomStats | None]:
    """Compress a LangChain message list with Headroom.

    Whenever compression is skipped or its result cannot be used, the *same*
    ``messages`` object is returned together with ``None`` stats, so callers
    can detect "nothing changed" with an identity check.

    Args:
        messages: Conversation to compress.
        model_name: Model name passed to Headroom's ``compress``.
        min_total_tokens: Skip compression when the approximate token count of
            all message contents is below this value.
        compress_user_messages, protect_recent, target_ratio,
        min_tokens_to_compress, kompress_model: Forwarded to Headroom's
            ``CompressConfig`` when that class accepts them.
        clear_proxy_for_headroom: Remove proxy environment variables for the
            duration of the Headroom call and restore them afterwards.
        skip_non_text_messages: Skip compression entirely when any message
            content is not a plain string (otherwise it is stringified).

    Returns:
        ``(compressed_messages, stats)`` on success, ``(messages, None)``
        otherwise.

    Raises:
        HeadroomUnavailable: If the ``headroom`` package cannot be imported.
    """
    if not messages:
        return messages, None

    total_tokens = tokens.approximate_tokens(_messages_text(messages))
    if total_tokens < max(0, int(min_total_tokens or 0)):
        return messages, None

    payload = _to_headroom_messages(messages, skip_non_text_messages=skip_non_text_messages)
    if payload is None:
        return messages, None

    # Imported lazily: the dependency is optional and only needed from here on.
    try:
        from headroom import compress
    except ImportError as exc:
        raise HeadroomUnavailable("Python package 'headroom-ai' is not installed") from exc

    compress_config = _build_compress_config(
        compress_user_messages=compress_user_messages,
        protect_recent=protect_recent,
        target_ratio=target_ratio,
        min_tokens_to_compress=min_tokens_to_compress,
        kompress_model=kompress_model,
    )

    # NOTE: this mutates the process-wide environment; it is restored in the
    # ``finally`` even when Headroom raises.
    previous_proxy_env = _clear_proxy_env() if clear_proxy_for_headroom else {}
    try:
        result = compress(payload, model=model_name, config=compress_config)
    finally:
        _restore_env(previous_proxy_env)

    compressed_payload = getattr(result, "messages", None)
    if not isinstance(compressed_payload, list):
        return messages, None

    compressed_messages = _from_headroom_messages(compressed_payload, fallback=messages)
    if compressed_messages is messages:
        # Conversion fell back to the original list: nothing was compressed,
        # so do not report statistics for it.
        return messages, None

    return compressed_messages, _stats_from_result(result, compressed_messages, total_tokens)


def _build_compress_config(
    *,
    compress_user_messages: bool,
    protect_recent: int,
    target_ratio: float | None,
    min_tokens_to_compress: int,
    kompress_model: str | None,
) -> Any:
    """Build a Headroom ``CompressConfig`` from the options it supports, or ``None``."""
    try:
        from headroom import CompressConfig

        config_kwargs = {
            "compress_user_messages": compress_user_messages,
            "compress_system_messages": True,
            "protect_recent": max(0, int(protect_recent or 0)),
            "target_ratio": target_ratio,
            "min_tokens_to_compress": max(0, int(min_tokens_to_compress or 0)),
            "kompress_model": kompress_model or None,
        }
        # Only pass keywords this Headroom version actually accepts.
        supported = set(inspect.signature(CompressConfig).parameters)
        return CompressConfig(
            **{key: value for key, value in config_kwargs.items() if key in supported}
        )
    except Exception:
        # Deliberate best effort: if the config class is missing or rejects the
        # values, compression still runs with Headroom's defaults.
        return None


def _stats_from_result(
    result: Any,
    compressed_messages: list[BaseMessage],
    total_tokens: int,
) -> HeadroomStats:
    """Collect token statistics from a Headroom result, estimating what it omits."""
    tokens_before = int(getattr(result, "tokens_before", None) or total_tokens)

    # Only tokenize the compressed text when Headroom did not report a count.
    reported_after = getattr(result, "tokens_after", None)
    if reported_after is None:
        tokens_after = int(tokens.approximate_tokens(_messages_text(compressed_messages)))
    else:
        tokens_after = int(reported_after)

    # Derive the savings when they are not reported, instead of claiming zero.
    reported_saved = getattr(result, "tokens_saved", None)
    if reported_saved is None:
        tokens_saved = max(0, tokens_before - tokens_after)
    else:
        tokens_saved = int(reported_saved)

    return HeadroomStats(
        tokens_before=tokens_before,
        tokens_after=tokens_after,
        tokens_saved=tokens_saved,
        compression_ratio=float(getattr(result, "compression_ratio", 0.0) or 0.0),
        transforms=list(getattr(result, "transforms_applied", []) or []),
    )


def _messages_text(messages: list[BaseMessage]) -> str:
    """Return every message's stringified ``content`` joined by blank lines."""
    parts = [str(entry.content) for entry in messages]
    return "\n\n".join(parts)


def _to_headroom_messages(
    messages: list[BaseMessage],
    *,
    skip_non_text_messages: bool,
) -> list[dict[str, str]] | None:
    """Convert messages to ``{"role", "content"}`` dicts for Headroom.

    Returns ``None`` (meaning "do not compress") when a message type is not
    system/human/ai, or when a message has non-string content and
    ``skip_non_text_messages`` is true. With the flag false, non-string content
    is converted with ``str()``.
    """
    roles_by_type = {
        "system": "system",
        "human": "user",
        "ai": "assistant",
    }
    converted: list[dict[str, str]] = []

    for entry in messages:
        raw = entry.content
        if isinstance(raw, str):
            text = raw
        elif skip_non_text_messages:
            return None
        else:
            text = str(raw)

        mapped_role = roles_by_type.get(entry.type)
        if not mapped_role:
            return None

        converted.append({"role": mapped_role, "content": text})

    return converted


def _from_headroom_messages(
    payload: list[dict[str, Any]],
    *,
    fallback: list[BaseMessage],
) -> list[BaseMessage]:
    """Rebuild LangChain messages from Headroom's role/content dicts.

    ``fallback`` is returned unchanged when ``payload`` is empty, contains a
    non-dict item, or has an unrecognised role with no message at the same
    index in ``fallback`` to take the class from.
    """
    rebuilt: list[BaseMessage] = []

    for position, entry in enumerate(payload):
        if not isinstance(entry, dict):
            return fallback

        role = str(entry.get("role") or "")
        text = entry.get("content", "")
        if not isinstance(text, str):
            text = str(text)

        if role == "system":
            message_cls = SystemMessage
        elif role == "assistant":
            message_cls = AIMessage
        elif role == "user":
            message_cls = HumanMessage
        else:
            # Unknown role: reuse the class of the message at the same index.
            template = fallback[position] if position < len(fallback) else None
            if not isinstance(template, BaseMessage):
                return fallback
            message_cls = type(template)

        rebuilt.append(message_cls(content=text))

    return rebuilt if rebuilt else fallback


def _clear_proxy_env() -> dict[str, str | None]:
    """Remove the proxy variables from ``os.environ`` and return their old values.

    The returned mapping holds every proxy variable name with its previous
    value, or ``None`` if it was unset. As a side effect,
    ``HF_HUB_DISABLE_SYMLINKS_WARNING`` is set to ``"1"`` unless already set.
    """
    proxy_names = (
        "HTTP_PROXY",
        "HTTPS_PROXY",
        "ALL_PROXY",
        "http_proxy",
        "https_proxy",
        "all_proxy",
    )
    # Snapshot everything first, then remove: the two passes must stay
    # separate so no lookup happens after a removal.
    snapshot = dict(zip(proxy_names, map(os.environ.get, proxy_names)))
    for name in proxy_names:
        os.environ.pop(name, None)

    if "HF_HUB_DISABLE_SYMLINKS_WARNING" not in os.environ:
        os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
    return snapshot


def _restore_env(previous: dict[str, str | None]) -> None:
    """Put environment variables back to the values recorded in ``previous``.

    A recorded value of ``None`` means the variable was unset, so it is removed.
    """
    for name, value in previous.items():
        if value is not None:
            os.environ[name] = value
            continue
        os.environ.pop(name, None)
10 changes: 10 additions & 0 deletions usr/plugins/headroom/plugin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Manifest for the Headroom plugin.
# `name` is the identifier the extension uses when it loads this plugin's
# config ("headroom").
name: headroom
title: Headroom
description: Compress Agent Zero chat context with Headroom before LLM calls.
version: 0.1.0
# NOTE(review): presumably the settings sections in which this plugin's
# options are shown — confirm against the plugin loader.
settings_sections:
- agent
- developer
# NOTE(review): these flags look like they allow per-project / per-agent
# config overrides; the extension does request its config with the current
# agent — confirm the exact semantics in the plugin framework.
per_project_config: true
per_agent_config: true
# NOTE(review): presumably false means the plugin can be switched off —
# confirm against the plugin loader.
always_enabled: false
Loading