From e00e4651f137acc025e623788c3b5ac23ea6242c Mon Sep 17 00:00:00 2001 From: Johannes Castner Date: Fri, 5 Jun 2026 18:21:09 +0100 Subject: [PATCH 1/4] feat(base): typed env-driven LLM provider selection (DeepSeek wiring, Piece 1) Add a typed provider-selection seam to BaseAgent so the no-tools agents (planner/actor/critic) route to DeepSeek (deepseek-chat) via AGENTQ_LLM_PROVIDER, preserving agent-q's instructor (Mode.JSON) typed contract. - ProviderKind Literal + _PROVIDER_CLIENT_FACTORIES registry (replaces if/elif) - _PROVIDER_MODEL_MAP + self._default_model; run() defaults to it (no 2nd env read) - named ProviderConfigError on unknown provider / missing key (no bare KeyError) - VisionAgent intentionally stays on OpenAI (its model is hardcoded; Piece 3 migrates it) Tests RED->GREEN 8/8 (test/test_provider_wiring.py); live acceptance GREEN via instructor 1.4.0 (deepseek-chat round-trips nested AgentQActorOutput). Co-Authored-By: Claude Opus 4.8 (1M context) --- agentq/core/agent/base.py | 85 ++++++++++++++++++++++++++++----- test/test_provider_wiring.py | 91 ++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 12 deletions(-) create mode 100644 test/test_provider_wiring.py diff --git a/agentq/core/agent/base.py b/agentq/core/agent/base.py index 2859b983..65ba8b62 100644 --- a/agentq/core/agent/base.py +++ b/agentq/core/agent/base.py @@ -1,6 +1,6 @@ import json import os -from typing import Callable, List, Optional, Tuple, Type +from typing import Callable, Dict, List, Literal, Optional, Tuple, Type, get_args import instructor import instructor.patch @@ -13,6 +13,57 @@ from agentq.utils.function_utils import get_function_schema from agentq.utils.logger import logger +# ── Typed LLM-provider selection seam (Piece 1: DeepSeek wiring) ─────────── +# agent-q's LLM client is an OpenAI-API-compatible client wrapped by +# ``instructor`` (Mode.JSON). Provider selection is a single typed seam: +# a ``ProviderKind`` discriminator, a provider→client-factory registry, and +# a provider→default-model map. This keeps all agents source-agnostic (one +# seam, not a ``client=`` threaded through every subclass) and makes provider +# validity / key presence decidable-in-code via ``ProviderConfigError`` rather +# than a silent ``AttributeError`` / bare ``KeyError``. +ProviderKind = Literal["openai", "together", "deepseek"] + + +class ProviderConfigError(ValueError): + """Raised when the selected LLM provider is unknown, or its required API + key is absent. A named, decidable failure — never a bare KeyError or a + silent fall-through that leaves ``self.client`` unset.""" + + +def _make_openai_client() -> openai.OpenAI: + return openai.Client() + + +def _make_together_client() -> openai.OpenAI: + key = os.environ.get("TOGETHER_API_KEY") + if not key: + raise ProviderConfigError( + "Provider 'together' selected but TOGETHER_API_KEY is not set." + ) + return openai.OpenAI(base_url="https://api.together.xyz/v1", api_key=key) + + +def _make_deepseek_client() -> openai.OpenAI: + key = os.environ.get("DEEPSEEK_API_KEY") + if not key: + raise ProviderConfigError( + "Provider 'deepseek' selected but DEEPSEEK_API_KEY is not set." + ) + return openai.OpenAI(base_url="https://api.deepseek.com/v1", api_key=key) + + +_PROVIDER_CLIENT_FACTORIES: Dict[ProviderKind, Callable[[], openai.OpenAI]] = { + "openai": _make_openai_client, + "together": _make_together_client, + "deepseek": _make_deepseek_client, +} + +_PROVIDER_MODEL_MAP: Dict[ProviderKind, str] = { + "openai": "gpt-4o-2024-08-06", + "together": "gpt-4o-2024-08-06", + "deepseek": "deepseek-chat", +} + class BaseAgent: def __init__( @@ -23,7 +74,7 @@ def __init__( output_format: Type[BaseModel], tools: Optional[List[Tuple[Callable, str]]] = None, keep_message_history: bool = True, - client: str = "openai", + client: Optional[ProviderKind] = None, ): # Metdata self.agent_name = name @@ -44,16 +95,22 @@ def __init__( litellm.logging = True litellm.set_verbose = True - # Llm client - if client == "openai": - self.client = openai.Client() - elif client == "together": - self.client = openai.OpenAI( - base_url="https://api.together.xyz/v1", - api_key=os.environ["TOGETHER_API_KEY"], + # LLM client — typed, env-driven provider selection at ONE seam. + # Explicit ``client=`` wins; otherwise read AGENTQ_LLM_PROVIDER (so the + # zero-arg orchestrated agents all pick up the selected provider). + provider = (client or os.environ.get("AGENTQ_LLM_PROVIDER", "openai")).strip().lower() + if provider not in get_args(ProviderKind): + raise ProviderConfigError( + f"Unknown LLM provider {provider!r}; " + f"must be one of {list(get_args(ProviderKind))}." ) - - self.client = instructor.from_openai(self.client, mode=Mode.JSON) + self.provider: ProviderKind = provider # type: ignore[assignment] + # Registry lookup + named fail-fast (never a silent self.client-unset path). + self.client = instructor.from_openai( + _PROVIDER_CLIENT_FACTORIES[provider](), mode=Mode.JSON + ) + # Provider-bound default model, resolved once from typed state. + self._default_model: str = _PROVIDER_MODEL_MAP[provider] # Tools self.tools_list = [] @@ -76,8 +133,12 @@ async def run( screenshot: str = None, session_id: str = None, # model: str = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", - model: str = "gpt-4o-2024-08-06", + model: Optional[str] = None, ) -> BaseModel: + # Default to the provider-bound model chosen at construction time + # (self._default_model); an explicit model= override still wins. + if model is None: + model = self._default_model if not isinstance(input_data, self.input_format): raise ValueError(f"Input data must be of type {self.input_format.__name__}") diff --git a/test/test_provider_wiring.py b/test/test_provider_wiring.py new file mode 100644 index 00000000..66b90a1a --- /dev/null +++ b/test/test_provider_wiring.py @@ -0,0 +1,91 @@ +"""Piece-1: DeepSeek provider wiring (Option A') — RED→GREEN contract. + +Tests the TYPED provider-selection seam in BaseAgent: env-driven selection, +the deepseek client factory, the provider→model binding, and the named +ProviderConfigError fail-fast (unknown provider / missing key). No network: +client construction (openai.OpenAI(base_url=..., api_key=...)) is offline. +""" +import pytest + +from agentq.core.agent.base import ( + BaseAgent, + ProviderConfigError, + _PROVIDER_CLIENT_FACTORIES, + _PROVIDER_MODEL_MAP, +) +from agentq.core.models.models import AgentQActorInput, AgentQActorOutput +from agentq.core.agent.vision_agent import VisionAgent + + +def _agent(**kwargs): + return BaseAgent( + name="t", + system_prompt="", + input_format=AgentQActorInput, + output_format=AgentQActorOutput, + **kwargs, + ) + + +def test_deepseek_factory_targets_deepseek_base_url(monkeypatch): + monkeypatch.setenv("DEEPSEEK_API_KEY", "dummy-key") + client = _PROVIDER_CLIENT_FACTORIES["deepseek"]() + assert str(client.base_url).startswith("https://api.deepseek.com") + + +def test_provider_model_map_deepseek_is_deepseek_chat(): + assert _PROVIDER_MODEL_MAP["deepseek"] == "deepseek-chat" + + +def test_env_selection_routes_zero_arg_agent_to_deepseek(monkeypatch): + monkeypatch.setenv("AGENTQ_LLM_PROVIDER", "deepseek") + monkeypatch.setenv("DEEPSEEK_API_KEY", "dummy-key") + a = _agent() # zero provider arg — picks up the env seam + assert a.provider == "deepseek" + assert a._default_model == "deepseek-chat" + + +def test_missing_deepseek_key_raises_provider_config_error(monkeypatch): + monkeypatch.setenv("AGENTQ_LLM_PROVIDER", "deepseek") + monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False) + with pytest.raises(ProviderConfigError): + _agent() + + +def test_unknown_provider_raises_provider_config_error_not_attribute_error(monkeypatch): + monkeypatch.delenv("AGENTQ_LLM_PROVIDER", raising=False) + with pytest.raises(ProviderConfigError): + _agent(client="anthropic") + + +def test_case_typo_provider_normalized_then_validated(monkeypatch): + # "DeepSeek" lowercases to "deepseek" (valid); a real typo still raises. + monkeypatch.setenv("DEEPSEEK_API_KEY", "dummy-key") + a = _agent(client="DeepSeek") + assert a.provider == "deepseek" + with pytest.raises(ProviderConfigError): + _agent(client="deepsek") # genuine typo + + +def test_openai_default_preserved_regression(monkeypatch): + monkeypatch.delenv("AGENTQ_LLM_PROVIDER", raising=False) + monkeypatch.setenv("OPENAI_API_KEY", "dummy-key") + a = _agent() + assert a.provider == "openai" + assert a._default_model == "gpt-4o-2024-08-06" + + +def test_vision_agent_stays_openai_under_deepseek_env_scope_boundary(monkeypatch): + """Piece-1 SCOPE BOUNDARY (per code review): VisionAgent pins its own + ``client="openai"`` default (vision_agent.py:7), so it intentionally does + NOT pick up AGENTQ_LLM_PROVIDER. This is correct for Piece-1 — vision's + model is also hardcoded (browser_mcts.py), so migrating only its client + would send gpt-4o to DeepSeek. Vision's DeepSeek migration is Piece-3. + The no-tools agents (planner/actor/critic) forward no ``client=`` and DO + pick up the env seam (proven by the real-path acceptance test).""" + monkeypatch.setenv("AGENTQ_LLM_PROVIDER", "deepseek") + monkeypatch.setenv("DEEPSEEK_API_KEY", "dummy-key") + monkeypatch.setenv("OPENAI_API_KEY", "dummy-key") + v = VisionAgent() # zero-arg — but its own default pins client="openai" + assert v.provider == "openai" + assert v._default_model == "gpt-4o-2024-08-06" From f9762e5b9fe5f9e8c68b8de45ae9d42a3a6b7700 Mon Sep 17 00:00:00 2001 From: Johannes Castner Date: Fri, 5 Jun 2026 20:58:13 +0100 Subject: [PATCH 2/4] fix(mcts): make execute_browser_action total over ActionType (SOLVE_CAPTCHA + else-raise) The deterministic browser-action dispatcher in browser_mcts.py handled 4 of the 5 ActionType members; ActionType.SOLVE_CAPTCHA had no branch and the if/elif chain had no else, so a SOLVE_CAPTCHA action proposed by the actor silently no-op'd. - import solve_captcha; add the SOLVE_CAPTCHA dispatch branch (mirrors the orchestrator's handle_agentq_actions branch + the [mmid] selector transform used by CLICK/ENTER_TEXT_AND_CLICK) - add else: raise ValueError so the dispatch is total over the ActionType space (no silent fall-through for an unknown/future action type) - test/test_execute_browser_action_coverage.py: a pure-AST type-space invariant test asserting (a) every ActionType has a branch, (b) each branch awaits a real skill call (a pass/print stub fails), (c) the chain ends in an else that raises. Reviewed by the 9-axis convergence committee (all YES). The pre-existing duplicate dispatcher (orchestrator.handle_agentq_actions) is named for a follow-on consolidation. Co-Authored-By: Claude Opus 4.8 (1M context) --- agentq/core/mcts/browser_mcts.py | 12 +++ test/test_execute_browser_action_coverage.py | 108 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 test/test_execute_browser_action_coverage.py diff --git a/agentq/core/mcts/browser_mcts.py b/agentq/core/mcts/browser_mcts.py index bfc2b089..f9e6feaa 100644 --- a/agentq/core/mcts/browser_mcts.py +++ b/agentq/core/mcts/browser_mcts.py @@ -36,6 +36,7 @@ from agentq.core.skills.get_screenshot import get_screenshot from agentq.core.skills.get_url import geturl from agentq.core.skills.open_url import openurl +from agentq.core.skills.solve_captcha import solve_captcha from agentq.core.web_driver.playwright import PlaywrightManager # ANSI color codes @@ -130,6 +131,17 @@ async def execute_browser_action( ) # await wait_for_navigation() print(f"{CYAN}[DEBUG] Entered text and clicked element{RESET}") + elif action.type == ActionType.SOLVE_CAPTCHA: + await solve_captcha( + text_selector=f"[mmid='{action.text_element_mmid}']", + click_selector=f"[mmid='{action.click_element_mmid}']", + wait_before_click_execution=action.wait_before_click_execution or 2, + ) + print(f"{CYAN}[DEBUG] Solved captcha{RESET}") + else: + raise ValueError( + f"Unhandled ActionType in execute_browser_action: {action.type}" + ) try: new_dom = await self.get_current_dom() diff --git a/test/test_execute_browser_action_coverage.py b/test/test_execute_browser_action_coverage.py new file mode 100644 index 00000000..32770578 --- /dev/null +++ b/test/test_execute_browser_action_coverage.py @@ -0,0 +1,108 @@ +"""Piece-2 (DeepSeek): execute_browser_action must dispatch EVERY ActionType — no +silent no-op. Type-space invariant test, hardened per the committee round-1 +Brittleness/AIMA finding: a coverage check that only asserts `ActionType.X` +*appears* is too weak — a branch body of `pass` would GREEN it, and it can't see +the else-raise. So this asserts three things via AST (no browser deps): + (1) COVERAGE — every ActionType enum member has a dispatch branch; + (2) DISPATCH — each branch body actually CALLS something (not a `pass`/no-op); + (3) TOTALITY — the if/elif chain ends in an `else` that RAISES (no silent + fall-through for an unknown/future ActionType). + +RED today (token 1a5db28e): SOLVE_CAPTCHA has no branch and there is no else-raise. +""" +import ast +import os + +REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +_MODELS = os.path.join(REPO, "agentq/core/models/models.py") +_DISPATCHER = os.path.join(REPO, "agentq/core/mcts/browser_mcts.py") + + +def _action_type_members() -> set[str]: + src = open(_MODELS).read() + members: set[str] = set() + for n in ast.walk(ast.parse(src)): + if isinstance(n, ast.ClassDef) and n.name == "ActionType": + for b in n.body: + if isinstance(b, ast.Assign) and isinstance(b.targets[0], ast.Name): + members.add(b.targets[0].id) + return members + + +def _execute_browser_action_func() -> ast.AsyncFunctionDef: + for n in ast.walk(ast.parse(open(_DISPATCHER).read())): + if isinstance(n, ast.AsyncFunctionDef) and n.name == "execute_browser_action": + return n + raise AssertionError("execute_browser_action not found in browser_mcts.py") + + +def _dispatch_if(func: ast.AsyncFunctionDef) -> ast.If: + """The top-level `if action.type == ActionType...:` chain.""" + for stmt in func.body: + if isinstance(stmt, ast.If) and any( + isinstance(a, ast.Attribute) + and isinstance(a.value, ast.Name) + and a.value.id == "ActionType" + for a in ast.walk(stmt.test) + ): + return stmt + raise AssertionError("no `if action.type == ActionType...` dispatch chain found") + + +def _walk_chain(if_node: ast.If): + """Yield (member|None, body, is_else) for each branch of the if/elif/else chain.""" + node = if_node + while True: + member = next( + (a.attr for a in ast.walk(node.test) + if isinstance(a, ast.Attribute) and isinstance(a.value, ast.Name) + and a.value.id == "ActionType"), + None, + ) + yield (member, node.body, False) + orelse = node.orelse + if len(orelse) == 1 and isinstance(orelse[0], ast.If): + node = orelse[0] + else: + yield (None, orelse, True) # the terminal else body (may be empty) + return + + +def _branches(): + return list(_walk_chain(_dispatch_if(_execute_browser_action_func()))) + + +def test_coverage_every_action_type_has_a_branch(): + members = _action_type_members() + handled = {m for m, _body, is_else in _branches() if m and not is_else} + assert members, "no ActionType members parsed — test wiring broken" + missing = members - handled + assert not missing, ( + f"execute_browser_action has no dispatch branch for ActionType(s): {sorted(missing)}" + ) + + +def test_each_branch_actually_dispatches_not_a_noop(): + # Require an `await ` (the codebase's skill-dispatch pattern), NOT just any + # call — else a `print("TODO")`-only branch would GREEN (print is a Call but is + # never awaited). All real skill dispatches are `await openurl(...)` etc. + for member, body, is_else in _branches(): + if is_else or member is None: + continue + has_real_dispatch = any( + isinstance(n, ast.Await) and isinstance(n.value, ast.Call) + for stmt in body + for n in ast.walk(stmt) + ) + assert has_real_dispatch, ( + f"ActionType.{member} branch has no `await (...)` dispatch — a " + f"`pass`/`print`-only body silently drops the action" + ) + + +def test_chain_ends_in_else_that_raises(): + branches = _branches() + member, else_body, is_else = branches[-1] + assert is_else, "if/elif chain has no terminal `else` — an unknown ActionType silently no-ops" + has_raise = any(isinstance(n, ast.Raise) for stmt in else_body for n in ast.walk(stmt)) + assert has_raise, "terminal `else` does not raise — unknown/future ActionType silently no-ops" From 3e083fddda1c86bebe65542ca6971e016014ecb5 Mon Sep 17 00:00:00 2001 From: Johannes Castner Date: Fri, 5 Jun 2026 22:01:04 +0100 Subject: [PATCH 3/4] refactor(skills): consolidate both ActionType executors into shared dispatch_action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both live executors of actor-proposed typed Actions — execute_browser_action (MCTS search rollouts, browser_mcts.py) and handle_agentq_actions (orchestrator real loop) — duplicated the `action.type -> await ` map and diverged (one was non-total: its `else` returned an "Unsupported..." string). Extract one shared, total dispatch_action (skills/dispatch.py); both callers route through it and keep their OWN wait timing (search 2/2/2 vs real-loop 1/1.5/1 are distinct execution contexts — Option A, behavior-preserving). An unknown/future ActionType now raises a typed UnhandledActionTypeError, which the orchestrator re-raises BEFORE its broad retry `except` (no silent retry/swallow); the skills' own ValueError("No active page") still retries. Tests (test/test_execute_browser_action_coverage.py): AST coverage/totality on dispatch_action, no-duplication over BOTH callers, typed-bypass ordering, and per-caller wait sink over CLICK/ENTER_TEXT_AND_CLICK/SOLVE_CAPTCHA x [1,2]. 22 passed (14 R2 + 8 Piece-1), no regression. Co-Authored-By: Claude Opus 4.8 (1M context) --- agentq/core/mcts/browser_mcts.py | 51 +---- agentq/core/orchestrator/orchestrator.py | 52 ++---- agentq/core/skills/dispatch.py | 70 +++++++ test/test_execute_browser_action_coverage.py | 186 +++++++++++++++---- 4 files changed, 243 insertions(+), 116 deletions(-) create mode 100644 agentq/core/skills/dispatch.py diff --git a/agentq/core/mcts/browser_mcts.py b/agentq/core/mcts/browser_mcts.py index f9e6feaa..257f8c03 100644 --- a/agentq/core/mcts/browser_mcts.py +++ b/agentq/core/mcts/browser_mcts.py @@ -29,14 +29,10 @@ VisionInput, VisionOutput, ) -from agentq.core.skills.click_using_selector import click -from agentq.core.skills.enter_text_and_click import enter_text_and_click -from agentq.core.skills.enter_text_using_selector import EnterTextEntry, entertext +from agentq.core.skills.dispatch import dispatch_action from agentq.core.skills.get_dom_with_content_type import get_dom_with_content_type from agentq.core.skills.get_screenshot import get_screenshot from agentq.core.skills.get_url import geturl -from agentq.core.skills.open_url import openurl -from agentq.core.skills.solve_captcha import solve_captcha from agentq.core.web_driver.playwright import PlaywrightManager # ANSI color codes @@ -104,44 +100,13 @@ async def execute_browser_action( action = browser_action.task_with_action.actions_to_be_performed[0] print(f"{YELLOW}[DEBUG] Executing browser action: {action.type}{RESET}") - if action.type == ActionType.GOTO_URL: - print(f"{CYAN}[DEBUG] Trying to go to url{RESET}") - await openurl(url=action.website, timeout=action.timeout or 1) - print(f"{CYAN}[DEBUG] Went to url{RESET}") - elif action.type == ActionType.TYPE: - entry = EnterTextEntry( - query_selector=f"[mmid='{action.mmid}']", - text=action.content, - ) - await entertext(entry) - # await wait_for_navigation() - print(f"{CYAN}[DEBUG] Typed text into element{RESET}") - elif action.type == ActionType.CLICK: - await click( - selector=f"[mmid='{action.mmid}']", - wait_before_execution=action.wait_before_execution or 2, - ) - print(f"{CYAN}[DEBUG] Clicked element{RESET}") - elif action.type == ActionType.ENTER_TEXT_AND_CLICK: - await enter_text_and_click( - text_selector=f"[mmid='{action.text_element_mmid}']", - text_to_enter=action.text_to_enter, - click_selector=f"[mmid='{action.click_element_mmid}']", - wait_before_click_execution=action.wait_before_click_execution or 2, - ) - # await wait_for_navigation() - print(f"{CYAN}[DEBUG] Entered text and clicked element{RESET}") - elif action.type == ActionType.SOLVE_CAPTCHA: - await solve_captcha( - text_selector=f"[mmid='{action.text_element_mmid}']", - click_selector=f"[mmid='{action.click_element_mmid}']", - wait_before_click_execution=action.wait_before_click_execution or 2, - ) - print(f"{CYAN}[DEBUG] Solved captcha{RESET}") - else: - raise ValueError( - f"Unhandled ActionType in execute_browser_action: {action.type}" - ) + # All browser-skill dispatch lives in the shared dispatch_action (skills/dispatch.py). This is + # the MCTS search-rollout execution path, so it passes the SEARCH waits (2/2/2). An unhandled + # ActionType raises UnhandledActionTypeError (a ValueError) — preserving the prior else-raise. + await dispatch_action( + action, click_wait=2, enter_text_and_click_wait=2, captcha_wait=2 + ) + print(f"{CYAN}[DEBUG] Executed {action.type} via dispatch_action{RESET}") try: new_dom = await self.get_current_dom() diff --git a/agentq/core/orchestrator/orchestrator.py b/agentq/core/orchestrator/orchestrator.py index 288abb1c..5a3513fd 100644 --- a/agentq/core/orchestrator/orchestrator.py +++ b/agentq/core/orchestrator/orchestrator.py @@ -26,14 +26,10 @@ Task, TaskWithActions, ) -from agentq.core.skills.click_using_selector import click -from agentq.core.skills.enter_text_and_click import enter_text_and_click -from agentq.core.skills.enter_text_using_selector import EnterTextEntry, entertext +from agentq.core.skills.dispatch import dispatch_action, UnhandledActionTypeError from agentq.core.skills.get_dom_with_content_type import get_dom_with_content_type from agentq.core.skills.get_screenshot import get_screenshot from agentq.core.skills.get_url import geturl -from agentq.core.skills.open_url import openurl -from agentq.core.skills.solve_captcha import solve_captcha from agentq.core.web_driver.playwright import PlaywrightManager init(autoreset=True) @@ -395,46 +391,22 @@ async def handle_agentq_actions(self, actions: List[Action]): for attempt in range(max_retries): try: + # Every action dispatches identically (real-loop waits 1/1.5/1); only GOTO needs the + # extra post-navigation settle. dispatch_action stays inside this retry `try`, so a + # transient failure still retries. openurl already waits for networkidle internally + # (skills/open_url.py); the wait below is defensive redundancy, scoped to GOTO. + result = await dispatch_action( + action, click_wait=1, enter_text_and_click_wait=1.5, captcha_wait=1 + ) if action.type == ActionType.GOTO_URL: - result = await openurl( - url=action.website, timeout=action.timeout or 1 - ) await page.wait_for_load_state("networkidle", timeout=10000) - print("Action - GOTO") - elif action.type == ActionType.TYPE: - entry = EnterTextEntry( - query_selector=f"[mmid='{action.mmid}']", - text=action.content, - ) - result = await entertext(entry) - print("Action - TYPE") - elif action.type == ActionType.CLICK: - result = await click( - selector=f"[mmid='{action.mmid}']", - wait_before_execution=action.wait_before_execution or 1, - ) - print("Action - CLICK") - elif action.type == ActionType.ENTER_TEXT_AND_CLICK: - result = await enter_text_and_click( - text_selector=f"[mmid='{action.text_element_mmid}']", - text_to_enter=action.text_to_enter, - click_selector=f"[mmid='{action.click_element_mmid}']", - wait_before_click_execution=action.wait_before_click_execution - or 1.5, - ) - print("Action - ENTER TEXT AND CLICK") - elif action.type == ActionType.SOLVE_CAPTCHA: - result = await solve_captcha( - text_selector=f"[mmid='{action.text_element_mmid}']", - click_selector=f"[mmid='{action.click_element_mmid}']", - wait_before_click_execution=action.wait_before_click_execution - or 1, - ) - else: - result = f"Unsupported action type: {action.type}" results.append(result) break # If successful, break out of the retry loop + except UnhandledActionTypeError: + # An unknown/future ActionType is a coverage error, not a transient failure — + # surface it immediately instead of retrying 3x and swallowing it into a string. + raise except Exception as e: print(f"Error during action {action.type}: {e}") if attempt < max_retries - 1: diff --git a/agentq/core/skills/dispatch.py b/agentq/core/skills/dispatch.py new file mode 100644 index 00000000..64054567 --- /dev/null +++ b/agentq/core/skills/dispatch.py @@ -0,0 +1,70 @@ +"""Single shared typed-`Action` → browser-skill dispatch map for agent-q. + +Both LIVE executors route through this one function: + - execute_browser_action (agentq/core/mcts/browser_mcts.py) — MCTS search rollouts + - handle_agentq_actions (agentq/core/orchestrator/orchestrator.py) — orchestrator real loop + +The dispatch MAP is shared; each caller passes its OWN wait timing, because an MCTS search rollout and +a real orchestrator execution are distinct execution contexts (the search waits are world-model +parameters; the real-loop waits are infra configuration — they are deliberately NOT unified). + +`dispatch_action` is TOTAL over ActionType: an unhandled/future type raises `UnhandledActionTypeError` +rather than silently no-opping. +""" +from agentq.core.models.models import Action, ActionType +from agentq.core.skills.click_using_selector import click +from agentq.core.skills.enter_text_and_click import enter_text_and_click +from agentq.core.skills.enter_text_using_selector import EnterTextEntry, entertext +from agentq.core.skills.open_url import openurl +from agentq.core.skills.solve_captcha import solve_captcha + + +class UnhandledActionTypeError(ValueError): + """Raised by `dispatch_action` for an ActionType with no branch. + + A `ValueError` subclass (so existing ``except ValueError`` sites still catch it), but a DISTINCT + type so a caller with a retry loop can re-raise it immediately — an unknown action type is a + programming/coverage error, not a transient failure to retry. + """ + + +async def dispatch_action( + action: Action, + *, + click_wait: float, + enter_text_and_click_wait: float, + captcha_wait: float, +): + """Dispatch one typed `Action` to its browser skill and return the skill's result. + + The wait values are supplied by the CALLER (not defaulted here) so the MCTS search path and the + orchestrator real-loop path each keep their own timing. + """ + if action.type == ActionType.GOTO_URL: + return await openurl(url=action.website, timeout=action.timeout or 1) + elif action.type == ActionType.TYPE: + return await entertext( + EnterTextEntry(query_selector=f"[mmid='{action.mmid}']", text=action.content) + ) + elif action.type == ActionType.CLICK: + return await click( + selector=f"[mmid='{action.mmid}']", + wait_before_execution=action.wait_before_execution or click_wait, + ) + elif action.type == ActionType.ENTER_TEXT_AND_CLICK: + return await enter_text_and_click( + text_selector=f"[mmid='{action.text_element_mmid}']", + text_to_enter=action.text_to_enter, + click_selector=f"[mmid='{action.click_element_mmid}']", + wait_before_click_execution=action.wait_before_click_execution or enter_text_and_click_wait, + ) + elif action.type == ActionType.SOLVE_CAPTCHA: + return await solve_captcha( + text_selector=f"[mmid='{action.text_element_mmid}']", + click_selector=f"[mmid='{action.click_element_mmid}']", + wait_before_click_execution=action.wait_before_click_execution or captcha_wait, + ) + else: + raise UnhandledActionTypeError( + f"Unhandled ActionType in dispatch_action: {action.type}" + ) diff --git a/test/test_execute_browser_action_coverage.py b/test/test_execute_browser_action_coverage.py index 32770578..2d8e22ff 100644 --- a/test/test_execute_browser_action_coverage.py +++ b/test/test_execute_browser_action_coverage.py @@ -1,21 +1,43 @@ -"""Piece-2 (DeepSeek): execute_browser_action must dispatch EVERY ActionType — no -silent no-op. Type-space invariant test, hardened per the committee round-1 -Brittleness/AIMA finding: a coverage check that only asserts `ActionType.X` -*appears* is too weak — a branch body of `pass` would GREEN it, and it can't see -the else-raise. So this asserts three things via AST (no browser deps): - (1) COVERAGE — every ActionType enum member has a dispatch branch; - (2) DISPATCH — each branch body actually CALLS something (not a `pass`/no-op); - (3) TOTALITY — the if/elif chain ends in an `else` that RAISES (no silent - fall-through for an unknown/future ActionType). - -RED today (token 1a5db28e): SOLVE_CAPTCHA has no branch and there is no else-raise. +"""R2 (DeepSeek learning-cycle): the typed-Action skill-dispatch map must live in ONE shared +`dispatch_action` (agentq/core/skills/dispatch.py), total over ActionType, and BOTH live executors +(execute_browser_action in the MCTS search path; handle_agentq_actions in the orchestrator real loop) +must route through it — NOT keep their own inline `action.type -> await ` chains. + +Committee-converged design (Option A, behavior-preserving dedup): the dispatch MAP is shared; each caller +keeps its OWN wait timing by passing it in (MCTS 2/2/2; real loop 1/1.5/1 — distinct execution kinds). + +This asserts, via AST (no browser deps) + focused runtime checks: + (1) COVERAGE — dispatch_action has a branch for every ActionType member; + (2) DISPATCH — each branch actually awaits a call (not a pass/print no-op); + (3) TOTALITY — dispatch_action ends in an `else` that RAISES (UnhandledActionTypeError); + (4) NO-DUP — NEITHER execute_browser_action NOR handle_agentq_actions awaits any browser SKILL + directly; the only skill dispatch they perform is `await dispatch_action(...)`. + (False-positive-free: the surviving GOTO `if` in handle_agentq_actions awaits only + dispatch_action + page.wait_for_load_state, neither of which is in the skill set.) + (5) MAP-OWNS — dispatch_action itself awaits all five skills (the map lives there, nowhere else); + (6) SURFACES — dispatch_action raises UnhandledActionTypeError on an unknown type (runtime), and + handle_agentq_actions has `except UnhandledActionTypeError: raise` BEFORE the broad + `except Exception` (so an unknown type is not retried/swallowed); + (7) SINK-WAITS — dispatch_action passes the CALLER's wait to the skill (=1 vs =2), proving Option A. """ import ast +import asyncio import os +from types import SimpleNamespace + +import pytest + +from agentq.core.models.models import ActionType REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) _MODELS = os.path.join(REPO, "agentq/core/models/models.py") -_DISPATCHER = os.path.join(REPO, "agentq/core/mcts/browser_mcts.py") +_DISPATCH = os.path.join(REPO, "agentq/core/skills/dispatch.py") +_MCTS = os.path.join(REPO, "agentq/core/mcts/browser_mcts.py") +_ORCH = os.path.join(REPO, "agentq/core/orchestrator/orchestrator.py") + +# The browser-skill names the dispatch map routes to. A caller awaiting any of these directly is a +# duplicate of the map. Pinned here AND asserted to equal what dispatch.py imports (test_map_owns). +_SKILLS = {"openurl", "entertext", "click", "enter_text_and_click", "solve_captcha"} def _action_type_members() -> set[str]: @@ -29,15 +51,18 @@ def _action_type_members() -> set[str]: return members -def _execute_browser_action_func() -> ast.AsyncFunctionDef: - for n in ast.walk(ast.parse(open(_DISPATCHER).read())): - if isinstance(n, ast.AsyncFunctionDef) and n.name == "execute_browser_action": +def _func(path: str, name: str) -> ast.AsyncFunctionDef: + for n in ast.walk(ast.parse(open(path).read())): + if isinstance(n, ast.AsyncFunctionDef) and n.name == name: return n - raise AssertionError("execute_browser_action not found in browser_mcts.py") + raise AssertionError(f"{name} not found in {path}") + + +def _dispatch_func() -> ast.AsyncFunctionDef: + return _func(_DISPATCH, "dispatch_action") def _dispatch_if(func: ast.AsyncFunctionDef) -> ast.If: - """The top-level `if action.type == ActionType...:` chain.""" for stmt in func.body: if isinstance(stmt, ast.If) and any( isinstance(a, ast.Attribute) @@ -50,7 +75,6 @@ def _dispatch_if(func: ast.AsyncFunctionDef) -> ast.If: def _walk_chain(if_node: ast.If): - """Yield (member|None, body, is_else) for each branch of the if/elif/else chain.""" node = if_node while True: member = next( @@ -64,45 +88,141 @@ def _walk_chain(if_node: ast.If): if len(orelse) == 1 and isinstance(orelse[0], ast.If): node = orelse[0] else: - yield (None, orelse, True) # the terminal else body (may be empty) + yield (None, orelse, True) return def _branches(): - return list(_walk_chain(_dispatch_if(_execute_browser_action_func()))) + return list(_walk_chain(_dispatch_if(_dispatch_func()))) +def _awaited_call_names(node: ast.AST) -> set[str]: + """Names directly awaited as `await name(...)` anywhere under node.""" + names: set[str] = set() + for n in ast.walk(node): + if isinstance(n, ast.Await) and isinstance(n.value, ast.Call): + f = n.value.func + if isinstance(f, ast.Name): + names.add(f.id) + return names + + +# ── (1) COVERAGE ────────────────────────────────────────────────────────────── def test_coverage_every_action_type_has_a_branch(): members = _action_type_members() handled = {m for m, _body, is_else in _branches() if m and not is_else} assert members, "no ActionType members parsed — test wiring broken" missing = members - handled - assert not missing, ( - f"execute_browser_action has no dispatch branch for ActionType(s): {sorted(missing)}" - ) + assert not missing, f"dispatch_action has no branch for ActionType(s): {sorted(missing)}" +# ── (2) DISPATCH ────────────────────────────────────────────────────────────── def test_each_branch_actually_dispatches_not_a_noop(): - # Require an `await ` (the codebase's skill-dispatch pattern), NOT just any - # call — else a `print("TODO")`-only branch would GREEN (print is a Call but is - # never awaited). All real skill dispatches are `await openurl(...)` etc. for member, body, is_else in _branches(): if is_else or member is None: continue has_real_dispatch = any( isinstance(n, ast.Await) and isinstance(n.value, ast.Call) - for stmt in body - for n in ast.walk(stmt) + for stmt in body for n in ast.walk(stmt) ) assert has_real_dispatch, ( - f"ActionType.{member} branch has no `await (...)` dispatch — a " - f"`pass`/`print`-only body silently drops the action" + f"ActionType.{member} branch has no `await (...)` dispatch" ) +# ── (3) TOTALITY ────────────────────────────────────────────────────────────── def test_chain_ends_in_else_that_raises(): - branches = _branches() - member, else_body, is_else = branches[-1] - assert is_else, "if/elif chain has no terminal `else` — an unknown ActionType silently no-ops" + member, else_body, is_else = _branches()[-1] + assert is_else, "dispatch chain has no terminal `else` — unknown ActionType silently no-ops" has_raise = any(isinstance(n, ast.Raise) for stmt in else_body for n in ast.walk(stmt)) assert has_raise, "terminal `else` does not raise — unknown/future ActionType silently no-ops" + + +# ── (4) NO-DUP: neither caller awaits a skill directly ──────────────────────── +@pytest.mark.parametrize("path,fn", [ + (_MCTS, "execute_browser_action"), + (_ORCH, "handle_agentq_actions"), +]) +def test_caller_does_not_inline_skill_dispatch(path, fn): + awaited = _awaited_call_names(_func(path, fn)) + leaked = awaited & _SKILLS + assert not leaked, ( + f"{fn} still awaits browser skill(s) {sorted(leaked)} directly — it must route through " + f"dispatch_action instead (duplicate dispatch map)" + ) + assert "dispatch_action" in awaited, f"{fn} does not await dispatch_action" + + +# ── (5) MAP-OWNS: dispatch_action awaits all five skills; the set is pinned ──── +def test_dispatch_action_owns_the_whole_skill_map(): + awaited = _awaited_call_names(_dispatch_func()) + assert _SKILLS <= awaited, f"dispatch_action missing skill dispatch for: {sorted(_SKILLS - awaited)}" + + +# ── (6) SURFACES: unknown type raises, and the orchestrator bypasses the retry ─ +def test_dispatch_action_raises_on_unknown_type(): + from agentq.core.skills.dispatch import dispatch_action, UnhandledActionTypeError + bogus = SimpleNamespace(type="NOT_A_REAL_ACTION_TYPE") + with pytest.raises(UnhandledActionTypeError): + asyncio.run( + dispatch_action(bogus, click_wait=1, enter_text_and_click_wait=1.5, captcha_wait=1) + ) + + +def test_orchestrator_reraises_unhandled_before_broad_except(): + fn = _func(_ORCH, "handle_agentq_actions") + # find the try whose handlers include the typed bypass + for tnode in ast.walk(fn): + if not isinstance(tnode, ast.Try): + continue + names = [ + (h.type.id if isinstance(h.type, ast.Name) else None) for h in tnode.handlers + ] + if "UnhandledActionTypeError" in names and "Exception" in names: + assert names.index("UnhandledActionTypeError") < names.index("Exception"), ( + "`except UnhandledActionTypeError` must come BEFORE `except Exception`" + ) + # and it must re-raise, not swallow + h = tnode.handlers[names.index("UnhandledActionTypeError")] + assert any(isinstance(n, ast.Raise) for n in ast.walk(h)), ( + "the UnhandledActionTypeError handler must re-raise" + ) + return + raise AssertionError("handle_agentq_actions has no try with the typed-exception bypass") + + +# ── (7) SINK-WAITS: the caller's own wait reaches the skill (Option A) ───────── +# Each wait-bearing ActionType: (type, skill attr in dispatch module, action fields, the dispatch +# wait-param that should reach it, the skill's wait kwarg name). Covers ALL three wait-bearing branches +# (CLICK, ENTER_TEXT_AND_CLICK, SOLVE_CAPTCHA) so Option A is verified at every sink, not just CLICK. +_WAIT_SINKS = [ + (ActionType.CLICK, "click", + dict(mmid="42", wait_before_execution=None), "click_wait", "wait_before_execution"), + (ActionType.ENTER_TEXT_AND_CLICK, "enter_text_and_click", + dict(text_element_mmid="1", text_to_enter="x", click_element_mmid="2", wait_before_click_execution=None), + "enter_text_and_click_wait", "wait_before_click_execution"), + (ActionType.SOLVE_CAPTCHA, "solve_captcha", + dict(text_element_mmid="1", click_element_mmid="2", wait_before_click_execution=None), + "captcha_wait", "wait_before_click_execution"), +] + + +@pytest.mark.parametrize("caller_wait", [1, 2]) +@pytest.mark.parametrize("atype,skill,action_fields,wait_param,wait_kwarg", _WAIT_SINKS) +def test_dispatch_passes_callers_own_wait_to_skill( + monkeypatch, caller_wait, atype, skill, action_fields, wait_param, wait_kwarg +): + import agentq.core.skills.dispatch as dispatch_mod + + captured = {} + + async def fake_skill(**kwargs): + captured.update(kwargs) + return "ok" + + monkeypatch.setattr(dispatch_mod, skill, fake_skill) + action = SimpleNamespace(type=atype, **action_fields) + waits = {"click_wait": 1, "enter_text_and_click_wait": 1.5, "captcha_wait": 1} + waits[wait_param] = caller_wait # this branch's caller-supplied wait + asyncio.run(dispatch_mod.dispatch_action(action, **waits)) + assert captured[wait_kwarg] == caller_wait # the CALLER's wait reaches the skill (Option A) From a2c513cfa88678e22b8e8f4295dc51177ae8e829 Mon Sep 17 00:00:00 2001 From: Johannes Castner Date: Fri, 5 Jun 2026 22:37:10 +0100 Subject: [PATCH 4/4] =?UTF-8?q?refactor(skills):=20R2=20code-review=20foll?= =?UTF-8?q?ow-ups=20=E2=80=94=20typed=20return=20+=20drop=20dead=20import?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Post-commit code review of 3e083fd: (1) annotate dispatch_action -> Any (it returns the dispatched skill's heterogeneous result); (2) remove the now-unused ActionType import from browser_mcts.py (execute_browser_action no longer references it after routing through dispatch_action). No behavior change; 22 passed (token 352d2ce1). Fidelity committee 6/6 YES on the landed code (BQ panel closed); code review clean; adversarial clean on all live ActionTypes. Co-Authored-By: Claude Opus 4.8 (1M context) --- agentq/core/mcts/browser_mcts.py | 1 - agentq/core/skills/dispatch.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/agentq/core/mcts/browser_mcts.py b/agentq/core/mcts/browser_mcts.py index 257f8c03..1a7537ce 100644 --- a/agentq/core/mcts/browser_mcts.py +++ b/agentq/core/mcts/browser_mcts.py @@ -15,7 +15,6 @@ from agentq.core.mcts.core.mcts import MCTS, MCTSResult from agentq.core.mcts.visualization.visualizer_client import visualize from agentq.core.models.models import ( - ActionType, AgentQActorInput, AgentQActorOutput, AgentQCriticInput, diff --git a/agentq/core/skills/dispatch.py b/agentq/core/skills/dispatch.py index 64054567..081be2ea 100644 --- a/agentq/core/skills/dispatch.py +++ b/agentq/core/skills/dispatch.py @@ -11,6 +11,8 @@ `dispatch_action` is TOTAL over ActionType: an unhandled/future type raises `UnhandledActionTypeError` rather than silently no-opping. """ +from typing import Any + from agentq.core.models.models import Action, ActionType from agentq.core.skills.click_using_selector import click from agentq.core.skills.enter_text_and_click import enter_text_and_click @@ -34,7 +36,7 @@ async def dispatch_action( click_wait: float, enter_text_and_click_wait: float, captcha_wait: float, -): +) -> Any: """Dispatch one typed `Action` to its browser skill and return the skill's result. The wait values are supplied by the CALLER (not defaulted here) so the MCTS search path and the