Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
647ed9d
test: add domain PIT mutation workflow
alexk-dev Apr 14, 2026
772f7d0
Merge branch 'main' into test/pitests
alexk-dev Apr 15, 2026
5473cc4
test(domain): narrow PIT target scope to high-signal packages
alexk-dev Apr 15, 2026
1a2783b
refactor(context): extract shared TokenEstimator utility
alexk-dev Apr 15, 2026
0a0b639
test(domain): harden memory layer and view builder against PIT mutants
alexk-dev Apr 15, 2026
b4b0f78
test(domain): cover reflection tier and candidate normalization branches
alexk-dev Apr 15, 2026
f680aa6
test(domain): cover run service caching, verdict stamping, and lambda…
alexk-dev Apr 15, 2026
26554ca
test(domain): assert skill resolver source attribution
alexk-dev Apr 15, 2026
a1e455b
test(selfevolving): cover hydration service edge cases
alexk-dev Apr 15, 2026
1cad4d4
style(test): drop redundant java.util qualifier for List
alexk-dev Apr 15, 2026
6eb264f
test(domain): pin layer name/order, diagnostics defaults, and store c…
alexk-dev Apr 15, 2026
20d60ed
test(selfevolving): verify completeRun invokes journal save
alexk-dev Apr 15, 2026
b3c23a3
test(memory): cover isMemoryDisabled preset-fallback branch
alexk-dev Apr 15, 2026
2a5a084
refactor(selfevolving): drop unreachable candidate guard in bundle id
alexk-dev Apr 15, 2026
5ea3ba4
test(toolloop): cover null-metadata branch in readPreviousModel
alexk-dev Apr 15, 2026
ec034af
test(memory): cover procedural upsert happy path and guard
alexk-dev Apr 15, 2026
fae739f
Merge remote-tracking branch 'origin/main' into test/pitests
alexk-dev Apr 15, 2026
7e7a33a
chore(ci): make PIT mutation tests opt-in via workflow_dispatch
alexk-dev Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions .github/workflows/pitests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: PIT Tests

# Mutation testing is opt-in only: it runs on demand via the Actions UI or
# `gh workflow run "PIT Tests"`. It is NOT wired to push or pull_request
# events, so it never appears as a required check and never blocks a merge.
on:
  workflow_dispatch:

# The job only checks out sources, builds, and uploads an artifact, so the
# GITHUB_TOKEN is restricted to read-only repository access.
permissions:
  contents: read

env:
  # Commit of alexk-dev/golemcore-plugins whose extension-api/runtime-api
  # artifacts are installed locally before the mutation run. Quoted so the
  # value is always treated as a string.
  PLUGIN_API_REPO_REF: '67cd0d10a997f494dcaf4686438e5c6bcfc8ac65'
  # Forwarded to Maven as -Dpitest.mutationThreshold in the PIT step below.
  PITEST_MUTATION_THRESHOLD: '85'

jobs:
  pitests:
    name: Domain Mutation Tests
    runs-on: ubuntu-latest
    timeout-minutes: 90

    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Set up JDK 25
        uses: actions/setup-java@v5
        with:
          java-version: '25'
          distribution: 'temurin'
          cache: maven

      # The plugin API sources are checked out next to the main project so
      # their artifacts can be installed into the local Maven repository.
      - name: Checkout plugin APIs
        uses: actions/checkout@v6
        with:
          repository: alexk-dev/golemcore-plugins
          ref: ${{ env.PLUGIN_API_REPO_REF }}
          path: golemcore-plugins

      # Folded scalar: the lines below are joined with single spaces into one
      # mvn command line.
      - name: Install plugin API artifacts
        run: >
          mvn -B -ntp -Djgitver.skip=true
          -f golemcore-plugins/pom.xml
          -pl extension-api,runtime-api -am
          install -DskipTests

      - name: Run PIT mutation tests
        run: >
          ./mvnw -B -ntp -P pitests
          test-compile org.pitest:pitest-maven:mutationCoverage
          -DskipGitHooks=true
          -Dpitest.mutationThreshold=${{ env.PITEST_MUTATION_THRESHOLD }}

      # Runs even when the mutation step fails so the report of surviving
      # mutants is still available for inspection.
      - name: Upload PIT reports
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: pit-reports
          path: target/pit-reports
          if-no-files-found: ignore
76 changes: 76 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,82 @@
</build>

<profiles>
<!--
Mutation testing profile. It is only active when selected explicitly
(e.g. `-P pitests`, as the PIT Tests workflow does) and configures the
pitest-maven plugin for a curated subset of the domain module.
-->
<profile>
<id>pitests</id>
<!--
Tunables for the mutation run. Being ordinary Maven properties they can be
overridden on the command line; the PIT Tests workflow passes
-Dpitest.mutationThreshold explicitly.
-->
<properties>
<pitest.version>1.23.0</pitest.version>
<pitest.junit5.plugin.version>1.2.3</pitest.junit5.plugin.version>
<pitest.mutationThreshold>85</pitest.mutationThreshold>
<pitest.threads>4</pitest.threads>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.pitest</groupId>
<artifactId>pitest-maven</artifactId>
<version>${pitest.version}</version>
<!-- JUnit 5 test discovery for PIT is provided by this plugin dependency. -->
<dependencies>
<dependency>
<groupId>org.pitest</groupId>
<artifactId>pitest-junit5-plugin</artifactId>
<version>${pitest.junit5.plugin.version}</version>
</dependency>
</dependencies>
<configuration>
<!--
Mutation testing is scoped to a curated subset of the domain
module: the context pipeline layers, context resolvers, memory
diagnostics/orchestrator, self-evolving run + promotion services,
and the tool-loop conversation view. These packages form the
deterministic core of the agent loop and have the highest
existing test strength, so a stringent mutation threshold here
provides meaningful regression protection without requiring the
full domain module to clear the same bar in a single run.

Classes excluded below either depend on Spring-wired
infrastructure that mutation testing cannot easily exercise
(e.g. RagLayer, AutoModeLayer) or are thin coordination layers
whose logic is covered indirectly by integration tests.
-->
<targetClasses>
<param>me.golemcore.bot.domain.context.layer.*</param>
<param>me.golemcore.bot.domain.context.resolution.*</param>
<param>me.golemcore.bot.domain.memory.diagnostics.*</param>
<param>me.golemcore.bot.domain.memory.orchestrator.*</param>
<param>me.golemcore.bot.domain.selfevolving.run.*</param>
<param>me.golemcore.bot.domain.selfevolving.promotion.*</param>
<param>me.golemcore.bot.domain.system.toolloop.view.*</param>
</targetClasses>
<excludedClasses>
<param>me.golemcore.bot.domain.context.layer.TierAwarenessLayer</param>
<param>me.golemcore.bot.domain.context.layer.AutoModeLayer</param>
<param>me.golemcore.bot.domain.context.layer.RagLayer</param>
<param>me.golemcore.bot.domain.memory.orchestrator.MemoryContextOrchestrator</param>
<param>me.golemcore.bot.domain.selfevolving.promotion.PromotionTargetResolver</param>
<param>me.golemcore.bot.domain.system.toolloop.view.FlatteningToolMessageMasker</param>
</excludedClasses>
<!-- Tests are drawn from the whole domain package, not only the targeted sub-packages. -->
<targetTests>
<param>me.golemcore.bot.domain.*</param>
</targetTests>
<threads>${pitest.threads}</threads>
<!-- The build fails when the mutation score falls below this percentage. -->
<mutationThreshold>${pitest.mutationThreshold}</mutationThreshold>
<!-- Per-test timeout allowance: a multiplier of the normal run time plus a constant (presumably milliseconds; confirm against the PIT docs). -->
<timeoutFactor>2.0</timeoutFactor>
<timeoutConstant>8000</timeoutConstant>
<!-- No timestamped sub-directory: presumably keeps the report path stable (the PIT Tests workflow uploads target/pit-reports). -->
<timestampedReports>false</timestampedReports>
<!-- Fail rather than pass vacuously if the class filters above produce no mutations. -->
<failWhenNoMutations>true</failWhenNoMutations>
<outputFormats>
<outputFormat>HTML</outputFormat>
<outputFormat>XML</outputFormat>
</outputFormats>
<jvmArgs>
<jvmArg>-Xmx2g</jvmArg>
</jvmArgs>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<!-- Strict profile for CI/CD - fails build on code quality issues -->
<profile>
<id>strict</id>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens(estimateTokens(content))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

private int estimateTokens(String text) {
return (int) Math.ceil(text.length() / 3.5);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

Expand All @@ -149,12 +149,12 @@ private boolean isMemoryDisabled(AgentContext context) {
if (memoryConfig != null) {
return Boolean.FALSE.equals(memoryConfig.getEnabled());
}
String memoryPreset = context != null ? context.getAttribute(ContextAttributes.MEMORY_PRESET_ID) : null;
String memoryPreset = context.getAttribute(ContextAttributes.MEMORY_PRESET_ID);
return memoryPreset != null && MemoryPresetIds.DISABLED.equalsIgnoreCase(memoryPreset.trim());
}

private RuntimeConfig.MemoryConfig resolveMemoryPresetConfig(AgentContext context) {
String memoryPreset = context != null ? context.getAttribute(ContextAttributes.MEMORY_PRESET_ID) : null;
String memoryPreset = context.getAttribute(ContextAttributes.MEMORY_PRESET_ID);
if (memoryPreset == null || memoryPreset.isBlank()) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(planContext)
.estimatedTokens((int) Math.ceil(planContext.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(planContext))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
} catch (Exception e) { // NOSONAR — best-effort RAG retrieval
log.warn("[RagLayer] RAG query failed: {}", e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ private ContextLayerResult assembleActiveSkill(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

Expand All @@ -125,7 +125,7 @@ private ContextLayerResult assembleSkillsSummary(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package me.golemcore.bot.domain.context.layer;

/*
* Copyright 2026 Aleksei Kuleshov
*
* Licensed under the Apache License, Version 2.0 (the "License");

Check warning on line 6 in src/main/java/me/golemcore/bot/domain/context/layer/TokenEstimator.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

This block of commented-out lines of code should be removed.

See more on https://sonarcloud.io/project/issues?id=alexk-dev_golemcore-bot&issues=AZ2O6F6M9Mm1IXzUxRI8&open=AZ2O6F6M9Mm1IXzUxRI8&pullRequest=286
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contact: alex@kuleshov.tech
*/

/**
 * Character-count based token estimator shared by the {@link ContextLayer}
 * implementations when they report the approximate size of their rendered
 * content.
 *
 * <p>
 * The estimate is the character length divided by
 * {@value #CHARS_PER_TOKEN}, rounded up to the next whole number. Because of
 * the rounding, any non-empty string yields at least one token, while
 * {@code null} and empty strings yield zero. No real tokenizer is involved, so
 * the result is a rough budgeting figure rather than an exact count.
 *
 * <p>
 * Holding the formula in one class means every layer reports sizes with the
 * same arithmetic and the heuristic can be replaced in a single place.
 */
public final class TokenEstimator {

    /**
     * Divisor applied to the character count: each token is assumed to span this
     * many characters on average.
     */
    static final double CHARS_PER_TOKEN = 3.5;

    private TokenEstimator() {
        // static helper only; never instantiated
    }

    /**
     * Approximate how many tokens the given text occupies.
     *
     * @param text
     *            rendered layer content; {@code null} is treated like an empty
     *            string
     * @return {@code 0} for {@code null} or empty input, otherwise the character
     *         count divided by {@link #CHARS_PER_TOKEN}, rounded up
     */
    public static int estimate(String text) {
        final int charCount = (text == null) ? 0 : text.length();
        if (charCount == 0) {
            return 0;
        }
        final double fractionalTokens = charCount / CHARS_PER_TOKEN;
        return (int) Math.ceil(fractionalTokens);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public ContextLayerResult assemble(AgentContext context) {
return ContextLayerResult.builder()
.layerName(getName())
.content(content)
.estimatedTokens((int) Math.ceil(content.length() / 3.5))
.estimatedTokens(TokenEstimator.estimate(content))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ public boolean hydrate(PromotionDecision decision, EvolutionCandidate candidate)
}

PromotionTarget target = resolveTarget(decision);
if (StringValueSupport.isBlank(decision.getToState())) {
decision.setToState(target.legacyState());
mutated = true;
}
if (StringValueSupport.isBlank(decision.getToLifecycleState())) {
decision.setToLifecycleState(target.lifecycleState());
mutated = true;
Expand All @@ -96,9 +100,9 @@ public boolean hydrate(PromotionDecision decision, EvolutionCandidate candidate)
}

private PromotionTarget resolveTarget(PromotionDecision decision) {
String toState = decision != null ? decision.getToState() : null;
String toLifecycleState = decision != null ? decision.getToLifecycleState() : null;
String toRolloutStage = decision != null ? decision.getToRolloutStage() : null;
String toState = decision.getToState();
String toLifecycleState = decision.getToLifecycleState();
String toRolloutStage = decision.getToRolloutStage();
return new PromotionTarget(
!StringValueSupport.isBlank(toState)
? toState
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,6 @@ public PromotionExecutionResult execute(EvolutionCandidate candidate, PromotionT
}

private String buildTargetBundleId(EvolutionCandidate candidate, PromotionTarget target) {
if (candidate == null || StringValueSupport.isBlank(candidate.getId())) {
return candidate != null ? candidate.getBaseVersion() : null;
}
return candidate.getId() + ":" + target.rolloutStage();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,6 @@ public RunRecord completeRun(RunRecord run, AgentContext context) {
}

private List<String> resolveAppliedTacticIds(AgentContext context) {
if (context == null) {
return new ArrayList<>();
}
List<String> applied = context.getAttribute(ContextAttributes.APPLIED_TACTIC_IDS);
if (applied == null || applied.isEmpty()) {
return new ArrayList<>();
Expand Down
Loading
Loading