From de269a0626ca5d62609c3ec0a689da82afe9c34a Mon Sep 17 00:00:00 2001 From: Sanketh Nalli Date: Sat, 2 May 2026 18:21:01 -0700 Subject: [PATCH 1/2] [debug] Isolate ServerHttp2Test.replicateBlobV2MultipleCases with full debug logs Goal: capture verbose logs of the flaky `replicateBlobV2MultipleCases` failure ("expected: but was:" or "expected: but was:") that intermittently breaks server-int-test on master. Changes (debug-only, do NOT merge): - Disable all jobs except server-int-test in the workflow. - Restrict server-int-test gradle args to: --scan --info --warning-mode=summary :ambry-server:intTest --tests "com.github.ambry.server.ServerHttp2Test.replicateBlobV2MultipleCases" - log4j-test-config/log4j2.xml: - Root level: info -> debug - Re-enable BlockingChannelConnectionPool, BlockingChannelInfo, ReplicaThread loggers (they were `off` to suppress noise; we want that noise now). - intTest retry budget: maxRetries 2 -> 0 so first failure is captured cleanly (no retry-masked attempts). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/github-actions.yml | 18 +++++++++++++----- build.gradle | 11 +++-------- .../src/main/resources/log4j2.xml | 18 +++++++++++------- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 297fa38359..38c95f6675 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -38,6 +38,7 @@ jobs: # ============================================================ unit-test-clustermap: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -49,6 +50,7 @@ jobs: job-id-suffix: clustermap unit-test-network: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -60,6 +62,7 @@ jobs: job-id-suffix: network unit-test-frontend: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest 
timeout-minutes: 60 steps: @@ -71,6 +74,7 @@ jobs: job-id-suffix: frontend unit-test-mysql-stack: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -85,6 +89,7 @@ jobs: needs-mysql: 'true' unit-test-azure-stack: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -97,6 +102,7 @@ jobs: needs-azurite: 'true' unit-test-protocols: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -108,6 +114,7 @@ jobs: job-id-suffix: protocols unit-test-utility-modules: + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -123,10 +130,8 @@ jobs: # a per-module unit-test-file-transfer job here once the path is operational. store-test: - + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest - # Hard cap matches unit-test's: prevents runaway hangs from consuming - # full GitHub-default 6h timeout if a test wedges. timeout-minutes: 60 steps: - name: Checkout Ambry @@ -156,7 +161,7 @@ jobs: timeout-minutes: 2 int-test: - + if: false # DEBUG branch — only server-int-test runs runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -246,7 +251,10 @@ jobs: name: Run integration tests with: job-id: jdk11 - arguments: --scan --warning-mode=summary :ambry-server:intTest codeCoverageReport + # DEBUG branch: isolate replicateBlobV2MultipleCases. No codeCoverageReport + # (skips coverage build), --info for verbose gradle output, --tests filter + # to run only the failing test class+method across all parameter variants. 
+ arguments: --scan --info --warning-mode=summary :ambry-server:intTest --tests "com.github.ambry.server.ServerHttp2Test.replicateBlobV2MultipleCases" gradle-version: wrapper - name: Upload coverage to Codecov diff --git a/build.gradle b/build.gradle index fe70415a11..59e6c90ea9 100644 --- a/build.gradle +++ b/build.gradle @@ -253,16 +253,11 @@ subprojects { logger.lifecycle " suite total: ${result.testCount} tests, ${result.successfulTestCount} passed, ${result.failedTestCount} failed, ${result.skippedTestCount} skipped" } } - // Allow for retrying flaky integration tests. + // DEBUG branch (snalli/debug-replicate-blobv2): retries off so the FIRST + // failure surfaces with full debug logs, not the retried-and-masked one. retry { - // The maximum number of times to retry an individual test - maxRetries = 2 - // The maximum number of test failures that are allowed (per module) before retrying is disabled. The count applies to - // each round of test execution. For example, if maxFailures is 5 and 4 tests initially fail and then 3 - // again on retry, this will not be considered too many failures and retrying will continue (if maxRetries {@literal >} 1). - // If 5 or more tests were to fail initially then no retry would be attempted. + maxRetries = 0 maxFailures = 5 - // Whether tests that initially fail and then pass on retry should fail the task. 
failOnPassedAfterRetry = false } maxHeapSize = "6g" diff --git a/log4j-test-config/src/main/resources/log4j2.xml b/log4j-test-config/src/main/resources/log4j2.xml index ae23d118da..ce94248e9f 100644 --- a/log4j-test-config/src/main/resources/log4j2.xml +++ b/log4j-test-config/src/main/resources/log4j2.xml @@ -11,7 +11,12 @@ - + + @@ -22,11 +27,10 @@ - - - - - + + + + From 2b8618201d2ef6ac0f6a6449eaeeee443fb97253 Mon Sep 17 00:00:00 2001 From: Sanketh Nalli Date: Sat, 2 May 2026 18:29:50 -0700 Subject: [PATCH 2/2] [debug] Add wildcard to --tests filter for parameterized test Gradle's --tests filter doesn't match parameterized variants ([0], [1]) without a trailing wildcard. Filter "...replicateBlobV2MultipleCases" matched 0 tests; "...replicateBlobV2MultipleCases*" matches both parameter values. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/github-actions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml index 38c95f6675..76b6652565 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/github-actions.yml @@ -254,7 +254,7 @@ jobs: # DEBUG branch: isolate replicateBlobV2MultipleCases. No codeCoverageReport # (skips coverage build), --info for verbose gradle output, --tests filter # to run only the failing test class+method across all parameter variants. - arguments: --scan --info --warning-mode=summary :ambry-server:intTest --tests "com.github.ambry.server.ServerHttp2Test.replicateBlobV2MultipleCases" + arguments: --scan --info --warning-mode=summary :ambry-server:intTest --tests "com.github.ambry.server.ServerHttp2Test.replicateBlobV2MultipleCases*" gradle-version: wrapper - name: Upload coverage to Codecov