From 712a77fb7686b256383103c5abb61c8d2eaab10a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:35:38 +0000
Subject: [PATCH 1/4] Initial plan

From 4837f4728641c784c35d37a670c1028ec6e1d1ad Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:40:37 +0000
Subject: [PATCH 2/4] Add retry logic and delays for WAL archiving in cloudberry restore point test

Co-authored-by: chipitsine <2217296+chipitsine@users.noreply.github.com>
---
 .../scripts/tests/restore_point.sh | 35 ++++++++++++++-----
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/docker/cloudberry_tests/scripts/tests/restore_point.sh b/docker/cloudberry_tests/scripts/tests/restore_point.sh
index 062d77f44d..24681b2e85 100755
--- a/docker/cloudberry_tests/scripts/tests/restore_point.sh
+++ b/docker/cloudberry_tests/scripts/tests/restore_point.sh
@@ -16,7 +16,12 @@ setup_wal_archiving
 wal-g --config=${TMP_CONFIG} delete everything FORCE --confirm
 
 wal-g create-restore-point rp1 --config=${TMP_CONFIG}
+# Wait a bit for the archiver to process the WAL switch
+sleep 2
+
 wal-g create-restore-point rp2 --config=${TMP_CONFIG}
+# Wait for the archiver to process the second WAL switch
+sleep 2
 
 # Check whether the WAL log is correctly switched and uploaded to S3 after create-restore-point
 # gpadmin@10f4a227f02b:/usr/local/gpdb_src$ wal-g st ls segments_005/seg0/wal_005/ --config=${TMP_CONFIG}
@@ -24,16 +29,31 @@ wal-g create-restore-point rp2 --config=${TMP_CONFIG}
 # obj 4624920 2025-05-21 07:17:16.052 +0000 UTC 000000010000000000000001.lz4
 # obj 264275 2025-05-21 07:26:06.265 +0000 UTC 000000010000000000000002.lz4
 
-#wait for wal-g to upload WALs
-sleep 5
-
 check_wal_upload() {
     local path=$1
-
+    local max_attempts=60
+    local attempt=1
+
+    echo "Waiting for WAL files to be uploaded to $path..."
+
+    while [ $attempt -le $max_attempts ]; do
+        local count=$(wal-g st ls "$path" --config=${TMP_CONFIG} | awk '/^obj/ {count++} END {print count+0}')
+
+        if [ "$count" -ge 2 ]; then
+            echo "Found $count WAL files in $path (attempt $attempt/$max_attempts)"
+            wal-g st ls "$path" --config=${TMP_CONFIG}
+            return 0
+        fi
+
+        echo "Found $count WAL files in $path, waiting... (attempt $attempt/$max_attempts)"
+        sleep 2
+        attempt=$((attempt + 1))
+    done
+
+    echo "Error: WAL files after create-restore-point were not correctly uploaded to S3 for $path"
+    echo "Final listing:"
     wal-g st ls "$path" --config=${TMP_CONFIG}
-
-    wal-g st ls "$path" --config=${TMP_CONFIG} \
-    | awk '/^obj/ {count++} END {exit !(count >= 2)}'
+    return 1
 }
 
 # Check each segment
@@ -44,7 +64,6 @@ for seg_path in \
     segments_005/seg2/wal_005/
 do
     if ! check_wal_upload "$seg_path"; then
-        echo "Error: WAL files after create-restore-point were not correctly uploaded to S3 for $seg_path"
         exit 1
     fi
 done

From 6926dcf61d29d6e3059bb30114404d7b4e611cb4 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:41:27 +0000
Subject: [PATCH 3/4] Address code review feedback - add comment and increase sleep interval

Co-authored-by: chipitsine <2217296+chipitsine@users.noreply.github.com>
---
 docker/cloudberry_tests/scripts/tests/restore_point.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docker/cloudberry_tests/scripts/tests/restore_point.sh b/docker/cloudberry_tests/scripts/tests/restore_point.sh
index 24681b2e85..b4f14576fb 100755
--- a/docker/cloudberry_tests/scripts/tests/restore_point.sh
+++ b/docker/cloudberry_tests/scripts/tests/restore_point.sh
@@ -31,6 +31,7 @@ sleep 2
 
 check_wal_upload() {
     local path=$1
+    # Wait up to 2 minutes (60 attempts × 2 seconds) for WAL files to be uploaded
     local max_attempts=60
     local attempt=1
 
@@ -46,7 +47,8 @@ check_wal_upload() {
         fi
 
         echo "Found $count WAL files in $path, waiting... (attempt $attempt/$max_attempts)"
-        sleep 2
+        # Use 5-second interval to reduce S3 API calls while still providing reasonable timeout
+        sleep 5
         attempt=$((attempt + 1))
     done
 

From 763a05f599b9f28df9464531134e6f15f76fad60 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 9 Feb 2026 18:41:54 +0000
Subject: [PATCH 4/4] Fix comment - update timeout calculation to reflect 5s interval

Co-authored-by: chipitsine <2217296+chipitsine@users.noreply.github.com>
---
 docker/cloudberry_tests/scripts/tests/restore_point.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/cloudberry_tests/scripts/tests/restore_point.sh b/docker/cloudberry_tests/scripts/tests/restore_point.sh
index b4f14576fb..fd5affb3f9 100755
--- a/docker/cloudberry_tests/scripts/tests/restore_point.sh
+++ b/docker/cloudberry_tests/scripts/tests/restore_point.sh
@@ -31,7 +31,7 @@ sleep 2
 
 check_wal_upload() {
     local path=$1
-    # Wait up to 2 minutes (60 attempts × 2 seconds) for WAL files to be uploaded
+    # Wait up to 5 minutes (60 attempts × 5 seconds) for WAL files to be uploaded
     local max_attempts=60
     local attempt=1
 