Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
83b803c
Restore live index settings on per-entity distributed-promote path
harshach May 4, 2026
ba952d9
Wire jobData into per-entity reindex promotion handler
harshach May 4, 2026
80f01f6
Add regression test for live serving settings on per-entity promote
harshach May 5, 2026
c373592
Expand unit coverage around the per-entity promotion contract
harshach May 5, 2026
20e1cd4
Add integration test for live settings restoration after alias promotion
harshach May 5, 2026
a977032
Address PR review: harden settings revert + lock InOrder + drop redun…
harshach May 5, 2026
a2a2b4a
Drop verbose explanatory comments from promote-path edits
harshach May 5, 2026
f40a21b
Close Rest5Client in IT _settings helper
harshach May 5, 2026
4775e90
Tighten SearchIndexAliasPromotionIT against false-positive runs
harshach May 5, 2026
2af5827
Harden alias promotion: defer canonical delete, hard-fail on empty al…
harshach May 5, 2026
4654158
Consolidate finalizeReindex and promoteEntityIndex into one core path
harshach May 5, 2026
9a7fa49
Address PR review: post-state checks, FAILED listener, hermetic IT, I…
harshach May 6, 2026
8b4d1a8
Wrap post-state checks: indexExists / getAliases throws no longer escape
harshach May 6, 2026
2580597
Address Copilot review 4232747647: positive-evidence dataLoss, hermet…
harshach May 6, 2026
0351078
Merge branch 'main' into harshach/search-alias-promote
harshach May 6, 2026
30774c7
Wait for restore-triggered run to settle in SearchIndexAliasPromotionIT
harshach May 6, 2026
232d195
Fix AppsResourceIT.waitForAppJobCompletion case mismatch and timeout
harshach May 6, 2026
22717f5
Merge branch 'main' into harshach/search-alias-promote
mohityadav766 May 6, 2026
98b9871
Merge remote-tracking branch 'origin/main' into harshach/search-alias…
harshach May 6, 2026
aee61f2
Merge branch 'harshach/search-alias-promote' of github.com:open-metad…
harshach May 6, 2026
64a385a
Run SearchIndexAliasPromotionIT in the sequential bucket
harshach May 6, 2026
810ed16
Address Copilot PR review 4233452655
harshach May 6, 2026
cf18270
Remove SearchIndexAliasPromotionIT in favor of unit test coverage
harshach May 6, 2026
7d7e5db
Address Copilot PR review 4236718653
harshach May 6, 2026
bb973d8
Fix per-entity promote when canonical is an alias, not a concrete index
harshach May 6, 2026
0164cd8
Add ALIAS_PROMOTE_BEGIN diagnostic log per entity
harshach May 6, 2026
d80e44b
Drop heavy alias-promotion refactor; rely on PR #27930 fix already in…
harshach May 6, 2026
3e0d7ca
Merge remote-tracking branch 'origin/main' into harshach/search-alias…
harshach May 6, 2026
25240ec
Skip delete-by-alias-name when canonical is currently an alias
harshach May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,15 @@ static void setup() {
private void waitForAppJobCompletion(String appName) {
HttpClient httpClient = SdkClients.adminClient().getHttpClient();
try {
// AppRunRecord.status is a lowercase enum (see appRunRecord.json: started, running,
// completed, failed, success, activeError, stopped, ...). Compare case-insensitively:
// the uppercase literals used here before matched none of the real values and silently
// made the wait a no-op. The 5-minute ceiling covers an in-flight reindex from another test
// class (e.g. SearchIndexingFieldsParityIT triggers an "all entities" reindex that can take
// minutes); a 30s ceiling fell through to the catch and let the trigger Awaitility below
// hit its own 2-minute "already running" wall.
Awaitility.await("Wait for app job completion: " + appName)
.atMost(Duration.ofSeconds(30))
.atMost(Duration.ofMinutes(5))
.pollDelay(Duration.ofMillis(500))
.pollInterval(Duration.ofSeconds(2))
.ignoreExceptions()
Expand All @@ -98,9 +105,7 @@ private void waitForAppJobCompletion(String appName) {
return true;
}
String status = latestRun.getStatus().value();
return "SUCCESS".equals(status)
|| "FAILED".equals(status)
|| "COMPLETED".equals(status);
return !"running".equalsIgnoreCase(status) && !"started".equalsIgnoreCase(status);
});
} catch (org.awaitility.core.ConditionTimeoutException e) {
// Best-effort wait — the app may be continuously running under parallel test load.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,18 @@ public void finalizeReindex(EntityReindexContext context, boolean reindexSuccess
}
}

// After the first reindex, the canonical name is an alias on the previously staged index,
// not a concrete index. OpenSearch's listIndicesByPrefix returns that alias name as one of
// its result keys, which then drives a delete-by-name attempt that fails with
// "matches an alias, specify the corresponding concrete indices" and burns ~31s of
// exponential backoff per entity (1+2+4+8+16s before giving up). With 60 entity types
// a full reindex wastes ~30 minutes in cleanup. Drop the alias name from the cleanup set
// when it is currently an alias — it does not need to be deleted; the swap moves the
// alias atomically and the underlying old concrete index is already in oldIndicesToDelete.
if (!searchClient.getIndicesByAlias(canonicalIndex).isEmpty()) {
oldIndicesToDelete.remove(canonicalIndex);
}

LOG.debug(
"finalizeReindex entity '{}': aliases={}, oldIndices={}, stagedIndex={}",
entityType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,9 @@ class FinalizeReindexTests {
@DisplayName("Should promote partial data and record success when failed reindex has documents")
void testFinalizeReindexPromotesPartialData() {
AliasState aliasState = new AliasState();
aliasState.put("table_search_index", Set.of("table_search_index"));
// Canonical is a concrete index with no aliases (the realistic first-reindex shape; OS/ES
// forbid an alias and a concrete index from sharing the same name).
aliasState.put("table_search_index", Set.of());
aliasState.put("table_search_index_rebuild_old", Set.of("stale"));
aliasState.put("table_search_index_rebuild_new", new HashSet<>());

Expand Down Expand Up @@ -667,6 +669,62 @@ void testFinalizeReindexRecordsPromotionFailureOnException() {

verify(metrics).recordPromotionFailure("table");
}

@Test
@DisplayName(
    "Should not delete-by-alias-name when canonical is currently an alias on a previous staged")
void testFinalizeReindexSkipsDeleteWhenCanonicalIsAlias() {
  // Scenario under test: a reindex has already happened once, so the canonical name
  // (table_search_index) is an alias attached to the previously staged index rather than a
  // concrete index. The test pins that finalizeReindex never calls
  // deleteIndexWithBackoff on that canonical name, while the old rebuild index is still
  // cleaned up and every alias lands on the newly staged index.
  final String canonicalName = "table_search_index";
  final String previousIndex = "table_search_index_rebuild_old";
  final String stagedName = "table_search_index_rebuild_new";

  AliasState state = new AliasState();
  // The previously staged index currently carries the canonical, short and parent aliases.
  state.put(previousIndex, new HashSet<>(Set.of(canonicalName, "table", "all")));
  // The freshly staged index starts out with no aliases.
  state.put(stagedName, new HashSet<>());
  // Register the canonical name as a key of its own with no aliases, mimicking the alias
  // name showing up among the listIndicesByPrefix result keys.
  state.put(canonicalName, Set.of());

  SearchClient searchClient = state.toMock();
  SearchRepository searchRepository = mock(SearchRepository.class);
  when(searchRepository.getSearchClient()).thenReturn(searchClient);

  try (MockedStatic<Entity> mockedEntity = mockStatic(Entity.class)) {
    mockedEntity.when(Entity::getSearchRepository).thenReturn(searchRepository);

    EntityReindexContext reindexContext =
        EntityReindexContext.builder()
            .entityType("table")
            .canonicalIndex(canonicalName)
            .activeIndex(previousIndex)
            .stagedIndex(stagedName)
            .existingAliases(new HashSet<>(Set.of(canonicalName, "table", "all")))
            .canonicalAliases("table")
            .parentAliases(new HashSet<>(Set.of("all")))
            .build();

    DefaultRecreateHandler handler = new DefaultRecreateHandler();
    handler.finalizeReindex(reindexContext, true);
  }

  // The alias name itself must never be handed to the backoff delete.
  verify(searchClient, never()).deleteIndexWithBackoff(canonicalName);

  // The old concrete rebuild index is still removed.
  assertTrue(
      state.deletedIndices.contains(previousIndex),
      "Old concrete rebuild must still be cleaned up by the swap path");

  // All three aliases must now point at the staged index.
  Set<String> aliasesOnStaged = state.indexAliases.get(stagedName);
  assertTrue(
      aliasesOnStaged.contains(canonicalName),
      () -> "Canonical alias must end up on staged after promotion; got " + aliasesOnStaged);
  assertTrue(
      aliasesOnStaged.contains("table"),
      () -> "Short alias must end up on staged after promotion; got " + aliasesOnStaged);
  assertTrue(
      aliasesOnStaged.contains("all"),
      () -> "Parent alias must end up on staged after promotion; got " + aliasesOnStaged);
}
}

@Nested
Expand Down
Loading