11 changes: 0 additions & 11 deletions bootstrap/sql/migrations/native/1.12.7/postgres/schemaChanges.sql

This file was deleted.

7 changes: 7 additions & 0 deletions bootstrap/sql/migrations/native/2.0.0/mysql/schemaChanges.sql
@@ -135,3 +135,10 @@ CREATE TABLE IF NOT EXISTS task_form_schema_entity (
KEY idx_task_form_schema_task_type (taskType),
KEY idx_task_form_schema_deleted (deleted)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

ALTER TABLE entity_extension
ADD COLUMN versionNum DOUBLE NULL,
ADD COLUMN changedFieldKeys JSON NULL;

CREATE INDEX idx_entity_extension_version_order
ON entity_extension (id, versionNum);
12 changes: 12 additions & 0 deletions bootstrap/sql/migrations/native/2.0.0/postgres/schemaChanges.sql
@@ -140,3 +140,15 @@ CREATE TABLE IF NOT EXISTS task_form_schema_entity (
CREATE INDEX IF NOT EXISTS idx_task_form_schema_name ON task_form_schema_entity (name);
CREATE INDEX IF NOT EXISTS idx_task_form_schema_tasktype ON task_form_schema_entity (tasktype);
CREATE INDEX IF NOT EXISTS idx_task_form_schema_deleted ON task_form_schema_entity (deleted);

ALTER TABLE entity_extension
ADD COLUMN IF NOT EXISTS versionNum DOUBLE PRECISION,
ADD COLUMN IF NOT EXISTS changedFieldKeys JSONB;

CREATE INDEX IF NOT EXISTS idx_entity_extension_version_order
ON entity_extension (id, versionNum DESC)
WHERE versionNum IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_entity_extension_changed_field_keys
ON entity_extension USING GIN (changedFieldKeys)
WHERE changedFieldKeys IS NOT NULL;
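
For context (not part of this PR's diff): a read of roughly this shape is what the new columns and the partial index are presumably meant to serve. The sketch below uses the same JDBI calls as the migration code in this PR; the handle and entityId variables are illustrative.

    // Sketch only: fetch an entity's version timeline, newest first, using the
    // backfilled versionNum column (and, on Postgres, the partial index above).
    List<Map<String, Object>> versions =
        handle
            .createQuery(
                "SELECT extension, versionNum, changedFieldKeys FROM entity_extension "
                    + "WHERE id = :id AND versionNum IS NOT NULL "
                    + "ORDER BY versionNum DESC")
            .bind("id", entityId)
            .mapToMap()
            .list();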
@@ -3,14 +3,17 @@
import static org.openmetadata.service.jdbi3.locator.ConnectionType.MYSQL;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.addTableColumnSearchSettings;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.backfillAnnouncementRelationships;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.backfillVersionMetadata;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateLegacyActivityThreadsToActivityStream;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateSuggestionsToTaskEntity;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateThreadTasksToTaskEntity;

import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.openmetadata.service.migration.api.MigrationProcessImpl;
import org.openmetadata.service.migration.utils.MigrationFile;

@Slf4j
public class Migration extends MigrationProcessImpl {

public Migration(MigrationFile migrationFile) {
@@ -25,5 +28,13 @@ public void runDataMigration() {
migrateThreadTasksToTaskEntity(handle, MYSQL);
migrateLegacyActivityThreadsToActivityStream(handle, MYSQL);
backfillAnnouncementRelationships(handle);
try {
backfillVersionMetadata(handle);
} catch (Exception e) {
LOG.error(
"Failed to backfill versionNum and changedFieldKeys in v200 migration. "
+ "Version timeline filtering may not work correctly until the migration is re-run.",
e);
}
}
}
@@ -3,14 +3,17 @@
import static org.openmetadata.service.jdbi3.locator.ConnectionType.POSTGRES;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.addTableColumnSearchSettings;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.backfillAnnouncementRelationships;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.backfillVersionMetadata;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateLegacyActivityThreadsToActivityStream;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateSuggestionsToTaskEntity;
import static org.openmetadata.service.migration.utils.v200.MigrationUtil.migrateThreadTasksToTaskEntity;

import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.openmetadata.service.migration.api.MigrationProcessImpl;
import org.openmetadata.service.migration.utils.MigrationFile;

@Slf4j
public class Migration extends MigrationProcessImpl {

public Migration(MigrationFile migrationFile) {
@@ -25,5 +28,13 @@ public void runDataMigration() {
migrateThreadTasksToTaskEntity(handle, POSTGRES);
migrateLegacyActivityThreadsToActivityStream(handle, POSTGRES);
backfillAnnouncementRelationships(handle);
try {
backfillVersionMetadata(handle);
} catch (Exception e) {
LOG.error(
"Failed to backfill versionNum and changedFieldKeys in v200 migration. "
+ "Version timeline filtering may not work correctly until the migration is re-run.",
e);
}
}
}
@@ -5,8 +5,10 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Handle;
@@ -25,6 +27,7 @@
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.jdbi3.locator.ConnectionType;
import org.openmetadata.service.migration.utils.SearchSettingsMergeUtil;
import org.openmetadata.service.resources.databases.DatasourceConfig;
import org.openmetadata.service.resources.feeds.MessageParser;
import org.openmetadata.service.util.EntityUtil;
import org.openmetadata.service.util.FullyQualifiedName;
@@ -34,6 +37,20 @@ public class MigrationUtil {

private static final String TABLE_COLUMN_ASSET_TYPE = "tableColumn";

private static final int BATCH_SIZE = 1000;

private static final String UPDATE_MYSQL =
"UPDATE entity_extension SET versionNum = :versionNum, changedFieldKeys = :changedFieldKeys "
+ "WHERE id = :id AND extension = :extension";
Comment on lines +41 to +45 (Copilot AI, Apr 29, 2026):

BATCH_SIZE is set to 5000, which is much larger than batching used in other migration utils in this repo (commonly 100–500). A 5000-row select + JSON parse + batch update can increase transaction size and lock/WAL pressure during upgrades. Consider reducing this to a smaller value (and/or making it configurable) to keep migrations predictable on large datasets.
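
If the batch size does need to become tunable, a minimal sketch is a JVM system property with a conservative default; the property name below is hypothetical, not an existing OpenMetadata setting.

    // Sketch only: tunable batch size with a smaller default.
    // "openmetadata.migration.backfillBatchSize" is an illustrative property name.
    private static final int BATCH_SIZE =
        Integer.getInteger("openmetadata.migration.backfillBatchSize", 500);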

private static final String UPDATE_POSTGRES =
"UPDATE entity_extension SET versionNum = :versionNum, changedFieldKeys = :changedFieldKeys::jsonb "
+ "WHERE id = :id AND extension = :extension";

private static final String UPDATE_VERSION_NUM_ONLY =
"UPDATE entity_extension SET versionNum = :versionNum "
+ "WHERE id = :id AND extension = :extension";

private MigrationUtil() {}

public static void addTableColumnSearchSettings() {
@@ -1341,4 +1358,124 @@ private static void insertTaskDomainRelationships(
}
}
}

public static void backfillVersionMetadata(Handle handle) {
String updateSql =
Boolean.TRUE.equals(DatasourceConfig.getInstance().isMySQL())
? UPDATE_MYSQL
: UPDATE_POSTGRES;

LOG.info("Starting backfill of versionNum and changedFieldKeys in entity_extension");
int totalProcessed = 0;
List<Map<String, Object>> batch;

do {
batch =
handle
.createQuery(
"SELECT id, extension, json FROM entity_extension "
+ "WHERE extension LIKE '%.version.%' "
+ "AND versionNum IS NULL "
+ "LIMIT :limit")
Comment on lines +1378 to +1382 (Copilot AI, Apr 29, 2026; marked as resolved by gitar-bot[bot]):

backfillVersionMetadata currently selects batches from entity_extension without filtering (versionNum IS NULL / extension LIKE '%.version.%'). That means rerunning the migration (or running it on Postgres instances that already populated these columns) will scan the entire table and rewrite every version row, which can generate large write amplification/WAL and extend upgrade time unnecessarily. Consider adding WHERE versionNum IS NULL AND extension LIKE '%.version.%' (and keeping the keyset pagination) so the backfill is idempotent and only touches rows that still need population.

Suggested change:
-                        + "WHERE (id, extension) > (:lastId, :lastExt) "
-                        + "ORDER BY id, extension "
-                        + "LIMIT :limit")
+                        + "WHERE versionNum IS NULL "
+                        + "AND extension LIKE :extensionPattern "
+                        + "AND (id, extension) > (:lastId, :lastExt) "
+                        + "ORDER BY id, extension "
+                        + "LIMIT :limit")
+                .bind("extensionPattern", "%.version.%")
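
For reference, a sketch of how the suggested keyset-paginated loop could look with the JDBI calls already used in this file; the lastId/lastExt cursor variables are illustrative and not part of this PR.

    // Sketch only: keyset pagination over (id, extension), keeping the
    // idempotency filters from the suggestion above. The cursor starts below
    // any real (id, extension) pair and advances to the last row of each page.
    String lastId = "";
    String lastExt = "";
    List<Map<String, Object>> page;
    do {
      page =
          handle
              .createQuery(
                  "SELECT id, extension, json FROM entity_extension "
                      + "WHERE versionNum IS NULL "
                      + "AND extension LIKE :extensionPattern "
                      + "AND (id, extension) > (:lastId, :lastExt) "
                      + "ORDER BY id, extension "
                      + "LIMIT :limit")
              .bind("extensionPattern", "%.version.%")
              .bind("lastId", lastId)
              .bind("lastExt", lastExt)
              .bind("limit", BATCH_SIZE)
              .mapToMap()
              .list();
      for (Map<String, Object> row : page) {
        // ... same per-row handling as backfillVersionMetadata ...
      }
      if (!page.isEmpty()) {
        Map<String, Object> last = page.get(page.size() - 1);
        lastId = last.get("id").toString();
        lastExt = last.get("extension").toString();
      }
    } while (page.size() == BATCH_SIZE);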
.bind("limit", BATCH_SIZE)
.mapToMap()
.list();

for (Map<String, Object> row : batch) {
String extension = row.get("extension").toString();
String id = row.get("id").toString();
Object jsonObj = row.get("json");
double versionNum = extractVersionNum(extension);
String changedFieldKeys = "[]";

if (jsonObj != null) {
try {
changedFieldKeys = extractChangedFieldKeys(jsonObj.toString());
} catch (Exception e) {
LOG.warn(
"Failed to extract changedFieldKeys for extension {}, using empty array",
extension,
e);
}
}

try {
handle
.createUpdate(updateSql)
.bind("versionNum", versionNum)
.bind("changedFieldKeys", changedFieldKeys)
.bind("id", id)
.bind("extension", extension)
.execute();
} catch (Exception e) {
LOG.warn(
"Full update failed for extension {}, falling back to versionNum-only update",
extension,
e);
// Guarantee versionNum is set so this row is not retried in the next batch.
// If this also fails we have a real DB issue — let the exception propagate to stop
// the loop rather than hammering a broken database indefinitely.
handle
.createUpdate(UPDATE_VERSION_NUM_ONLY)
.bind("versionNum", versionNum)
.bind("id", id)
.bind("extension", extension)
.execute();
}
}

totalProcessed += batch.size();
if (!batch.isEmpty()) {
LOG.info("Backfilled {} entity_extension rows so far", totalProcessed);
}
} while (batch.size() == BATCH_SIZE);

LOG.info(
"Backfill of versionNum and changedFieldKeys complete: {} total rows processed",
totalProcessed);
}

static double extractVersionNum(String extension) {
int idx = extension.lastIndexOf(".version.");
if (idx < 0) {
return 0.0;
}
try {
return Double.parseDouble(extension.substring(idx + ".version.".length()));
} catch (NumberFormatException e) {
return 0.0;
}
}

static String extractChangedFieldKeys(String json) {
JsonNode root = JsonUtils.readTree(json);
JsonNode changeDescription = root.get("changeDescription");
if (changeDescription == null || changeDescription.isNull()) {
return "[]";
}

Set<String> fieldNames = new LinkedHashSet<>();
collectFieldNames(fieldNames, changeDescription.get("fieldsAdded"));
collectFieldNames(fieldNames, changeDescription.get("fieldsUpdated"));
collectFieldNames(fieldNames, changeDescription.get("fieldsDeleted"));

List<String> sorted = new ArrayList<>(fieldNames);
sorted.sort(String::compareTo);
return JsonUtils.pojoToJson(sorted);
}

private static void collectFieldNames(Set<String> fieldNames, JsonNode fieldChanges) {
if (fieldChanges == null || !fieldChanges.isArray()) {
return;
}
for (JsonNode fieldChange : fieldChanges) {
JsonNode nameNode = fieldChange.get("name");
if (nameNode != null && !nameNode.isNull()) {
String name = nameNode.asText();
if (!name.isEmpty()) {
fieldNames.add(name);
}
}
}
}
}
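
A quick worked example of what the new helpers produce, on hand-written input rather than anything from the PR's tests; the extension string format simply follows the '%.version.%' pattern the backfill query matches.

    // Sketch only: expected behaviour of the helpers on an illustrative payload.
    String json =
        "{\"changeDescription\": {"
            + "\"fieldsAdded\": [{\"name\": \"tags\"}],"
            + "\"fieldsUpdated\": [{\"name\": \"description\"}],"
            + "\"fieldsDeleted\": []}}";
    // extractChangedFieldKeys(json)              -> ["description","tags"] (sorted, de-duplicated)
    // extractVersionNum("table.version.0.2")     -> 0.2
    // extractVersionNum("table.customAttribute") -> 0.0 (no ".version." segment)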