From 0527a2bd58c50512708e5bfe39c674e716d1d1b5 Mon Sep 17 00:00:00 2001 From: Himanshu Shekhar Das Date: Sat, 28 Mar 2026 16:56:50 +0530 Subject: [PATCH 1/3] Fix defragment_symbol_data dropping user metadata (#9889436827) defragment_symbol_data_impl passed std::nullopt to collate_and_write for user_meta, silently discarding the metadata that had been correctly read into pipeline_context->user_meta_ from the previous version. Fix by passing the actual user_meta_ptr from the pipeline context. Also fix the Python wrapper which hardcoded metadata=None in the returned VersionedItem regardless of what was stored; it now reads the metadata back from the newly-written version using the existing read_metadata pattern. Add regression test test_defragment_preserves_metadata. --- cpp/arcticdb/version/version_core.cpp | 3 ++- python/arcticdb/version_store/_store.py | 13 ++++--------- .../unit/arcticdb/version_store/test_append.py | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index 7720a7b9fd3..eae47c97d43 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -2620,13 +2620,14 @@ VersionedItem defragment_symbol_data_impl( return util::variant_match( std::move(result), [&slices, &pre_defragmentation_info, &store](CompactionWrittenKeys&& written_keys) -> VersionedItem { + const auto& user_meta_ptr = pre_defragmentation_info.pipeline_context->user_meta_; return collate_and_write( store, pre_defragmentation_info.pipeline_context, slices, std::move(written_keys), pre_defragmentation_info.append_after.value(), - std::nullopt + user_meta_ptr ? std::make_optional(*user_meta_ptr) : std::nullopt ); }, [](Error&& error) -> VersionedItem { diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py index a82b4d9dd7d..7b06c4267e0 100644 --- a/python/arcticdb/version_store/_store.py +++ b/python/arcticdb/version_store/_store.py @@ -4032,15 +4032,10 @@ def defragment_symbol_data( ) result = self.version_store.defragment_symbol_data(symbol, segment_size, prune_previous_version) - return VersionedItem( - symbol=result.symbol, - library=self._library.library_path, - version=result.version, - metadata=None, - data=None, - host=self.env, - timestamp=result.timestamp, - ) + version_query = self._get_version_query(result.version) + _, udm = self.version_store.read_metadata(symbol, version_query) + meta = denormalize_user_metadata(udm, self._normalizer) if udm else None + return self._convert_thin_cxx_item_to_python(result, meta) def library(self): return self._library diff --git a/python/tests/unit/arcticdb/version_store/test_append.py b/python/tests/unit/arcticdb/version_store/test_append.py index 7a870fe992c..e98058f0918 100644 --- a/python/tests/unit/arcticdb/version_store/test_append.py +++ b/python/tests/unit/arcticdb/version_store/test_append.py @@ -14,7 +14,7 @@ from arcticdb.version_store import NativeVersionStore from arcticdb_ext.exceptions import InternalException, NormalizationException, SortingException, SchemaException from arcticdb_ext import set_config_int -from arcticdb.util.test import random_integers, assert_frame_equal +from arcticdb.util.test import random_integers, assert_frame_equal, config_context from arcticdb.config import set_log_level from arcticdb.util.test_utils import generate_random_numpy_array, supported_types_list from arcticdb.util.logger import get_logger @@ -697,6 +697,22 @@ def test_defragment_no_work_to_do(sym, lmdb_version_store): lmdb_version_store.defragment_symbol_data(sym) +def test_defragment_preserves_metadata(sym, lmdb_version_store): + meta = {"key": "value", "number": 42} + df1 = pd.DataFrame({"a": [1, 2]}, index=pd.date_range("2020-01-01", periods=2)) + df2 = pd.DataFrame({"a": [3, 4]}, index=pd.date_range("2020-01-03", periods=2)) + + lmdb_version_store.write(sym, df1, metadata=meta) + lmdb_version_store.append(sym, df2) + + with config_context("SymbolDataCompact.SegmentCount", 1): + assert lmdb_version_store.is_symbol_fragmented(sym) + versioned_item = lmdb_version_store.defragment_symbol_data(sym) + + assert versioned_item.metadata == meta + assert lmdb_version_store.read_metadata(sym).metadata == meta + + @pytest.mark.parametrize( "to_write, to_append", [ From 05c51f0032dc60c703118073dacbd4ed44a4769a Mon Sep 17 00:00:00 2001 From: Himanshu Shekhar Das Date: Mon, 30 Mar 2026 23:05:42 +0530 Subject: [PATCH 2/3] fix small bug in tests --- python/tests/unit/arcticdb/version_store/test_append.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/unit/arcticdb/version_store/test_append.py b/python/tests/unit/arcticdb/version_store/test_append.py index e98058f0918..be2385f5fb6 100644 --- a/python/tests/unit/arcticdb/version_store/test_append.py +++ b/python/tests/unit/arcticdb/version_store/test_append.py @@ -703,7 +703,7 @@ def test_defragment_preserves_metadata(sym, lmdb_version_store): df2 = pd.DataFrame({"a": [3, 4]}, index=pd.date_range("2020-01-03", periods=2)) lmdb_version_store.write(sym, df1, metadata=meta) - lmdb_version_store.append(sym, df2) + lmdb_version_store.append(sym, df2, metadata=meta) with config_context("SymbolDataCompact.SegmentCount", 1): assert lmdb_version_store.is_symbol_fragmented(sym) From 2ff54271c32d4154e4b2e1c6285cf9337c643b36 Mon Sep 17 00:00:00 2001 From: Himanshu Shekhar Das Date: Mon, 30 Mar 2026 23:19:03 +0530 Subject: [PATCH 3/3] revert extra read metadata call --- python/arcticdb/version_store/_store.py | 13 +++++++++---- python/arcticdb/version_store/library.py | 4 +++- .../unit/arcticdb/version_store/test_append.py | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py index 7b06c4267e0..a82b4d9dd7d 100644 --- a/python/arcticdb/version_store/_store.py +++ b/python/arcticdb/version_store/_store.py @@ -4032,10 +4032,15 @@ def defragment_symbol_data( ) result = self.version_store.defragment_symbol_data(symbol, segment_size, prune_previous_version) - version_query = self._get_version_query(result.version) - _, udm = self.version_store.read_metadata(symbol, version_query) - meta = denormalize_user_metadata(udm, self._normalizer) if udm else None - return self._convert_thin_cxx_item_to_python(result, meta) + return VersionedItem( + symbol=result.symbol, + library=self._library.library_path, + version=result.version, + metadata=None, + data=None, + host=self.env, + timestamp=result.timestamp, + ) def library(self): return self._library diff --git a/python/arcticdb/version_store/library.py b/python/arcticdb/version_store/library.py index 4f657d37dae..2f150e69cc8 100644 --- a/python/arcticdb/version_store/library.py +++ b/python/arcticdb/version_store/library.py @@ -3243,7 +3243,9 @@ def defragment_symbol_data( Returns ------- VersionedItem - Structure containing metadata and version number of the defragmented symbol in the store. + Structure containing version number of the defragmented symbol in the store. The ``metadata`` + and ``data`` fields of the returned object will always be ``None``; call ``read_metadata`` or + ``read`` to retrieve the metadata or data associated with the defragmented version. Raises ------ diff --git a/python/tests/unit/arcticdb/version_store/test_append.py b/python/tests/unit/arcticdb/version_store/test_append.py index be2385f5fb6..b912c3227cf 100644 --- a/python/tests/unit/arcticdb/version_store/test_append.py +++ b/python/tests/unit/arcticdb/version_store/test_append.py @@ -709,7 +709,7 @@ def test_defragment_preserves_metadata(sym, lmdb_version_store): assert lmdb_version_store.is_symbol_fragmented(sym) versioned_item = lmdb_version_store.defragment_symbol_data(sym) - assert versioned_item.metadata == meta + assert versioned_item.metadata is None assert lmdb_version_store.read_metadata(sym).metadata == meta