diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index d35b4bc1584..19e09999116 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -2621,13 +2621,14 @@ VersionedItem defragment_symbol_data_impl( return util::variant_match( std::move(result), [&slices, &pre_defragmentation_info, &store](CompactionWrittenKeys&& written_keys) -> VersionedItem { + const auto& user_meta_ptr = pre_defragmentation_info.pipeline_context->user_meta_; return collate_and_write( store, pre_defragmentation_info.pipeline_context, slices, std::move(written_keys), pre_defragmentation_info.append_after.value(), - std::nullopt + user_meta_ptr ? std::make_optional(*user_meta_ptr) : std::nullopt ); }, [](Error&& error) -> VersionedItem { diff --git a/python/arcticdb/version_store/library.py b/python/arcticdb/version_store/library.py index f1da6d59ab8..3f9e7f9bd4f 100644 --- a/python/arcticdb/version_store/library.py +++ b/python/arcticdb/version_store/library.py @@ -3311,7 +3311,9 @@ def defragment_symbol_data( Returns ------- VersionedItem - Structure containing metadata and version number of the defragmented symbol in the store. + Structure containing version number of the defragmented symbol in the store. The ``metadata`` + and ``data`` fields of the returned object will always be ``None``; call ``read_metadata`` or + ``read`` to retrieve the metadata or data associated with the defragmented version. Raises ------ diff --git a/python/tests/unit/arcticdb/version_store/test_append.py b/python/tests/unit/arcticdb/version_store/test_append.py index 7a870fe992c..b912c3227cf 100644 --- a/python/tests/unit/arcticdb/version_store/test_append.py +++ b/python/tests/unit/arcticdb/version_store/test_append.py @@ -14,7 +14,7 @@ from arcticdb.version_store import NativeVersionStore from arcticdb_ext.exceptions import InternalException, NormalizationException, SortingException, SchemaException from arcticdb_ext import set_config_int -from arcticdb.util.test import random_integers, assert_frame_equal +from arcticdb.util.test import random_integers, assert_frame_equal, config_context from arcticdb.config import set_log_level from arcticdb.util.test_utils import generate_random_numpy_array, supported_types_list from arcticdb.util.logger import get_logger @@ -697,6 +697,22 @@ def test_defragment_no_work_to_do(sym, lmdb_version_store): lmdb_version_store.defragment_symbol_data(sym) +def test_defragment_preserves_metadata(sym, lmdb_version_store): + meta = {"key": "value", "number": 42} + df1 = pd.DataFrame({"a": [1, 2]}, index=pd.date_range("2020-01-01", periods=2)) + df2 = pd.DataFrame({"a": [3, 4]}, index=pd.date_range("2020-01-03", periods=2)) + + lmdb_version_store.write(sym, df1, metadata=meta) + lmdb_version_store.append(sym, df2, metadata=meta) + + with config_context("SymbolDataCompact.SegmentCount", 1): + assert lmdb_version_store.is_symbol_fragmented(sym) + versioned_item = lmdb_version_store.defragment_symbol_data(sym) + + assert versioned_item.metadata is None + assert lmdb_version_store.read_metadata(sym).metadata == meta + + @pytest.mark.parametrize( "to_write, to_append", [