diff --git a/conanfile.py b/conanfile.py index 759473464..71860d319 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.4" + version = "7.5.5" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/include/homestore/index/index_table.hpp b/src/include/homestore/index/index_table.hpp index 3075f00ab..b79381962 100644 --- a/src/include/homestore/index/index_table.hpp +++ b/src/include/homestore/index/index_table.hpp @@ -140,6 +140,13 @@ class IndexTable : public IndexTableBase, public Btree< K, V > { auto cpg = cp_mgr().cp_guard(); Btree< K, V >::destroy_btree(cpg.context(cp_consumer_t::INDEX_SVC)); } + // Flush the current CP to completion before removing the superblock. + // This closes the journal–table mismatch window: any txn_journal entries + // for this table's ordinal are written and the CP is marked done on disk. + // If the process crashes after this point, recovery will not replay the + // old journal (CP is complete), so repair_index_node will never be called + // for an ordinal whose superblock no longer exists. + cp_mgr().trigger_cp_flush(true /* force */).get(); m_sb.destroy(); m_sb_buffer->m_valid = false; decr_pending_request_num(); diff --git a/src/tests/test_index_crash_recovery.cpp b/src/tests/test_index_crash_recovery.cpp index cd48f5830..b97f56b80 100644 --- a/src/tests/test_index_crash_recovery.cpp +++ b/src/tests/test_index_crash_recovery.cpp @@ -377,6 +377,18 @@ struct IndexCrashTest : public test_common::HSTestHelper, BtreeTestHelper< TestT LOGINFO("destroy btree - erase shadow map {}", m_shadow_filename); } + // Install a brand-new empty btree (without destroying the old one first). + // Used after recovery to replace a stale/destroyed m_bt so TearDown() is safe. + void install_fresh_btree(uint32_t erase_up_to_key = 0) { + auto uuid = boost::uuids::random_generator()(); + auto parent_uuid = boost::uuids::random_generator()(); + this->m_bt = std::make_shared< typename T::BtreeType >(uuid, parent_uuid, 0, this->m_cfg); + hs()->index_service().add_index_table(this->m_bt); + if (erase_up_to_key > 0) { this->m_shadow_map.range_erase(0, erase_up_to_key - 1); } + this->m_shadow_map.save(m_shadow_filename); + LOGINFO("Installed fresh btree with uuid {}", boost::uuids::to_string(uuid)); + } + void restart_homestore(uint32_t shutdown_delay_sec = 3) override { this->params(HS_SERVICE::INDEX).index_svc_cbs = new TestIndexServiceCallbacks(this); LOGINFO("\n\n\n\n\n\n shutdown homestore for index service Test\n\n\n\n\n"); @@ -1001,6 +1013,82 @@ TYPED_TEST(IndexCrashTest, MergeRemoveBasic) { } } +// Regression test for the bug: when an index table is destroyed concurrently with a CP flush, +// the txn_journal (written at the START of the flush) can contain entries for the destroyed +// table's ordinal while the table's meta superblock is already gone. On recovery the journal +// is replayed but the table cannot be found in m_ordinal_index_map, causing +// repair_index_node / sanity_check to assert. +// +// Sequence that triggers the bug: +// 1. Build a multi-level tree and flush a baseline CP. +// 2. Set a crash flip so the next CP crashes mid-flush (after writing the journal). +// 3. Insert more keys → splits happen → transact_bufs is called → txn_journal entry added +// for this table's ordinal AND m_crash_flag_on is set on the split parent buf. +// 4. Destroy the table: meta superblock removed from disk, but the dirty split bufs +// (with m_crash_flag_on) remain in the CP dirty list. +// 5. Trigger CP → journal written to disk → do_flush_one_buf hits the flagged buf → crash. +// 6. Recovery: journal has entries for the destroyed table → without fix: assert/abort. +// With fix: skip gracefully. +TYPED_TEST(IndexCrashTest, DestroyTableWithPendingCpCrash) { + // Use a small fixed preload to avoid triggering unrelated block-allocator issues + // that surface when inserting hundreds of consecutive right-edge keys into a + // fully-flushed large tree. With max_keys_in_node=20 and 100 preloaded keys + // we get 5 full leaf nodes; inserting 20 more triggers 1-2 leaf splits which is + // enough to (a) populate a txn_journal entry and (b) mark a parent buffer for crash. + static constexpr uint32_t preload_size = 100; + static constexpr uint32_t extra_keys = 20; + + // Step 1: Build a small tree and flush a clean baseline CP. + LOGINFO("Step 1: Preload {} keys and flush baseline CP", preload_size); + for (auto k = 0u; k < preload_size; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + test_common::HSTestHelper::trigger_cp(true); + this->m_shadow_map.save(this->m_shadow_filename); + + // Step 2: Set the crash flip BEFORE the operations that will cause splits. + // When a split occurs, transact_bufs checks this flip and sets m_crash_flag_on on + // the parent buf. The actual crash fires later when the CP flushes that buf. + LOGINFO("Step 2: Set crash flip on split-at-parent"); + this->set_basic_flip("crash_flush_on_split_at_parent"); + + // Step 3: Insert extra keys to cause splits and populate the txn_journal. + // The rightmost leaf is full after the preload, so the first new key triggers a + // split → transact_bufs is called → crash flip fires once → m_crash_flag_on set + // on the parent buf; subsequent splits (if any) accumulate more journal entries + // but no additional crash flags (flip fires only once, count=1). + LOGINFO("Step 3: Insert {} extra keys to cause splits (txn_journal populated)", extra_keys); + for (auto k = preload_size; k < preload_size + extra_keys; ++k) { + this->put(k, btree_put_type::INSERT, true /* expect_success */); + } + + // Step 4: Destroy the table. m_sb.destroy() removes the meta superblock from disk. + // The dirty split bufs (including the one with m_crash_flag_on) remain in the CP + // dirty list because free_buf only marks m_node_freed — it does not touch the list. + LOGINFO("Step 4: Destroy index table — meta superblock removed from disk"); + hs()->index_service().remove_index_table(this->m_bt); + this->m_bt->destroy(); + + // Step 5: Trigger the CP (do not wait). async_cp_flush will: + // (a) write the txn_journal to disk (entries include the destroyed table's ordinal), + // (b) start flushing nodes, hit the buf with m_crash_flag_on → simulated crash. + LOGINFO("Step 5: Trigger CP (crash expected after journal is written)"); + test_common::HSTestHelper::trigger_cp(false /* no wait */); + + // Step 6: Wait for the simulated crash and homestore recovery. + // Without fix : repair_index_node / sanity_check asserts → process aborts (SIGABRT). + // With fix : missing table is skipped gracefully and recovery completes normally. + LOGINFO("Step 6: Waiting for crash-recovery"); + this->wait_for_crash_recovery(true /* check_will_crash */); + LOGINFO("Step 6: Recovery succeeded without aborting — destroyed table was handled gracefully"); + + // Step 7: Install a fresh empty btree so that the shared TearDown() can safely call + // root_node_id() and tree_key_count() without hitting the stale destroyed btree. + LOGINFO("Step 7: Installing a fresh empty btree for teardown"); + this->install_fresh_btree(preload_size + extra_keys); + test_common::HSTestHelper::trigger_cp(true); +} + TYPED_TEST(IndexCrashTest, MetricsTest) { const auto num_entries = SISL_OPTIONS["num_entries"].as< uint32_t >(); std::vector< uint32_t > vec(num_entries);