diff --git a/conanfile.py b/conanfile.py
index b09aee14d..167d34759 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -11,5 +11,5 @@ class HomestoreConan(ConanFile):
     name = "homestore"
-    version = "7.5.0"
+    version = "7.5.1"
 
     homepage = "https://github.com/eBay/Homestore"
     description = "HomeStore Storage Engine"
diff --git a/src/lib/device/physical_dev.cpp b/src/lib/device/physical_dev.cpp
index ba52ba2f2..b151c308c 100644
--- a/src/lib/device/physical_dev.cpp
+++ b/src/lib/device/physical_dev.cpp
@@ -66,8 +66,11 @@ first_block PhysicalDev::read_first_block(const std::string& devname, int oflags
 
     first_block ret;
     auto buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, 512);
-    iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size,
-                                        hs_super_blk::first_block_offset());
+    auto err = iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size,
+                                                   hs_super_blk::first_block_offset());
+
+    HS_REL_ASSERT(!err, "IO error reading first block from device={}, error={}, homestore will go down", devname,
+                  err.message());
 
     ret = *(r_cast< first_block* >(buf));
     hs_utils::iobuf_free(buf, sisl::buftag::superblk);
@@ -114,20 +117,28 @@ PhysicalDev::PhysicalDev(const dev_info& dinfo, int oflags, const pdev_info_head
         m_streams.emplace_back(i);
     }
     m_super_blk_in_footer = m_pdev_info.mirror_super_block;
+
+    // Validate footer superblock consistency if mirroring is enabled
+    sanity_check();
 }
 
 PhysicalDev::~PhysicalDev() { close_device(); }
 
 void PhysicalDev::write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset) {
     auto err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, offset);
+    // Assert on the header write right away so the footer write below cannot mask its result.
+    HS_REL_ASSERT(!err_c,
+                  "Super block write to header failed on dev={} at size={} offset={}, error={}, homestore will go down",
+                  m_devname, sb_size, offset, err_c.message());
 
     if (m_super_blk_in_footer) {
         auto t_offset = data_end_offset() + offset;
-        err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset);
+        auto footer_err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset);
+        HS_REL_ASSERT(
+            !footer_err_c,
+            "Super block write to footer failed on dev={} at size={} offset={}, error={}, homestore will go down",
+            m_devname, sb_size, t_offset, footer_err_c.message());
     }
-
-    HS_REL_ASSERT(!err_c, "Super block write failed on dev={} at size={} offset={}, homestore will go down", m_devname,
-                  sb_size, offset);
 }
 
 std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset) {
@@ -136,6 +147,52 @@ std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, ui
 
 void PhysicalDev::close_device() { close_and_uncache_dev(m_devname, m_iodev); }
 
+// Verifies that the footer copy of the first block matches the header copy.
+//
+// Returns immediately unless m_super_blk_in_footer is set. Otherwise reads first_block::s_io_fb_size bytes from the
+// header (hs_super_blk::first_block_offset()) and from the footer (data_end_offset() + that offset, the same
+// placement write_super_block() uses) and compares the first first_block::s_atomic_fb_size bytes of each. A read
+// error or a mismatch trips HS_REL_ASSERT.
+//
+// NOTE(review): this is invoked from the PhysicalDev constructor; confirm that path is never taken on a device whose
+// header/footer have not been written yet (e.g. first-time format), where stale contents could differ.
+void PhysicalDev::sanity_check() {
+    // Only validate footer if mirroring is enabled (HDD devices)
+    if (!m_super_blk_in_footer) { return; }
+
+    HS_LOG(INFO, device, "Validating footer superblock consistency on device={}", m_devname);
+
+    // Read header first block
+    auto header_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk,
+                                            m_pdev_info.dev_attr.align_size);
+    auto header_err = read_super_block(header_buf, first_block::s_io_fb_size, hs_super_blk::first_block_offset());
+    HS_REL_ASSERT(!header_err,
+                  "IO error reading header first block during sanity check on device={}, error={}, homestore will go down",
+                  m_devname, header_err.message());
+
+    // Read footer first block using the same offset calculation as write_super_block()
+    auto footer_offset = data_end_offset() + hs_super_blk::first_block_offset();
+    auto footer_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk,
+                                            m_pdev_info.dev_attr.align_size);
+    auto footer_err = read_super_block(footer_buf, first_block::s_io_fb_size, footer_offset);
+    HS_REL_ASSERT(
+        !footer_err,
+        "IO error reading footer first block during sanity check on device={}, offset={}, error={}, homestore will go down",
+        m_devname, footer_offset, footer_err.message());
+
+    // Compare header and footer
+    auto header_blk = r_cast< first_block* >(header_buf);
+    auto footer_blk = r_cast< first_block* >(footer_buf);
+    HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0,
+                  "Footer first block mismatch with header on device={}, header=[{}], footer=[{}], this indicates "
+                  "corruption, homestore will go down",
+                  m_devname, header_blk->to_string(), footer_blk->to_string());
+
+    hs_utils::iobuf_free(header_buf, sisl::buftag::superblk);
+    hs_utils::iobuf_free(footer_buf, sisl::buftag::superblk);
+    HS_LOG(INFO, device, "Footer superblock validated successfully on device={}", m_devname);
+}
+
 folly::Future< std::error_code > PhysicalDev::async_write(const char* data, uint32_t size, uint64_t offset,
                                                           bool part_of_batch) {
     auto const start_time = get_current_time();
diff --git a/src/lib/device/physical_dev.hpp b/src/lib/device/physical_dev.hpp
index f68ae14e6..0709e27d5 100644
--- a/src/lib/device/physical_dev.hpp
+++ b/src/lib/device/physical_dev.hpp
@@ -154,6 +154,8 @@ class PhysicalDev {
 
     std::error_code read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset);
     void write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset);
+    // Asserts that header and footer first blocks match; returns immediately when m_super_blk_in_footer is false.
+    void sanity_check();
     void close_device();
 
     //////////////////////////// Chunk Creation/Load related methods /////////////////////////////////////////
diff --git a/src/tests/test_pdev.cpp b/src/tests/test_pdev.cpp
index 4447c500b..e8003a445 100644
--- a/src/tests/test_pdev.cpp
+++ b/src/tests/test_pdev.cpp
@@ -262,6 +262,229 @@ TEST_F(PDevTest, RandomChunkOpsWithRestart) {
             num_removed, available_size);
 }
 
+// Test fixture for superblock error handling tests
+class SuperblockErrorTest : public ::testing::Test {
+protected:
+    std::string m_test_file;
+    uint64_t m_dev_size{100 * 1024 * 1024}; // 100MB
+
+    void SetUp() override {
+        m_test_file = "/tmp/test_superblock_error";
+        init_file(m_test_file, m_dev_size);
+
+        auto const is_spdk = SISL_OPTIONS["spdk"].as< bool >();
+        ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = is_spdk});
+    }
+
+    void TearDown() override {
+        iomanager.stop();
+        if (std::filesystem::exists(m_test_file)) {
+            std::filesystem::remove(m_test_file);
+        }
+    }
+
+    // Helper to corrupt a file at specific offset
+    void corrupt_file_at_offset(uint64_t offset, uint64_t size) {
+        std::fstream file(m_test_file, std::ios::binary | std::ios::in | std::ios::out);
+        ASSERT_TRUE(file.is_open());
+        file.seekp(offset);
+        std::vector<uint8_t> garbage(size, 0xAA); // Fill with garbage
+        file.write(reinterpret_cast<const char*>(garbage.data()), size);
+        file.close();
+    }
+
+    // Helper to truncate file to simulate IO errors
+    void truncate_file(uint64_t new_size) {
+        std::filesystem::resize_file(m_test_file, new_size);
+    }
+};
+
+TEST_F(SuperblockErrorTest, ReadFirstBlockIOError) {
+    LOGINFO("Test: read_first_block should crash on IO error");
+
+    // Truncate the file to be too small to contain first block
+    truncate_file(512); // Less than first_block::s_io_fb_size (4096)
+
+    // Attempt to read first block should crash with HS_REL_ASSERT
+    ASSERT_DEATH({
+        PhysicalDev::read_first_block(m_test_file, O_RDWR);
+    }, "IO error reading first block");
+}
+
+TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) {
+    LOGINFO("Test: read_first_block should return invalid first_block on corrupted data");
+
+    // Fill the first block area with garbage
+    corrupt_file_at_offset(0, 4096);
+
+    // Reading should succeed but return invalid first_block
+    ASSERT_NO_THROW({
+        auto fblk = PhysicalDev::read_first_block(m_test_file, O_RDWR);
+        ASSERT_FALSE(fblk.is_valid()) << "Corrupted first block should be invalid";
+        LOGINFO("Successfully read corrupted first block, is_valid={}", fblk.is_valid());
+    });
+}
+
+TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) {
+    LOGINFO("Test: Footer validation should detect header/footer mismatch on HDD");
+
+    // First, create a properly formatted device
+    std::vector<dev_info> dev_infos;
+    dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data);
+
+    auto dmgr = std::make_unique<DeviceManager>(
+        dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+
+    ASSERT_TRUE(dmgr->is_first_time_boot());
+    dmgr->format_devices();
+    dmgr->commit_formatting();
+
+    // Get the pdev to check if it has footer mirroring
+    auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data);
+    ASSERT_GT(pdevs.size(), 0u);
+    auto pdev = pdevs[0];
+
+    // For HDD devices (with footer mirroring), test footer validation
+    // NOTE(review): atomic_page_size() > 0 stands in for "footer mirroring enabled" here; confirm the two agree.
+    if (pdev->atomic_page_size() > 0) {
+        LOGINFO("Device has footer mirroring enabled, testing footer corruption detection");
+
+        dmgr.reset();
+        iomanager.stop();
+
+        // Calculate footer offset: data_end_offset = devsize - data_offset
+        // Footer first block is at: data_end_offset + first_block_offset (0)
+        auto data_offset = hs_super_blk::first_block_offset() +
+                           hs_super_blk::total_size(dev_infos[0]);
+        auto footer_offset = m_dev_size - data_offset;
+
+        LOGINFO("Corrupting footer at offset={}", footer_offset);
+        corrupt_file_at_offset(footer_offset, 512);
+
+        // Restart should crash because footer doesn't match header
+        ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false});
+        ASSERT_DEATH({
+            auto dmgr2 = std::make_unique<DeviceManager>(
+                dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+            dmgr2->load_devices();
+        }, "Footer first block mismatch");
+    } else {
+        LOGINFO("Device does not have footer mirroring, skipping footer corruption test");
+    }
+}
+
+TEST_F(SuperblockErrorTest, FooterIOError) {
+    LOGINFO("Test: Footer read IO error should be caught during sanity_check");
+
+    // First, create a properly formatted device
+    std::vector<dev_info> dev_infos;
+    dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data);
+
+    auto dmgr = std::make_unique<DeviceManager>(
+        dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+
+    ASSERT_TRUE(dmgr->is_first_time_boot());
+    dmgr->format_devices();
+    dmgr->commit_formatting();
+
+    auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data);
+    ASSERT_GT(pdevs.size(), 0u);
+    auto pdev = pdevs[0];
+
+    // For HDD devices, test footer IO error
+    // NOTE(review): atomic_page_size() > 0 stands in for "footer mirroring enabled" here; confirm the two agree.
+    if (pdev->atomic_page_size() > 0) {
+        LOGINFO("Device has footer mirroring enabled, testing footer IO error");
+
+        dmgr.reset();
+        iomanager.stop();
+
+        // Truncate file to cut off the footer area
+        auto data_offset = hs_super_blk::first_block_offset() +
+                           hs_super_blk::total_size(dev_infos[0]);
+        auto truncate_size = data_offset + 1024; // Cut off before footer
+
+        LOGINFO("Truncating file to size={} to cause footer IO error", truncate_size);
+        truncate_file(truncate_size);
+
+        // Restart should crash because footer cannot be read
+        ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false});
+        ASSERT_DEATH({
+            auto dmgr2 = std::make_unique<DeviceManager>(
+                dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+            dmgr2->load_devices();
+        }, "IO error reading footer first block");
+    } else {
+        LOGINFO("Device does not have footer mirroring, skipping footer IO error test");
+    }
+}
+
+TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) {
+    LOGINFO("Test: Non-HDD devices should skip footer validation");
+
+    // Create device as Fast type (SSD), which typically doesn't have footer mirroring
+    std::vector<dev_info> dev_infos;
+    dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast);
+
+    auto dmgr = std::make_unique<DeviceManager>(
+        dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+
+    ASSERT_TRUE(dmgr->is_first_time_boot());
+    dmgr->format_devices();
+    dmgr->commit_formatting();
+
+    auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Fast);
+    ASSERT_GT(pdevs.size(), 0u);
+
+    // Should restart successfully even if we corrupt the footer area
+    dmgr.reset();
+    iomanager.stop();
+
+    // Corrupt what would be the footer area
+    auto data_offset = hs_super_blk::first_block_offset() +
+                       hs_super_blk::total_size(dev_infos[0]);
+    auto footer_offset = m_dev_size - data_offset;
+    corrupt_file_at_offset(footer_offset, 4096);
+
+    // Should succeed because SSD doesn't validate footer
+    ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false});
+    ASSERT_NO_THROW({
+        auto dmgr2 = std::make_unique<DeviceManager>(
+            dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+        dmgr2->load_devices();
+        LOGINFO("Successfully loaded device without footer validation");
+    });
+}
+
+TEST_F(SuperblockErrorTest, ValidFooterMatchesHeader) {
+    LOGINFO("Test: Valid footer should match header on HDD device");
+
+    std::vector<dev_info> dev_infos;
+    dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data);
+
+    auto dmgr = std::make_unique<DeviceManager>(
+        dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+
+    ASSERT_TRUE(dmgr->is_first_time_boot());
+    dmgr->format_devices();
+    dmgr->commit_formatting();
+
+    auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data);
+    ASSERT_GT(pdevs.size(), 0u);
+
+    // Restart should succeed with matching header and footer
+    dmgr.reset();
+    iomanager.stop();
+
+    ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false});
+    ASSERT_NO_THROW({
+        auto dmgr2 = std::make_unique<DeviceManager>(
+            dev_infos, [](const vdev_info&, bool) -> shared<VirtualDev> { return nullptr; });
+        dmgr2->load_devices();
+        LOGINFO("Successfully validated matching header and footer");
+    });
+}
+
 int main(int argc, char* argv[]) {
     SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr);
     ::testing::InitGoogleTest(&argc, argv);