Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions src/ra_snapshot.erl
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,8 @@ begin_snapshot(#{index := Idx,
-spec promote_checkpoint(Idx :: ra_index(), State0 :: state()) ->
{boolean(), State :: state(), Effects :: [effect()]}.
promote_checkpoint(PromotionIdx,
#?MODULE{module = Mod,
#?MODULE{uid = UId,
module = Mod,
sync_server = SyncServer,
snapshot_directory = SnapDir,
checkpoint_directory = CheckpointDir,
Expand All @@ -660,7 +661,12 @@ promote_checkpoint(PromotionIdx,
Indexes = case indexes(Snapshot) of
{ok, Idxs} ->
Idxs;
_ ->
{error, Err} ->
?WARN("ra_snapshot: ~ts: indexes file "
"corrupt during checkpoint "
"promotion (~w), using empty "
"indexes",
[UId, Err]),
[]
end,
EndTime = erlang:monotonic_time(),
Expand Down Expand Up @@ -1037,7 +1043,20 @@ indexes(Dir) ->
{ok, <<?IDX_MAGIC, ?IDX_VERSION:8/unsigned, Crc:32/unsigned, Data/binary>>} ->
case erlang:crc32(Data) of
Crc ->
{ok, binary_to_term(Data)};
try
case binary_to_term(Data) of
Seq when is_list(Seq) -> {ok, Seq};
Invalid ->
?WARN("ra_snapshot: indexes file ~ts: CRC passed "
"but decoded term is not a list: ~0p",
[File, Invalid]),
{error, invalid_format}
end
catch _:Err ->
?WARN("ra_snapshot: indexes file ~ts: CRC passed but "
"binary_to_term failed: ~w", [File, Err]),
{error, invalid_format}
end;
_ ->
{error, checksum_error}
end;
Expand All @@ -1047,9 +1066,17 @@ indexes(Dir) ->
%% Backward compatibility: old format without header
%% Try to parse as plain term_to_binary data
try
{ok, binary_to_term(Bin)}
case binary_to_term(Bin) of
Seq when is_list(Seq) -> {ok, Seq};
Invalid ->
?WARN("ra_snapshot: indexes file ~ts: old format term "
"is not a list: ~0p", [File, Invalid]),
{error, invalid_format}
end
catch
_:_ ->
?WARN("ra_snapshot: indexes file ~ts: failed to parse "
"as term (empty or truncated)", [File]),
{error, invalid_format}
end;
{error, enoent} ->
Expand Down
74 changes: 53 additions & 21 deletions test/ra_log_2_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ all_tests() ->
snapshot_installation_with_live_indexes,
init_with_dangling_symlink,
init_after_missing_segments_event,
snapshot_install_with_empty_indexes_file
snapshot_install_with_corrupted_indexes
].

groups() ->
Expand Down Expand Up @@ -685,7 +685,11 @@ recover_after_snapshot(Config) ->
last_written_index_term := {2, 1}}, Overview),
ok.

snapshot_install_with_empty_indexes_file(Config) ->
snapshot_install_with_corrupted_indexes(Config) ->
%% Verify that any byte sequence in the indexes file (empty, truncated,
%% garbage, adversarial) never causes startup to abort. The server must
%% always recover gracefully: either use the indexes (if valid) or rebuild
%% them from the snapshot.
UId = ?config(uid, Config),
MachineConf = {module, ?MODULE, #{}},
LogConf = #{uid => UId,
Expand Down Expand Up @@ -739,25 +743,53 @@ snapshot_install_with_empty_indexes_file(Config) ->
ra_log:close(Log4),

IndexesFile = filename:join(SnapDir, <<"indexes">>),
ok = file:write_file(IndexesFile, <<>>),

application:stop(ra),
start_ra(Config),
timer:sleep(100),
ct:pal("snapshot state ~p",
[ra_log_snapshot_state:read(ra_log_snapshot_state, UId)]),


Log5 = ra_log_init(Config, LogConf),

%% Fetch items 1..10 (should be dropped because of snapshot)
{[], _} = ra_log_take(1, 10, Log5),

%% Fetch items 11..15
{[_, _, _, _, _], _} = ra_log_take(11, 16, Log5),

ra_log:close(Log5),
ok.
{ok, ValidIndexesContent} = file:read_file(IndexesFile),
TruncatedBytes = binary:part(ValidIndexesContent, 0,
max(1, byte_size(ValidIndexesContent) div 2)),

%% CRC of <<>> is 0, so this 9-byte blob (magic, version, CRC, no payload)
%% passes the CRC check, after which the empty payload <<>> is handed to
%% binary_to_term, which raises badarg — an uncaught crash before the fix.
CrcOfEmpty = erlang:crc32(<<>>),
Gap1Trigger = <<"RASI", 1, CrcOfEmpty:32/unsigned>>,

CorruptContents =
[
<<>>, %% empty
<<"garbage">>, %% random bytes, no RASI magic
binary:copy(<<0>>, 50), %% all-zero bytes
<<"RASI", 99>>, %% valid magic, unknown version
<<"RASI", 1, 0:32, "notterm">>, %% valid header, wrong CRC
Gap1Trigger, %% CRC of <<>> passes, binary_to_term(<<>>) would throw
term_to_binary(some_atom), %% old format, non-list Erlang term
term_to_binary(42), %% old format, integer
term_to_binary(#{}), %% old format, map
term_to_binary([]), %% old format, valid empty ra_seq — must work
TruncatedBytes %% first half of a legitimately written file
],

lists:foreach(
fun (Content) ->
ct:pal("Testing indexes file content (~w bytes): ~w",
[byte_size(Content), Content]),
ok = file:write_file(IndexesFile, Content),

application:stop(ra),
start_ra(Config),
timer:sleep(100),

Log = ra_log_init(Config, LogConf),

%% Fetch items 1..10 (should be dropped because of snapshot)
{[], _} = ra_log_take(1, 10, Log),
%% Fetch items 11..15
{[_, _, _, _, _], _} = ra_log_take(11, 16, Log),

ra_log:close(Log),

%% Restore a clean indexes file for the next iteration
ok = file:write_file(IndexesFile, ValidIndexesContent)
end,
CorruptContents).

writes_lower_than_snapshot_index_are_dropped(Config) ->
logger:set_primary_config(level, debug),
Expand Down
Loading