Skip to content

Commit

Permalink
rabbit_db: force_reset command is unsupported with Khepri
Browse files Browse the repository at this point in the history
[Why]
The `force_reset` command simply removes local files on disk for the
local node.

In the case of Ra, this can't work because the rest of the cluster does
not know about the forced-reset node. Therefore the leader will continue
to send `append_entry` commands to the reset node.

If that node restarts and receives these messages, it will either join
the cluster again (because it's on an older Raft term) or it will hit an
assertion and exit (because it's on the same Raft term).

[How]
Given we can't really support this scenario and it has little value,
the command will now return an error if someone attempts a `force_reset`
with a node running Khepri.

This also deprecates the command: once Mnesia support is removed, the
command will be removed at the same time.
  • Loading branch information
dumbbell committed Feb 10, 2025
1 parent 211fc5b commit eb54ce9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 39 deletions.
9 changes: 8 additions & 1 deletion deps/rabbit/docs/rabbitmqctl.8
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ next time it is started:
.sp
.Dl rabbitmqctl force_boot
.\" ------------------------------------------------------------------
.It Cm force_reset
.It Cm force_reset Em (deprecated)
.Pp
Forcefully returns a RabbitMQ node to its virgin state.
.Pp
Expand All @@ -359,6 +359,13 @@ management database state and cluster configuration.
It should only be used as a last resort if the database or cluster
configuration has been corrupted.
.Pp
The
.Cm force_reset
command is
.Sy deprecated .
It remains available when the Mnesia metadata store is used.
It is unsupported with the Khepri metadata store.
.Pp
For
.Cm reset
and
Expand Down
8 changes: 5 additions & 3 deletions deps/rabbit/src/rabbit_db.erl
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,13 @@ force_reset_using_mnesia() ->
#{domain => ?RMQLOG_DOMAIN_DB}),
rabbit_mnesia:force_reset().

-spec force_reset_using_khepri() -> no_return().

force_reset_using_khepri() ->
?LOG_DEBUG(
"DB: resetting node forcefully (using Khepri)",
?LOG_ERROR(
"DB: resetting node forcefully is unsupported with Khepri",
#{domain => ?RMQLOG_DOMAIN_DB}),
rabbit_khepri:force_reset().
throw({error, "Forced reset is unsupported with Khepri"}).

-spec force_load_on_next_boot() -> Ret when
Ret :: ok.
Expand Down
20 changes: 1 addition & 19 deletions deps/rabbit/src/rabbit_khepri.erl
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@
-export([check_cluster_consistency/0,
check_cluster_consistency/2,
node_info/0]).
-export([reset/0,
force_reset/0]).
-export([reset/0]).
-export([cluster_status_from_khepri/0,
cli_cluster_status/0]).

Expand Down Expand Up @@ -601,23 +600,6 @@ reset() ->

%% @private

%% Forcefully resets the local Khepri-backed store by stopping it and
%% deleting its on-disk data.
%%
%% Only proceeds when `rabbit:is_running()' returns `false'. Returns `ok' on
%% success; throws `{error, rabbitmq_unexpectedly_running}' when
%% `rabbit:is_running()' returns `true'. Any step that does not return `ok'
%% fails with a `badmatch'.
force_reset() ->
case rabbit:is_running() of
false ->
ok = khepri:stop(?RA_CLUSTER_NAME),
%% The data directory is looked up before the Ra system is stopped below.
%% NOTE(review): presumably the lookup requires the Ra system to still be
%% available -- confirm before reordering.
DataDir = maps:get(data_dir, ra_system:fetch(?RA_SYSTEM)),
ok = rabbit_ra_systems:ensure_ra_system_stopped(?RA_SYSTEM),
%% Remove every entry matched directly under the data directory; the
%% directory itself is not part of the wildcard.
ok = rabbit_file:recursive_delete(
filelib:wildcard(DataDir ++ "/*")),

%% Best effort: the result of deleting the file named by
%% `rabbit_guid:filename()' is deliberately ignored.
_ = file:delete(rabbit_guid:filename()),
ok;
true ->
throw({error, rabbitmq_unexpectedly_running})
end.

%% @private

%% Asks `ra_server_proc' to force-shrink the membership to the current
%% member, addressing the server as `{?RA_CLUSTER_NAME, node()}'.
%% Returns `ok'; any other result fails with a `badmatch'.
force_shrink_member_to_current_member() ->
    LocalServerId = {?RA_CLUSTER_NAME, node()},
    ok = ra_server_proc:force_shrink_members_to_current_member(LocalServerId).
Expand Down
21 changes: 5 additions & 16 deletions deps/rabbit/test/clustering_management_SUITE.erl
Original file line number Diff line number Diff line change
Expand Up @@ -953,22 +953,11 @@ force_reset_node_in_khepri(Config) ->

stop_join_start(Config, Rabbit, Hare),
stop_app(Config, Rabbit),
ok = force_reset(Config, Rabbit),
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, [Hare]),
%% Khepri is stopped, so it won't report anything.
assert_status({[Rabbit], [], [Rabbit], [Rabbit], []}, [Rabbit]),
%% Hare thinks that Rabbit is still clustered
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]},
[Hare]),
ok = start_app(Config, Rabbit),
assert_not_clustered(Rabbit),
%% We can rejoin Rabbit and Hare. Unlike with Mnesia, we try to solve the
%% inconsistency instead of returning an error.
ok = stop_app(Config, Rabbit),
?assertEqual(ok, join_cluster(Config, Rabbit, Hare, false)),
ok = start_app(Config, Rabbit),
assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Rabbit, Hare]},
[Rabbit, Hare]).
{error, 69, Msg} = force_reset(Config, Rabbit),
?assertEqual(
match,
re:run(
Msg, "Forced reset is unsupported with Khepri", [{capture, none}])).

status_with_alarm(Config) ->
[Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config,
Expand Down

0 comments on commit eb54ce9

Please sign in to comment.