From eb54ce95b5ba31707c34074e918d92000455c33d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?=
 <jean-sebastien.pedron@dumbbell.fr>
Date: Fri, 7 Feb 2025 16:44:37 +0100
Subject: [PATCH] rabbit_db: `force_reset` command is unsupported with Khepri

[Why]
The `force_reset` command simply removes local files on disk for the
local node.

In the case of Ra, this can't work because the rest of the cluster does
not know about the forced-reset node. Therefore the leader will continue
to send `append_entry` commands to the reset node.

If that node restarts and receives these messages, it will either join
the cluster again (because it's on an older Raft term) or it will hit an
assertion and exit (because it's on the same Raft term).

[How]
Given we can't really support this scenario and it has little value,
the command will now return an error if someone attempts a `force_reset`
with a node running Khepri.

This also deprecates the command: once Mnesia support is removed, the
command will be removed at the same time.
---
 deps/rabbit/docs/rabbitmqctl.8                |  9 +++++++-
 deps/rabbit/src/rabbit_db.erl                 |  8 ++++---
 deps/rabbit/src/rabbit_khepri.erl             | 20 +-----------------
 .../test/clustering_management_SUITE.erl      | 21 +++++--------------
 4 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/deps/rabbit/docs/rabbitmqctl.8 b/deps/rabbit/docs/rabbitmqctl.8
index da5abcd2ccdc..64ef2b798d2c 100644
--- a/deps/rabbit/docs/rabbitmqctl.8
+++ b/deps/rabbit/docs/rabbitmqctl.8
@@ -346,7 +346,7 @@ next time it is started:
 .sp
 .Dl rabbitmqctl force_boot
 .\" ------------------------------------------------------------------
-.It Cm force_reset
+.It Cm force_reset Em (deprecated)
 .Pp
 Forcefully returns a RabbitMQ node to its virgin state.
 .Pp
@@ -359,6 +359,13 @@ management database state and cluster configuration.
 It should only be used as a last resort if the database or cluster
 configuration has been corrupted.
 .Pp
+The
+.Cm force_reset
+command is
+.Sy deprecated .
+It remains available when the Mnesia metadata store is used.
+It is unsupported with the Khepri metadata store.
+.Pp
 For
 .Cm reset
 and
diff --git a/deps/rabbit/src/rabbit_db.erl b/deps/rabbit/src/rabbit_db.erl
index a506c91259a2..2bf52b3a01c8 100644
--- a/deps/rabbit/src/rabbit_db.erl
+++ b/deps/rabbit/src/rabbit_db.erl
@@ -163,11 +163,13 @@ force_reset_using_mnesia() ->
       #{domain => ?RMQLOG_DOMAIN_DB}),
     rabbit_mnesia:force_reset().
 
+-spec force_reset_using_khepri() -> no_return().
+
 force_reset_using_khepri() ->
-    ?LOG_DEBUG(
-      "DB: resetting node forcefully (using Khepri)",
+    ?LOG_ERROR(
+      "DB: resetting node forcefully is unsupported with Khepri",
       #{domain => ?RMQLOG_DOMAIN_DB}),
-    rabbit_khepri:force_reset().
+    throw({error, "Forced reset is unsupported with Khepri"}).
 
 -spec force_load_on_next_boot() -> Ret when
       Ret :: ok.
diff --git a/deps/rabbit/src/rabbit_khepri.erl b/deps/rabbit/src/rabbit_khepri.erl
index efb97a6d3532..537021efa341 100644
--- a/deps/rabbit/src/rabbit_khepri.erl
+++ b/deps/rabbit/src/rabbit_khepri.erl
@@ -168,8 +168,7 @@
 -export([check_cluster_consistency/0,
          check_cluster_consistency/2,
          node_info/0]).
--export([reset/0,
-         force_reset/0]).
+-export([reset/0]).
 -export([cluster_status_from_khepri/0,
          cli_cluster_status/0]).
 
@@ -601,23 +600,6 @@ reset() ->
 
 %% @private
 
-force_reset() ->
-    case rabbit:is_running() of
-        false ->
-            ok = khepri:stop(?RA_CLUSTER_NAME),
-            DataDir = maps:get(data_dir, ra_system:fetch(?RA_SYSTEM)),
-            ok = rabbit_ra_systems:ensure_ra_system_stopped(?RA_SYSTEM),
-            ok = rabbit_file:recursive_delete(
-                   filelib:wildcard(DataDir ++ "/*")),
-
-            _ = file:delete(rabbit_guid:filename()),
-            ok;
-        true ->
-            throw({error, rabbitmq_unexpectedly_running})
-    end.
-
-%% @private
-
 force_shrink_member_to_current_member() ->
     ok = ra_server_proc:force_shrink_members_to_current_member(
            {?RA_CLUSTER_NAME, node()}).
diff --git a/deps/rabbit/test/clustering_management_SUITE.erl b/deps/rabbit/test/clustering_management_SUITE.erl
index 881342468051..7e18242ccaea 100644
--- a/deps/rabbit/test/clustering_management_SUITE.erl
+++ b/deps/rabbit/test/clustering_management_SUITE.erl
@@ -953,22 +953,11 @@ force_reset_node_in_khepri(Config) ->
 
     stop_join_start(Config, Rabbit, Hare),
     stop_app(Config, Rabbit),
-    ok = force_reset(Config, Rabbit),
-    assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, [Hare]),
-    %% Khepri is stopped, so it won't report anything.
-    assert_status({[Rabbit], [], [Rabbit], [Rabbit], []}, [Rabbit]),
-    %% Hare thinks that Rabbit is still clustered
-    assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]},
-                          [Hare]),
-    ok = start_app(Config, Rabbit),
-    assert_not_clustered(Rabbit),
-    %% We can rejoin Rabbit and Hare. Unlike with Mnesia, we try to solve the
-    %% inconsistency instead of returning an error.
-    ok = stop_app(Config, Rabbit),
-    ?assertEqual(ok, join_cluster(Config, Rabbit, Hare, false)),
-    ok = start_app(Config, Rabbit),
-    assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Rabbit, Hare]},
-                          [Rabbit, Hare]).
+    {error, 69, Msg} = force_reset(Config, Rabbit),
+    ?assertEqual(
+       match,
+       re:run(
+         Msg, "Forced reset is unsupported with Khepri", [{capture, none}])).
 
 status_with_alarm(Config) ->
     [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config,