From ce650adadd33d5c72d78a854f5b032cc4cdef5b8 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 7 Aug 2024 18:08:03 -0400 Subject: [PATCH 1/8] refactor: Change recursive_mutex to mutex in DatabaseRotatingImp - Follow-up to #4989, which stated "Ideally, the code should be rewritten so it doesn't hold the mutex during the callback and the mutex should be changed back to a regular mutex." --- src/xrpld/nodestore/DatabaseRotating.h | 4 ++++ .../nodestore/detail/DatabaseRotatingImp.cpp | 16 +++++++++++++++- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 10 +++------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index 10f575c4662..d923def5f3e 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -45,6 +45,10 @@ class DatabaseRotating : public Database /** Rotates the backends. @param f A function executed before the rotation and under the same lock + + This function only has one caller in SHAMapStoreImp::run. The locking + for this function will need to be rethought another caller is ever + added. */ virtual void rotateWithLock(std::function( diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 58cc3599dc6..8b9a311d3ac 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -45,9 +45,23 @@ DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { + auto const writableBackend = [&] { + std::lock_guard lock(mutex_); + return writableBackend_; + }(); + + auto newBackend = f(writableBackend->getName()); + std::lock_guard lock(mutex_); - auto newBackend = f(writableBackend_->getName()); + // This function only has one caller, which is syncronous, and this is the + // only place other than the ctor where writableBackend_ can change. Even + // without a lock, it should be impossible for writableBackend_ to have + // changed. + assert(writableBackend_ == writableBackend); + if (writableBackend_ != writableBackend) + LogicError("Backend changed in the middle of a rotation"); + archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 5183aa1e2e4..d857e3dc2d6 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -48,6 +48,8 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } + // This function only has one caller in SHAMapStoreImp::run. The locking for + // this function will need to be rethought another caller is ever added. void rotateWithLock( std::function( @@ -82,13 +84,7 @@ class DatabaseRotatingImp : public DatabaseRotating private: std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - // This needs to be a recursive mutex because callbacks in `rotateWithLock` - // can call function that also lock the mutex. A current example of this is - // a callback from SHAMapStoreImp, which calls `clearCaches`. This - // `clearCaches` call eventually calls `fetchNodeObject` which tries to - // relock the mutex. It would be desirable to rewrite the code so the lock - // was not held during a callback. - mutable std::recursive_mutex mutex_; + mutable std::mutex mutex_; std::shared_ptr fetchNodeObject( From b8413ae1e1cac318e77989828ffbfe7205dd2261 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 5 Feb 2025 18:32:28 -0500 Subject: [PATCH 2/8] Review feedback from @Bronek: * Use a second mutex to protect the backends from modification * Remove a bunch of warning comments --- src/xrpld/nodestore/DatabaseRotating.h | 6 +----- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 12 ++++-------- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 6 ++++-- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index d923def5f3e..259dae4fe65 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -44,11 +44,7 @@ class DatabaseRotating : public Database /** Rotates the backends. - @param f A function executed before the rotation and under the same lock - - This function only has one caller in SHAMapStoreImp::run. The locking - for this function will need to be rethought another caller is ever - added. + @param f A function executed before the rotation */ virtual void rotateWithLock(std::function( diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 8b9a311d3ac..d2a4e2fab2c 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -45,6 +45,10 @@ DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { + // backendMutex_ prevents writableBackend_ and archiveBackend_ from changing + // while the main mutex_ is released during the callback. + std::lock_guard backendLock(backendMutex_); + auto const writableBackend = [&] { std::lock_guard lock(mutex_); return writableBackend_; @@ -54,14 +58,6 @@ DatabaseRotatingImp::rotateWithLock( std::lock_guard lock(mutex_); - // This function only has one caller, which is syncronous, and this is the - // only place other than the ctor where writableBackend_ can change. Even - // without a lock, it should be impossible for writableBackend_ to have - // changed. - assert(writableBackend_ == writableBackend); - if (writableBackend_ != writableBackend) - LogicError("Backend changed in the middle of a rotation"); - archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index d857e3dc2d6..0ab9ca6d211 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -48,8 +48,6 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } - // This function only has one caller in SHAMapStoreImp::run. The locking for - // this function will need to be rethought another caller is ever added. void rotateWithLock( std::function( @@ -82,8 +80,12 @@ class DatabaseRotatingImp : public DatabaseRotating sweep() override; private: + // backendMutex_ is only needed when the *Backend_ members are modified. + // Reads are protected by the general mutex_. + std::mutex backendMutex_; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; + mutable std::mutex mutex_; std::shared_ptr From d912b50cb91c7e82d82718ef7e0df7dd3050ffe4 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Fri, 7 Feb 2025 17:07:24 -0500 Subject: [PATCH 3/8] Review feedback from @bthomee and @vvysokikh1: - Rewrite the locking in DatabaseRotatingImp::rotateWithLock to use a shared_lock, and write a unit test to show (as much as possible) that it won't deadlock. --- src/test/app/SHAMapStore_test.cpp | 120 ++++++++++++++++++ src/xrpld/app/misc/SHAMapStoreImp.cpp | 31 +++-- src/xrpld/nodestore/DatabaseRotating.h | 3 +- .../nodestore/detail/DatabaseRotatingImp.cpp | 61 +++++++-- .../nodestore/detail/DatabaseRotatingImp.h | 18 ++- 5 files changed, 200 insertions(+), 33 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index 376cb4eb7ba..d521baa3085 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -20,10 +20,14 @@ #include #include #include +#include #include #include #include +#include #include +#include +#include namespace ripple { namespace test { @@ -518,12 +522,128 @@ class SHAMapStore_test : public beast::unit_test::suite lastRotated = ledgerSeq - 1; } + std::unique_ptr + makeBackendRotating( + jtx::Env& env, + NodeStoreScheduler& scheduler, + std::string path) + { + Section section{ + env.app().config().section(ConfigSection::nodeDatabase())}; + boost::filesystem::path newPath; + + if (!BEAST_EXPECT(path.size())) + return {}; + newPath = path; + section.set("path", newPath.string()); + + auto backend{NodeStore::Manager::instance().make_Backend( + section, + megabytes(env.app().config().getValueFor( + SizedItem::burstSize, std::nullopt)), + scheduler, + env.app().logs().journal("NodeStoreTest"))}; + backend->open(); + return backend; + } + void + testRotateWithLockContention() + { + // The only purpose of this test is to ensure that if something that + // should never happen happens, we don't get a deadlock. + testcase("rotate with lock contention"); + + using namespace jtx; + Env env(*this, envconfig(onlineDelete)); + + ///////////////////////////////////////////////////////////// + // Create the backend. Normally, SHAMapStoreImp handles all these + // details + auto nscfg = env.app().config().section(ConfigSection::nodeDatabase()); + nscfg.set( + NodeStore::DatabaseRotatingImp::unitTestFlag, std::to_string(true)); + + // Provide default values: + if (!nscfg.exists("cache_size")) + nscfg.set( + "cache_size", + std::to_string(env.app().config().getValueFor( + SizedItem::treeCacheSize, std::nullopt))); + + if (!nscfg.exists("cache_age")) + nscfg.set( + "cache_age", + std::to_string(env.app().config().getValueFor( + SizedItem::treeCacheAge, std::nullopt))); + + NodeStoreScheduler scheduler(env.app().getJobQueue()); + + std::string const writableDb = "write"; + std::string const archiveDb = "archive"; + auto writableBackend = makeBackendRotating(env, scheduler, writableDb); + auto archiveBackend = makeBackendRotating(env, scheduler, archiveDb); + + // Create NodeStore with two backends to allow online deletion of + // data + constexpr int readThreads = 4; + auto dbr = std::make_unique( + scheduler, + readThreads, + std::move(writableBackend), + std::move(archiveBackend), + nscfg, + env.app().logs().journal("NodeStoreTest")); + + ///////////////////////////////////////////////////////////// + // Create the impossible situation. Get several calls to rotateWithLock + // going in parallel using a callback that just delays + using namespace std::chrono_literals; + std::atomic threadNum = 0; + auto const cb = [&](std::string const& writableBackendName) { + using namespace std::chrono_literals; + BEAST_EXPECT(writableBackendName == "write"); + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + std::this_thread::sleep_for(5s); + return newBackend; + }; + + std::atomic successes = 0; + std::atomic failures = 0; + std::vector threads; + threads.reserve(5); + for (int i = 0; i < 5; ++i) + { + threads.emplace_back([&]() { + auto const result = dbr->rotateWithLock(cb); + if (result) + ++successes; + else + ++failures; + }); + // There's no point in trying to time the threads to line up at + // exact points, but introduce a little bit of staggering to be more + // "realistic". + std::this_thread::sleep_for(10ms * i); + } + for (auto& t : threads) + { + t.join(); + } + BEAST_EXPECT(successes == 1); + BEAST_EXPECT(failures == 4); + // Only one thread will invoke the callback to increment threadNum + BEAST_EXPECT(threadNum == 1); + BEAST_EXPECT(dbr->getName() == "1"); + } + void run() override { testClear(); testAutomatic(); testCanDelete(); + testRotateWithLockContention(); } }; diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index 3a530e0e410..c69ffc7c7a2 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -366,18 +366,25 @@ SHAMapStoreImp::run() lastRotated = validatedSeq; - dbRotating_->rotateWithLock( - [&](std::string const& writableBackendName) { - SavedState savedState; - savedState.writableDb = newBackend->getName(); - savedState.archiveDb = writableBackendName; - savedState.lastRotated = lastRotated; - state_db_.setState(savedState); - - clearCaches(validatedSeq); - - return std::move(newBackend); - }); + if (!dbRotating_->rotateWithLock( + [&](std::string const& writableBackendName) { + SavedState savedState; + savedState.writableDb = newBackend->getName(); + savedState.archiveDb = writableBackendName; + savedState.lastRotated = lastRotated; + state_db_.setState(savedState); + + clearCaches(validatedSeq); + + return std::move(newBackend); + })) + { + JLOG(journal_.error()) + << validatedSeq + << " rotation failed. Discard unused new backend."; + newBackend->setDeletePath(); + return; + } JLOG(journal_.warn()) << "finished rotation " << validatedSeq; } diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index 259dae4fe65..af7fe6674fc 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -46,7 +46,8 @@ class DatabaseRotating : public Database @param f A function executed before the rotation */ - virtual void + [[nodiscard]] + virtual bool rotateWithLock(std::function( std::string const& writableBackendName)> const& f) = 0; }; diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index d2a4e2fab2c..c44dd0cf333 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -33,6 +33,7 @@ DatabaseRotatingImp::DatabaseRotatingImp( : DatabaseRotating(scheduler, readThreads, config, j) , writableBackend_(std::move(writableBackend)) , archiveBackend_(std::move(archiveBackend)) + , unitTest_(get(config, unitTestFlag, false)) { if (writableBackend_) fdRequired_ += writableBackend_->fdRequired(); @@ -40,40 +41,72 @@ DatabaseRotatingImp::DatabaseRotatingImp( fdRequired_ += archiveBackend_->fdRequired(); } -void +[[nodiscard]] bool DatabaseRotatingImp::rotateWithLock( std::function( std::string const& writableBackendName)> const& f) { - // backendMutex_ prevents writableBackend_ and archiveBackend_ from changing - // while the main mutex_ is released during the callback. - std::lock_guard backendLock(backendMutex_); + // This function should be the only one taking any kind of unique/write + // lock, and should only be called once at a time by its syncronous caller. + // The extra checking involving the "rotating" flag, are to ensure that if + // that ever changes, we still avoid deadlocks and incorrect behavior. + { + std::unique_lock writeLock(mutex_); + if (!rotating) + { + // Once this flag is set, we're committed to doing the work and + // returning true. + rotating = true; + } + else + { + // This should only be reachable through unit tests. + XRPL_ASSERT( + unitTest_, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " + "unit testing"); + return false; + } + } auto const writableBackend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock readLock(mutex_); + XRPL_ASSERT( + rotating, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock rotating " + "flag set"); + return writableBackend_; }(); auto newBackend = f(writableBackend->getName()); - std::lock_guard lock(mutex_); + // Because of the "rotating" flag, there should be no way any other thread + // is waiting at this point. As long as they all release the shared_lock + // before taking the unique_lock (which they have to, because upgrading is + // unsupported), there can be no deadlock. + std::unique_lock writeLock(mutex_); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); + + rotating = false; + + return true; } std::string DatabaseRotatingImp::getName() const { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_->getName(); } std::int32_t DatabaseRotatingImp::getWriteLoad() const { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_->getWriteLoad(); } @@ -81,7 +114,7 @@ void DatabaseRotatingImp::importDatabase(Database& source) { auto const backend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_; }(); @@ -91,7 +124,7 @@ DatabaseRotatingImp::importDatabase(Database& source) void DatabaseRotatingImp::sync() { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); writableBackend_->sync(); } @@ -105,7 +138,7 @@ DatabaseRotatingImp::store( auto nObj = NodeObject::createObject(type, std::move(data), hash); auto const backend = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return writableBackend_; }(); @@ -159,7 +192,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; auto [writable, archive] = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); @@ -173,7 +206,7 @@ DatabaseRotatingImp::fetchNodeObject( { { // Refresh the writable backend pointer - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); writable = writableBackend_; } @@ -194,7 +227,7 @@ DatabaseRotatingImp::for_each( std::function)> f) { auto [writable, archive] = [&] { - std::lock_guard lock(mutex_); + std::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 0ab9ca6d211..3ae7ce8dfc9 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -22,7 +22,7 @@ #include -#include +#include namespace ripple { namespace NodeStore { @@ -48,7 +48,7 @@ class DatabaseRotatingImp : public DatabaseRotating stop(); } - void + [[nodiscard]] bool rotateWithLock( std::function( std::string const& writableBackendName)> const& f) override; @@ -79,14 +79,20 @@ class DatabaseRotatingImp : public DatabaseRotating void sweep() override; + // Include the space in the name to ensure it can't be set in a file + static constexpr auto unitTestFlag = " unit_test"; + private: - // backendMutex_ is only needed when the *Backend_ members are modified. - // Reads are protected by the general mutex_. - std::mutex backendMutex_; + bool const unitTest_; + bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; - mutable std::mutex mutex_; + // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock + // "Shared mutexes do not support direct transition from shared to unique + // ownership mode: the shared lock has to be relinquished with + // unlock_shared() before exclusive ownership may be obtained with lock()." + mutable std::shared_timed_mutex mutex_; std::shared_ptr fetchNodeObject( From 3f7fb662e574d0e4e683795753b90b9a418dd87a Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Fri, 7 Feb 2025 17:26:03 -0500 Subject: [PATCH 4/8] Update levelization tracking --- Builds/levelization/results/ordering.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Builds/levelization/results/ordering.txt b/Builds/levelization/results/ordering.txt index acf8daafb79..681f76dd5db 100644 --- a/Builds/levelization/results/ordering.txt +++ b/Builds/levelization/results/ordering.txt @@ -19,6 +19,7 @@ test.app > xrpl.basics test.app > xrpld.app test.app > xrpld.core test.app > xrpld.ledger +test.app > xrpld.nodestore test.app > xrpld.overlay test.app > xrpld.rpc test.app > xrpl.json From 3b6984c4dc49d42773e10dc39b714caf3c0b911b Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Sat, 8 Feb 2025 11:10:11 -0500 Subject: [PATCH 5/8] fixup! Review feedback from @bthomee and @vvysokikh1: --- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 3ae7ce8dfc9..5280746d7c7 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -83,10 +83,10 @@ class DatabaseRotatingImp : public DatabaseRotating static constexpr auto unitTestFlag = " unit_test"; private: - bool const unitTest_; bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; + bool const unitTest_; // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock // "Shared mutexes do not support direct transition from shared to unique From b90f65f24d451c80748fe2ade7f454c64164b45e Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 10:59:56 -0500 Subject: [PATCH 6/8] Rewrite DatabaseRotatingImp again: - Use a boost::upgrade_mutex, which implements a clean read -> write lock upgrade. --- src/xrpld/nodestore/DatabaseRotating.h | 3 ++ .../nodestore/detail/DatabaseRotatingImp.cpp | 50 ++++++------------- .../nodestore/detail/DatabaseRotatingImp.h | 15 +++--- 3 files changed, 25 insertions(+), 43 deletions(-) diff --git a/src/xrpld/nodestore/DatabaseRotating.h b/src/xrpld/nodestore/DatabaseRotating.h index af7fe6674fc..e534b77b9da 100644 --- a/src/xrpld/nodestore/DatabaseRotating.h +++ b/src/xrpld/nodestore/DatabaseRotating.h @@ -45,6 +45,9 @@ class DatabaseRotating : public Database /** Rotates the backends. @param f A function executed before the rotation + + @return bool indicating whether the callback "f" was called, and the new + Backend it returned is stored */ [[nodiscard]] virtual bool diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index c44dd0cf333..e721c32d672 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -19,6 +19,7 @@ #include #include +#include namespace ripple { namespace NodeStore { @@ -48,51 +49,28 @@ DatabaseRotatingImp::rotateWithLock( { // This function should be the only one taking any kind of unique/write // lock, and should only be called once at a time by its syncronous caller. - // The extra checking involving the "rotating" flag, are to ensure that if - // that ever changes, we still avoid deadlocks and incorrect behavior. + boost::upgrade_lock readLock(mutex_, boost::defer_lock); + if (!readLock.try_lock()) { - std::unique_lock writeLock(mutex_); - if (!rotating) - { - // Once this flag is set, we're committed to doing the work and - // returning true. - rotating = true; - } - else - { - // This should only be reachable through unit tests. - XRPL_ASSERT( - unitTest_, - "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " - "unit testing"); - return false; - } - } - auto const writableBackend = [&] { - std::shared_lock readLock(mutex_); + // This should only be reachable through unit tests. XRPL_ASSERT( - rotating, - "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock rotating " - "flag set"); - - return writableBackend_; - }(); - - auto newBackend = f(writableBackend->getName()); + unitTest_, + "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " + "unit testing"); + return false; + } + auto newBackend = f(writableBackend_->getName()); - // Because of the "rotating" flag, there should be no way any other thread - // is waiting at this point. As long as they all release the shared_lock - // before taking the unique_lock (which they have to, because upgrading is - // unsupported), there can be no deadlock. - std::unique_lock writeLock(mutex_); + // boost::upgrade_mutex guarantees that only only thread can have "upgrade + // ownership" at a time, so this is 100% safe, and guaranteed to avoid + // deadlock. + boost::upgrade_to_unique_lock writeLock(readLock); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); writableBackend_ = std::move(newBackend); - rotating = false; - return true; } diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index 5280746d7c7..cf45be82f29 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -22,7 +22,7 @@ #include -#include +#include namespace ripple { namespace NodeStore { @@ -83,16 +83,17 @@ class DatabaseRotatingImp : public DatabaseRotating static constexpr auto unitTestFlag = " unit_test"; private: - bool rotating = false; std::shared_ptr writableBackend_; std::shared_ptr archiveBackend_; bool const unitTest_; - // https://en.cppreference.com/w/cpp/thread/shared_timed_mutex/lock - // "Shared mutexes do not support direct transition from shared to unique - // ownership mode: the shared lock has to be relinquished with - // unlock_shared() before exclusive ownership may be obtained with lock()." - mutable std::shared_timed_mutex mutex_; + // Implements the "UpgradeLockable" concept + // https://www.boost.org/doc/libs/1_86_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable + // In short: Many threads can have shared ownership. One thread can have + // upgradable ownership at the same time as others have shared ownership. + // The upgradeable ownership can be upgraded to exclusive ownership, + // blocking if necessary until no other threads have shared ownership. + mutable boost::upgrade_mutex mutex_; std::shared_ptr fetchNodeObject( From 925c31119fade6c678a24d981c76561752ce927c Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 11:30:34 -0500 Subject: [PATCH 7/8] Add another test case, use boost locks, update comments --- src/test/app/SHAMapStore_test.cpp | 20 ++++++++++++++++-- src/xrpld/app/misc/SHAMapStoreImp.cpp | 2 ++ .../nodestore/detail/DatabaseRotatingImp.cpp | 21 ++++++++++--------- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/test/app/SHAMapStore_test.cpp b/src/test/app/SHAMapStore_test.cpp index d521baa3085..3ba77fce9a8 100644 --- a/src/test/app/SHAMapStore_test.cpp +++ b/src/test/app/SHAMapStore_test.cpp @@ -615,8 +615,7 @@ class SHAMapStore_test : public beast::unit_test::suite for (int i = 0; i < 5; ++i) { threads.emplace_back([&]() { - auto const result = dbr->rotateWithLock(cb); - if (result) + if (dbr->rotateWithLock(cb)) ++successes; else ++failures; @@ -635,6 +634,23 @@ class SHAMapStore_test : public beast::unit_test::suite // Only one thread will invoke the callback to increment threadNum BEAST_EXPECT(threadNum == 1); BEAST_EXPECT(dbr->getName() == "1"); + + ///////////////////////////////////////////////////////////// + // Create another impossible situation. Try to re-enter rotateWithLock + // inside the callback. + auto const cbReentrant = [&](std::string const& writableBackendName) { + BEAST_EXPECT(writableBackendName == "1"); + auto newBackend = makeBackendRotating( + env, scheduler, std::to_string(++threadNum)); + BEAST_EXPECT(!dbr->rotateWithLock(cb)); + return newBackend; + }; + BEAST_EXPECT(dbr->rotateWithLock(cbReentrant)); + + BEAST_EXPECT(successes == 1); + BEAST_EXPECT(failures == 4); + BEAST_EXPECT(threadNum == 2); + BEAST_EXPECT(dbr->getName() == "2"); } void diff --git a/src/xrpld/app/misc/SHAMapStoreImp.cpp b/src/xrpld/app/misc/SHAMapStoreImp.cpp index c69ffc7c7a2..dd9dc562b21 100644 --- a/src/xrpld/app/misc/SHAMapStoreImp.cpp +++ b/src/xrpld/app/misc/SHAMapStoreImp.cpp @@ -379,6 +379,8 @@ SHAMapStoreImp::run() return std::move(newBackend); })) { + UNREACHABLE( + "ripple::SHAMapStoreImp::run rotateWithLock failed"); JLOG(journal_.error()) << validatedSeq << " rotation failed. Discard unused new backend."; diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index e721c32d672..52bb4cd6701 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -53,7 +53,8 @@ DatabaseRotatingImp::rotateWithLock( boost::upgrade_lock readLock(mutex_, boost::defer_lock); if (!readLock.try_lock()) { - // This should only be reachable through unit tests. + // If anything other than a unit test gets here, something has gone very + // wrong XRPL_ASSERT( unitTest_, "ripple::NodeStore::DatabaseRotatingImp::rotateWithLock " @@ -62,7 +63,7 @@ DatabaseRotatingImp::rotateWithLock( } auto newBackend = f(writableBackend_->getName()); - // boost::upgrade_mutex guarantees that only only thread can have "upgrade + // boost::upgrade_mutex guarantees that only one thread can have "upgrade // ownership" at a time, so this is 100% safe, and guaranteed to avoid // deadlock. boost::upgrade_to_unique_lock writeLock(readLock); @@ -77,14 +78,14 @@ DatabaseRotatingImp::rotateWithLock( std::string DatabaseRotatingImp::getName() const { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_->getName(); } std::int32_t DatabaseRotatingImp::getWriteLoad() const { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_->getWriteLoad(); } @@ -92,7 +93,7 @@ void DatabaseRotatingImp::importDatabase(Database& source) { auto const backend = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_; }(); @@ -102,7 +103,7 @@ DatabaseRotatingImp::importDatabase(Database& source) void DatabaseRotatingImp::sync() { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); writableBackend_->sync(); } @@ -116,7 +117,7 @@ DatabaseRotatingImp::store( auto nObj = NodeObject::createObject(type, std::move(data), hash); auto const backend = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return writableBackend_; }(); @@ -170,7 +171,7 @@ DatabaseRotatingImp::fetchNodeObject( std::shared_ptr nodeObject; auto [writable, archive] = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); @@ -184,7 +185,7 @@ DatabaseRotatingImp::fetchNodeObject( { { // Refresh the writable backend pointer - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); writable = writableBackend_; } @@ -205,7 +206,7 @@ DatabaseRotatingImp::for_each( std::function)> f) { auto [writable, archive] = [&] { - std::shared_lock lock(mutex_); + boost::shared_lock lock(mutex_); return std::make_pair(writableBackend_, archiveBackend_); }(); From 217bc1da75c3530aa2a9b1ba6d36e0a89511b920 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Wed, 12 Feb 2025 12:56:46 -0500 Subject: [PATCH 8/8] Review feedback: rename readlock, add and update comments --- src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp | 8 +++++--- src/xrpld/nodestore/detail/DatabaseRotatingImp.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp index 52bb4cd6701..86c6486dd9f 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.cpp @@ -50,8 +50,10 @@ DatabaseRotatingImp::rotateWithLock( // This function should be the only one taking any kind of unique/write // lock, and should only be called once at a time by its syncronous caller. - boost::upgrade_lock readLock(mutex_, boost::defer_lock); - if (!readLock.try_lock()) + // The upgradable lock will NOT block shared locks, but will block other + // upgrade locks and unique/exclusive locks. + boost::upgrade_lock upgradeableLock(mutex_, boost::defer_lock); + if (!upgradeableLock.try_lock()) { // If anything other than a unit test gets here, something has gone very // wrong @@ -66,7 +68,7 @@ DatabaseRotatingImp::rotateWithLock( // boost::upgrade_mutex guarantees that only one thread can have "upgrade // ownership" at a time, so this is 100% safe, and guaranteed to avoid // deadlock. - boost::upgrade_to_unique_lock writeLock(readLock); + boost::upgrade_to_unique_lock writeLock(upgradeableLock); archiveBackend_->setDeletePath(); archiveBackend_ = std::move(writableBackend_); diff --git a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h index cf45be82f29..0feb894eb9b 100644 --- a/src/xrpld/nodestore/detail/DatabaseRotatingImp.h +++ b/src/xrpld/nodestore/detail/DatabaseRotatingImp.h @@ -88,7 +88,7 @@ class DatabaseRotatingImp : public DatabaseRotating bool const unitTest_; // Implements the "UpgradeLockable" concept - // https://www.boost.org/doc/libs/1_86_0/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable + // https://www.boost.org/doc/libs/release/doc/html/thread/synchronization.html#thread.synchronization.mutex_concepts.upgrade_lockable // In short: Many threads can have shared ownership. One thread can have // upgradable ownership at the same time as others have shared ownership. // The upgradeable ownership can be upgraded to exclusive ownership,