From a9afc5c62928704df61bfb114e69bfc2112eacc3 Mon Sep 17 00:00:00 2001 From: Laurynas Biveinis Date: Tue, 17 Dec 2024 17:17:51 +0200 Subject: [PATCH 1/2] Introduce rdb_snapshot_unique_ptr & make rdb_get_rocksdb_db return reference This is a second batch of cleanups from the repeatable read snapshot work. - Make rdb_get_rocksdb_db return reference instead of pointer, update callers and callees. Inline this function in ha_rocksdb_proto.h, move rdb declaration there to support inlining, placing it in the detail namespace as documentation that it should not be accessed directly. Update ha_rocksdb.cc itself to use rdb_get_rocksdb_db instead of rdb. Remove many redundant rdb != nullptr checks and asserts. - Introduce rdb_snapshot_unique_ptr with a custom deleter and a factory function get_rdb_snapshot to manage global RocksDB snapshots. - Make Rdb_explicit_snapshot create the snapshot itself, by calling get_rdb_snapshot, instead of receiving one through parameters, simplify its signature. - Replace rocksdb::ManagedSnapshot uses with rdb_snapshot_unique_ptr ones. - Remove rarely-used explicit transaction snapshot assignment code from acquire_snapshot and move it to the few callers where it may happen. Add new method Rdb_transaction::has_explicit_or_read_only_snapshot to support this. - For Rdb_transaction, make m_insert_count, m_update_count, m_delete_count, m_auto_incr_map, & m_rollback_only fields private instead of protected. Move their reset at the end of committed or rolled-back transactions to on_finish method in the base class. Remove redundant m_writes_at_last_savepoint reset in set_initial_savepoint. - Add several asserts. 
--- storage/rocksdb/clone/donor.cc | 14 +- storage/rocksdb/ha_rocksdb.cc | 498 +++++++++--------- storage/rocksdb/ha_rocksdb.h | 3 +- storage/rocksdb/ha_rocksdb_proto.h | 33 +- storage/rocksdb/rdb_bulk_load.cc | 2 +- storage/rocksdb/rdb_bulk_load.h | 10 +- storage/rocksdb/rdb_cf_manager.cc | 16 +- storage/rocksdb/rdb_cf_manager.h | 10 +- storage/rocksdb/rdb_datadic.cc | 21 +- storage/rocksdb/rdb_datadic.h | 16 +- storage/rocksdb/rdb_i_s.cc | 113 +--- storage/rocksdb/rdb_iterator.cc | 9 +- storage/rocksdb/rdb_iterator.h | 2 +- storage/rocksdb/rdb_sst_info.cc | 18 +- storage/rocksdb/rdb_sst_info.h | 12 +- storage/rocksdb/rdb_sst_partitioner_factory.h | 14 +- 16 files changed, 371 insertions(+), 420 deletions(-) diff --git a/storage/rocksdb/clone/donor.cc b/storage/rocksdb/clone/donor.cc index c701ff0e7e3d..dcccc862929b 100644 --- a/storage/rocksdb/clone/donor.cc +++ b/storage/rocksdb/clone/donor.cc @@ -614,8 +614,8 @@ donor::donor(const myrocks::clone::locator &l, const uchar *&loc, donor::~donor() { if (m_rdb_file_deletes_disabled) { - auto *const rdb = myrocks::rdb_get_rocksdb_db(); - const auto result = rdb->EnableFileDeletions(); + auto &rdb = myrocks::rdb_get_rocksdb_db(); + const auto result = rdb.EnableFileDeletions(); if (!result.ok()) { myrocks::rdb_log_status_error(result, "RocksDB file deletion re-enable failed"); @@ -700,9 +700,9 @@ int donor::next_checkpoint_locked(bool final, std::size_t &total_new_size) { auto err = m_checkpoint.cleanup(); if (err != 0) return save_and_return_error(err, "RocksDB checkpoint error"); - auto *const rdb = final ? 
myrocks::rdb_get_rocksdb_db() : nullptr; - if (rdb != nullptr) { - const auto dfd_result = rdb->DisableFileDeletions(); + if (final) { + const auto dfd_result = + myrocks::rdb_get_rocksdb_db().DisableFileDeletions(); m_rdb_file_deletes_disabled = dfd_result.ok(); if (!m_rdb_file_deletes_disabled) { myrocks::rdb_log_status_error(dfd_result, @@ -714,7 +714,7 @@ int donor::next_checkpoint_locked(bool final, std::size_t &total_new_size) { err = m_checkpoint.init(); if (err != 0) { - if (rdb) rdb->EnableFileDeletions(); + if (final) myrocks::rdb_get_rocksdb_db().EnableFileDeletions(); return save_and_return_error(err, "RocksDB checkpoint error"); } @@ -726,7 +726,7 @@ int donor::next_checkpoint_locked(bool final, std::size_t &total_new_size) { if (err != 0) { // Ignore the return value because we are already returning an error (void)m_checkpoint.cleanup(); - if (rdb) rdb->EnableFileDeletions(); + if (final) myrocks::rdb_get_rocksdb_db().EnableFileDeletions(); return err; } diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index da3d824966f0..a14209989b44 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -211,7 +211,12 @@ static bool rocksdb_use_default_sk_cf = false; /////////////////////////////////////////////////////////// handlerton *rocksdb_hton; -static rocksdb::TransactionDB *rdb = nullptr; +namespace detail { + +rocksdb::TransactionDB *rdb; + +} // namespace detail + static rocksdb::HistogramImpl *commit_latency_stats = nullptr; static std::shared_ptr rocksdb_stats; @@ -252,7 +257,7 @@ static void rocksdb_flush_all_memtables() { // RocksDB will fail the flush if the CF is deleted, // but here we don't handle return status for (const auto &cf_handle : cf_manager.get_all_cf()) { - rdb->Flush(rocksdb::FlushOptions(), cf_handle.get()); + rdb_get_rocksdb_db().Flush(rocksdb::FlushOptions(), cf_handle.get()); } } @@ -351,7 +356,7 @@ int rocksdb_create_checkpoint(std::string_view checkpoint_dir_raw) { "creating 
checkpoint in directory: %s\n", checkpoint_dir.c_str()); rocksdb::Checkpoint *checkpoint; - auto status = rocksdb::Checkpoint::Create(rdb, &checkpoint); + auto status = rocksdb::Checkpoint::Create(&rdb_get_rocksdb_db(), &checkpoint); if (status.ok()) { status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str()); delete checkpoint; @@ -449,7 +454,7 @@ static int rocksdb_compact_lzero() { for (const auto &cf_handle : cf_manager.get_all_cf()) { for (i = 0; i < max_attempts; i++) { - rdb->GetColumnFamilyMetaData(cf_handle.get(), &metadata); + rdb_get_rocksdb_db().GetColumnFamilyMetaData(cf_handle.get(), &metadata); cf_handle->GetDescriptor(&cf_descr); c_options.output_file_size_limit = cf_descr.options.target_file_size_base; @@ -457,9 +462,9 @@ static int rocksdb_compact_lzero() { c_options.compression = rocksdb::kDisableCompressionOption; uint64_t base_level; - if (!rdb->GetIntProperty(cf_handle.get(), - rocksdb::DB::Properties::kBaseLevel, - &base_level)) { + if (!rdb_get_rocksdb_db().GetIntProperty( + cf_handle.get(), rocksdb::DB::Properties::kBaseLevel, + &base_level)) { LogPluginErrMsg(ERROR_LEVEL, ER_LOG_PRINTF_MSG, "MyRocks: compact L0 cannot get base level"); break; @@ -484,8 +489,8 @@ static int rocksdb_compact_lzero() { break; } - rocksdb::Status s; - s = rdb->CompactFiles(c_options, cf_handle.get(), file_names, base_level); + const auto s = rdb_get_rocksdb_db().CompactFiles( + c_options, cf_handle.get(), file_names, base_level); if (!s.ok()) { std::shared_ptr cfh = @@ -574,11 +579,11 @@ static int rocksdb_cancel_manual_compactions( // NO_LINT_DEBUG LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB: Stopping all Manual Compactions."); - rdb->GetBaseDB()->DisableManualCompaction(); + rdb_get_rocksdb_db().GetBaseDB()->DisableManualCompaction(); // NO_LINT_DEBUG LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB: Enabling Manual Compactions."); - rdb->GetBaseDB()->EnableManualCompaction(); + 
rdb_get_rocksdb_db().GetBaseDB()->EnableManualCompaction(); return HA_EXIT_SUCCESS; } @@ -598,9 +603,9 @@ static void rocksdb_set_pause_background_work( const bool pause_requested = *static_cast(save); if (rocksdb_pause_background_work != pause_requested) { if (pause_requested) { - rdb->PauseBackgroundWork(); + rdb_get_rocksdb_db().PauseBackgroundWork(); } else { - rdb->ContinueBackgroundWork(); + rdb_get_rocksdb_db().ContinueBackgroundWork(); } rocksdb_pause_background_work = pause_requested; } @@ -962,7 +967,7 @@ static int rocksdb_tracing(THD *const thd MY_ATTRIBUTE((__unused__)), int len = 0; const char *const trace_opt_str_raw = value->val_str(value, nullptr, &len); rocksdb::Status s; - if (trace_opt_str_raw == nullptr || rdb == nullptr) { + if (trace_opt_str_raw == nullptr) { return HA_EXIT_FAILURE; } int rc __attribute__((__unused__)); @@ -972,7 +977,8 @@ static int rocksdb_tracing(THD *const thd MY_ATTRIBUTE((__unused__)), // NO_LINT_DEBUG LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB: Stop tracing block cache accesses or queries.\n"); - s = trace_block_cache_access ? rdb->EndBlockCacheTrace() : rdb->EndTrace(); + s = trace_block_cache_access ? 
rdb_get_rocksdb_db().EndBlockCacheTrace() + : rdb_get_rocksdb_db().EndTrace(); if (!s.ok()) { rc = ha_rocksdb::rdb_error_to_mysql(s); @@ -1020,7 +1026,7 @@ static int rocksdb_tracing(THD *const thd MY_ATTRIBUTE((__unused__)), return HA_EXIT_FAILURE; } const std::string trace_dir = std::string(rocksdb_datadir) + trace_folder; - s = rdb->GetEnv()->CreateDirIfMissing(trace_dir); + s = rdb_get_rocksdb_db().GetEnv()->CreateDirIfMissing(trace_dir); if (!s.ok()) { // NO_LINT_DEBUG LogPluginErrMsg( @@ -1031,7 +1037,7 @@ static int rocksdb_tracing(THD *const thd MY_ATTRIBUTE((__unused__)), return HA_EXIT_FAILURE; } const auto trace_file_path = rdb_concat_paths(trace_dir, trace_file_name); - s = rdb->GetEnv()->FileExists(trace_file_path); + s = rdb_get_rocksdb_db().GetEnv()->FileExists(trace_file_path); if (s.ok() || !s.IsNotFound()) { // NO_LINT_DEBUG LogPluginErrMsg( @@ -1044,17 +1050,18 @@ static int rocksdb_tracing(THD *const thd MY_ATTRIBUTE((__unused__)), return HA_EXIT_FAILURE; } std::unique_ptr trace_writer; - const rocksdb::EnvOptions env_option(rdb->GetDBOptions()); - s = rocksdb::NewFileTraceWriter(rdb->GetEnv(), env_option, trace_file_path, - &trace_writer); + const rocksdb::EnvOptions env_option{rdb_get_rocksdb_db().GetDBOptions()}; + s = rocksdb::NewFileTraceWriter(rdb_get_rocksdb_db().GetEnv(), env_option, + trace_file_path, &trace_writer); if (!s.ok()) { rc = ha_rocksdb::rdb_error_to_mysql(s); return HA_EXIT_FAILURE; } if (trace_block_cache_access) { - s = rdb->StartBlockCacheTrace(trace_opt, std::move(trace_writer)); + s = rdb_get_rocksdb_db().StartBlockCacheTrace(trace_opt, + std::move(trace_writer)); } else { - s = rdb->StartTrace(trace_opt, std::move(trace_writer)); + s = rdb_get_rocksdb_db().StartTrace(trace_opt, std::move(trace_writer)); } if (!s.ok()) { rc = ha_rocksdb::rdb_error_to_mysql(s); @@ -1227,13 +1234,12 @@ static void rocksdb_set_reset_stats( my_core::SYS_VAR *const var MY_ATTRIBUTE((__unused__)), void *const var_ptr, const void *const save) 
{ assert(save != nullptr); - assert(rdb != nullptr); assert(rocksdb_stats != nullptr); *static_cast(var_ptr) = *static_cast(save); if (rocksdb_reset_stats) { - rocksdb::Status s = rdb->ResetStats(); + auto s = rdb_get_rocksdb_db().ResetStats(); // RocksDB will always return success. Let's document this assumption here // as well so that we'll get immediately notified when contract changes. @@ -1272,7 +1278,6 @@ static void rocksdb_set_io_write_timeout( my_core::SYS_VAR *const var MY_ATTRIBUTE((__unused__)), void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { assert(save != nullptr); - assert(rdb != nullptr); const auto new_val = *static_cast(save); rocksdb_io_write_timeout_secs = new_val; @@ -1567,8 +1572,10 @@ static void rocksdb_set_max_bottom_pri_background_compactions_internal( // This creates background threads in rocksdb with BOTTOM priority pool. // Compactions for bottommost level use threads in the BOTTOM pool, and // the threads in the BOTTOM pool run with lower OS priority (19 in Linux). 
- rdb->GetEnv()->SetBackgroundThreads(val, rocksdb::Env::Priority::BOTTOM); - rdb->GetEnv()->LowerThreadPoolCPUPriority(rocksdb::Env::Priority::BOTTOM); + rdb_get_rocksdb_db().GetEnv()->SetBackgroundThreads( + val, rocksdb::Env::Priority::BOTTOM); + rdb_get_rocksdb_db().GetEnv()->LowerThreadPoolCPUPriority( + rocksdb::Env::Priority::BOTTOM); LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "Set %d compaction thread(s) with " "lower scheduling priority.", @@ -3238,16 +3245,11 @@ static struct SYS_VAR *rocksdb_system_variables[] = { class Rdb_explicit_snapshot : public explicit_snapshot { public: [[nodiscard]] static std::shared_ptr create( - THD *thd, snapshot_info_st &ssinfo, rocksdb::DB *db, - const rocksdb::Snapshot *snapshot) { - auto s = std::unique_ptr( - new rocksdb::ManagedSnapshot(db, snapshot)); - if (!s) { - return nullptr; - } + THD &thd, snapshot_info_st &ssinfo) { + auto s = get_rdb_snapshot(); const uint64_t client_provided_read_filtering_ts = rdb_is_binlog_ttl_enabled() - ? THDVAR(thd, consistent_snapshot_ttl_read_filtering_ts_nsec) / + ? 
THDVAR(&thd, consistent_snapshot_ttl_read_filtering_ts_nsec) / 1000000000UL : 0; std::lock_guard lock(explicit_snapshot_mutex); @@ -3296,12 +3298,12 @@ class Rdb_explicit_snapshot : public explicit_snapshot { return elem->second.lock(); } - [[nodiscard]] rocksdb::ManagedSnapshot *get_snapshot() noexcept { + [[nodiscard]] const rocksdb::Snapshot *get_snapshot() const noexcept { return snapshot.get(); } Rdb_explicit_snapshot(snapshot_info_st ssinfo, - std::unique_ptr &&snapshot) + rdb_snapshot_unique_ptr &&snapshot) : explicit_snapshot(ssinfo), snapshot(std::move(snapshot)) {} virtual ~Rdb_explicit_snapshot() { @@ -3310,7 +3312,7 @@ class Rdb_explicit_snapshot : public explicit_snapshot { } private: - std::unique_ptr snapshot; + rdb_snapshot_unique_ptr snapshot; static std::mutex explicit_snapshot_mutex; static ulonglong explicit_snapshot_counter; @@ -3355,7 +3357,7 @@ static int rocksdb_compact_column_family( if (cf_name.empty()) cf_name = DEFAULT_CF_NAME; auto cfh = cf_manager.get_cf(cf_name); - if (cfh != nullptr && rdb != nullptr) { + if (cfh != nullptr) { rocksdb::BottommostLevelCompaction bottommost_level_compaction = (rocksdb::BottommostLevelCompaction)THDVAR( thd, manual_compaction_bottommost_level); @@ -3649,15 +3651,16 @@ class Rdb_transaction { Rdb_transaction *next{nullptr}; Rdb_transaction *prev{nullptr}; - protected: - ulonglong m_write_count[2] = {0, 0}; ulonglong m_insert_count = 0; ulonglong m_update_count = 0; ulonglong m_delete_count = 0; - // per row data - ulonglong m_row_lock_count = 0; + std::unordered_map m_auto_incr_map; + protected: + ulonglong m_write_count[2] = {0, 0}; + // per row data + ulonglong m_row_lock_count = 0; bool m_is_two_phase = false; private: @@ -3668,7 +3671,7 @@ class Rdb_transaction { savepoint (the idea is not to take another savepoint if we haven't made any changes) */ - ulonglong m_writes_at_last_savepoint; + ulonglong m_writes_at_last_savepoint = 0; bool m_bulk_index_transaction = false; bool m_dd_transaction = false; 
@@ -3681,6 +3684,8 @@ class Rdb_transaction { Rdb_transaction_list::mutex. */ std::int64_t m_earliest_snapshot_ts = 0; + bool m_rollback_only = false; + protected: THD *m_thd = nullptr; @@ -3691,12 +3696,10 @@ class Rdb_transaction { /* Maximum number of locks the transaction can have */ ulonglong m_max_row_locks; - bool m_rollback_only = false; - enum class snapshot_type { NONE, - // A snapshot created through Tranaction API for regular transactions and by - // DB::GetSnapshot() for WB ones + // A snapshot created through Transaction API for regular transactions and + // by DB::GetSnapshot() for WB ones CURRENT, CURRENT_DELAYED, // This is used by transactions started with "START TRANSACTION WITH @@ -3728,7 +3731,7 @@ class Rdb_transaction { case snapshot_type::EXPLICIT: assert(m_explicit_snapshot != nullptr); assert(m_read_opts[USER_TABLE].snapshot == - m_explicit_snapshot->get_snapshot()->snapshot()); + m_explicit_snapshot->get_snapshot()); break; } #endif @@ -4130,8 +4133,7 @@ class Rdb_transaction { assert(m_read_opts[USER_TABLE].snapshot == nullptr); break; case snapshot_type::EXPLICIT: - assert(snapshot == - m_explicit_snapshot->get_snapshot()->snapshot()); + assert(snapshot == m_explicit_snapshot->get_snapshot()); break; case snapshot_type::NONE: assert(false); @@ -4170,12 +4172,13 @@ class Rdb_transaction { } void share_explicit_snapshot( - std::shared_ptr snapshot) noexcept { + std::shared_ptr &&snapshot) noexcept { + assert(statement_snapshot_type == snapshot_type::NONE); assert_snapshot_invariants(); m_explicit_snapshot = std::move(snapshot); statement_snapshot_type = snapshot_type::EXPLICIT; - auto *const rdb_snapshot = m_explicit_snapshot->get_snapshot()->snapshot(); + auto *const rdb_snapshot = m_explicit_snapshot->get_snapshot(); assign_snapshot(rdb_snapshot); assert_snapshot_invariants(); @@ -4185,11 +4188,9 @@ class Rdb_transaction { assert(statement_snapshot_type == snapshot_type::NONE); assert_snapshot_invariants(); - auto *const rdb_snapshot = 
rdb->GetSnapshot(); - m_explicit_snapshot = - Rdb_explicit_snapshot::create(m_thd, ss_info, rdb, rdb_snapshot); + m_explicit_snapshot = Rdb_explicit_snapshot::create(*m_thd, ss_info); statement_snapshot_type = snapshot_type::EXPLICIT; - assign_snapshot(rdb_snapshot); + assign_snapshot(m_explicit_snapshot->get_snapshot()); assert_snapshot_invariants(); } @@ -4199,6 +4200,12 @@ class Rdb_transaction { return statement_snapshot_type == snapshot_type::EXPLICIT; } + [[nodiscard]] bool has_explicit_or_read_only_snapshot() const noexcept { + assert_snapshot_invariants(); + return statement_snapshot_type == snapshot_type::EXPLICIT || + statement_snapshot_type == snapshot_type::READ_ONLY_TRX; + } + [[nodiscard]] snapshot_info_st clone_explicit_snapshot_info() const noexcept { assert(has_explicit_snapshot()); return m_explicit_snapshot->ss_info; @@ -4253,7 +4260,7 @@ class Rdb_transaction { auto ctx = get_bulk_load_ctx(); auto &bulk_load_index_registry = ctx->bulk_load_index_registry(); - rocksdb::Status s = rdb->IngestExternalFiles(args); + auto s = rdb_get_rocksdb_db().IngestExternalFiles(args); if (!s.ok() && bulk_load_index_registry.index_registered_in_sst_partitioner()) { // NO_LINT_DEBUG @@ -4262,7 +4269,7 @@ class Rdb_transaction { "status code = %d, status = %s", s.code(), s.ToString().c_str()); s = bulk_load_index_registry.compact_index_ranges( - rdb, getCompactRangeOptions()); + rdb_get_rocksdb_db(), getCompactRangeOptions()); if (!s.ok()) { // NO_LINT_DEBUG LogPluginErrMsg(WARNING_LEVEL, ER_LOG_PRINTF_MSG, @@ -4272,7 +4279,7 @@ class Rdb_transaction { return s; } // try again after compaction - s = rdb->IngestExternalFiles(args); + s = rdb_get_rocksdb_db().IngestExternalFiles(args); } return s; } @@ -4443,9 +4450,9 @@ class Rdb_transaction { full_name.c_str(), index_name.c_str()); } - auto sst_info = std::make_unique( - rdb, rdb_merge.get_table_name(), index_name, rdb_merge.get_cf(), - *rocksdb_db_options, trace_sst_api, + const auto sst_info = std::make_unique( + 
rdb_get_rocksdb_db(), rdb_merge.get_table_name(), index_name, + rdb_merge.get_cf(), *rocksdb_db_options, trace_sst_api, THDVAR(get_thd(), bulk_load_compression_parallel_threads)); const auto enable_unique_key_check = @@ -4801,7 +4808,7 @@ class Rdb_transaction { rocksdb::ColumnFamilyHandle &cf, const Rdb_key_def &kd) { assert(!is_ac_nl_ro_rc_transaction()); return get_bulk_load_ctx()->bulk_load_index_registry().add_index( - rdb, cf, kd.get_index_number()); + rdb_get_rocksdb_db(), cf, kd.get_index_number()); } [[nodiscard]] int start_bulk_load(ha_rocksdb *const bulk_load, @@ -4848,8 +4855,8 @@ class Rdb_transaction { } *sst_info = ctx->add_sst_info( - rdb, table_handler->m_table_name, kd, *rocksdb_db_options, - trace_sst_api, + rdb_get_rocksdb_db(), table_handler->m_table_name, kd, + *rocksdb_db_options, trace_sst_api, THDVAR(get_thd(), bulk_load_compression_parallel_threads)); return HA_EXIT_SUCCESS; @@ -5031,6 +5038,16 @@ class Rdb_transaction { void on_finish() noexcept { assert(statement_snapshot_type == snapshot_type::NONE); assert_snapshot_invariants(); + assert(m_auto_incr_map.empty()); + + m_write_count[USER_TABLE] = 0; + m_write_count[INTRINSIC_TMP] = 0; + m_insert_count = 0; + m_update_count = 0; + m_delete_count = 0; + m_row_lock_count = 0; + m_rollback_only = false; + m_writes_at_last_savepoint = 0; modified_tables.clear(); @@ -5052,7 +5069,10 @@ class Rdb_transaction { on_finish(); } - void on_rollback() { on_finish(); } + void on_rollback() { + m_auto_incr_map.clear(); + on_finish(); + } private: std::atomic m_binlog_ttl_read_filtering_ts{0}; @@ -5097,13 +5117,16 @@ class Rdb_transaction { } void set_initial_savepoint() { + assert(statement_snapshot_type == snapshot_type::NONE); + assert(m_write_count[TABLE_TYPE::USER_TABLE] == 0); + assert(m_writes_at_last_savepoint == 0); + /* Set the initial savepoint. If the first statement in the transaction fails, we need something to roll back to, without rolling back the entire transaction. 
*/ do_set_savepoint(); - m_writes_at_last_savepoint = m_write_count[USER_TABLE]; } /* @@ -5298,7 +5321,7 @@ class Rdb_transaction_impl : public Rdb_transaction { assert(m_rocksdb_tx[table_type] == nullptr); // If m_rocksdb_reuse_tx[table_type] is nullptr this will create a new // transaction object. Otherwise it will reuse the existing one. - m_rocksdb_tx[table_type].reset(rdb->BeginTransaction( + m_rocksdb_tx[table_type].reset(rdb_get_rocksdb_db().BeginTransaction( write_opts, tx_opts, m_rocksdb_reuse_tx[table_type].release())); } @@ -5427,21 +5450,16 @@ class Rdb_transaction_impl : public Rdb_transaction { goto error; } - on_commit(table_type); error: - if (table_type == USER_TABLE) { + if (likely(!res)) + on_commit(table_type); + else if (table_type == USER_TABLE) on_rollback(); + + if (table_type == USER_TABLE) { /* Save the transaction object to be reused */ release_tx(wb_size); - m_write_count[USER_TABLE] = 0; - m_write_count[INTRINSIC_TMP] = 0; - m_insert_count = 0; - m_update_count = 0; - m_delete_count = 0; - m_row_lock_count = 0; - m_rollback_only = false; } else { - m_write_count[INTRINSIC_TMP] = 0; // clean up only tmp table tx release_intrinsic_table_tx(); } @@ -5460,19 +5478,10 @@ class Rdb_transaction_impl : public Rdb_transaction { m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->Rollback(); release_tx(wb_size); - - m_rollback_only = false; } else { release_intrinsic_table_tx(); } on_rollback(); - m_write_count[USER_TABLE] = 0; - m_write_count[INTRINSIC_TMP] = 0; - m_insert_count = 0; - m_update_count = 0; - m_delete_count = 0; - m_row_lock_count = 0; - m_auto_incr_map.clear(); reset_flags(); } @@ -5509,25 +5518,27 @@ class Rdb_transaction_impl : public Rdb_transaction { return; } - auto thd_ss = std::static_pointer_cast( +#ifndef NDEBUG + const auto thd_ss = std::static_pointer_cast( m_thd->get_explicit_snapshot()); - if (thd_ss) { - share_explicit_snapshot(std::move(thd_ss)); - } +#endif switch (statement_snapshot_type) { case 
Rdb_transaction::snapshot_type::NONE: + assert(thd_ss == nullptr); if (acquire_now) acquire_snapshot_now(); else acquire_snapshot_on_next_op(); break; case Rdb_transaction::snapshot_type::READ_ONLY_TRX: - assign_snapshot(rdb->GetSnapshot()); + assign_snapshot(rdb_get_rocksdb_db().GetSnapshot()); break; case Rdb_transaction::snapshot_type::EXPLICIT: + assert(thd_ss != nullptr); break; case Rdb_transaction::snapshot_type::CURRENT_DELAYED: + assert(thd_ss == nullptr); if (acquire_now) { acquire_snapshot_now(); } @@ -5603,7 +5614,7 @@ class Rdb_transaction_impl : public Rdb_transaction { break; } case Rdb_transaction::snapshot_type::READ_ONLY_TRX: - rdb->ReleaseSnapshot(m_read_opts[table_type].snapshot); + rdb_get_rocksdb_db().ReleaseSnapshot(m_read_opts[table_type].snapshot); m_read_opts[table_type].snapshot = nullptr; statement_snapshot_type = snapshot_type::NONE; break; @@ -5814,6 +5825,7 @@ class Rdb_transaction_impl : public Rdb_transaction { m_read_opts[table_type], &column_family, key, value, exclusive, m_read_opts[table_type].snapshot ? 
do_validate : false); } else { + assert(statement_snapshot_type == snapshot_type::CURRENT); // If snapshot is set, and if skipping validation, // call GetForUpdate without validation and set back old snapshot auto saved_snapshot = m_read_opts[table_type].snapshot; @@ -6012,7 +6024,10 @@ class Rdb_writebatch_impl : public Rdb_transaction { } private: - bool prepare() override { return true; } + bool prepare() override { + assert(statement_snapshot_type == snapshot_type::CURRENT); + return true; + } bool commit_no_binlog(TABLE_TYPE table_type) override { assert(!is_ac_nl_ro_rc_transaction()); @@ -6033,22 +6048,21 @@ class Rdb_writebatch_impl : public Rdb_transaction { } release_snapshot(table_type); - s = rdb->Write(write_opts, optimize, m_batch.GetWriteBatch()); + s = rdb_get_rocksdb_db().Write(write_opts, optimize, + m_batch.GetWriteBatch()); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); res = true; goto error; } - on_commit(table_type); + error: - on_rollback(); + if (likely(!res)) + on_commit(table_type); + else + on_rollback(); reset(); - m_write_count[table_type] = 0; - m_insert_count = 0; - m_update_count = 0; - m_delete_count = 0; - m_rollback_only = false; return res; } @@ -6086,16 +6100,13 @@ class Rdb_writebatch_impl : public Rdb_transaction { } void rollback() override { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT); + release_snapshot(TABLE_TYPE::USER_TABLE); on_rollback(); - m_write_count[TABLE_TYPE::USER_TABLE] = 0; - m_insert_count = 0; - m_update_count = 0; - m_delete_count = 0; - m_row_lock_count = 0; reset(); - m_rollback_only = false; } void acquire_snapshot(bool acquire_now MY_ATTRIBUTE((unused)), @@ -6108,7 +6119,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { if (!has_snapshot(table_type)) { assert(statement_snapshot_type == snapshot_type::NONE); statement_snapshot_type = snapshot_type::CURRENT; - assign_snapshot(rdb->GetSnapshot()); + 
assign_snapshot(rdb_get_rocksdb_db().GetSnapshot()); } assert_snapshot_invariants(); } @@ -6122,7 +6133,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { if (has_snapshot(table_type)) { assert(statement_snapshot_type == snapshot_type::CURRENT); statement_snapshot_type = snapshot_type::NONE; - rdb->ReleaseSnapshot(m_read_opts[table_type].snapshot); + rdb_get_rocksdb_db().ReleaseSnapshot(m_read_opts[table_type].snapshot); m_read_opts[table_type].snapshot = nullptr; } assert_snapshot_invariants(); @@ -6140,12 +6151,12 @@ class Rdb_writebatch_impl : public Rdb_transaction { const rocksdb::Slice &value, TABLE_TYPE table_type, bool) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT); if (table_type == TABLE_TYPE::INTRINSIC_TMP) { return rocksdb::Status::NotSupported( "Not supported for intrinsic tmp tables"); } - assert(!is_ac_nl_ro_rc_transaction()); ++m_write_count[table_type]; m_batch.Put(&column_family, key, value); @@ -6158,6 +6169,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { rocksdb::ColumnFamilyHandle &column_family, const rocksdb::Slice &key, TABLE_TYPE table_type, bool) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT); if (table_type == TABLE_TYPE::INTRINSIC_TMP) { assert(false); @@ -6174,6 +6186,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { rocksdb::ColumnFamilyHandle &column_family, const rocksdb::Slice &key, TABLE_TYPE table_type, bool) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT); if (table_type == TABLE_TYPE::INTRINSIC_TMP) { assert(false); @@ -6197,6 +6210,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { TABLE_TYPE table_type) override { assert(table_type != TABLE_TYPE::INTRINSIC_TMP); assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT); ++m_write_count[table_type]; return m_batch; @@ 
-6214,8 +6228,9 @@ class Rdb_writebatch_impl : public Rdb_transaction { "Not supported for intrinsic tmp tables"); } value->Reset(); - return m_batch.GetFromBatchAndDB(rdb, m_read_opts[table_type], - &column_family, key, value); + return m_batch.GetFromBatchAndDB(&rdb_get_rocksdb_db(), + m_read_opts[table_type], &column_family, + key, value); } void multi_get(rocksdb::ColumnFamilyHandle &column_family, size_t num_keys, @@ -6228,9 +6243,9 @@ class Rdb_writebatch_impl : public Rdb_transaction { assert(false); return; } - m_batch.MultiGetFromBatchAndDB(rdb, m_read_opts[table_type], &column_family, - num_keys, keys, values, statuses, - sorted_input); + m_batch.MultiGetFromBatchAndDB( + &rdb_get_rocksdb_db(), m_read_opts[table_type], &column_family, + num_keys, keys, values, statuses, sorted_input); } rocksdb::Status get_for_update(const Rdb_key_def &key_descr, @@ -6267,7 +6282,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { assert(false); return nullptr; } - const auto it = rdb->NewIterator(options); + const auto it = rdb_get_rocksdb_db().NewIterator(options); return std::unique_ptr(m_batch.NewIteratorWithBase(it)); } @@ -6806,7 +6821,7 @@ static int rocksdb_close_connection( rocksdb_remove_checkpoint(checkpoint_dir); } if (get_ha_data(thd)->get_disable_file_deletions()) { - rdb->EnableFileDeletions(); + rdb_get_rocksdb_db().EnableFileDeletions(); } destroy_ha_data(thd); return HA_EXIT_SUCCESS; @@ -6815,7 +6830,6 @@ static int rocksdb_close_connection( static int rocksdb_create_temporary_checkpoint_validate( my_core::THD *const thd, my_core::SYS_VAR *const /* unused */, void *const save, my_core::st_mysql_value *const value) { - assert(rdb != nullptr); assert(thd != nullptr); const char *current_checkpoint_dir = THDVAR(thd, create_temporary_checkpoint); @@ -6855,16 +6869,15 @@ static int rocksdb_create_temporary_checkpoint_validate( static void rocksdb_disable_file_deletions_update( my_core::THD *const thd, my_core::SYS_VAR *const /* unused */, void *const 
var_ptr, const void *const save) { - assert(rdb != nullptr); assert(thd != nullptr); bool val = *static_cast(var_ptr) = *static_cast(save); bool old_val = get_ha_data(thd)->get_disable_file_deletions(); if (val && !old_val) { - rdb->DisableFileDeletions(); + rdb_get_rocksdb_db().DisableFileDeletions(); get_ha_data(thd)->set_disable_file_deletions(true); } else if (!val && old_val) { - rdb->EnableFileDeletions(); + rdb_get_rocksdb_db().EnableFileDeletions(); get_ha_data(thd)->set_disable_file_deletions(false); } } @@ -6878,8 +6891,6 @@ static void rocksdb_disable_file_deletions_update( */ static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)), bool binlog_group_flush) { - assert(rdb != nullptr); - rocksdb::Status s; if ((!binlog_group_flush && !rocksdb_db_options->allow_mmap_writes) || rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { @@ -6887,7 +6898,7 @@ static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)), bool sync = rdb_sync_wal_supported() && (!binlog_group_flush || rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); - s = rdb->FlushWAL(sync); + s = rdb_get_rocksdb_db().FlushWAL(sync); } if (!s.ok()) { @@ -7021,7 +7032,7 @@ static xa_status_code rocksdb_commit_by_xid( const auto name = rdb_xid_to_string(*xid); assert(!name.empty()); - rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); + auto *const trx = rdb_get_rocksdb_db().GetTransactionByName(name); if (trx == nullptr) { DBUG_RETURN(XAER_NOTA); @@ -7061,11 +7072,9 @@ static xa_status_code rocksdb_rollback_by_xid( assert(hton != nullptr); assert(xid != nullptr); - assert(rdb != nullptr); const auto name = rdb_xid_to_string(*xid); - - rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); + auto *const trx = rdb_get_rocksdb_db().GetTransactionByName(name); if (trx == nullptr) { DBUG_RETURN(XAER_NOTA); @@ -7217,7 +7226,7 @@ static int rocksdb_recover(handlerton *const hton [[maybe_unused]], } std::vector trans_list; - 
rdb->GetAllPreparedTransactions(&trans_list); + rdb_get_rocksdb_db().GetAllPreparedTransactions(&trans_list); uint count = 0; for (auto &trans : trans_list) { @@ -7472,7 +7481,7 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { const auto earliest_snapshot_timestamp = tx->get_earliest_snapshot_ts(); if (earliest_snapshot_timestamp != 0) { int64_t curr_time; - rdb->GetEnv()->GetCurrentTime(&curr_time); + rdb_get_rocksdb_db().GetEnv()->GetCurrentTime(&curr_time); const auto earliest_snapshot_age = curr_time - earliest_snapshot_timestamp; @@ -7516,7 +7525,7 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { } void populate_deadlock_buffer() { - auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + const auto dlock_buffer = rdb_get_rocksdb_db().GetDeadlockInfoBuffer(); m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; for (const auto &path_entry : dlock_buffer) { @@ -7558,7 +7567,7 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { std::vector get_deadlock_info() { std::vector deadlock_info; - auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + const auto dlock_buffer = rdb_get_rocksdb_db().GetDeadlockInfoBuffer(); for (const auto &path_entry : dlock_buffer) { if (!path_entry.limit_exceeded) { deadlock_info.push_back(get_dl_path_trx_info(path_entry)); @@ -7707,10 +7716,9 @@ static uint64_t advance_binlog_ttl_compaction_timestamp(uint64_t ts) { static bool rocksdb_update_binlog_ttl_compaction_ts( handlerton *const hton MY_ATTRIBUTE((__unused__)), THD *thd, uint64_t *timestamp) { - assert(rdb != nullptr); assert(timestamp != nullptr); - if (unlikely(!timestamp || !rdb)) { + if (unlikely(!timestamp)) { return HA_EXIT_FAILURE; } @@ -7809,8 +7817,6 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, char buf[100] = {'\0'}; if (stat_type == HA_ENGINE_STATUS) { - assert(rdb != nullptr); - std::string str; /* Global DB Statistics */ @@ -7840,13 +7846,14 @@ static bool rocksdb_show_status(handlerton *const hton, THD 
*const thd, // // NB! We're replacing hyphens with underscores in output to better match // the existing naming convention. - if (rdb->GetIntProperty("rocksdb.is-write-stopped", &v)) { + if (rdb_get_rocksdb_db().GetIntProperty("rocksdb.is-write-stopped", &v)) { snprintf(buf, sizeof(buf), "rocksdb.is_write_stopped COUNT : %" PRIu64 "\n", v); str.append(buf); } - if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) { + if (rdb_get_rocksdb_db().GetIntProperty( + "rocksdb.actual-delayed-write-rate", &v)) { snprintf(buf, sizeof(buf), "rocksdb.actual_delayed_write_rate " "COUNT : %" PRIu64 "\n", @@ -7858,7 +7865,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, } /* Per DB stats */ - if (rdb->GetProperty("rocksdb.dbstats", &str)) { + if (rdb_get_rocksdb_db().GetProperty("rocksdb.dbstats", &str)) { res |= print_stats(thd, "DBSTATS", "rocksdb", str, stat_print); } @@ -7873,7 +7880,8 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, // Retrieve information from CF handle object. // Even if the CF is removed from CF_manager, the handle object // is valid. 
- if (!rdb->GetProperty(cfh.get(), "rocksdb.cfstats", &str)) { + if (!rdb_get_rocksdb_db().GetProperty(cfh.get(), "rocksdb.cfstats", + &str)) { continue; } @@ -7886,7 +7894,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, size_t internal_cache_count = 0; size_t kDefaultInternalCacheSize = 8 * 1024 * 1024; - dbs.push_back(rdb); + dbs.push_back(&rdb_get_rocksdb_db()); cache_set.insert(rocksdb_tbl_options->block_cache.get()); for (const auto &cf_handle : cf_manager.get_all_cf()) { @@ -7938,7 +7946,7 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, /* Show the background thread status */ std::vector thread_list; - rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list); + const auto s = rdb_get_rocksdb_db().GetEnv()->GetThreadList(&thread_list); // GetThreadList() may return Status::NotSupported when // ROCKSDB_USING_THREAD_STATUS is not defined @@ -7998,10 +8006,8 @@ static bool rocksdb_show_status(handlerton *const hton, THD *const thd, returns false on success */ -static bool rocksdb_lock_hton_log( - handlerton *const MY_ATTRIBUTE((__unused__))) { - assert(rdb != nullptr); - return !rdb->LockWAL().ok(); +static bool rocksdb_lock_hton_log(handlerton *) { + return !rdb_get_rocksdb_db().LockWAL().ok(); } /* @@ -8009,9 +8015,8 @@ static bool rocksdb_lock_hton_log( returns false on success */ -static bool rocksdb_unlock_hton_log(handlerton *const /* unused */) { - assert(rdb != nullptr); - return !rdb->UnlockWAL().ok(); +static bool rocksdb_unlock_hton_log(handlerton *) { + return !rdb_get_rocksdb_db().UnlockWAL().ok(); } /* @@ -8032,7 +8037,7 @@ static bool rocksdb_collect_hton_log_info(handlerton *const /* unused */, Json_dom *json) { bool ret_val = false; rocksdb::VectorLogPtr live_wal_files; - const auto s = rdb->GetSortedWalFiles(live_wal_files); + const auto s = rdb_get_rocksdb_db().GetSortedWalFiles(live_wal_files); if (!s.ok()) { return true; @@ -8067,25 +8072,33 @@ static bool 
rocksdb_collect_hton_log_info(handlerton *const /* unused */, return ret_val; } -static inline void rocksdb_register_tx( - handlerton *const hton MY_ATTRIBUTE((__unused__)), THD *const thd, - Rdb_transaction *const tx) { - assert(tx != nullptr); +static inline void rocksdb_register_tx(handlerton *hton [[maybe_unused]], + THD &thd, Rdb_transaction &tx) { + assert(hton == rocksdb_hton); - trans_register_ha(thd, false, rocksdb_hton, NULL); + trans_register_ha(&thd, false, rocksdb_hton, nullptr); if (rocksdb_write_policy == rocksdb::TxnDBWritePolicy::WRITE_UNPREPARED) { // Some internal operations will call trans_register_ha, but they do not // go through 2pc. In this case, the xid is set with query_id == 0, which // means that rocksdb will receive transactions with duplicate names. // // Skip setting name in these cases. - if (thd->query_id != 0) { - tx->set_name(); + if (thd.query_id != 0) { + tx.set_name(); + } + } + + if (!tx.has_explicit_or_read_only_snapshot()) { + auto thd_ss = std::static_pointer_cast( + thd.get_explicit_snapshot()); + if (thd_ss) { + tx.share_explicit_snapshot(std::move(thd_ss)); } } - if (!is_autocommit(*thd)) { - tx->start_stmt(); - trans_register_ha(thd, true, rocksdb_hton, NULL); + + if (!is_autocommit(thd)) { + tx.start_stmt(); + trans_register_ha(&thd, true, rocksdb_hton, nullptr); } } @@ -8099,8 +8112,7 @@ static int rocksdb_explicit_snapshot( if (mysql_bin_log_is_open()) { mysql_bin_log_lock_commits(ss_info); } - auto s = - Rdb_explicit_snapshot::create(thd, *ss_info, rdb, rdb->GetSnapshot()); + const auto s = Rdb_explicit_snapshot::create(*thd, *ss_info); if (mysql_bin_log_is_open()) { mysql_bin_log_unlock_commits(ss_info); } @@ -8168,11 +8180,11 @@ static int rocksdb_start_tx_and_assign_read_view( return HA_EXIT_FAILURE; } - Rdb_transaction *const tx = get_or_create_tx(thd, TABLE_TYPE::USER_TABLE); - Rdb_perf_context_guard guard(tx, thd); + auto &tx = *get_or_create_tx(thd, TABLE_TYPE::USER_TABLE); + Rdb_perf_context_guard guard(&tx, 
thd); - tx->set_tx_read_only(); - rocksdb_register_tx(hton, thd, tx); + tx.set_tx_read_only(); + rocksdb_register_tx(hton, *thd, tx); const uint64_t client_provided_read_filtering_ts = rdb_is_binlog_ttl_enabled() @@ -8192,7 +8204,7 @@ static int rocksdb_start_tx_and_assign_read_view( if (ss_info) { ss_info->read_filtering_ts = read_filtering_ts; } - tx->set_ttl_read_filtering_ts(read_filtering_ts); + tx.set_ttl_read_filtering_ts(read_filtering_ts); return HA_EXIT_SUCCESS; } @@ -8255,7 +8267,7 @@ static int rocksdb_start_tx_with_shared_read_view( } } - rocksdb_register_tx(hton, thd, tx); + rocksdb_register_tx(hton, *thd, *tx); } // case: unlock the binlog @@ -9155,8 +9167,9 @@ static int rocksdb_init_internal(void *const p) { // NO_LINT_DEBUG LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB: Opening TransactionDB..."); - status = rocksdb::TransactionDB::Open( - main_opts, tx_db_options, rocksdb_datadir, cf_descr, &cf_handles, &rdb); + status = + rocksdb::TransactionDB::Open(main_opts, tx_db_options, rocksdb_datadir, + cf_descr, &cf_handles, &detail::rdb); DBUG_EXECUTE_IF("rocksdb_init_failure_open_db", { // Simulate opening TransactionDB failure @@ -9176,7 +9189,7 @@ static int rocksdb_init_internal(void *const p) { "Verifying file checksums..."); rocksdb::ReadOptions checksum_read_options; checksum_read_options.readahead_size = 2 * 1024 * 1024; - status = rdb->VerifyFileChecksums(checksum_read_options); + status = rdb_get_rocksdb_db().VerifyFileChecksums(checksum_read_options); if (!status.ok()) { rdb_log_status_error(status, "Instance failed checksum verification"); for (auto cfh_ptr : cf_handles) delete (cfh_ptr); @@ -9189,7 +9202,8 @@ static int rocksdb_init_internal(void *const p) { LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB:Init column families..."); if (st_rdb_exec_time.exec("cf_manager::init", [&]() { - return cf_manager.init(rdb, std::move(cf_options_map), &cf_handles); + return cf_manager.init(rdb_get_rocksdb_db(), 
std::move(cf_options_map), + &cf_handles); })) { // NO_LINT_DEBUG LogPluginErrMsg(ERROR_LEVEL, ER_LOG_PRINTF_MSG, @@ -9201,7 +9215,7 @@ static int rocksdb_init_internal(void *const p) { LogPluginErrMsg(INFORMATION_LEVEL, ER_LOG_PRINTF_MSG, "RocksDB: Initializing data dictionary..."); if (st_rdb_exec_time.exec("Rdb_dict_manager_selector::init", [&]() { - return dict_manager.init(rdb, &cf_manager, + return dict_manager.init(rdb_get_rocksdb_db(), &cf_manager, rocksdb_enable_remove_orphaned_dropped_cfs); })) { // NO_LINT_DEBUG @@ -9242,7 +9256,7 @@ static int rocksdb_init_internal(void *const p) { DBUG_EXECUTE_IF("rocksdb_init_failure_managers", { DBUG_RETURN(HA_EXIT_FAILURE); }); - Rdb_sst_info::init(rdb); + Rdb_sst_info::init(rdb_get_rocksdb_db()); /* Enable auto compaction, things needed for compaction filter are finished @@ -9255,7 +9269,8 @@ static int rocksdb_init_internal(void *const p) { new_compaction_enabled_cf_handles.push_back(cfh_ptr); } } - status = rdb->EnableAutoCompaction(new_compaction_enabled_cf_handles); + status = rdb_get_rocksdb_db().EnableAutoCompaction( + new_compaction_enabled_cf_handles); if (!status.ok()) { rdb_log_status_error(status, "Error enabling compaction"); @@ -9325,7 +9340,7 @@ static int rocksdb_init_internal(void *const p) { { DBUG_RETURN(HA_EXIT_FAILURE); }); if (rocksdb_pause_background_work) { - rdb->PauseBackgroundWork(); + rdb_get_rocksdb_db().PauseBackgroundWork(); } // NO_LINT_DEBUG @@ -9418,8 +9433,8 @@ static int rocksdb_shutdown(bool minimalShutdown) { rocksdb_flush_all_memtables(); // Stop all rocksdb background work - if (rdb && rdb->GetBaseDB()) { - CancelAllBackgroundWork(rdb->GetBaseDB(), true); + if (detail::rdb != nullptr && rdb_get_rocksdb_db().GetBaseDB() != nullptr) { + CancelAllBackgroundWork(rdb_get_rocksdb_db().GetBaseDB(), true); } // Signal the background thread to stop and to persist all stats collected @@ -9526,8 +9541,8 @@ static int rocksdb_shutdown(bool minimalShutdown) { clone::client_shutdown(); 
clone::donor_shutdown(); - delete rdb; - rdb = nullptr; + delete detail::rdb; + detail::rdb = nullptr; delete commit_latency_stats; commit_latency_stats = nullptr; @@ -10829,7 +10844,7 @@ bool ha_rocksdb::create_cfs( auto local_dict_manager = dict_manager.get_dict_manager_selector_non_const(cf_name); std::lock_guard dm_lock(*local_dict_manager); - cf_handle = cf_manager.get_or_create_cf(rdb, cf_name); + cf_handle = cf_manager.get_or_create_cf(rdb_get_rocksdb_db(), cf_name); if (!cf_handle) { return true; } @@ -15384,7 +15399,7 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { } } tx->m_n_mysql_tables_in_use++; - rocksdb_register_tx(rocksdb_hton, thd, tx); + rocksdb_register_tx(rocksdb_hton, *thd, *tx); tx->io_perf_start(&m_io_perf); } @@ -15413,7 +15428,7 @@ int ha_rocksdb::start_stmt(THD *const thd, Rdb_transaction *const tx = get_or_create_tx(thd, m_tbl_def->get_table_type()); read_thd_vars(thd); - rocksdb_register_tx(ht, thd, tx); + rocksdb_register_tx(ht, *thd, *tx); tx->io_perf_start(&m_io_perf); DBUG_RETURN(HA_EXIT_SUCCESS); @@ -15474,8 +15489,8 @@ static int delete_range(const std::unordered_set &indices) { uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0, is_reverse_cf ? 
0 : 1); - rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh.get(), - &range.start, &range.limit); + auto status = DeleteFilesInRange(rdb_get_rocksdb_db().GetBaseDB(), + cfh.get(), &range.start, &range.limit); if (!status.ok()) { // NO_LINT_DEBUG LogPluginErrMsg( @@ -15501,13 +15516,13 @@ static int delete_range(const std::unordered_set &indices) { rocksdb::TransactionDBWriteOptimizations optimize; optimize.skip_concurrency_control = true; optimize.skip_duplicate_key_check = true; - rocksdb::Status status = - rdb->Write(rocksdb::WriteOptions(), optimize, &batch); + const auto status = + rdb_get_rocksdb_db().Write(rocksdb::WriteOptions(), optimize, &batch); if (status.ok()) { if (!rdb_sync_wal_supported()) { // If we don't support SyncWAL, do a flush at least - rdb->FlushWAL(false); + rdb_get_rocksdb_db().FlushWAL(false); } } return HA_EXIT_SUCCESS; @@ -15529,7 +15544,8 @@ static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh, rdb_netbuf_store_uint32(key_buf, index_id); const rocksdb::Slice key = rocksdb::Slice(reinterpret_cast(key_buf), sizeof(key_buf)); - std::unique_ptr it(rdb->NewIterator(read_opts, cfh)); + std::unique_ptr it( + rdb_get_rocksdb_db().NewIterator(read_opts, cfh)); rocksdb_smart_seek(is_reverse_cf, *it, key); if (!it->Valid()) { index_removed = true; @@ -15616,8 +15632,9 @@ void Rdb_drop_index_thread::run() { rocksdb::Range range = get_range( d.index_id, buf, is_reverse_cf ? 1 : 0, is_reverse_cf ? 
0 : 1); - rocksdb::Status status = DeleteFilesInRange( - rdb->GetBaseDB(), cfh.get(), &range.start, &range.limit); + auto status = + DeleteFilesInRange(rdb_get_rocksdb_db().GetBaseDB(), cfh.get(), + &range.start, &range.limit); if (!status.ok()) { if (status.IsIncomplete()) { continue; @@ -15627,8 +15644,8 @@ void Rdb_drop_index_thread::run() { rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); } - status = rdb->CompactRange(getCompactRangeOptions(), cfh.get(), - &range.start, &range.limit); + status = rdb_get_rocksdb_db().CompactRange( + getCompactRangeOptions(), cfh.get(), &range.start, &range.limit); if (!status.ok()) { if (status.IsIncomplete()) { continue; @@ -15685,7 +15702,8 @@ void Rdb_drop_index_thread::run() { for (const auto cf_id : dropped_cf_ids) { if (ongoing_drop_cf_ids.find(cf_id) == ongoing_drop_cf_ids.end()) { - cf_manager.remove_dropped_cf(local_dict_manager, rdb, cf_id); + cf_manager.remove_dropped_cf(local_dict_manager, + rdb_get_rocksdb_db(), cf_id); } } } @@ -16123,11 +16141,13 @@ void ha_rocksdb::records_in_range_internal(uint inx, key_range *const min_key, // Getting statistics, including from Memtables rocksdb::DB::SizeApproximationFlags include_flags = rocksdb::DB::SizeApproximationFlags::INCLUDE_FILES; - rdb->GetApproximateSizes(&kd.get_cf(), &r, 1, &sz, include_flags); + rdb_get_rocksdb_db().GetApproximateSizes(&kd.get_cf(), &r, 1, &sz, + include_flags); *row_count = rows * ((double)sz / (double)disk_size); *total_size = sz; uint64_t memTableCount; - rdb->GetApproximateMemTableStats(&kd.get_cf(), r, &memTableCount, &sz); + rdb_get_rocksdb_db().GetApproximateMemTableStats(&kd.get_cf(), r, + &memTableCount, &sz); *row_count += memTableCount; *total_size += sz; DBUG_VOID_RETURN; @@ -16174,9 +16194,9 @@ int ha_rocksdb::optimize(THD *const thd MY_ATTRIBUTE((__unused__)), for (uint i = 0; i < table->s->keys; i++) { uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; auto range = get_range(i, buf); - const auto s = 
rdb->CompactRange(getCompactRangeOptions(), - &m_key_descr_arr[i]->get_cf(), - &range.start, &range.limit); + const auto s = rdb_get_rocksdb_db().CompactRange( + getCompactRangeOptions(), &m_key_descr_arr[i]->get_cf(), &range.start, + &range.limit); if (!s.ok()) { DBUG_RETURN(rdb_error_to_mysql(s)); } @@ -16248,8 +16268,8 @@ static int calculate_cardinality_table_scan( auto r = ha_rocksdb::get_range(*kd, r_buf); uint64_t memtableCount; uint64_t memtableSize; - rdb->GetApproximateMemTableStats(&kd->get_cf(), r, &memtableCount, - &memtableSize); + rdb_get_rocksdb_db().GetApproximateMemTableStats( + &kd->get_cf(), r, &memtableCount, &memtableSize); if (scan_type == SCAN_TYPE_MEMTABLE_ONLY && memtableCount < (uint64_t)stat.m_rows / 10) { @@ -16267,8 +16287,8 @@ static int calculate_cardinality_table_scan( stat.m_actual_disk_size = memtableSize; } - std::unique_ptr it = std::unique_ptr( - rdb->NewIterator(read_opts, &kd->get_cf())); + const auto it = std::unique_ptr( + rdb_get_rocksdb_db().NewIterator(read_opts, &kd->get_cf())); rocksdb::Slice first_index_key((const char *)r_buf, Rdb_key_def::INDEX_NUMBER_SIZE); @@ -16394,7 +16414,7 @@ static int read_stats_from_ssts( rocksdb::TablePropertiesCollection props; for (const auto &it : ranges) { const auto old_size MY_ATTRIBUTE((__unused__)) = props.size(); - const auto status = rdb->GetPropertiesOfTablesInRange( + const auto status = rdb_get_rocksdb_db().GetPropertiesOfTablesInRange( it.first, &it.second[0], it.second.size(), &props); assert(props.size() >= old_size); if (!status.ok()) { @@ -16619,7 +16639,8 @@ int ha_rocksdb::adjust_handler_stats_sst_and_memtable(ha_statistics *ha_stats, uint64_t sz = 0; rocksdb::DB::SizeApproximationFlags include_flags = rocksdb::DB::SizeApproximationFlags::INCLUDE_FILES; - rdb->GetApproximateSizes(&pk_def->get_cf(), &r, 1, &sz, include_flags); + rdb_get_rocksdb_db().GetApproximateSizes(&pk_def->get_cf(), &r, 1, &sz, + include_flags); ha_stats->records += sz / 
ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; ha_stats->data_file_length += sz; } @@ -16637,8 +16658,8 @@ int ha_rocksdb::adjust_handler_stats_sst_and_memtable(ha_statistics *ha_stats, // it also can return 0 for quite a large tables which means that // cardinality for memtable only indxes will be reported as 0 - rdb->GetApproximateMemTableStats(&pk_def->get_cf(), r, &memtableCount, - &memtableSize); + rdb_get_rocksdb_db().GetApproximateMemTableStats( + &pk_def->get_cf(), r, &memtableCount, &memtableSize); // Atomically update all of these fields at the same time if (cachetime > 0) { @@ -18163,7 +18184,7 @@ static void myrocks_update_status() { static void myrocks_update_memory_status() { std::vector dbs; std::unordered_set cache_set; - dbs.push_back(rdb); + dbs.push_back(&rdb_get_rocksdb_db()); std::map temp_usage_by_type; rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, &temp_usage_by_type); @@ -18255,7 +18276,7 @@ static void update_rocksdb_stall_status() { // Retrieve information from valid CF handle object. It is safe // even if the CF is removed from cf_manager at this point. std::map props; - if (!rdb->GetMapProperty( + if (!rdb_get_rocksdb_db().GetMapProperty( cfh.get(), rocksdb::DB::Properties::kCFWriteStallStats, &props)) { continue; } @@ -18538,10 +18559,10 @@ void Rdb_background_thread::run() { // InnoDB's behavior. For mode never, the wal file isn't even written, // whereas background writes to the wal file, but issues the syncs in a // background thread. 
- if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) && + if (detail::rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) && !rocksdb_db_options->allow_mmap_writes) { bool sync = rdb_sync_wal_supported(); - const rocksdb::Status s = rdb->FlushWAL(sync); + const auto s = rdb_get_rocksdb_db().FlushWAL(sync); if (!s.ok()) { rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); } @@ -18874,8 +18895,8 @@ void Rdb_manual_compaction_thread::run() { // CompactRange may take a very long time. On clean shutdown, // it is cancelled by CancelAllBackgroundWork, then status is // set to shutdownInProgress. - const rocksdb::Status s = - rdb->CompactRange(mcr.option, mcr.cf.get(), mcr.start, mcr.limit); + const auto s = rdb_get_rocksdb_db().CompactRange(mcr.option, mcr.cf.get(), + mcr.start, mcr.limit); rocksdb_manual_compactions_running--; if (s.ok()) { @@ -19199,9 +19220,6 @@ bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, return can_use; } -/* For modules that need access to the global data structures */ -rocksdb::TransactionDB *rdb_get_rocksdb_db() { return rdb; } - Rdb_cf_manager &rdb_get_cf_manager() { return cf_manager; } const rocksdb::BlockBasedTableOptions &rdb_get_table_options() { @@ -19475,8 +19493,8 @@ static void rocksdb_set_delayed_write_rate( const uint64_t new_val = *static_cast(save); if (rocksdb_db_options->delayed_write_rate != new_val) { rocksdb_db_options->delayed_write_rate = new_val; - rocksdb::Status s = - rdb->SetDBOptions({{"delayed_write_rate", std::to_string(new_val)}}); + const auto s = rdb_get_rocksdb_db().SetDBOptions( + {{"delayed_write_rate", std::to_string(new_val)}}); if (!s.ok()) { /* NO_LINT_DEBUG */ @@ -19494,7 +19512,8 @@ static void rocksdb_set_max_latest_deadlocks( const uint32_t new_val = *static_cast(save); if (rocksdb_max_latest_deadlocks != new_val) { rocksdb_max_latest_deadlocks = new_val; - rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks); + 
rdb_get_rocksdb_db().SetDeadlockInfoBufferSize( + rocksdb_max_latest_deadlocks); } } @@ -19606,8 +19625,8 @@ static void rocksdb_set_max_background_jobs( if (rocksdb_db_options->max_background_jobs != new_val) { rocksdb_db_options->max_background_jobs = new_val; - rocksdb::Status s = - rdb->SetDBOptions({{"max_background_jobs", std::to_string(new_val)}}); + const auto s = rdb_get_rocksdb_db().SetDBOptions( + {{"max_background_jobs", std::to_string(new_val)}}); if (!s.ok()) { /* NO_LINT_DEBUG */ @@ -19631,7 +19650,7 @@ static void rocksdb_set_max_background_compactions( if (rocksdb_db_options->max_background_compactions != new_val) { rocksdb_db_options->max_background_compactions = new_val; - rocksdb::Status s = rdb->SetDBOptions( + const auto s = rdb_get_rocksdb_db().SetDBOptions( {{"max_background_compactions", std::to_string(new_val)}}); if (!s.ok()) { @@ -19698,8 +19717,8 @@ static void rocksdb_set_bytes_per_sync( if (rocksdb_db_options->bytes_per_sync != new_val) { rocksdb_db_options->bytes_per_sync = new_val; - rocksdb::Status s = - rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}}); + const auto s = rdb_get_rocksdb_db().SetDBOptions( + {{"bytes_per_sync", std::to_string(new_val)}}); if (!s.ok()) { /* NO_LINT_DEBUG */ @@ -19723,8 +19742,8 @@ static void rocksdb_set_wal_bytes_per_sync( if (rocksdb_db_options->wal_bytes_per_sync != new_val) { rocksdb_db_options->wal_bytes_per_sync = new_val; - rocksdb::Status s = - rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}}); + const auto s = rdb_get_rocksdb_db().SetDBOptions( + {{"wal_bytes_per_sync", std::to_string(new_val)}}); if (!s.ok()) { /* NO_LINT_DEBUG */ @@ -19804,7 +19823,8 @@ static int rocksdb_validate_update_cf_options(THD * /* unused */, auto local_dict_manager = dict_manager.get_dict_manager_selector_non_const(cf_name); std::lock_guard dm_lock(*local_dict_manager); - auto cfh = cf_manager.get_or_create_cf(rdb, cf_name); + const auto cfh = + 
cf_manager.get_or_create_cf(rdb_get_rocksdb_db(), cf_name); if (!cfh) { return HA_EXIT_FAILURE; @@ -19878,13 +19898,11 @@ static void rocksdb_set_update_cf_options(THD *const /* unused */, "family '%s' to a map. %s", cf_name.c_str(), s.ToString().c_str()); } else { - assert(rdb != nullptr); - // Finally we can apply the options. // If cf_manager.drop_cf() has been called at this point, SetOptions() // will still succeed. The options data will only be cleared when // the CF handle object is destroyed. - s = rdb->SetOptions(cfh.get(), opt_map); + s = rdb_get_rocksdb_db().SetOptions(cfh.get(), opt_map); if (s != rocksdb::Status::OK()) { // NO_LINT_DEBUG @@ -19903,7 +19921,7 @@ static void rocksdb_set_update_cf_options(THD *const /* unused */, // the CF options. This is necessary also to make sure that the CF // options will be correctly reflected in the relevant table: // ROCKSDB_CF_OPTIONS in INFORMATION_SCHEMA. - const auto cf_options = rdb->GetOptions(cfh.get()); + const auto cf_options = rdb_get_rocksdb_db().GetOptions(cfh.get()); std::string updated_options; s = rocksdb::GetStringFromColumnFamilyOptions(&updated_options, @@ -20058,8 +20076,6 @@ rocksdb::DBOptions *get_rocksdb_db_options() { static void rocksdb_select_bypass_rejected_query_history_size_update( my_core::THD *const /* unused */, my_core::SYS_VAR *const /* unused */, void *const var_ptr, const void *const save) { - assert(rdb != nullptr); - uint32_t val = *static_cast(var_ptr) = *static_cast(save); @@ -20072,8 +20088,6 @@ static void rocksdb_select_bypass_rejected_query_history_size_update( static void rocksdb_max_compaction_history_update( my_core::THD *const /* unused */, my_core::SYS_VAR *const /* unused */, void *const var_ptr, const void *const save) { - assert(rdb != nullptr); - uint64_t val = *static_cast(var_ptr) = *static_cast(save); compaction_stats.resize_history(val); @@ -20199,22 +20213,22 @@ std::unique_ptr rdb_tx_get_iterator( THD *thd, rocksdb::ColumnFamilyHandle &cf, bool 
skip_bloom_filter, const rocksdb::Slice &eq_cond_lower_bound, const rocksdb::Slice &eq_cond_upper_bound, - const rocksdb::Snapshot **snapshot, TABLE_TYPE table_type, - bool read_current, bool create_snapshot) { + rdb_snapshot_unique_ptr *snapshot, TABLE_TYPE table_type, bool read_current, + bool create_snapshot) { if (commit_in_the_middle(thd)) { assert(snapshot && *snapshot == nullptr); if (snapshot) { - *snapshot = rdb->GetSnapshot(); + *snapshot = get_rdb_snapshot(); auto read_opts = rocksdb::ReadOptions(); // TODO(mung): set based on WHERE conditions read_opts.total_order_seek = true; - read_opts.snapshot = *snapshot; + read_opts.snapshot = (*snapshot).get(); if (rocksdb_enable_udt_in_mem && is_udt_compatible_cf(cf)) { Rdb_transaction *tx = get_tx_from_thd(thd); read_opts.timestamp = tx->get_tx_read_timestamp_slice(); } return std::unique_ptr( - rdb->NewIterator(read_opts, &cf)); + rdb_get_rocksdb_db().NewIterator(read_opts, &cf)); } else { return nullptr; } diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 28ce89398ad4..a75300c53c13 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -49,6 +49,7 @@ #include "./rdb_perf_context.h" #include "./rdb_sst_info.h" #include "./rdb_utils.h" +#include "ha_rocksdb_proto.h" #ifndef __APPLE__ #include "./rdb_io_watchdog.h" @@ -1218,7 +1219,7 @@ void rdb_tx_acquire_snapshot(Rdb_transaction &tx); THD *thd, rocksdb::ColumnFamilyHandle &cf, bool skip_bloom_filter, const rocksdb::Slice &eq_cond_lower_bound, const rocksdb::Slice &eq_cond_upper_bound, - const rocksdb::Snapshot **snapshot, TABLE_TYPE table_type, + rdb_snapshot_unique_ptr *snapshot, TABLE_TYPE table_type, bool read_current = false, bool create_snapshot = true); [[nodiscard]] rocksdb::Status rdb_tx_get( diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h index 3b673f70be15..656506b246ba 100644 --- a/storage/rocksdb/ha_rocksdb_proto.h +++ b/storage/rocksdb/ha_rocksdb_proto.h @@ 
-72,11 +72,36 @@ void rdb_queue_save_stats_request(); extern const std::string TRUNCATE_TABLE_PREFIX; -/* - Access to singleton objects. -*/ +// Do not use declarations in this namespace outside of ha_rocksdb.cc +namespace detail { + +extern rocksdb::TransactionDB *rdb; + +} // namespace detail + +// Safe to call between successful call to rocksdb_init_internal and +// rocksdb_shutdown +[[nodiscard]] inline rocksdb::TransactionDB &rdb_get_rocksdb_db() { + return *detail::rdb; +} + +namespace detail { +struct rdb_snapshot_deleter { + void operator()(const rocksdb::Snapshot *snapshot) { + rdb_get_rocksdb_db().ReleaseSnapshot(snapshot); + } +}; + +} // namespace detail + +// Similar to rocksdb::ManagedSnapshot but taking less space and supporting move +// semantics too +using rdb_snapshot_unique_ptr = + std::unique_ptr; -rocksdb::TransactionDB *rdb_get_rocksdb_db(); +[[nodiscard]] inline rdb_snapshot_unique_ptr get_rdb_snapshot() { + return rdb_snapshot_unique_ptr{rdb_get_rocksdb_db().GetSnapshot()}; +} class Rdb_cf_manager; Rdb_cf_manager &rdb_get_cf_manager(); diff --git a/storage/rocksdb/rdb_bulk_load.cc b/storage/rocksdb/rdb_bulk_load.cc index 9912dcea87dc..0c23919f20a5 100644 --- a/storage/rocksdb/rdb_bulk_load.cc +++ b/storage/rocksdb/rdb_bulk_load.cc @@ -276,7 +276,7 @@ uint Rdb_bulk_load_context::notify_ddl(std::string_view db_name, } Rdb_sst_info *Rdb_bulk_load_context::add_sst_info( - rocksdb::DB *rdb, const std::string &tablename, const Rdb_key_def &kd, + rocksdb::DB &rdb, const std::string &tablename, const Rdb_key_def &kd, rocksdb::DBOptions &db_option, bool trace_sst_api, bool compression_parallel_threads) { auto sst_info_ptr = std::make_unique( diff --git a/storage/rocksdb/rdb_bulk_load.h b/storage/rocksdb/rdb_bulk_load.h index 9e6f8bb23774..7d0fd38c88f1 100644 --- a/storage/rocksdb/rdb_bulk_load.h +++ b/storage/rocksdb/rdb_bulk_load.h @@ -180,10 +180,12 @@ class Rdb_bulk_load_context { return it->second.get(); } - Rdb_sst_info *add_sst_info(rocksdb::DB 
*rdb, const std::string &tablename, - const Rdb_key_def &kd, - rocksdb::DBOptions &db_option, bool trace_sst_api, - bool compression_parallel_threads); + [[nodiscard]] Rdb_sst_info *add_sst_info(rocksdb::DB &rdb, + const std::string &tablename, + const Rdb_key_def &kd, + rocksdb::DBOptions &db_option, + bool trace_sst_api, + bool compression_parallel_threads); Rdb_index_merge *find_key_merge(GL_INDEX_ID index_id) { const auto it = m_key_merge.find(index_id); diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc index df00c1386ce0..f01fe3eae220 100644 --- a/storage/rocksdb/rdb_cf_manager.cc +++ b/storage/rocksdb/rdb_cf_manager.cc @@ -45,7 +45,7 @@ bool Rdb_cf_manager::is_cf_name_reverse(std::string_view name) { return name.compare(0, 4, "rev:") == 0; } -bool Rdb_cf_manager::init(rocksdb::DB *const rdb, +bool Rdb_cf_manager::init(rocksdb::DB &rdb, std::unique_ptr &&cf_options, std::vector *handles) { mysql_mutex_init(rdb_cfm_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST); @@ -74,7 +74,7 @@ bool Rdb_cf_manager::init(rocksdb::DB *const rdb, "RocksDB: Dropping column family %s with id %u on RocksDB for temp " "table", cf_name.c_str(), cf_id); - auto status = rdb->DropColumnFamily(cfh_ptr); + const auto status = rdb.DropColumnFamily(cfh_ptr); if (status.ok()) { delete (cfh_ptr); continue; @@ -149,8 +149,7 @@ void Rdb_cf_manager::cleanup() { See Rdb_cf_manager::get_cf */ std::shared_ptr Rdb_cf_manager::get_or_create_cf( - rocksdb::DB *const rdb, const std::string &cf_name) { - assert(rdb != nullptr); + rocksdb::DB &rdb, const std::string &cf_name) { assert(!cf_name.empty()); std::shared_ptr cf_handle; @@ -187,8 +186,7 @@ std::shared_ptr Rdb_cf_manager::get_or_create_cf( opts.target_file_size_base); rocksdb::ColumnFamilyHandle *cf_handle_ptr = nullptr; - const rocksdb::Status s = - rdb->CreateColumnFamily(opts, cf_name, &cf_handle_ptr); + const auto s = rdb.CreateColumnFamily(opts, cf_name, &cf_handle_ptr); if (s.ok()) { assert(cf_handle_ptr != 
nullptr); @@ -280,8 +278,8 @@ Rdb_cf_manager::get_all_cf(void) const { } int Rdb_cf_manager::remove_dropped_cf(Rdb_dict_manager *const dict_manager, - rocksdb::TransactionDB *const rdb, - const uint32 &cf_id) { + rocksdb::TransactionDB &rdb, + uint32 cf_id) { dict_manager->assert_lock_held(); RDB_MUTEX_LOCK_CHECK(m_mutex); auto batch = Rdb_dict_manager::begin(); @@ -314,7 +312,7 @@ int Rdb_cf_manager::remove_dropped_cf(Rdb_dict_manager *const dict_manager, return HA_EXIT_FAILURE; } - auto status = rdb->DropColumnFamily(cf_handle); + const auto status = rdb.DropColumnFamily(cf_handle); if (!status.ok()) { dict_manager->delete_dropped_cf(batch, cf_id); diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h index b5c4b23bfe9a..e62ac42deeb7 100644 --- a/storage/rocksdb/rdb_cf_manager.h +++ b/storage/rocksdb/rdb_cf_manager.h @@ -75,9 +75,9 @@ class Rdb_cf_manager : public Ensure_initialized { @param handles [IN][OUT]: list of all active cf_handles fetched from rdb transaction. 
*/ - bool init(rocksdb::DB *const rdb, - std::unique_ptr &&cf_options, - std::vector *handles); + [[nodiscard]] bool init(rocksdb::DB &rdb, + std::unique_ptr &&cf_options, + std::vector *handles); void cleanup(); /* @@ -85,7 +85,7 @@ class Rdb_cf_manager : public Ensure_initialized { cf_name requires non-empty string */ std::shared_ptr get_or_create_cf( - rocksdb::DB *const rdb, const std::string &cf_name); + rocksdb::DB &rdb, const std::string &cf_name); /* Used by table open */ std::shared_ptr get_cf( @@ -102,7 +102,7 @@ class Rdb_cf_manager : public Ensure_initialized { void) const; int remove_dropped_cf(Rdb_dict_manager *const dict_manager, - rocksdb::TransactionDB *const rdb, const uint32 &cf_id); + rocksdb::TransactionDB &rdb, uint32 cf_id); /* Used to delete cf by name */ int drop_cf(Rdb_ddl_manager *const ddl_manager, diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index c1958e58bab5..7da300b3e3f1 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -566,7 +566,7 @@ uint Rdb_key_def::setup(const TABLE &tbl, const Rdb_tbl_def &tbl_def, m_stats.m_distinct_keys_per_prefix.resize(get_key_parts()); /* Cache prefix extractor for bloom filter usage later */ - const auto opt = rdb_get_rocksdb_db()->GetOptions(&get_cf()); + const auto opt = rdb_get_rocksdb_db().GetOptions(&get_cf()); m_prefix_extractor = opt.prefix_extractor; uint rtn = setup_vector_index(tbl, tbl_def, cmd_srv_helper); @@ -5491,24 +5491,23 @@ bool Rdb_binlog_manager::unpack_value(const std::string &value_str, return false; } -bool Rdb_dict_manager::init(rocksdb::TransactionDB *const rdb_dict, - Rdb_cf_manager *const cf_manager, - const bool enable_remove_orphaned_dropped_cfs, +bool Rdb_dict_manager::init(rocksdb::TransactionDB &rdb_dict, + Rdb_cf_manager *cf_manager, + bool enable_remove_orphaned_dropped_cfs, const std::string &system_cf_name, const std::string &default_cf_name) { - assert(rdb_dict != nullptr); assert(cf_manager != 
nullptr); mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST); - m_db = rdb_dict; + m_db = &rdb_dict; // It is safe to get raw pointers here since: // 1. System CF and default CF cannot be dropped // 2. cf_manager outlives dict_manager - m_system_cfh = cf_manager->get_or_create_cf(m_db, system_cf_name).get(); + m_system_cfh = cf_manager->get_or_create_cf(*m_db, system_cf_name).get(); rocksdb::ColumnFamilyHandle *default_cfh = - cf_manager->get_or_create_cf(m_db, default_cf_name).get(); + cf_manager->get_or_create_cf(*m_db, default_cf_name).get(); // System CF and default CF should be initialized if (m_system_cfh == nullptr || default_cfh == nullptr) { return HA_EXIT_FAILURE; @@ -6431,9 +6430,9 @@ Rdb_dict_manager_selector::get_dict_manager_selector_const( return &m_user_table_dict_manager; } -bool Rdb_dict_manager_selector::init( - rocksdb::TransactionDB *const rdb_dict, Rdb_cf_manager *const cf_manager, - const bool enable_remove_orphaned_cf_flags) { +bool Rdb_dict_manager_selector::init(rocksdb::TransactionDB &rdb_dict, + Rdb_cf_manager *cf_manager, + bool enable_remove_orphaned_cf_flags) { m_cf_manager = cf_manager; bool ret = m_user_table_dict_manager.init( rdb_dict, cf_manager, enable_remove_orphaned_cf_flags, diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index a67ff500d0a4..a44ad71a45e8 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -1687,11 +1687,11 @@ class Rdb_dict_manager : public Ensure_initialized { Rdb_dict_manager &operator=(const Rdb_dict_manager &) = delete; Rdb_dict_manager() = default; - bool init(rocksdb::TransactionDB *const rdb_dict, - Rdb_cf_manager *const cf_manager, - const bool enable_remove_orphaned_cf_flags, - const std::string &system_cf_name, - const std::string &default_cf_name); + [[nodiscard]] bool init(rocksdb::TransactionDB &rdb_dict, + Rdb_cf_manager *cf_manager, + bool enable_remove_orphaned_cf_flags, + const std::string &system_cf_name, + const std::string 
&default_cf_name); inline void cleanup() { if (!initialized) return; @@ -1983,9 +1983,9 @@ class Rdb_dict_manager_selector { const Rdb_dict_manager *get_dict_manager_selector_const( bool fetch_tmp_dict_manager) const; - bool init(rocksdb::TransactionDB *const rdb_dict, - Rdb_cf_manager *const cf_manager, - const bool enable_remove_orphaned_cf_flags); + [[nodiscard]] bool init(rocksdb::TransactionDB &rdb_dict, + Rdb_cf_manager *cf_manager, + bool enable_remove_orphaned_cf_flags); void cleanup(); }; diff --git a/storage/rocksdb/rdb_i_s.cc b/storage/rocksdb/rdb_i_s.cc index acb947310619..0b6d70de36bc 100644 --- a/storage/rocksdb/rdb_i_s.cc +++ b/storage/rocksdb/rdb_i_s.cc @@ -118,11 +118,7 @@ static int rdb_i_s_cfstats_fill_table( {rocksdb::DB::Properties::kEstimatePendingCompactionBytes, "ESTIMATE_PENDING_COMPACTION_BYTES"}}; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } + auto &rdb = rdb_get_rocksdb_db(); const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); @@ -137,7 +133,7 @@ static int rdb_i_s_cfstats_fill_table( // It is safe if the CF is removed from cf_manager at // this point. The CF handle object is valid and sufficient here. 
for (const auto &property : cf_properties) { - if (!rdb->GetIntProperty(cfh.get(), property.first, &val)) { + if (!rdb.GetIntProperty(cfh.get(), property.first, &val)) { continue; } @@ -204,17 +200,13 @@ static int rdb_i_s_dbstats_fill_table( {rocksdb::DB::Properties::kOldestSnapshotTime, "DB_OLDEST_SNAPSHOT_TIME"}}; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } + auto &rdb = rdb_get_rocksdb_db(); const rocksdb::BlockBasedTableOptions &table_options = rdb_get_table_options(); for (const auto &property : db_properties) { - if (!rdb->GetIntProperty(property.first, &val)) { + if (!rdb.GetIntProperty(property.first, &val)) { continue; } @@ -296,12 +288,6 @@ static int rdb_i_s_perf_context_fill_table( Field **field = tables->table->field; assert(field != nullptr); - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - const std::vector tablenames = rdb_get_open_table_names(); for (const auto &it : tablenames) { @@ -392,12 +378,6 @@ static int rdb_i_s_perf_context_global_fill_table( int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - // Get a copy of the global perf counters. 
Rdb_perf_counters global_counters; rdb_get_global_perf_counters(&global_counters); @@ -458,12 +438,6 @@ static int rdb_i_s_cfoptions_fill_table( int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); for (const auto &cf_name : cf_manager.get_cf_names()) { @@ -748,12 +722,6 @@ static int rdb_i_s_global_info_fill_table( int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - /* binlog info */ Rdb_binlog_manager *const blm = rdb_get_binlog_manager(); assert(blm != nullptr); @@ -871,11 +839,7 @@ static int rdb_i_s_compact_stats_fill_table( DBUG_ENTER_FUNC(); int ret = 0; - rocksdb::DB *rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } + auto &rdb = rdb_get_rocksdb_db(); Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); @@ -891,7 +855,7 @@ static int rdb_i_s_compact_stats_fill_table( // this point. The CF handle object is valid and sufficient here. 
std::map props; bool bool_ret MY_ATTRIBUTE((__unused__)); - bool_ret = rdb->GetMapProperty(cfh.get(), "rocksdb.cfstats", &props); + bool_ret = rdb.GetMapProperty(cfh.get(), "rocksdb.cfstats", &props); assert(bool_ret); @@ -1223,14 +1187,10 @@ static int rdb_i_s_live_files_metadata_fill_table( DBUG_ENTER_FUNC(); int ret = 0; - rocksdb::DB *rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } + auto &rdb = rdb_get_rocksdb_db(); std::vector metadata; - rdb->GetLiveFilesMetaData(&metadata); + rdb.GetLiveFilesMetaData(&metadata); for (const auto &file : metadata) { Field **field = tables->table->field; @@ -1300,10 +1260,6 @@ static int rdb_i_s_live_files_metadata_fill_table( } } - if (!rdb) { - DBUG_RETURN(ret); - } - DBUG_RETURN(ret); } @@ -1524,14 +1480,8 @@ static int rdb_i_s_ddl_fill_table( assert(tables->table != nullptr); int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } Rdb_ddl_scanner ddl_arg; - ddl_arg.m_thd = thd; ddl_arg.m_table = tables->table; @@ -1778,11 +1728,6 @@ static int rdb_i_s_vector_index_config_fill_table( assert(tables->table != nullptr); int ret = HA_EXIT_SUCCESS; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } Rdb_vector_index_scanner ddl_arg(thd, tables->table); Rdb_ddl_manager *ddl_manager = rdb_get_ddl_manager(); @@ -1892,19 +1837,14 @@ static int rdb_i_s_sst_props_fill_table( assert(field != nullptr); /* Iterate over all the column families */ - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - + auto &rdb = rdb_get_rocksdb_db(); const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); for (const auto &cf_handle : cf_manager.get_all_cf()) { /* Grab the the properties of all the tables in the column family */ rocksdb::TablePropertiesCollection table_props_collection; const rocksdb::Status s = - rdb->GetPropertiesOfAllTables(cf_handle.get(), &table_props_collection); + 
rdb.GetPropertiesOfAllTables(cf_handle.get(), &table_props_collection); if (!s.ok()) { continue; @@ -2057,13 +1997,8 @@ static int rdb_i_s_index_file_map_fill_table( assert(field != nullptr); /* Iterate over all the column families */ - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - - const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); + auto &rdb = rdb_get_rocksdb_db(); + const auto &cf_manager = rdb_get_cf_manager(); for (const auto &cf_handle : cf_manager.get_all_cf()) { /* Grab the the properties of all the tables in the column family */ @@ -2072,7 +2007,7 @@ static int rdb_i_s_index_file_map_fill_table( // It is safe if the CF is removed from cf_manager at // this point. The CF handle object is valid and sufficient here. const rocksdb::Status s = - rdb->GetPropertiesOfAllTables(cf_handle.get(), &table_props_collection); + rdb.GetPropertiesOfAllTables(cf_handle.get(), &table_props_collection); if (!s.ok()) { continue; @@ -2195,15 +2130,11 @@ static int rdb_i_s_lock_info_fill_table( int ret = 0; - rocksdb::TransactionDB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } + auto &rdb = rdb_get_rocksdb_db(); /* cf id -> rocksdb::KeyLockInfo */ std::unordered_multimap lock_info = - rdb->GetLockStatusData(); + rdb.GetLockStatusData(); for (const auto &lock : lock_info) { const uint32_t cf_id = lock.first; @@ -2308,12 +2239,6 @@ static int rdb_i_s_trx_info_fill_table( assert(tables->table->field != nullptr); int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - const std::vector &all_trx_info = rdb_get_all_trx_info(); for (const auto &info : all_trx_info) { @@ -2427,13 +2352,7 @@ static int rdb_i_s_deadlock_info_fill_table( static const std::string str_shared("SHARED"); int ret = 0; - rocksdb::DB *const rdb = rdb_get_rocksdb_db(); - - if (!rdb) { - DBUG_RETURN(ret); - } - - const std::vector &all_dl_info = rdb_get_deadlock_info(); + const auto 
&all_dl_info = rdb_get_deadlock_info(); ulonglong id = 0; for (const auto &info : all_dl_info) { diff --git a/storage/rocksdb/rdb_iterator.cc b/storage/rocksdb/rdb_iterator.cc index 0aca21b377df..d710341a7de8 100644 --- a/storage/rocksdb/rdb_iterator.cc +++ b/storage/rocksdb/rdb_iterator.cc @@ -143,12 +143,7 @@ int Rdb_iterator_base::read_after_key(const rocksdb::Slice &key_slice) { void Rdb_iterator_base::release_scan_iterator() { m_scan_it.reset(); - - if (m_scan_it_snapshot) { - auto rdb = rdb_get_rocksdb_db(); - rdb->ReleaseSnapshot(m_scan_it_snapshot); - m_scan_it_snapshot = nullptr; - } + m_scan_it_snapshot.reset(); } void Rdb_iterator_base::setup_scan_iterator( @@ -864,7 +859,7 @@ int Rdb_iterator_partial::materialize_prefix() { } } - s = rdb_get_rocksdb_db()->GetBaseDB()->Write(options, wb.get()); + s = rdb_get_rocksdb_db().GetBaseDB()->Write(options, wb.get()); if (!s.ok()) { rc = rdb_tx_set_status_error(*tx, s, m_kd, m_tbl_def); goto exit; diff --git a/storage/rocksdb/rdb_iterator.h b/storage/rocksdb/rdb_iterator.h index 3729ddbeedb6..4917bf5d2779 100644 --- a/storage/rocksdb/rdb_iterator.h +++ b/storage/rocksdb/rdb_iterator.h @@ -161,7 +161,7 @@ class Rdb_iterator_base : public Rdb_iterator { /* Whether m_scan_it was created with skip_bloom=true */ bool m_scan_it_skips_bloom; - const rocksdb::Snapshot *m_scan_it_snapshot; + rdb_snapshot_unique_ptr m_scan_it_snapshot; /* Buffers used for upper/lower bounds for m_scan_it. */ uchar *m_scan_it_lower_bound; diff --git a/storage/rocksdb/rdb_sst_info.cc b/storage/rocksdb/rdb_sst_info.cc index 3244a62565e4..b8354d4c6049 100644 --- a/storage/rocksdb/rdb_sst_info.cc +++ b/storage/rocksdb/rdb_sst_info.cc @@ -43,7 +43,7 @@ namespace myrocks { // don't assign timestamp to bulk-loaded key. GetRootComparator() can return us // a non-timestamp aware one when UDT-IN-MEM is enabled or disabled. 
Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file( - rocksdb::DB *db, rocksdb::ColumnFamilyHandle &cf, + rocksdb::DB &db, rocksdb::ColumnFamilyHandle &cf, const rocksdb::DBOptions &db_options, const std::string &name, bool tracing, uint32_t compression_parallel_threads) : m_db(db), @@ -53,9 +53,7 @@ Rdb_sst_file_ordered::Rdb_sst_file::Rdb_sst_file( m_name(name), m_tracing(tracing), m_comparator(cf.GetComparator()->GetRootComparator()), - m_compression_parallel_threads(compression_parallel_threads) { - assert(db != nullptr); -} + m_compression_parallel_threads(compression_parallel_threads) {} rocksdb::Status Rdb_sst_file_ordered::Rdb_sst_file::open() { assert(m_sst_file_writer == nullptr); @@ -189,7 +187,7 @@ Rdb_sst_file_ordered::Rdb_sst_stack::top() { } Rdb_sst_file_ordered::Rdb_sst_file_ordered( - rocksdb::DB *db, rocksdb::ColumnFamilyHandle &cf, + rocksdb::DB &db, rocksdb::ColumnFamilyHandle &cf, const rocksdb::DBOptions &db_options, const std::string &name, bool tracing, size_t max_size, uint32_t compression_parallel_threads) : m_use_stack(false), @@ -297,7 +295,7 @@ rocksdb::Status Rdb_sst_file_ordered::commit() { return m_file.commit(); } -Rdb_sst_info::Rdb_sst_info(rocksdb::DB *db, const std::string &tablename, +Rdb_sst_info::Rdb_sst_info(rocksdb::DB &db, const std::string &tablename, const std::string &indexname, rocksdb::ColumnFamilyHandle &cf, const rocksdb::DBOptions &db_options, bool tracing, @@ -313,7 +311,7 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *db, const std::string &tablename, m_tracing(tracing), m_print_client_error(true), m_compression_parallel_threads(compression_parallel_threads) { - m_prefix = db->GetName() + '/'; + m_prefix = db.GetName() + '/'; std::string normalized_table; if (rdb_normalize_tablename(tablename.c_str(), &normalized_table)) { @@ -521,9 +519,9 @@ void Rdb_sst_info::report_error_msg(const rocksdb::Status &s, } } -void Rdb_sst_info::init(const rocksdb::DB *const db) { - const std::string dir = db->GetName(); - const auto &fs = 
db->GetEnv()->GetFileSystem(); +void Rdb_sst_info::init(const rocksdb::DB &db) { + const auto &dir = db.GetName(); + const auto &fs = db.GetEnv()->GetFileSystem(); std::vector files_in_dir; // Get the files in the specified directory diff --git a/storage/rocksdb/rdb_sst_info.h b/storage/rocksdb/rdb_sst_info.h index 08b39b84ae95..ab22f1d6e4f0 100644 --- a/storage/rocksdb/rdb_sst_info.h +++ b/storage/rocksdb/rdb_sst_info.h @@ -40,7 +40,7 @@ class Rdb_sst_file_ordered { Rdb_sst_file(Rdb_sst_file &&) = delete; Rdb_sst_file &operator=(Rdb_sst_file &&) = delete; - rocksdb::DB *const m_db; + rocksdb::DB &m_db; rocksdb::ColumnFamilyHandle &m_cf; const rocksdb::DBOptions &m_db_options; std::unique_ptr m_sst_file_writer; @@ -51,7 +51,7 @@ class Rdb_sst_file_ordered { std::string generateKey(const std::string &key); public: - Rdb_sst_file(rocksdb::DB *db, rocksdb::ColumnFamilyHandle &cf, + Rdb_sst_file(rocksdb::DB &db, rocksdb::ColumnFamilyHandle &cf, const rocksdb::DBOptions &db_options, const std::string &name, bool tracing, uint32_t compression_parallel_threads); @@ -95,7 +95,7 @@ class Rdb_sst_file_ordered { rocksdb::Status apply_first(); public: - Rdb_sst_file_ordered(rocksdb::DB *db, rocksdb::ColumnFamilyHandle &cf, + Rdb_sst_file_ordered(rocksdb::DB &db, rocksdb::ColumnFamilyHandle &cf, const rocksdb::DBOptions &db_options, const std::string &name, bool tracing, size_t max_size, uint32_t compression_parallel_threads); @@ -112,7 +112,7 @@ class Rdb_sst_info { Rdb_sst_info(Rdb_sst_info &&) = delete; Rdb_sst_info &operator=(Rdb_sst_info &&) = delete; - rocksdb::DB *const m_db; + rocksdb::DB &m_db; rocksdb::ColumnFamilyHandle &m_cf; const rocksdb::DBOptions &m_db_options; uint64_t m_curr_size; @@ -141,7 +141,7 @@ class Rdb_sst_info { const rocksdb::Status &s); public: - Rdb_sst_info(rocksdb::DB *db, const std::string &tablename, + Rdb_sst_info(rocksdb::DB &db, const std::string &tablename, const std::string &indexname, rocksdb::ColumnFamilyHandle &cf, const 
rocksdb::DBOptions &db_options, bool tracing, uint32_t compression_parallel_threads); @@ -252,7 +252,7 @@ class Rdb_sst_info { const rocksdb::ColumnFamilyHandle &get_cf() const { return m_cf; } - static void init(const rocksdb::DB *const db); + static void init(const rocksdb::DB &db); static void report_error_msg(const rocksdb::Status &s, const char *sst_file_name); diff --git a/storage/rocksdb/rdb_sst_partitioner_factory.h b/storage/rocksdb/rdb_sst_partitioner_factory.h index 309362c1f859..eca5f0ec6697 100644 --- a/storage/rocksdb/rdb_sst_partitioner_factory.h +++ b/storage/rocksdb/rdb_sst_partitioner_factory.h @@ -264,7 +264,7 @@ class Rdb_bulk_load_index_registry { * not already registered. * returns true when success. */ - [[nodiscard]] bool add_index(rocksdb::TransactionDB *rdb, + [[nodiscard]] bool add_index(rocksdb::TransactionDB &rdb, rocksdb::ColumnFamilyHandle &cf, Index_id index_id) { if (m_partitioner_factories.count(index_id) != 0) { @@ -273,7 +273,7 @@ class Rdb_bulk_load_index_registry { } auto *const sst_partitioner_factory = - rdb->GetOptions(&cf).sst_partitioner_factory.get(); + rdb.GetOptions(&cf).sst_partitioner_factory.get(); auto *const rdb_sst_partitioner_factory = dynamic_cast(sst_partitioner_factory); if (rdb_sst_partitioner_factory == nullptr) { @@ -328,9 +328,9 @@ class Rdb_bulk_load_index_registry { * trigger compaction that covers all indexes registered in * this object */ - rocksdb::Status compact_index_ranges( - rocksdb::TransactionDB *rdb, - const rocksdb::CompactRangeOptions compact_range_options) { + [[nodiscard]] rocksdb::Status compact_index_ranges( + rocksdb::TransactionDB &rdb, + const rocksdb::CompactRangeOptions &compact_range_options) { rocksdb::Status status; for (auto &entry : m_cf_indexes) { auto cf = entry.first; @@ -358,8 +358,8 @@ class Rdb_bulk_load_index_registry { compact_begin_key.ToString(/*hex*/ true).c_str(), compact_end_key.ToString(/*hex*/ true).c_str()); - status = rdb->CompactRange(compact_range_options, cf, 
&compact_begin_key, - &compact_end_key); + status = rdb.CompactRange(compact_range_options, cf, &compact_begin_key, + &compact_end_key); if (!status.ok()) { break; } From 2041d053be8d75ecb9abd819c864e73c69a711e5 Mon Sep 17 00:00:00 2001 From: Laurynas Biveinis Date: Mon, 6 Jan 2025 18:03:32 +0200 Subject: [PATCH 2/2] Introduce a transaction-lifetime REPEATABLE READ snapshot for RR reads Make MyRocks isolation behavior closer to that of InnoDB: - All data-writing SQL statements, and read statements under the READ COMMITTED isolation level, take a RocksDB snapshot at the beginning of the statement and release it at the end. - Data-reading SQL statements under REPEATABLE READ share a RocksDB snapshot, which is acquired by the first such statement and released at the end of the transaction. The above causes other user-visible changes: - The rocksdb_skip_snapshot_validation system variable becomes a no-op. This variable should be removed at a later time. - SHOW ENGINE ROCKSDB STATUS output is extended with RR read snapshot information. Implementation details: - Introduce a new possible value for Rdb_transaction::statement_snapshot_type: snapshot_type::RR, set iff performing a read under RR. - Introduce the Rdb_transaction::end_stmt method, to be called at the end of each MyRocks-involving SQL statement in a transaction, and a virtual method release_stmt_snapshot with implementations for both regular and WB transactions. - Unify the existing code that restores an older RocksDB snapshot if a statement failed with the code that releases snapshots at the end of RC statements. 
--- mysql-test/r/mysqld--help-notwin.result | 5 +- .../include/locking_issues_case1_1.inc | 4 - .../include/locking_issues_case1_2.inc | 4 - .../rocksdb/include/locking_issues_case2.inc | 4 - .../rocksdb/include/locking_issues_case3.inc | 4 - .../rocksdb/include/locking_issues_case4.inc | 4 - .../rocksdb/include/locking_issues_case5.inc | 4 - .../rocksdb/include/locking_issues_case6.inc | 4 - .../rocksdb/include/locking_issues_case7.inc | 4 - mysql-test/suite/rocksdb/r/hermitage.result | 330 --------------- mysql-test/suite/rocksdb/r/issue111.result | 1 - .../r/issue243_transactionStatus.result | 12 +- .../rocksdb/r/level_repeatable_read.result | 8 +- .../suite/rocksdb/r/locking_issues.result | 30 +- .../suite/rocksdb/r/max_row_locks.result | 9 +- .../r/rocksdb_concurrent_delete.result | 290 ------------- .../rocksdb_concurrent_point_update_sk.result | 59 --- .../suite/rocksdb/r/rocksdb_locks.result | 24 -- .../rocksdb/r/rocksdb_timeout_rollback.result | 2 + .../rocksdb/r/rpl_statement_not_found.result | 3 +- mysql-test/suite/rocksdb/r/show_engine.result | 1 + .../suite/rocksdb/r/unique_check.result | 3 +- mysql-test/suite/rocksdb/t/hermitage.inc | 68 +--- mysql-test/suite/rocksdb/t/hermitage.test | 6 - mysql-test/suite/rocksdb/t/issue111.test | 1 - .../rocksdb/t/rocksdb_concurrent_delete.inc | 36 +- .../rocksdb/t/rocksdb_concurrent_delete.test | 10 - .../t/rocksdb_concurrent_delete_range.inc | 36 +- .../t/rocksdb_concurrent_delete_sk.inc | 36 +- .../t/rocksdb_concurrent_point_update_sk.inc | 23 +- .../t/rocksdb_concurrent_point_update_sk.test | 20 +- mysql-test/suite/rocksdb/t/rocksdb_locks.test | 29 +- .../suite/rocksdb/t/rpl_row_not_found.inc | 2 - mysql-test/suite/rocksdb/t/unique_check.test | 1 - storage/rocksdb/ha_rocksdb.cc | 382 ++++++++++++------ storage/rocksdb/ha_rocksdb.h | 19 +- storage/rocksdb/rdb_iterator.cc | 6 +- 37 files changed, 318 insertions(+), 1166 deletions(-) diff --git a/mysql-test/r/mysqld--help-notwin.result 
b/mysql-test/r/mysqld--help-notwin.result index 726474468699..9630e03ba5b1 100644 --- a/mysql-test/r/mysqld--help-notwin.result +++ b/mysql-test/r/mysqld--help-notwin.result @@ -2584,9 +2584,8 @@ The following options may be given as the first argument: --rocksdb-skip-locks-if-skip-unique-check Skip row locking when unique checks are disabled. --rocksdb-skip-snapshot-validation - Skips snapshot validation on locking reads. This makes - MyRocks Repeatable Read behavior close to InnoDB -- - forcing reading the newest data with locking reads. + Obsolete option name kept for compatibility before + removing --rocksdb-sst-mgr-rate-bytes-per-sec=# DBOptions::sst_file_manager rate_bytes_per_sec for RocksDB diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case1_1.inc b/mysql-test/suite/rocksdb/include/locking_issues_case1_1.inc index 6dc5a78e3a08..7293126b46b5 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case1_1.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case1_1.inc @@ -17,10 +17,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0); diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case1_2.inc b/mysql-test/suite/rocksdb/include/locking_issues_case1_2.inc index 13083bf82d9a..8574160c6017 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case1_2.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case1_2.inc @@ -17,10 +17,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), 
(3,3,0), (4,4,0), (6,6,0); diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case2.inc b/mysql-test/suite/rocksdb/include/locking_issues_case2.inc index 61c604dd6d38..d1cb887a407f 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case2.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case2.inc @@ -18,10 +18,6 @@ --echo - rocksdb_lock_scanned_rows is on --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - SELECT @@global.rocksdb_lock_scanned_rows; if ($lock_scanned_rows) diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case3.inc b/mysql-test/suite/rocksdb/include/locking_issues_case3.inc index c7c881f7921a..97f462eb2008 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case3.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case3.inc @@ -15,10 +15,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); # Insert 200,000 rows, breaking it up into inserts of 1000 rows at a time diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case4.inc b/mysql-test/suite/rocksdb/include/locking_issues_case4.inc index 5ee061816b95..7a3f8ab90bee 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case4.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case4.inc @@ -15,10 +15,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); # Insert 200,000 rows, breaking it up into inserts of 1000 rows at a time diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case5.inc 
b/mysql-test/suite/rocksdb/include/locking_issues_case5.inc index e357b5480ab9..10aac82eb097 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case5.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case5.inc @@ -15,10 +15,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); # Insert 200,000 rows, breaking it up into inserts of 1000 rows at a time diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case6.inc b/mysql-test/suite/rocksdb/include/locking_issues_case6.inc index ef4418f447c9..d2767d15bbe5 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case6.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case6.inc @@ -15,10 +15,6 @@ --echo - using $isolation_level transaction isolation level --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t0; ---enable_warnings - CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); # Insert 200,000 rows, breaking it up into inserts of 1000 rows at a time diff --git a/mysql-test/suite/rocksdb/include/locking_issues_case7.inc b/mysql-test/suite/rocksdb/include/locking_issues_case7.inc index d71d398982ec..1ca55dc45f0a 100644 --- a/mysql-test/suite/rocksdb/include/locking_issues_case7.inc +++ b/mysql-test/suite/rocksdb/include/locking_issues_case7.inc @@ -17,10 +17,6 @@ --echo - updated should not be locked unless rocksdb_lock_scanned_rows is on --echo ----------------------------------------------------------------------- ---disable_warnings -DROP TABLE IF EXISTS t1, t2; ---enable_warnings - SELECT @@global.rocksdb_lock_scanned_rows; if ($lock_scanned_rows) diff --git a/mysql-test/suite/rocksdb/r/hermitage.result b/mysql-test/suite/rocksdb/r/hermitage.result index caf395ca6a99..3938fa38b6cb 
100644 --- a/mysql-test/suite/rocksdb/r/hermitage.result +++ b/mysql-test/suite/rocksdb/r/hermitage.result @@ -1,13 +1,10 @@ DROP TABLE IF EXISTS test; connect con1,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; connect con2,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; connect con3,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; connection con1; create table test (id int primary key, value int) engine=rocksdb; connection con1; @@ -331,337 +328,10 @@ disconnect con3; DROP TABLE IF EXISTS test; connect con1,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; connect con2,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; connect con3,localhost,root,,; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -connection con1; -create table test (id int primary key, value int) engine=rocksdb; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test; -id value -1 10 -2 20 -update test set value = 101 where id = 1; -connection con2; -select * from test; -id value -1 10 -2 20 -connection con1; -rollback; -connection con2; -select * from test; -id value -1 10 -2 20 -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -update test set value = 101 where id = 1; -connection con2; -select * from test; -id value -1 10 -2 20 -connection con1; -update test set value = 11 where id = 1; -commit; -connection 
con2; -select * from test; -id value -1 10 -2 20 -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -update test set value = 11 where id = 1; -connection con2; -update test set value = 22 where id = 2; -connection con1; -select * from test where id = 2; -id value -2 20 -connection con2; -select * from test where id = 1; -id value -1 10 -connection con1; -commit; -connection con2; -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -update test set value = 11 where id = 1; -update test set value = 19 where id = 2; -connection con2; -update test set value = 12 where id = 1; -connection con1; -commit; -connection con2; -connection con3; -select * from test; -id value -1 11 -2 19 -connection con2; -update test set value = 18 where id = 2; -connection con3; -select * from test; -id value -1 11 -2 19 -connection con2; -commit; -connection con3; -select * from test; -id value -1 11 -2 19 -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where value = 30; -id value -connection con2; -insert into test (id, value) values(3, 30); -commit; -connection con1; -select * from test where value % 3 = 0; -id value -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -update test set value = value + 10; -connection con2; -select variable_value into @a from performance_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors'; -select * from test; -id value -1 10 -2 20 -delete from test where value = 20; -connection con1; -commit; 
-connection con2; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -select variable_value-@a from performance_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors'; -variable_value-@a -1 -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where id = 1; -id value -1 10 -connection con2; -select * from test where id = 1; -id value -1 10 -connection con1; -update test set value = 11 where id = 1; -connection con2; -update test set value = 12 where id = 1; -connection con1; -commit; -connection con2; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where id = 1; -id value -1 10 -connection con2; -select * from test where id = 1; -id value -1 10 -select * from test where id = 2; -id value -2 20 -update test set value = 12 where id = 1; -update test set value = 18 where id = 2; -commit; -connection con1; -select * from test where id = 2; -id value -2 20 -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where value % 5 = 0; -id value -1 10 -2 20 -connection con2; -update test set value = 12 where value = 10; -commit; -connection con1; -select * from test where value % 3 = 0; -id value -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where id = 1; -id value -1 10 -connection con2; -select * from test; -id 
value -1 10 -2 20 -update test set value = 12 where id = 1; -update test set value = 18 where id = 2; -commit; -connection con1; -delete from test where value = 20; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where id in (1,2); -id value -1 10 -2 20 -connection con2; -select * from test where id in (1,2); -id value -1 10 -2 20 -connection con1; -update test set value = 11 where id = 1; -connection con2; -update test set value = 21 where id = 2; -connection con1; -commit; -connection con2; -commit; -connection con1; -truncate table test; -insert into test (id, value) values (1, 10), (2, 20); -begin; -connection con2; -begin; -connection con3; -begin; -connection con1; -select * from test where value % 3 = 0; -id value -connection con2; -select * from test where value % 3 = 0; -id value -connection con1; -insert into test (id, value) values(3, 30); -connection con2; -insert into test (id, value) values(4, 42); -connection con1; -commit; -connection con2; -commit; -select * from test where value % 3 = 0; -id value -3 30 -4 42 -connection con1; -select * from test where value % 3 = 0; -id value -3 30 -4 42 -connection default; -drop table test; -disconnect con1; -disconnect con2; -disconnect con3; -DROP TABLE IF EXISTS test; -connect con1,localhost,root,,; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; -connect con2,localhost,root,,; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; -connect con3,localhost,root,,; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; connection con1; create table test (id int primary key, value int) engine=rocksdb; 
connection con1; diff --git a/mysql-test/suite/rocksdb/r/issue111.result b/mysql-test/suite/rocksdb/r/issue111.result index 6a773f84e83e..6317c733bece 100644 --- a/mysql-test/suite/rocksdb/r/issue111.result +++ b/mysql-test/suite/rocksdb/r/issue111.result @@ -28,5 +28,4 @@ begin; update t1 set col2=123456 where pk=0; commit; update t1 set col2=col2+1 where col1 < 10 limit 5; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) drop table t1, ten, one_k; diff --git a/mysql-test/suite/rocksdb/r/issue243_transactionStatus.result b/mysql-test/suite/rocksdb/r/issue243_transactionStatus.result index c27c171f19aa..5fb56232f4f1 100644 --- a/mysql-test/suite/rocksdb/r/issue243_transactionStatus.result +++ b/mysql-test/suite/rocksdb/r/issue243_transactionStatus.result @@ -63,7 +63,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---RR SNAPSHOT, ACTIVE NUM sec MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago @@ -108,7 +109,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago @@ -166,7 +168,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago @@ -203,7 +206,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: 
----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago diff --git a/mysql-test/suite/rocksdb/r/level_repeatable_read.result b/mysql-test/suite/rocksdb/r/level_repeatable_read.result index 86d5832610b9..0592b0992385 100644 --- a/mysql-test/suite/rocksdb/r/level_repeatable_read.result +++ b/mysql-test/suite/rocksdb/r/level_repeatable_read.result @@ -38,16 +38,22 @@ a INSERT INTO t1 (a) SELECT a+200 FROM t1; SELECT a FROM t1; a +201 +202 COMMIT; SELECT a FROM t1; a 1 2 +201 +202 connection con2; SELECT a FROM t1; a 1 2 +201 +202 connection default; CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=rocksdb; INSERT INTO t2 (a) VALUES (1); @@ -93,7 +99,7 @@ INSERT INTO t3 (pk) VALUES(1) ON DUPLICATE KEY UPDATE count=count+1; COMMIT; SELECT count FROM t3; count -0 +1 connection default; disconnect con1; disconnect con2; diff --git a/mysql-test/suite/rocksdb/r/locking_issues.result b/mysql-test/suite/rocksdb/r/locking_issues.result index 6df4d44f72b0..d26cbf038597 100644 --- a/mysql-test/suite/rocksdb/r/locking_issues.result +++ b/mysql-test/suite/rocksdb/r/locking_issues.result @@ -5,7 +5,6 @@ - a WHERE clause - using REPEATABLE READ transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0); SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; @@ -27,7 +26,6 @@ DROP TABLE t0; - a WHERE clause - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0); SET SESSION TRANSACTION ISOLATION LEVEL READ 
COMMITTED; @@ -49,7 +47,6 @@ DROP TABLE t0; - columns in a WHERE clause - using REPEATABLE READ transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0); SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; @@ -71,7 +68,6 @@ DROP TABLE t0; - columns in a WHERE clause - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id1 INT, id2 INT, value INT, PRIMARY KEY(id1, id2)); INSERT INTO t0 VALUES (1,1,0), (3,3,0), (4,4,0), (6,6,0); SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; @@ -93,7 +89,6 @@ DROP TABLE t0; - using REPEATABLE READ transaction isolation level unless - rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -131,7 +126,6 @@ DROP TABLE t0; - using READ COMMITTED transaction isolation level unless - rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -169,7 +163,6 @@ DROP TABLE t0; - using REPEATABLE READ transaction isolation level unless - rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -196,7 +189,6 @@ SET GLOBAL rocksdb_lock_scanned_rows=0; - using READ COMMITTED transaction isolation level unless - rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; SELECT 
@@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -222,14 +214,15 @@ SET GLOBAL rocksdb_lock_scanned_rows=0; - After creating a snapshot, other clients updating rows - using REPEATABLE READ transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; SELECT * FROM t0 WHERE value > 0 FOR UPDATE; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; UPDATE t0 SET VALUE=VALUE+1 WHERE id=190000; -ERROR: 1213 +id value +190000 1 +ERROR: 0 DROP TABLE t0; ----------------------------------------------------------------------- @@ -237,7 +230,6 @@ DROP TABLE t0; - After creating a snapshot, other clients updating rows - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; @@ -254,7 +246,6 @@ DROP TABLE t0; - Phantom rows - using REPEATABLE READ transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; @@ -269,7 +260,6 @@ DROP TABLE t0; - Phantom rows - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; @@ -284,7 +274,6 @@ DROP TABLE t0; - Deleting primary key - using REPEATABLE READ transaction isolation level 
----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows UPDATE t0 SET value=100 WHERE id=190000; @@ -295,7 +284,8 @@ SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; BEGIN; DELETE FROM t0 WHERE id=190000; COMMIT; -ERROR: 1213 +id value +ERROR: 0 COMMIT; DROP TABLE t0; @@ -304,7 +294,6 @@ DROP TABLE t0; - Deleting primary key - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows UPDATE t0 SET value=100 WHERE id=190000; @@ -325,7 +314,6 @@ DROP TABLE t0; - Changing primary key - using REPEATABLE READ transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows UPDATE t0 SET value=100 WHERE id=190000; @@ -336,7 +324,8 @@ SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; BEGIN; UPDATE t0 SET id=200001 WHERE id=190000; COMMIT; -ERROR: 1213 +id value +ERROR: 0 COMMIT; DROP TABLE t0; @@ -345,7 +334,6 @@ DROP TABLE t0; - Changing primary key - using READ COMMITTED transaction isolation level ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t0; CREATE TABLE t0(id INT AUTO_INCREMENT PRIMARY KEY, value INT); Inserting 200,000 rows UPDATE t0 SET value=100 WHERE id=190000; @@ -366,7 +354,6 @@ DROP TABLE t0; - Rows that are scanned as part of a query but not in the table being - updated should not be locked unless rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t1, t2; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -397,7 +384,6 @@ DROP TABLE t2; - Rows 
that are scanned as part of a query but not in the table being - updated should not be locked unless rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t1, t2; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -428,7 +414,6 @@ DROP TABLE t2; - Rows that are scanned as part of a query but not in the table being - updated should not be locked unless rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t1, t2; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 @@ -463,7 +448,6 @@ SET GLOBAL rocksdb_lock_scanned_rows=0; - Rows that are scanned as part of a query but not in the table being - updated should not be locked unless rocksdb_lock_scanned_rows is on ----------------------------------------------------------------------- -DROP TABLE IF EXISTS t1, t2; SELECT @@global.rocksdb_lock_scanned_rows; @@global.rocksdb_lock_scanned_rows 0 diff --git a/mysql-test/suite/rocksdb/r/max_row_locks.result b/mysql-test/suite/rocksdb/r/max_row_locks.result index 7b17d28e8582..e37cba61701c 100644 --- a/mysql-test/suite/rocksdb/r/max_row_locks.result +++ b/mysql-test/suite/rocksdb/r/max_row_locks.result @@ -12,7 +12,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago @@ -45,7 +46,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec 
ago @@ -71,7 +73,8 @@ TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ----SNAPSHOT, ACTIVE NUM sec +---NO ACTIVE SNAPSHOT +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago diff --git a/mysql-test/suite/rocksdb/r/rocksdb_concurrent_delete.result b/mysql-test/suite/rocksdb/r/rocksdb_concurrent_delete.result index a70941f59532..c4b041a1b27e 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb_concurrent_delete.result +++ b/mysql-test/suite/rocksdb/r/rocksdb_concurrent_delete.result @@ -1,273 +1,9 @@ SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 1; -SET debug_sync='now SIGNAL go'; -pk a -2 2 -3 3 -4 4 -5 5 ---PK middle row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 3; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) ---PK end row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 5; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -set 
debug_sync='RESET'; -drop table t1; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='RESET'; -CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); ---SK first row delete -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; -SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 1; -SET debug_sync='now SIGNAL go'; -a -2 -3 -4 -5 ---SK middle row delete -SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; -SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 3; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) ---SK end row delete -SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; -SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 5; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -set debug_sync='RESET'; -drop table t1; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='RESET'; -CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); ---PK first row delete -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 5; -SET debug_sync='now SIGNAL go'; 
-pk a -4 4 -3 3 -2 2 -1 1 ---PK middle row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 3; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) ---PK end row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 1; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -set debug_sync='RESET'; -drop table t1; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='RESET'; -CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); ---PK first row delete -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 1; -SET debug_sync='now SIGNAL go'; -pk a -2 2 -3 3 -4 4 -5 5 ---PK middle row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 3; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) ---PK end row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 5; -SET debug_sync='now SIGNAL go'; -ERROR 40001: 
Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -set debug_sync='RESET'; -drop table t1; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='RESET'; -CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); ---PK first row delete -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 5; -SET debug_sync='now SIGNAL go'; -pk a -4 4 -3 3 -2 2 -1 1 ---PK middle row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 3; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) ---PK end row delete -SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; -SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; -SET debug_sync='now WAIT_FOR parked'; -DELETE FROM t1 WHERE pk = 1; -SET debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -set debug_sync='RESET'; -drop table t1; -set debug_sync='RESET'; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; -insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); ---First row delete with PRIMARY -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force 
index (PRIMARY) set value=100 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=1; -set debug_sync='now SIGNAL go'; -select * from t1 where id1=1; -id1 id2 value -1 2 100 -1 3 100 -1 4 100 -1 5 100 ---Middle row delete with PRIMARY -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force index (PRIMARY) set value=200 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=3; -set debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -select * from t1 where id1=1; -id1 id2 value -1 2 100 -1 4 100 -1 5 100 ---End row delete with PRIMARY -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force index (PRIMARY) set value=300 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=5; -set debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -select * from t1 where id1=1; -id1 id2 value -1 2 100 -1 4 100 -set debug_sync='RESET'; -drop table t1; -set debug_sync='RESET'; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk (id1, value)) engine=rocksdb; -insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); ---First row delete with sk -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force index (sk) set value=100 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=1; -set debug_sync='now SIGNAL go'; -select * from t1 where 
id1=1; -id1 id2 value -1 2 100 -1 3 100 -1 4 100 -1 5 100 ---Middle row delete with sk -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force index (sk) set value=200 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=3; -set debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -select * from t1 where id1=1; -id1 id2 value -1 2 100 -1 4 100 -1 5 100 ---End row delete with sk -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; -update t1 force index (sk) set value=300 where id1=1; -set debug_sync='now WAIT_FOR parked'; -delete from t1 where id1=1 and id2=5; -set debug_sync='now SIGNAL go'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -select * from t1 where id1=1; -id1 id2 value -1 2 100 -1 4 100 -set debug_sync='RESET'; -drop table t1; -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; -SET debug_sync='RESET'; -CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); -INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); ---PK first row delete -SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -294,13 +30,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --SK first row delete SET 
SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -327,13 +61,11 @@ a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -360,13 +92,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -393,13 +123,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk 
DESC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -431,7 +159,6 @@ create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk ( insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); --First row delete with PRIMARY SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (PRIMARY) set value=100 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -445,7 +172,6 @@ id1 id2 value 1 5 1 --Middle row delete with PRIMARY SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (PRIMARY) set value=200 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -475,7 +201,6 @@ create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk ( insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); --First row delete with sk SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (sk) set value=100 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -489,7 +214,6 @@ id1 id2 value 1 5 1 --Middle row delete with sk SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (sk) set value=200 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -514,13 +238,11 @@ id1 id2 value set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); INSERT INTO t1 VALUES(1,1), (2,2), 
(3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -553,13 +275,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY, a INT, index a(a)); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --SK first row delete SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -592,13 +312,11 @@ a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -631,13 +349,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; 
SELECT * FROM t1 order by t1.pk ASC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -670,13 +386,11 @@ pk a set debug_sync='RESET'; drop table t1; SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='RESET'; CREATE TABLE t1 (pk INT PRIMARY KEY COMMENT "rev:cf2", a INT); INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --PK first row delete SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; SELECT * FROM t1 order by t1.pk DESC FOR UPDATE; SET debug_sync='now WAIT_FOR parked'; @@ -714,7 +428,6 @@ create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk ( insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); --First row delete with PRIMARY SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (PRIMARY) set value=100 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -728,7 +441,6 @@ id1 id2 value 1 5 100 --Middle row delete with PRIMARY SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (PRIMARY) set value=200 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ -758,7 +470,6 @@ create table t1 (id1 int, id2 int, value int, primary key (id1, id2), index sk ( insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2); --First row delete with sk SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (sk) set value=100 where id1=1; set debug_sync='now WAIT_FOR parked'; @@ 
-772,7 +483,6 @@ id1 id2 value 1 5 100 --Middle row delete with sk SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; update t1 force index (sk) set value=200 where id1=1; set debug_sync='now WAIT_FOR parked'; diff --git a/mysql-test/suite/rocksdb/r/rocksdb_concurrent_point_update_sk.result b/mysql-test/suite/rocksdb/r/rocksdb_concurrent_point_update_sk.result index 6b0757e79437..d2707bd89d3a 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb_concurrent_point_update_sk.result +++ b/mysql-test/suite/rocksdb/r/rocksdb_concurrent_point_update_sk.result @@ -16,7 +16,6 @@ row_key val1 val2 3 14 Charlie 4 14 Delta SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; START TRANSACTION; Conn A: Started TRANSACTION A (SELECT .. FOR UPDATE ) set DEBUG_SYNC = "rocksdb_concurrent_upd_or_delete_sk SIGNAL waiting_for_update WAIT_FOR update_done"; @@ -24,62 +23,6 @@ Conn A: activate DEBUG_SYNC point rocksdb_concurrent_upd_or_delete_sk SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 AND val2 = 'Alfa' FOR UPDATE; Conn A: Sent SELECT SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=0; -START TRANSACTION; -Conn B: Started TRANSACTION B (Concurrent update) -Conn B: Waiting for Conn A to hit `waiting_for_update` -set DEBUG_SYNC = "now WAIT_FOR waiting_for_update"; -Conn B: Conn A triggered `waiting_for_update` -UPDATE table1 SET val1 = 15 WHERE val1 = 14 AND val2 = 'Alfa'; -SELECT * FROM table1; -row_key val1 val2 -1 15 Alfa -2 14 Bravo -3 14 Charlie -4 14 Delta -COMMIT; -Conn B: COMMIT for update done -set DEBUG_SYNC = "now SIGNAL update_done"; -Conn B: signalled Conn A with event `update_done` -Conn A: reaping SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 AND val2 = 'Alfa'; -The SELECT output should see the snapshot 
conflict error -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -ROLLBACK; -Conn A: Table after -SELECT * FROM table1; -row_key val1 val2 -1 15 Alfa -2 14 Bravo -3 14 Charlie -4 14 Delta -DROP TABLE table1; -Conn A creating table -CREATE TABLE table1 ( -row_key BIGINT UNSIGNED NOT NULL AUTO_INCREMENT, -val1 TINYINT NOT NULL, -val2 VARCHAR(128) NOT NULL, -PRIMARY KEY (row_key), -KEY idx_val1 (val1) -) ENGINE=RocksDB; -INSERT INTO table1 (val1, val2) VALUES (14, 'Alfa'), (14, 'Bravo'), (14, 'Charlie'), (14, 'Delta'); -Conn A: `table1` created with 4 rows -Conn A: Table before -SELECT * FROM table1; -row_key val1 val2 -1 14 Alfa -2 14 Bravo -3 14 Charlie -4 14 Delta -SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; -START TRANSACTION; -Conn A: Started TRANSACTION A (SELECT .. FOR UPDATE ) -set DEBUG_SYNC = "rocksdb_concurrent_upd_or_delete_sk SIGNAL waiting_for_update WAIT_FOR update_done"; -Conn A: activate DEBUG_SYNC point rocksdb_concurrent_upd_or_delete_sk -SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 AND val2 = 'Alfa' FOR UPDATE; -Conn A: Sent SELECT -SET TRANSACTION ISOLATION LEVEL REPEATABLE READ; -SET SESSION rocksdb_skip_snapshot_validation=1; START TRANSACTION; Conn B: Started TRANSACTION B (Concurrent update) Conn B: Waiting for Conn A to hit `waiting_for_update` @@ -127,7 +70,6 @@ row_key val1 val2 3 14 Charlie 4 14 Delta SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; START TRANSACTION; Conn A: Started TRANSACTION A (SELECT .. 
FOR UPDATE ) set DEBUG_SYNC = "rocksdb_concurrent_upd_or_delete_sk SIGNAL waiting_for_update WAIT_FOR update_done"; @@ -135,7 +77,6 @@ Conn A: activate DEBUG_SYNC point rocksdb_concurrent_upd_or_delete_sk SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 AND val2 = 'Alfa' FOR UPDATE; Conn A: Sent SELECT SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -SET SESSION rocksdb_skip_snapshot_validation=0; START TRANSACTION; Conn B: Started TRANSACTION B (Concurrent update) Conn B: Waiting for Conn A to hit `waiting_for_update` diff --git a/mysql-test/suite/rocksdb/r/rocksdb_locks.result b/mysql-test/suite/rocksdb/r/rocksdb_locks.result index 744963b534fa..fd1bb2512185 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb_locks.result +++ b/mysql-test/suite/rocksdb/r/rocksdb_locks.result @@ -35,30 +35,6 @@ ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on connection default; rollback; set autocommit=1; -connection con1; -drop table t1; -connection default; -# -# Now, test what happens if another transaction modified the record and committed -# -CREATE TABLE t1 ( -id int primary key, -value int -) engine=rocksdb collate latin1_bin; -insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10); -connection con1; -BEGIN; -SELECT * FROM t1 WHERE id=3; -id value -3 3 -connection default; -BEGIN; -UPDATE t1 SET value=30 WHERE id=3; -COMMIT; -connection con1; -SELECT * FROM t1 WHERE id=3 FOR UPDATE; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) -ROLLBACK; disconnect con1; connection default; drop table t1; diff --git a/mysql-test/suite/rocksdb/r/rocksdb_timeout_rollback.result b/mysql-test/suite/rocksdb/r/rocksdb_timeout_rollback.result index 1e253a9974b3..242143e6e520 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb_timeout_rollback.result +++ b/mysql-test/suite/rocksdb/r/rocksdb_timeout_rollback.result @@ -57,6 +57,8 @@ a 6 9 10 +11 +12 commit; select * 
from t1; a diff --git a/mysql-test/suite/rocksdb/r/rpl_statement_not_found.result b/mysql-test/suite/rocksdb/r/rpl_statement_not_found.result index 8cdfa9107392..df82f7e482c9 100644 --- a/mysql-test/suite/rocksdb/r/rpl_statement_not_found.result +++ b/mysql-test/suite/rocksdb/r/rpl_statement_not_found.result @@ -39,7 +39,6 @@ set global debug= 'd,dbug.rocksdb.get_row_by_rowid'; include/stop_slave.inc include/start_slave.inc update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0; -call mtr.add_suppression("Deadlock found when trying to get lock"); set debug_sync= 'now WAIT_FOR Reached'; set global debug = ''; set sql_log_bin=0; @@ -51,6 +50,6 @@ select * from t3 where pk < 5; pk kp1 kp2 col1 0 0 0 0 1 1 1 1 -4 4 4 100 +4 4 4 4 drop table t0, t1, t2, t3; include/rpl_end.inc diff --git a/mysql-test/suite/rocksdb/r/show_engine.result b/mysql-test/suite/rocksdb/r/show_engine.result index aca46996c4c3..1c4d5b72f11b 100644 --- a/mysql-test/suite/rocksdb/r/show_engine.result +++ b/mysql-test/suite/rocksdb/r/show_engine.result @@ -468,6 +468,7 @@ SNAPSHOTS --------- LIST OF SNAPSHOTS FOR EACH SESSION: ---SNAPSHOT, ACTIVE NUM sec +---NO RR SNAPSHOT MySQL thread id TID, OS thread handle PTR, query id QID localhost root ACTION SHOW ENGINE ROCKSDB TRANSACTION STATUS earliest snapshot created NUM sec ago diff --git a/mysql-test/suite/rocksdb/r/unique_check.result b/mysql-test/suite/rocksdb/r/unique_check.result index cfb37c7f71a0..c766406b8078 100644 --- a/mysql-test/suite/rocksdb/r/unique_check.result +++ b/mysql-test/suite/rocksdb/r/unique_check.result @@ -106,7 +106,8 @@ Warnings: Warning 1287 'VALUES function' is deprecated and will be removed in a future release. Please use an alias (INSERT INTO ... VALUES (...) 
AS alias) and replace VALUES(col) in the ON DUPLICATE KEY UPDATE clause with alias.col instead commit; set debug_sync='now SIGNAL trx_a_cont'; -ERROR 40001: Deadlock found when trying to get lock; try restarting transaction (snapshot conflict) +Warnings: +Warning 1287 'VALUES function' is deprecated and will be removed in a future release. Please use an alias (INSERT INTO ... VALUES (...) AS alias) and replace VALUES(col) in the ON DUPLICATE KEY UPDATE clause with alias.col instead rollback; drop table t1; set debug_sync='RESET'; diff --git a/mysql-test/suite/rocksdb/t/hermitage.inc b/mysql-test/suite/rocksdb/t/hermitage.inc index f7f8ef1f9a8d..82e21fa9ff5c 100644 --- a/mysql-test/suite/rocksdb/t/hermitage.inc +++ b/mysql-test/suite/rocksdb/t/hermitage.inc @@ -12,13 +12,10 @@ DROP TABLE IF EXISTS test; connect (con1,localhost,root,,); eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; connect (con2,localhost,root,,); eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; connect (con3,localhost,root,,); eval SET SESSION TRANSACTION ISOLATION LEVEL $trx_isolation; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; connection con1; @@ -120,26 +117,9 @@ send delete from test where value = 20; connection con1; commit; connection con2; -if ($trx_isolation == "READ COMMITTED") -{ - reap; - # RC: Returns 2 => 30 - select * from test; -} -if ($trx_isolation == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - select variable_value-@a from performance_schema.global_status where variable_name='rocksdb_snapshot_conflict_errors'; - } - if ($skip_snapshot_validation == 1) - { - reap; - select * from test; - } -} +reap; +# RC: Returns 2 => 30 +select * from test; commit; @@ -157,25 +137,9 @@ send update test set value = 12 where id = 
1; connection con1; commit; connection con2; -if ($trx_isolation == "READ COMMITTED") -{ - reap; - # RC: Returns 1 => 12 - select * from test; -} -if ($trx_isolation == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - select * from test; - } -} +reap; +# RC: Returns 1 => 12 +select * from test; commit; @@ -218,24 +182,8 @@ update test set value = 12 where id = 1; update test set value = 18 where id = 2; commit; connection con1; -if ($trx_isolation == "READ COMMITTED") -{ - delete from test where value = 20; # doesn't delete anything - select * from test where id = 2; # shows 2 => 18 -} -if ($trx_isolation == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - delete from test where value = 20; - } - if ($skip_snapshot_validation == 1) - { - delete from test where value = 20; # doesn't delete anything - select * from test where id = 2; # shows 2 => 18 - } -} +delete from test where value = 20; # doesn't delete anything +select * from test where id = 2; # shows 2 => 18 commit; diff --git a/mysql-test/suite/rocksdb/t/hermitage.test b/mysql-test/suite/rocksdb/t/hermitage.test index 35842e9f29b4..e4138e8d89fe 100644 --- a/mysql-test/suite/rocksdb/t/hermitage.test +++ b/mysql-test/suite/rocksdb/t/hermitage.test @@ -4,13 +4,7 @@ # https://github.com/ept/hermitage let $trx_isolation = READ COMMITTED; -let $skip_snapshot_validation = 0; --source hermitage.inc let $trx_isolation = REPEATABLE READ; -let $skip_snapshot_validation = 0; ---source hermitage.inc - -let $trx_isolation = REPEATABLE READ; -let $skip_snapshot_validation = 1; --source hermitage.inc diff --git a/mysql-test/suite/rocksdb/t/issue111.test b/mysql-test/suite/rocksdb/t/issue111.test index a77b4f8895f1..8b464d59a618 100644 --- a/mysql-test/suite/rocksdb/t/issue111.test +++ b/mysql-test/suite/rocksdb/t/issue111.test @@ -35,7 +35,6 @@ commit; connection default; 
---error ER_LOCK_DEADLOCK update t1 set col2=col2+1 where col1 < 10 limit 5; disconnect con2; diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.inc b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.inc index 3e8bb5978ddc..a467efcf68f3 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.inc +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.inc @@ -26,7 +26,6 @@ if ($order == 'DESC') connect (con, localhost, root,,); connection default; eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; SET debug_sync='RESET'; @@ -38,7 +37,6 @@ INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --echo --PK first row delete connection con; eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; SET debug_sync='rocksdb_concurrent_delete SIGNAL parked WAIT_FOR go'; send_eval SELECT * FROM t1 order by t1.pk $order FOR UPDATE; @@ -68,22 +66,7 @@ eval DELETE FROM t1 WHERE pk = $middle_row; SET debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; # Deleting the end row --echo --PK end row delete @@ -96,22 +79,7 @@ eval DELETE FROM t1 WHERE pk = $end_row; SET debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; # Cleanup diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.test b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.test index cafd2e83668f..cc2084dbd724 100644 --- 
a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.test +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete.test @@ -23,22 +23,12 @@ # read_range_first -> index_read_map_impl -> read_row_from_secondary_key # next row # index_next -> index_next_with_direction -> secondary_index_read -# -# In all cases, RR gets snapshot conflict errors if non-first rows get -# deleted by another transaction after scanning. --source include/have_rocksdb.inc --source include/have_debug_sync.inc let $isolation_level = REPEATABLE READ; -let $skip_snapshot_validation = 0; ---source rocksdb_concurrent_delete_main.inc - -let $isolation_level = REPEATABLE READ; -let $skip_snapshot_validation = 1; --source rocksdb_concurrent_delete_main.inc let $isolation_level = READ COMMITTED; -let $skip_snapshot_validation = 0; --source rocksdb_concurrent_delete_main.inc - diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_range.inc b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_range.inc index c0b439b88b82..20e308c30e79 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_range.inc +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_range.inc @@ -19,7 +19,6 @@ insert into t1 values (1, 1, 1),(1, 2, 1),(1, 3, 1),(1, 4, 1),(1, 5, 1),(2, 2, 2 --echo --First row delete with $index connection con; eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; send_eval update t1 force index ($index) set value=100 where id1=1; @@ -35,7 +34,6 @@ select * from t1 where id1=1; # deleting a middle row --echo --Middle row delete with $index eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; set debug_sync='rocksdb.get_row_by_rowid SIGNAL parked WAIT_FOR go'; send_eval update t1 force index ($index) set value=200 where id1=1; @@ 
-45,22 +43,7 @@ delete from t1 where id1=1 and id2=3; set debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; select * from t1 where id1=1; # deleting the end row @@ -75,22 +58,7 @@ delete from t1 where id1=1 and id2=5; set debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; select * from t1 where id1=1; # Cleanup diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_sk.inc b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_sk.inc index f71b85ab547f..68deaeaa0a9b 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_sk.inc +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_delete_sk.inc @@ -6,7 +6,6 @@ connect (con, localhost, root,,); connection default; eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; SET debug_sync='RESET'; @@ -18,7 +17,6 @@ INSERT INTO t1 VALUES(1,1), (2,2), (3,3), (4,4), (5,5); --echo --SK first row delete connection con; eval SET SESSION TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; SET debug_sync='rocksdb_concurrent_upd_or_delete_sk SIGNAL parked WAIT_FOR go'; send_eval SELECT a FROM t1 FORCE INDEX(a) FOR UPDATE; @@ -45,22 +43,7 @@ eval DELETE FROM t1 WHERE pk = 3; SET debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 
1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; # Deleting the end row --echo --SK end row delete @@ -73,22 +56,7 @@ eval DELETE FROM t1 WHERE pk = 5; SET debug_sync='now SIGNAL go'; connection con; -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - reap; -} +reap; # Cleanup connection default; diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.inc b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.inc index 99859e69ffe6..3619a21b41d0 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.inc +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.inc @@ -27,7 +27,6 @@ SELECT * FROM table1; # update gets frozen. eval SET TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; START TRANSACTION; --echo Conn A: Started TRANSACTION A (SELECT .. 
FOR UPDATE ) @@ -42,7 +41,6 @@ send SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 --connection con0 eval SET TRANSACTION ISOLATION LEVEL $isolation_level; -eval SET SESSION rocksdb_skip_snapshot_validation=$skip_snapshot_validation; START TRANSACTION; --echo Conn B: Started TRANSACTION B (Concurrent update) @@ -64,25 +62,8 @@ set DEBUG_SYNC = "now SIGNAL update_done"; --echo Conn A: reaping SELECT * from table1 FORCE INDEX(idx_val1) WHERE row_key = 1 AND val1 = 14 AND val2 = 'Alfa'; --connection default -if ($isolation_level == "REPEATABLE READ") -{ - if ($skip_snapshot_validation == 0) - { - --echo The SELECT output should see the snapshot conflict error - --error ER_LOCK_DEADLOCK - reap; - } - if ($skip_snapshot_validation == 1) - { - --echo The SELECT output should be empty - reap; - } -} -if ($isolation_level == "READ COMMITTED") -{ - --echo The SELECT output should be empty - reap; -} +--echo The SELECT output should be empty +reap; ROLLBACK; --echo Conn A: Table after diff --git a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.test b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.test index 15c2f3ee6ccb..4cc10a21bbac 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.test +++ b/mysql-test/suite/rocksdb/t/rocksdb_concurrent_point_update_sk.test @@ -5,26 +5,12 @@ # (or locking reads) using SK as the index, when a concurrent transaction # modifies the same row -# The behavior will be different for READ COMMITTED and REPEATABLE READ -# isolation levels. - -# In the case of REPEATABLE READ, the locking read -# that got affected by the concurrent update will hit a snapshot conflict -# and return the error back to the SQL layer. The user will see the error. - -# In the case of READ COMMITTED, the locking read that gets impacted -# by the concurrent update will return KEY_NOT_FOUND back to the SQL layer -# In this case, the user will be returned no row. 
- -let $isolation_level = REPEATABLE READ; -let $skip_snapshot_validation = 0; ---source rocksdb_concurrent_point_update_sk.inc +# The locking read that gets impacted by the concurrent update will return +# KEY_NOT_FOUND back to the SQL layer. In this case, the user will be returned no +# row. let $isolation_level = REPEATABLE READ; -let $skip_snapshot_validation = 1; --source rocksdb_concurrent_point_update_sk.inc let $isolation_level = READ COMMITTED; -let $skip_snapshot_validation = 0; --source rocksdb_concurrent_point_update_sk.inc - diff --git a/mysql-test/suite/rocksdb/t/rocksdb_locks.test b/mysql-test/suite/rocksdb/t/rocksdb_locks.test index 1c98e9a4cb3a..409a2d83629e 100644 --- a/mysql-test/suite/rocksdb/t/rocksdb_locks.test +++ b/mysql-test/suite/rocksdb/t/rocksdb_locks.test @@ -64,36 +64,9 @@ select * from t1 where pk=1 for update; rollback; set autocommit=1; ---connection con1 -drop table t1; ---connection default - ---echo # ---echo # Now, test what happens if another transaction modified the record and committed ---echo # - -CREATE TABLE t1 ( - id int primary key, - value int -) engine=rocksdb collate latin1_bin; -insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10); - ---connection con1 -BEGIN; -SELECT * FROM t1 WHERE id=3; - ---connection default -BEGIN; -UPDATE t1 SET value=30 WHERE id=3; -COMMIT; - ---connection con1 ---error ER_LOCK_DEADLOCK -SELECT * FROM t1 WHERE id=3 FOR UPDATE; - -ROLLBACK; --disconnect con1 --connection default + drop table t1; --source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/rocksdb/t/rpl_row_not_found.inc b/mysql-test/suite/rocksdb/t/rpl_row_not_found.inc index 4342fb3e1830..bb868f799803 100644 --- a/mysql-test/suite/rocksdb/t/rpl_row_not_found.inc +++ b/mysql-test/suite/rocksdb/t/rpl_row_not_found.inc @@ -71,7 +71,6 @@ connection master; update t3 set col1=100 where kp1 between 1 and 4 and mod(kp2,2)=0; connection slave; -call mtr.add_suppression("Deadlock found
when trying to get lock"); set debug_sync= 'now WAIT_FOR Reached'; eval set global debug = '$old_debug'; set sql_log_bin=0; @@ -82,7 +81,6 @@ set debug_sync= 'now SIGNAL signal.rocksdb.get_row_by_rowid_let_running'; connection master; --source include/sync_slave_sql_with_master.inc connection slave; -# col1 for pk=4 should be 100 select * from t3 where pk < 5; # Cleanup diff --git a/mysql-test/suite/rocksdb/t/unique_check.test b/mysql-test/suite/rocksdb/t/unique_check.test index 55830306ff4d..07d0c400e4cc 100644 --- a/mysql-test/suite/rocksdb/t/unique_check.test +++ b/mysql-test/suite/rocksdb/t/unique_check.test @@ -206,7 +206,6 @@ set debug_sync='now SIGNAL trx_a_cont'; connection default; ---error ER_LOCK_DEADLOCK reap; disconnect con1; diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index a14209989b44..5ec07c3caf0d 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -1502,10 +1502,8 @@ static MYSQL_THDVAR_BOOL( static MYSQL_THDVAR_BOOL( skip_snapshot_validation, PLUGIN_VAR_OPCMDARG, - "Skips snapshot validation on locking reads. This makes MyRocks " - "Repeatable Read behavior close to InnoDB -- forcing reading the " - "newest data with locking reads.", - nullptr, nullptr, false); + "Obsolete option name kept for compatibility before removing", nullptr, + nullptr, false); static const char *const DEFAULT_READ_FREE_RPL_TABLES = ".*"; @@ -3699,9 +3697,14 @@ class Rdb_transaction { enum class snapshot_type { NONE, // A snapshot created through Transaction API for regular transactions and - // by DB::GetSnapshot() for WB ones + // by DB::GetSnapshot() for WB ones. Trumps RR requests for the same + // statement. CURRENT, CURRENT_DELAYED, + // A snapshot with the lifetime of whole transaction, used for reads under + // REPEATABLE READ isolation level. If the same statement requests both this + // and CURRENT or CURRENT_DELAYED, the latter is used. 
+ RR, // This is used by transactions started with "START TRANSACTION WITH // CONSISTENT [ROCKSDB] SNAPSHOT". The snapshot has to be created via // DB::GetSnapshot(), not via Transaction API. @@ -3725,11 +3728,18 @@ class Rdb_transaction { assert(m_explicit_snapshot == nullptr); assert(m_read_opts[USER_TABLE].snapshot != nullptr); break; + case snapshot_type::RR: + assert(m_explicit_snapshot == nullptr); + assert(m_rr_snapshot != nullptr); + assert(m_read_opts[USER_TABLE].snapshot == m_rr_snapshot.get()); + break; case snapshot_type::READ_ONLY_TRX: assert(m_explicit_snapshot == nullptr); + assert(m_rr_snapshot == nullptr); break; case snapshot_type::EXPLICIT: assert(m_explicit_snapshot != nullptr); + assert(m_rr_snapshot == nullptr); assert(m_read_opts[USER_TABLE].snapshot == m_explicit_snapshot->get_snapshot()); break; @@ -3751,6 +3761,8 @@ class Rdb_transaction { rocksdb::ReadOptions m_read_opts[2]; + rdb_snapshot_unique_ptr m_rr_snapshot; + std::shared_ptr m_explicit_snapshot; // This should be used only when updating binlog information. @@ -3771,6 +3783,7 @@ class Rdb_transaction { assert(statement_snapshot_type == snapshot_type::NONE || statement_snapshot_type == snapshot_type::CURRENT || statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::RR || statement_snapshot_type == snapshot_type::EXPLICIT); // Iterate through the merge map merging all keys into data dictionary. @@ -3984,6 +3997,19 @@ class Rdb_transaction { return result; } + [[nodiscard]] std::int64_t get_rr_snapshot_ts() const { + assert_snapshot_invariants(); + + const auto result = + m_rr_snapshot != nullptr ? 
m_rr_snapshot->GetUnixTime() : 0; + + assert(result >= 0); + assert(m_earliest_snapshot_ts >= 0); + assert(m_earliest_snapshot_ts <= result || result == 0); + + return result; + } + virtual void set_lock_timeout(int timeout_sec_arg, TABLE_TYPE table_type) = 0; ulonglong get_write_count( @@ -4132,6 +4158,10 @@ class Rdb_transaction { case snapshot_type::CURRENT_DELAYED: assert(m_read_opts[USER_TABLE].snapshot == nullptr); break; + case snapshot_type::RR: + assert(m_read_opts[USER_TABLE].snapshot == nullptr || + m_read_opts[USER_TABLE].snapshot == m_rr_snapshot.get()); + break; case snapshot_type::EXPLICIT: assert(snapshot == m_explicit_snapshot->get_snapshot()); break; @@ -4159,6 +4189,10 @@ class Rdb_transaction { virtual void acquire_snapshot(bool acquire_now, TABLE_TYPE table_type) = 0; virtual void release_snapshot(TABLE_TYPE table_type) = 0; + void end_stmt() { release_stmt_snapshot(); } + + virtual void release_stmt_snapshot() = 0; + virtual rocksdb::Status set_tx_read_timestamp(TABLE_TYPE table_type, uint64_t override_ts = 0) = 0; virtual rocksdb::Slice *get_tx_read_timestamp_slice() = 0; @@ -4200,10 +4234,9 @@ class Rdb_transaction { return statement_snapshot_type == snapshot_type::EXPLICIT; } - [[nodiscard]] bool has_explicit_or_read_only_snapshot() const noexcept { + [[nodiscard]] bool has_read_only_snapshot() const noexcept { assert_snapshot_invariants(); - return statement_snapshot_type == snapshot_type::EXPLICIT || - statement_snapshot_type == snapshot_type::READ_ONLY_TRX; + return statement_snapshot_type == snapshot_type::READ_ONLY_TRX; } [[nodiscard]] snapshot_info_st clone_explicit_snapshot_info() const noexcept { @@ -4957,12 +4990,10 @@ class Rdb_transaction { rocksdb::ColumnFamilyHandle &column_family, const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, TABLE_TYPE table_type) = 0; - virtual rocksdb::Status get_for_update(const Rdb_key_def &key_descr, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *const value, - TABLE_TYPE 
table_type, bool exclusive, - const bool do_validate, - bool no_wait) = 0; + [[nodiscard]] virtual rocksdb::Status get_for_update( + const Rdb_key_def &key_descr, const rocksdb::Slice &key, + rocksdb::PinnableSlice *value, TABLE_TYPE table_type, bool exclusive, + bool no_wait) = 0; [[nodiscard]] virtual std::unique_ptr get_iterator( const rocksdb::ReadOptions &options, @@ -5031,7 +5062,7 @@ class Rdb_transaction { virtual bool is_tx_started(TABLE_TYPE table_type) const = 0; virtual void start_tx(TABLE_TYPE table_type) = 0; - virtual void start_stmt() = 0; + virtual void start_stmt(bool) = 0; virtual bool set_name() = 0; private: @@ -5134,13 +5165,13 @@ class Rdb_transaction { successfully and its changes become part of the transaction's changes. */ int make_stmt_savepoint_permanent() { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::EXPLICIT || + statement_snapshot_type == snapshot_type::READ_ONLY_TRX); // Take another RocksDB savepoint only if we had changes since the last // one. This is very important for long transactions doing lots of // SELECTs. 
if (m_writes_at_last_savepoint != m_write_count[USER_TABLE]) { - assert(statement_snapshot_type == snapshot_type::CURRENT || - statement_snapshot_type == snapshot_type::CURRENT_DELAYED || - statement_snapshot_type == snapshot_type::EXPLICIT); assert(!is_ac_nl_ro_rc_transaction()); rocksdb::Status status = rocksdb::Status::NotFound(); @@ -5162,9 +5193,9 @@ class Rdb_transaction { Rollback to the savepoint we've set before the last statement */ void rollback_to_stmt_savepoint() { + assert(statement_snapshot_type == snapshot_type::NONE); + if (m_writes_at_last_savepoint != m_write_count[USER_TABLE]) { - assert(statement_snapshot_type == snapshot_type::CURRENT || - statement_snapshot_type == snapshot_type::EXPLICIT); assert(!is_ac_nl_ro_rc_transaction()); do_rollback_to_savepoint(); @@ -5190,11 +5221,11 @@ class Rdb_transaction { return true; } - int rollback_to_savepoint(void *const savepoint MY_ATTRIBUTE((__unused__))) { - if (has_modifications()) { - assert(statement_snapshot_type == snapshot_type::CURRENT || - statement_snapshot_type == snapshot_type::EXPLICIT); + int rollback_to_savepoint(void *) { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::READ_ONLY_TRX); + if (has_modifications()) { my_error(ER_ROLLBACK_TO_SAVEPOINT, MYF(0)); m_rollback_only = true; return HA_EXIT_FAILURE; @@ -5292,6 +5323,9 @@ class Rdb_transaction_impl : public Rdb_transaction { void release_lock(const Rdb_key_def &key_descr, const std::string &rowkey, bool force) override { + assert(statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::RR || + statement_snapshot_type == snapshot_type::EXPLICIT); assert(!is_ac_nl_ro_rc_transaction()); if (!THDVAR(m_thd, lock_scanned_rows) || force) { @@ -5381,6 +5415,14 @@ class Rdb_transaction_impl : public Rdb_transaction { bool commit_no_binlog(TABLE_TYPE table_type) override { assert(!is_ac_nl_ro_rc_transaction()); + // Called either during transaction
commit after all snapshots have been + // released, or in the middle of a statement to flush a batch, with + // (CURRENT) or without (CURRENT_DELAYED) a snapshot acquired, or with + // an RR snapshot (e.g. when spilling a heap temp table to disk) + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::RR); bool res = false; rocksdb::Status s; @@ -5468,6 +5510,10 @@ class Rdb_transaction_impl : public Rdb_transaction { public: void rollback() override { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::EXPLICIT || + statement_snapshot_type == snapshot_type::READ_ONLY_TRX); + if (m_rocksdb_tx[TABLE_TYPE::USER_TABLE]) { release_snapshot(TABLE_TYPE::USER_TABLE); // Record the writebatch size before it is rolled back. The size changes @@ -5498,8 +5544,7 @@ class Rdb_transaction_impl : public Rdb_transaction { } void acquire_snapshot_on_next_op() { - assert(statement_snapshot_type == snapshot_type::NONE || - statement_snapshot_type == snapshot_type::CURRENT); + assert(statement_snapshot_type == snapshot_type::NONE); m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->SetSnapshotOnNextOperation( m_notifier); @@ -5531,6 +5576,11 @@ class Rdb_transaction_impl : public Rdb_transaction { else acquire_snapshot_on_next_op(); break; + case Rdb_transaction::snapshot_type::RR: + assert(thd_ss == nullptr); + if (m_rr_snapshot == nullptr) m_rr_snapshot = get_rdb_snapshot(); + assign_snapshot(m_rr_snapshot.get()); + break; case Rdb_transaction::snapshot_type::READ_ONLY_TRX: assign_snapshot(rdb_get_rocksdb_db().GetSnapshot()); break; @@ -5594,6 +5644,27 @@ class Rdb_transaction_impl : public Rdb_transaction { return &m_udt_read_timestamp_slice; } + void release_stmt_snapshot() override { + assert_snapshot_invariants(); + switch (statement_snapshot_type) { + case
Rdb_transaction::snapshot_type::NONE: + case Rdb_transaction::snapshot_type::READ_ONLY_TRX: + case Rdb_transaction::snapshot_type::EXPLICIT: + break; + case Rdb_transaction::snapshot_type::CURRENT: + case Rdb_transaction::snapshot_type::CURRENT_DELAYED: + if (m_rocksdb_tx[TABLE_TYPE::USER_TABLE] != nullptr) { + m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->ClearSnapshot(); + } + [[fallthrough]]; + case Rdb_transaction::snapshot_type::RR: + m_read_opts[TABLE_TYPE::USER_TABLE].snapshot = nullptr; + statement_snapshot_type = snapshot_type::NONE; + break; + } + assert_snapshot_invariants(); + } + void release_snapshot(TABLE_TYPE table_type) override { assert_snapshot_invariants(); @@ -5601,18 +5672,12 @@ class Rdb_transaction_impl : public Rdb_transaction { return; } + release_stmt_snapshot(); + m_rr_snapshot.reset(); + switch (statement_snapshot_type) { case Rdb_transaction::snapshot_type::NONE: break; - case Rdb_transaction::snapshot_type::CURRENT: - case Rdb_transaction::snapshot_type::CURRENT_DELAYED: { - if (m_rocksdb_tx[table_type] != nullptr) { - m_rocksdb_tx[table_type]->ClearSnapshot(); - } - m_read_opts[table_type].snapshot = nullptr; - statement_snapshot_type = snapshot_type::NONE; - break; - } case Rdb_transaction::snapshot_type::READ_ONLY_TRX: rdb_get_rocksdb_db().ReleaseSnapshot(m_read_opts[table_type].snapshot); m_read_opts[table_type].snapshot = nullptr; @@ -5623,6 +5688,11 @@ class Rdb_transaction_impl : public Rdb_transaction { m_read_opts[table_type].snapshot = nullptr; statement_snapshot_type = snapshot_type::NONE; break; + case Rdb_transaction::snapshot_type::CURRENT: + case Rdb_transaction::snapshot_type::CURRENT_DELAYED: + case Rdb_transaction::snapshot_type::RR: + assert(false); + __builtin_unreachable(); } assert_snapshot_invariants(); @@ -5639,6 +5709,10 @@ class Rdb_transaction_impl : public Rdb_transaction { TABLE_TYPE table_type, bool assume_tracked) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == 
snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::EXPLICIT || + table_type != TABLE_TYPE::USER_TABLE); ++m_write_count[table_type]; return m_rocksdb_tx[table_type]->Put(&column_family, key, value, @@ -5649,6 +5723,9 @@ class Rdb_transaction_impl : public Rdb_transaction { rocksdb::ColumnFamilyHandle &column_family, const rocksdb::Slice &key, TABLE_TYPE table_type, bool assume_tracked) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::EXPLICIT); ++m_write_count[table_type]; return m_rocksdb_tx[table_type]->Delete(&column_family, key, @@ -5659,6 +5736,9 @@ class Rdb_transaction_impl : public Rdb_transaction { rocksdb::ColumnFamilyHandle &column_family, const rocksdb::Slice &key, TABLE_TYPE table_type, bool assume_tracked) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::EXPLICIT); ++m_write_count[table_type]; return m_rocksdb_tx[table_type]->SingleDelete(&column_family, key, @@ -5677,6 +5757,10 @@ class Rdb_transaction_impl : public Rdb_transaction { } [[nodiscard]] rocksdb::WriteBatchBase &get_write_batch() override { + // Called during commit, either after all snapshots are released, or on an + // autocommit statement, with its snapshot still open + assert(statement_snapshot_type == snapshot_type::NONE); + if (is_two_phase()) { return *m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->GetCommitTimeWriteBatch(); } @@ -5692,6 +5776,13 @@ class Rdb_transaction_impl : public Rdb_transaction { [[nodiscard]] rocksdb::WriteBatchBase &get_indexed_write_batch( TABLE_TYPE table_type) override { assert(!is_ac_nl_ro_rc_transaction()); + // Can be called without a snapshot, 
e.g. in the middle of ALTER TABLE data + // copy after an intermediate commit or with an RR snapshot (e.g. when + // spilling a heap temp table to disk) + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::CURRENT_DELAYED || + statement_snapshot_type == snapshot_type::RR); ++m_write_count[table_type]; return *m_rocksdb_tx[table_type]->GetWriteBatch(); @@ -5766,12 +5857,12 @@ class Rdb_transaction_impl : public Rdb_transaction { } } - rocksdb::Status get_for_update(const Rdb_key_def &key_descr, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *const value, - TABLE_TYPE table_type, bool exclusive, - const bool do_validate, - bool no_wait) override { + [[nodiscard]] rocksdb::Status get_for_update(const Rdb_key_def &key_descr, + const rocksdb::Slice &key, + rocksdb::PinnableSlice *value, + TABLE_TYPE table_type, + bool exclusive, + bool no_wait) override { assert(!is_ac_nl_ro_rc_transaction()); if (table_type == INTRINSIC_TMP) { @@ -5819,15 +5910,16 @@ class Rdb_transaction_impl : public Rdb_transaction { rocksdb::Status s; // If snapshot is null, pass it to GetForUpdate and snapshot is - // initialized there. Snapshot validation is skipped in that case. - if (m_read_opts[table_type].snapshot == nullptr || do_validate) { - s = m_rocksdb_tx[table_type]->GetForUpdate( - m_read_opts[table_type], &column_family, key, value, exclusive, - m_read_opts[table_type].snapshot ? do_validate : false); + // initialized there. 
+ if (m_read_opts[table_type].snapshot == nullptr) { + assert(statement_snapshot_type == snapshot_type::CURRENT_DELAYED); + s = m_rocksdb_tx[table_type]->GetForUpdate(m_read_opts[table_type], + &column_family, key, value, + exclusive, false); } else { - assert(statement_snapshot_type == snapshot_type::CURRENT); - // If snapshot is set, and if skipping validation, - // call GetForUpdate without validation and set back old snapshot + assert(statement_snapshot_type == snapshot_type::CURRENT || + statement_snapshot_type == snapshot_type::RR); + // If snapshot is set, call GetForUpdate and set back old snapshot auto saved_snapshot = m_read_opts[table_type].snapshot; m_read_opts[table_type].snapshot = nullptr; s = m_rocksdb_tx[table_type]->GetForUpdate(m_read_opts[table_type], @@ -5928,13 +6020,19 @@ class Rdb_transaction_impl : public Rdb_transaction { /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints */ void do_set_savepoint() override { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT); m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->SetSavePoint(); } + rocksdb::Status do_pop_savepoint() override { + assert(statement_snapshot_type == snapshot_type::NONE); return m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->PopSavePoint(); } void do_rollback_to_savepoint() override { + assert(statement_snapshot_type == snapshot_type::NONE); + m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->RollbackToSavePoint(); } @@ -5947,7 +6045,17 @@ class Rdb_transaction_impl : public Rdb_transaction { For hooking to start of statement that is its own transaction, see ha_rocksdb::external_lock(). */ - void start_stmt() override { + void start_stmt(bool use_rr_read_snapshot) override { + // May be called multiple times for a single SQL statement under RR, + // involving both read and written tables. Writes do not allow using the RR + // read snapshot. 
+ if (use_rr_read_snapshot && + statement_snapshot_type == snapshot_type::NONE) { + statement_snapshot_type = snapshot_type::RR; + } else if (!use_rr_read_snapshot && + statement_snapshot_type == snapshot_type::RR) { + release_stmt_snapshot(); + } // Set the snapshot to delayed acquisition (SetSnapshotOnNextOperation) acquire_snapshot(can_acquire_snapshot_without_conflicts(), TABLE_TYPE::USER_TABLE); @@ -5962,26 +6070,11 @@ class Rdb_transaction_impl : public Rdb_transaction { /* TODO: here we must release the locks taken since the start_stmt() call */ if (m_rocksdb_tx[TABLE_TYPE::USER_TABLE]) { - const rocksdb::Snapshot *const org_snapshot = - m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->GetSnapshot(); rollback_to_stmt_savepoint(); - - const rocksdb::Snapshot *const cur_snapshot = - m_rocksdb_tx[TABLE_TYPE::USER_TABLE]->GetSnapshot(); - if (org_snapshot != cur_snapshot) { - m_read_opts[TABLE_TYPE::USER_TABLE].snapshot = cur_snapshot; - if (cur_snapshot == nullptr) { - assert(statement_snapshot_type == snapshot_type::CURRENT); - acquire_snapshot_on_next_op(); - } - } } - - assert_snapshot_invariants(); } - explicit Rdb_transaction_impl(THD *const thd) - : Rdb_transaction(thd) { + explicit Rdb_transaction_impl(THD *const thd) : Rdb_transaction(thd) { // Create a notifier that can be called when a snapshot gets generated. 
m_notifier = std::make_shared(this); } @@ -6031,6 +6124,8 @@ class Rdb_writebatch_impl : public Rdb_transaction { bool commit_no_binlog(TABLE_TYPE table_type) override { assert(!is_ac_nl_ro_rc_transaction()); + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT); bool res = false; if (table_type == INTRINSIC_TMP) { @@ -6067,10 +6162,16 @@ class Rdb_writebatch_impl : public Rdb_transaction { } /* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */ - void do_set_savepoint() override { m_batch.SetSavePoint(); } + void do_set_savepoint() override { + assert(statement_snapshot_type == snapshot_type::NONE || + statement_snapshot_type == snapshot_type::CURRENT); + m_batch.SetSavePoint(); + } rocksdb::Status do_pop_savepoint() override { + assert(statement_snapshot_type == snapshot_type::NONE); assert(!is_ac_nl_ro_rc_transaction()); + return m_batch.PopSavePoint(); } @@ -6124,21 +6225,28 @@ class Rdb_writebatch_impl : public Rdb_transaction { assert_snapshot_invariants(); } - void release_snapshot(TABLE_TYPE table_type) override { - if (table_type == INTRINSIC_TMP) { - assert(false); - return; - } + void release_stmt_snapshot() override { assert_snapshot_invariants(); - if (has_snapshot(table_type)) { + + if (has_snapshot(TABLE_TYPE::USER_TABLE)) { assert(statement_snapshot_type == snapshot_type::CURRENT); statement_snapshot_type = snapshot_type::NONE; - rdb_get_rocksdb_db().ReleaseSnapshot(m_read_opts[table_type].snapshot); - m_read_opts[table_type].snapshot = nullptr; + rdb_get_rocksdb_db().ReleaseSnapshot( + m_read_opts[TABLE_TYPE::USER_TABLE].snapshot); + m_read_opts[TABLE_TYPE::USER_TABLE].snapshot = nullptr; } + assert_snapshot_invariants(); } + void release_snapshot(TABLE_TYPE table_type) override { + if (table_type == INTRINSIC_TMP) { + assert(false); + return; + } + release_stmt_snapshot(); + } + rocksdb::Status set_tx_read_timestamp(TABLE_TYPE, uint64_t) override { return 
rocksdb::Status::NotSupported( "Not supported for write batch transaction."); @@ -6203,6 +6311,7 @@ class Rdb_writebatch_impl : public Rdb_transaction { } [[nodiscard]] rocksdb::WriteBatchBase &get_write_batch() override { + assert(statement_snapshot_type == snapshot_type::NONE); return m_batch; } @@ -6248,12 +6357,11 @@ class Rdb_writebatch_impl : public Rdb_transaction { num_keys, keys, values, statuses, sorted_input); } - rocksdb::Status get_for_update(const Rdb_key_def &key_descr, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *const value, - TABLE_TYPE table_type, bool /* exclusive */, - const bool /* do_validate */, - bool /* no_wait */) override { + [[nodiscard]] rocksdb::Status get_for_update(const Rdb_key_def &key_descr, + const rocksdb::Slice &key, + rocksdb::PinnableSlice *value, + TABLE_TYPE table_type, bool, + bool) override { assert(statement_snapshot_type == snapshot_type::CURRENT); assert(!is_ac_nl_ro_rc_transaction()); @@ -6310,11 +6418,9 @@ class Rdb_writebatch_impl : public Rdb_transaction { bool set_name() override { return false; } - void start_stmt() override {} + void start_stmt(bool) override {} - void rollback_stmt() override { - rollback_to_stmt_savepoint(); - } + void rollback_stmt() override { rollback_to_stmt_savepoint(); } explicit Rdb_writebatch_impl(THD *const thd) : Rdb_transaction(thd), m_batch(rocksdb::BytewiseComparator(), 0, true) {} @@ -6967,6 +7073,8 @@ static bool rocksdb_is_supported_system_table(const char *db_name, static int rocksdb_prepare(handlerton *const hton MY_ATTRIBUTE((__unused__)), THD *const thd, bool prepare_tx) { Rdb_transaction *tx = get_tx_from_thd(thd); + tx->end_stmt(); + if (!tx->can_prepare()) { return HA_EXIT_FAILURE; } @@ -7258,6 +7366,7 @@ static int rocksdb_commit(handlerton *const hton MY_ATTRIBUTE((__unused__)), Rdb_perf_context_guard guard(tx, thd); if (tx != nullptr) { + tx->end_stmt(); if (all || is_autocommit(*thd)) { /* We get here @@ -7273,12 +7382,6 @@ static int 
rocksdb_commit(handlerton *const hton MY_ATTRIBUTE((__unused__)), */ tx->make_stmt_savepoint_permanent(); } - - if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { - // For READ_COMMITTED, we release any existing snapshot so that we will - // see any changes that occurred since the last statement. - tx->release_snapshot(TABLE_TYPE::USER_TABLE); - } } // `Add()` is implemented in a thread-safe manner. @@ -7293,6 +7396,7 @@ static int rocksdb_rollback(handlerton *const hton MY_ATTRIBUTE((__unused__)), Rdb_perf_context_guard guard(tx, thd); if (tx != nullptr) { + tx->end_stmt(); if (rollback_tx) { /* We get here, when @@ -7312,12 +7416,6 @@ static int rocksdb_rollback(handlerton *const hton MY_ATTRIBUTE((__unused__)), tx->rollback_stmt(); if (is_autocommit(*thd)) tx->rollback(); } - - if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { - // For READ_COMMITTED, we release any existing snapshot so that we will - // see any changes that occurred since the last statement. - tx->release_snapshot(TABLE_TYPE::USER_TABLE); - } } return HA_EXIT_SUCCESS; } @@ -7494,6 +7592,14 @@ class Rdb_snapshot_status : public Rdb_tx_list_walker { curr_time - snapshot_ts); } + const auto rr_snapshot_ts = tx->get_rr_snapshot_ts(); + if (rr_snapshot_ts == 0) { + m_data += format_string("---NO RR SNAPSHOT\n"); + } else { + m_data += format_string("---RR SNAPSHOT, ACTIVE %lld sec\n", + curr_time - rr_snapshot_ts); + } + THD *thd = tx->get_thd(); char buffer[1024]; thd_security_context_internal(thd, buffer, sizeof buffer, 0, @@ -8073,7 +8179,8 @@ static bool rocksdb_collect_hton_log_info(handlerton *const /* unused */, } static inline void rocksdb_register_tx(handlerton *hton [[maybe_unused]], - THD &thd, Rdb_transaction &tx) { + THD &thd, Rdb_transaction &tx, + bool is_read) { assert(hton == rocksdb_hton); trans_register_ha(&thd, false, rocksdb_hton, nullptr); @@ -8088,16 +8195,23 @@ static inline void rocksdb_register_tx(handlerton *hton [[maybe_unused]], } } - if 
(!tx.has_explicit_or_read_only_snapshot()) { + if (tx.has_explicit_snapshot()) { + tx.start_stmt(false); + } else if (!tx.has_read_only_snapshot()) { auto thd_ss = std::static_pointer_cast( thd.get_explicit_snapshot()); if (thd_ss) { tx.share_explicit_snapshot(std::move(thd_ss)); } + const auto use_rr_read_snapshot = + !thd_ss && is_read && thd_tx_isolation(&thd) >= ISO_REPEATABLE_READ; + tx.start_stmt(use_rr_read_snapshot); + } else { + const auto use_rr_read_snapshot = + is_read && thd_tx_isolation(&thd) >= ISO_REPEATABLE_READ; + tx.start_stmt(use_rr_read_snapshot); } - if (!is_autocommit(thd)) { - tx.start_stmt(); trans_register_ha(&thd, true, rocksdb_hton, nullptr); } } @@ -8184,7 +8298,7 @@ static int rocksdb_start_tx_and_assign_read_view( Rdb_perf_context_guard guard(&tx, thd); tx.set_tx_read_only(); - rocksdb_register_tx(hton, *thd, tx); + rocksdb_register_tx(hton, *thd, tx, false); const uint64_t client_provided_read_filtering_ts = rdb_is_binlog_ttl_enabled() @@ -8267,7 +8381,7 @@ static int rocksdb_start_tx_with_shared_read_view( } } - rocksdb_register_tx(hton, *thd, *tx); + rocksdb_register_tx(hton, *thd, *tx, false); } // case: unlock the binlog @@ -9705,6 +9819,14 @@ std::vector Rdb_open_tables_map::get_table_names(void) const { return names; } +static void assert_no_explicit_thd_snapshot() noexcept { +#ifndef NDEBUG + const auto thd_ss = std::static_pointer_cast( + current_thd->get_explicit_snapshot()); + assert(thd_ss == nullptr); +#endif +} + void ha_rocksdb::load_auto_incr_value() { ulonglong auto_incr = 0; @@ -9748,6 +9870,7 @@ ulonglong ha_rocksdb::load_auto_incr_value_from_index() { get_or_create_tx(table->in_use, m_tbl_def->get_table_type()); const bool is_new_snapshot = !tx->has_snapshot(m_tbl_def->get_table_type()); if (is_new_snapshot) { + assert_no_explicit_thd_snapshot(); tx->acquire_snapshot(true, m_tbl_def->get_table_type()); } @@ -12436,16 +12559,15 @@ void dbug_dump_database(rocksdb::DB *const db) { fclose(out); } -rocksdb::Status 
ha_rocksdb::get_for_update(Rdb_transaction *const tx, +rocksdb::Status ha_rocksdb::get_for_update(Rdb_transaction &tx, const Rdb_key_def &key_descr, const rocksdb::Slice &key) const { assert(m_lock_rows != RDB_LOCK_NONE); bool exclusive = m_lock_rows != RDB_LOCK_READ; bool skip_wait = m_locked_row_action == THR_NOWAIT || m_locked_row_action == THR_SKIP; - return rdb_tx_get_for_update(tx, key_descr, key, nullptr, - m_tbl_def->get_table_type(), exclusive, - skip_wait); + return tx.get_for_update(key_descr, key, nullptr, m_tbl_def->get_table_type(), + exclusive, skip_wait); } bool ha_rocksdb::is_blind_delete_enabled() { @@ -12520,6 +12642,7 @@ int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, m_retrieved_record = std::move(m_dup_key_retrieved_record); rc = HA_EXIT_SUCCESS; } else { + assert_no_explicit_thd_snapshot(); tx->acquire_snapshot(tx->can_acquire_snapshot_without_conflicts(), m_tbl_def->get_table_type()); bool skip_wait = @@ -12602,6 +12725,7 @@ int ha_rocksdb::get_row_by_sk(uchar *buf, const Rdb_key_def &kd, Rdb_transaction *const tx = get_tx_from_thd(thd); assert(tx != nullptr); + assert_no_explicit_thd_snapshot(); tx->acquire_snapshot(true /* acquire_now */, m_tbl_def->get_table_type()); int rc = m_iterator->get(key, &m_retrieved_record, RDB_LOCK_NONE); @@ -13488,9 +13612,9 @@ int ha_rocksdb::acquire_prefix_lock(const Rdb_key_def &kd, Rdb_transaction *tx, const rocksdb::Slice prefix_slice = rocksdb::Slice((const char *)m_sk_packed_tuple, size); - const rocksdb::Status s = tx->get_for_update( - kd, prefix_slice, nullptr, m_tbl_def->get_table_type(), - false /* exclusive */, false /* do validate */, false /* no_wait */); + const auto s = + tx->get_for_update(kd, prefix_slice, nullptr, m_tbl_def->get_table_type(), + false /* exclusive */, false /* no_wait */); if (!s.ok()) { return tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler); @@ -13586,7 +13710,7 @@ int ha_rocksdb::check_and_lock_sk(const uint key_id, const 
rocksdb::Slice old_slice = rocksdb::Slice((const char *)m_sk_packed_tuple_old, size); - const rocksdb::Status s = get_for_update(row_info.tx, kd, old_slice); + const auto s = get_for_update(*row_info.tx, kd, old_slice); if (!s.ok()) { return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler); @@ -13695,6 +13819,7 @@ int ha_rocksdb::check_uniqueness_and_lock( THD *thd = ha_thd(); Rdb_transaction *const tx = get_or_create_tx(thd, m_tbl_def->get_table_type()); + assert_no_explicit_thd_snapshot(); tx->acquire_snapshot(false, m_tbl_def->get_table_type()); /* @@ -13932,9 +14057,9 @@ int ha_rocksdb::check_partial_index_prefix(const TABLE *table_arg, rocksdb::Slice((const char *)m_sk_packed_tuple, size); rocksdb::PinnableSlice value; - const rocksdb::Status s = tx->get_for_update( - kd, prefix_slice, &value, m_tbl_def->get_table_type(), - false /* exclusive */, false /* do validate */, false /* no_wait */); + const auto s = + tx->get_for_update(kd, prefix_slice, &value, m_tbl_def->get_table_type(), + false /* exclusive */, false /* no_wait */); if (!s.ok() && !s.IsNotFound()) { return tx->set_status_error(table_arg->in_use, s, kd, m_tbl_def, m_table_handler); @@ -14668,7 +14793,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) { if (n_null_fields == 0) { rocksdb::Slice sk_slice( reinterpret_cast(m_sk_packed_tuple), packed_size); - const rocksdb::Status s = get_for_update(tx, kd, sk_slice); + const auto s = get_for_update(*tx, kd, sk_slice); if (!s.ok()) { DBUG_RETURN(tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler)); @@ -15292,6 +15417,7 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { single-statement transaction. In the case of rollback, it will already be done in rocksdb_rollback, and the commit call will be a no-op. 
*/ + tx->end_stmt(); if (tx->commit()) { res = HA_ERR_INTERNAL_ERROR; } @@ -15399,7 +15525,8 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { } } tx->m_n_mysql_tables_in_use++; - rocksdb_register_tx(rocksdb_hton, *thd, *tx); + rocksdb_register_tx(rocksdb_hton, *thd, *tx, + (m_lock_rows == RDB_LOCK_NONE)); tx->io_perf_start(&m_io_perf); } @@ -15419,17 +15546,15 @@ int ha_rocksdb::external_lock(THD *const thd, int lock_type) { HA_EXIT_SUCCESS OK */ -int ha_rocksdb::start_stmt(THD *const thd, - thr_lock_type lock_type MY_ATTRIBUTE((__unused__))) { +int ha_rocksdb::start_stmt(THD *thd, thr_lock_type) { DBUG_ENTER_FUNC(); assert(thd != nullptr); - Rdb_transaction *const tx = - get_or_create_tx(thd, m_tbl_def->get_table_type()); + auto &tx = *get_or_create_tx(thd, m_tbl_def->get_table_type()); read_thd_vars(thd); - rocksdb_register_tx(ht, *thd, *tx); - tx->io_perf_start(&m_io_perf); + rocksdb_register_tx(ht, *thd, tx, (m_lock_rows == RDB_LOCK_NONE)); + tx.io_perf_start(&m_io_perf); DBUG_RETURN(HA_EXIT_SUCCESS); } @@ -20206,6 +20331,7 @@ unsigned long long get_partial_index_sort_max_mem(THD *thd) { } void rdb_tx_acquire_snapshot(Rdb_transaction &tx) { + assert_no_explicit_thd_snapshot(); tx.acquire_snapshot(true, TABLE_TYPE::USER_TABLE); } @@ -20252,22 +20378,16 @@ rocksdb::Status rdb_tx_get(Rdb_transaction *tx, return tx->get(column_family, key, value, table_type); } -rocksdb::Status rdb_tx_get_for_update(Rdb_transaction *tx, +rocksdb::Status rdb_tx_get_for_update(Rdb_transaction &tx, const Rdb_key_def &kd, const rocksdb::Slice &key, - rocksdb::PinnableSlice *const value, + rocksdb::PinnableSlice *value, TABLE_TYPE table_type, bool exclusive, bool skip_wait) { - auto *const thd = tx->get_thd(); - const auto do_validate = !(thd_tx_isolation(thd) <= ISO_READ_COMMITTED || - THDVAR(thd, skip_snapshot_validation)); - rocksdb::Status s = tx->get_for_update(kd, key, value, table_type, exclusive, - do_validate, skip_wait); - #ifndef NDEBUG 
++rocksdb_num_get_for_update_calls; #endif - return s; + return tx.get_for_update(kd, key, value, table_type, exclusive, skip_wait); } void rdb_tx_release_lock(Rdb_transaction *tx, const Rdb_key_def &kd, diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index a75300c53c13..c7fc428750aa 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -367,9 +367,9 @@ class ha_rocksdb : public my_core::handler, public blob_buffer { const rocksdb::Slice *value, bool *skip_row) MY_ATTRIBUTE((__warn_unused_result__)); - rocksdb::Status get_for_update(Rdb_transaction *const tx, - const Rdb_key_def &kd, - const rocksdb::Slice &key) const; + [[nodiscard]] rocksdb::Status get_for_update(Rdb_transaction &tx, + const Rdb_key_def &kd, + const rocksdb::Slice &key) const; int fill_virtual_columns(); @@ -948,8 +948,7 @@ class ha_rocksdb : public my_core::handler, public blob_buffer { * required by the interface. */ int extra(enum ha_extra_function operation) override; - int start_stmt(THD *const thd, thr_lock_type lock_type) override - MY_ATTRIBUTE((__warn_unused_result__)); + [[nodiscard]] int start_stmt(THD *thd, thr_lock_type) override; int external_lock(THD *const thd, int lock_type) override MY_ATTRIBUTE((__warn_unused_result__)); int truncate(dd::Table *table_def MY_ATTRIBUTE((unused))) override @@ -1227,12 +1226,10 @@ void rdb_tx_acquire_snapshot(Rdb_transaction &tx); const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, TABLE_TYPE table_type); -rocksdb::Status rdb_tx_get_for_update(Rdb_transaction *tx, - const Rdb_key_def &kd, - const rocksdb::Slice &key, - rocksdb::PinnableSlice *const value, - TABLE_TYPE table_type, bool exclusive, - bool skip_wait); +[[nodiscard]] rocksdb::Status rdb_tx_get_for_update( + Rdb_transaction &tx, const Rdb_key_def &kd, const rocksdb::Slice &key, + rocksdb::PinnableSlice *value, TABLE_TYPE table_type, bool exclusive, + bool skip_wait); void rdb_tx_release_lock(Rdb_transaction *tx, const 
Rdb_key_def &kd, const rocksdb::Slice &key, bool force); diff --git a/storage/rocksdb/rdb_iterator.cc b/storage/rocksdb/rdb_iterator.cc index d710341a7de8..1a9a2bf159a4 100644 --- a/storage/rocksdb/rdb_iterator.cc +++ b/storage/rocksdb/rdb_iterator.cc @@ -453,7 +453,7 @@ int Rdb_iterator_base::get(const rocksdb::Slice *key, if (type == RDB_LOCK_NONE) { s = rdb_tx_get(tx, m_kd.get_cf(), *key, value, m_table_type); } else { - s = rdb_tx_get_for_update(tx, m_kd, *key, value, m_table_type, + s = rdb_tx_get_for_update(*tx, m_kd, *key, value, m_table_type, type == RDB_LOCK_WRITE, skip_wait); } @@ -743,8 +743,8 @@ int Rdb_iterator_partial::materialize_prefix() { // It is possible that someone else has already materialized this group // before we locked. Double check by doing a locking read on the sentinel. rocksdb::PinnableSlice value; - auto s = rdb_tx_get_for_update(tx, m_kd, cur_prefix_key, &value, m_table_type, - true, false); + auto s = rdb_tx_get_for_update(*tx, m_kd, cur_prefix_key, &value, + m_table_type, true, false); if (s.ok()) { rdb_tx_release_lock(tx, m_kd, cur_prefix_key, true /* force */); thd_proc_info(m_thd, old_proc_info);