From 5e62bc3b0634dcbfc3c17fa46aa223c7da0bbad1 Mon Sep 17 00:00:00 2001 From: Jon Date: Wed, 14 Aug 2024 23:53:24 -0700 Subject: [PATCH] Address most review feedback --- common/include/common_defs.hpp | 1 + filters/include/bit_array_ops.hpp | 6 ++--- filters/include/bloom_filter.hpp | 24 +++++++++---------- filters/include/bloom_filter_builder_impl.hpp | 7 +----- filters/include/bloom_filter_impl.hpp | 8 +++---- ...loom_filter_deserialize_from_java_test.cpp | 2 +- filters/test/bloom_filter_test.cpp | 2 +- 7 files changed, 23 insertions(+), 27 deletions(-) diff --git a/common/include/common_defs.hpp b/common/include/common_defs.hpp index 8a61ff30..6a87e079 100644 --- a/common/include/common_defs.hpp +++ b/common/include/common_defs.hpp @@ -42,6 +42,7 @@ namespace random_utils { static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2 static thread_local std::mt19937_64 rand(rd()); static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0); + static thread_local std::uniform_int_distribution next_uint64(0, UINT64_MAX); // thread-safe random bit static thread_local std::independent_bits_engine diff --git a/filters/include/bit_array_ops.hpp b/filters/include/bit_array_ops.hpp index cc1a42fa..2ccfe00f 100644 --- a/filters/include/bit_array_ops.hpp +++ b/filters/include/bit_array_ops.hpp @@ -69,7 +69,7 @@ namespace bit_array_ops { * @param array the array of bits * @param index the index of the bit to set. */ - static inline void assign_bit(uint8_t* array, const uint64_t index, const bool value) { + static inline void assign_bit(uint8_t* array, uint64_t index, bool value) { // read-only checks handled by set_bit() and clear_bit() if (value) { set_bit(array, index); @@ -79,12 +79,12 @@ namespace bit_array_ops { } /** - * Gets teh value of a bit at the specified index and sets it to true + * Gets the value of a bit at the specified index and sets it to true * @param array the array of bits * @param index the index of the bit to get and set * @return the value of the bit at the specified index */ - static inline bool get_and_set_bit(uint8_t* array, const uint64_t index) { + static inline bool get_and_set_bit(uint8_t* array, uint64_t index) { const uint64_t offset = index >> 3; const uint8_t mask = 1 << (index & 7); if ((array[offset] & mask) != 0) { diff --git a/filters/include/bloom_filter.hpp b/filters/include/bloom_filter.hpp index 71b170e0..cf2059c9 100644 --- a/filters/include/bloom_filter.hpp +++ b/filters/include/bloom_filter.hpp @@ -203,7 +203,7 @@ class bloom_filter_alloc { * @param allocator instance of an Allocator * @return an instance of a Bloom filter */ - static bloom_filter_alloc deserialize(std::istream& is, const A& allocator = Allocator()); + static bloom_filter_alloc deserialize(std::istream& is, const Allocator& allocator = Allocator()); /** * @brief Wraps the provided memory as a read-only Bloom filter. Reads the data in-place and does @@ -231,12 +231,12 @@ class bloom_filter_alloc { * Copy constructor * @param other filter to be copied */ - bloom_filter_alloc(const bloom_filter_alloc&); + bloom_filter_alloc(const bloom_filter_alloc& other); /** Move constructor * @param other filter to be moved */ - bloom_filter_alloc(bloom_filter_alloc&&) noexcept; + bloom_filter_alloc(bloom_filter_alloc&& other) noexcept; /** * Copy assignment @@ -253,7 +253,7 @@ class bloom_filter_alloc { bloom_filter_alloc& operator=(bloom_filter_alloc&& other); /** - * @brief Destroy the bloom filter alloc object + * @brief Destroy the bloom filter object */ ~bloom_filter_alloc(); @@ -265,7 +265,7 @@ class bloom_filter_alloc { * This method serializes the filter as a vector of bytes. * An optional header can be reserved in front of the filter. * It is a blank space of a given size. - * This header is used in Datasketches PostgreSQL extension. + * Some integrations such as PostgreSQL may need this header space. * @param header_size_bytes space to reserve in front of the filter * @return serialized filter as a vector of bytes */ @@ -658,17 +658,17 @@ class bloom_filter_alloc { bool is_memory_owned() const; /** - * @brief Checks if the Bloom Filter has backing memory. + * @brief Checks if the Bloom Filter was created by a call to wrap(). * - * @return True if the filter has backing memory, otherwise false. + * @return True if the filter was created by wrapping memory, otherwise false. */ - bool has_backing_memory() const; + bool is_wrapped() const; /** - * @brief Returns a pointer to the backing memory, if it exists. - * @return A pointer to the backing memory, or nullptr if it does not exist. + * @brief Returns a pointer to the memory this filter wraps, if it exists. + * @return A pointer to the wrapped memory, or nullptr if is_wrapped() is false. */ - const uint8_t* get_backing_memory() const; + const uint8_t* get_wrapped_memory() const; /** * @brief Gets the serialized size of the Bloom Filter in bytes @@ -739,7 +739,7 @@ class bloom_filter_alloc { uint64_t seed_; uint16_t num_hashes_; bool is_dirty_; - bool is_owned_; // if true, data is not owned by filter AND data_ holdes the entire filter not just the bit array + bool is_owned_; // if true, data is not owned by filter AND memory_ holds the entire filter not just the bit array bool is_read_only_; // if true, filter is read-only uint64_t capacity_bits_; uint64_t num_bits_set_; diff --git a/filters/include/bloom_filter_builder_impl.hpp b/filters/include/bloom_filter_builder_impl.hpp index 5b34767a..b2050730 100644 --- a/filters/include/bloom_filter_builder_impl.hpp +++ b/filters/include/bloom_filter_builder_impl.hpp @@ -31,12 +31,7 @@ namespace datasketches { template uint64_t bloom_filter_builder_alloc::generate_random_seed() { - union { - uint64_t long_value; - double double_value; - } ldu; - ldu.double_value = random_utils::next_double(random_utils::rand); - return ldu.long_value; + return random_utils::next_uint64(random_utils::rand); } template diff --git a/filters/include/bloom_filter_impl.hpp b/filters/include/bloom_filter_impl.hpp index b8c47a9b..0195eb5d 100644 --- a/filters/include/bloom_filter_impl.hpp +++ b/filters/include/bloom_filter_impl.hpp @@ -182,8 +182,8 @@ bloom_filter_alloc::bloom_filter_alloc(bloom_filter_alloc&& other) noexcept : is_read_only_(other.is_read_only_), capacity_bits_(other.capacity_bits_), num_bits_set_(other.num_bits_set_), - bit_array_(std::move(other.bit_array_)), - memory_(std::move(other.memory_)) + bit_array_(other.bit_array_), + memory_(other.memory_) { // ensure destructor on other will behave nicely other.is_owned_ = false; @@ -504,7 +504,7 @@ bool bloom_filter_alloc::is_read_only() const { } template -bool bloom_filter_alloc::has_backing_memory() const { +bool bloom_filter_alloc::is_wrapped() const { return memory_ != nullptr; } @@ -514,7 +514,7 @@ bool bloom_filter_alloc::is_memory_owned() const { } template -const uint8_t* bloom_filter_alloc::get_backing_memory() const { +const uint8_t* bloom_filter_alloc::get_wrapped_memory() const { return memory_; } diff --git a/filters/test/bloom_filter_deserialize_from_java_test.cpp b/filters/test/bloom_filter_deserialize_from_java_test.cpp index f2dee18c..259eb4ff 100644 --- a/filters/test/bloom_filter_deserialize_from_java_test.cpp +++ b/filters/test/bloom_filter_deserialize_from_java_test.cpp @@ -35,7 +35,7 @@ TEST_CASE("bloom_filter", "[serde_compat]") { for (const uint16_t num_hashes: h_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_cpp.sk", std::ios::binary); + is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_java.sk", std::ios::binary); auto bf = bloom_filter::deserialize(is); REQUIRE(bf.is_empty() == (n == 0)); REQUIRE((bf.is_empty() || (bf.get_bits_used() > n / 10))); diff --git a/filters/test/bloom_filter_test.cpp b/filters/test/bloom_filter_test.cpp index df22cadf..a1e57d1f 100644 --- a/filters/test/bloom_filter_test.cpp +++ b/filters/test/bloom_filter_test.cpp @@ -131,7 +131,7 @@ TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") { } // check that raw memory also matches serialized sketch - const uint8_t* bf_bytes = bf2.get_backing_memory(); + const uint8_t* bf_bytes = bf2.get_wrapped_memory(); REQUIRE(bf_bytes == bf_memory); for (size_t i = 0; i < bytes.size(); ++i) { REQUIRE(bf_bytes[i] == bytes[i]);