diff --git a/common/include/common_defs.hpp b/common/include/common_defs.hpp index 8a61ff30..6a87e079 100644 --- a/common/include/common_defs.hpp +++ b/common/include/common_defs.hpp @@ -42,6 +42,7 @@ namespace random_utils { static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2 static thread_local std::mt19937_64 rand(rd()); static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0); + static thread_local std::uniform_int_distribution next_uint64(0, UINT64_MAX); // thread-safe random bit static thread_local std::independent_bits_engine diff --git a/filters/include/bit_array_ops.hpp b/filters/include/bit_array_ops.hpp index 263678dc..730b2387 100644 --- a/filters/include/bit_array_ops.hpp +++ b/filters/include/bit_array_ops.hpp @@ -79,7 +79,7 @@ namespace bit_array_ops { } /** - * Gets teh value of a bit at the specified index and sets it to true + * Gets the value of a bit at the specified index and sets it to true * @param array the array of bits * @param index the index of the bit to get and set * @return the value of the bit at the specified index diff --git a/filters/include/bloom_filter.hpp b/filters/include/bloom_filter.hpp index 1de09e63..78396277 100644 --- a/filters/include/bloom_filter.hpp +++ b/filters/include/bloom_filter.hpp @@ -203,7 +203,7 @@ class bloom_filter_alloc { * @param allocator instance of an Allocator * @return an instance of a Bloom filter */ - static bloom_filter_alloc deserialize(std::istream& is, const A& allocator = Allocator()); + static bloom_filter_alloc deserialize(std::istream& is, const Allocator& allocator = Allocator()); /** * @brief Wraps the provided memory as a read-only Bloom filter. Reads the data in-place and does @@ -231,12 +231,12 @@ class bloom_filter_alloc { * Copy constructor * @param other filter to be copied */ - bloom_filter_alloc(const bloom_filter_alloc&); + bloom_filter_alloc(const bloom_filter_alloc& other); /** Move constructor * @param other filter to be moved */ - bloom_filter_alloc(bloom_filter_alloc&&) noexcept; + bloom_filter_alloc(bloom_filter_alloc&& other) noexcept; /** * Copy assignment @@ -253,7 +253,7 @@ class bloom_filter_alloc { bloom_filter_alloc& operator=(bloom_filter_alloc&& other); /** - * @brief Destroy the bloom filter alloc object + * @brief Destroy the bloom filter object */ ~bloom_filter_alloc(); @@ -265,7 +265,7 @@ class bloom_filter_alloc { * This method serializes the filter as a vector of bytes. * An optional header can be reserved in front of the filter. * It is a blank space of a given size. - * This header is used in Datasketches PostgreSQL extension. + * Some integrations such as PostgreSQL may need this header space. * @param header_size_bytes space to reserve in front of the filter * @return serialized filter as a vector of bytes */ @@ -658,17 +658,17 @@ class bloom_filter_alloc { bool is_memory_owned() const; /** - * @brief Checks if the Bloom Filter has backing memory. + * @brief Checks if the Bloom Filter was created by a call to wrap(). * - * @return True if the filter has backing memory, otherwise false. + * @return True if the filter was created by wrapping memory, otherwise false. */ - bool has_backing_memory() const; + bool is_wrapped() const; /** - * @brief Returns a pointer to the backing memory, if it exists. - * @return A pointer to the backing memory, or nullptr if it does not exist. + * @brief Returns a pointer to the memory this filter wraps, if it exists. + * @return A pointer to the wrapped memory, or nullptr if is_wrapped() is false. */ - const uint8_t* get_backing_memory() const; + const uint8_t* get_wrapped_memory() const; /** * @brief Gets the serialized size of the Bloom Filter in bytes @@ -739,7 +739,7 @@ class bloom_filter_alloc { uint64_t seed_; uint16_t num_hashes_; bool is_dirty_; - bool is_owned_; // if true, data is not owned by filter AND data_ holdes the entire filter not just the bit array + bool is_owned_; // if true, data is not owned by filter AND memory_ holds the entire filter not just the bit array bool is_read_only_; // if true, filter is read-only uint64_t capacity_bits_; uint64_t num_bits_set_; diff --git a/filters/include/bloom_filter_builder_impl.hpp b/filters/include/bloom_filter_builder_impl.hpp index da637a68..26bd2634 100644 --- a/filters/include/bloom_filter_builder_impl.hpp +++ b/filters/include/bloom_filter_builder_impl.hpp @@ -31,12 +31,7 @@ namespace datasketches { template uint64_t bloom_filter_builder_alloc::generate_random_seed() { - union { - uint64_t long_value; - double double_value; - } ldu; - ldu.double_value = random_utils::next_double(random_utils::rand); - return ldu.long_value; + return random_utils::next_uint64(random_utils::rand); } template diff --git a/filters/include/bloom_filter_impl.hpp b/filters/include/bloom_filter_impl.hpp index b8c47a9b..0195eb5d 100644 --- a/filters/include/bloom_filter_impl.hpp +++ b/filters/include/bloom_filter_impl.hpp @@ -182,8 +182,8 @@ bloom_filter_alloc::bloom_filter_alloc(bloom_filter_alloc&& other) noexcept : is_read_only_(other.is_read_only_), capacity_bits_(other.capacity_bits_), num_bits_set_(other.num_bits_set_), - bit_array_(std::move(other.bit_array_)), - memory_(std::move(other.memory_)) + bit_array_(other.bit_array_), + memory_(other.memory_) { // ensure destructor on other will behave nicely other.is_owned_ = false; @@ -504,7 +504,7 @@ bool bloom_filter_alloc::is_read_only() const { } template -bool bloom_filter_alloc::has_backing_memory() const { +bool bloom_filter_alloc::is_wrapped() const { return memory_ != nullptr; } @@ -514,7 +514,7 @@ bool bloom_filter_alloc::is_memory_owned() const { } template -const uint8_t* bloom_filter_alloc::get_backing_memory() const { +const uint8_t* bloom_filter_alloc::get_wrapped_memory() const { return memory_; } diff --git a/filters/test/bloom_filter_deserialize_from_java_test.cpp b/filters/test/bloom_filter_deserialize_from_java_test.cpp index f2dee18c..259eb4ff 100644 --- a/filters/test/bloom_filter_deserialize_from_java_test.cpp +++ b/filters/test/bloom_filter_deserialize_from_java_test.cpp @@ -35,7 +35,7 @@ TEST_CASE("bloom_filter", "[serde_compat]") { for (const uint16_t num_hashes: h_arr) { std::ifstream is; is.exceptions(std::ios::failbit | std::ios::badbit); - is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_cpp.sk", std::ios::binary); + is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_java.sk", std::ios::binary); auto bf = bloom_filter::deserialize(is); REQUIRE(bf.is_empty() == (n == 0)); REQUIRE((bf.is_empty() || (bf.get_bits_used() > n / 10))); diff --git a/filters/test/bloom_filter_test.cpp b/filters/test/bloom_filter_test.cpp index df22cadf..a1e57d1f 100644 --- a/filters/test/bloom_filter_test.cpp +++ b/filters/test/bloom_filter_test.cpp @@ -131,7 +131,7 @@ TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") { } // check that raw memory also matches serialized sketch - const uint8_t* bf_bytes = bf2.get_backing_memory(); + const uint8_t* bf_bytes = bf2.get_wrapped_memory(); REQUIRE(bf_bytes == bf_memory); for (size_t i = 0; i < bytes.size(); ++i) { REQUIRE(bf_bytes[i] == bytes[i]);