Skip to content

Commit

Permalink
Merge branch 'bloom' of github.com:jmalkin/datasketches-cpp into bloom
Browse files: browse the repository at this point in the history
  • Loading branch information
jmalkin committed Aug 16, 2024
2 parents 9d53c6c + 5e62bc3 commit 0b575fe
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 25 deletions.
1 change: 1 addition & 0 deletions common/include/common_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ namespace random_utils {
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
static thread_local std::mt19937_64 rand(rd());
static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
static thread_local std::uniform_int_distribution<uint64_t> next_uint64(0, UINT64_MAX);

// thread-safe random bit
static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
Expand Down
2 changes: 1 addition & 1 deletion filters/include/bit_array_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ namespace bit_array_ops {
}

/**
* Gets teh value of a bit at the specified index and sets it to true
* Gets the value of a bit at the specified index and sets it to true
* @param array the array of bits
* @param index the index of the bit to get and set
* @return the value of the bit at the specified index
Expand Down
24 changes: 12 additions & 12 deletions filters/include/bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ class bloom_filter_alloc {
* @param allocator instance of an Allocator
* @return an instance of a Bloom filter
*/
static bloom_filter_alloc deserialize(std::istream& is, const A& allocator = Allocator());
static bloom_filter_alloc deserialize(std::istream& is, const Allocator& allocator = Allocator());

/**
* @brief Wraps the provided memory as a read-only Bloom filter. Reads the data in-place and does
Expand Down Expand Up @@ -231,12 +231,12 @@ class bloom_filter_alloc {
* Copy constructor
* @param other filter to be copied
*/
bloom_filter_alloc(const bloom_filter_alloc&);
bloom_filter_alloc(const bloom_filter_alloc& other);

/** Move constructor
* @param other filter to be moved
*/
bloom_filter_alloc(bloom_filter_alloc&&) noexcept;
bloom_filter_alloc(bloom_filter_alloc&& other) noexcept;

/**
* Copy assignment
Expand All @@ -253,7 +253,7 @@ class bloom_filter_alloc {
bloom_filter_alloc& operator=(bloom_filter_alloc&& other);

/**
* @brief Destroy the bloom filter alloc object
* @brief Destroy the bloom filter object
*/
~bloom_filter_alloc();

Expand All @@ -265,7 +265,7 @@ class bloom_filter_alloc {
* This method serializes the filter as a vector of bytes.
* An optional header can be reserved in front of the filter.
* It is a blank space of a given size.
* This header is used in Datasketches PostgreSQL extension.
* Some integrations such as PostgreSQL may need this header space.
* @param header_size_bytes space to reserve in front of the filter
* @return serialized filter as a vector of bytes
*/
Expand Down Expand Up @@ -658,17 +658,17 @@ class bloom_filter_alloc {
bool is_memory_owned() const;

/**
* @brief Checks if the Bloom Filter has backing memory.
* @brief Checks if the Bloom Filter was created by a call to wrap().
*
* @return True if the filter has backing memory, otherwise false.
* @return True if the filter was created by wrapping memory, otherwise false.
*/
bool has_backing_memory() const;
bool is_wrapped() const;

/**
* @brief Returns a pointer to the backing memory, if it exists.
* @return A pointer to the backing memory, or nullptr if it does not exist.
* @brief Returns a pointer to the memory this filter wraps, if it exists.
* @return A pointer to the wrapped memory, or nullptr if is_wrapped() is false.
*/
const uint8_t* get_backing_memory() const;
const uint8_t* get_wrapped_memory() const;

/**
* @brief Gets the serialized size of the Bloom Filter in bytes
Expand Down Expand Up @@ -739,7 +739,7 @@ class bloom_filter_alloc {
uint64_t seed_;
uint16_t num_hashes_;
bool is_dirty_;
bool is_owned_; // if true, data is not owned by filter AND data_ holdes the entire filter not just the bit array
bool is_owned_; // if false, data is not owned by filter AND memory_ holds the entire filter not just the bit array
bool is_read_only_; // if true, filter is read-only
uint64_t capacity_bits_;
uint64_t num_bits_set_;
Expand Down
7 changes: 1 addition & 6 deletions filters/include/bloom_filter_builder_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,7 @@ namespace datasketches {

template<typename A>
uint64_t bloom_filter_builder_alloc<A>::generate_random_seed() {
union {
uint64_t long_value;
double double_value;
} ldu;
ldu.double_value = random_utils::next_double(random_utils::rand);
return ldu.long_value;
return random_utils::next_uint64(random_utils::rand);
}

template<typename A>
Expand Down
8 changes: 4 additions & 4 deletions filters/include/bloom_filter_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ bloom_filter_alloc<A>::bloom_filter_alloc(bloom_filter_alloc&& other) noexcept :
is_read_only_(other.is_read_only_),
capacity_bits_(other.capacity_bits_),
num_bits_set_(other.num_bits_set_),
bit_array_(std::move(other.bit_array_)),
memory_(std::move(other.memory_))
bit_array_(other.bit_array_),
memory_(other.memory_)
{
// ensure destructor on other will behave nicely
other.is_owned_ = false;
Expand Down Expand Up @@ -504,7 +504,7 @@ bool bloom_filter_alloc<A>::is_read_only() const {
}

template<typename A>
bool bloom_filter_alloc<A>::has_backing_memory() const {
bool bloom_filter_alloc<A>::is_wrapped() const {
return memory_ != nullptr;
}

Expand All @@ -514,7 +514,7 @@ bool bloom_filter_alloc<A>::is_memory_owned() const {
}

template<typename A>
const uint8_t* bloom_filter_alloc<A>::get_backing_memory() const {
const uint8_t* bloom_filter_alloc<A>::get_wrapped_memory() const {
return memory_;
}

Expand Down
2 changes: 1 addition & 1 deletion filters/test/bloom_filter_deserialize_from_java_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ TEST_CASE("bloom_filter", "[serde_compat]") {
for (const uint16_t num_hashes: h_arr) {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_cpp.sk", std::ios::binary);
is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_java.sk", std::ios::binary);
auto bf = bloom_filter::deserialize(is);
REQUIRE(bf.is_empty() == (n == 0));
REQUIRE((bf.is_empty() || (bf.get_bits_used() > n / 10)));
Expand Down
2 changes: 1 addition & 1 deletion filters/test/bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") {
}

// check that raw memory also matches serialized sketch
const uint8_t* bf_bytes = bf2.get_backing_memory();
const uint8_t* bf_bytes = bf2.get_wrapped_memory();
REQUIRE(bf_bytes == bf_memory);
for (size_t i = 0; i < bytes.size(); ++i) {
REQUIRE(bf_bytes[i] == bytes[i]);
Expand Down

0 comments on commit 0b575fe

Please sign in to comment.