Skip to content

Commit

Permalink
implemented get_max_serialized_size_bytes() + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexanderSaydakov committed May 9, 2024
1 parent b4ee4b4 commit a986e8e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
6 changes: 6 additions & 0 deletions theta/include/theta_sketch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,12 @@ class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
virtual uint32_t get_num_retained() const;
virtual uint16_t get_seed_hash() const;

/**
* Computes maximum serialized size in bytes for a sketch configured with the given lg_k.
* This is a static function: no sketch instance is needed.
* @param lg_k base-2 logarithm of the nominal number of entries in the sketch
* @return maximum serialized size in bytes
*/
static size_t get_max_serialized_size_bytes(uint8_t lg_k);

/**
* Computes size in bytes required to serialize the current state of the sketch.
* Computing compressed size is expensive. It takes iterating over all retained hashes,
Expand Down
5 changes: 5 additions & 0 deletions theta/include/theta_sketch_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,11 @@ uint8_t compact_theta_sketch_alloc<A>::get_preamble_longs(bool compressed) const
return this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
}

/**
 * Computes the maximum serialized size in bytes for the given lg_k.
 * The result is the size of one 64-bit word times the sum of:
 *  - 3 words (the largest value returned by get_preamble_longs())
 *  - the value theta_table::get_capacity() reports for lg_k + 1 and lg_k
 *    (presumably the largest number of retained entries; confirm against theta_table)
 * @param lg_k base-2 logarithm of the nominal number of entries
 * @return maximum serialized size in bytes
 */
template<typename A>
size_t compact_theta_sketch_alloc<A>::get_max_serialized_size_bytes(uint8_t lg_k) {
  const auto max_entries = update_theta_sketch_alloc<A>::theta_table::get_capacity(lg_k + 1, lg_k);
  const auto max_words = 3 + max_entries;
  return sizeof(uint64_t) * max_words;
}

template<typename A>
size_t compact_theta_sketch_alloc<A>::get_serialized_size_bytes(bool compressed) const {
if (compressed && is_suitable_for_compression()) {
Expand Down
22 changes: 20 additions & 2 deletions theta/test/theta_sketch_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,11 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
for (int i = 0; i < n; i++) update_sketch.update(i);

std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
update_sketch.compact().serialize(s);
auto bytes = update_sketch.compact().serialize();
auto compact_sketch = update_sketch.compact();
compact_sketch.serialize(s);
auto bytes = compact_sketch.serialize();
REQUIRE(bytes.size() == static_cast<size_t>(s.tellp()));
REQUIRE(bytes.size() == compact_sketch.get_serialized_size_bytes());
for (size_t i = 0; i < bytes.size(); ++i) {
REQUIRE(((char*)bytes.data())[i] == (char)s.get());
}
Expand Down Expand Up @@ -521,6 +523,7 @@ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
auto compact_sketch = update_sketch.compact();

auto bytes = compact_sketch.serialize_compressed();
REQUIRE(bytes.size() == compact_sketch.get_serialized_size_bytes(true));
{ // deserialize bytes
auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
Expand All @@ -544,6 +547,7 @@ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {

std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
compact_sketch.serialize_compressed(s);
REQUIRE(static_cast<size_t>(s.tellp()) == compact_sketch.get_serialized_size_bytes(true));
auto deserialized_sketch = compact_theta_sketch::deserialize(s);
REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
Expand All @@ -554,4 +558,18 @@ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
}
}

// Checks that the largest serialized size observed while streaming distinct values
// into a sketch equals compact_theta_sketch::get_max_serialized_size_bytes(lg_k).
TEST_CASE("max serialized size", "[theta_sketch]") {
  const uint8_t lg_k = 10;
  auto sketch = update_theta_sketch::builder().set_lg_k(lg_k).build();
  int value = 0;
  // First feed 2 * 2^lg_k distinct values without measuring anything.
  for (int i = 0; i < (1 << lg_k) * 2; ++i) sketch.update(value++);
  // The running maximum must start at zero: reading it uninitialized in std::max
  // below is undefined behaviour and makes the final comparison meaningless.
  size_t max_size_bytes = 0;
  // Feed another 2 * 2^lg_k values, serializing after each update and
  // tracking the largest size seen.
  for (int i = 0; i < (1 << lg_k) * 2; ++i) {
    sketch.update(value++);
    const auto bytes = sketch.compact().serialize();
    max_size_bytes = std::max(max_size_bytes, bytes.size());
  }
  REQUIRE(max_size_bytes == compact_theta_sketch::get_max_serialized_size_bytes(lg_k));
}

} /* namespace datasketches */

0 comments on commit a986e8e

Please sign in to comment.