From 693997497367645268568c4aa6fdfb05039c8f48 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Fri, 26 Aug 2022 16:25:41 -0400 Subject: [PATCH] Introduce `roaring_bitmap_*_bulk` operations (#363) * implement bitmap contains multi * typo * fix commit and repair implementations * fix check result after call ra_advance, check the high 16 bits instead. * resolve comment and add document * add unit test for contains multi * add benchmark for contains_multi * fix unittest * fix unittest * fix unittest * add const to array length * fix unittest * add static inline declaration * remove declaration in .c * update codes via comments * Applying various fixes. * Add roaring_bitmap_add_bulk roaring_bitmap_add_bulk is a generalization of roaring_bitmap_add_many, caching the container for the last inserted item, and avoiding looking the container up if another item is inserted in the same container. Use the new function in the implementation of roaring_bitmap_add_many and roaring_bitmap_of * Add a test to add in bulk * Allow `roaring_bitmap_add_many` to be used with an unaligned ptr * Use the correct type for the container pointer in the bulk context struct * TMP: trying something * Fix RDTSC_FINAL for CLOCK_THREAD_CPUTIME_ID * Add a benchmark for add_bulk * clang-format * Don't load the whole context * Reorder tests * Improvements based on assembly output * Inline * Go back to using pointers into context * Add docs for optimization * Check the removals in the unit test * clang-format * Be smarter about benchmark clocks * Remove initialized bool * Posix should always have CLOCK_REALTIME * Posix is a lie * Implement a bulk contains function * Be more fair to add_many, don't count time building the array * Remove roaring_bitmap_contains_multi, use roaring_bitmap_contains_bulk * Actually run bulk add unit test * Fix incorrect behavior of roaring_bitmap_contains_bulk * Fix compilation as c++ * Add extra logging for error only on windows * Check if tests are being built with 
NDEBUG * Use cmocka's `assert_true`, which is always evaluated * Add more documentation to the `roaring_bulk_context_t` type Co-authored-by: arthur Co-authored-by: Daniel Lemire --- benchmarks/CMakeLists.txt | 1 + benchmarks/add_benchmark.c | 24 ++++- benchmarks/benchmark.h | 72 +++++--------- benchmarks/containsmulti_benchmark.c | 121 ++++++++++++++++++++++++ include/roaring/roaring.h | 59 +++++++++++- include/roaring/roaring_array.h | 4 +- src/roaring.c | 134 +++++++++++++++++++-------- src/roaring_array.c | 5 +- tests/toplevel_unit.c | 77 ++++++++++++++- 9 files changed, 394 insertions(+), 103 deletions(-) create mode 100644 benchmarks/containsmulti_benchmark.c diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0d5fabcaa..4a3eb0e8e 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -12,6 +12,7 @@ if(NOT WIN32) add_c_benchmark(intersect_range_benchmark) target_link_libraries(add_benchmark m) add_c_benchmark(frozen_benchmark) + add_c_benchmark(containsmulti_benchmark) endif() add_c_benchmark(bitset_container_benchmark) add_c_benchmark(array_container_benchmark) diff --git a/benchmarks/add_benchmark.c b/benchmarks/add_benchmark.c index cc143e3dd..574b285c8 100644 --- a/benchmarks/add_benchmark.c +++ b/benchmarks/add_benchmark.c @@ -96,15 +96,33 @@ void run_test(uint32_t spanlen, uint32_t intvlen, double density, order_t order) printf(" %6.1f\n", array_min(results, num_passes)); printf(" roaring_bitmap_add_many():"); + for (int p = 0; p < num_passes; p++) { + roaring_bitmap_t *r = roaring_bitmap_create(); + uint32_t values[intvlen * count]; + for (int64_t i = 0; i < count; i++) { + for (uint32_t j = 0; j < intvlen; j++) { + values[i * intvlen + j] = offsets[i] + j; + } + } + RDTSC_START(cycles_start); + for (int64_t i = 0; i < count; i++) { + roaring_bitmap_add_many(r, intvlen, values + (i * intvlen)); + } + RDTSC_FINAL(cycles_final); + results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; + 
roaring_bitmap_free(r); + } + printf(" %6.1f\n", array_min(results, num_passes)); + + printf(" roaring_bitmap_add_bulk():"); for (int p = 0; p < num_passes; p++) { roaring_bitmap_t *r = roaring_bitmap_create(); RDTSC_START(cycles_start); - uint32_t values[intvlen]; + roaring_bulk_context_t context = {0}; for (int64_t i = 0; i < count; i++) { for (uint32_t j = 0; j < intvlen; j++) { - values[j] = offsets[i] + j; + roaring_bitmap_add_bulk(r, &context, offsets[i] + j); } - roaring_bitmap_add_many(r, intvlen, values); } RDTSC_FINAL(cycles_final); results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h index fee613fd9..e3a6ad166 100644 --- a/benchmarks/benchmark.h +++ b/benchmarks/benchmark.h @@ -37,69 +37,39 @@ (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ } while (0) -#elif defined(__linux__) && defined(__GLIBC__) - -#include -#ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#elif defined(CLOCK_REALTIME) // #ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#else -#define RDTSC_START(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#else // defined(CROARING_IS_X64) && defined(ROARING_INLINE_ASM) + +#if defined(CLOCK_THREAD_CPUTIME_ID) +#define RDTSC_CLOCK_ID CLOCK_THREAD_CPUTIME_ID +#elif 
defined(CLOCK_MONOTONIC) +#define RDTSC_CLOCK_ID CLOCK_MONOTONIC +#elif defined(CLOCK_REALTIME) +#define RDTSC_CLOCK_ID CLOCK_REALTIME +#endif -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#if defined(RDTSC_CLOCK_ID) +#define RDTSC_START(cycles) \ + do { \ + struct timespec ts; \ + clock_gettime(RDTSC_CLOCK_ID, &ts); \ + cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ + } while (0) -#endif // #ifdef CLOCK_THREAD_CPUTIME_ID +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) -#else +#else // defined(RDTSC_CLOCK_ID) /** -* Other architectures do not support rdtsc ? +* Fall back to the `clock` function */ -#include - #define RDTSC_START(cycles) \ do { \ cycles = clock(); \ } while (0) -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while (0) +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) +#endif #endif /* diff --git a/benchmarks/containsmulti_benchmark.c b/benchmarks/containsmulti_benchmark.c new file mode 100644 index 000000000..e92d82e5a --- /dev/null +++ b/benchmarks/containsmulti_benchmark.c @@ -0,0 +1,121 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "benchmark.h" +#include "random.h" +#include "numbersfromtextfiles.h" + +void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) { + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains(bm, values[i]); + } +} + +void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) { + roaring_bulk_context_t context = {0}; + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains_bulk(bm, &context, values[i]); + } +} + +int compare_uint32(const void* a, const void* b) { + uint32_t arg1 = *(const uint32_t*)a; + uint32_t arg2 = *(const uint32_t*)b; + if (arg1 < arg2) return -1; + if (arg1 > arg2) return 1; + return 0; +} + +int main(int argc, char* argv[]) { + (void)&read_all_integer_files; 
// suppress unused warning + + if (argc < 2) { + printf("Usage: %s ...\n", argv[0]); + printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]); + return 1; + } + + size_t fields = argc-1; + uint32_t* values[argc]; + size_t count[argc]; + + roaring_bitmap_t* bm = roaring_bitmap_create(); + for (int i = 1; i < argc; i++) { + size_t t_count = 0; + uint32_t* t_values = read_integer_file(argv[i], &t_count); + if (t_count == 0) { + printf("No integers found in %s\n", argv[i]); + return 1; + } + roaring_bitmap_add_many(bm, t_count, t_values); + + shuffle_uint32(t_values, t_count); + + values[i-1] = t_values; + count[i-1] = t_count; + } + //roaring_bitmap_run_optimize(bm); + + printf("Data:\n"); + printf(" cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm)); + printf(" buckets: %d\n", (int)bm->high_low_container.size); + printf(" range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16); + + const int num_passes = 10; + printf("Cycles/element: %d\n", num_passes); + uint64_t cycles_start, cycles_final; + + printf(" roaring_bitmap_contains:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + // sort input array + for (size_t i = 0; i < fields; ++i) { + qsort(values[i], count[i], sizeof(uint32_t), compare_uint32); + } + + printf(" roaring_bitmap_contains with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + 
RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + roaring_bitmap_free(bm); + for (size_t i = 0; i < fields; ++i) { + free(values[i]); + } + return 0; +} diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index e82d05b1b..415152445 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -257,9 +257,48 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, */ void roaring_bitmap_free(const roaring_bitmap_t *r); +/** + * A bit of context usable with `roaring_bitmap_*_bulk()` functions + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap + * (unless re-initialized to zero), and any modification to a bitmap + * (other than modifications performed with `_bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. + */ +typedef struct roaring_bulk_context_s { + ROARING_CONTAINER_T *container; + int idx; + uint16_t key; + uint8_t typecode; +} roaring_bulk_context_t; + +/** + * Add an item, using context from a previous insert for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. 
+ * + * Modifying the bitmap in any way (other than `_bulk` suffixed functions) + * will invalidate the stored context; calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val); + /** * Add value n_args from pointer vals, faster than repeatedly calling * `roaring_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same "key" (high 16 bits of the value) as consecutive + * elements in `vals` */ void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); @@ -335,6 +374,25 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Check if an item is present, using context from a previous call for speed + * optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `_bulk` suffixed functions) + * will invalidate the stored context; calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val); + /** * Get the cardinality of the bitmap (number of elements). 
*/ @@ -814,4 +872,3 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, using namespace ::roaring::api; #endif #endif - diff --git a/include/roaring/roaring_array.h b/include/roaring/roaring_array.h index fd201662b..24ce7cad2 100644 --- a/include/roaring/roaring_array.h +++ b/include/roaring/roaring_array.h @@ -93,7 +93,9 @@ inline container_t *ra_get_container_at_index( /** * Retrieves the key at index i */ -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); +inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { + return ra->keys[i]; +} /** * Add a new key-value pair at index i diff --git a/src/roaring.c b/src/roaring.c index 303f727c1..cc717bb29 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -87,46 +87,91 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { return ra_init_with_capacity(&r->high_low_container, cap); } +static inline void add_bulk_impl(roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + uint8_t typecode; + int idx; + context->container = containerptr_roaring_bitmap_add( + r, val, &typecode, &idx); + context->typecode = typecode; + context->idx = idx; + context->key = key; + } else { + // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion directly, bypassing the roaring_bitmap_add call + uint8_t new_typecode; + container_t *container2 = container_add( + context->container, val & 0xFFFF, context->typecode, &new_typecode); + if (container2 != context->container) { + // rare instance when we need to change the container type + container_free(context->container, context->typecode); + ra_set_container_at_index(&r->high_low_container, context->idx, + container2, new_typecode); + context->typecode = new_typecode; + context->container = container2; + } + } +} void 
roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals) { - container_t *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); + const uint32_t *start = vals; + const uint32_t *end = vals + n_args; + const uint32_t *current_val = start; + + if (n_args == 0) { + return; + } + + uint8_t typecode; + int idx; + container_t *container; + val = *current_val; + container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode}; + + for (; current_val != end; current_val++) { + memcpy(&val, current_val, sizeof(val)); + add_bulk_impl(r, &context, val); + } +} + +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val) { + add_bulk_impl(r, context, val); +} + +bool 
roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) +{ + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + int32_t start_idx = -1; + if (context->container != NULL && context->key < key) { + start_idx = context->idx; + } + int idx = ra_advance_until(&r->high_low_container, key, start_idx); + if (idx == ra_get_size(&r->high_low_container)) { + return false; + } + uint8_t typecode; + context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode); + context->typecode = typecode; + context->idx = idx; + context->key = ra_get_key_at_index(&r->high_low_container, idx); + // ra_advance_until finds the next key >= the target, we found a later container. + if (context->key != key) { + return false; } - prev = val; } + // context is now set up + return container_contains(context->container, val & 0xFFFF, context->typecode); } roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { @@ -139,11 +184,12 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ // todo: could be greatly optimized but we do not expect this call to ever // include long lists roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; va_list ap; va_start(ap, n_args); - for (size_t i = 1; i <= n_args; i++) { + for (size_t i = 0; i < n_args; i++) { uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); + roaring_bitmap_add_bulk(answer, &context, val); } va_end(ap); return answer; @@ -1413,14 +1459,24 @@ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == CROARING_SERIALIZATION_ARRAY_UINT32) { + if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { /* This looks like a compressed set of uint32_t elements */ uint32_t card; memcpy(&card, bufaschar + 1, sizeof(uint32_t)); const uint32_t *elems = (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; + } + roaring_bulk_context_t context = {0}; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); + } + return bitmap; } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { return roaring_bitmap_portable_deserialize(bufaschar + 1); } else diff --git a/src/roaring_array.c b/src/roaring_array.c index 2e1b2c671..5151e7062 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -319,9 +319,8 @@ extern inline container_t *ra_get_container_at_index( const roaring_array_t *ra, uint16_t i, uint8_t *typecode); -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; -} +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, + uint16_t i); extern inline 
int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 3a4de4b3b..011cc4011 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -57,6 +57,63 @@ DEFINE_TEST(range_contains) { roaring_bitmap_free(bm); } +DEFINE_TEST(contains_bulk) { + roaring_bitmap_t *bm = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; + + // Ensure checking an empty bitmap is okay + assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0)); + assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0xFFFFFFFF)); + + // create RLE container from [0, 1000] + roaring_bitmap_add_range_closed(bm, 0, 1000); + + // add array container from 77000 + for (uint32_t i = 77000; i < 87000; i+=2) { + roaring_bitmap_add(bm, i); + } + // add bitset container from 132000 + for (uint32_t i = 132000; i < 140000; i+=2) { + roaring_bitmap_add(bm, i); + } + + roaring_bitmap_add(bm, UINT32_MAX); + + uint32_t values[] = { + 1000, // 1 + 1001, // 0 + 77000, // 1 + 77001, // 0 + 77002, // 1 + 1002, // 0 + 132000, // 1 + 132001, // 0 + 132002, // 1 + 77003, // 0 + UINT32_MAX, // 1 + UINT32_MAX - 1, // 0 + }; + size_t test_count = sizeof(values) / sizeof(values[0]); + + for (size_t i = 0; i < test_count; i++) { + roaring_bulk_context_t empty_context = {0}; + bool expected_contains = roaring_bitmap_contains(bm, values[i]); + assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &empty_context, values[i])); + assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &context, values[i])); + + if (expected_contains) { + assert_int_equal(context.key, values[i] >> 16); + } + if (context.container != NULL) { + assert_in_range(context.idx, 0, bm->high_low_container.size - 1); + assert_ptr_equal(context.container, bm->high_low_container.containers[context.idx]); + assert_int_equal(context.key, bm->high_low_container.keys[context.idx]); + assert_int_equal(context.typecode, 
bm->high_low_container.typecodes[context.idx]); + } + } + roaring_bitmap_free(bm); +} + DEFINE_TEST(is_really_empty) { roaring_bitmap_t *bm = roaring_bitmap_create(); assert_true(roaring_bitmap_is_empty(bm)); @@ -94,10 +151,6 @@ void can_copy_empty(bool copy_on_write) { roaring_bitmap_free(bm2); } - - - - bool check_serialization(roaring_bitmap_t *bitmap) { const int32_t size = roaring_bitmap_portable_size_in_bytes(bitmap); char *data = (char *)malloc(size); @@ -109,7 +162,6 @@ bool check_serialization(roaring_bitmap_t *bitmap) { return ret; } - DEFINE_TEST(issue245) { roaring_bitmap_t *bitmap = roaring_bitmap_create(); const uint32_t targetEntries = 2048; @@ -888,6 +940,19 @@ DEFINE_TEST(test_addremove) { roaring_bitmap_free(bm); } +DEFINE_TEST(test_addremove_bulk) { + roaring_bitmap_t *bm = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; + for (uint32_t value = 33057; value < 147849; value += 8) { + roaring_bitmap_add_bulk(bm, &context, value); + } + for (uint32_t value = 33057; value < 147849; value += 8) { + assert_true(roaring_bitmap_remove_checked(bm, value)); + } + assert_true(roaring_bitmap_is_empty(bm)); + roaring_bitmap_free(bm); +} + DEFINE_TEST(test_addremoverun) { roaring_bitmap_t *bm = roaring_bitmap_create(); for (uint32_t value = 33057; value < 147849; value += 8) { @@ -4182,6 +4247,7 @@ int main() { cmocka_unit_test(issue208), cmocka_unit_test(issue208b), cmocka_unit_test(range_contains), + cmocka_unit_test(contains_bulk), cmocka_unit_test(inplaceorwide), cmocka_unit_test(test_contains_range), cmocka_unit_test(check_range_contains_from_end), @@ -4206,6 +4272,7 @@ int main() { cmocka_unit_test(test_maximum_minimum), cmocka_unit_test(test_stats), cmocka_unit_test(test_addremove), + cmocka_unit_test(test_addremove_bulk), cmocka_unit_test(test_addremoverun), cmocka_unit_test(test_basic_add), cmocka_unit_test(test_remove_withrun),