Skip to content

Commit

Permalink
Introduce roaring_bitmap_*_bulk operations (#363)
Browse files Browse the repository at this point in the history
* implement bitmap contains multi

* typo

* fix commit and repair implementations

* fix the result check after calling ra_advance: check the high 16 bits instead.

* resolve comment and add document

* add unit test for contains multi

* add benchmark for contains_multi

* fix unittest

* fix unittest

* fix unittest

* add const to array length

* fix unittest

* add static inline declaration

* remove declaration in .c

* update codes via comments

* Applying various fixes.

* Add roaring_bitmap_add_bulk

roaring_bitmap_add_bulk is a generalization of roaring_bitmap_add_many,
caching the container for the last inserted item, and avoiding looking
the container up again if another item is inserted in the same container.

Use the new function in the implementation of roaring_bitmap_add_many
and roaring_bitmap_of

* Add a test to add in bulk

* Allow `roaring_bitmap_add_many` to be used with an unaligned ptr

* Use the correct type for the container pointer in the bulk context
struct

* TMP: trying something

* Fix RDTSC_FINAL for CLOCK_THREAD_CPUTIME_ID

* Add a benchmark for add_bulk

* clang-format

* Don't load the whole context

* Reorder tests

* Improvements based on assembly output

* Inline

* Go back to using pointers into context

* Add docs for optimization

* Check the removals in the unit test

* clang-format

* Be smarter about benchmark clocks

* Remove initialized bool

* Posix should always have CLOCK_REALTIME

* Posix is a lie

* Implement a bulk contains function

* Be more fair to add_many, don't count time building the array

* Remove roaring_bitmap_contains_multi, use roaring_bitmap_contains_bulk

* Actually run bulk add unit test

* Fix incorrect behavior of roaring_bitmap_contains_bulk

* Fix compilation as C++

* Add extra logging for error only on windows

* Check if tests are being built with NDEBUG

* Use cmocka's `assert_true`, which is always evaluated

* Add more documentation to the `roaring_bulk_context_t` type

Co-authored-by: arthur <arthurkiller21@gmail.com>
Co-authored-by: Daniel Lemire <lemire@gmail.com>
  • Loading branch information
3 people authored Aug 26, 2022
1 parent efcb83d commit 6939974
Show file tree
Hide file tree
Showing 9 changed files with 394 additions and 103 deletions.
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if(NOT WIN32)
add_c_benchmark(intersect_range_benchmark)
target_link_libraries(add_benchmark m)
add_c_benchmark(frozen_benchmark)
add_c_benchmark(containsmulti_benchmark)
endif()
add_c_benchmark(bitset_container_benchmark)
add_c_benchmark(array_container_benchmark)
Expand Down
24 changes: 21 additions & 3 deletions benchmarks/add_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,33 @@ void run_test(uint32_t spanlen, uint32_t intvlen, double density, order_t order)
printf(" %6.1f\n", array_min(results, num_passes));

printf(" roaring_bitmap_add_many():");
for (int p = 0; p < num_passes; p++) {
roaring_bitmap_t *r = roaring_bitmap_create();
uint32_t values[intvlen * count];
for (int64_t i = 0; i < count; i++) {
for (uint32_t j = 0; j < intvlen; j++) {
values[i * intvlen + j] = offsets[i] + j;
}
}
RDTSC_START(cycles_start);
for (int64_t i = 0; i < count; i++) {
roaring_bitmap_add_many(r, intvlen, values + (i * intvlen));
}
RDTSC_FINAL(cycles_final);
results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen;
roaring_bitmap_free(r);
}
printf(" %6.1f\n", array_min(results, num_passes));

printf(" roaring_bitmap_add_bulk():");
for (int p = 0; p < num_passes; p++) {
roaring_bitmap_t *r = roaring_bitmap_create();
RDTSC_START(cycles_start);
uint32_t values[intvlen];
roaring_bulk_context_t context = {0};
for (int64_t i = 0; i < count; i++) {
for (uint32_t j = 0; j < intvlen; j++) {
values[j] = offsets[i] + j;
roaring_bitmap_add_bulk(r, &context, offsets[i] + j);
}
roaring_bitmap_add_many(r, intvlen, values);
}
RDTSC_FINAL(cycles_final);
results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen;
Expand Down
72 changes: 21 additions & 51 deletions benchmarks/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,69 +37,39 @@
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
} while (0)

#elif defined(__linux__) && defined(__GLIBC__)

#include <time.h>
#ifdef CLOCK_THREAD_CPUTIME_ID
#define RDTSC_START(cycles) \
do { \
struct timespec ts; \
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \
cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
} while (0)

#define RDTSC_FINAL(cycles) \
do { \
struct timespec ts; \
clock_gettime(CLOCK_REALTIME, &ts); \
cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
} while (0)

#elif defined(CLOCK_REALTIME) // #ifdef CLOCK_THREAD_CPUTIME_ID
#define RDTSC_START(cycles) \
do { \
struct timespec ts; \
clock_gettime(CLOCK_REALTIME, &ts); \
cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
} while (0)

#define RDTSC_FINAL(cycles) \
do { \
struct timespec ts; \
clock_gettime(CLOCK_REALTIME, &ts); \
cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
} while (0)

#else
#define RDTSC_START(cycles) \
do { \
cycles = clock(); \
} while(0)
#else // defined(CROARING_IS_X64) && defined(ROARING_INLINE_ASM)

#if defined(CLOCK_THREAD_CPUTIME_ID)
#define RDTSC_CLOCK_ID CLOCK_THREAD_CPUTIME_ID
#elif defined(CLOCK_MONOTONIC)
#define RDTSC_CLOCK_ID CLOCK_MONOTONIC
#elif defined(CLOCK_REALTIME)
#define RDTSC_CLOCK_ID CLOCK_REALTIME
#endif

#define RDTSC_FINAL(cycles) \
do { \
cycles = clock(); \
} while(0)
#if defined(RDTSC_CLOCK_ID)
#define RDTSC_START(cycles) \
do { \
struct timespec ts; \
clock_gettime(RDTSC_CLOCK_ID, &ts); \
cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
} while (0)

#endif // #ifdef CLOCK_THREAD_CPUTIME_ID
#define RDTSC_FINAL(cycles) RDTSC_START(cycles)

#else
#else // defined(RDTSC_CLOCK_ID)

/**
* Other architectures do not support rdtsc ?
* Fall back to the `clock` function
*/
#include <time.h>

#define RDTSC_START(cycles) \
do { \
cycles = clock(); \
} while (0)

#define RDTSC_FINAL(cycles) \
do { \
cycles = clock(); \
} while (0)
#define RDTSC_FINAL(cycles) RDTSC_START(cycles)

#endif
#endif

/*
Expand Down
121 changes: 121 additions & 0 deletions benchmarks/containsmulti_benchmark.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#define _GNU_SOURCE
#include <roaring/roaring.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "benchmark.h"
#include "random.h"
#include "numbersfromtextfiles.h"

/**
 * Baseline driver: answers `count` membership queries by issuing one
 * independent roaring_bitmap_contains() call per entry of `values`.
 * The answer for values[k] is stored in results[k].
 */
void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
    size_t pos = 0;
    while (pos < count) {
        const uint32_t needle = values[pos];
        results[pos] = roaring_bitmap_contains(bm, needle);
        pos++;
    }
}

/**
 * Bulk driver: answers `count` membership queries through
 * roaring_bitmap_contains_bulk(), sharing a single zero-initialized
 * context across all calls. The answer for values[k] is stored in
 * results[k].
 */
void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
    roaring_bulk_context_t context = {0};
    size_t pos = 0;
    while (pos < count) {
        const uint32_t needle = values[pos];
        results[pos] = roaring_bitmap_contains_bulk(bm, &context, needle);
        pos++;
    }
}

/**
 * qsort() comparator ordering uint32_t values ascending.
 *
 * Returns -1, 0 or 1 when *a is respectively less than, equal to, or
 * greater than *b. The result is built from comparisons rather than a
 * subtraction, so it cannot wrap for operands that are far apart.
 */
int compare_uint32(const void* a, const void* b) {
    const uint32_t lhs = *(const uint32_t*)a;
    const uint32_t rhs = *(const uint32_t*)b;
    return (lhs > rhs) - (lhs < rhs);
}

int main(int argc, char* argv[]) {
(void)&read_all_integer_files; // suppress unused warning

if (argc < 2) {
printf("Usage: %s <comma_separated_integers_file> ...\n", argv[0]);
printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]);
return 1;
}

size_t fields = argc-1;
uint32_t* values[argc];
size_t count[argc];

roaring_bitmap_t* bm = roaring_bitmap_create();
for (int i = 1; i < argc; i++) {
size_t t_count = 0;
uint32_t* t_values = read_integer_file(argv[i], &t_count);
if (t_count == 0) {
printf("No integers found in %s\n", argv[i]);
return 1;
}
roaring_bitmap_add_many(bm, t_count, t_values);

shuffle_uint32(t_values, t_count);

values[i-1] = t_values;
count[i-1] = t_count;
}
//roaring_bitmap_run_optimize(bm);

printf("Data:\n");
printf(" cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm));
printf(" buckets: %d\n", (int)bm->high_low_container.size);
printf(" range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16);

const int num_passes = 10;
printf("Cycles/element: %d\n", num_passes);
uint64_t cycles_start, cycles_final;

printf(" roaring_bitmap_contains:");
for (int p = 0; p < num_passes; p++) {
bool result[count[p]];
RDTSC_START(cycles_start);
contains_multi_via_contains(bm, values[p], result, count[p]);
RDTSC_FINAL(cycles_final);
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
}
printf("\n");

printf(" roaring_bitmap_contains_bulk:");
for (int p = 0; p < num_passes; p++) {
bool result[count[p]];
RDTSC_START(cycles_start);
contains_multi_bulk(bm, values[p], result, count[p]);
RDTSC_FINAL(cycles_final);
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
}
printf("\n");

// sort input array
for (size_t i = 0; i < fields; ++i) {
qsort(values[i], count[i], sizeof(uint32_t), compare_uint32);
}

printf(" roaring_bitmap_contains with sorted input:");
for (int p = 0; p < num_passes; p++) {
bool result[count[p]];
RDTSC_START(cycles_start);
contains_multi_via_contains(bm, values[p], result, count[p]);
RDTSC_FINAL(cycles_final);
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
}
printf("\n");

printf(" roaring_bitmap_contains_bulk with sorted input:");
for (int p = 0; p < num_passes; p++) {
bool result[count[p]];
RDTSC_START(cycles_start);
contains_multi_bulk(bm, values[p], result, count[p]);
RDTSC_FINAL(cycles_final);
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
}
printf("\n");

roaring_bitmap_free(bm);
for (size_t i = 0; i < fields; ++i) {
free(values[i]);
}
return 0;
}
59 changes: 58 additions & 1 deletion include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,48 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
*/
void roaring_bitmap_free(const roaring_bitmap_t *r);

/**
 * A bit of context usable with `roaring_bitmap_*_bulk()` functions.
 *
 * Should be initialized with `{0}` (or `memset()` to all zeros).
 * Callers should treat it as an opaque type.
 *
 * A context may only be used with a single bitmap
 * (unless re-initialized to zero), and any modification to a bitmap
 * (other than modifications performed with `_bulk()` functions with the context
 * passed) will invalidate any contexts associated with that bitmap.
 */
typedef struct roaring_bulk_context_s {
/* NOTE(review): presumably the container touched by the previous `_bulk()` call; confirm against the implementation. */
ROARING_CONTAINER_T *container;
/* NOTE(review): presumably the position of `container` within the bitmap's container array; confirm. */
int idx;
/* NOTE(review): presumably the "key" (high 16 bits of the value) that `container` holds; confirm. */
uint16_t key;
/* NOTE(review): presumably the type tag of `container`; confirm. */
uint8_t typecode;
} roaring_bulk_context_t;

/**
 * Add an item, using context from a previous insert for speed optimization.
 *
 * `context` will be used to store information between calls to make bulk
 * operations faster. `*context` should be zero-initialized before the first
 * call to this function.
 *
 * Modifying the bitmap in any way (other than through `_bulk`-suffixed
 * functions) will invalidate the stored context; calling this function with
 * a non-zero context after any such modification invokes undefined behavior.
 *
 * In order to exploit this optimization, the caller should call this function
 * with values with the same "key" (high 16 bits of the value) consecutively.
 */
void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
roaring_bulk_context_t *context, uint32_t val);

/**
 * Add `n_args` values read from the pointer `vals`; faster than repeatedly
 * calling `roaring_bitmap_add()`.
 *
 * In order to exploit this optimization, the caller should attempt to keep
 * values with the same "key" (high 16 bits of the value) as consecutive
 * elements in `vals`.
 */
void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
const uint32_t *vals);
Expand Down Expand Up @@ -335,6 +374,25 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
uint64_t range_start,
uint64_t range_end);

/**
 * Check if an item is present, using context from a previous `_bulk` call
 * for speed optimization.
 *
 * `context` will be used to store information between calls to make bulk
 * operations faster. `*context` should be zero-initialized before the first
 * call to this function.
 *
 * Modifying the bitmap in any way (other than through `_bulk`-suffixed
 * functions) will invalidate the stored context; calling this function with
 * a non-zero context after any such modification invokes undefined behavior.
 *
 * In order to exploit this optimization, the caller should call this function
 * with values with the same "key" (high 16 bits of the value) consecutively.
 */
bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
roaring_bulk_context_t *context,
uint32_t val);

/**
* Get the cardinality of the bitmap (number of elements).
*/
Expand Down Expand Up @@ -814,4 +872,3 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
using namespace ::roaring::api;
#endif
#endif

4 changes: 3 additions & 1 deletion include/roaring/roaring_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ inline container_t *ra_get_container_at_index(
/**
* Retrieves the key at index i
*/
uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i);
/* Reads entry `i` of the array's `keys` table; `i` is not range-checked here. */
inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
    const uint16_t key = ra->keys[i];
    return key;
}

/**
* Add a new key-value pair at index i
Expand Down
Loading

0 comments on commit 6939974

Please sign in to comment.