-
Notifications
You must be signed in to change notification settings - Fork 275
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce
roaring_bitmap_*_bulk
operations (#363)
* implement bitmap contains multi * typo * fix commit and repair implementations * fix check result after call ra_advance, check the high 16 bits instead. * resolve comment and add document * add unit test for contains multi * add benchmark for contains_multi * fix unittest * fix unittest * fix unittest * add const to array length * fix unittest * add static inline declaration * remove declaration in .c * update codes via comments * Applying various fixes. * Add roaring_bitmap_add_bulk roaring_bitmap_add_bulk is a generalization of roaring_bitmap_add_many, caching the container for the last inserted item, and avoiding looking the container up if another item is inserted in the same container. Use the new function in the implementation of roaring_bitmap_add_many and roaring_bitmap_of * Add a test to add in bulk * Allow `roaring_bitmap_add_many` to be used with an unaligned ptr * Use the correct type for the container pointer in the bulk context struct * TMP: trying something * Fix RDTSC_FINAL for CLOCK_THREAD_CPUTIME_ID * Add a benchmark for add_bulk * clang-format * Don't load the whole context * Reorder tests * Improvements based on assembly output * Inline * Go back to using pointers into context * Add docs for optimization * Check the removals in the unit test * clang-format * Be smarter about benchmark clocks * Remove initialized bool * Posix should always have CLOCK_REALTIME * Posix is a lie * Implement a bulk contains function * Be more fair to add_many, don't count time building the array * Remove roaring_bitmap_contains_multi, use roaring_bitmap_contains_bulk * Actually run bulk add unit test * Fix incorrect behavior of roaring_bitmap_contains_bulk * Fix compilation as c++ * Add extra logging for error only on windows * Check if tests are being built with NDEBUG * Use cmocka's `assert_true`, which is always evaluated * Add more documentation to the `roaring_bulk_context_t` type Co-authored-by: arthur <arthurkiller21@gmail.com> Co-authored-by: Daniel Lemire 
<lemire@gmail.com>
- Loading branch information
1 parent
efcb83d
commit 6939974
Showing
9 changed files
with
394 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#define _GNU_SOURCE | ||
#include <roaring/roaring.h> | ||
#include <inttypes.h> | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <limits.h> | ||
#include "benchmark.h" | ||
#include "random.h" | ||
#include "numbersfromtextfiles.h" | ||
|
||
/**
 * Membership test for a batch of values, one independent lookup per value.
 *
 * For every index i in [0, count), results[i] receives the return value of
 * roaring_bitmap_contains(bm, values[i]). Nothing is read or written when
 * count is 0.
 *
 * @param bm      bitmap to query
 * @param values  `count` values to look up
 * @param results output array with room for `count` entries
 * @param count   number of entries in `values` and `results`
 */
void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
    const uint32_t* next_value = values;
    bool* next_result = results;
    // Walk both arrays in lock-step, front to back.
    for (size_t remaining = count; remaining != 0; --remaining) {
        *next_result++ = roaring_bitmap_contains(bm, *next_value++);
    }
}
|
||
/**
 * Membership test for a batch of values through the bulk-lookup API.
 *
 * A single zero-initialized roaring_bulk_context_t is created per call and
 * handed to every roaring_bitmap_contains_bulk() invocation, in input order.
 * For every index i in [0, count), results[i] receives the return value of
 * the lookup of values[i]. Nothing is read or written when count is 0.
 *
 * @param bm      bitmap to query
 * @param values  `count` values to look up
 * @param results output array with room for `count` entries
 * @param count   number of entries in `values` and `results`
 */
void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
    roaring_bulk_context_t bulk_ctx = {0};
    const uint32_t* next_value = values;
    bool* next_result = results;
    // The same context object is threaded through all lookups of this call.
    for (size_t remaining = count; remaining != 0; --remaining) {
        *next_result++ = roaring_bitmap_contains_bulk(bm, &bulk_ctx, *next_value++);
    }
}
|
||
/**
 * qsort()-compatible three-way comparison of two uint32_t values.
 *
 * @param a pointer to the first uint32_t
 * @param b pointer to the second uint32_t
 * @return -1 if *a < *b, 1 if *a > *b, 0 if they are equal
 */
int compare_uint32(const void* a, const void* b) {
    const uint32_t lhs = *(const uint32_t*)a;
    const uint32_t rhs = *(const uint32_t*)b;
    // Each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
    // and cannot overflow the way a plain subtraction of the values could.
    return (lhs > rhs) - (lhs < rhs);
}
|
||
int main(int argc, char* argv[]) { | ||
(void)&read_all_integer_files; // suppress unused warning | ||
|
||
if (argc < 2) { | ||
printf("Usage: %s <comma_separated_integers_file> ...\n", argv[0]); | ||
printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]); | ||
return 1; | ||
} | ||
|
||
size_t fields = argc-1; | ||
uint32_t* values[argc]; | ||
size_t count[argc]; | ||
|
||
roaring_bitmap_t* bm = roaring_bitmap_create(); | ||
for (int i = 1; i < argc; i++) { | ||
size_t t_count = 0; | ||
uint32_t* t_values = read_integer_file(argv[i], &t_count); | ||
if (t_count == 0) { | ||
printf("No integers found in %s\n", argv[i]); | ||
return 1; | ||
} | ||
roaring_bitmap_add_many(bm, t_count, t_values); | ||
|
||
shuffle_uint32(t_values, t_count); | ||
|
||
values[i-1] = t_values; | ||
count[i-1] = t_count; | ||
} | ||
//roaring_bitmap_run_optimize(bm); | ||
|
||
printf("Data:\n"); | ||
printf(" cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm)); | ||
printf(" buckets: %d\n", (int)bm->high_low_container.size); | ||
printf(" range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16); | ||
|
||
const int num_passes = 10; | ||
printf("Cycles/element: %d\n", num_passes); | ||
uint64_t cycles_start, cycles_final; | ||
|
||
printf(" roaring_bitmap_contains:"); | ||
for (int p = 0; p < num_passes; p++) { | ||
bool result[count[p]]; | ||
RDTSC_START(cycles_start); | ||
contains_multi_via_contains(bm, values[p], result, count[p]); | ||
RDTSC_FINAL(cycles_final); | ||
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); | ||
} | ||
printf("\n"); | ||
|
||
printf(" roaring_bitmap_contains_bulk:"); | ||
for (int p = 0; p < num_passes; p++) { | ||
bool result[count[p]]; | ||
RDTSC_START(cycles_start); | ||
contains_multi_bulk(bm, values[p], result, count[p]); | ||
RDTSC_FINAL(cycles_final); | ||
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); | ||
} | ||
printf("\n"); | ||
|
||
// sort input array | ||
for (size_t i = 0; i < fields; ++i) { | ||
qsort(values[i], count[i], sizeof(uint32_t), compare_uint32); | ||
} | ||
|
||
printf(" roaring_bitmap_contains with sorted input:"); | ||
for (int p = 0; p < num_passes; p++) { | ||
bool result[count[p]]; | ||
RDTSC_START(cycles_start); | ||
contains_multi_via_contains(bm, values[p], result, count[p]); | ||
RDTSC_FINAL(cycles_final); | ||
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); | ||
} | ||
printf("\n"); | ||
|
||
printf(" roaring_bitmap_contains_bulk with sorted input:"); | ||
for (int p = 0; p < num_passes; p++) { | ||
bool result[count[p]]; | ||
RDTSC_START(cycles_start); | ||
contains_multi_bulk(bm, values[p], result, count[p]); | ||
RDTSC_FINAL(cycles_final); | ||
printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); | ||
} | ||
printf("\n"); | ||
|
||
roaring_bitmap_free(bm); | ||
for (size_t i = 0; i < fields; ++i) { | ||
free(values[i]); | ||
} | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.