Skip to content

Commit

Permalink
upgrade to rust 1.75 && various minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
modelflat committed Jan 18, 2024
1 parent 62501e4 commit d8f1d69
Show file tree
Hide file tree
Showing 10 changed files with 260 additions and 140 deletions.
286 changes: 166 additions & 120 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ version = "0.5"
default-features = false
features = ["cargo_bench_support", "html_reports"]

[[example]]
name = "search"

[[bench]]
name = "search64"
harness = false
Expand All @@ -37,3 +40,7 @@ harness = false
[[bench]]
name = "internal"
harness = false

[profile.release-with-debug]
inherits = "release"
debug = true
38 changes: 38 additions & 0 deletions examples/search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
use std::time::Instant;

use data_gen::{flip_bits, generate_uniform_data, rand_pos};
use hloo::{
lookup::lookup_impl::lookup256::{Bits, MemLookup},
Lookup,
};

/// Build `n` key/value pairs for the example by wrapping every raw key produced by
/// `generate_uniform_data` into `Bits`.
///
/// The second parameter is accepted but ignored.
fn generate_perfect_data(n: usize, _: usize) -> Vec<(Bits, usize)> {
    let mut pairs = Vec::new();
    for (raw_key, value) in generate_uniform_data(n) {
        pairs.push((Bits::new(raw_key), value));
    }
    pairs
}

/// Derive a search target from an existing entry of `data`.
///
/// The entry is chosen by `rand_pos`; its raw key is passed through `flip_bits` together with
/// `change_bits` and the result is wrapped into a fresh `Bits`.
/// NOTE(review): presumably `change_bits` is the number of bits that get flipped — confirm
/// against `data_gen::flip_bits`.
fn generate_target(data: &[(Bits, usize)], change_bits: usize) -> Bits {
    let (source_key, _) = &data[rand_pos(data)];
    let altered = flip_bits(source_key.data, change_bits);
    Bits::new(altered)
}

/// Example driver: fills an in-memory lookup with 1_000_000 generated entries, then times
/// 10000 generated targets, each searched 1000 times at distance 3.
///
/// Prints the elapsed wall-clock time in milliseconds and the accumulated
/// `candidates_scanned` of all successful searches. Printing the accumulated counter also
/// makes the search results observable.
fn main() {
    println!("preparing data...");
    let data = generate_perfect_data(1_000_000, 10);

    let mut lookup = MemLookup::default();
    println!("inserting data into in-memory...");
    lookup.insert(&data).unwrap();

    println!("running search...");
    let started = Instant::now();
    let mut scanned_total = 0;
    for _ in 0..10000 {
        let target = generate_target(&data, 3);
        for _ in 0..1000 {
            // Failed searches contribute nothing to the total.
            if let Ok(found) = lookup.search(&target, 3) {
                scanned_total += found.candidates_scanned;
            }
        }
    }
    let elapsed = started.elapsed();
    println!("total time taken: {} ms", elapsed.as_millis());
    println!("total candidates scanned: {}", scanned_total);
}
5 changes: 5 additions & 0 deletions hloo_core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
mod bit_block;
mod permutations;

use std::cmp::Ordering;

pub use bit_block::{BitBlock, BitOp, PermutedBitBlock};
pub use permutations::{create_permutations, Permutation};

Expand Down Expand Up @@ -45,6 +47,9 @@ pub trait BitPermuter<B, M> {
/// Apply mask to bit sequence `key`.
fn mask(&self, key: &B) -> M;

/// Apply mask to bit sequence `key` and compare the resulting mask to `other_mask`.
fn mask_and_cmp(&self, key: &B, other_mask: &M) -> Ordering;

/// Get number of blocks this permuter operates on.
fn n_blocks(&self) -> u32;
}
4 changes: 4 additions & 0 deletions hloo_macros/src/permutation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ impl ToTokens for Permutation<'_> {
Self::mask_static(w)
}

fn mask_and_cmp(&self, w: &#data_type_name, other_mask: &#mask_type_name) -> std::cmp::Ordering {
Self::mask_static(w).cmp(other_mask)
}

fn n_blocks(&self) -> u32 {
#n_blocks as u32
}
Expand Down
4 changes: 2 additions & 2 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ pub enum BlockLocator {
}

impl BlockLocator {
#[inline(always)]
pub fn locate_by<'a, T>(&'_ self, slice: &'a [T], f: impl Fn(&T) -> Ordering) -> &'a [T] {
match self {
BlockLocator::BinarySearch => extended_binary_search_by(slice, f),
Expand Down Expand Up @@ -144,13 +143,14 @@ where
fn remove(&mut self, keys: &[K]) -> Result<(), Self::Error>;

/// Retrieve candidates for a given search.
#[inline(never)]
fn get_candidates<'a>(&'a self, key: &K) -> Candidates<'a, K, V> {
let permuter = self.permuter();
let permuted_key = permuter.apply(key);
let masked_key = permuter.mask(&permuted_key);
let block = self
.block_locator()
.locate_by(self.data(), |(key, _)| permuter.mask(key).cmp(&masked_key));
.locate_by(self.data(), |(key, _)| permuter.mask_and_cmp(key, &masked_key));
Candidates::new(permuted_key, block)
}

Expand Down
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
//! let memmap_lookup = lookup64::MemMapLookup::<i64>::create(&path);
//! ```
#![warn(clippy::pedantic)]
// #![warn(clippy::pedantic)]
#![warn(clippy::redundant_closure_for_method_calls)]

pub mod index;
pub mod lookup;
Expand Down
7 changes: 4 additions & 3 deletions src/lookup/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ pub struct SearchResult<V> {
}

impl<V> SearchResult<V> {
pub fn iter(&self) -> impl Iterator<Item = &SearchResultItem<V>> {
pub fn flat_iter(&self) -> impl Iterator<Item = &SearchResultItem<V>> {
self.result.iter().flatten()
}

pub fn into_iter(self) -> impl Iterator<Item = SearchResultItem<V>> {
pub fn into_flat_iter(self) -> impl Iterator<Item = SearchResultItem<V>> {
self.result.into_iter().flatten()
}
}
Expand Down Expand Up @@ -68,6 +68,7 @@ where
}

/// Perform a distance search.
#[inline(never)]
fn search(&self, key: &K, distance: u32) -> Result<SearchResult<V>, SearchError> {
let max_distance = self.max_search_distance();
if distance > max_distance {
Expand Down Expand Up @@ -95,7 +96,7 @@ where
{
self.search(key, distance)
.expect("distance exceeds max")
.into_iter()
.into_flat_iter()
.collect()
}

Expand Down
8 changes: 4 additions & 4 deletions src/mmvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ where
/// Signature of this vector.
#[must_use]
pub fn sig(&self) -> u64 {
self.data.as_ref().map_or(u64::MAX, |d| d.sig())
self.data.as_ref().map_or(u64::MAX, Data::sig)
}

/// Whether this vector is empty.
Expand Down Expand Up @@ -126,7 +126,7 @@ where

/// Flushes memory-mapped data into file.
pub fn flush(&self) -> Result<(), MmVecError> {
Ok(self.data.as_ref().map_or(Ok(()), |d| d.flush())?)
Ok(self.data.as_ref().map_or(Ok(()), Data::flush)?)
}

/// Destroys self, removing the underlying file.
Expand Down Expand Up @@ -308,15 +308,15 @@ where
}

fn header_offset(&self, offset: usize) -> *const u8 {
let start = self.header_mmap.as_ptr() as *const u8;
let start = self.header_mmap.as_ptr();
assert!(offset < Self::HEADER_SIZE as usize, "offset is out of bounds");
assert!(offset % 8 == 0, "offset is not placed on u64 boundary");
// Safety: we checked prerequisites for `add`
unsafe { start.add(offset) }
}

fn header_offset_mut(&mut self, offset: usize) -> *mut u8 {
let start = self.header_mmap.as_mut_ptr() as *mut u8;
let start = self.header_mmap.as_mut_ptr();
assert!(offset < Self::HEADER_SIZE as usize, "offset is out of bounds");
assert!(offset % 8 == 0, "offset is not placed on u64 boundary");
// Safety: we checked prerequisites for `add`
Expand Down
38 changes: 28 additions & 10 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use std::{
hash::{Hash, Hasher},
};

/// Partition a slice according to predicate.
/// Partition the slice according to the given predicate.
///
/// Elements for which predicate returns `true` go to the start of the slice.
/// Elements for which the predicate returns `true` are placed at the start of the slice.
pub fn partition<T, F>(data: &mut [T], predicate: F) -> usize
where
F: Fn(&T) -> bool,
Expand All @@ -16,16 +16,34 @@ where
data.partition_point(predicate)
}

/// Search for a value using binary search. If a value is found, return a slice starting at the first occurrence
/// of the found value, and ending at the last occurrence (inclusive). If the value is not found, return an empty slice.
/// Search the slice using binary search with the given comparator. Return a slice starting at the first index for
/// which the comparator returns `Ordering::Equal`, and ending at the last such index (inclusive). If the comparator
/// never returns `Ordering::Equal`, return an empty slice.
///
/// # Panics
///
/// Panics if `slice` is empty: the bounds pre-check indexes `slice[slice.len() / 2]` unconditionally.
pub fn extended_binary_search_by<T>(slice: &[T], f: impl Fn(&T) -> Ordering) -> &[T] {
// perform the first two steps of the binary search manually to get rid of OOB values right away
// this may be helpful with some of the skew cases, and makes this search more robust against user-provided data
let mid = slice.len() / 2;
let slice = if f(&slice[mid]).then(Ordering::Greater) == Ordering::Greater {
if f(&slice[0]) == Ordering::Greater {
// not in bounds
return &slice[0..0];
}
&slice[..mid + 1]
} else if f(&slice[slice.len() - 1]) == Ordering::Less {
// not in bounds
return &slice[0..0];
} else {
&slice[mid..]
};

let maybe_block_start = slice.binary_search_by(|el| {
// 0 0 2 2 2 3 4 5 13
// ^st ^end
f(el).then(Ordering::Greater)
});

match maybe_block_start {
Ok(_) => unreachable!("not possible to find element with a comparator fn that never returns Equals"),
Ok(_) => unreachable!("not possible to find an element with a comparator fn that never returns Equals"),
Err(pos) if pos < slice.len() && f(&slice[pos]).is_eq() => {
// exp_search performs best when blocks are small, otherwise binary_search is better
let block_end = exponential_search_by(&slice[pos..], |el| {
Expand All @@ -34,25 +52,25 @@ pub fn extended_binary_search_by<T>(slice: &[T], f: impl Fn(&T) -> Ordering) ->
f(el).then(Ordering::Less)
});
match block_end {
Ok(_) => unreachable!("not possible to find element with a comparator fn that never returns Equals"),
Ok(_) => unreachable!("not possible to find an element with a comparator fn that never returns Equals"),
Err(block_end) => &slice[pos..(pos + block_end).min(slice.len())],
}
}
Err(_) => &slice[0..0],
}
}

/// Performs exponential search.
fn exponential_search_by<T, F>(data: &[T], f: F) -> Result<usize, usize>
/// Perform an exponential binary search over the slice.
fn exponential_search_by<T, F>(slice: &[T], f: F) -> Result<usize, usize>
where
F: Fn(&T) -> Ordering,
{
let mut bound = 1;
while bound < data.len() && matches!(f(&data[bound]), Ordering::Less) {
while bound < slice.len() && matches!(f(&slice[bound]), Ordering::Less) {
bound <<= 1;
}
let start = bound >> 1;
data[start..data.len().min(bound + 1)]
slice[start..slice.len().min(bound + 1)]
.binary_search_by(f)
.map(|i| i + start)
.map_err(|i| i + start)
Expand Down

0 comments on commit d8f1d69

Please sign in to comment.