-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathbloomfilter.py
91 lines (81 loc) · 2.79 KB
/
bloomfilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def init(size=10, function_count=5, tweak=99):
return {
'bit_field': [0] * (size * 8),
'size': size,
'function_count': function_count,
'tweak': tweak
}
BIP37_CONSTANT = 0xfba4c795
def add(bloomfilter, item):
# iterate self.function_count number of times
for i in range(bloomfilter['function_count']):
# BIP0037 spec seed is i*BIP37_CONSTANT + self.tweak
seed = i * BIP37_CONSTANT + bloomfilter['tweak']
# get the murmur3 hash given that seed
h = murmur3(item, seed=seed)
# set the bit at the hash mod the bitfield size (self.size*8)
bit = h % (bloomfilter['size'] * 8)
# set the bit field at bit to be 1
bloomfilter['bit_field'][bit] = 1
return bloomfilter
def bit_field_to_bytes(bit_field):
if len(bit_field) % 8 != 0:
raise RuntimeError('bit_field does not have a length that is not divisible by 8')
result = bytearray(len(bit_field) // 8)
for i, bit in enumerate(bit_field):
byte_index, bit_index = divmod(i, 8)
if bit:
result[byte_index] |= 1 << bit_index
return bytes(result)
def murmur3(data, seed=0):
"""from http://stackoverflow.com/questions/13305290/is-there-a-pure-python-implementation-of-murmurhash"""
c1 = 0xcc9e2d51
c2 = 0x1b873593
length = len(data)
h1 = seed
roundedEnd = (length & 0xfffffffc) # round down to 4 byte block
for i in range(0, roundedEnd, 4):
# little endian load order
k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | \
((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24)
k1 *= c1
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
k1 *= c2
h1 ^= k1
h1 = (h1 << 13) | ((h1 & 0xffffffff) >> 19) # ROTL32(h1,13)
h1 = h1 * 5 + 0xe6546b64
# tail
k1 = 0
val = length & 0x03
if val == 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16
# fallthrough
if val in [2, 3]:
k1 |= (data[roundedEnd + 1] & 0xff) << 8
# fallthrough
if val in [1, 2, 3]:
k1 |= data[roundedEnd] & 0xff
k1 *= c1
k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
k1 *= c2
h1 ^= k1
# finalization
h1 ^= length
# fmix(h1)
h1 ^= ((h1 & 0xffffffff) >> 16)
h1 *= 0x85ebca6b
h1 ^= ((h1 & 0xffffffff) >> 13)
h1 *= 0xc2b2ae35
h1 ^= ((h1 & 0xffffffff) >> 16)
return h1 & 0xffffffff
def bytes_to_bit_field(some_bytes):
flag_bits = []
# iterate over each byte of flags
for byte in some_bytes:
# iterate over each bit, right-to-left
for _ in range(8):
# add the current bit (byte & 1)
flag_bits.append(byte & 1)
# rightshift the byte 1
byte >>= 1
return flag_bits