-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdedup.go
111 lines (96 loc) · 2.25 KB
/
dedup.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package news
import (
"encoding/hex"
"hash/fnv"
"sync"
)
// Deduplicator filters out repeated elements identified by a key
// (a unique id or a hash of the element).
type Deduplicator interface {
// Keep returns true if the key was kept (i.e. the key is new to the
// deduplicator) and false if the key is a duplicate.
Keep(key DedupKey) bool
}
// DedupKeySize is the number of bytes in a DedupKey byte array.
const DedupKeySize = 16
// DedupKey is the fixed-size key that elements are deduplicated by.
// It is an array rather than a string for GC reasons (a map keyed by a
// pointer-free array does not need to be scanned), although string keys
// may be faster to look up.
type DedupKey [DedupKeySize]byte
// memDedup is a primitive bounded in-memory key cache.
// NOTE: although originally described as an LRU, its Keep method does not
// touch the queue when a duplicate key is seen, so keys are evicted in
// insertion order (FIFO).
type memDedup struct {
// m is the set of keys currently remembered.
m map[DedupKey]struct{}
// q is a fixed-size ring buffer holding the remembered keys in insertion order.
q []DedupKey
// MaxSize is the maximum number of keys remembered before the oldest is evicted.
MaxSize int
// qr is the ring-buffer index of the oldest key, i.e. the next one to evict.
qr int
// qw is the ring-buffer index at which the next new key is written.
qw int
}
// syncDedup is a thread-safe wrapper that serializes calls to another
// Deduplicator with a mutex.
type syncDedup struct {
// dedup is the wrapped deduplicator that does the actual filtering.
dedup Deduplicator
// mu is held for the duration of every Keep call forwarded to dedup.
mu sync.Mutex
}
// NewDedup creates a new in-memory deduplicator that remembers at most
// maxSize keys. It panics if maxSize is not positive.
// The returned value is not safe for concurrent use.
func NewDedup(maxSize int) Deduplicator {
	if maxSize < 1 {
		panic("dedup: maxsize is positive num")
	}
	// The ring buffer is allocated at full size up front; the read and
	// write cursors start at their zero value.
	return &memDedup{
		m:       make(map[DedupKey]struct{}),
		q:       make([]DedupKey, maxSize),
		MaxSize: maxSize,
	}
}
// DedupSync returns a concurrency-safe (mutex-based) wrapper around d.
// If d is already such a wrapper it is returned unchanged.
// It panics if d is nil.
func DedupSync(d Deduplicator) Deduplicator {
	if d == nil {
		panic("wrapping nil dedup")
	}
	switch wrapped := d.(type) {
	case *syncDedup:
		// Already synchronized: avoid stacking a second mutex on top.
		return wrapped
	default:
		return &syncDedup{dedup: d}
	}
}
// Keep records k and returns true if k is not currently remembered;
// it returns false, leaving all state untouched, if k is a duplicate.
// When the cache already holds MaxSize keys, the oldest inserted key is
// forgotten to make room for k.
func (d *memDedup) Keep(k DedupKey) bool {
	if _, seen := d.m[k]; seen {
		return false
	}

	// Highest valid index of the ring buffer; both cursors wrap after it.
	last := d.MaxSize - 1

	if len(d.m) >= d.MaxSize {
		// Full: drop the key at the read cursor before inserting the new one.
		evicted := d.q[getAndInc(&d.qr, last)]
		delete(d.m, evicted)
	}

	d.m[k] = struct{}{}
	d.q[getAndInc(&d.qw, last)] = k
	return true
}
// Keep forwards to the wrapped deduplicator while holding the mutex, so
// that concurrent callers are serialized.
func (s *syncDedup) Keep(k DedupKey) bool {
	s.mu.Lock()
	// Deferred so the lock is released even if the wrapped Keep panics.
	defer s.mu.Unlock()

	kept := s.dedup.Keep(k)
	return kept
}
// getAndInc returns the current value of *ptr and advances it by one.
// Once the value has reached limit (or lies beyond it), limit is returned
// and *ptr wraps around to 0.
func getAndInc(ptr *int, limit int) int {
	cur := *ptr
	if cur >= limit {
		// End of the range reached: wrap the cursor.
		*ptr = 0
		return limit
	}
	*ptr = cur + 1
	return cur
}
// StrToDedupKey calculates a DedupKey for the given strings by hashing
// them with 128-bit FNV-1. A single zero byte is written between
// consecutive strings as a separator. With no arguments the zero
// DedupKey is returned.
func StrToDedupKey(xs ...string) DedupKey {
	var key DedupKey
	if len(xs) == 0 {
		return key
	}

	hasher := fnv.New128()
	for i, s := range xs {
		if i > 0 {
			// Separator between parts (not written before the first one).
			hasher.Write([]byte{0}) // nolint:errcheck
		}
		hasher.Write([]byte(s)) // nolint:errcheck
	}

	copy(key[:], hasher.Sum(nil))
	return key
}
// String returns the key as a lowercase hexadecimal string.
func (k DedupKey) String() string {
	encoded := make([]byte, hex.EncodedLen(len(k)))
	hex.Encode(encoded, k[:])
	return string(encoded)
}