-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlevenshtein.go
212 lines (170 loc) · 4.05 KB
/
levenshtein.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
package muzzy
// Distance between strings
//
type Distance func(s1, s2 string, bound int) int
// LevenshteinDistance calculate distance between strings
//
// Distance between strings represent how many simple operations (insertion,
// deletion and replacement) needed to be done to convert first string to
// another. For examples, distance between "milk" and "silk" is 1 (replacement
// 'm' to 's'), but distance between "happiness" and "princess" is 4 (-'h',
// -'a', 'p'/'r', +'c').
//
// Parameter `bound` is used to optimization. If you only intrested if distance
// less or equal some number, put it number as bound. If distance more than this
// number, function return -1. Use -1 as `bound` to calculate distance without
// limitation.
func LevenshteinDistance(s1, s2 string, bound int) int {
if bound == 0 {
if s1 == s2 {
return 0
}
return -1
}
b := &bounder{
bound: bound,
}
return b.Do(s1, s2, newLevenshteinCalculator)
}
// DamerauDistance similar to Levenshtein except that permutation cost is 1
//
// Permutation of neighbor symbols cost is 1, for example Levenshtein distance
// between "permutation" and "permtuation" is 2 (u/t, t/u), but in
// Damerau–Levenshtein is 1.
func DamerauDistance(s1, s2 string, bound int) int {
if bound == 0 {
if s1 == s2 {
return 0
}
return -1
}
b := &bounder{
bound: bound,
}
return b.Do(s1, s2, newDamerauCalculator)
}
// Calculator is an abstraction of handling prefix-distance matrix to
// isolate implementation with one row (Levenshtein distance) and two rows
// (Damerau–Levenshtein distance).
type calculator interface {
Reset(int)
Calc(int, int) int
}
type bounder struct {
calc calculator
bound int
width int
height int
left int
right int
}
func (b *bounder) Do(s1, s2 string, calc func(r1, r2 []rune) calculator) int {
r1, r2 := []rune(s1), []rune(s2)
if len(r1) < len(r2) {
r1, r2 = r2, r1
}
if b.bound >= 0 && len(r1)-len(r2) > b.bound {
return -1
}
b.width = len(r2)
b.height = len(r1)
if b.bound < 0 {
b.bound = len(r1)
}
b.right = len(r2)
if b.bound < b.right {
b.right = b.bound
}
b.calc = calc(r1, r2)
return b.Calculate()
}
// Calculate distance matrix.
func (b *bounder) Calculate() int {
var n int
for i := 0; i < b.height; i++ {
b.calc.Reset(b.left)
for j := b.left; j < b.right; j++ {
n = b.calc.Calc(i, j)
if n > b.bound && j == b.left {
b.left++
}
}
if b.right < b.width && n <= b.bound {
b.right++
}
if b.left >= b.right {
return -1
}
}
return n
}
type levenshteinCalculator struct {
s1, s2 []rune
last []int
diag int
}
func newLevenshteinCalculator(s1, s2 []rune) calculator {
lc := &levenshteinCalculator{s1: s1, s2: s2, last: make([]int, len(s2)+1)}
for i := range lc.last {
lc.last[i] = i
}
return lc
}
func (lc *levenshteinCalculator) Reset(j int) {
lc.diag = lc.last[j]
lc.last[j]++
}
func (lc *levenshteinCalculator) Calc(i, j int) int {
dd := lc.diag
if lc.s1[i] != lc.s2[j] {
dd++
}
lc.last[j+1], lc.diag = min(dd, lc.last[j]+1, lc.last[j+1]+1), lc.last[j+1]
return lc.last[j+1]
}
type damerauCalculator struct {
s1, s2 []rune
last []int
prev []int
buff [2]int
}
func newDamerauCalculator(s1, s2 []rune) calculator {
lc := &damerauCalculator{
s1: s1,
s2: s2,
last: make([]int, len(s2)+1),
prev: make([]int, len(s2)+1),
}
for i := range lc.last {
lc.last[i] = i
lc.prev[i] = i
}
return lc
}
func (lc *damerauCalculator) Reset(j int) {
lc.rotate(j, lc.last[j]+1)
}
func (lc *damerauCalculator) Calc(i, j int) int {
dd := lc.prev[j]
if lc.s1[i] != lc.s2[j] {
dd++
}
if i > 0 && j > 0 && lc.s1[i-1] == lc.s2[j] && lc.s1[i] == lc.s2[j-1] {
return lc.rotate(j+1, min(dd, lc.last[j]+1, lc.last[j+1]+1, lc.buff[0]+1))
}
return lc.rotate(j+1, min(dd, lc.last[j]+1, lc.last[j+1]+1))
}
func (lc *damerauCalculator) rotate(j, next int) int {
lc.buff[0], lc.buff[1], lc.prev[j], lc.last[j] =
lc.buff[1], lc.prev[j], lc.last[j], next
return next
}
func min(x ...int) int {
m := x[0]
for i := 1; i < len(x); i++ {
if x[i] < m {
m = x[i]
}
}
return m
}