-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathsum.go
314 lines (270 loc) · 6.86 KB
/
sum.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// Copyright (c) Efficient Go Authors
// Licensed under the Apache License 2.0.
package sum
import (
"bufio"
"bytes"
"io"
"os"
"strconv"
"unsafe"
"github.com/efficientgo/core/errcapture"
"github.com/efficientgo/core/errors"
)
// Sum is a naive implementation and algorithm for summing integers from file.
// Read more in "Efficient Go"; Example 4-1.
func Sum(fileName string) (ret int64, _ error) {
b, err := os.ReadFile(fileName)
if err != nil {
return 0, err
}
for _, line := range bytes.Split(b, []byte("\n")) {
if len(line) == 0 {
// Empty line at the end.
continue
}
num, err := strconv.ParseInt(string(line), 10, 64)
if err != nil {
return 0, err
}
ret += num
}
return ret, nil
}
// Sum2 is sum with optimized the first latency + CPU bottleneck bytes.Split.
// bytes.Split look complex to hande different cases. It allocates a lot causing It looks like the algo is simple enough to just
// implement on our own (tried scanner := bufio.NewScanner(f) but it's slower).
// 30% less latency and 5x less memory than Sum.
// Read more in "Efficient Go"; Example 10-3.
func Sum2(fileName string) (ret int64, _ error) {
b, err := os.ReadFile(fileName)
if err != nil {
return 0, err
}
var last int
for i := 0; i < len(b); i++ {
if b[i] != '\n' {
continue
}
num, err := strconv.ParseInt(string(b[last:i]), 10, 64)
if err != nil {
return 0, err
}
ret += num
last = i + 1
}
return ret, nil
}
// Sum2_scanner is a sum attempting using scanner. Actually slower than Sum2, but uses less memory.
func Sum2_scanner(fileName string) (ret int64, err error) {
f, err := os.Open(fileName)
if err != nil {
return 0, err
}
defer errcapture.Do(&err, f.Close, "close file")
scanner := bufio.NewScanner(f)
for scanner.Scan() {
num, err := strconv.ParseInt(string(scanner.Bytes()), 10, 64)
if err != nil {
return 0, err
}
ret += num
}
return ret, nil
}
func zeroCopyToString(b []byte) string {
return *((*string)(unsafe.Pointer(&b)))
}
// Sum3 is a sum with optimized the second latency + CPU bottleneck: string conversion.
// On CPU profile we see byte to string conversion not only allocate memory, but also takes precious time.
// Let's perform zeroCopy conversion.
// 2x less latency memory than Sum2.
// Read more in "Efficient Go"; Example 10-4.
func Sum3(fileName string) (ret int64, _ error) {
b, err := os.ReadFile(fileName)
if err != nil {
return 0, err
}
var last int
for i := 0; i < len(b); i++ {
if b[i] != '\n' {
continue
}
num, err := strconv.ParseInt(zeroCopyToString(b[last:i]), 10, 64)
if err != nil {
return 0, err
}
ret += num
last = i + 1
}
return ret, nil
}
// ParseInt is 3-4x times faster than strconv.ParseInt or Atoi.
func ParseInt(input []byte) (n int64, _ error) {
factor := int64(1)
k := 0
// TODO(bwplotka): Optimize if only positive integers are accepted (only 2.6% overhead in my tests though).
if input[0] == '-' {
factor *= -1
k++
}
for i := len(input) - 1; i >= k; i-- {
if input[i] < '0' || input[i] > '9' {
return 0, errors.Newf("not a valid integer: %v", input)
}
n += factor * int64(input[i]-'0')
factor *= 10
}
return n, nil
}
// Sum4 is a sum with optimized the second latency + CPU bottleneck: ParseInt and string conversion.
// On CPU profile we see that ParseInt does a lot of checks that we might not need. We write our own parsing
// straight from byte to avoid conversion CPU time.
// 2x less latency, same mem as Sum3.
// Read more in "Efficient Go"; Example 10-5.
func Sum4(fileName string) (ret int64, err error) {
b, err := os.ReadFile(fileName)
if err != nil {
return 0, err
}
var last int
for i := 0; i < len(b); i++ {
if b[i] != '\n' {
continue
}
num, err := ParseInt(b[last:i])
if err != nil {
return 0, err
}
ret += num
last = i + 1
}
return ret, nil
}
func Sum4_atoi(fileName string) (ret int64, err error) {
b, err := os.ReadFile(fileName)
if err != nil {
return 0, err
}
var last int
for i := 0; i < len(b); i++ {
if b[i] != '\n' {
continue
}
num, err := strconv.Atoi(zeroCopyToString(b[last:i]))
if err != nil {
return 0, err
}
ret += int64(num)
last = i + 1
}
return ret, nil
}
// Sum5 is like Sum4, but noticing that it takes time to even allocate 21 MB on heap (and read file to it).
// Let's try to use scanner instead.
// Slower than Sum4 and Sum6 because scanner is not optimized for this...? Scanner takes 73% of CPU time.
// Read more in "Efficient Go"; Example 10-7.
func Sum5(fileName string) (ret int64, err error) {
f, err := os.Open(fileName)
if err != nil {
return 0, err
}
defer errcapture.Do(&err, f.Close, "close file")
scanner := bufio.NewScanner(f)
for scanner.Scan() {
num, err := ParseInt(scanner.Bytes())
if err != nil {
return 0, err
}
ret += num
}
return ret, scanner.Err()
}
func Sum5_line(fileName string) (ret int64, err error) {
f, err := os.Open(fileName)
if err != nil {
return 0, err
}
defer errcapture.Do(&err, f.Close, "close file")
scanner := bufio.NewScanner(f)
scanner.Split(ScanLines)
for scanner.Scan() {
num, err := ParseInt(scanner.Bytes())
if err != nil {
return 0, err
}
ret += num
}
return ret, scanner.Err()
}
func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
for i := range data {
if data[i] != '\n' {
continue
}
return i + 1, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}
// Sum6 is like Sum4, but trying to use max 10 KB of mem without scanner and bulk read.
// Assuming no integer is larger than 8 000 digits.
// Read more in "Efficient Go"; Example 10-8.
func Sum6(fileName string) (ret int64, err error) {
f, err := os.Open(fileName)
if err != nil {
return 0, err
}
defer errcapture.Do(&err, f.Close, "close file")
buf := make([]byte, 8*1024)
return Sum6Reader(f, buf)
}
func Sum6Reader(r io.Reader, buf []byte) (ret int64, err error) { // Just inlining this function saves 7% on latency
var offset, n int
for err != io.EOF {
n, err = r.Read(buf[offset:])
if err != nil && err != io.EOF {
return 0, err
}
n += offset
var last int
//for i := 0; i < n; i++ { // Funny enough this is 5% slower!
for i := range buf[:n] {
if buf[i] != '\n' {
continue
}
num, err := ParseInt(buf[last:i])
if err != nil {
return 0, err
}
ret += num
last = i + 1
}
offset = n - last
if offset > 0 {
_ = copy(buf, buf[last:n])
}
}
return ret, nil
}
var sumByFile = map[string]int64{}
// Sum7 is cached (cheating!) (:
// Read more in "Efficient Go"; Example 10-15.
func Sum7(fileName string) (int64, error) {
if s, ok := sumByFile[fileName]; ok {
return s, nil
}
ret, err := Sum(fileName)
if err != nil {
return 0, err
}
sumByFile[fileName] = ret
return ret, nil
}