Skip to content

Commit b5a9c10

Browse files
authored
Merge pull request #19 from tk3369/tk/stringperf
performance pack
2 parents 54ebebf + 7d8b9c7 commit b5a9c10

9 files changed

+545
-182
lines changed

src/SASLib.jl

+180-111
Large diffs are not rendered by default.

src/constants.jl

+1
Original file line numberDiff line numberDiff line change
@@ -214,4 +214,5 @@ const FALLBACK_ENCODING = "UTF-8"
214214
const ENCODINGS_OK_WITH_BASE_TRANSCODER = [ "UTF-8" , "US-ASCII" ]
215215

216216
const REGULAR_STR_ARRAY(n) = Array{String}(n)
217+
const EMPTY_STRING = ""
217218

src/utils.jl

+45-27
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,20 @@ function brstrip(bytes::Vector{UInt8}, remove::Vector{UInt8})
1818
end
1919
return Vector{UInt8}()
2020
end
21-
#brstrip(b"\x01\x02\x03", b"\x03")
21+
22+
# """
23+
# Faster version of rstrip (slightly modified version from julia master branch)
24+
# """
25+
# function rstrip2(s::String)
26+
# i = endof(s)
27+
# while 1 ≤ i
28+
# c = s[i]
29+
# j = prevind(s, i)
30+
# c == ' ' || return s[1:i]
31+
# i = j
32+
# end
33+
# EMPTY_STRING
34+
# end
2235

2336
"""
2437
Find needle in the haystack with both `Vector{UInt8}` type arguments.
@@ -35,12 +48,6 @@ function Base.contains(haystack::Vector{UInt8}, needle::Vector{UInt8})
3548
end
3649
return false
3750
end
38-
# contains(b"123", b"123")
39-
# contains(b"123456", b"123")
40-
# contains(b"123456", b"234")
41-
# contains(b"123456", b"456")
42-
# contains(b"123456", b"567")
43-
# contains(b"123456", b"xxx")
4451

4552
# Fast implementation to `reinterpret` int/floats
4653
# See https://discourse.julialang.org/t/newbie-question-convert-two-8-byte-values-into-a-single-16-byte-value/7662/5
@@ -186,16 +193,27 @@ function convertfloat64f(bytes::Vector{UInt8}, endianess::Symbol)
186193
r
187194
end
188195

189-
190-
# """
191-
# Take 8 bytes and convert them into a UInt64 type. The order is preserved.
192-
# """
193-
# function convertint64(a::UInt8,b::UInt8,c::UInt8,d::UInt8,e::UInt8,f::UInt8,g::UInt8,h::UInt8)
194-
# (UInt64(a) << 56) | (UInt64(b) << 48) |
195-
# (UInt64(c) << 40) | (UInt64(d) << 32) |
196-
# (UInt64(e) << 24) | (UInt64(f) << 16) |
197-
# (UInt64(g) << 8) | UInt64(h)
198-
# end
196+
# Conversion routines for 1,2,4,8-byte words into a single 64-bit integer
197+
# Pack eight bytes into one Int64 in big-endian argument order: `a` lands in
# the most significant byte (bits 56-63) and `h` in the least significant byte.
# Bit 63 comes from the top bit of `a`, so the result is negative when `a >= 0x80`.
@inline function convertint64B(a::UInt8,b::UInt8,c::UInt8,d::UInt8,e::UInt8,f::UInt8,g::UInt8,h::UInt8)
198+
(Int64(a) << 56) | (Int64(b) << 48) | (Int64(c) << 40) | (Int64(d) << 32) |
199+
(Int64(e) << 24) | (Int64(f) << 16) | (Int64(g) << 8) | Int64(h)
200+
end
201+
# Pack eight bytes into one Int64 in little-endian argument order: `a` lands in
# the least significant byte and `h` in the most significant byte (bits 56-63).
# Bit 63 comes from the top bit of `h`, so the result is negative when `h >= 0x80`.
@inline function convertint64L(a::UInt8,b::UInt8,c::UInt8,d::UInt8,e::UInt8,f::UInt8,g::UInt8,h::UInt8)
202+
(Int64(h) << 56) | (Int64(g) << 48) | (Int64(f) << 40) | (Int64(e) << 32) |
203+
(Int64(d) << 24) | (Int64(c) << 16) | (Int64(b) << 8) | Int64(a)
204+
end
205+
# Pack four bytes into an Int64 in big-endian argument order: `a` is the most
# significant of the four bytes (bits 24-31) and `d` the least significant.
# Only the low 32 bits are populated, so the result is never negative.
@inline function convertint64B(a::UInt8,b::UInt8,c::UInt8,d::UInt8)
206+
(Int64(a) << 24) | (Int64(b) << 16) | (Int64(c) << 8) | Int64(d)
207+
end
208+
# Pack four bytes into an Int64 in little-endian argument order: `a` is the
# least significant byte and `d` the most significant of the four (bits 24-31).
# Only the low 32 bits are populated, so the result is never negative.
@inline function convertint64L(a::UInt8,b::UInt8,c::UInt8,d::UInt8)
209+
(Int64(d) << 24) | (Int64(c) << 16) | (Int64(b) << 8) | Int64(a)
210+
end
211+
# Pack two bytes into an Int64 in big-endian argument order: `a` is the high
# byte (bits 8-15) and `b` the low byte. The result lies in 0:65535.
@inline function convertint64B(a::UInt8,b::UInt8)
212+
(Int64(a) << 8) | Int64(b)
213+
end
214+
# Pack two bytes into an Int64 in little-endian argument order: `a` is the low
# byte and `b` the high byte (bits 8-15). The result lies in 0:65535.
@inline function convertint64L(a::UInt8,b::UInt8)
215+
(Int64(b) << 8) | Int64(a)
216+
end
199217

200218
# this version is slightly slower
201219
# function convertint64b(a::UInt8,b::UInt8,c::UInt8,d::UInt8,e::UInt8,f::UInt8,g::UInt8,h::UInt8)
@@ -212,15 +230,15 @@ end
212230

213231

214232
# TODO: cannot use AbstractString for some reason
215-
"""
216-
Concatenate an array of strings to a single string
217-
"""
218-
concatenate(strArray::Vector{T} where T <: AbstractString, separator=",") =
219-
foldl((x, y) -> *(x, y, separator), "", strArray)[1:end-length(separator)]
233+
# """
234+
# Concatenate an array of strings to a single string
235+
# """
236+
# concatenate(strArray::Vector{T} where T <: AbstractString, separator=",") =
237+
# foldl((x, y) -> *(x, y, separator), "", strArray)[1:end-length(separator)]
220238

221-
"""
222-
Convert a dictionary to an array of k=>v strings
223-
"""
224-
stringarray(dict::Dict) =
225-
["$x => $y" for (x, y) in dict]
239+
# """
240+
# Convert a dictionary to an array of k=>v strings
241+
# """
242+
# stringarray(dict::Dict) =
243+
# ["$x => $y" for (x, y) in dict]
226244

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Performance Test 1
2+
3+
## Summary
4+
5+
SASLib is ~12x faster than Pandas.
6+
7+
## Test File
8+
9+
Filename|Rows|Columns|Numeric Columns|String Columns
10+
--------|----|-------|---------------|--------------
11+
numeric_1000000_2.sas7bdat|1,000,000|2|2|0
12+
13+
## Python
14+
```
15+
$ python -V
16+
Python 3.6.3 :: Anaconda custom (64-bit)
17+
$ python perf_test1.py data_misc/numeric_1000000_2.sas7bdat 30
18+
Minimum: 1.8377 seconds
19+
Median: 1.9093 seconds
20+
Mean: 1.9168 seconds
21+
Maximum: 2.0423 seconds
22+
```
23+
24+
## Julia
25+
```
26+
$ julia perf_test1.jl data_misc/numeric_1000000_2.sas7bdat 30
27+
Julia Version 0.6.2
28+
Commit d386e40c17 (2017-12-13 18:08 UTC)
29+
Platform Info:
30+
OS: macOS (x86_64-apple-darwin14.5.0)
31+
CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
32+
WORD_SIZE: 64
33+
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
34+
LAPACK: libopenblas64_
35+
LIBM: libopenlibm
36+
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
37+
38+
Loaded library in 0.656 seconds
39+
BenchmarkTools.Trial:
40+
memory estimate: 153.16 MiB
41+
allocs estimate: 1002726
42+
--------------
43+
minimum time: 151.382 ms (3.41% GC)
44+
median time: 235.003 ms (35.13% GC)
45+
mean time: 202.453 ms (23.83% GC)
46+
maximum time: 272.253 ms (35.25% GC)
47+
--------------
48+
samples: 25
49+
evals/sample: 1
50+
```
51+
52+
+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Performance Test 2
2+
3+
## Summary
4+
5+
SASLib is 24-72x faster than Pandas.
6+
7+
## Test File
8+
9+
Filename |Rows|Columns|Numeric Columns|String Columns
10+
--------------|----|-------|---------------|--------------
11+
test1.sas7bdat|10 |100 |73 |27
12+
13+
## Python
14+
```
15+
$ python perf_test1.py data_pandas/test1.sas7bdat 100
16+
Minimum: 0.0800 seconds
17+
Median: 0.0868 seconds
18+
Mean: 0.0920 seconds
19+
Maximum: 0.1379 seconds
20+
```
21+
22+
## Julia (ObjectPool String Array)
23+
```
24+
$ julia perf_test1.jl data_pandas/test1.sas7bdat 100
25+
Julia Version 0.6.2
26+
Commit d386e40c17 (2017-12-13 18:08 UTC)
27+
Platform Info:
28+
OS: macOS (x86_64-apple-darwin14.5.0)
29+
CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
30+
WORD_SIZE: 64
31+
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
32+
LAPACK: libopenblas64_
33+
LIBM: libopenlibm
34+
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
35+
36+
Loaded library in 0.664 seconds
37+
BenchmarkTools.Trial:
38+
memory estimate: 988.28 KiB
39+
allocs estimate: 9378
40+
--------------
41+
minimum time: 1.149 ms (0.00% GC)
42+
median time: 1.222 ms (0.00% GC)
43+
mean time: 1.358 ms (6.98% GC)
44+
maximum time: 4.425 ms (55.85% GC)
45+
--------------
46+
samples: 100
47+
evals/sample: 1
48+
```
49+
50+
## Julia (Regular String Array)
51+
```
52+
$ julia perf_test_regarray.jl data_pandas/test1.sas7bdat 100
53+
Julia Version 0.6.2
54+
Commit d386e40c17 (2017-12-13 18:08 UTC)
55+
Platform Info:
56+
OS: macOS (x86_64-apple-darwin14.5.0)
57+
CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
58+
WORD_SIZE: 64
59+
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
60+
LAPACK: libopenblas64_
61+
LIBM: libopenlibm
62+
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
63+
64+
Loaded library in 0.680 seconds
65+
BenchmarkTools.Trial:
66+
memory estimate: 949.63 KiB
67+
allocs estimate: 8967
68+
--------------
69+
minimum time: 1.106 ms (0.00% GC)
70+
median time: 1.339 ms (0.00% GC)
71+
mean time: 1.482 ms (6.61% GC)
72+
maximum time: 4.545 ms (57.52% GC)
73+
--------------
74+
samples: 100
75+
evals/sample: 1
76+
```
+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Performance Test 3
2+
3+
## Summary
4+
5+
SASLib is ~14-22x faster than Pandas.
6+
7+
## Test File
8+
9+
Filename |Rows |Columns|Numeric Columns|String Columns
10+
---------------------|------|-------|---------------|--------------
11+
productsales.sas7bdat|1440 |10 |4 |6
12+
13+
## Python
14+
```
15+
$ python -V
16+
Python 3.6.3 :: Anaconda custom (64-bit)
17+
$ python perf_test1.py data_pandas/productsales.sas7bdat 100
18+
Minimum: 0.0286 seconds
19+
Median: 0.0316 seconds
20+
Mean: 0.0329 seconds
21+
Maximum: 0.0894 seconds
22+
```
23+
24+
## Julia (ObjectPool string array)
25+
```
26+
$ julia perf_test1.jl data_pandas/productsales.sas7bdat 100
27+
Julia Version 0.6.2
28+
Commit d386e40c17 (2017-12-13 18:08 UTC)
29+
Platform Info:
30+
OS: macOS (x86_64-apple-darwin14.5.0)
31+
CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
32+
WORD_SIZE: 64
33+
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
34+
LAPACK: libopenblas64_
35+
LIBM: libopenlibm
36+
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
37+
38+
Loaded library in 4.693 seconds
39+
BenchmarkTools.Trial:
40+
memory estimate: 1.07 MiB
41+
allocs estimate: 18573
42+
--------------
43+
minimum time: 2.088 ms (0.00% GC)
44+
median time: 2.133 ms (0.00% GC)
45+
mean time: 2.320 ms (4.10% GC)
46+
maximum time: 5.123 ms (47.12% GC)
47+
--------------
48+
samples: 100
49+
evals/sample: 1
50+
```
51+
52+
## Julia (regular string array)
53+
```
54+
Julia Version 0.6.2
55+
Commit d386e40c17 (2017-12-13 18:08 UTC)
56+
Platform Info:
57+
OS: macOS (x86_64-apple-darwin14.5.0)
58+
CPU: Intel(R) Core(TM) i5-4258U CPU @ 2.40GHz
59+
WORD_SIZE: 64
60+
BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell)
61+
LAPACK: libopenblas64_
62+
LIBM: libopenlibm
63+
LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
64+
65+
Loaded library in 0.651 seconds
66+
BenchmarkTools.Trial:
67+
memory estimate: 1.05 MiB
68+
allocs estimate: 18500
69+
--------------
70+
minimum time: 1.337 ms (0.00% GC)
71+
median time: 1.385 ms (0.00% GC)
72+
mean time: 1.556 ms (8.02% GC)
73+
maximum time: 5.486 ms (69.40% GC)
74+
--------------
75+
samples: 100
76+
evals/sample: 1
77+
78+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
## SASLib vs ReadStat results
2+
3+
Key | Description |
4+
--------|-------------------------|
5+
F64 | number of Float64 columns|
6+
STR | number of String columns|
7+
DT | number of date/time columns|
8+
COMP | compression method|
9+
S/R | SASLib time divided by ReadStat time|
10+
SA/R | SASLib time (regular string arrays) divided by ReadStat time|
11+
SASLibA | SASLib (regular string arrays)|
12+
13+
14+
```
15+
Filename : ReadStat SASLib S/R SASLibA SA/R F64 STR DT COMP
16+
data_misc/numeric_1000000_2.sas7bdat : 205.002 ms 152.764 ms ( 75%) 154.288 ms ( 75%) 2 0 0 None
17+
data_misc/types.sas7bdat : 0.093 ms 0.179 ms (194%) 0.180 ms (194%) 5 1 0 None
18+
data_AHS2013/homimp.sas7bdat : 40.138 ms 51.994 ms (130%) 24.975 ms ( 62%) 1 5 0 None
19+
data_AHS2013/omov.sas7bdat : 2.557 ms 5.136 ms (201%) 3.485 ms (136%) 3 5 0 RLE
20+
data_AHS2013/owner.sas7bdat : 13.859 ms 17.104 ms (123%) 9.272 ms ( 67%) 0 3 0 None
21+
data_AHS2013/ratiov.sas7bdat : 4.820 ms 8.170 ms (169%) 3.577 ms ( 74%) 0 9 0 None
22+
data_AHS2013/rmov.sas7bdat : 56.358 ms 101.530 ms (180%) 70.293 ms (125%) 2 21 0 RLE
23+
data_AHS2013/topical.sas7bdat : 2609.437 ms 2876.122 ms (110%) 1104.849 ms ( 42%) 8 106 0 RLE
24+
data_pandas/airline.sas7bdat : 0.105 ms 0.170 ms (161%) 0.172 ms (164%) 6 0 0 None
25+
data_pandas/datetime.sas7bdat : 0.080 ms 0.235 ms (293%) 0.234 ms (291%) 1 1 2 None
26+
data_pandas/productsales.sas7bdat : 2.276 ms 2.374 ms (104%) 1.355 ms ( 60%) 4 5 1 None
27+
data_pandas/test1.sas7bdat : 0.831 ms 1.162 ms (140%) 1.101 ms (132%) 73 25 2 None
28+
data_pandas/test2.sas7bdat : 0.846 ms 1.029 ms (122%) 0.971 ms (115%) 73 25 2 RLE
29+
data_pandas/test4.sas7bdat : 0.829 ms 1.162 ms (140%) 1.103 ms (133%) 73 25 2 None
30+
data_pandas/test5.sas7bdat : 0.848 ms 1.034 ms (122%) 0.974 ms (115%) 73 25 2 RLE
31+
data_pandas/test7.sas7bdat : 0.832 ms 1.182 ms (142%) 1.111 ms (133%) 73 25 2 None
32+
data_pandas/test9.sas7bdat : 0.850 ms 1.057 ms (124%) 0.993 ms (117%) 73 25 2 RLE
33+
data_pandas/test10.sas7bdat : 0.833 ms 1.166 ms (140%) 1.102 ms (132%) 73 25 2 None
34+
data_pandas/test12.sas7bdat : 0.849 ms 1.038 ms (122%) 0.974 ms (115%) 73 25 2 RLE
35+
data_pandas/test13.sas7bdat : 0.831 ms 1.180 ms (142%) 1.110 ms (134%) 73 25 2 None
36+
data_pandas/test15.sas7bdat : 0.852 ms 1.048 ms (123%) 0.988 ms (116%) 73 25 2 RLE
37+
data_pandas/test16.sas7bdat : 0.842 ms 2.236 ms (265%) 2.152 ms (255%) 73 25 2 None
38+
data_reikoch/barrows.sas7bdat : 6.923 ms 6.031 ms ( 87%) 6.047 ms ( 87%) 72 0 0 RLE
39+
data_reikoch/extr.sas7bdat : 0.177 ms 0.381 ms (215%) 0.368 ms (208%) 0 1 0 None
40+
data_reikoch/ietest2.sas7bdat : 0.061 ms 0.139 ms (229%) 0.138 ms (228%) 0 1 0 RLE
41+
```

0 commit comments

Comments
 (0)