@@ -28,6 +28,7 @@ struct ReaderConfig
28
28
include_columns:: Vector
29
29
exclude_columns:: Vector
30
30
string_array_fn:: Dict{Symbol, Function}
31
+ number_array_fn:: Dict{Symbol, Function}
31
32
verbose_level:: Int64
32
33
end
33
34
@@ -152,6 +153,7 @@ open(filename::AbstractString;
152
153
include_columns::Vector = [],
153
154
exclude_columns::Vector = [],
154
155
string_array_fn::Dict = Dict(),
156
+ number_array_fn::Dict = Dict(),
155
157
verbose_level::Int64 = 1)
156
158
157
159
Open a SAS7BDAT data file. Returns a `SASLib.Handler` object that can be used in
@@ -163,9 +165,10 @@ function open(filename::AbstractString;
163
165
include_columns:: Vector = [],
164
166
exclude_columns:: Vector = [],
165
167
string_array_fn:: Dict = Dict (),
168
+ number_array_fn:: Dict = Dict (),
166
169
verbose_level:: Int64 = 1 )
167
170
return _open (ReaderConfig (filename, encoding, default_chunk_size, convert_dates,
168
- include_columns, exclude_columns, string_array_fn, verbose_level))
171
+ include_columns, exclude_columns, string_array_fn, number_array_fn, verbose_level))
169
172
end
170
173
171
174
"""
@@ -205,6 +208,7 @@ readsas(filename::AbstractString;
205
208
include_columns::Vector = [],
206
209
exclude_columns::Vector = [],
207
210
string_array_fn::Dict = Dict(),
211
+ number_array_fn::Dict = Dict(),
208
212
verbose_level::Int64 = 1)
209
213
210
214
Read a SAS7BDAT file.
@@ -239,6 +243,10 @@ For examples,
239
243
or
240
244
`string_array_fn = Dict(:column1 => REGULAR_STR_ARRAY)`.
241
245
246
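+ For numeric columns, you may specify your own array constructors using
247
+ the `number_array_fn` parameter, keyed by column symbol (or `:_all_` for every
248
+ numeric column). Each constructor is called with the number of rows, which is
+ useful when the values should be stored in a different kind of array, e.g. `SharedArray`.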
249
+
242
250
For debugging purposes, `verbose_level` may be set to a value higher than 1.
243
251
Verbose level 0 will output nothing to the console, essentially a totally quiet
244
252
option.
@@ -249,11 +257,12 @@ function readsas(filename::AbstractString;
249
257
include_columns:: Vector = [],
250
258
exclude_columns:: Vector = [],
251
259
string_array_fn:: Dict = Dict (),
260
+ number_array_fn:: Dict = Dict (),
252
261
verbose_level:: Int64 = 1 )
253
262
handler = nothing
254
263
try
255
264
handler = _open (ReaderConfig (filename, encoding, default_chunk_size, convert_dates,
256
- include_columns, exclude_columns, string_array_fn, verbose_level))
265
+ include_columns, exclude_columns, string_array_fn, number_array_fn, verbose_level))
257
266
return read (handler)
258
267
finally
259
268
isdefined (handler, :string_decoder ) && Base. close (handler. string_decoder)
@@ -982,7 +991,7 @@ function read_chunk(handler, nrows=0)
982
991
perf_read_data = toq ()
983
992
984
993
tic ()
985
- rslt = _chunk_to_dataframe (handler)
994
+ rslt = _chunk_to_dataframe (handler, nrows )
986
995
perf_chunk_to_data_frame = toq ()
987
996
988
997
# here column symbols contains only ones for columns that are actually read
@@ -1015,7 +1024,8 @@ function read_chunk(handler, nrows=0)
1015
1024
:column_info => column_info,
1016
1025
:compression => compressionstring (handler),
1017
1026
:perf_read_data => perf_read_data,
1018
- :perf_type_conversion => perf_chunk_to_data_frame
1027
+ :perf_type_conversion => perf_chunk_to_data_frame,
1028
+ :process_id => myid ()
1019
1029
)
1020
1030
end
1021
1031
@@ -1039,6 +1049,17 @@ function createstrarray(handler, column_symbol, nrows)
1039
1049
end
1040
1050
end
1041
1051
1052
+ # create numeric array
1053
+ function createnumarray (handler, column_symbol, nrows)
1054
+ if haskey (handler. config. number_array_fn, column_symbol)
1055
+ handler. config. number_array_fn[column_symbol](nrows)
1056
+ elseif haskey (handler. config. number_array_fn, :_all_ )
1057
+ handler. config. number_array_fn[:_all_ ](nrows)
1058
+ else
1059
+ zeros (Float64, nrows)
1060
+ end
1061
+ end
1062
+
1042
1063
function nullresult (filename)
1043
1064
Dict (
1044
1065
:data => Dict (),
@@ -1121,7 +1142,7 @@ end
1121
1142
# Construct Dict object that holds the columns.
1122
1143
# For date or datetime columns, convert from numeric value to Date/DateTime type column.
1123
1144
# The resulting dictionary uses column symbols as the key.
1124
- function _chunk_to_dataframe (handler)
1145
+ function _chunk_to_dataframe (handler, nrows )
1125
1146
# println("IN: _chunk_to_dataframe")
1126
1147
1127
1148
n = handler. current_row_in_chunk_index
@@ -1137,7 +1158,8 @@ function _chunk_to_dataframe(handler)
1137
1158
# if j == 1 && length(bytes) < 100 #debug only
1138
1159
# println(" bytes=$bytes")
1139
1160
# end
1140
- values = convertfloat64f (bytes, handler. file_endianness)
1161
+ values = createnumarray (handler, name, nrows)
1162
+ convertfloat64f! (values, bytes, handler. file_endianness)
1141
1163
# println(length(bytes))
1142
1164
# rslt[name] = bswap(rslt[name])
1143
1165
rslt[name] = values
@@ -1149,6 +1171,7 @@ function _chunk_to_dataframe(handler)
1149
1171
rslt[name] = datetime_from_float (rslt[name])
1150
1172
end
1151
1173
end
1174
+
1152
1175
elseif ty == column_type_string
1153
1176
# println(" String: size=$(size(handler.string_chunk))")
1154
1177
# println(" String: column $j, name $name, size=$(size(handler.string_chunk[js, :]))")
@@ -1714,6 +1737,7 @@ function Base.show(io::IO, h::Handler)
1714
1737
println (io, " page size: $(h. page_length) " )
1715
1738
println (io, " pages: $(h. page_count) " )
1716
1739
println (io, " rows: $(h. row_count) " )
1740
+ println (io, " cols: $(h. column_count) " )
1717
1741
end
1718
1742
1719
1743
0 commit comments