@@ -12,6 +12,7 @@ import Base: show, size
12
12
include (" constants.jl" )
13
13
include (" utils.jl" )
14
14
include (" ObjectPool.jl" )
15
+ include (" CIDict.jl" )
15
16
include (" Types.jl" )
16
17
include (" ResultSet.jl" )
17
18
include (" Metadata.jl" )
@@ -32,6 +33,7 @@ function _open(config::ReaderConfig)
32
33
handler. current_page = 0
33
34
_get_properties (handler)
34
35
_parse_metadata (handler)
36
+ _post_metadata_handler (handler)
35
37
return handler
36
38
end
37
39
@@ -43,6 +45,7 @@ open(filename::AbstractString;
43
45
exclude_columns::Vector = [],
44
46
string_array_fn::Dict = Dict(),
45
47
number_array_fn::Dict = Dict(),
48
+ column_types::Dict = Dict{Symbol,Type}(),
46
49
verbose_level::Int64 = 1)
47
50
48
51
Open a SAS7BDAT data file. Returns a `SASLib.Handler` object that can be used in
@@ -55,9 +58,11 @@ function open(filename::AbstractString;
55
58
exclude_columns:: Vector = [],
56
59
string_array_fn:: Dict = Dict (),
57
60
number_array_fn:: Dict = Dict (),
61
+ column_types:: Dict = Dict {Symbol,Type} (),
58
62
verbose_level:: Int64 = 1 )
59
63
return _open (ReaderConfig (filename, encoding, default_chunk_size, convert_dates,
60
- include_columns, exclude_columns, string_array_fn, number_array_fn, verbose_level))
64
+ include_columns, exclude_columns, string_array_fn, number_array_fn,
65
+ column_types, verbose_level))
61
66
end
62
67
63
68
"""
@@ -97,6 +102,7 @@ readsas(filename::AbstractString;
97
102
exclude_columns::Vector = [],
98
103
string_array_fn::Dict = Dict(),
99
104
number_array_fn::Dict = Dict(),
105
+ column_types::Dict = Dict{Symbol,Type}(),
100
106
verbose_level::Int64 = 1)
101
107
102
108
Read a SAS7BDAT file.
@@ -135,6 +141,9 @@ For numeric columns, you may specify your own array constructors using
135
141
the `number_array_fn` parameter. Perhaps you have a different kind of
136
142
array to store the values e.g. SharedArray.
137
143
144
+ Specify the `column_types` argument if any type conversion is required. It should
145
+ be a Dict, mapping column symbol to a data type.
146
+
138
147
For debugging purpose, `verbose_level` may be set to a value higher than 1.
139
148
Verbose level 0 will output nothing to the console, essentially a total quiet
140
149
option.
@@ -146,11 +155,13 @@ function readsas(filename::AbstractString;
146
155
exclude_columns:: Vector = [],
147
156
string_array_fn:: Dict = Dict (),
148
157
number_array_fn:: Dict = Dict (),
158
+ column_types:: Dict = Dict {Symbol,Type} (),
149
159
verbose_level:: Int64 = 1 )
150
160
handler = nothing
151
161
try
152
162
handler = _open (ReaderConfig (filename, encoding, default_chunk_size, convert_dates,
153
- include_columns, exclude_columns, string_array_fn, number_array_fn, verbose_level))
163
+ include_columns, exclude_columns, string_array_fn, number_array_fn,
164
+ column_types, verbose_level))
154
165
return read (handler)
155
166
finally
156
167
isdefined (handler, :string_decoder ) && Base. close (handler. string_decoder)
@@ -390,6 +401,20 @@ function _parse_metadata(handler)
390
401
end
391
402
end
392
403
404
# Hook that runs once the metadata has been parsed and before any data is read.
#
# It stores a case-insensitive copy of the user-supplied `column_types`
# mapping on the handler, then emits a warning for every requested key that
# does not match (case-insensitively) one of the handler's column symbols.
function _post_metadata_handler(handler)
    requested = handler.config.column_types

    # keep the requested types in a case-insensitive dict for later lookups
    handler.column_types_dict = CIDict{Symbol,Type}(requested)

    # validate the requested keys against the columns found in the file
    for k in keys(requested)
        case_insensitive_in(k, handler.column_symbols) && continue
        Compat.@warn("Unknown column symbol ($k) in column_types. Ignored.")
    end
    return nothing
end
417
+
393
418
function _process_page_meta (handler)
394
419
# println3(handler, "IN: _process_page_meta")
395
420
_read_page_header (handler)
@@ -1006,7 +1031,7 @@ function _chunk_to_dataframe(handler, nrows)
1006
1031
rslt[name] = datetime_from_float (rslt[name])
1007
1032
end
1008
1033
end
1009
-
1034
+ convert_column_type_if_needed! (handler, rslt, name)
1010
1035
elseif ty == column_type_string
1011
1036
# println(" String: size=$(size(handler.string_chunk))")
1012
1037
# println(" String: column $j, name $name, size=$(size(handler.string_chunk[js, :]))")
@@ -1018,6 +1043,21 @@ function _chunk_to_dataframe(handler, nrows)
1018
1043
return rslt
1019
1044
end
1020
1045
1046
# If the user specified a type for the column, try to convert the column data.
#
# Arguments:
# - `handler`: reader handler; its `column_types_dict` is looked up by `name`
#   to find the type the user asked for.
# - `rslt`: result container indexed by column name; `rslt[name]` is replaced
#   by the converted vector when the conversion succeeds.
# - `name`: the column symbol being processed.
#
# Nothing is done when no type was requested for `name`, or when the requested
# type is `Float64` (presumably the type numeric columns already have at this
# point -- confirm against the caller). A failed conversion is best-effort: the
# column is left untouched and a warning naming the column is logged.
function convert_column_type_if_needed!(handler, rslt, name)
    haskey(handler.column_types_dict, name) || return nothing

    type_wanted = handler.column_types_dict[name]
    type_wanted == Float64 && return nothing

    try
        rslt[name] = convert(Vector{type_wanted}, rslt[name])
    catch ex
        # include the column name so the user can tell which entry of
        # `column_types` could not be honoured
        Compat.@warn("Unable to convert column $name to type $type_wanted, error=$ex")
    end
end
1060
+
1021
1061
# Simple loop that reads data row-by-row.
1022
1062
function read_data (handler, nrows)
1023
1063
# println("IN: read_data, nrows=$nrows")
0 commit comments