Skip to content

Commit bc39af3

Browse files
authored
Merge pull request #25 from tk3369/tk/caseinsensitivecol
Case-insensitive match for column symbols in include/exclude_columns argument
2 parents 67696cd + 0dd9424 commit bc39af3

File tree

3 files changed

+56
-2
lines changed

3 files changed

+56
-2
lines changed

README.md

+21
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ julia> head(df,5)
126126
127127
### Inclusion/Exclusion of Columns
128128
129+
**Column Inclusion**
130+
129131
It is always faster to read only the columns that you need. The `include_columns` argument comes in handy:
130132
131133
```
@@ -142,6 +144,8 @@ Read data set of size 1440 x 4 in 0.004 seconds
142144
6948.01993-06-01 │ SOFA │ 1993.0
143145
```
144146
147+
**Column Exclusion**
148+
145149
Likewise, you can read all columns except the ones you don't want, as specified in the `exclude_columns` argument:
146150
147151
```
@@ -158,6 +162,23 @@ Read data set of size 1440 x 6 in 0.031 seconds
158162
6 │ CANADA │ EDUCATION │ 486.0 │ FURNITURE │ 2.0 │ EAST │
159163
```
160164
165+
**Case Sensitivity and Column Number**
166+
167+
Column symbols are matched against SAS column names in a case-insensitive manner.
168+
169+
Both `include_columns` and `exclude_columns` also accept column numbers. In fact, you can mix column symbols and column numbers as such:
170+
171+
```
172+
julia> readsas("productsales.sas7bdat", include_columns=[:actual, :predict, 8, 9, 10])[:column_names]
173+
Read productsales.sas7bdat with size 1440 x 5 in 0.00511 seconds
174+
5-element Array{String,1}:
175+
"ACTUAL"
176+
"PREDICT"
177+
"QUARTER"
178+
"YEAR"
179+
"MONTH"
180+
```
181+
161182
### Incremental Reading
162183
163184
If you need to read files incrementally, you can do so as such:

src/SASLib.jl

+25-2
Original file line numberDiff line numberDiff line change
@@ -1691,26 +1691,49 @@ logdebug = println
16911691
# return d
16921692
# end
16931693

1694+
# case insensitive column mapping
1695+
Base.lowercase(s::Symbol) = Symbol(lowercase(String(s)))
1696+
case_insensitive_in(s::Symbol, ar::AbstractArray) =
1697+
lowercase(s) in [x isa Symbol ? lowercase(x) : x for x in ar]
1698+
16941699
# fill column indices as a vector of tuples (column index, column symbol, column type)
16951700
function _fill_column_indices(handler)
16961701
handler.column_indices = Vector{Tuple{Int64, Symbol, UInt8}}()
16971702
inflag = length(handler.config.include_columns) > 0
16981703
exflag = length(handler.config.exclude_columns) > 0
16991704
inflag && exflag && throw(ConfigError("You can specify either include_columns or exclude_columns but not both."))
1705+
processed = []
17001706
for j in 1:length(handler.column_symbols)
17011707
name = handler.column_symbols[j]
17021708
if inflag
1703-
if j in handler.config.include_columns || name in handler.config.include_columns
1709+
if j in handler.config.include_columns ||
1710+
case_insensitive_in(name, handler.config.include_columns)
17041711
push!(handler.column_indices, (j, name, handler.column_types[j]))
1712+
push!(processed, lowercase(name))
17051713
end
17061714
elseif exflag
1707-
if !(j in handler.config.exclude_columns || name in handler.config.exclude_columns)
1715+
if !(j in handler.config.exclude_columns ||
1716+
case_insensitive_in(name, handler.config.exclude_columns))
17081717
push!(handler.column_indices, (j, name, handler.column_types[j]))
1718+
else
1719+
push!(processed, lowercase(name))
17091720
end
17101721
else
17111722
push!(handler.column_indices, (j, name, handler.column_types[j]))
17121723
end
17131724
end
1725+
if inflag && length(processed) != length(handler.config.include_columns)
1726+
diff = setdiff(handler.config.include_columns, processed)
1727+
for c in diff
1728+
warn("Unknown include column $c")
1729+
end
1730+
end
1731+
if exflag && length(processed) != length(handler.config.exclude_columns)
1732+
diff = setdiff(handler.config.exclude_columns, processed)
1733+
for c in diff
1734+
warn("Unknown exclude column $c")
1735+
end
1736+
end
17141737
# println2(handler, "column_indices = $(handler.column_indices)")
17151738
end
17161739

test/runtests.jl

+10
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...)
113113
@test result[:ncols] == 1
114114
@test sort(result[:column_symbols]) == sort([:ACTUAL])
115115

116+
# case insensitive include/exclude
117+
result = readsas(fname, include_columns=[:month, :Year])
118+
@test result[:ncols] == 2
119+
result = readsas(fname, exclude_columns=[:diVisiON])
120+
@test result[:ncols] == 9
121+
122+
# bad include/exclude param
123+
@test_warn "Unknown include column" readsas(fname, include_columns=[:blah, :Year])
124+
@test_warn "Unknown exclude column" readsas(fname, exclude_columns=[:blah, :Year])
125+
116126
# error handling
117127
@test_throws SASLib.ConfigError readsas(fname,
118128
include_columns=[1], exclude_columns=[1])

0 commit comments

Comments
 (0)