Skip to content

Commit

Permalink
feat: add column check for pairs
Browse files Browse the repository at this point in the history
The root cause of the issue was that the column was renamed, and then
couldn't be found within the Polars data frame. This is a fairly naive
solution, and I'm open to ideas about how it might be improved.
  • Loading branch information
pcapel committed Feb 7, 2024
1 parent 3080d79 commit ce0c9ca
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
10 changes: 10 additions & 0 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3609,6 +3609,7 @@ defmodule Explorer.DataFrame do
df

pairs ->
maybe_raise_column_duplicate(pairs)
pairs_map = Map.new(pairs)
old_dtypes = df.dtypes

Expand All @@ -3633,6 +3634,15 @@ defmodule Explorer.DataFrame do
end
end

# Rejects rename pairs in which the same source column is listed more than once.
#
# Walks `pairs` in order, remembering the first element of every tuple in a
# `MapSet`. The moment a first element shows up a second time, an
# `ArgumentError` naming that column is raised. When no column repeats, the
# set of column names that were seen is returned.
defp maybe_raise_column_duplicate(pairs) when is_column_pairs(pairs) do
  Enum.reduce(pairs, MapSet.new(), fn {name, _target}, visited ->
    if MapSet.member?(visited, name) do
      raise ArgumentError, "duplicate column name \"#{name}\" in rename"
    else
      MapSet.put(visited, name)
    end
  end)
end

defp check_new_names_length!(df, names) do
width = n_columns(df)
n_new_names = length(names)
Expand Down
2 changes: 1 addition & 1 deletion test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2766,7 +2766,7 @@ defmodule Explorer.DataFrameTest do
test "with keyword and a column that is duplicated" do
df = DF.new(a: [1, 2, 3], b: ["a", "b", "c"])

assert_raise ArgumentError, ~r"duplicate column name \"g\"", fn ->
assert_raise ArgumentError, ~r"duplicate column name \"a\"", fn ->
DF.rename(df, a: "first", a: "second")
end
end
Expand Down

0 comments on commit ce0c9ca

Please sign in to comment.