Skip to content

Commit

Permalink
Bump Polars 0.37 (#861)
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee authored Feb 25, 2024
1 parent 63bff40 commit 7f168f5
Show file tree
Hide file tree
Showing 16 changed files with 506 additions and 275 deletions.
1 change: 0 additions & 1 deletion datasets/iris.csv
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,3 @@ sepal_length,sepal_width,petal_length,petal_width,species
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica

7 changes: 6 additions & 1 deletion lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,12 @@ defmodule Explorer.Backend.LazySeries do

@impl true
def format(list) do
series_list = Enum.map(list, &series_or_lazy_series!/1)
series_list =
Enum.map(list, fn
s when is_binary(s) -> s
s -> series_or_lazy_series!(s)
end)

data = new(:format, [series_list], :string, aggregations?(series_list))

Backend.Series.new(data, :string)
Expand Down
1 change: 0 additions & 1 deletion lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ defmodule Explorer.PolarsBackend.Native do
# Placeholder definitions: every function in this run ignores its arguments
# and returns whatever err() returns.
# NOTE(review): presumably these are NIF stubs that the native library
# replaces at load time, with err() signalling "NIF not loaded" - confirm.
def s_fill_missing_with_atom(_s, _value), do: err()
def s_fill_missing_with_date(_s, _value), do: err()
def s_fill_missing_with_datetime(_s, _value), do: err()
# NOTE(review): the hunk header for this file reports one deletion and no
# additions, and s_format is the stub the old backend format/1 called, so
# this line looks like the one removed by the commit - confirm.
def s_format(_series_list), do: err()
def s_greater(_s, _rhs), do: err()
def s_greater_equal(_s, _rhs), do: err()
def s_head(_s, _length), do: err()
Expand Down
22 changes: 19 additions & 3 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,26 @@ defmodule Explorer.PolarsBackend.Series do

@impl true
# Builds one string series out of a list whose elements are series, binaries
# or nils, by evaluating a lazy :format expression against a temporary data
# frame that holds the series as columns.
#
# NOTE(review): this span is rendered from a commit diff (the hunk header
# reports 19 additions and 3 deletions), so lines from both sides appear
# together. The `polars_series` binding and the `Shared.apply(:s_format, ...)`
# pipeline look like the three removed lines - confirm against the repository.
def format(list) do
polars_series = for s <- list, do: s.data
# Single pass over the inputs. Binaries and nils go into `params` unchanged.
# Any other element is registered in `df_args` under a generated column name
# ("1", "2", ...) and replaced in `params` by a lazy :column reference to
# that name. Both accumulators are built by prepending.
{_, df_args, params} =
Enum.reduce(list, {0, [], []}, fn s, {counter, df_args, params} ->
if is_binary(s) or Kernel.is_nil(s) do
{counter, df_args, [s | params]}
else
counter = counter + 1
name = "#{counter}"
column = Explorer.Backend.LazySeries.new(:column, [name], :string)
{counter, [{name, s} | df_args], [column | params]}
end
end)

Shared.apply(:s_format, [polars_series])
|> Shared.create_series()
# `df_args` is passed in its prepended (reverse) order; the expression refers
# to columns by name. `params` is reversed to restore the input order.
df = Explorer.PolarsBackend.DataFrame.from_series(df_args)
format_expr = Explorer.Backend.LazySeries.new(:format, [Enum.reverse(params)], :string)
# `out_df` describes the frame after the mutation: `df` plus a "result"
# column of dtype :string.
out_dtypes = Map.put(df.dtypes, "result", :string)
out_names = ["result" | df.names]
out_df = %{df | dtypes: out_dtypes, names: out_names}

# Evaluate the expression as the "result" column and return that column.
Explorer.PolarsBackend.DataFrame.mutate_with(df, out_df, [{"result", format_expr}])
|> Explorer.PolarsBackend.DataFrame.pull("result")
end

@impl true
Expand Down
35 changes: 28 additions & 7 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1274,13 +1274,27 @@ defmodule Explorer.Series do
"""
@doc type: :element_wise
# Clause for categories given as a series: delegates to the backend through
# apply_series(series, :categorise, [categories]).
#
# NOTE(review): two clause heads appear back to back because this span is
# rendered from a commit diff. The first head (categories of dtype :string or
# :category) looks like the pre-change version and the second (categories of
# dtype :category only) like its replacement - confirm against the repository.
def categorise(%Series{dtype: l_dtype} = series, %Series{dtype: dtype} = categories)
when K.and(K.in(l_dtype, [:string | @integer_types]), K.in(dtype, [:string, :category])),
def categorise(%Series{dtype: l_dtype} = series, %Series{dtype: :category} = categories)
when K.in(l_dtype, [:string | @integer_types]),
do: apply_series(series, :categorise, [categories])

# Clause for categories supplied as a :string series. The categories must
# contain no nil and no repeated value; once validated they are cast to
# :category and handed to the backend.
def categorise(%Series{dtype: l_dtype} = series, %Series{dtype: :string} = categories)
    when K.in(l_dtype, [:string | @integer_types]) do
  cond do
    # Checked first, so a series with both nils and duplicates reports the nils.
    nil_count(categories) != 0 ->
      raise ArgumentError, "categories as strings cannot have nil values"

    count(categories) != n_distinct(categories) ->
      raise ArgumentError, "categories as strings cannot have duplicated values"

    true ->
      apply_series(series, :categorise, [cast(categories, :category)])
  end
end

# Clause for categories given as a non-empty list whose first element is a
# binary: the list is turned into a :string series with from_list/2.
#
# NOTE(review): two `do:` bodies appear because this span is rendered from a
# commit diff. The direct apply_series call looks like the pre-change body and
# the recursive categorise/2 call like its replacement - confirm against the
# repository.
def categorise(%Series{dtype: l_dtype} = series, [head | _] = categories)
when K.and(K.in(l_dtype, [:string | @integer_types]), is_binary(head)),
do: apply_series(series, :categorise, [from_list(categories, dtype: :string)])
do: categorise(series, from_list(categories, dtype: :string))

# Slice and dice

Expand Down Expand Up @@ -2086,13 +2100,20 @@ defmodule Explorer.Series do
iex> s1 = Explorer.Series.from_list([<<1>>, <<239, 191, 19>>], dtype: :binary)
iex> s2 = Explorer.Series.from_list([<<3>>, <<4>>], dtype: :binary)
iex> Explorer.Series.format([s1, s2])
** (RuntimeError) Polars Error: invalid utf-8 sequence
** (RuntimeError) Polars Error: invalid utf8
"""
@doc type: :shape
@spec format([Series.t() | String.t()]) :: Series.t()
# Normalises a non-empty list with cast_to_string/1 (only partly visible in
# this view) and delegates to the format/1 of the backend implementation.
#
# NOTE(review): this span is rendered from a commit diff. The bare
# `impl!(list).format(list)` call right after the cast looks like the
# pre-change line, superseded by the `if`/`else` below - confirm.
# NOTE(review): cast_to_string/1 appears to let nil values through, while the
# @spec above lists only Series.t() and String.t() - confirm whether nil
# should be part of the spec.
def format([_ | _] = list) do
list = cast_to_string(list)
impl!(list).format(list)

# When impl!/1 yields an implementation, use it directly. Otherwise
# (presumably when the list holds no series at all - confirm) wrap the first
# element in a one-element :string series so an implementation can be
# resolved from it, and keep the remaining elements unchanged.
if impl = impl!(list) do
impl.format(list)
else
[hd | rest] = list
s = Series.from_list([hd], dtype: :string)
impl!([s]).format([s | rest])
end
end

defp cast_to_string(list) do
Expand All @@ -2103,8 +2124,8 @@ defmodule Explorer.Series do
%Series{} = s ->
cast(s, :string)

value when is_binary(value) ->
from_list([value], dtype: :string)
value when K.or(is_binary(value), K.is_nil(value)) ->
value

other ->
raise ArgumentError,
Expand Down
Loading

0 comments on commit 7f168f5

Please sign in to comment.