Skip to content

Commit

Permalink
Update data_frame.ex
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee committed Feb 13, 2024
1 parent 1a8fc05 commit 1c951ad
Showing 1 changed file with 76 additions and 89 deletions.
165 changes: 76 additions & 89 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2800,118 +2800,105 @@ defmodule Explorer.DataFrame do

result = fun.(ldf)

column_pairs =
to_column_pairs(df, result, fn value ->
case value do
%Series{data: %LazySeries{}} = lazy_series ->
lazy_series

%Series{data: _other} ->
raise ArgumentError,
"expecting a lazy series. Consider using `Explorer.DataFrame.put/3` " <>
"to add eager series to your dataframe."

list when is_list(list) ->
raise ArgumentError,
"expecting a lazy series or scalar value, but instead got a list. " <>
"consider using `Explorer.Series.from_list/2` to create a `Series`, " <>
"and then `Explorer.DataFrame.put/3` to add the series to your dataframe."

nil ->
lazy_s = LazySeries.new(:lazy, [nil], :null)
Explorer.Backend.Series.new(lazy_s, :null)

number when is_number(number) ->
dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64}
lazy_s = LazySeries.new(:lazy, [number], dtype)

Explorer.Backend.Series.new(lazy_s, dtype)

string when is_binary(string) ->
lazy_s = LazySeries.new(:lazy, [string], :string)

Explorer.Backend.Series.new(lazy_s, :string)

boolean when is_boolean(boolean) ->
lazy_s = LazySeries.new(:lazy, [boolean], :boolean)

Explorer.Backend.Series.new(lazy_s, :boolean)
column_pairs = to_column_pairs(df, result, &value!/1)

date = %Date{} ->
lazy_s = LazySeries.new(:lazy, [date], :date)
new_dtypes =
for {column_name, series} <- column_pairs, into: %{} do
{column_name, series.dtype}
end

Explorer.Backend.Series.new(lazy_s, :date)
mut_names = Enum.map(column_pairs, &elem(&1, 0))
new_names = Enum.uniq(df.names ++ mut_names)

datetime = %NaiveDateTime{} ->
lazy_s = LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond})
df_out = %{df | names: new_names, dtypes: Map.merge(df.dtypes, new_dtypes)}

Explorer.Backend.Series.new(lazy_s, {:datetime, :nanosecond})
column_pairs = for {name, %Series{data: lazy_series}} <- column_pairs, do: {name, lazy_series}

duration = %Explorer.Duration{precision: precision} ->
lazy_s = LazySeries.new(:lazy, [duration], {:duration, precision})
Shared.apply_impl(df, :mutate_with, [df_out, column_pairs])
end

Explorer.Backend.Series.new(lazy_s, {:duration, precision})
# Passes a series whose data is a `LazySeries` through unchanged.
defp value!(%Series{data: %LazySeries{}} = series), do: series

map = %{} when not is_struct(map) ->
{series_list, dtype_list} =
Enum.reduce(map, {[], []}, fn {name, series_or_scalars}, {sl, dl} ->
series =
case series_or_scalars do
%Series{} ->
series_or_scalars.data
# Rejects any series matched by this clause with an `ArgumentError`.
# The message points the caller at `Explorer.DataFrame.put/3` for eager series.
defp value!(%Series{data: _not_lazy}) do
  message =
    "expecting a lazy series. Consider using `Explorer.DataFrame.put/3` " <>
      "to add eager series to your dataframe."

  raise ArgumentError, message
end

nil ->
LazySeries.new(:lazy, [nil], :null)
# Handles a list result: folds the list into a map and then calls `value!/1` on that map.
#
# Accepted list elements:
#   * a series whose data is a `LazySeries` with `op: :column` - stored under `s.name`
#   * a `{name, series}` tuple whose name is an atom or a binary - stored under `name`
# Any other element raises `ArgumentError`.
#
# NOTE(review): this span comes from a rendered diff. The `number`/`string`/`boolean`
# clauses interleaved below appear to belong to the removed inline `case`, not to this
# function - TODO confirm against the committed file.
defp value!(list) when is_list(list) do
map =
Enum.reduce(list, %{}, fn
%Series{data: %LazySeries{op: :column}} = s, acc ->
Map.put(acc, s.name, s)

number when is_number(number) ->
dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64}
LazySeries.new(:lazy, [number], dtype)
{name, %Series{} = s}, acc when is_atom(name) or is_binary(name) ->
Map.put(acc, name, s)

string when is_binary(string) ->
LazySeries.new(:lazy, [string], :string)
_, _ ->
raise ArgumentError,
"expecting list elements to be columns. Consider using %{a: a, ...} instead of list [a, ...]"
end)

boolean when is_boolean(boolean) ->
LazySeries.new(:lazy, [boolean], :boolean)
# NOTE(review): `map` is always a map at this point (the reduce either returns the
# accumulator built with `Map.put/3` or raises), and a map is truthy, so the `else`
# branch below looks unreachable.
if map do
value!(map)
else
raise ArgumentError,
"expecting a lazy series or scalar value, but instead got a list. " <>
"consider using `Explorer.Series.from_list/2` to create a `Series`, " <>
"and then `Explorer.DataFrame.put/3` to add the series to your dataframe."
end
end

date = %Date{} ->
LazySeries.new(:lazy, [date], :date)
# Fallback clause: builds a lazy series from the value via `lazy_series!/1` and passes it,
# together with its dtype, to `Explorer.Backend.Series.new/2`.
defp value!(scalar) do
  lazy = lazy_series!(scalar)
  dtype = lazy.dtype
  Explorer.Backend.Series.new(lazy, dtype)
end

datetime = %NaiveDateTime{} ->
LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond})
# Converts a value into a `LazySeries`, or raises `ArgumentError` if it is not supported.
#
# Conversions visible in the clauses of this function:
#   * series whose data is a `LazySeries` -> that `LazySeries` (`series.data`)
#   * `nil`                               -> dtype `:null`
#   * integer                             -> dtype `{:s, 64}`
#   * any other number                    -> dtype `{:f, 64}`
#   * binary                              -> dtype `:string`
#   * boolean                             -> dtype `:boolean`
#   * `%Date{}`                           -> dtype `:date`
#   * `%NaiveDateTime{}`                  -> dtype `{:datetime, :nanosecond}`
#   * `%Explorer.Duration{}`              -> dtype `{:duration, precision}`
#   * non-struct map                      -> dtype `{:struct, dtype_list}`; each value is
#     converted recursively with `lazy_series!/1`, atom keys are turned into strings, and
#     the `{name, dtype}` pairs are sorted
#
# NOTE(review): this span comes from a rendered diff. Lines of the removed inline `case`
# (extra `end`/`end)` closers, `new_dtypes`, `mut_names`, `df_out`, `Shared.apply_impl`, ...)
# appear interleaved with the clauses of this function - TODO confirm against the
# committed file.
defp lazy_series!(scalar) do
case scalar do
%Series{data: %LazySeries{}} = series ->
series.data

duration = %Explorer.Duration{precision: precision} ->
LazySeries.new(:lazy, [duration], {:duration, precision})
end
nil ->
LazySeries.new(:lazy, [nil], :null)

name = if is_atom(name), do: Atom.to_string(name), else: name
{[{name, series} | sl], [{name, series.dtype} | dl]}
end)
number when is_number(number) ->
dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64}
LazySeries.new(:lazy, [number], dtype)

map = Enum.into(series_list, %{})
dtype_list = Enum.sort(dtype_list)
string when is_binary(string) ->
LazySeries.new(:lazy, [string], :string)

lazy_s = LazySeries.new(:lazy, [map], {:struct, dtype_list})
Explorer.Backend.Series.new(lazy_s, {:struct, dtype_list})
boolean when is_boolean(boolean) ->
LazySeries.new(:lazy, [boolean], :boolean)

other ->
raise ArgumentError,
"expecting a lazy series or scalar value, but instead got #{inspect(other)}"
end
end)
date = %Date{} ->
LazySeries.new(:lazy, [date], :date)

new_dtypes =
for {column_name, series} <- column_pairs, into: %{} do
{column_name, series.dtype}
end
datetime = %NaiveDateTime{} ->
LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond})

mut_names = Enum.map(column_pairs, &elem(&1, 0))
new_names = Enum.uniq(df.names ++ mut_names)
duration = %Explorer.Duration{precision: precision} ->
LazySeries.new(:lazy, [duration], {:duration, precision})

df_out = %{df | names: new_names, dtypes: Map.merge(df.dtypes, new_dtypes)}
map = %{} when not is_struct(map) ->
{series_list, dtype_list} =
Enum.reduce(map, {[], []}, fn {name, series}, {sl, dl} ->
lazy_series = lazy_series!(series)
name = if is_atom(name), do: Atom.to_string(name), else: name
{[{name, lazy_series} | sl], [{name, lazy_series.dtype} | dl]}
end)

column_pairs = for {name, %Series{data: lazy_series}} <- column_pairs, do: {name, lazy_series}
map = Enum.into(series_list, %{})
dtype_list = Enum.sort(dtype_list)
LazySeries.new(:lazy, [map], {:struct, dtype_list})

Shared.apply_impl(df, :mutate_with, [df_out, column_pairs])
other ->
raise ArgumentError,
"expecting a lazy series or scalar value, but instead got #{inspect(other)}"
end
end

@doc """
Expand Down

0 comments on commit 1c951ad

Please sign in to comment.