diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 05186b69a..b65ae31a5 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2800,118 +2800,105 @@ defmodule Explorer.DataFrame do result = fun.(ldf) - column_pairs = - to_column_pairs(df, result, fn value -> - case value do - %Series{data: %LazySeries{}} = lazy_series -> - lazy_series - - %Series{data: _other} -> - raise ArgumentError, - "expecting a lazy series. Consider using `Explorer.DataFrame.put/3` " <> - "to add eager series to your dataframe." - - list when is_list(list) -> - raise ArgumentError, - "expecting a lazy series or scalar value, but instead got a list. " <> - "consider using `Explorer.Series.from_list/2` to create a `Series`, " <> - "and then `Explorer.DataFrame.put/3` to add the series to your dataframe." - - nil -> - lazy_s = LazySeries.new(:lazy, [nil], :null) - Explorer.Backend.Series.new(lazy_s, :null) - - number when is_number(number) -> - dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64} - lazy_s = LazySeries.new(:lazy, [number], dtype) - - Explorer.Backend.Series.new(lazy_s, dtype) - - string when is_binary(string) -> - lazy_s = LazySeries.new(:lazy, [string], :string) - - Explorer.Backend.Series.new(lazy_s, :string) - - boolean when is_boolean(boolean) -> - lazy_s = LazySeries.new(:lazy, [boolean], :boolean) - - Explorer.Backend.Series.new(lazy_s, :boolean) + column_pairs = to_column_pairs(df, result, &value!/1) - date = %Date{} -> - lazy_s = LazySeries.new(:lazy, [date], :date) + new_dtypes = + for {column_name, series} <- column_pairs, into: %{} do + {column_name, series.dtype} + end - Explorer.Backend.Series.new(lazy_s, :date) + mut_names = Enum.map(column_pairs, &elem(&1, 0)) + new_names = Enum.uniq(df.names ++ mut_names) - datetime = %NaiveDateTime{} -> - lazy_s = LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond}) + df_out = %{df | names: new_names, dtypes: Map.merge(df.dtypes, new_dtypes)} - Explorer.Backend.Series.new(lazy_s, {:datetime, :nanosecond}) + column_pairs = for {name, %Series{data: lazy_series}} <- column_pairs, do: {name, lazy_series} - duration = %Explorer.Duration{precision: precision} -> - lazy_s = LazySeries.new(:lazy, [duration], {:duration, precision}) + Shared.apply_impl(df, :mutate_with, [df_out, column_pairs]) + end - Explorer.Backend.Series.new(lazy_s, {:duration, precision}) + defp value!(%Series{data: %LazySeries{}} = lazy_series) do + lazy_series + end - map = %{} when not is_struct(map) -> - {series_list, dtype_list} = - Enum.reduce(map, {[], []}, fn {name, series_or_scalars}, {sl, dl} -> - series = - case series_or_scalars do - %Series{} -> - series_or_scalars.data + defp value!(%Series{data: _other}) do + raise ArgumentError, + "expecting a lazy series. Consider using `Explorer.DataFrame.put/3` " <> + "to add eager series to your dataframe." + end - nil -> - LazySeries.new(:lazy, [nil], :null) + defp value!(list) when is_list(list) do + map = + Enum.reduce(list, %{}, fn + %Series{data: %LazySeries{op: :column}} = s, acc -> + Map.put(acc, s.name, s) - number when is_number(number) -> - dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64} - LazySeries.new(:lazy, [number], dtype) + {name, %Series{} = s}, acc when is_atom(name) or is_binary(name) -> + Map.put(acc, name, s) - string when is_binary(string) -> - LazySeries.new(:lazy, [string], :string) + _, _ -> + raise ArgumentError, + "expecting list elements to be columns. Consider using %{a: a, ...} instead of list [a, ...]" + end) - boolean when is_boolean(boolean) -> - LazySeries.new(:lazy, [boolean], :boolean) + if map do + value!(map) + else + raise ArgumentError, + "expecting a lazy series or scalar value, but instead got a list. " <> + "consider using `Explorer.Series.from_list/2` to create a `Series`, " <> + "and then `Explorer.DataFrame.put/3` to add the series to your dataframe." + end + end - date = %Date{} -> - LazySeries.new(:lazy, [date], :date) + defp value!(scalar) do + lazy_s = lazy_series!(scalar) + Explorer.Backend.Series.new(lazy_s, lazy_s.dtype) + end - datetime = %NaiveDateTime{} -> - LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond}) + defp lazy_series!(scalar) do + case scalar do + %Series{data: %LazySeries{}} = series -> + series.data - duration = %Explorer.Duration{precision: precision} -> - LazySeries.new(:lazy, [duration], {:duration, precision}) - end + nil -> + LazySeries.new(:lazy, [nil], :null) - name = if is_atom(name), do: Atom.to_string(name), else: name - {[{name, series} | sl], [{name, series.dtype} | dl]} - end) + number when is_number(number) -> + dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64} + LazySeries.new(:lazy, [number], dtype) - map = Enum.into(series_list, %{}) - dtype_list = Enum.sort(dtype_list) + string when is_binary(string) -> + LazySeries.new(:lazy, [string], :string) - lazy_s = LazySeries.new(:lazy, [map], {:struct, dtype_list}) - Explorer.Backend.Series.new(lazy_s, {:struct, dtype_list}) + boolean when is_boolean(boolean) -> + LazySeries.new(:lazy, [boolean], :boolean) - other -> - raise ArgumentError, - "expecting a lazy series or scalar value, but instead got #{inspect(other)}" - end - end) + date = %Date{} -> + LazySeries.new(:lazy, [date], :date) - new_dtypes = - for {column_name, series} <- column_pairs, into: %{} do - {column_name, series.dtype} - end + datetime = %NaiveDateTime{} -> + LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond}) - mut_names = Enum.map(column_pairs, &elem(&1, 0)) - new_names = Enum.uniq(df.names ++ mut_names) + duration = %Explorer.Duration{precision: precision} -> + LazySeries.new(:lazy, [duration], {:duration, precision}) - df_out = %{df | names: new_names, dtypes: Map.merge(df.dtypes, new_dtypes)} + map = %{} when not is_struct(map) -> + {series_list, dtype_list} = + Enum.reduce(map, {[], []}, fn {name, series}, {sl, dl} -> + lazy_series = lazy_series!(series) + name = if is_atom(name), do: Atom.to_string(name), else: name + {[{name, lazy_series} | sl], [{name, lazy_series.dtype} | dl]} + end) - column_pairs = for {name, %Series{data: lazy_series}} <- column_pairs, do: {name, lazy_series} + map = Enum.into(series_list, %{}) + dtype_list = Enum.sort(dtype_list) + LazySeries.new(:lazy, [map], {:struct, dtype_list}) - Shared.apply_impl(df, :mutate_with, [df_out, column_pairs]) + other -> + raise ArgumentError, + "expecting a lazy series or scalar value, but instead got #{inspect(other)}" + end end @doc """