Skip to content

Commit

Permalink
tests, document error
Browse files Browse the repository at this point in the history
Update expressions.rs

add tests

Update series.ex
  • Loading branch information
lkarthee committed Jan 30, 2024
1 parent a3d30b3 commit 846eff0
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 29 deletions.
2 changes: 1 addition & 1 deletion lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ defmodule Explorer.Backend.Series do

# Struct
@callback field(s, String.t()) :: s
@callback json_decode(s, dtype() | nil, non_neg_integer() | nil) :: s
@callback json_decode(s, option(dtype()), option(non_neg_integer())) :: s

# Functions

Expand Down
72 changes: 46 additions & 26 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -42,36 +42,56 @@ defmodule Explorer.PolarsBackend.Shared do
def apply_dataframe(%DataFrame{} = df, %DataFrame{} = out_df, fun, args) do
case apply(Native, fun, [df.data | args]) do
{:ok, %module{} = new_df} when module in @polars_df ->
if @check_frames do
# We need to collect here, because the lazy frame may not have
# the full picture of the result yet.
check_df =
if match?(%PolarsLazyFrame{}, new_df) do
{:ok, new_df} = Native.lf_collect(new_df)
create_dataframe(new_df)
else
create_dataframe(new_df)
# :df_mutate_with_exprs
{struct?, dtypes} =
if @check_frames do
# We need to collect here, because the lazy frame may not have
# the full picture of the result yet.
check_df =
if match?(%PolarsLazyFrame{}, new_df) do
{:ok, new_df} = Native.lf_collect(new_df)
create_dataframe(new_df)
else
create_dataframe(new_df)
end

# When dealing with structs in mutate, we may not know the dtype of a struct series ahead of time.
# We have to accept the dtype returned by Polars; otherwise the check below raises a mismatch error.
{struct?, out_dtypes} =
if fun == :df_mutate_with_exprs do
Enum.reduce(check_df.dtypes, {false, out_df.dtypes}, fn
{key, {:struct, _} = dtype}, {_, dtypes} -> {true, Map.put(dtypes, key, dtype)}
_, acc -> acc
end)
else
{false, out_df.dtypes}
end

if Enum.sort(out_df.names) != Enum.sort(check_df.names) or
out_dtypes != check_df.dtypes do
raise """
DataFrame mismatch.
expected:
names: #{inspect(out_df.names)}
dtypes: #{inspect(out_df.dtypes)}
got:
names: #{inspect(check_df.names)}
dtypes: #{inspect(check_df.dtypes)}
"""
end

if Enum.sort(out_df.names) != Enum.sort(check_df.names) or
out_df.dtypes != check_df.dtypes do
raise """
DataFrame mismatch.
expected:
names: #{inspect(out_df.names)}
dtypes: #{inspect(out_df.dtypes)}
got:
names: #{inspect(check_df.names)}
dtypes: #{inspect(check_df.dtypes)}
"""
{struct?, out_dtypes}
end
end

%{out_df | data: new_df}
if struct? do
%{out_df | data: new_df, dtypes: dtypes}
else
%{out_df | data: new_df}
end

{:error, error} ->
raise runtime_error(error)
Expand Down
4 changes: 3 additions & 1 deletion lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6044,7 +6044,7 @@ defmodule Explorer.Series do
end

@doc """
Decode json from string
Decodes JSON from a string.
## Examples
Expand All @@ -6054,6 +6054,8 @@ defmodule Explorer.Series do
Polars[1]
struct[1] [%{"a" => 1}]
>
Raises `RuntimeError` when given invalid JSON.
"""
@doc type: :struct_wise
@spec json_decode(Series.t(), Keyword.t()) :: Series.t()
Expand Down
2 changes: 1 addition & 1 deletion native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,7 @@ pub fn expr_json_decode(
ex_dtype: Option<ExSeriesDtype>,
infer_schema_length: Option<usize>,
) -> ExExpr {
let dtype = ex_dtype.map(|x| DataType::try_from(&x).unwrap()); //DataType::try_from().unwrap();
let dtype = ex_dtype.map(|x| DataType::try_from(&x).unwrap());
let expr = expr
.clone_inner()
.str()
Expand Down
26 changes: 26 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -1870,6 +1870,32 @@ defmodule Explorer.DataFrameTest do
member?: [true, false]
}
end

test "extracts struct from json - json_decode" do
  # Decode the JSON text held in column "a" into a new column "aj",
  # passing a struct dtype whose "n" field is a signed 64-bit integer.
  source = DF.new([%{a: "{\"n\": 1}"}])
  decoded = DF.mutate(source, aj: json_decode(a, dtype: {:struct, %{"n" => {:s, 64}}}))

  expected_dtypes = %{"a" => :string, "aj" => {:struct, %{"n" => {:s, 64}}}}
  expected_rows = [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1}}]

  # The resulting frame must report the struct dtype for "aj" and the
  # decoded row must expose "n" as an integer.
  assert decoded.dtypes == expected_dtypes
  assert DF.to_rows(decoded) == expected_rows
end

test "extracts struct from json - json_decode with dtype" do
  # Same JSON input as an integer literal, but the requested struct dtype
  # declares "n" as a 64-bit float, so the decoded value is expected as 1.0.
  source = DF.new([%{a: "{\"n\": 1}"}])
  decoded = DF.mutate(source, aj: json_decode(a, dtype: {:struct, %{"n" => {:f, 64}}}))

  expected_dtypes = %{"a" => :string, "aj" => {:struct, %{"n" => {:f, 64}}}}
  expected_rows = [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1.0}}]

  assert decoded.dtypes == expected_dtypes
  assert DF.to_rows(decoded) == expected_rows
end

test "extracts struct from json - json_decode with infer_schema_length" do
  # No dtype is given here; only `infer_schema_length` is passed, and the
  # test expects the "n" field to come back as a signed 64-bit integer.
  source = DF.new([%{a: "{\"n\": 1}"}])
  decoded = DF.mutate(source, aj: json_decode(a, infer_schema_length: 100))

  expected_dtypes = %{"a" => :string, "aj" => {:struct, %{"n" => {:s, 64}}}}
  expected_rows = [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1}}]

  assert decoded.dtypes == expected_dtypes
  assert DF.to_rows(decoded) == expected_rows
end
end

describe "sort_by/3" do
Expand Down

0 comments on commit 846eff0

Please sign in to comment.