Skip to content

Commit

Permalink
review changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee committed Feb 1, 2024
1 parent 33a31a7 commit 654f913
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 14 deletions.
4 changes: 2 additions & 2 deletions lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ defmodule Explorer.Backend.LazySeries do
downcase: 1,
substring: 3,
split: 2,
json_decode: 2,
# Float round
round: 2,
floor: 1,
Expand All @@ -143,8 +144,7 @@ defmodule Explorer.Backend.LazySeries do
lengths: 1,
member: 3,
# Struct functions
field: 2,
json_decode: 2
field: 2
]

@comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal]
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ defmodule Explorer.Backend.Series do
@callback rstrip(s, String.t() | nil) :: s
@callback substring(s, integer(), non_neg_integer() | nil) :: s
@callback split(s, String.t()) :: s
@callback json_decode(s, dtype()) :: s

# Date / DateTime

Expand All @@ -305,7 +306,6 @@ defmodule Explorer.Backend.Series do

# Struct
@callback field(s, String.t()) :: s
@callback json_decode(s, dtype()) :: s

# Functions

Expand Down
4 changes: 2 additions & 2 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,15 @@ defmodule Explorer.PolarsBackend.Expression do
upcase: 1,
substring: 3,
split: 2,
json_decode: 2,

# Lists
join: 2,
lengths: 1,
member: 3,

# Structs
field: 2,
json_decode: 2
field: 2
]

@custom_expressions [
Expand Down
26 changes: 22 additions & 4 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6044,7 +6044,7 @@ defmodule Explorer.Series do
end

@doc """
Converts a string series containing valid json to a struct series.
Converts a string series containing valid json to a series.
## Examples
Expand All @@ -6055,10 +6055,28 @@ defmodule Explorer.Series do
struct[1] [%{"a" => 1}]
>
Will raise `RuntimeError` for invalid json.
iex> s = Series.from_list(["1"])
iex> Series.json_decode(s, {:s, 64})
#Explorer.Series<
Polars[1]
s64 [1]
>
Will raise `RuntimeError` if the string is invalid json.
The decoded value will be `nil` in case of a dtype mismatch, e.g. a JSON string decoded as an integer.
## Examples
iex> s = Series.from_list(["\\"1\\""])
iex> Series.json_decode(s, {:s, 64})
#Explorer.Series<
Polars[1]
s64 [nil]
>
"""
@doc type: :struct_wise
@spec json_decode(Series.t(), struct_dtype()) :: Series.t()
@doc type: :string_wise
@spec json_decode(Series.t(), dtype()) :: Series.t()
def json_decode(%Series{dtype: :string} = series, dtype) do
dtype = Shared.normalise_dtype!(dtype)

Expand Down
30 changes: 30 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4278,4 +4278,34 @@ defmodule Explorer.DataFrameTest do
}
end
end

# Exercises json_decode/2 inside DF.mutate on a lazy frame, covering struct,
# float, list and datetime target dtypes in a single pass.
describe "json_decode/2" do
test "decodes primitives, lists, structs" do
# One-row lazy frame in which every column holds a JSON-encoded string.
df = DF.new([%{st: "{\"n\": 1}", f: "1.0", l: "[1]", dt: "1"}], lazy: true)

# Decode each string column into a different target dtype.
df1 =
DF.mutate(df,
st: json_decode(st, {:struct, %{"n" => {:s, 64}}}),
f: json_decode(f, {:f, 64}),
l: json_decode(l, {:list, {:s, 64}}),
dt: json_decode(dt, {:datetime, :microsecond})
)

# The source frame must still report string dtypes for every column.
assert df.dtypes == %{"dt" => :string, "f" => :string, "l" => :string, "st" => :string}

# The mutated frame must report exactly the dtypes that were requested.
assert df1.dtypes == %{
"dt" => {:datetime, :microsecond},
"f" => {:f, 64},
"l" => {:list, {:s, 64}},
"st" => {:struct, %{"n" => {:s, 64}}}
}

# Collect the lazy frame and compare the decoded values; the JSON integer 1
# in "dt" is expected back as one microsecond past 1970-01-01 00:00:00.
assert df1 |> DF.collect() |> DF.to_columns() == %{
"dt" => [~N[1970-01-01 00:00:00.000001]],
"f" => [1.0],
"l" => [[1]],
"st" => [%{"n" => 1}]
}
end
end
end
18 changes: 13 additions & 5 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5868,11 +5868,19 @@ defmodule Explorer.SeriesTest do
end

describe "json_decode/2" do
test "extracts struct from json" do
s = Series.from_list(["{\"n\": 1}"])
sj = Series.json_decode(s, {:struct, %{"n" => {:s, 64}}})
assert sj.dtype == {:struct, %{"n" => {:s, 64}}}
assert Series.to_list(sj) == [%{"n" => 1}]
# The bare token `a` is not valid JSON, so decoding is expected to raise a
# RuntimeError instead of returning a series.
# NOTE(review): the message is compared for exact equality, so this test is
# coupled to the precise wording of the underlying parser error — confirm that
# is intended when upgrading the backend.
test "raises for invalid json" do
assert_raise RuntimeError,
"Polars Error: error deserializing JSON: json parsing error: 'InternalError(TapeError) at character 1 ('a')'",
fn ->
Series.from_list(["a"]) |> Series.json_decode(:string)
end
end

# Decodes a series of JSON scalars as {:s, 64}: the JSON number "1" is expected
# to become 1, while the JSON string "\"a\"" does not match the requested dtype
# and is expected to come back as nil rather than raising.
test "extracts primitive from json and nil for mismatch" do
s = Series.from_list(["1", "\"a\""])
sj = Series.json_decode(s, {:s, 64})
# The resulting series carries the requested dtype even though one entry is nil.
assert sj.dtype == {:s, 64}
assert Series.to_list(sj) == [1, nil]
end

test "extracts struct from json with dtype" do
Expand Down

0 comments on commit 654f913

Please sign in to comment.