Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inferred dtype property tests #856

Merged
merged 12 commits into from
Feb 14, 2024
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ jobs:
- run: mix deps.compile
- name: Run tests
run: mix test --warnings-as-errors
# Property tests are excluded from the default `mix test` run (see the
# `exclude:` list passed to `ExUnit.start/1` in test/test_helper.exs), so they
# are run explicitly here with `--only property`.
- name: Run property tests
run: |
mix test --only property --warnings-as-errors
- name: Compile once again but without optional deps
run: mix compile --force --warnings-as-errors --no-optional-deps

- name: Run cloud integration tests
run: |
mix localstack.setup
Expand Down
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ defmodule Explorer.MixProject do

## Test
{:bypass, "~> 2.1", only: :test},
{:stream_data, "~> 0.6", only: :test},

## Dev
{:ex_doc, "~> 0.24", only: :dev},
Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.7.1", "ecadf02cc59a0eccbaed6c1937303a5827fbcf60010c541595e6d3747d3d0f9f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "b9e4657b99a1483ea31502e1d58c464bedebe9028808eda45c3a429af4550c66"},
"statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"},
"stream_data": {:hex, :stream_data, "0.6.0", "e87a9a79d7ec23d10ff83eb025141ef4915eeb09d4491f79e52f2562b73e5f47", [:mix], [], "hexpm", "b92b5031b650ca480ced047578f1d57ea6dd563f5b57464ad274718c9c29501c"},
"table": {:hex, :table, "0.1.2", "87ad1125f5b70c5dea0307aa633194083eb5182ec537efc94e96af08937e14a8", [:mix], [], "hexpm", "7e99bc7efef806315c7e65640724bf165c3061cdc5d854060f74468367065029"},
"table_rex": {:hex, :table_rex, "4.0.0", "3c613a68ebdc6d4d1e731bc973c233500974ec3993c99fcdabb210407b90959b", [:mix], [], "hexpm", "c35c4d5612ca49ebb0344ea10387da4d2afe278387d4019e4d8111e815df8f55"},
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
Expand Down
100 changes: 100 additions & 0 deletions test/explorer/series/inferred_dtype_property_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
defmodule Explorer.Series.InferredDtypePropertyTest do
  @moduledoc """
  Property tests for checking the inferred dtype logic when the dtype isn't
  specified in `Explorer.Series.from_list/1`.

  ## Notes

    * A maximum length of 3 is used quite a bit. This is intentional. Usually
      issues stem from empty lists, not really long lists. By keeping lists
      small, we can iterate much quicker through the input space.
  """
  use ExUnit.Case, async: true
  use ExUnitProperties

  alias Explorer.Series

  # Each property is already bounded by `max_run_time` below, so the ExUnit
  # per-test timeout is disabled.
  @moduletag timeout: :infinity

  property "inferred dtype should always be a sub-dtype" do
    check all(
            dtype <- dtype_generator(),
            series <- series_of_dtype_generator(dtype),
            max_run_time: 60_000,
            max_runs: 10_000
          ) do
      assert series |> Series.dtype() |> sub_dtype_of?(dtype)
    end
  end

  # Generates a dtype tree: `{:s, 64}` leaves, optionally wrapped in
  # `{:list, dtype}` and `{:struct, [{key, dtype}, ...]}` nodes.
  defp dtype_generator do
    scalar_dtype_generator = StreamData.constant({:s, 64})

    # We don't need complicated keys: single letter strings should suffice.
    key_generator = StreamData.string(?a..?z, min_length: 1, max_length: 1)

    StreamData.tree(scalar_dtype_generator, fn generator ->
      StreamData.one_of([
        StreamData.tuple({StreamData.constant(:list), generator}),
        StreamData.tuple({
          StreamData.constant(:struct),
          StreamData.map(
            StreamData.nonempty(StreamData.map_of(key_generator, generator, max_length: 3)),
            # Building the list from a map then ensures unique keys.
            &Enum.to_list/1
          )
        })
      ])
    end)
  end

  # Generates a series built (without an explicit dtype) from a list of at most
  # 3 values, each of which conforms to `dtype`.
  defp series_of_dtype_generator(dtype) do
    dtype
    |> build_series_value_generator()
    |> StreamData.list_of(max_length: 3)
    |> StreamData.map(&Series.from_list/1)
  end

  # Builds a generator of plain Elixir values matching the given dtype:
  # integers for `{:s, 64}`, lists (possibly empty, at most 3 elements) for
  # `{:list, _}`, and maps containing every declared key for `{:struct, _}`.
  defp build_series_value_generator({:s, 64}),
    do: StreamData.integer()

  defp build_series_value_generator({:list, dtype}),
    do: StreamData.list_of(build_series_value_generator(dtype), max_length: 3)

  defp build_series_value_generator({:struct, keyword_of_dtypes}) do
    keyword_of_dtypes
    |> Map.new(fn {key, dtype} -> {key, build_series_value_generator(dtype)} end)
    |> StreamData.fixed_map()
  end

  # The idea behind a "sub" dtype is that in the dtype tree, you can replace
  # any subtree with `:null` and it's still valid. This is to deal with empty
  # lists where we can't reasonably infer the dtype of a list with no elements.
  defp sub_dtype_of?(x, x), do: true
  defp sub_dtype_of?(:null, _), do: true
  defp sub_dtype_of?({:list, sub_dtype}, {:list, dtype}), do: sub_dtype_of?(sub_dtype, dtype)

  # Two struct dtypes are compared field by field: they must have the same
  # number of fields, the same keys, and each field must itself be a sub-dtype.
  defp sub_dtype_of?({:struct, sub_dtype_keyword}, {:struct, dtype_keyword})
       when is_list(sub_dtype_keyword) and is_list(dtype_keyword) do
    # Note: the need to sort here indicates we may want to normalize the result
    # of `Series.dtype/1`.
    length(sub_dtype_keyword) == length(dtype_keyword) and
      sub_dtype_keyword
      |> Enum.sort()
      |> Enum.zip(Enum.sort(dtype_keyword))
      |> Enum.all?(fn {{sub_key, sub_value}, {key, value}} ->
        sub_key == key and sub_dtype_of?(sub_value, value)
      end)
  end

  defp sub_dtype_of?(_sub_dtype, _dtype), do: false
end
2 changes: 1 addition & 1 deletion test/test_helper.exs
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,4 @@ defmodule Explorer.IOHelpers do
end
end

ExUnit.start(exclude: :cloud_integration)
ExUnit.start(exclude: [:cloud_integration, :property])
Loading