diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 93193d89a..cc333f1fd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,9 +47,11 @@ jobs:
       - run: mix deps.compile
       - name: Run tests
         run: mix test --warnings-as-errors
+      - name: Run property tests
+        run: |
+          mix test --only property --warnings-as-errors
       - name: Compile once again but without optional deps
         run: mix compile --force --warnings-as-errors --no-optional-deps
-
       - name: Run cloud integration tests
         run: |
           mix localstack.setup
diff --git a/mix.exs b/mix.exs
index 9f05b423a..7908b5a04 100644
--- a/mix.exs
+++ b/mix.exs
@@ -50,6 +50,7 @@ defmodule Explorer.MixProject do
 
       ## Test
       {:bypass, "~> 2.1", only: :test},
+      {:stream_data, "~> 0.6", only: :test},
 
       ## Dev
       {:ex_doc, "~> 0.24", only: :dev},
diff --git a/mix.lock b/mix.lock
index accd491e2..3c9af8556 100644
--- a/mix.lock
+++ b/mix.lock
@@ -30,6 +30,7 @@
   "rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"},
   "rustler_precompiled": {:hex, :rustler_precompiled, "0.7.1", "ecadf02cc59a0eccbaed6c1937303a5827fbcf60010c541595e6d3747d3d0f9f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "b9e4657b99a1483ea31502e1d58c464bedebe9028808eda45c3a429af4550c66"},
   "statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"},
+  "stream_data": {:hex, :stream_data, "0.6.0", "e87a9a79d7ec23d10ff83eb025141ef4915eeb09d4491f79e52f2562b73e5f47", [:mix], [], "hexpm", "b92b5031b650ca480ced047578f1d57ea6dd563f5b57464ad274718c9c29501c"},
   "table": {:hex, :table, "0.1.2", "87ad1125f5b70c5dea0307aa633194083eb5182ec537efc94e96af08937e14a8", [:mix], [], "hexpm", "7e99bc7efef806315c7e65640724bf165c3061cdc5d854060f74468367065029"},
   "table_rex": {:hex, :table_rex, "4.0.0", "3c613a68ebdc6d4d1e731bc973c233500974ec3993c99fcdabb210407b90959b", [:mix], [], "hexpm", "c35c4d5612ca49ebb0344ea10387da4d2afe278387d4019e4d8111e815df8f55"},
   "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
diff --git a/test/explorer/series/inferred_dtype_property_test.exs b/test/explorer/series/inferred_dtype_property_test.exs
new file mode 100644
index 000000000..877479154
--- /dev/null
+++ b/test/explorer/series/inferred_dtype_property_test.exs
@@ -0,0 +1,99 @@
+defmodule Explorer.Series.InferredDtypePropertyTest do
+  @moduledoc """
+  Property tests for checking the inferred dtype logic when the dtype isn't
+  specified in `Explorer.Series.from_list/1`.
+
+  ## Notes
+
+    * A maximum length of 3 is used quite a bit. This is intentional. Usually
+      issues stem from empty lists, not really long lists. By keeping lists
+      small, we can iterate much quicker through the input space.
+  """
+  use ExUnit.Case, async: true
+  use ExUnitProperties
+
+  import StreamData
+
+  alias Explorer.Series
+
+  # No per-test timeout: run length is bounded by the `check all` options below.
+  @moduletag timeout: :infinity
+
+  property "inferred dtype should always be a sub-dtype" do
+    check all(
+            dtype <- dtype_generator(),
+            series <- series_of_dtype_generator(dtype),
+            max_run_time: 60_000,
+            max_runs: 10_000
+          ) do
+      assert series |> Series.dtype() |> sub_dtype_of?(dtype)
+    end
+  end
+
+  # Generates nested dtypes: `{:s, 64}` at the leaves, optionally wrapped in any
+  # combination of `{:list, dtype}` and `{:struct, [{key, dtype}]}`.
+  defp dtype_generator do
+    scalar_dtype_generator = constant({:s, 64})
+
+    # We don't need complicated keys: single letter strings should suffice.
+    key_generator = string(?a..?z, min_length: 1, max_length: 1)
+
+    tree(scalar_dtype_generator, fn generator ->
+      # Building the keyword list from a map ensures unique keys.
+      keyword_generator =
+        map(nonempty(map_of(key_generator, generator, max_length: 3)), &Enum.to_list/1)
+
+      one_of([
+        tuple({constant(:list), generator}),
+        tuple({constant(:struct), keyword_generator})
+      ])
+    end)
+  end
+
+  # Generates series of up to 3 values matching `dtype`. The dtype is never
+  # passed to `Series.from_list/1`, so the resulting dtype is always inferred.
+  defp series_of_dtype_generator(dtype) do
+    dtype
+    |> build_series_value_generator()
+    |> list_of(max_length: 3)
+    |> map(&Series.from_list/1)
+  end
+
+  # Builds a generator of single values that conform to the given dtype.
+  defp build_series_value_generator({:s, 64}),
+    do: integer()
+
+  defp build_series_value_generator({:list, dtype}),
+    do: list_of(build_series_value_generator(dtype), max_length: 3)
+
+  defp build_series_value_generator({:struct, keyword_of_dtypes}) do
+    keyword_of_dtypes
+    |> Map.new(fn {key, dtype} -> {key, build_series_value_generator(dtype)} end)
+    |> fixed_map()
+  end
+
+  # The idea behind a "sub" dtype is that in the dtype tree, you can replace
+  # any subtree with `:null` and it's still valid. This is to deal with empty
+  # lists where we can't reasonably infer the dtype of a list with no elements.
+  defp sub_dtype_of?(x, x), do: true
+  defp sub_dtype_of?(:null, _), do: true
+  defp sub_dtype_of?({:list, sub_dtype}, {:list, dtype}), do: sub_dtype_of?(sub_dtype, dtype)
+
+  defp sub_dtype_of?({:struct, sub_dtype_keyword}, {:struct, dtype_keyword})
+       when is_list(sub_dtype_keyword) and is_list(dtype_keyword) do
+    if length(sub_dtype_keyword) != length(dtype_keyword) do
+      false
+    else
+      # Note: the need to sort here indicates we may want to normalize the result
+      # of `Series.dtype/1`.
+      sub_dtype_keyword
+      |> Enum.sort()
+      |> Enum.zip(Enum.sort(dtype_keyword))
+      |> Enum.all?(fn {{sub_key, sub_value}, {key, value}} ->
+        sub_key == key and sub_dtype_of?(sub_value, value)
+      end)
+    end
+  end
+
+  defp sub_dtype_of?(_sub_dtype, _dtype), do: false
+end
diff --git a/test/test_helper.exs b/test/test_helper.exs
index 4e30d229b..3800bfa64 100644
--- a/test/test_helper.exs
+++ b/test/test_helper.exs
@@ -43,4 +43,4 @@ defmodule Explorer.IOHelpers do
   end
 end
 
-ExUnit.start(exclude: :cloud_integration)
+ExUnit.start(exclude: [:cloud_integration, :property])