diff --git a/.formatter.exs b/.formatter.exs new file mode 100644 index 0000000..d2cda26 --- /dev/null +++ b/.formatter.exs @@ -0,0 +1,4 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] +] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..786ab8d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,36 @@ +name: CI +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + name: OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} + strategy: + matrix: + include: + - elixir: "1.13" + otp: "24.3" + - elixir: "1.14" + otp: "25.3" + - elixir: "1.15" + otp: "26.0" + - elixir: "1.16" + otp: "26.2" + - elixir: "1.17" + otp: "27.0-rc3" + steps: + - uses: actions/checkout@v2 + - uses: erlef/setup-beam@v1 + with: + otp-version: ${{matrix.otp}} + elixir-version: ${{matrix.elixir}} + - name: Install Dependencies + run: mix deps.get + - name: Check compile warnings + run: mix compile --warnings-as-errors + - name: Check format + run: mix format --check-formatted + - name: Unit tests + run: mix test.unit + - name: Property-based tests + run: PROP_TEST_RUNTIME=30000 mix test.prop diff --git a/.gitignore b/.gitignore index 5d38716..3a97400 100644 --- a/.gitignore +++ b/.gitignore @@ -20,7 +20,7 @@ erl_crash.dump *.ez # Ignore package tarball (built via "mix hex.build"). -cmp-*.tar +ion-*.tar # Temporary files, for example, from tests. 
/tmp/ diff --git a/.iex.exs b/.iex.exs new file mode 100644 index 0000000..208b048 --- /dev/null +++ b/.iex.exs @@ -0,0 +1,2 @@ +import_file("~/.iex.exs") +import_if_available(Ion, only: :sigils) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9150581 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +## Dev + +## v0.1.0 (2024-04-25) + +- Initial release diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d76d61d --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,20 @@ +MIT License + +Copyright (c) 2024 Sabiwara + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..0ba2049 --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# Ion + +[![Hex Version](https://img.shields.io/hexpm/v/ion.svg)](https://hex.pm/packages/ion) +[![docs](https://img.shields.io/badge/docs-hexpm-blue.svg)](https://hexdocs.pm/ion/) +[![CI](https://github.com/sabiwara/ion/workflows/CI/badge.svg)](https://github.com/sabiwara/ion/actions?query=workflow%3ACI) + +Lightweight utility library for efficient IO data and chardata handling. + +## TLDR + +Interpolation: + +```elixir +# plain string +"#{name}: #{count}." +# IO data (using Ion) +~i"#{name}: #{count}." +# IO data (manual) +[name, ": ", to_string(count), ?.] +``` + +Joining: + +```elixir +# plain string +Enum.join(integers, "+") +# IO data (using Ion) +Ion.join(integers, "+") +# IO data (manual) +Enum.map_intersperse(integers, "+", &to_string/1) +``` + +Map joining: + +```elixir +# plain string +Enum.map_join(users, ", ", fn user -> "#{user.first}.#{user.last}@passione.org" end) +# IO data (using Ion) +Ion.map_join(users, ", ", fn user -> ~i"#{user.first}.#{user.last}@passione.org" end) +# IO data (manual) +Enum.map_intersperse(users, ", ", fn user -> [user.first, ?., user.last, "@passione.org"] end) +``` + +## Why `Ion`? + +[IO data and chardata](https://hexdocs.pm/elixir/io-and-the-file-system.html#iodata-and-chardata) +are one of the secret weapons behind Elixir and Erlang performance when it comes +to string processing and IO. Turns out the fastest way to concatenate strings +is: avoiding concatenation in the first place! + +While it is perfectly possible to handcraft IO data with just the standard +library, it can sometimes be tedious, cryptic and error prone. `Ion` provides a +few common recipes which: + +- are drop-in replacements, with APIs consistent with the standard way of building + strings +- reduce the cognitive overhead and make the intent explicit +- are implemented in an optimal fashion (`Ion` is fast!) 
+- help reduce bugs through better typing (see below) + +The examples above illustrate how easy it is to migrate from building strings to +building IO data or chardata. + +### Increased safety + +Building IO lists manually or through interspersing is error prone: we need to +be careful to cast things that are neither strings nor nested IO data or we will +end up with invalid data: + +```elixir +iex> as_bytes = Enum.intersperse(100..105, "+") +[100, "+", 101, "+", 102, "+", 103, "+", 104, "+", 105] +iex> IO.iodata_to_binary(as_bytes) +"d+e+f+g+h+i" +# need to make sure we have strings: +iex> Enum.map_intersperse(100..105, "+", &to_string/1) |> IO.iodata_to_binary() +# works just like Enum.join/2: +iex> Ion.join(100..105, "+") |> IO.iodata_to_binary() +"100+101+102+103+104+105" +``` + +### Performance + +Because they are specialized, the join functions should also be faster than +interspersing (~1.5x, see the `benchmarks` folder). + +## Installation + +Ion can be installed by adding `ion` to your list of dependencies in `mix.exs`: + +```elixir +def deps do + [ + {:ion, "~> 0.1.0"} + ] +end +``` + +Or you can just try it out from `iex` or an `.exs` script: + +```elixir +iex> Mix.install([:ion]) +:ok +iex> Ion.join(["Hello", :world], ",") +["Hello", ",", "world"] +``` + +Documentation can be found at [https://hexdocs.pm/ion](https://hexdocs.pm/ion). + +## Copyright and License + +Ion is licensed under the [MIT License](LICENSE.md). 
defmodule Bench.IO.Empty do
  @moduledoc false
  # Benchmark script comparing `IO.iodata_length(iodata) == 0` with
  # `Ion.iodata_empty?/1` on flat lists of various sizes and on one deeply
  # nested ("pathological") input.

  # Builds the `{label, iodata}` inputs handed to Benchee.
  def inputs() do
    sized_inputs =
      for n <- [5, 50, 500] do
        # Re-seed before each input so the generated data is reproducible.
        :rand.seed(:exrop, {1, 2, 3})

        # NOTE(review): the codepoint range of this expression was lost when
        # the source was extracted (only `< Enum.random()>>` survived);
        # `?a..?z` is a reconstruction - confirm against the original file.
        iodata = Stream.repeatedly(fn -> <<Enum.random(?a..?z)>> end) |> Enum.take(n)
        {"n = #{n}", iodata}
      end

    # A non-empty binary wrapped in 50 levels of single-element lists.
    sized_inputs ++
      [
        {"pathological", Enum.reduce(1..50, "not_empty", fn _, acc -> [acc] end)}
      ]
  end

  # Runs both emptiness checks against every input.
  def run() do
    Benchee.run(
      [
        {"IO.iodata_length() == 0", fn iodata -> IO.iodata_length(iodata) == 0 end},
        {"Ion.iodata_empty?/1", fn iodata -> Ion.iodata_empty?(iodata) end}
      ],
      inputs: inputs(),
      print: [fast_warning: false]
    )
  end
end

Bench.IO.Empty.run()
+ +##### With input n = 5 ##### +Name ips average deviation median 99th % +Ion.iodata_empty?/1 60.16 M 16.62 ns ±3994.43% 16.70 ns 29.20 ns +IO.iodata_length() == 0 25.62 M 39.03 ns ±7769.23% 42 ns 42 ns + +Comparison: +Ion.iodata_empty?/1 60.16 M +IO.iodata_length() == 0 25.62 M - 2.35x slower +22.41 ns + +##### With input n = 50 ##### +Name ips average deviation median 99th % +Ion.iodata_empty?/1 26.38 M 37.91 ns ±17084.78% 42 ns 42 ns +IO.iodata_length() == 0 8.52 M 117.34 ns ±5382.12% 125 ns 125 ns + +Comparison: +Ion.iodata_empty?/1 26.38 M +IO.iodata_length() == 0 8.52 M - 3.10x slower +79.43 ns + +##### With input n = 500 ##### +Name ips average deviation median 99th % +Ion.iodata_empty?/1 24.78 M 40.35 ns ±41066.46% 42 ns 42 ns +IO.iodata_length() == 0 1.20 M 836.14 ns ±169.27% 833 ns 916 ns + +Comparison: +Ion.iodata_empty?/1 24.78 M +IO.iodata_length() == 0 1.20 M - 20.72x slower +795.79 ns + +##### With input pathological ##### +Name ips average deviation median 99th % +IO.iodata_length() == 0 3.45 M 289.98 ns ±461.74% 292 ns 334 ns +Ion.iodata_empty?/1 2.18 M 458.13 ns ±5164.45% 416 ns 542 ns + +Comparison: +IO.iodata_length() == 0 3.45 M +Ion.iodata_empty?/1 2.18 M - 1.58x slower +168.15 ns diff --git a/benchmarks/join.exs b/benchmarks/join.exs new file mode 100644 index 0000000..dcda55a --- /dev/null +++ b/benchmarks/join.exs @@ -0,0 +1,21 @@ +range = 1..100 +integers = Enum.shuffle(range) +strings = Enum.map(integers, &<<&1::utf8, &1::utf8>>) +charlists = Enum.map(integers, &[&1, &1]) + +Benchee.run( + %{ + "Enum.intersperse (strings)" => fn -> Enum.intersperse(strings, "-") end, + "Enum.join (strings)" => fn -> Enum.join(strings, "-") end, + "Ion.join (strings)" => fn -> Ion.join(strings, "-") end, + "Enum.intersperse (charlists)" => fn -> Enum.intersperse(charlists, "-") end, + "Ion.join (charlists)" => fn -> Ion.join(charlists, "-") end, + "Enum.map_intersperse (ints)" => fn -> Enum.map_intersperse(integers, "-", &to_string/1) end, + "Enum.join 
(ints)" => fn -> Enum.join(integers, "-") end, + "Ion.join (ints)" => fn -> Ion.join(integers, "-") end, + "Enum.join (range)" => fn -> Enum.join(range, "-") end, + "Ion.join (range)" => fn -> Ion.join(range, "-") end, + }, + time: 2, + memory_time: 0.5 +) diff --git a/benchmarks/join.results.txt b/benchmarks/join.results.txt new file mode 100644 index 0000000..f243973 --- /dev/null +++ b/benchmarks/join.results.txt @@ -0,0 +1,66 @@ +Operating System: macOS +CPU Information: Apple M1 +Number of Available Cores: 8 +Available memory: 16 GB +Elixir 1.14.5 +Erlang 26.0.1 + +Benchmark suite executing with the following configuration: +warmup: 2 s +time: 2 s +memory time: 500 ms +reduction time: 0 ns +parallel: 1 +inputs: none specified +Estimated total run time: 45 s + +Benchmarking Enum.intersperse (charlists) ... +Benchmarking Enum.intersperse (strings) ... +Benchmarking Enum.join (ints) ... +Benchmarking Enum.join (range) ... +Benchmarking Enum.join (strings) ... +Benchmarking Enum.map_intersperse (ints) ... +Benchmarking Ion.join (charlists) ... +Benchmarking Ion.join (ints) ... +Benchmarking Ion.join (range) ... +Benchmarking Ion.join (strings) ... 
+ +Name ips average deviation median 99th % +Ion.join (strings) 1840.60 K 0.54 μs ±2872.63% 0.50 μs 0.67 μs +Ion.join (charlists) 1763.59 K 0.57 μs ±3011.09% 0.50 μs 0.71 μs +Enum.intersperse (charlists) 1219.01 K 0.82 μs ±1589.04% 0.75 μs 0.96 μs +Enum.intersperse (strings) 1217.86 K 0.82 μs ±1577.94% 0.75 μs 0.96 μs +Enum.join (strings) 498.47 K 2.01 μs ±411.72% 1.88 μs 3.17 μs +Ion.join (ints) 297.30 K 3.36 μs ±139.02% 3.25 μs 4.21 μs +Ion.join (range) 280.18 K 3.57 μs ±162.61% 3.46 μs 4.50 μs +Enum.map_intersperse (ints) 265.88 K 3.76 μs ±162.12% 3.67 μs 4.63 μs +Enum.join (ints) 209.26 K 4.78 μs ±69.28% 4.63 μs 8.54 μs +Enum.join (range) 190.09 K 5.26 μs ±121.90% 5.13 μs 7.33 μs + +Comparison: +Ion.join (strings) 1840.60 K +Ion.join (charlists) 1763.59 K - 1.04x slower +0.0237 μs +Enum.intersperse (charlists) 1219.01 K - 1.51x slower +0.28 μs +Enum.intersperse (strings) 1217.86 K - 1.51x slower +0.28 μs +Enum.join (strings) 498.47 K - 3.69x slower +1.46 μs +Ion.join (ints) 297.30 K - 6.19x slower +2.82 μs +Ion.join (range) 280.18 K - 6.57x slower +3.03 μs +Enum.map_intersperse (ints) 265.88 K - 6.92x slower +3.22 μs +Enum.join (ints) 209.26 K - 8.80x slower +4.24 μs +Enum.join (range) 190.09 K - 9.68x slower +4.72 μs + +Memory usage statistics: + +Name Memory usage +Ion.join (strings) 3.13 KB +Ion.join (charlists) 3.13 KB - 1.00x memory usage +0 KB +Enum.intersperse (charlists) 3.09 KB - 0.99x memory usage -0.03906 KB +Enum.intersperse (strings) 3.09 KB - 0.99x memory usage -0.03906 KB +Enum.join (strings) 4.66 KB - 1.49x memory usage +1.52 KB +Ion.join (ints) 5.48 KB - 1.75x memory usage +2.34 KB +Ion.join (range) 5.52 KB - 1.76x memory usage +2.39 KB +Enum.map_intersperse (ints) 5.45 KB - 1.74x memory usage +2.32 KB +Enum.join (ints) 5.53 KB - 1.77x memory usage +2.40 KB +Enum.join (range) 5.53 KB - 1.77x memory usage +2.40 KB + +**All measurements for memory usage were the same** diff --git a/benchmarks/map_join.exs b/benchmarks/map_join.exs new file mode 
100644 index 0000000..3cc03dc --- /dev/null +++ b/benchmarks/map_join.exs @@ -0,0 +1,21 @@ +range = 1..100 +integers = Enum.shuffle(range) +strings = Enum.map(integers, &<<&1::utf8, &1::utf8>>) +charlists = Enum.map(integers, &[&1, &1]) + +Benchee.run( + %{ + "Enum.map_intersperse (strings)" => fn -> Enum.map_intersperse(strings, "-", & &1) end, + "Enum.map_join (strings)" => fn -> Enum.map_join(strings, "-", & &1) end, + "Ion.map_join (strings)" => fn -> Ion.map_join(strings, "-", & &1) end, + "Enum.map_intersperse (charlists)" => fn -> Enum.map_intersperse(charlists, "-", & &1) end, + "Ion.map_join (charlists)" => fn -> Ion.map_join(charlists, "-", & &1) end, + "Enum.map_intersperse (ints)" => fn -> Enum.map_intersperse(integers, "-", &to_string/1) end, + "Enum.map_join (ints)" => fn -> Enum.map_join(integers, "-", & &1) end, + "Ion.map_join (ints)" => fn -> Ion.map_join(integers, "-", & &1) end, + "Enum.map_join (range)" => fn -> Enum.map_join(range, "-", & &1) end, + "Ion.map_join (range)" => fn -> Ion.map_join(range, "-", & &1) end, + }, + time: 2, + memory_time: 0.5 +) diff --git a/benchmarks/map_join.results.txt b/benchmarks/map_join.results.txt new file mode 100644 index 0000000..7221382 --- /dev/null +++ b/benchmarks/map_join.results.txt @@ -0,0 +1,66 @@ +Operating System: macOS +CPU Information: Apple M1 +Number of Available Cores: 8 +Available memory: 16 GB +Elixir 1.14.5 +Erlang 26.0.1 + +Benchmark suite executing with the following configuration: +warmup: 2 s +time: 2 s +memory time: 500 ms +reduction time: 0 ns +parallel: 1 +inputs: none specified +Estimated total run time: 45 s + +Benchmarking Enum.map_intersperse (charlists) ... +Benchmarking Enum.map_intersperse (ints) ... +Benchmarking Enum.map_intersperse (strings) ... +Benchmarking Enum.map_join (ints) ... +Benchmarking Enum.map_join (range) ... +Benchmarking Enum.map_join (strings) ... +Benchmarking Ion.map_join (charlists) ... +Benchmarking Ion.map_join (ints) ... 
+Benchmarking Ion.map_join (range) ... +Benchmarking Ion.map_join (strings) ... + +Name ips average deviation median 99th % +Ion.map_join (strings) 1748.47 K 0.57 μs ±2838.62% 0.50 μs 0.71 μs +Ion.map_join (charlists) 1730.00 K 0.58 μs ±2877.17% 0.50 μs 0.71 μs +Enum.map_intersperse (charlists) 1109.40 K 0.90 μs ±1506.11% 0.83 μs 1.04 μs +Enum.map_intersperse (strings) 1102.13 K 0.91 μs ±1581.56% 0.83 μs 1.04 μs +Enum.map_join (strings) 416.16 K 2.40 μs ±260.54% 2.29 μs 3.38 μs +Ion.map_join (ints) 294.08 K 3.40 μs ±141.91% 3.29 μs 4.21 μs +Ion.map_join (range) 269.34 K 3.71 μs ±161.84% 3.58 μs 4.88 μs +Enum.map_intersperse (ints) 265.32 K 3.77 μs ±156.63% 3.67 μs 4.63 μs +Enum.map_join (ints) 188.14 K 5.32 μs ±89.31% 5.13 μs 7.33 μs +Enum.map_join (range) 186.04 K 5.38 μs ±52.79% 5.21 μs 8.33 μs + +Comparison: +Ion.map_join (strings) 1748.47 K +Ion.map_join (charlists) 1730.00 K - 1.01x slower +0.00610 μs +Enum.map_intersperse (charlists) 1109.40 K - 1.58x slower +0.33 μs +Enum.map_intersperse (strings) 1102.13 K - 1.59x slower +0.34 μs +Enum.map_join (strings) 416.16 K - 4.20x slower +1.83 μs +Ion.map_join (ints) 294.08 K - 5.95x slower +2.83 μs +Ion.map_join (range) 269.34 K - 6.49x slower +3.14 μs +Enum.map_intersperse (ints) 265.32 K - 6.59x slower +3.20 μs +Enum.map_join (ints) 188.14 K - 9.29x slower +4.74 μs +Enum.map_join (range) 186.04 K - 9.40x slower +4.80 μs + +Memory usage statistics: + +Name Memory usage +Ion.map_join (strings) 3.13 KB +Ion.map_join (charlists) 3.13 KB - 1.00x memory usage +0 KB +Enum.map_intersperse (charlists) 3.11 KB - 0.99x memory usage -0.02344 KB +Enum.map_intersperse (strings) 3.11 KB - 0.99x memory usage -0.02344 KB +Enum.map_join (strings) 3.20 KB - 1.02x memory usage +0.0703 KB +Ion.map_join (ints) 5.48 KB - 1.75x memory usage +2.34 KB +Ion.map_join (range) 5.53 KB - 1.77x memory usage +2.40 KB +Enum.map_intersperse (ints) 5.45 KB - 1.74x memory usage +2.32 KB +Enum.map_join (ints) 5.55 KB - 1.77x memory usage +2.41 KB 
defmodule Ion do
  @moduledoc ~S"""
  Lightweight utility library for efficient IO data and chardata handling.

  ## Why `Ion`?

  See [`README`](./README.md).

  ## Examples

  [IO data and chardata](https://hexdocs.pm/elixir/io-and-the-file-system.html#iodata-and-chardata)
  can easily be built using:

  a. interpolation with the [`~i` sigil](`Ion.sigil_i/2`):

      iex> import Ion, only: :sigils
      iex> ~i"atom: #{:foo}, number: #{12 + 2.35}\n"
      ["atom: ", "foo", ", number: ", "14.35", 10]

  b. joining using `join/2` or `map_join/3`:

      iex> Ion.join(100..103, ",")
      [[["100", "," | "101"], "," | "102"], "," | "103"]

      iex> Ion.map_join(1..4, ",", & &1 + 100)
      [[["101", "," | "102"], "," | "103"], "," | "104"]

  """

  @doc ~S"""
  A sigil to build [IO data](https://hexdocs.pm/elixir/IO.html#module-io-data) or chardata and
  avoid string concatenation.

  Use `import Ion` to use it, or `import Ion, only: :sigils`.

  This sigil provides a faster version of string interpolation which:
  - will build a list with all chunks instead of concatenating them as a string
  - will keep interpolated lists and binaries untouched, without any validation or transformation
  - will cast anything else using `to_string/1`

  Works with both [IO data](https://hexdocs.pm/elixir/IO.html#module-io-data) and
  [Chardata](https://hexdocs.pm/elixir/IO.html#module-chardata).
  See their respective documentation or this
  [guide](https://hexdocs.pm/elixir/io-and-the-file-system.html#iodata-and-chardata)
  for more information.

  Only the modifier-less form is supported; an interpolated list whose first
  element is neither a list, a binary nor an integer is rejected at runtime.

  ## Examples

      iex> ~i"atom: #{:foo}, number: #{12 + 2.35}\n"
      ["atom: ", "foo", ", number: ", "14.35", 10]

  The data produced corresponds to the binary that a regular string interpolation
  would have produced:

      iex> IO.iodata_to_binary(~i"atom: #{:foo}, number: #{12 + 2.35}\n")
      "atom: foo, number: 14.35\n"
      iex> "atom: #{:foo}, number: #{12 + 2.35}\n"
      "atom: foo, number: 14.35\n"

  Can be used to build chardata too:

      iex> chardata = ~i"charlist: #{~c(ジョルノ)}!"
      ["charlist: ", [12472, 12519, 12523, 12494], 33]
      iex> IO.chardata_to_string(chardata)
      "charlist: ジョルノ!"

  """
  defmacro sigil_i(term, modifiers)

  defmacro sigil_i({:<<>>, _line, pieces}, []), do: sigil_i_pieces(pieces)

  # Turns the pieces of the interpolated string (literal binaries and
  # interpolation AST nodes) into the AST of the resulting list.
  defp sigil_i_pieces([]), do: []
  defp sigil_i_pieces(["" | pieces]), do: sigil_i_pieces(pieces)
  defp sigil_i_pieces([piece]), do: sigil_i_piece(piece, :last)

  # The last piece becomes the tail of the list (`[a | b]`) instead of a
  # regular element (`[a, b]`).
  defp sigil_i_pieces([piece, last]) do
    quote do
      [unquote(sigil_i_piece(piece)) | unquote(sigil_i_piece(last, :last))]
    end
  end

  defp sigil_i_pieces([piece | pieces]) do
    [sigil_i_piece(piece, :not_last) | sigil_i_pieces(pieces)]
  end

  defp sigil_i_piece(piece, ctx \\ nil)

  # Interpolation (`#{expr}`): binaries and lists are kept as they are,
  # anything else goes through `String.Chars`.
  defp sigil_i_piece({:"::", _, [{{:., _, _}, _, [expr]}, {:binary, _, _}]}, _) do
    quote generated: true do
      case unquote(expr) do
        data when is_binary(data) -> data
        [] -> []
        # for extra safety, shallow check to prevent lists of the wrong type
        [h | _] = data when is_list(h) or is_binary(h) or is_integer(h) -> data
        data when not is_list(data) -> String.Chars.to_string(data)
      end
    end
  end

  # Literal chunk: a single byte below 128 is emitted as an integer rather
  # than a one-byte binary.
  defp sigil_i_piece(piece, ctx) when is_binary(piece) do
    case Macro.unescape_string(piece) do
      # in tail position the integer is wrapped in a list, so that the
      # result stays a list
      <<char>> when char < 128 and ctx == :last -> [char]
      <<char>> when char < 128 -> char
      binary -> binary
    end
  end

  @doc """
  Joins the given `enumerable` into IO data or chardata using `joiner` as separator.

  It is a fast equivalent to `Enum.join/2`.

  Binaries and lists found in `enumerable` are kept as they are, other
  elements are cast using `to_string/1`. An empty `enumerable` returns `[]`.
  The exact nesting of the returned data is an implementation detail: only
  rely on the bytes or codepoints it represents.

  ## Examples

      iex> iodata = Ion.join(1..3)
      iex> IO.iodata_to_binary(iodata)
      "123"

      iex> iodata = Ion.join(1..3, " + ")
      iex> IO.iodata_to_binary(iodata)
      "1 + 2 + 3"

  """
  @spec join(Enumerable.t(data | String.Chars.t()), binary()) :: data
        when data: iodata() | IO.chardata()
  def join(enumerable, joiner \\ "") when is_binary(joiner) do
    case joiner do
      "" when is_list(enumerable) -> join_list(enumerable, [])
      # same accumulation as the list clause, so that lists and other
      # enumerables produce identical data
      "" -> Enum.reduce(enumerable, [], &append_to_acc/2)
      _ when is_list(enumerable) -> join_list(enumerable, joiner, nil)
      _ -> join_enumerable(enumerable, joiner)
    end
  end

  @compile {:inline, join_list: 2}

  defp join_list([], acc), do: acc

  defp join_list([head | tail], acc) do
    join_list(tail, append_to_acc(head, acc))
  end

  @compile {:inline, join_list: 3}

  # `nil` is the "nothing accumulated yet" marker: it tells the first element
  # (no joiner needed) apart from the following ones.
  defp join_list([], _joiner, acc), do: acc || []

  defp join_list([head | tail], joiner, acc) do
    join_list(tail, joiner, append_to_optional_acc(head, acc, joiner))
  end

  defp join_enumerable(enumerable, joiner) do
    Enum.reduce(enumerable, nil, &append_to_optional_acc(&1, &2, joiner)) || []
  end

  @doc """
  Maps and joins the given `enumerable` as IO data or chardata, in one pass.

  It is a fast equivalent to `Enum.map_join/3`.

  Binaries and lists returned by `mapper` are kept as they are, other values
  are cast using `to_string/1`. An empty `enumerable` returns `[]`.

  ## Examples

      iex> iodata = Ion.map_join(1..3, &(&1 * 2))
      iex> IO.iodata_to_binary(iodata)
      "246"

      iex> iodata = Ion.map_join(1..3, " + ", &(&1 * 2))
      iex> IO.iodata_to_binary(iodata)
      "2 + 4 + 6"

  """
  @spec map_join(Enumerable.t(element), binary(), (element -> data | String.Chars.t())) :: data
        when data: iodata() | IO.chardata(), element: term()
  def map_join(enumerable, joiner \\ "", mapper)
      when is_binary(joiner) and is_function(mapper, 1) do
    case joiner do
      "" when is_list(enumerable) -> map_join_list(enumerable, mapper, [])
      "" -> Enum.reduce(enumerable, [], &append_to_acc(mapper.(&1), &2))
      _ when is_list(enumerable) -> map_join_list(enumerable, joiner, mapper, nil)
      _ -> map_join_enumerable(enumerable, joiner, mapper)
    end
  end

  @compile {:inline, map_join_list: 3}

  defp map_join_list([], _mapper, acc), do: acc

  defp map_join_list([head | tail], mapper, acc) do
    map_join_list(tail, mapper, append_to_acc(mapper.(head), acc))
  end

  @compile {:inline, map_join_list: 4}

  defp map_join_list([], _joiner, _mapper, acc), do: acc || []

  defp map_join_list([head | tail], joiner, mapper, acc) do
    acc = mapper.(head) |> append_to_optional_acc(acc, joiner)
    map_join_list(tail, joiner, mapper, acc)
  end

  defp map_join_enumerable(enumerable, joiner, mapper) do
    Enum.reduce(enumerable, nil, &append_to_optional_acc(mapper.(&1), &2, joiner)) || []
  end

  @compile {:inline, append_to_acc: 2}

  # Appends `raw` (normalized through the `~i` sigil) to `acc`, without joiner.
  # The accumulated data becomes the head and the new data the tail.
  defp append_to_acc(raw, acc) do
    data = ~i"#{raw}"

    case acc do
      [] -> data
      _ -> [acc | data]
    end
  end

  @compile {:inline, append_to_optional_acc: 3}

  # Same as `append_to_acc/2`, with a joiner inserted before every element but
  # the first one (`acc` is `nil` until a first element has been added).
  defp append_to_optional_acc(raw, acc, joiner) do
    data = ~i"#{raw}"

    case acc do
      nil -> data
      _ -> [acc, joiner | data]
    end
  end

  @doc ~S"""
  Returns `true` if `iodata_or_chardata` is empty.

  Unlike `IO.iodata_length(iodata) == 0` which needs to walk the complete tree,
  it will bail early as soon as it finds at least one byte or codepoint.

  It should be constant time for most typical outputs, with the exception of
  atypical cases where the head is a deep nested tree.

  Even if `IO.iodata_length/1` is a very efficient BIF implemented in C, it has a linear
  algorithmic complexity and can become slow on bigger trees.

  ## Examples

      iex> Ion.iodata_empty?(["", []])
      true
      iex> Ion.iodata_empty?(~c"a")
      false
      iex> Ion.iodata_empty?(["a"])
      false
      iex> Ion.iodata_empty?(["", [], ["" | "c"]])
      false

  """
  @spec iodata_empty?(iodata() | IO.chardata()) :: boolean()
  def iodata_empty?(iodata_or_chardata)

  def iodata_empty?(binary) when is_binary(binary), do: binary === ""
  def iodata_empty?([]), do: true
  def iodata_empty?([head | _]) when is_integer(head), do: false

  def iodata_empty?([head | rest]) do
    # optimized `and`
    case iodata_empty?(head) do
      false -> false
      _ -> iodata_empty?(rest)
    end
  end

  # TODO Ultimate IO data guide
  # TODO Deprecate Aja
end
{:stream_data, "~> 0.6.0", only: :test} + ] + end + + defp package do + [ + maintainers: ["sabiwara"], + licenses: ["MIT"], + links: %{"GitHub" => @github_url}, + files: ~w(lib mix.exs README.md LICENSE.md CHANGELOG.md) + ] + end + + defp aliases do + [ + "test.unit": ["test --exclude property:true"], + "test.prop": ["test --only property:true"] + ] + end + + defp docs do + [ + main: "Ion", + source_ref: "v#{@version}", + source_url: @github_url, + homepage_url: @github_url, + extras: ["README.md", "CHANGELOG.md", "LICENSE.md"] + ] + end +end diff --git a/mix.lock b/mix.lock new file mode 100644 index 0000000..0e1ea70 --- /dev/null +++ b/mix.lock @@ -0,0 +1,14 @@ +%{ + "benchee": {:hex, :benchee, "1.3.0", "f64e3b64ad3563fa9838146ddefb2d2f94cf5b473bdfd63f5ca4d0657bf96694", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}, {:statistex, "~> 1.0", [hex: :statistex, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "34f4294068c11b2bd2ebf2c59aac9c7da26ffa0068afdf3419f1b176e16c5f81"}, + "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", "ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, + "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, + "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, + "ex_doc": {:hex, :ex_doc, 
"0.32.1", "21e40f939515373bcdc9cffe65f3b3543f05015ac6c3d01d991874129d173420", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5142c9db521f106d61ff33250f779807ed2a88620e472ac95dc7d59c380113da"}, + "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, + "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, + "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, + "statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"}, + "stream_data": {:hex, :stream_data, "0.6.0", "e87a9a79d7ec23d10ff83eb025141ef4915eeb09d4491f79e52f2562b73e5f47", [:mix], [], "hexpm", 
"b92b5031b650ca480ced047578f1d57ea6dd563f5b57464ad274718c9c29501c"}, +} diff --git a/test/ion_prop_test.exs b/test/ion_prop_test.exs new file mode 100644 index 0000000..05e9308 --- /dev/null +++ b/test/ion_prop_test.exs @@ -0,0 +1,185 @@ +defmodule Ion.PropTest do + use ExUnit.Case, async: true + use ExUnitProperties + + import Ion, only: :sigils + + @moduletag timeout: :infinity + @moduletag :property + + describe "~i sigil" do + property "keep iodata untouched" do + check all(data <- iodata()) do + assert ~i"#{data}" == data + assert ~i"#{data}?" == [data, ??] + assert ~i"#{data} & co" == [data | " & co"] + assert ~i"#{data}#{~c[ & co]}" == [data] ++ ~c" & co" + + assert ~i""" + #{data} & co + """ == [data | " & co\n"] + + assert ~i"#{data}#{data}" == [data | data] + end + end + + property "keep chardata untouched" do + check all(data <- chardata()) do + assert ~i"#{data}" == data + assert ~i"#{data}?" == [data, ??] + assert ~i"#{data} & co" == [data | " & co"] + assert ~i"#{data}#{~c[ & co]}" == [data] ++ ~c" & co" + + assert ~i""" + #{data} & co + """ == [data | " & co\n"] + + assert ~i"#{data}#{data}" == [data | data] + end + end + + property "is consistent for iodata substitutions" do + check all(sub <- iodata()) do + assert IO.iodata_to_binary(~i"hi #{sub}") == "hi " <> IO.iodata_to_binary(sub) + assert IO.iodata_to_binary(~i"#{sub}!") == IO.iodata_to_binary(sub) <> "!" + assert IO.iodata_to_binary(~i"[#{sub}]") == "[" <> IO.iodata_to_binary(sub) <> "]" + assert IO.iodata_to_binary(~i"hi #{sub}!\n") == "hi " <> IO.iodata_to_binary(sub) <> "!\n" + assert IO.iodata_to_binary(~i"✨#{sub}👍🏽") == "✨" <> IO.iodata_to_binary(sub) <> "👍🏽" + end + end + + property "is consistent for chardata substitutions" do + check all(sub <- chardata()) do + assert IO.chardata_to_string(~i"hi #{sub}") == "hi " <> IO.chardata_to_string(sub) + assert IO.chardata_to_string(~i"#{sub}!") == IO.chardata_to_string(sub) <> "!" 
+ assert IO.chardata_to_string(~i"[#{sub}]") == "[" <> IO.chardata_to_string(sub) <> "]" + + assert IO.chardata_to_string(~i"hi #{sub}!\n") == + "hi " <> IO.chardata_to_string(sub) <> "!\n" + + assert IO.chardata_to_string(~i"✨#{sub}👍🏽") == "✨" <> IO.chardata_to_string(sub) <> "👍🏽" + end + end + + property "is consistent for String.Chars substitutions (as iodata)" do + check all(sub <- string_chars()) do + assert ~i"#{sub}" == to_string(sub) + assert IO.iodata_to_binary(~i"hi #{sub}") == "hi " <> to_string(sub) + assert IO.iodata_to_binary(~i"#{sub}!") == to_string(sub) <> "!" + assert IO.iodata_to_binary(~i"[#{sub}]") == "[" <> to_string(sub) <> "]" + assert IO.iodata_to_binary(~i"hi #{sub}!\n") == "hi " <> to_string(sub) <> "!\n" + assert IO.iodata_to_binary(~i"✨#{sub}👍🏽") == "✨" <> to_string(sub) <> "👍🏽" + end + end + end + + property "is consistent for String.Chars substitutions (as chardata)" do + check all(sub <- string_chars()) do + assert ~i"#{sub}" == to_string(sub) + assert IO.chardata_to_string(~i"hi #{sub}") == "hi " <> to_string(sub) + assert IO.chardata_to_string(~i"#{sub}!") == to_string(sub) <> "!" 
+ assert IO.chardata_to_string(~i"[#{sub}]") == "[" <> to_string(sub) <> "]" + assert IO.chardata_to_string(~i"hi #{sub}!\n") == "hi " <> to_string(sub) <> "!\n" + assert IO.iodata_to_binary(~i"✨#{sub}👍🏽") == "✨" <> to_string(sub) <> "👍🏽" + end + end + + property "is consistent with ~s as iodata" do + check all(binary <- string(:printable), data <- iodata()) do + assert apply_sigil("i", binary, data) |> IO.iodata_to_binary() == + apply_sigil("s", binary, IO.iodata_to_binary(data)) + end + end + + property "is consistent with ~s as chardata" do + check all(binary <- string(:printable), data <- chardata()) do + assert apply_sigil("i", binary, data) |> IO.chardata_to_string() == + apply_sigil("s", binary, IO.chardata_to_string(data)) + end + end + + describe "join/2" do + property "is consistent with Enum for iodata" do + check all(list <- list_of(one_of([string_chars(), iodata()])), joiner <- joiner()) do + stream = Stream.map(list, & &1) + + expected = + Enum.map_join(list, joiner, fn + x when is_list(x) -> IO.iodata_to_binary(x) + x -> x + end) + + assert Ion.join(list, joiner) |> IO.iodata_to_binary() == expected + assert Ion.join(stream, joiner) |> IO.iodata_to_binary() == expected + end + end + + property "is consistent with Enum for chardata" do + check all(list <- list_of(one_of([string_chars(), chardata()])), joiner <- joiner()) do + stream = Stream.map(list, & &1) + expected = Enum.join(list, joiner) + + assert Ion.join(list, joiner) |> IO.chardata_to_string() == expected + assert Ion.join(stream, joiner) |> IO.chardata_to_string() == expected + end + end + end + + describe "map_join/3" do + property "is consistent with Enum for iodata" do + check all(list <- list_of(one_of([string_chars(), iodata()])), joiner <- joiner()) do + stream = Stream.map(list, & &1) + + fun = &remove_third_of_values/1 + + expected = + Enum.map_join(list, joiner, fn value -> + case remove_third_of_values(value) do + x when is_list(x) -> IO.iodata_to_binary(x) + x -> x + end + end) + 
+ assert Ion.map_join(list, joiner, fun) |> IO.iodata_to_binary() == expected + assert Ion.map_join(stream, joiner, fun) |> IO.iodata_to_binary() == expected + end + end + + property "is consistent with Enum for chardata" do + check all(list <- list_of(one_of([string_chars(), chardata()])), joiner <- joiner()) do + stream = Stream.map(list, & &1) + + fun = &remove_third_of_values/1 + + expected = Enum.map_join(list, joiner, fun) + + assert Ion.map_join(list, joiner, fun) |> IO.chardata_to_string() == expected + assert Ion.map_join(stream, joiner, fun) |> IO.chardata_to_string() == expected + end + end + + defp remove_third_of_values(term) do + case :erlang.phash2(term, 3) do + 0 -> [] + _ -> term + end + end + end + + defp string_chars, do: one_of([integer(), atom(:alphanumeric), string(:utf8)]) + + defp apply_sigil(<>, string, sub) do + "\"" <> inspected = inspect(string) + code = <<"~", sigil, ~S("#{sub}), inspected::binary>> + {result, _} = Code.eval_string(code, [sub: sub], __ENV__) + result + end + + defp joiner do + string(:utf8) |> scale_with_exponent(0.6) + end + + defp scale_with_exponent(data, exponent) do + scale(data, fn size -> trunc(:math.pow(size, exponent)) end) + end +end diff --git a/test/ion_test.exs b/test/ion_test.exs new file mode 100644 index 0000000..72eaac0 --- /dev/null +++ b/test/ion_test.exs @@ -0,0 +1,69 @@ +defmodule IonTest do + use ExUnit.Case, async: true + doctest Ion, import: true + + import Ion, only: :sigils + + describe "~i sigil" do + test "constants" do + assert ~i"" == [] + assert ~i"?" == [??] + assert ~i"abc" == "abc" + assert ~i"é" == "é" + assert ~i"あ" == "あ" + assert ~i"おはよう" == "おはよう" + + assert ~i""" + Hello + world + """ == "Hello\nworld\n" + end + + test "literals" do + assert ~i"#{"abc"} & co" == ["abc" | " & co"] + assert ~i"#{~c"abc"} & co" == [~c"abc" | " & co"] + assert ~i"#{"abc"}!" == ["abc", ?!] 
+ + assert ~i""" + Hello + #{"world"} + """ == ["Hello\n", "world", ?\n] + end + + test "string variables" do + string = "abc" + assert ~i"#{string} & co" == ["abc" | " & co"] + assert ~i"#{string}!" == ["abc", ?!] + assert ~i"#{string},#{string}" == ["abc", ?, | "abc"] + assert ~i"#{string}é" == ["abc" | "é"] + assert ~i"#{string}あ" == ["abc" | "あ"] + + assert ~i""" + Hello + #{string} + """ == ["Hello\n", "abc", ?\n] + end + + test "concatenation" do + x = 1 + y = 2 + + assert ~i"#{x}#{y}" == ["1" | "2"] + + assert ~i""" + #{x}#{y} + """ == ["1", "2", ?\n] + end + + test "error on shallow invalid lists" do + assert_raise CaseClauseError, fn -> ~i"#{[nil]}" end + assert_raise CaseClauseError, fn -> ~i"[#{[nil]}]" end + end + + test "modifiers are not allowed" do + assert_raise FunctionClauseError, fn -> Code.eval_quoted(quote do: ~i""x) end + assert_raise FunctionClauseError, fn -> Code.eval_quoted(quote do: ~i"a"y) end + assert_raise FunctionClauseError, fn -> Code.eval_quoted(quote do: ~i"#{1}"z) end + end + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs new file mode 100644 index 0000000..869559e --- /dev/null +++ b/test/test_helper.exs @@ -0,0 +1 @@ +ExUnit.start()