Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove chunked output for now #132

Merged
merged 1 commit into from
Mar 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,21 @@ Otherwise, copy the clangd args from the [.vscode/settings.json](.vscode/setting

### TODO

* Get SYCL building with bazel. Already have OpenSYCL building for CPU only [here](https://github.com/garymm/xpu).
Would be nicer to use [Intel's LLVM](https://github.com/intel/llvm), which supports lots of GPUs.
* (maybe?) Implement LZ77 with C++ std lib.
#### Basic

* Implement Deflate decompression with C++ std lib.
* Port Deflate to SYCL.
* Benchmark it on CPU.
* Build system work to get it to run on GPU.
* Port Deflate to GPU.
* Benchmark it on GPU.

#### Nice to have

* Support chunked output. Started in
[2e6a83d622a](https://github.com/garymm/starflate/commit/2e6a83d622a0bbe6b65c757199b64511156b516c),
but removed because it was adding too much complexity and I wanted to focus on getting the
basics working.

## References

* [DEFLATE Compressed Data Format Specification version 1.3](https://tools.ietf.org/html/rfc1951)
Expand Down
24 changes: 12 additions & 12 deletions src/decompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
}

auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>
-> std::expected<BlockHeader, DecompressStatus>
{
if (std::ranges::size(compressed_bits) < 3) {
return std::unexpected{DecompressError::InvalidBlockHeader};
return std::unexpected{DecompressStatus::InvalidBlockHeader};
}
auto type = static_cast<BlockType>(
std::uint8_t{static_cast<bool>(compressed_bits[1])} |
(std::uint8_t{static_cast<bool>(compressed_bits[2])} << 1));
if (not valid(type)) {
return std::unexpected{DecompressError::InvalidBlockHeader};
return std::unexpected{DecompressStatus::InvalidBlockHeader};
}
const bool final{static_cast<bool>(compressed_bits[0])};
compressed_bits.consume(3);
Expand All @@ -34,16 +34,16 @@
} // namespace detail

auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>
-> DecompressStatus
{
using enum detail::BlockType;

huffman::bit_span src_bits{src};
std::size_t dst_written{};
// std::size_t dst_written{};
for (bool was_final = false; not was_final;) {
const auto header = detail::read_header(src_bits);
if (not header) {
return std::unexpected{header.error()};
return header.error();
}
was_final = header->final;
if (header->type == NoCompression) { // no compression
Expand All @@ -52,32 +52,32 @@
const std::uint16_t len = src_bits.pop_16();
const std::uint16_t nlen = src_bits.pop_16();
if (len != static_cast<std::uint16_t>(~nlen)) {
return std::unexpected{DecompressError::NoCompressionLenMismatch};
return DecompressStatus::NoCompressionLenMismatch;

Check warning on line 55 in src/decompress.cpp

View check run for this annotation

Codecov / codecov/patch

src/decompress.cpp#L55

Added line #L55 was not covered by tests
}
// TODO: should we return an error instead of assert?
assert(
std::cmp_greater_equal(
src_bits.size(), std::size_t{len} * CHAR_BIT) and
"not enough bits in src");

if (std::ranges::size(dst) < len) {
return DecompressResult{src, dst_written, len};
if (dst.size() < len) {
return DecompressStatus::DstTooSmall;
}

std::copy_n(src_bits.byte_data(), len, dst.begin());
src_bits.consume(CHAR_BIT * len);
dst = dst.subspan(len);
dst_written += len;
// dst_written += len;
} else {
// TODO: implement
return std::unexpected{DecompressError::Error};
return DecompressStatus::Error;

Check warning on line 73 in src/decompress.cpp

View check run for this annotation

Codecov / codecov/patch

src/decompress.cpp#L73

Added line #L73 was not covered by tests
}
const auto distance =
std::distance(std::ranges::data(src), src_bits.byte_data());
assert(distance >= 0 and "distance must be positive");
src = src.subspan(static_cast<size_t>(distance));
}
return DecompressResult{src, dst_written, 0};
return DecompressStatus::Success;
}

} // namespace starflate
26 changes: 7 additions & 19 deletions src/decompress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
namespace starflate {

// error code enum
enum class DecompressError : std::uint8_t
enum class DecompressStatus : std::uint8_t
{
Error,
Success,
Error, // TODO: remove
InvalidBlockHeader,
NoCompressionLenMismatch,
DstTooSmall,
};

namespace detail {
Expand All @@ -33,31 +35,17 @@ struct BlockHeader
};

auto read_header(huffman::bit_span& compressed_bits)
-> std::expected<BlockHeader, DecompressError>;
-> std::expected<BlockHeader, DecompressStatus>;
} // namespace detail

/// The result of decompress.
///
struct DecompressResult
{
std::span<const std::byte> remaining_src; ///< Remaining source data after
///< decompression.
std::size_t dst_written; ///< Number of bytes written to dst.
std::size_t min_next_dst_size; ///< Minimum number of bytes required in dst
///< for the next decompression. This is only
///< enough space for decompression of a
///< single block
};

/// Decompresses the given source data into the destination buffer.
///
/// @param src The source data to decompress.
/// @param dst The destination buffer to store the decompressed data.
/// @return An expected value containing the decompression result if successful,
/// or an error code if failed.
/// @return A status code indicating the result of the decompression.
///
auto decompress(std::span<const std::byte> src, std::span<std::byte> dst)
-> std::expected<DecompressResult, DecompressError>;
-> DecompressStatus;

template <std::ranges::contiguous_range R>
requires std::same_as<std::ranges::range_value_t<R>, std::byte>
Expand Down
42 changes: 16 additions & 26 deletions src/test/decompress_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ auto main(int, char* argv[]) -> int
test("read_header") = [] -> void {
huffman::bit_span empty{nullptr, 0, 0};
expect(detail::read_header(empty).error() ==
DecompressError::InvalidBlockHeader);
DecompressStatus::InvalidBlockHeader);

constexpr auto bad_block_type = huffman::byte_array(0b111);
huffman::bit_span bad_block_type_span{bad_block_type};
expect(detail::read_header(bad_block_type_span).error() ==
DecompressError::InvalidBlockHeader);
DecompressStatus::InvalidBlockHeader);

constexpr auto fixed = huffman::byte_array(0b010);
huffman::bit_span fixed_span{fixed};
Expand All @@ -84,9 +84,9 @@ auto main(int, char* argv[]) -> int
};

test("decompress invalid header") = [] -> void {
const auto result =
const auto status =
decompress(std::span<const std::byte>{}, std::span<std::byte>{});
expect(result.error() == DecompressError::InvalidBlockHeader);
expect(status == DecompressStatus::InvalidBlockHeader);
};

test("no compression") = [] {
Expand All @@ -108,31 +108,21 @@ auto main(int, char* argv[]) -> int
'b',
'u',
'd');
std::span<const std::byte> src{compressed};
const std::span<const std::byte> src{compressed};

constexpr auto expected_0 = huffman::byte_array('r', 'o', 's', 'e');
constexpr auto expected_1 = huffman::byte_array('b', 'u', 'd');
const std::array<std::span<const std::byte>, 2> expecteds{
expected_0, expected_1};
constexpr auto expected =
huffman::byte_array('r', 'o', 's', 'e', 'b', 'u', 'd');

std::array<std::byte, expected.size()> dst_array{};
const std::span<std::byte> dst_too_small{
dst_array.data(), dst_array.size() - 1};
const auto status_too_small = decompress(src, dst_too_small);
expect(status_too_small == DecompressStatus::DstTooSmall);

std::array<std::byte, 4> dst_array{};
const std::span<std::byte> dst{dst_array};
for (std::size_t i = 0; i < expecteds.size(); ++i) {
const auto result = decompress(src, dst);
expect(result.has_value())
<< "got error code: " << static_cast<std::int32_t>(result.error());
if (i == 0) {
expect(not result->remaining_src.empty());
expect(result->min_next_dst_size == expecteds.at(1).size());
} else {
expect(result->remaining_src.empty());
expect(result->min_next_dst_size == 0);
}
const auto expected = expecteds.at(i);
expect(result->dst_written == expected.size());
expect(std::ranges::equal(dst.subspan(0, expected.size()), expected));
src = result->remaining_src;
}
const auto status = decompress(src, dst);
expect(status == DecompressStatus::Success);
expect(std::ranges::equal(dst, expected));
};

test("fixed huffman") = [argv] {
Expand Down
Loading