diff --git a/include/slimlog/pattern-inl.h b/include/slimlog/pattern-inl.h index f90d2b6..bf6ebf5 100644 --- a/include/slimlog/pattern-inl.h +++ b/include/slimlog/pattern-inl.h @@ -22,15 +22,11 @@ #include #include #include -#if defined(__cpp_unicode_characters) or defined(__cpp_char8_t) -#include // IWYU pragma: keep -#endif -#include +#include #include #include #include #include -#include #include #include #include @@ -43,72 +39,6 @@ namespace SlimLog { /** @cond */ namespace Detail { -// Fallback functions to detect missing ones from stdlib -namespace Fallback { -#ifdef __cpp_char8_t -template -inline auto mbrtoc8(Args... /*unused*/) -{ - return std::monostate{}; -}; -#endif -#ifdef __cpp_unicode_characters -template -inline auto mbrtoc16(Args... /*unused*/) -{ - return std::monostate{}; -}; -template -inline auto mbrtoc32(Args... /*unused*/) -{ - return std::monostate{}; -}; -#endif -} // namespace Fallback - -template -struct FromMultibyte { - auto get(Char* chr, const char* str, std::size_t len) -> int - { - using namespace Fallback; - // NOLINTBEGIN (concurrency-mt-unsafe) - if constexpr (std::is_same_v) { - return handle(mbrtowc(chr, str, len, &m_state)); -#ifdef __cpp_char8_t - } else if constexpr (std::is_same_v) { - return handle(mbrtoc8(chr, str, len, &m_state)); -#endif -#ifdef __cpp_unicode_characters - } else if constexpr (std::is_same_v) { - return handle(mbrtoc16(chr, str, len, &m_state)); - } else if constexpr (std::is_same_v) { - return handle(mbrtoc32(chr, str, len, &m_state)); -#endif - } else { - static_assert(Util::Types::AlwaysFalse{}, "Unsupported character type"); - return -1; - } - // NOLINTEND (concurrency-mt-unsafe) - } - - static auto handle(std::size_t res) -> int - { - return static_cast(res); - } - - template - static auto handle(T /*unused*/) -> int - { - static_assert( - Util::Types::AlwaysFalse{}, - "C++ stdlib does not support conversion to given character type"); - return -1; - } - -private: - std::mbstate_t m_state = {}; -}; - 
template concept HasConvertString = requires(StringType value) { { ConvertString{}(value) } -> std::same_as>; @@ -342,8 +272,12 @@ void Pattern::format_string(auto& out, const auto& item, StringView&& data } else { using DataChar = typename std::remove_cvref_t::value_type; if constexpr (std::is_same_v && !std::is_same_v) { - // NOLINTNEXTLINE (cppcoreguidelines-slicing) - from_multibyte(out, std::forward(data), codepoints); + out.resize(out.size() + codepoints + 1); + const std::size_t written = Util::Unicode::from_multibyte( + std::prev(out.end(), codepoints + 1), // first slot of the region appended above + std::forward(data), // NOLINT(cppcoreguidelines-slicing) + codepoints); + out.resize(out.size() - (codepoints + 1) + (written - 1)); // written counts the terminator } else { out.append(std::forward(data)); } @@ -519,7 +453,7 @@ constexpr void Pattern::write_padded( const auto right_padding = padding - left_padding; // Reserve exact amount for data + padding - dst.reserve(dst.size() + src.size() + padding * specs.fill.size()); + dst.reserve(dst.size() + codepoints + padding * specs.fill.size()); // Lambda for filling with single character or multibyte pattern constexpr auto FillPattern @@ -558,8 +492,12 @@ constexpr void Pattern::write_padded( // Fill data using DataChar = typename std::remove_cvref_t::value_type; if constexpr (std::is_same_v && !std::is_same_v) { - // NOLINTNEXTLINE (cppcoreguidelines-slicing) - from_multibyte(dst, std::forward(src), codepoints); + dst.resize(dst.size() + codepoints + 1); + const std::size_t written = Util::Unicode::from_multibyte( + std::prev(dst.end(), codepoints + 1), // first slot of the region appended above + std::forward(src), // NOLINT(cppcoreguidelines-slicing) + codepoints); + dst.resize(dst.size() - (codepoints + 1) + (written - 1)); // written counts the terminator } else { dst.append(std::forward(src)); } @@ -571,69 +509,4 @@ constexpr void Pattern::write_padded( } } -template -void Pattern::from_multibyte(auto& out, std::string_view data, std::size_t codepoints) -{ - const auto buf_size = out.size(); -#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) - out.reserve(buf_size + codepoints + 1); -#else - 
out.reserve(buf_size + codepoints); -#endif - - Char* dest = std::next(out.begin(), buf_size); - const char* source = data.data(); - - std::size_t written = 0; - if constexpr (std::is_same_v) { - std::mbstate_t state = {}; -#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) - if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) { - throw std::runtime_error("mbsrtowcs_s(): conversion error"); - } - written -= 1; // Don't take into account null terminator -#else - // NOLINTNEXTLINE (concurrency-mt-unsafe) - written = std::mbsrtowcs(dest, &source, codepoints, &state); - if (written == static_cast(-1)) { - throw std::runtime_error("std::mbsrtowcs(): conversion error"); - } -#endif - } else { - Char wchr; - Detail::FromMultibyte dispatcher; - for (auto source_size = data.size(); source_size > 0;) { - const int next = dispatcher.get(&wchr, source, source_size); - switch (next) { - case 0: - // Null character, finish processing - source_size = 0; - break; - case -1: - // Encoding error occured - throw std::runtime_error("std::mbrtocN(): conversion error"); - break; - case -2: - // Incomplete but valid character, skip it - break; - case -3: - // Next character from surrogate pair was processed - *dest = wchr; - written++; - std::advance(dest, 1); - break; - default: - // Successfuly processed - *dest = wchr; - written++; - std::advance(dest, 1); - std::advance(source, next); - source_size -= next; - break; - } - } - } - out.resize(buf_size + written); -} - } // namespace SlimLog diff --git a/include/slimlog/util/unicode.h b/include/slimlog/util/unicode.h index 5fc6734..647028a 100644 --- a/include/slimlog/util/unicode.h +++ b/include/slimlog/util/unicode.h @@ -5,16 +5,94 @@ #pragma once +#include + #include #include #include +#if defined(__cpp_unicode_characters) or defined(__cpp_char8_t) +#include // IWYU pragma: keep +#endif #include #include #include #include +#include +#include namespace SlimLog::Util::Unicode { +/** @cond */ 
+namespace Detail { + +// Fallback functions to detect missing ones from stdlib +namespace Fallback { +#ifdef __cpp_char8_t +template +inline auto mbrtoc8(Args... /*unused*/) +{ + return std::monostate{}; +}; +#endif +#ifdef __cpp_unicode_characters +template +inline auto mbrtoc16(Args... /*unused*/) +{ + return std::monostate{}; +}; +template +inline auto mbrtoc32(Args... /*unused*/) +{ + return std::monostate{}; +}; +#endif +} // namespace Fallback + +template +struct FromMultibyte { + auto get(Char* chr, const char* str, std::size_t len) -> int + { + using namespace Fallback; + // NOLINTBEGIN (concurrency-mt-unsafe) + if constexpr (std::is_same_v) { + return handle(mbrtowc(chr, str, len, &m_state)); +#ifdef __cpp_char8_t + } else if constexpr (std::is_same_v) { + return handle(mbrtoc8(chr, str, len, &m_state)); +#endif +#ifdef __cpp_unicode_characters + } else if constexpr (std::is_same_v) { + return handle(mbrtoc16(chr, str, len, &m_state)); + } else if constexpr (std::is_same_v) { + return handle(mbrtoc32(chr, str, len, &m_state)); +#endif + } else { + static_assert(Util::Types::AlwaysFalse{}, "Unsupported character type"); + return -1; + } + // NOLINTEND (concurrency-mt-unsafe) + } + + static auto handle(std::size_t res) -> int + { + return static_cast(res); + } + + template + static auto handle(T /*unused*/) -> int + { + static_assert( + Util::Types::AlwaysFalse{}, + "C++ stdlib does not support conversion to given character type"); + return -1; + } + +private: + std::mbstate_t m_state = {}; +}; + +} // namespace Detail + /** * @brief Calculates the length of a Unicode code point starting from the given pointer. * @@ -184,4 +262,64 @@ constexpr auto to_ascii(Char chr) -> char return chr <= std::numeric_limits::max() ? 
static_cast(chr) : '\0'; } +template +constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t codepoints) +{ + const char* source = data.data(); + std::size_t written = 0; + + if constexpr (std::is_same_v) { + std::mbstate_t state = {}; +#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) + if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) { + throw std::runtime_error("mbsrtowcs_s(): conversion error"); + } +#else + // NOLINTNEXTLINE (concurrency-mt-unsafe) + written = std::mbsrtowcs(dest, &source, codepoints, &state); + if (written == static_cast(-1)) { + throw std::runtime_error("std::mbsrtowcs(): conversion error"); + } + *std::next(dest, codepoints) = '\0'; + ++written; +#endif + } else { + Char wchr; + Detail::FromMultibyte dispatcher; + for (auto source_size = data.size(); source_size > 0;) { + const int next = dispatcher.get(&wchr, source, source_size); + switch (next) { + case 0: + // Null character, finish processing + source_size = 0; + break; + case -1: + // Encoding error occurred + throw std::runtime_error("std::mbrtocN(): conversion error"); + break; + case -2: + // Incomplete but valid character, skip it + break; + case -3: + // Next character from surrogate pair was processed + *dest = wchr; + ++written; + std::advance(dest, 1); + break; + default: + // Successfully processed + *dest = wchr; + ++written; + std::advance(dest, 1); + std::advance(source, next); + source_size -= next; + break; + } + } + *dest = '\0'; // dest was advanced per stored unit, so it already points at the terminator slot + ++written; + } + return written; +} + } // namespace SlimLog::Util::Unicode