diff --git a/include/slimlog/pattern-inl.h b/include/slimlog/pattern-inl.h index 3f9d629..f90d2b6 100644 --- a/include/slimlog/pattern-inl.h +++ b/include/slimlog/pattern-inl.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -67,25 +68,27 @@ inline auto mbrtoc32(Args... /*unused*/) template struct FromMultibyte { - static auto get(Char* chr, const char* str, std::size_t len, mbstate_t* state) -> int + auto get(Char* chr, const char* str, std::size_t len) -> int { using namespace Fallback; + // NOLINTBEGIN (concurrency-mt-unsafe) if constexpr (std::is_same_v) { - return handle(mbrtowc(chr, str, len, state)); // NOLINT (concurrency-mt-unsafe) + return handle(mbrtowc(chr, str, len, &m_state)); #ifdef __cpp_char8_t } else if constexpr (std::is_same_v) { - return handle(mbrtoc8(chr, str, len, state)); // NOLINT (concurrency-mt-unsafe) + return handle(mbrtoc8(chr, str, len, &m_state)); #endif #ifdef __cpp_unicode_characters } else if constexpr (std::is_same_v) { - return handle(mbrtoc16(chr, str, len, state)); // NOLINT (concurrency-mt-unsafe) + return handle(mbrtoc16(chr, str, len, &m_state)); } else if constexpr (std::is_same_v) { - return handle(mbrtoc32(chr, str, len, state)); // NOLINT (concurrency-mt-unsafe) + return handle(mbrtoc32(chr, str, len, &m_state)); #endif } else { static_assert(Util::Types::AlwaysFalse{}, "Unsupported character type"); return -1; } + // NOLINTEND (concurrency-mt-unsafe) } static auto handle(std::size_t res) -> int @@ -101,6 +104,9 @@ struct FromMultibyte { "C++ stdlib does not support conversion to given character type"); return -1; } + +private: + std::mbstate_t m_state = {}; }; template @@ -317,30 +323,27 @@ void Pattern::compile(StringViewType pattern) } } -template -void Pattern::from_multibyte(auto& out, std::string_view data) -{ - Char wchr; - auto state = std::mbstate_t{}; - const Detail::FromMultibyte dispatcher; - for (int ret{}; (ret = dispatcher.get(&wchr, data.data(), data.size(), &state)) > 0; - data = data.substr(ret)) { - out.push_back(wchr); - } -} - template template void Pattern::format_string(auto& out, const auto& item, StringView&& data) { + constexpr auto CountCodepoints = [](StringView& src) { + if constexpr (std::is_same_v) { + return Util::Unicode::count_codepoints(src.data(), src.size()); + } else { + return src.codepoints(); + } + }; + const auto codepoints = CountCodepoints(data); + if (auto& specs = std::get(item); specs.width > 0) [[unlikely]] { - write_padded(out, std::forward(data), specs); + write_padded(out, std::forward(data), specs, codepoints); } else { using DataChar = typename std::remove_cvref_t::value_type; if constexpr (std::is_same_v && !std::is_same_v) { // NOLINTNEXTLINE (cppcoreguidelines-slicing) - from_multibyte(out, std::forward(data)); + from_multibyte(out, std::forward(data), codepoints); } else { out.append(std::forward(data)); } @@ -502,20 +505,11 @@ auto Pattern::get_string_specs(StringViewType value) -> Placeholder::Strin template template -constexpr void -Pattern::write_padded(auto& dst, StringView&& src, const Placeholder::StringSpecs& specs) +constexpr void Pattern::write_padded( + auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints) { - constexpr auto CountCodepoints = [](StringView& src) { - if constexpr (std::is_same_v) { - return Util::Unicode::count_codepoints(src.data(), src.size()); - } else { - return src.codepoints(); - } - }; - const auto spec_width = Util::Types::to_unsigned(specs.width); - const auto width = CountCodepoints(src); - const auto padding = spec_width > width ? spec_width - width : 0; + const auto padding = spec_width > codepoints ? spec_width - codepoints : 0; // Shifts are encoded as string literals because constexpr is not // supported in constexpr functions. @@ -565,7 +559,7 @@ Pattern::write_padded(auto& dst, StringView&& src, const Placeholder::Stri using DataChar = typename std::remove_cvref_t::value_type; if constexpr (std::is_same_v && !std::is_same_v) { // NOLINTNEXTLINE (cppcoreguidelines-slicing) - from_multibyte(dst, std::forward(src)); + from_multibyte(dst, std::forward(src), codepoints); } else { dst.append(std::forward(src)); } @@ -577,4 +571,69 @@ Pattern::write_padded(auto& dst, StringView&& src, const Placeholder::Stri } } +template +void Pattern::from_multibyte(auto& out, std::string_view data, std::size_t codepoints) +{ + const auto buf_size = out.size(); +#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) + out.reserve(buf_size + codepoints + 1); +#else + out.reserve(buf_size + codepoints); +#endif + + Char* dest = std::next(out.begin(), buf_size); + const char* source = data.data(); + + std::size_t written = 0; + if constexpr (std::is_same_v) { + std::mbstate_t state = {}; +#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) + if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) { + throw std::runtime_error("mbsrtowcs_s(): conversion error"); + } + written -= 1; // Don't take into account null terminator +#else + // NOLINTNEXTLINE (concurrency-mt-unsafe) + written = std::mbsrtowcs(dest, &source, codepoints, &state); + if (written == static_cast(-1)) { + throw std::runtime_error("std::mbsrtowcs(): conversion error"); + } +#endif + } else { + Char wchr; + Detail::FromMultibyte dispatcher; + for (auto source_size = data.size(); source_size > 0;) { + const int next = dispatcher.get(&wchr, source, source_size); + switch (next) { + case 0: + // Null character, finish processing + source_size = 0; + break; + case -1: + // Encoding error occured + throw std::runtime_error("std::mbrtocN(): conversion error"); + break; + case -2: + // Incomplete but valid character, skip it + break; + case -3: + // Next character from surrogate pair was processed + *dest = wchr; + written++; + std::advance(dest, 1); + break; + default: + // Successfuly processed + *dest = wchr; + written++; + std::advance(dest, 1); + std::advance(source, next); + source_size -= next; + break; + } + } + } + out.resize(buf_size + written); +} + } // namespace SlimLog diff --git a/include/slimlog/pattern.h b/include/slimlog/pattern.h index aa578bd..294a5fe 100644 --- a/include/slimlog/pattern.h +++ b/include/slimlog/pattern.h @@ -253,18 +253,6 @@ class Pattern { */ void compile(StringViewType pattern); - /** - * @brief Converts a multi-byte string to a single-byte string. - * - * This function converts a multi-byte string to a single-byte string and appends the result to - * the provided destination stream buffer. - * - * @tparam T Character type of the source string. - * @param out Destination stream buffer where the converted string will be appended. - * @param data Source multi-byte string to be converted. - */ - static void from_multibyte(auto& out, std::string_view data); - /** * @brief Formats a string according to the specifications. * @@ -355,10 +343,24 @@ class Pattern { * @param dst Destination buffer where the string will be written. * @param src Source string view to be written. * @param specs String specifications, including alignment and fill character. + * @param codepoints Number of codepoints the source string contains. */ template - constexpr static void - write_padded(auto& dst, StringView&& src, const Placeholder::StringSpecs& specs); + constexpr static void write_padded( + auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints); + + /** + * @brief Converts a multi-byte string to a single-byte string. + * + * This function converts a multi-byte string to a single-byte string and appends the result to + * the provided destination stream buffer. + * + * @tparam T Character type of the source string. + * @param out Destination stream buffer where the converted string will be appended. + * @param data Source multi-byte string to be converted. + * @param codepoints Number of codepoints the data string contains. + */ + static void from_multibyte(auto& out, std::string_view data, std::size_t codepoints); std::basic_string m_pattern; std::vector m_placeholders; diff --git a/include/slimlog/util/unicode.h b/include/slimlog/util/unicode.h index 8f1c372..329e643 100644 --- a/include/slimlog/util/unicode.h +++ b/include/slimlog/util/unicode.h @@ -5,9 +5,10 @@ #pragma once -#include +#include #include #include +#include namespace SlimLog::Util::Unicode { @@ -72,11 +73,29 @@ constexpr auto count_codepoints(const Char* begin, std::size_t len) -> std::size { if constexpr (sizeof(Char) != 1) { return len; - } else { +#ifdef __cpp_char8_t + } else if constexpr (std::is_same_v) { std::size_t codepoints = 0; for (const auto* end = std::next(begin, len); begin != end; ++codepoints) { std::advance(begin, Util::Unicode::code_point_length(begin)); } + return codepoints - 1; +#endif + } else { + std::mbstate_t state = std::mbstate_t(); +#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) + std::size_t codepoints = 0; + if (mbsrtowcs_s(&codepoints, nullptr, codepoints, &begin, 0, &state) != 0) { + throw std::runtime_error("mbsrtowcs_s(): conversion error"); + } + codepoints -= 1; +#else + // NOLINTNEXTLINE (concurrency-mt-unsafe) + const auto codepoints = std::mbsrtowcs(nullptr, &begin, 0, &state); + if (codepoints == static_cast(-1)) [[unlikely]] { + throw std::runtime_error("std::mbsrtowcs(): conversion error"); + } +#endif return codepoints; } }