From 0dd7dbdb6c39b227e4336889900bcd192993baa5 Mon Sep 17 00:00:00 2001 From: Pavel Artsishevsky Date: Mon, 16 Dec 2024 23:42:51 +0100 Subject: [PATCH] Fix multibyte string processing Signed-off-by: Pavel Artsishevsky --- include/slimlog/pattern.h | 28 +++++++++++++--------------- include/slimlog/util/unicode.h | 21 ++++++++++++++++----- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/slimlog/pattern.h b/include/slimlog/pattern.h index 294a5fe..86621d1 100644 --- a/include/slimlog/pattern.h +++ b/include/slimlog/pattern.h @@ -333,35 +333,33 @@ class Pattern { */ static auto get_string_specs(StringViewType value) -> Placeholder::StringSpecs; + /** + * @brief Writes the source string to the destination buffer. + * + * @tparam StringView String view type, convertible to `std::basic_string_view`. + * @param dst Destination buffer where the string will be written. + * @param src Source string view to be written. + * @param codepoints Number of codepoints the source string contains. + */ + template + constexpr static void write_string(auto& dst, StringView&& src, std::size_t codepoints); + /** * @brief Writes the source string to the destination buffer with specific alignment. * * This function writes the source string to the destination buffer, applying the specified * alignment and fill character. * - * @tparam T Character type for the string view. + * @tparam StringView String view type, convertible to `std::basic_string_view`. * @param dst Destination buffer where the string will be written. * @param src Source string view to be written. * @param specs String specifications, including alignment and fill character. * @param codepoints Number of codepoints the source string contains. */ template - constexpr static void write_padded( + constexpr static void write_string_padded( auto& dst, StringView&& src, const Placeholder::StringSpecs& specs, std::size_t codepoints); - /** - * @brief Converts a multi-byte string to a single-byte string. 
- * - * This function converts a multi-byte string to a single-byte string and appends the result to - * the provided destination stream buffer. - * - * @tparam T Character type of the source string. - * @param out Destination stream buffer where the converted string will be appended. - * @param data Source multi-byte string to be converted. - * @param codepoints Number of codepoints the data string contains. - */ - static void from_multibyte(auto& out, std::string_view data, std::size_t codepoints); - std::basic_string m_pattern; std::vector m_placeholders; Levels m_levels; diff --git a/include/slimlog/util/unicode.h b/include/slimlog/util/unicode.h index 647028a..0f09bd0 100644 --- a/include/slimlog/util/unicode.h +++ b/include/slimlog/util/unicode.h @@ -262,6 +262,18 @@ constexpr auto to_ascii(Char chr) -> char return chr <= std::numeric_limits::max() ? static_cast(chr) : '\0'; } +/** + * @brief Converts a null-terminated multibyte string to a sequence of `Char` characters. + * + * Destination buffer has to be capable of storing at least @p codepoints + 1 characters + * including null terminator. + * + * @tparam Char Character type of the destination string. + * @param dest Pointer to destination buffer for the converted string. + * @param data Source multibyte string to be converted. + * @param codepoints Number of codepoints to be written to the destination string. + * @return Number of characters written including null terminator. 
+ */ template constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t codepoints) { @@ -271,7 +283,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod if constexpr (std::is_same_v) { std::mbstate_t state = {}; #if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__) - if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) { + if (mbsrtowcs_s(&written, dest, codepoints, &source, codepoints - 1, &state) != 0) { throw std::runtime_error("mbsrtowcs_s(): conversion error"); } #else @@ -280,8 +292,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod if (written == static_cast(-1)) { throw std::runtime_error("std::mbsrtowcs(): conversion error"); } - *std::next(dest, codepoints) = '\0'; - ++written; + *std::next(dest, written++) = '\0'; #endif } else { Char wchr; @@ -298,7 +309,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod throw std::runtime_error("std::mbrtocN(): conversion error"); break; case -2: - // Incomplete but valid character, skip it + // Incomplete but valid character, go further break; case -3: // Next character from surrogate pair was processed @@ -316,7 +327,7 @@ constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t cod break; } } - *std::next(dest, codepoints) = '\0'; + *dest = '\0'; ++written; } return written;