Skip to content

Commit

Permalink
Move from_multibyte to Util::Unicode
Browse files Browse the repository at this point in the history
Signed-off-by: Pavel Artsishevsky <polter.rnd@gmail.com>
  • Loading branch information
polter-rnd committed Dec 16, 2024
1 parent 22729c6 commit c083041
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 141 deletions.
155 changes: 14 additions & 141 deletions include/slimlog/pattern-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,11 @@
#include <chrono>
#include <climits>
#include <concepts>
#if defined(__cpp_unicode_characters) or defined(__cpp_char8_t)
#include <cuchar> // IWYU pragma: keep
#endif
#include <cwchar>
#include <cstddef>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
Expand All @@ -43,72 +39,6 @@ namespace SlimLog {
/** @cond */
namespace Detail {

// Fallback functions to detect missing ones from stdlib
namespace Fallback {
#ifdef __cpp_char8_t
template<typename... Args>
inline auto mbrtoc8(Args... /*unused*/)
{
return std::monostate{};
};
#endif
#ifdef __cpp_unicode_characters
template<typename... Args>
inline auto mbrtoc16(Args... /*unused*/)
{
return std::monostate{};
};
template<typename... Args>
inline auto mbrtoc32(Args... /*unused*/)
{
return std::monostate{};
};
#endif
} // namespace Fallback

template<typename Char>
struct FromMultibyte {
auto get(Char* chr, const char* str, std::size_t len) -> int
{
using namespace Fallback;
// NOLINTBEGIN (concurrency-mt-unsafe)
if constexpr (std::is_same_v<Char, wchar_t>) {
return handle(mbrtowc(chr, str, len, &m_state));
#ifdef __cpp_char8_t
} else if constexpr (std::is_same_v<Char, char8_t>) {
return handle(mbrtoc8(chr, str, len, &m_state));
#endif
#ifdef __cpp_unicode_characters
} else if constexpr (std::is_same_v<Char, char16_t>) {
return handle(mbrtoc16(chr, str, len, &m_state));
} else if constexpr (std::is_same_v<Char, char32_t>) {
return handle(mbrtoc32(chr, str, len, &m_state));
#endif
} else {
static_assert(Util::Types::AlwaysFalse<Char>{}, "Unsupported character type");
return -1;
}
// NOLINTEND (concurrency-mt-unsafe)
}

static auto handle(std::size_t res) -> int
{
return static_cast<int>(res);
}

template<typename T = std::monostate>
static auto handle(T /*unused*/) -> int
{
static_assert(
Util::Types::AlwaysFalse<Char>{},
"C++ stdlib does not support conversion to given character type");
return -1;
}

private:
std::mbstate_t m_state = {};
};

template<typename Char, typename StringType>
concept HasConvertString = requires(StringType value) {
{ ConvertString<Char, StringType>{}(value) } -> std::same_as<std::basic_string_view<Char>>;
Expand Down Expand Up @@ -342,8 +272,12 @@ void Pattern<Char>::format_string(auto& out, const auto& item, StringView&& data
} else {
using DataChar = typename std::remove_cvref_t<StringView>::value_type;
if constexpr (std::is_same_v<DataChar, char> && !std::is_same_v<Char, char>) {
// NOLINTNEXTLINE (cppcoreguidelines-slicing)
from_multibyte(out, std::forward<StringView>(data), codepoints);
out.resize(out.size() + codepoints + 1);
const std::size_t written = Util::Unicode::from_multibyte(
std::prev(out.end()),
std::forward<StringView>(data), // NOLINT(cppcoreguidelines-slicing)
codepoints);
out.resize(out.size() + codepoints - written);
} else {
out.append(std::forward<StringView>(data));
}
Expand Down Expand Up @@ -519,7 +453,7 @@ constexpr void Pattern<Char>::write_padded(
const auto right_padding = padding - left_padding;

// Reserve exact amount for data + padding
dst.reserve(dst.size() + src.size() + padding * specs.fill.size());
dst.reserve(dst.size() + codepoints + padding * specs.fill.size());

// Lambda for filling with single character or multibyte pattern
constexpr auto FillPattern
Expand Down Expand Up @@ -558,8 +492,12 @@ constexpr void Pattern<Char>::write_padded(
// Fill data
using DataChar = typename std::remove_cvref_t<StringView>::value_type;
if constexpr (std::is_same_v<DataChar, char> && !std::is_same_v<Char, char>) {
// NOLINTNEXTLINE (cppcoreguidelines-slicing)
from_multibyte(dst, std::forward<StringView>(src), codepoints);
dst.resize(dst.size() + codepoints + 1);
const std::size_t written = Util::Unicode::from_multibyte(
std::prev(dst.end()),
std::forward<StringView>(src), // NOLINT(cppcoreguidelines-slicing)
codepoints);
dst.resize(dst.size() + codepoints - written);
} else {
dst.append(std::forward<StringView>(src));
}
Expand All @@ -571,69 +509,4 @@ constexpr void Pattern<Char>::write_padded(
}
}

template<typename Char>
void Pattern<Char>::from_multibyte(auto& out, std::string_view data, std::size_t codepoints)
{
const auto buf_size = out.size();
#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__)
out.reserve(buf_size + codepoints + 1);
#else
out.reserve(buf_size + codepoints);
#endif

Char* dest = std::next(out.begin(), buf_size);
const char* source = data.data();

std::size_t written = 0;
if constexpr (std::is_same_v<Char, wchar_t>) {
std::mbstate_t state = {};
#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__)
if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) {
throw std::runtime_error("mbsrtowcs_s(): conversion error");
}
written -= 1; // Don't take into account null terminator
#else
// NOLINTNEXTLINE (concurrency-mt-unsafe)
written = std::mbsrtowcs(dest, &source, codepoints, &state);
if (written == static_cast<std::size_t>(-1)) {
throw std::runtime_error("std::mbsrtowcs(): conversion error");
}
#endif
} else {
Char wchr;
Detail::FromMultibyte<Char> dispatcher;
for (auto source_size = data.size(); source_size > 0;) {
const int next = dispatcher.get(&wchr, source, source_size);
switch (next) {
case 0:
// Null character, finish processing
source_size = 0;
break;
case -1:
// Encoding error occured
throw std::runtime_error("std::mbrtocN(): conversion error");
break;
case -2:
// Incomplete but valid character, skip it
break;
case -3:
// Next character from surrogate pair was processed
*dest = wchr;
written++;
std::advance(dest, 1);
break;
default:
// Successfuly processed
*dest = wchr;
written++;
std::advance(dest, 1);
std::advance(source, next);
source_size -= next;
break;
}
}
}
out.resize(buf_size + written);
}

} // namespace SlimLog
138 changes: 138 additions & 0 deletions include/slimlog/util/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,94 @@

#pragma once

#include <slimlog/util/types.h>

#include <array>
#include <cassert>
#include <cstdint>
#if defined(__cpp_unicode_characters) or defined(__cpp_char8_t)
#include <cuchar> // IWYU pragma: keep
#endif
#include <cwchar>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string_view>
#include <variant>

namespace SlimLog::Util::Unicode {

/** @cond */
namespace Detail {

// Fallback functions to detect missing ones from stdlib
namespace Fallback {
#ifdef __cpp_char8_t
template<typename... Args>
inline auto mbrtoc8(Args... /*unused*/)
{
return std::monostate{};
};
#endif
#ifdef __cpp_unicode_characters
template<typename... Args>
inline auto mbrtoc16(Args... /*unused*/)
{
return std::monostate{};
};
template<typename... Args>
inline auto mbrtoc32(Args... /*unused*/)
{
return std::monostate{};
};
#endif
} // namespace Fallback

template<typename Char>
struct FromMultibyte {
auto get(Char* chr, const char* str, std::size_t len) -> int
{
using namespace Fallback;
// NOLINTBEGIN (concurrency-mt-unsafe)
if constexpr (std::is_same_v<Char, wchar_t>) {
return handle(mbrtowc(chr, str, len, &m_state));
#ifdef __cpp_char8_t
} else if constexpr (std::is_same_v<Char, char8_t>) {
return handle(mbrtoc8(chr, str, len, &m_state));
#endif
#ifdef __cpp_unicode_characters
} else if constexpr (std::is_same_v<Char, char16_t>) {
return handle(mbrtoc16(chr, str, len, &m_state));
} else if constexpr (std::is_same_v<Char, char32_t>) {
return handle(mbrtoc32(chr, str, len, &m_state));
#endif
} else {
static_assert(Util::Types::AlwaysFalse<Char>{}, "Unsupported character type");
return -1;
}
// NOLINTEND (concurrency-mt-unsafe)
}

static auto handle(std::size_t res) -> int
{
return static_cast<int>(res);
}

template<typename T = std::monostate>
static auto handle(T /*unused*/) -> int
{
static_assert(
Util::Types::AlwaysFalse<Char>{},
"C++ stdlib does not support conversion to given character type");
return -1;
}

private:
std::mbstate_t m_state = {};
};

} // namespace Detail

/**
* @brief Calculates the length of a Unicode code point starting from the given pointer.
*
Expand Down Expand Up @@ -184,4 +262,64 @@ constexpr auto to_ascii(Char chr) -> char
return chr <= std::numeric_limits<unsigned char>::max() ? static_cast<char>(chr) : '\0';
}

template<typename Char>
constexpr auto from_multibyte(Char* dest, std::string_view data, std::size_t codepoints)
{
const char* source = data.data();
std::size_t written = 0;

if constexpr (std::is_same_v<Char, wchar_t>) {
std::mbstate_t state = {};
#if defined(_WIN32) and defined(__STDC_WANT_SECURE_LIB__)
if (mbsrtowcs_s(&written, dest, codepoints + 1, &source, _TRUNCATE, &state) != 0) {
throw std::runtime_error("mbsrtowcs_s(): conversion error");
}
#else
// NOLINTNEXTLINE (concurrency-mt-unsafe)
written = std::mbsrtowcs(dest, &source, codepoints, &state);
if (written == static_cast<std::size_t>(-1)) {
throw std::runtime_error("std::mbsrtowcs(): conversion error");
}
*std::next(dest, codepoints) = '\0';
++written;
#endif
} else {
Char wchr;
Detail::FromMultibyte<Char> dispatcher;
for (auto source_size = data.size(); source_size > 0;) {
const int next = dispatcher.get(&wchr, source, source_size);
switch (next) {
case 0:
// Null character, finish processing
source_size = 0;
break;
case -1:
// Encoding error occured
throw std::runtime_error("std::mbrtocN(): conversion error");
break;
case -2:
// Incomplete but valid character, skip it
break;
case -3:
// Next character from surrogate pair was processed
*dest = wchr;
++written;
std::advance(dest, 1);
break;
default:
// Successfuly processed
*dest = wchr;
++written;
std::advance(dest, 1);
std::advance(source, next);
source_size -= next;
break;
}
}
*std::next(dest, codepoints) = '\0';
++written;
}
return written;
}

} // namespace SlimLog::Util::Unicode

0 comments on commit c083041

Please sign in to comment.