// Include guard and UTF helper include, followed by a compile-time check that
// std::mbstate_t provides at least two bytes of storage (the diagnostic text
// explains that the state must be able to hold a UTF-16 value).
9 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED    10 #define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED    13 #include <boost/nowide/utf/utf.hpp>    21     static_assert(
sizeof(std::mbstate_t) >= 2, 
"mbstate_t is too small to store a UTF-16 codepoint");
    // Helper that takes untyped destination and source pointers and views both
    // as unsigned char buffers.
    // NOTE(review): the statements performing the actual copy are not visible
    // in this listing; presumably two bytes are copied, as the name suggests —
    // confirm against the full header.
    24         inline void copy_uint16_t(
void* dst, 
const void* src)
    26             unsigned char* cdst = static_cast<unsigned char*>(dst);
    27             const unsigned char* csrc = static_cast<const unsigned char*>(src);
    // Extracts a std::uint16_t from the given std::mbstate_t by passing the
    // addresses of `dst` and `src` to copy_uint16_t.
    // NOTE(review): the declaration of `dst` and the return statement are not
    // visible in this listing; presumably `dst` is the value returned.
    31         inline std::uint16_t read_state(
const std::mbstate_t& src)
    34             copy_uint16_t(&dst, &src);
    // Stores the std::uint16_t `src` into the std::mbstate_t `dst` by passing
    // both addresses to copy_uint16_t (the counterpart of read_state).
    37         inline void write_state(std::mbstate_t& dst, 
const std::uint16_t src)
    39             copy_uint16_t(&dst, &src);
    // Template head taking a character type and its size in bytes; CharSize
    // defaults to sizeof(CharType).
    // NOTE(review): the declared name is not visible in this listing;
    // presumably this is the primary utf8_codecvt template that the
    // <CharType, 2> and <CharType, 4> specializations refine.
    49     template<
typename CharType, 
int CharSize = sizeof(CharType)>
    // Partial specialization of utf8_codecvt for CharSize == 2, derived from
    // std::codecvt<CharType, char, std::mbstate_t>.
    // NOTE(review): the BEGIN/END macro pair presumably silences codecvt
    // deprecation warnings around the declaration — confirm in the macro's
    // definition.
    52     BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
    54     template<
typename CharType>
    55     class BOOST_SYMBOL_VISIBLE 
utf8_codecvt<CharType, 2> : 
public std::codecvt<CharType, char, std::mbstate_t>
    // Reject character types narrower than two bytes at compile time.
    58         static_assert(
sizeof(CharType) >= 2, 
"CharType must be able to store UTF16 code point");
    // Constructor: forwards the reference count `refs` (default 0) to the
    // std::codecvt base class.
    60         utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
    62         BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
    // Alias for the wide character type used in the conversion signatures.
    65         using uchar = CharType;
    // do_unshift(): returns std::codecvt_base::error when the conversion state
    // `s` holds a non-zero value (as read by detail::read_state), otherwise
    // std::codecvt_base::ok.
    // NOTE(review): one statement between the check and the final return is
    // not visible in this listing; presumably it assigns `next`.
    67         std::codecvt_base::result do_unshift(std::mbstate_t& s, 
char* from, 
char* , 
char*& next)
 const override    69             if(detail::read_state(s) != 0)
    70                 return std::codecvt_base::error;
    72             return std::codecvt_base::ok;
    // Overrides of do_encoding(), do_max_length() and do_always_noconv(): only
    // their signatures survive in this listing, so their return values cannot
    // be documented from here.
    //
    // do_length(): walks [from, from_end) while the budget `max` is non-zero
    // and returns the number of input chars consumed (from - save_from) as an
    // int. The 16-bit conversion state is loaded from std_state on entry and
    // written back before returning.
    74         int do_encoding() 
const noexcept 
override    78         int do_max_length() 
const noexcept 
override    82         bool do_always_noconv() 
const noexcept 
override    88         int do_length(std::mbstate_t& std_state, 
const char* from, 
const char* from_end, 
size_t max)
 const override    92             std::uint16_t state = detail::read_state(std_state);
    93             const char* save_from = from;
    // Stop once the budget is used up or the input is exhausted.
    99             while(max > 0 && from < from_end)
   101                 const char* prev_from = from;
   // NOTE(review): the statement producing `ch` is not visible in this
   // listing. When the UTF-16 width of ch still fits into the remaining
   // budget, that width is subtracted from `max`.
   112                 if(BOOST_LIKELY(static_cast<size_t>(utf16_traits::width(ch)) <= max))
   114                     max -= utf16_traits::width(ch);
   // NOTE(review): the branch structure around the following lines is not
   // visible; ch is encoded into a two-element scratch buffer, which is why
   // the static_assert pins utf16_traits::max_width to 2.
   117                     static_assert(utf16_traits::max_width == 2, 
"Required for below");
   118                     std::uint16_t tmpOut[2]{};
   119                     utf16_traits::encode(ch, tmpOut);
   // Persist the (possibly updated) state and report the consumed input length.
   124             detail::write_state(std_state, state);
   125             return static_cast<int>(from - save_from);
   // do_in(): converts chars from the input range into CharType units,
   // starting from result `ok` and carrying a 16-bit value across calls via
   // std_state.
   // NOTE(review): several parameters and body statements are missing from
   // this listing; the comments below describe only the visible lines.
   128         std::codecvt_base::result do_in(std::mbstate_t& std_state, 
   130                                         const char* from_end,
   131                                         const char*& from_next,
   134                                         uchar*& to_next)
 const override   136             std::codecvt_base::result r = std::codecvt_base::ok;
   142             std::uint16_t state = detail::read_state(std_state);
   // A non-zero state is written to the output first, provided there is room.
   // NOTE(review): presumably this is a unit left over from a previous call
   // and the state is cleared afterwards — the reset is not visible here.
   144             if(state && to < to_end)
   146                 *to++ = static_cast<CharType>(state);
   // Main loop: continue while both output space and input remain.
   149             while(to < to_end && from < from_end)
   151                 const char* from_saved = from;
   // NOTE(review): the condition guarding this `partial` result is not visible.
   161                     r = std::codecvt_base::partial;
   // If every UTF-16 unit of ch fits in the remaining output, encode directly.
   165                 if(BOOST_LIKELY(utf16_traits::width(ch) <= to_end - to))
   167                     to = utf16_traits::encode(ch, to);
   // Otherwise ch is encoded into a two-element scratch buffer and only its
   // first unit is written out.
   // NOTE(review): what happens to tmpOut[1] is not visible; presumably it is
   // kept in `state` for the next call.
   170                     static_assert(utf16_traits::max_width == 2, 
"Required for below");
   171                     std::uint16_t tmpOut[2]{};
   172                     utf16_traits::encode(ch, tmpOut);
   173                     *to++ = static_cast<CharType>(tmpOut[0]);
   // An `ok` result is downgraded to `partial` when input is left unconsumed
   // or a value is still pending in `state`.
   180             if(r == std::codecvt_base::ok && (from != from_end || state != 0))
   181                 r = std::codecvt_base::partial;
   // Persist the state for the next call.
   182             detail::write_state(std_state, state);
   // do_out(): converts CharType units from the input range into chars,
   // starting from result `ok` and carrying a 16-bit value across calls via
   // std_state.
   // NOTE(review): several parameters and body statements are missing from
   // this listing; the comments below describe only the visible lines.
   186         std::codecvt_base::result do_out(std::mbstate_t& std_state,
   188                                          const uchar* from_end,
   189                                          const uchar*& from_next,
   192                                          char*& to_next)
 const override   194             std::codecvt_base::result r = std::codecvt_base::ok;
   199             std::uint16_t state = detail::read_state(std_state);
   // One input unit is examined per iteration while output space and input remain.
   200             for(; to < to_end && from < from_end; ++from)
   202                 std::uint32_t ch = 0;
   // The stored state is used as the first unit (w1) and the current input
   // unit as the second (w2); when w2 is a trail unit the two are combined
   // into one code point.
   // NOTE(review): the condition selecting this path is not visible;
   // presumably it is taken when `state` is non-zero.
   206                     std::uint16_t w1 = state;
   207                     std::uint16_t w2 = *from;
   208                     if(BOOST_LIKELY(utf16_traits::is_trail(w2)))
   210                         ch = utf16_traits::combine_surrogate(w1, w2);
   // Otherwise the current unit is classified as a single code point or as a
   // first surrogate (the bodies of both branches are not visible here).
   217                     std::uint16_t w1 = *from;
   218                     if(BOOST_LIKELY(utf16_traits::is_single_codepoint(w1)))
   221                     } 
else if(BOOST_LIKELY(utf16_traits::is_first_surrogate(w1)))
   // Not enough output space for `len` chars: report `partial`.
   // NOTE(review): the definition of `len` is not visible in this listing.
   235                 if(to_end - to < len)
   237                     r = std::codecvt_base::partial;
   // An `ok` result is downgraded to `partial` when input is left unconsumed
   // or a value is still pending in `state`.
   245             if(r == std::codecvt_base::ok && (from != from_end || state != 0))
   246                 r = std::codecvt_base::partial;
   // Persist the state for the next call.
   247             detail::write_state(std_state, state);
   // Partial specialization of utf8_codecvt for CharSize == 4, derived from
   // std::codecvt<CharType, char, std::mbstate_t>.
   // NOTE(review): the BEGIN/END macro pair presumably silences codecvt
   // deprecation warnings around the declaration — confirm in the macro's
   // definition.
   252     BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
   254     template<
typename CharType>
   255     class BOOST_SYMBOL_VISIBLE 
utf8_codecvt<CharType, 4> : 
public std::codecvt<CharType, char, std::mbstate_t>
   // Constructor: forwards the reference count `refs` (default 0) to the
   // std::codecvt base class.
   258         utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
   260         BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
   // Alias for the wide character type used in the conversion signatures.
   263         using uchar = CharType;
   // do_unshift(): returns std::codecvt_base::noconv. The state parameter is
   // unnamed, so this override does not read it.
   // NOTE(review): one statement before the return is not visible in this
   // listing; presumably it assigns `next`.
   265         std::codecvt_base::result
   266         do_unshift(std::mbstate_t& , 
char* from, 
char* , 
char*& next)
 const override   269             return std::codecvt_base::noconv;
   // Overrides of do_encoding(), do_max_length() and do_always_noconv(): only
   // their signatures survive in this listing, so their return values cannot
   // be documented from here.
   //
   // do_length(): walks [from, from_end) while the budget `max` is non-zero
   // and returns the number of input chars consumed (from - start_from) as an
   // int. The state parameter is unnamed, so it is not used.
   271         int do_encoding() 
const noexcept 
override   275         int do_max_length() 
const noexcept 
override   279         bool do_always_noconv() 
const noexcept 
override   284         int do_length(std::mbstate_t& , 
const char* from, 
const char* from_end, 
size_t max)
 const override   286             const char* start_from = from;
   // Stop once the budget is used up or the input is exhausted.
   288             while(max > 0 && from < from_end)
   // Remember where this iteration started.
   // NOTE(review): the code that uses save_from is not visible; presumably
   // `from` is rewound to it when a sequence cannot be decoded.
   290                 const char* save_from = from;
   302             return static_cast<int>(from - start_from);
   // do_in(): converts chars from the input range into CharType units,
   // starting from result `ok`. The state parameter is unnamed, so it is not
   // used.
   // NOTE(review): several parameters and body statements are missing from
   // this listing; the comments below describe only the visible lines.
   305         std::codecvt_base::result do_in(std::mbstate_t& ,
   307                                         const char* from_end,
   308                                         const char*& from_next,
   311                                         uchar*& to_next)
 const override   313             std::codecvt_base::result r = std::codecvt_base::ok;
   // Main loop: continue while both output space and input remain.
   315             while(to < to_end && from < from_end)
   317                 const char* from_saved = from;
   // NOTE(review): the condition guarding this `partial` result is not visible.
   326                     r = std::codecvt_base::partial;
   // An `ok` result is downgraded to `partial` when input is left unconsumed.
   334             if(r == std::codecvt_base::ok && from != from_end)
   335                 r = std::codecvt_base::partial;
   // do_out(): converts CharType units from the input range into chars,
   // starting from result `ok`. The state parameter is unnamed, so it is not
   // used.
   // NOTE(review): several parameters and body statements are missing from
   // this listing; the comments below describe only the visible lines.
   339         std::codecvt_base::result do_out(std::mbstate_t& ,
   341                                          const uchar* from_end,
   342                                          const uchar*& from_next,
   345                                          char*& to_next)
 const override   347             std::codecvt_base::result r = std::codecvt_base::ok;
   // Main loop: continue while both output space and input remain.
   348             while(to < to_end && from < from_end)
   350                 std::uint32_t ch = 0;
   // Not enough output space for `len` chars: report `partial`.
   // NOTE(review): the definition of `len` is not visible in this listing.
   357                 if(to_end - to < len)
   359                     r = std::codecvt_base::partial;
   // An `ok` result is downgraded to `partial` when input is left unconsumed.
   367             if(r == std::codecvt_base::ok && from != from_end)
   368                 r = std::codecvt_base::partial;
 UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57
static Iterator encode(code_point value, Iterator out)
Definition: utf8_codecvt.hpp:50
static const code_point illegal
Special constant that defines illegal code point.
Definition: utf.hpp:33
#define BOOST_NOWIDE_REPLACEMENT_CHARACTER
Definition: replacement.hpp:16
static const code_point incomplete
Special constant that defines incomplete code point.
Definition: utf.hpp:38
static int width(code_point value)
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:43
static code_point decode(Iterator &p, Iterator e)