16#define USE_WINDOWS_API 0
19#if defined (_WIN32) && !defined (USE_WINDOWS_API)
20#define USE_WINDOWS_API 1
21#elif !defined (USE_WINDOWS_API)
22#define USE_WINDOWS_API 0
26#pragma message ("Using Windows API")
35 enum reason { invalid_utf8, invalid_wchar, invalid_char32 };
41 const char* what() const noexcept
43 return (
why == reason::invalid_utf8) ?
"Invalid UTF-8 encoding"
44 : (
why == reason::invalid_wchar) ?
"Invalid UTF-16 encoding"
45 : (
why ==
reason::invalid_char32) ?
"Invalid code-point value"
46 :
"Other UTF-8 exception";
59std::string
narrow (
const wchar_t* s,
size_t nch=0);
60std::string
narrow (
const std::wstring& s);
61std::string
narrow (
const char32_t* s,
size_t nch = 0);
62std::string
narrow (
const std::u32string& s);
63std::string
narrow (
char32_t r);
65std::wstring
widen (
const char* s,
size_t nch = 0);
66std::wstring
widen (
const std::string& s);
67std::u32string
runes (
const char* s,
size_t nch = 0);
68std::u32string
runes (
const std::string& s);
70char32_t rune (
const char* p);
71char32_t rune (
const std::string::const_iterator& p);
75bool is_valid (std::string::const_iterator p,
const std::string::const_iterator last);
76bool valid_str (
const char* s,
size_t nch = 0);
79char32_t next (std::string::const_iterator& ptr,
const std::string::const_iterator last);
80char32_t next (
const char*& ptr);
81char32_t next (
char*& p);
83char32_t prev (
const char*& ptr);
84char32_t prev (
char*& ptr);
85char32_t prev (std::string::const_iterator& ptr,
const std::string::const_iterator first);
87size_t length (
const std::string& s);
88size_t length (
const char* s);
96std::string
tolower (
const std::string& str);
97std::string
toupper (
const std::string& str);
98int icompare (
const std::string& s1,
const std::string& s2);
108bool isspace (std::string::const_iterator p);
112bool isblank (std::string::const_iterator p);
116bool isdigit (std::string::const_iterator p);
120bool isalnum (std::string::const_iterator p);
124bool isalpha (std::string::const_iterator p);
128bool isxdigit (std::string::const_iterator p);
132bool isupper (std::string::const_iterator p);
136bool islower (std::string::const_iterator p);
162bool is_valid (std::string::const_iterator p,
const std::string::const_iterator last)
194 return next (
const_cast<const char*&
>(ptr));
209 return prev (
const_cast<const char*&
>(ptr));
226char32_t rune (
const std::string::const_iterator& p)
287 return '0' <= r && r <=
'9';
316 return (
'0' <= r && r <=
'9') || (
'A' <= r && r <=
'Z') || (
'a' <= r && r <=
'z');
345 return (
'A' <= r && r <=
'Z') || (
'a' <= r && r <=
'z');
375 return (
'0' <= r && r <=
'9') || (
'A' <= r && r <=
'F') || (
'a' <= r && r <=
'f');
419#pragma comment (lib, "utf8")
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:270
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:30
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:175
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:192
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:654
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:314
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:343
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:373
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:637
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:285
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In-place version: converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:33
reason why
What triggered the exception.
Definition utf8.h:50
exception(reason c)
Constructor.
Definition utf8.h:38
reason
Possible causes.
Definition utf8.h:35
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:319
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances the iterator to the next code point.
Definition utf8.cpp:344
char32_t prev(const char *&ptr)
Decrements a character pointer to the previous UTF-8 character.
Definition utf8.cpp:485
size_t length(const std::string &s)
Counts the number of characters in a UTF-8 encoded string.
Definition utf8.cpp:575
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:149
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:54