UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1/*
2 (c) Mircea Neacsu 2014-2023. Licensed under MIT License.
3 See README file for full license terms.
4*/
5
7#pragma once
8
9#include <string>
10#include <vector>
11
// If USE_WINDOWS_API is not zero, the library issues direct Windows API
// calls. Otherwise it relies only on standard C++17 functions.
//
// NOTE(review): the definition just below pins the setting to 0 whenever the
// client has not defined it, so the per-platform default that follows
// (1 on Windows) is reached only if that pin is removed. The pin is guarded
// with #ifndef so that a value supplied by the client (for instance
// -DUSE_WINDOWS_API=1) is honoured instead of being redefined.

#ifndef USE_WINDOWS_API
#define USE_WINDOWS_API 0
#endif


#if defined (_WIN32) && !defined (USE_WINDOWS_API)
#define USE_WINDOWS_API 1
#elif !defined (USE_WINDOWS_API)
#define USE_WINDOWS_API 0
#endif

#if USE_WINDOWS_API
#pragma message ("Using Windows API")
#endif
28
29namespace utf8 {
30
/// Exception thrown on encoding/decoding failure
struct exception : public std::exception
{
  /// Possible causes
  enum reason { invalid_utf8, invalid_wchar, invalid_char32 };

  /// Constructor
  /// \param c what triggered the exception
  explicit exception (reason c)
    : why (c)
  {}

  /// Describe the failure.
  /// \return pointer to a string literal selected by #why
  const char* what() const noexcept override
  {
    switch (why)
    {
    case invalid_utf8:   return "Invalid UTF-8 encoding";
    case invalid_wchar:  return "Invalid UTF-16 encoding";
    case invalid_char32: return "Invalid code-point value";
    }
    // Reached only if `why` holds a value outside the enumerators above.
    return "Other UTF-8 exception";
  }

  /// What triggered the exception
  reason why;
};
52
/// Replacement character used for invalid encodings
const char32_t REPLACEMENT_CHARACTER = 0xfffd;
55
56
// Conversion functions.
// NOTE(review): the pointer overloads default `nch` to 0; presumably that
// means "convert up to the terminating NUL" - confirm against utf8.cpp.

/// Conversion from wide character to UTF-8
std::string narrow (const wchar_t* s, size_t nch = 0);
std::string narrow (const std::wstring& s);
// NOTE(review): presumably the UTF-32 counterparts of the overloads above.
std::string narrow (const char32_t* s, size_t nch = 0);
std::string narrow (const std::u32string& s);
std::string narrow (char32_t r);

/// Conversion from UTF-8 to wide character
std::wstring widen (const char* s, size_t nch = 0);
std::wstring widen (const std::string& s);
/// Conversion from UTF-8 to UTF-32
std::u32string runes (const char* s, size_t nch = 0);
std::u32string runes (const std::string& s);

/// Conversion from UTF-8 to UTF-32 (single character, returned as char32_t)
char32_t rune (const char* p);
char32_t rune (const std::string::const_iterator& p);
73
/// Check if pointer points to a valid UTF-8 encoding
bool is_valid (const char* p);
bool is_valid (std::string::const_iterator p, const std::string::const_iterator last);
/// Verifies if string is a valid UTF-8 string
bool valid_str (const char* s, size_t nch = 0);
bool valid_str (const std::string& s);

/// Decodes a UTF-8 encoded character and advances iterator to next code point
char32_t next (std::string::const_iterator& ptr, const std::string::const_iterator last);
char32_t next (const char*& ptr);
char32_t next (char*& p);

/// Decrements a character pointer to previous UTF-8 character
char32_t prev (const char*& ptr);
char32_t prev (char*& ptr);
char32_t prev (std::string::const_iterator& ptr, const std::string::const_iterator first);

/// Counts number of characters in a UTF-8 encoded string
size_t length (const std::string& s);
size_t length (const char* s);
89
/// In place version converts a UTF-8 encoded string to lowercase
void make_lower (std::string& str);
/// In place version; presumably converts to uppercase like toupper()
void make_upper (std::string& str);
/// Convert UTF-8 string to lower case
std::string tolower (const std::string& str);
/// Convert a UTF-8 string to upper case
std::string toupper (const std::string& str);
// NOTE(review): presumably a case-insensitive comparison with a
// strcmp-style result - confirm against the implementation.
int icompare (const std::string& s1, const std::string& s2);
100
// Character classification functions. Each test comes in three flavours:
// for a code point, for a pointer to UTF-8 text and for a string iterator.

/// Check if character is white space
bool isspace (char32_t r);
bool isspace (const char* p);
bool isspace (std::string::const_iterator p);

/// Check if character is space or tab
bool isblank (char32_t r);
bool isblank (const char* p);
bool isblank (std::string::const_iterator p);

/// Check if character is a decimal digit (0-9)
bool isdigit (char32_t r);
bool isdigit (const char* p);
bool isdigit (std::string::const_iterator p);

/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
bool isalnum (char32_t r);
bool isalnum (const char* p);
bool isalnum (std::string::const_iterator p);

/// Check if character is an alphabetic character (A-Z or a-z)
bool isalpha (char32_t r);
bool isalpha (const char* p);
bool isalpha (std::string::const_iterator p);

/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
bool isxdigit (char32_t r);
bool isxdigit (const char* p);
bool isxdigit (std::string::const_iterator p);

// NOTE(review): presumably tests for an upper case character; the code point
// overload is defined in casecvt.cpp.
bool isupper (char32_t r);
bool isupper (const char* p);
bool isupper (std::string::const_iterator p);

// NOTE(review): presumably tests for a lower case character; the code point
// overload is defined in casecvt.cpp.
bool islower (char32_t r);
bool islower (const char* p);
bool islower (std::string::const_iterator p);
138
139
140// INLINES --------------------------------------------------------------------
141
148inline
149bool is_valid (const char* p)
150{
151 return next (p) != REPLACEMENT_CHARACTER;
152}
153
161inline
162bool is_valid (std::string::const_iterator p, const std::string::const_iterator last)
163{
164 return next (p, last) != REPLACEMENT_CHARACTER;
165}
166
174inline
175char32_t rune (const char* p)
176{
177 return next (p);
178}
179
180
191inline
192char32_t next (char*& ptr)
193{
194 return next (const_cast<const char*&>(ptr));
195}
196
206inline
207char32_t prev (char*& ptr)
208{
209 return prev (const_cast<const char*&>(ptr));
210}
211
212
218inline
219bool valid_str (const std::string& s)
220{
221 return valid_str (s.c_str (), s.size());
222}
223
225inline
226char32_t rune (const std::string::const_iterator& p)
227{
228 return rune (&(*p));
229}
230
231
240inline
241bool isspace (const char* p)
242{
243 return isspace (rune (p));
244}
245
247inline
248bool isspace (std::string::const_iterator p)
249{
250 return isspace (rune(p));
251}
252
253
266inline
267bool isblank (const char *p)
268{
269 return isblank(rune(p));
270}
271
273inline
274bool isblank (std::string::const_iterator p)
275{
276 return isblank (rune (p));
277}
278
/// Check if character is a decimal digit (0-9).
///
/// \param r code point to check
/// \return `true` if `r` is in the range U+0030 ('0') to U+0039 ('9')
inline
bool isdigit (char32_t r)
{
  return U'0' <= r && r <= U'9';
}
289
295inline
296bool isdigit (const char *p)
297{
298 return isdigit (rune (p));
299}
300
302inline
303bool isdigit (std::string::const_iterator p)
304{
305 return isdigit (rune (p));
306}
307
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z).
///
/// \param r code point to check
/// \return `true` if `r` is an ASCII digit or an ASCII letter of either case
inline
bool isalnum (char32_t r)
{
  return (U'0' <= r && r <= U'9')
      || (U'A' <= r && r <= U'Z')
      || (U'a' <= r && r <= U'z');
}
318
324inline
325bool isalnum (const char *p)
326{
327 return isalnum (rune (p));
328}
329
331inline
332bool isalnum (std::string::const_iterator p)
333{
334 return isalnum (rune (p));
335}
336
/// Check if character is an alphabetic character (A-Z or a-z).
///
/// \param r code point to check
/// \return `true` if `r` is an ASCII letter of either case
inline
bool isalpha (char32_t r)
{
  return (U'A' <= r && r <= U'Z') || (U'a' <= r && r <= U'z');
}
347
353inline
354bool isalpha (const char *p)
355{
356 return isalpha (rune (p));
357}
358
360inline
361bool isalpha (std::string::const_iterator p)
362{
363 return isalpha (&*p);
364}
365
366
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f).
///
/// \param r code point to check
/// \return `true` if `r` is an ASCII digit or a letter A-F of either case
inline
bool isxdigit (char32_t r)
{
  return (U'0' <= r && r <= U'9')
      || (U'A' <= r && r <= U'F')
      || (U'a' <= r && r <= U'f');
}
377
383inline
384bool isxdigit (const char *p)
385{
386 return isxdigit(rune(p));
387}
388
390inline
391bool isxdigit (std::string::const_iterator p)
392{
393 return isxdigit (rune(p));
394}
395
397inline
398bool isupper (std::string::const_iterator p)
399{
400 return isupper (rune(p));
401}
402
404inline
405bool islower (std::string::const_iterator p)
406{
407 return islower (rune(p));
408}
409
410
411}; //namespace utf8
412
413#ifdef _WIN32
414#include <utf8/winutf8.h>
415#endif
416#include <utf8/ini.h>
417
418#ifdef _MSC_VER
419#pragma comment (lib, "utf8")
420#endif
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:270
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:30
char32_t rune(const char *p)
Conversion of a single UTF-8 encoded character to UTF-32.
Definition utf8.h:175
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:192
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:654
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:314
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:343
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:373
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:637
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:285
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In place version converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:33
reason why
What triggered the exception.
Definition utf8.h:50
exception(reason c)
Constructor.
Definition utf8.h:38
reason
Possible causes.
Definition utf8.h:35
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:319
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:344
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:485
size_t length(const std::string &s)
Counts number of characters in a UTF-8 encoded string.
Definition utf8.cpp:575
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:149
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:54
Windows specific parts.