29#if defined (_WIN32) && !defined (UTF8_USE_WINDOWS_API)
30#define UTF8_USE_WINDOWS_API 1
31#elif !defined (UTF8_USE_WINDOWS_API)
32#define UTF8_USE_WINDOWS_API 0
35#if !UTF8_USE_WINDOWS_API
38#if (defined(_MSVC_LANG) && _MSVC_LANG < 201703L) \
39 || (!defined(_MSVC_LANG) && (__cplusplus < 201703L))
40#error "UTF8 requires c++17 or newer if not using Windows API functions"
51 enum cause { invalid_utf8=1, invalid_wchar, invalid_char32 };
59 const char*
what() const noexcept
61 return (
code == cause::invalid_utf8) ?
"Invalid UTF-8 encoding"
62 : (
code == cause::invalid_wchar) ?
"Invalid UTF-16 encoding"
63 : (
code == cause::invalid_char32) ?
"Invalid code-point value"
64 :
"Other UTF-8 exception";
77action error_mode (action mode);
85std::string
narrow (
const wchar_t* s,
size_t nch=0);
86std::string
narrow (
const std::wstring& s);
87std::string
narrow (
const char32_t* s,
size_t nch = 0);
88std::string
narrow (
const std::u32string& s);
89std::string
narrow (
char32_t r);
91std::wstring
widen (
const char* s,
size_t nch = 0);
92std::wstring
widen (
const std::string& s);
93std::wstring
widen (
char32_t r);
95std::u32string
runes (
const char* s,
size_t nch = 0);
96std::u32string
runes (
const std::string& s);
98char32_t rune (
const char* p);
99char32_t rune (
const std::string::const_iterator& p);
103bool is_valid (std::string::const_iterator p,
const std::string::const_iterator last);
104bool valid_str (
const char* s,
size_t nch = 0);
107char32_t next (std::string::const_iterator& ptr,
const std::string::const_iterator last);
108char32_t next (std::string::iterator& ptr,
const std::string::const_iterator last);
109char32_t next (
const char*& ptr);
110char32_t next (
char*& p);
112char32_t prev (
const char*& ptr);
113char32_t prev (
char*& ptr);
114char32_t prev (std::string::const_iterator& ptr,
const std::string::const_iterator first);
115char32_t prev (std::string::iterator& ptr,
const std::string::const_iterator first);
117size_t length (
const std::string& s);
118size_t length (
const char* s);
126std::string
tolower (
const std::string& str);
127std::string
toupper (
const std::string& str);
128int icompare (
const std::string& s1,
const std::string& s2);
138bool isspace (std::string::const_iterator p);
142bool isblank (std::string::const_iterator p);
146bool isdigit (std::string::const_iterator p);
150bool isalnum (std::string::const_iterator p);
154bool isalpha (std::string::const_iterator p);
158bool isxdigit (std::string::const_iterator p);
162bool isupper (std::string::const_iterator p);
166bool islower (std::string::const_iterator p);
171class ifstream :
public std::ifstream
175 explicit ifstream (
const char* filename, std::ios_base::openmode mode = ios_base::in)
176 : std::ifstream (utf8::
widen (filename), mode) {};
177 explicit ifstream (
const std::string& filename, std::ios_base::openmode mode = ios_base::in)
179 ifstream (ifstream&& other) noexcept : std::ifstream ((std::ifstream&&)other) {};
180 ifstream (
const ifstream& rhs) =
delete;
182 void open (
const char* filename, std::ios_base::openmode mode = ios_base::in)
184 std::ifstream::open (
utf8::widen (filename), mode);
186 void open (
const std::string& filename, ios_base::openmode mode = ios_base::in)
188 std::ifstream::open (
utf8::widen (filename), mode);
192class ofstream :
public std::ofstream
195 ofstream () : std::ofstream () {};
196 explicit ofstream (
const char* filename, std::ios_base::openmode mode = ios_base::out)
197 : std::ofstream (utf8::
widen (filename), mode) {};
198 explicit ofstream (
const std::string& filename, std::ios_base::openmode mode = ios_base::out)
199 : std::ofstream (utf8::
widen (filename), mode) {};
200 ofstream (ofstream&& other) noexcept : std::ofstream ((std::ofstream&&)other) {};
201 ofstream (
const ofstream& rhs) =
delete;
203 void open (
const char* filename, ios_base::openmode mode = ios_base::out)
205 std::ofstream::open (
utf8::widen (filename), mode);
207 void open (
const std::string& filename, ios_base::openmode mode = ios_base::out)
209 std::ofstream::open (
utf8::widen (filename), mode);
214class fstream :
public std::fstream
217 fstream () : std::fstream () {};
218 explicit fstream (
const char* filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
219 : std::fstream (utf8::
widen (filename), mode) {};
220 explicit fstream (
const std::string& filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
221 : std::fstream (utf8::
widen (filename), mode) {};
222 fstream (fstream&& other) noexcept : std::fstream ((std::fstream&&)other) {};
223 fstream (
const fstream& rhs) =
delete;
225 void open (
const char* filename, ios_base::openmode mode = ios_base::in | ios_base::out)
229 void open (
const std::string& filename, ios_base::openmode mode = ios_base::in | ios_base::out)
238typedef std::ofstream ofstream;
239typedef std::fstream fstream;
254 auto prev_mode =
error_mode (action::replace);
268bool is_valid (std::string::const_iterator p,
const std::string::const_iterator last)
271 auto prev_mode =
error_mode (action::replace);
279char32_t next (std::string::iterator& ptr,
const std::string::const_iterator last)
281 return next (*(std::string::const_iterator*)(&ptr), last);
286char32_t prev (std::string::iterator& ptr,
const std::string::const_iterator first)
288 return prev (*(std::string::const_iterator*)(&ptr), first);
318 return next (
const_cast<const char*&
>(ptr));
333 return prev (
const_cast<const char*&
>(ptr));
350char32_t rune (
const std::string::const_iterator& p)
411 return '0' <= r && r <=
'9';
440 return (
'0' <= r && r <=
'9') || (
'A' <= r && r <=
'Z') || (
'a' <= r && r <=
'z');
469 return (
'A' <= r && r <=
'Z') || (
'a' <= r && r <=
'z');
499 return (
'0' <= r && r <=
'9') || (
'A' <= r && r <=
'F') || (
'a' <= r && r <=
'f');
544FILE*
fopen (
const std::string& filename,
const std::string& mode)
548 _wfopen_s (&h,
widen (filename).c_str (),
widen (mode).c_str ());
550 h = ::fopen (filename.c_str(), mode.c_str());
557FILE*
fopen (
const char* filename,
const char* mode)
561 _wfopen_s (&h,
widen (filename).c_str (),
widen (mode).c_str ());
563 h = ::fopen (filename, mode);
575#if UTF8_USE_WINDOWS_API
576 wchar_t tmp[_MAX_PATH];
577 if (_wgetcwd (tmp, _countof (tmp)))
580 return std::string ();
583 std::filesystem::path wd = std::filesystem::current_path (ec);
585 return std::string ();
587 return narrow (wd.native());
601bool chdir (
const std::string& dirname)
603#if UTF8_USE_WINDOWS_API
604 return (_wchdir (
widen (dirname).c_str ()) == 0);
607 std::filesystem::path dir (
widen (dirname));
609 std::filesystem::path dir (dirname);
612 std::filesystem::current_path (dir, ec);
621#if UTF8_USE_WINDOWS_API
622 return (_wchdir (
widen (dirname).c_str ()) == 0);
625 std::filesystem::path dir (
widen (dirname));
627 std::filesystem::path dir (dirname);
630 std::filesystem::current_path (dir, ec);
643bool mkdir (
const std::string& dirname)
645#if UTF8_USE_WINDOWS_API
646 return (_wmkdir (
widen (dirname).c_str ()) == 0);
649 std::filesystem::path dir (
widen (dirname));
651 std::filesystem::path dir (dirname);
654 std::filesystem::create_directory (dir, ec);
664#if UTF8_USE_WINDOWS_API
665 return (_wmkdir (
widen (dirname).c_str ()) == 0);
668 std::filesystem::path dir (
widen (dirname));
670 std::filesystem::path dir (dirname);
673 std::filesystem::create_directory (dir, ec);
685bool rmdir (
const std::string& dirname)
687#if UTF8_USE_WINDOWS_API
688 return (_wrmdir (
widen (dirname).c_str ()) == 0);
691 std::filesystem::path dir (
widen (dirname));
693 std::filesystem::path dir (dirname);
696 std::filesystem::remove (dir, ec);
705#if UTF8_USE_WINDOWS_API
706 return (_wrmdir (
widen (dirname).c_str ()) == 0);
709 std::filesystem::path dir (
widen (dirname));
711 std::filesystem::path dir (dirname);
714 std::filesystem::remove (dir, ec);
727bool rename (
const std::string& oldname,
const std::string& newname)
729#if UTF8_USE_WINDOWS_API
730 return (_wrename (
widen (oldname).c_str (),
widen (newname).c_str ()) == 0);
733 std::filesystem::path fn (
widen (newname));
734 std::filesystem::path fo (
widen (oldname));
736 std::filesystem::path fn (newname);
737 std::filesystem::path fo (oldname);
740 std::filesystem::rename (fo, fn, ec);
747bool rename (
const char* oldname,
const char* newname)
749#if UTF8_USE_WINDOWS_API
750 return (_wrename (
widen (oldname).c_str (),
widen (newname).c_str ()) == 0);
753 std::filesystem::path fn (
widen (newname));
754 std::filesystem::path fo (
widen (oldname));
756 std::filesystem::path fn (newname);
757 std::filesystem::path fo (oldname);
760 std::filesystem::rename (fo, fn, ec);
774#if UTF8_USE_WINDOWS_API
775 return (_wremove (
widen (filename).c_str ()) == 0);
778 std::filesystem::path f (
widen(filename));
780 std::filesystem::path f (filename);
783 std::filesystem::remove (f, ec);
793#if UTF8_USE_WINDOWS_API
794 return (_wremove (
widen (filename).c_str ()) == 0);
797 std::filesystem::path f (
widen (filename));
799 std::filesystem::path f (filename);
802 std::filesystem::remove (f, ec);
826 return !operator ==(lhs, rhs);
831#if defined(_WIN32) && !defined(UTF8_KEEP_WIN32_API)
837#pragma comment (lib, "utf8")
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:329
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:52
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:299
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:207
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:730
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:438
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:467
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:497
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:713
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:409
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In place version converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:49
const char * what() const noexcept
Exception message.
Definition utf8.h:59
exception(cause c)
Constructor.
Definition utf8.h:54
cause
Possible causes.
Definition utf8.h:51
cause code
Condition that triggered the exception.
Definition utf8.h:67
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:375
action error_mode(action mode)
Set error handling mode for this thread.
Definition utf8.cpp:22
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:401
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:561
size_t length(const std::string &s)
Counts the number of characters in a UTF-8 encoded string.
Definition utf8.cpp:651
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:252
std::ifstream ifstream
Input stream class using UTF-8 filename.
Definition utf8.h:237
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:80
std::ostream & operator<<(std::ostream &os, const exception &x)
Insertion operator for exception objects.
Definition utf8.h:809
action
Error handling methods.
Definition utf8.h:71
@ replace
Use replacement character for invalid encodings.
Definition utf8.h:72
@ except
Throw an exception on invalid encodings.
Definition utf8.h:73
bool chdir(const std::string &dirname)
Changes the current working directory.
Definition utf8.h:601
bool rename(const std::string &oldname, const std::string &newname)
Rename a file or directory.
Definition utf8.h:727
bool remove(const std::string &filename)
Delete a file.
Definition utf8.h:772
std::string getcwd()
Gets the current working directory.
Definition utf8.h:573
FILE * fopen(const std::string &filename, const std::string &mode)
Open a file.
Definition utf8.h:544
bool mkdir(const std::string &dirname)
Creates a new directory.
Definition utf8.h:643
bool rmdir(const std::string &dirname)
Deletes a directory.
Definition utf8.h:685