UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1/*
2 Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License.
3 This is part of UTF8 project. See LICENSE file for full license terms.
4*/
5
7#pragma once
8
9#include <string>
10#include <vector>
11#include <fstream>
12
13// ------------- Global configuration options ---------------------------------
14
20// #define UTF8_USE_WINDOWS_API 0
21
26// #define UTF8_KEEP_WIN32_API
27
28// --------------- end of configuration options -------------------------------
29
30#if defined (_WIN32) && !defined (UTF8_USE_WINDOWS_API)
31#define UTF8_USE_WINDOWS_API 1
32#elif !defined (UTF8_USE_WINDOWS_API)
33#define UTF8_USE_WINDOWS_API 0
34#endif
35
36#if !UTF8_USE_WINDOWS_API
37#include <filesystem>
38
39#if (defined(_MSVC_LANG) && _MSVC_LANG < 201703L) \
40 || (!defined(_MSVC_LANG) && (__cplusplus < 201703L))
41#error "UTF8 requires c++17 or newer if not using Windows API functions"
42#endif
43
44#endif
45
46namespace utf8 {
47
/// Exception thrown on encoding/decoding failure
struct exception : public std::exception
{
  /// Possible causes
  enum cause { invalid_utf8 = 1, invalid_wchar, invalid_char32 };

  /// Constructor
  /// \param c condition that triggered the exception
  explicit exception (cause c)
    : code (c)
  {}

  /// Exception message
  /// \return pointer to a static string literal selected by `code`
  const char* what () const noexcept override
  {
    switch (code)
    {
    case invalid_utf8:   return "Invalid UTF-8 encoding";
    case invalid_wchar:  return "Invalid UTF-16 encoding";
    case invalid_char32: return "Invalid code-point value";
    default:             return "Other UTF-8 exception";
    }
  }

  /// Condition that triggered the exception
  cause code;
};
70
76
/// Error handling methods
enum class action
{
  replace,  ///< Use replacement character for invalid encodings
  except    ///< Throw an exception on invalid encodings
};

/// Set error handling mode for this thread
/// \param mode error handling mode to activate
/// \return an `action` value; callers in this file store it and pass it back
///         to error_mode() to restore the mode that was active before
action error_mode (action mode);
79
/// Replacement character used for invalid encodings
constexpr char32_t REPLACEMENT_CHARACTER = 0xfffd;
82
83
/// \name Conversion functions
/// Implemented out of line (see utf8.cpp), except rune() which is inline below.
/// NOTE(review): `nch` defaults to 0; presumably that means "up to the
/// terminating NUL" - confirm against the implementation.
///@{

/// Conversion from wide character to UTF-8
std::string narrow (const wchar_t* s, size_t nch = 0);
std::string narrow (const std::wstring& s);

/// Conversion from char32_t character(s) to UTF-8
std::string narrow (const char32_t* s, size_t nch = 0);
std::string narrow (const std::u32string& s);
std::string narrow (char32_t r);

/// Conversion from UTF-8 to wide character
std::wstring widen (const char* s, size_t nch = 0);
std::wstring widen (const std::string& s);

/// Conversion from UTF-8 to UTF-32
std::u32string runes (const char* s, size_t nch = 0);
std::u32string runes (const std::string& s);

/// Conversion from UTF-8 to UTF-32 (single code point)
char32_t rune (const char* p);
char32_t rune (const std::string::const_iterator& p);
///@}
100
/// \name Validation and traversal functions
///@{

/// Check if pointer/iterator points to a valid UTF-8 encoding
bool is_valid (const char* p);
bool is_valid (std::string::const_iterator p, const std::string::const_iterator last);

/// Verifies if string is a valid UTF-8 string
bool valid_str (const char* s, size_t nch = 0);
bool valid_str (const std::string& s);

/// Decodes a UTF-8 encoded character and advances pointer/iterator to next code point
char32_t next (std::string::const_iterator& ptr, const std::string::const_iterator last);
char32_t next (std::string::iterator& ptr, const std::string::const_iterator last);
char32_t next (const char*& ptr);
char32_t next (char*& p);

/// Decrements a character pointer/iterator to previous UTF-8 character
char32_t prev (const char*& ptr);
char32_t prev (char*& ptr);
char32_t prev (std::string::const_iterator& ptr, const std::string::const_iterator first);
char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first);

/// Counts number of characters in an UTF8 encoded string
size_t length (const std::string& s);
size_t length (const char* s);
///@}
118
/// \name Case folding functions
///@{

/// In place version converts a UTF-8 encoded string to lowercase
void make_lower (std::string& str);

/// In place version converts a UTF-8 encoded string to uppercase
void make_upper (std::string& str);

/// Convert UTF-8 string to lower case
std::string tolower (const std::string& str);

/// Convert a UTF-8 string to upper case
std::string toupper (const std::string& str);

/// Compare two UTF-8 strings.
/// NOTE(review): presumably a case-insensitive three-way comparison, judging
/// by the name and the `int` result - confirm against the implementation.
int icompare (const std::string& s1, const std::string& s2);
///@}
129
/// \name Character classification functions
/// Each test comes in three flavours: by code point, by pointer to the first
/// byte of a UTF-8 encoded character, and by string iterator.
///@{

/// Check if character is white space
bool isspace (char32_t r);
bool isspace (const char* p);
bool isspace (std::string::const_iterator p);

/// Check if character is space or tab
bool isblank (char32_t r);
bool isblank (const char* p);
bool isblank (std::string::const_iterator p);

/// Check if character is a decimal digit (0-9)
bool isdigit (char32_t r);
bool isdigit (const char* p);
bool isdigit (std::string::const_iterator p);

/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
bool isalnum (char32_t r);
bool isalnum (const char* p);
bool isalnum (std::string::const_iterator p);

/// Check if character is an alphabetic character (A-Z or a-z)
bool isalpha (char32_t r);
bool isalpha (const char* p);
bool isalpha (std::string::const_iterator p);

/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
bool isxdigit (char32_t r);
bool isxdigit (const char* p);
bool isxdigit (std::string::const_iterator p);

/// Upper case test (the char32_t overload is defined in casecvt.cpp)
bool isupper (char32_t r);
bool isupper (const char* p);
bool isupper (std::string::const_iterator p);

/// Lower case test (the char32_t overload is defined in casecvt.cpp)
bool islower (char32_t r);
bool islower (const char* p);
bool islower (std::string::const_iterator p);
///@}
167
169#ifdef _WIN32
170class ifstream : public std::ifstream
171{
172public:
173 ifstream () : std::ifstream () {};
174 explicit ifstream (const char* filename, std::ios_base::openmode mode = ios_base::in)
175 : std::ifstream (utf8::widen (filename), mode) {};
176 explicit ifstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in)
177 : std::ifstream (utf8::widen (filename), mode) {};
178 ifstream (ifstream&& other) noexcept : std::ifstream ((std::ifstream&&)other) {};
179 ifstream (const ifstream& rhs) = delete;
180
181 void open (const char* filename, std::ios_base::openmode mode = ios_base::in)
182 {
183 std::ifstream::open (utf8::widen (filename), mode);
184 }
185 void open (const std::string& filename, ios_base::openmode mode = ios_base::in)
186 {
187 std::ifstream::open (utf8::widen (filename), mode);
188 }
189};
191class ofstream : public std::ofstream
192{
193public:
194 ofstream () : std::ofstream () {};
195 explicit ofstream (const char* filename, std::ios_base::openmode mode = ios_base::out)
196 : std::ofstream (utf8::widen (filename), mode) {};
197 explicit ofstream (const std::string& filename, std::ios_base::openmode mode = ios_base::out)
198 : std::ofstream (utf8::widen (filename), mode) {};
199 ofstream (ofstream&& other) noexcept : std::ofstream ((std::ofstream&&)other) {};
200 ofstream (const ofstream& rhs) = delete;
201
202 void open (const char* filename, ios_base::openmode mode = ios_base::out)
203 {
204 std::ofstream::open (utf8::widen (filename), mode);
205 }
206 void open (const std::string& filename, ios_base::openmode mode = ios_base::out)
207 {
208 std::ofstream::open (utf8::widen (filename), mode);
209 }
210};
211
213class fstream : public std::fstream
214{
215public:
216 fstream () : std::fstream () {};
217 explicit fstream (const char* filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
218 : std::fstream (utf8::widen (filename), mode) {};
219 explicit fstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
220 : std::fstream (utf8::widen (filename), mode) {};
221 fstream (fstream&& other) noexcept : std::fstream ((std::fstream&&)other) {};
222 fstream (const fstream& rhs) = delete;
223
224 void open (const char* filename, ios_base::openmode mode = ios_base::in | ios_base::out)
225 {
226 std::fstream::open (utf8::widen (filename), mode);
227 }
228 void open (const std::string& filename, ios_base::openmode mode = ios_base::in | ios_base::out)
229 {
230 std::fstream::open (utf8::widen (filename), mode);
231 }
232};
233
234#else
// Outside Windows the standard file streams are used unchanged; they are
// expected to accept UTF-8 filenames already (true under Linux).
using ifstream = std::ifstream;  ///< Input stream class using UTF-8 filename
using ofstream = std::ofstream;  ///< Output stream class using UTF-8 filename
using fstream = std::fstream;    ///< Bidirectional stream class using UTF-8 filename
239#endif
240
241
242// INLINES --------------------------------------------------------------------
243
250inline
251bool is_valid (const char* p)
252{
253 auto prev_mode = error_mode (action::replace);
254 bool valid = (next (p) != REPLACEMENT_CHARACTER);
255 error_mode (prev_mode);
256 return valid;
257}
258
266inline
267bool is_valid (std::string::const_iterator p, const std::string::const_iterator last)
268{
269 auto len = last - p;
270 auto prev_mode = error_mode (action::replace);
271 bool valid = (next (p, last) != REPLACEMENT_CHARACTER);
272 error_mode (prev_mode);
273 return valid;
274}
275
277inline
278char32_t next (std::string::iterator& ptr, const std::string::const_iterator last)
279{
280 return next (*(std::string::const_iterator*)(&ptr), last);
281}
282
284inline
285char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first)
286{
287 return prev (*(std::string::const_iterator*)(&ptr), first);
288}
289
297inline
298char32_t rune (const char* p)
299{
300 return next (p);
301}
302
303
314inline
315char32_t next (char*& ptr)
316{
317 return next (const_cast<const char*&>(ptr));
318}
319
329inline
330char32_t prev (char*& ptr)
331{
332 return prev (const_cast<const char*&>(ptr));
333}
334
335
341inline
342bool valid_str (const std::string& s)
343{
344 return valid_str (s.c_str (), s.size());
345}
346
348inline
349char32_t rune (const std::string::const_iterator& p)
350{
351 return rune (&(*p));
352}
353
354
363inline
364bool isspace (const char* p)
365{
366 return isspace (rune (p));
367}
368
370inline
371bool isspace (std::string::const_iterator p)
372{
373 return isspace (rune(p));
374}
375
376
389inline
390bool isblank (const char *p)
391{
392 return isblank(rune(p));
393}
394
396inline
397bool isblank (std::string::const_iterator p)
398{
399 return isblank (rune (p));
400}
401
/// Check if character is a decimal digit (0-9)
/// \param r code point to test
/// \return `true` only for U+0030..U+0039; digits of other scripts are rejected
inline
bool isdigit (char32_t r)
{
  return U'0' <= r && r <= U'9';
}
412
418inline
419bool isdigit (const char *p)
420{
421 return isdigit (rune (p));
422}
423
425inline
426bool isdigit (std::string::const_iterator p)
427{
428 return isdigit (rune (p));
429}
430
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
/// \param r code point to test
/// \return `true` only for the ASCII ranges 0-9, A-Z and a-z
inline
bool isalnum (char32_t r)
{
  const bool digit = (U'0' <= r && r <= U'9');
  const bool upper = (U'A' <= r && r <= U'Z');
  const bool lower = (U'a' <= r && r <= U'z');
  return digit || upper || lower;
}
441
447inline
448bool isalnum (const char *p)
449{
450 return isalnum (rune (p));
451}
452
454inline
455bool isalnum (std::string::const_iterator p)
456{
457 return isalnum (rune (p));
458}
459
/// Check if character is an alphabetic character (A-Z or a-z)
/// \param r code point to test
/// \return `true` only for the ASCII ranges A-Z and a-z
inline
bool isalpha (char32_t r)
{
  const bool upper = (U'A' <= r && r <= U'Z');
  const bool lower = (U'a' <= r && r <= U'z');
  return upper || lower;
}
470
476inline
477bool isalpha (const char *p)
478{
479 return isalpha (rune (p));
480}
481
483inline
484bool isalpha (std::string::const_iterator p)
485{
486 return isalpha (&*p);
487}
488
489
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
/// \param r code point to test
/// \return `true` only for the ASCII ranges 0-9, A-F and a-f
inline
bool isxdigit (char32_t r)
{
  const bool decimal = (U'0' <= r && r <= U'9');
  const bool upper_hex = (U'A' <= r && r <= U'F');
  const bool lower_hex = (U'a' <= r && r <= U'f');
  return decimal || upper_hex || lower_hex;
}
500
506inline
507bool isxdigit (const char *p)
508{
509 return isxdigit(rune(p));
510}
511
513inline
514bool isxdigit (std::string::const_iterator p)
515{
516 return isxdigit (rune(p));
517}
518
520inline
521bool isupper (std::string::const_iterator p)
522{
523 return isupper (rune(p));
524}
525
527inline
528bool islower (std::string::const_iterator p)
529{
530 return islower (rune(p));
531}
532
533// File System functions -----------------------------------------------------
534
/// Open a file
/// \param filename UTF-8 encoded file name
/// \param mode     access mode string, as for the C `fopen`
/// \return stream pointer, or `nullptr` if the file could not be opened
///
/// On Windows both arguments are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to `::fopen`.
inline
FILE* fopen (const std::string& filename, const std::string& mode)
{
  FILE* h = nullptr;
#ifdef _WIN32
  // On failure `h` is expected to stay null, so the returned errno is not needed
  _wfopen_s (&h, widen (filename).c_str (), widen (mode).c_str ());
#else
  h = ::fopen (filename.c_str (), mode.c_str ());
#endif
  return h;
}
553
/// Open a file
/// \param filename UTF-8 encoded, NUL-terminated file name
/// \param mode     NUL-terminated access mode string, as for the C `fopen`
/// \return stream pointer, or `nullptr` if the file could not be opened
///
/// On Windows both arguments are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to `::fopen`.
inline
FILE* fopen (const char* filename, const char* mode)
{
  FILE* h = nullptr;
#ifdef _WIN32
  // On failure `h` is expected to stay null, so the returned errno is not needed
  _wfopen_s (&h, widen (filename).c_str (), widen (mode).c_str ());
#else
  h = ::fopen (filename, mode);
#endif
  return h;
}
566
/// Gets the current working directory
/// \return UTF-8 encoded name of the working directory, or an empty string
///         if it could not be retrieved
///
/// NOTE(review): the Windows API branch uses a fixed `_MAX_PATH` buffer, so
/// longer paths are reported as failure (empty string).
inline
std::string getcwd ()
{
#if UTF8_USE_WINDOWS_API
  wchar_t tmp[_MAX_PATH];
  if (_wgetcwd (tmp, _countof (tmp)))
    return narrow (tmp);
  return std::string ();
#else
  std::error_code ec;
  const std::filesystem::path wd = std::filesystem::current_path (ec);
  if (ec)
    return std::string ();
# ifdef _WIN32
  return narrow (wd.native ());
# else
  return wd.string ();
# endif
#endif
}
592
/// Changes the current working directory
/// \param dirname UTF-8 encoded name of the new working directory
/// \return `true` if the working directory was changed, `false` otherwise
inline
bool chdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wchdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  std::filesystem::current_path (dir, ec);
  return !ec;
#endif
}
615
/// Changes the current working directory
/// \param dirname UTF-8 encoded, NUL-terminated name of the new working directory
/// \return `true` if the working directory was changed, `false` otherwise
inline
bool chdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wchdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  std::filesystem::current_path (dir, ec);
  return !ec;
#endif
}
633
634
/// Creates a new directory
/// \param dirname UTF-8 encoded name of the directory to create
/// \return `true` only if a directory was actually created
///
/// `std::filesystem::create_directory` reports "already exists" without
/// setting an error code, so its boolean result is used as well; this keeps
/// the portable branch consistent with `_wmkdir`, which fails in that case.
inline
bool mkdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
657
658
/// Creates a new directory
/// \param dirname UTF-8 encoded, NUL-terminated name of the directory to create
/// \return `true` only if a directory was actually created
///
/// `std::filesystem::create_directory` reports "already exists" without
/// setting an error code, so its boolean result is used as well; this keeps
/// the portable branch consistent with `_wmkdir`, which fails in that case.
inline
bool mkdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
676
/// Deletes a directory
/// \param dirname UTF-8 encoded name of the directory to delete
/// \return `true` only if a directory was actually removed
///
/// In the portable branch `std::filesystem::remove` would also delete a
/// regular file and reports a missing path without an error code; both cases
/// are rejected here so the result matches the `_wrmdir` branch.
inline
bool rmdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  if (!std::filesystem::is_directory (dir, ec))
    return false;  // missing, inaccessible or not a directory
  return std::filesystem::remove (dir, ec) && !ec;
#endif
}
699
/// Deletes a directory
/// \param dirname UTF-8 encoded, NUL-terminated name of the directory to delete
/// \return `true` only if a directory was actually removed
///
/// In the portable branch `std::filesystem::remove` would also delete a
/// regular file and reports a missing path without an error code; both cases
/// are rejected here so the result matches the `_wrmdir` branch.
inline
bool rmdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  if (!std::filesystem::is_directory (dir, ec))
    return false;  // missing, inaccessible or not a directory
  return std::filesystem::remove (dir, ec) && !ec;
#endif
}
717
/// Rename a file or directory
/// \param oldname current name (UTF-8 encoded)
/// \param newname new name (UTF-8 encoded)
/// \return `true` if the operation reported no error, `false` otherwise
inline
bool rename (const std::string& oldname, const std::string& newname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path to (widen (newname));
  const std::filesystem::path from (widen (oldname));
# else
  const std::filesystem::path to (newname);
  const std::filesystem::path from (oldname);
# endif
  std::error_code ec;
  std::filesystem::rename (from, to, ec);
  return !ec;
#endif
}
743
745inline
746bool rename (const char* oldname, const char* newname)
747{
748#if UTF8_USE_WINDOWS_API
749 return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
750#else
751# ifdef _WIN32
752 std::filesystem::path fn (widen (newname));
753 std::filesystem::path fo (widen (oldname));
754# else
755 std::filesystem::path fn (newname);
756 std::filesystem::path fo (oldname);
757# endif
758 std::error_code ec;
759 std::filesystem::rename (fo, fn, ec);
760 return !ec;
761#endif
762}
763
/// Delete a file
/// \param filename UTF-8 encoded name of the file to delete
/// \return `true` only if something was actually deleted
///
/// `std::filesystem::remove` reports a missing file without setting an error
/// code, so its boolean result is used; this keeps the portable branch
/// consistent with `_wremove`, which fails for a missing file.
inline
bool remove (const std::string& filename)
{
#if UTF8_USE_WINDOWS_API
  return (_wremove (widen (filename).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path f (widen (filename));
# else
  const std::filesystem::path f (filename);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (f, ec);
  return removed && !ec;
#endif
}
786
787
789inline
790bool remove (const char* filename)
791{
792#if UTF8_USE_WINDOWS_API
793 return (_wremove (widen (filename).c_str ()) == 0);
794#else
795# ifdef _WIN32
796 std::filesystem::path f (widen (filename));
797# else
798 std::filesystem::path f (filename);
799# endif
800 std::error_code ec;
801 std::filesystem::remove (f, ec);
802 return !ec;
803#endif
804}
805
807inline
808std::ostream& operator<<(std::ostream& os, const exception& x)
809{
810 os << x.what ();
811 return os;
812}
813
815inline
816bool operator ==(const exception& lhs, const exception rhs)
817{
818 return (lhs.code == rhs.code);
819}
820
822inline
823bool operator !=(const exception& lhs, const exception& rhs)
824{
825 return !operator ==(lhs, rhs);
826}
827
828}; //namespace utf8
829
830#if defined(_WIN32) && !defined(UTF8_KEEP_WIN32_API)
831#include <utf8/winutf8.h>
832#endif
833#include <utf8/ini.h>
834
835#ifdef _MSC_VER
836#pragma comment (lib, "utf8")
837#endif
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:287
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:52
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:298
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:207
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:688
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:437
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:466
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:496
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:671
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:408
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In place version converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:50
const char * what() const noexcept
Exception message.
Definition utf8.h:60
exception(cause c)
Constructor.
Definition utf8.h:55
cause
Possible causes.
Definition utf8.h:52
cause code
Condition that triggered the exception.
Definition utf8.h:68
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:333
action error_mode(action mode)
Set error handling mode for this thread.
Definition utf8.cpp:22
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:359
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:519
size_t length(const std::string &s)
Counts number of characters in an UTF8 encoded string.
Definition utf8.cpp:609
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:251
std::ifstream ifstream
Input stream class using UTF-8 filename.
Definition utf8.h:236
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:81
std::ostream & operator<<(std::ostream &os, const exception &x)
Insertion operator for exception objects.
Definition utf8.h:808
action
Error handling methods.
Definition utf8.h:72
@ replace
Use replacement character for invalid encodings.
Definition utf8.h:73
@ except
Throw an exception on invalid encodings.
Definition utf8.h:74
bool chdir(const std::string &dirname)
Changes the current working directory.
Definition utf8.h:600
bool rename(const std::string &oldname, const std::string &newname)
Rename a file or directory.
Definition utf8.h:726
bool remove(const std::string &filename)
Delete a file.
Definition utf8.h:771
std::string getcwd()
Gets the current working directory.
Definition utf8.h:572
FILE * fopen(const std::string &filename, const std::string &mode)
Open a file.
Definition utf8.h:543
bool mkdir(const std::string &dirname)
Creates a new directory.
Definition utf8.h:642
bool rmdir(const std::string &dirname)
Deletes a directory.
Definition utf8.h:684
Windows specific parts.