UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1/*
2 Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License.
3 This is part of UTF8 project. See LICENSE file for full license terms.
4*/
5
7#pragma once
8
9#include <string>
10#include <vector>
11#include <fstream>
12
13// ------------- Global configuration options ---------------------------------
14
20// #define UTF8_USE_WINDOWS_API 0
21
26// #define UTF8_KEEP_WIN32_API
27
28// --------------- end of configuration options -------------------------------
29
30#if defined (_WIN32) && !defined (UTF8_USE_WINDOWS_API)
31#define UTF8_USE_WINDOWS_API 1
32#elif !defined (UTF8_USE_WINDOWS_API)
33#define UTF8_USE_WINDOWS_API 0
34#endif
35
36#if !UTF8_USE_WINDOWS_API
37#include <filesystem>
38
39#if (defined(_MSVC_LANG) && _MSVC_LANG < 201703L) \
40 || (!defined(_MSVC_LANG) && (__cplusplus < 201703L))
41#error "UTF8 requires c++17 or newer if not using Windows API functions"
42#endif
43
44#endif
45
46namespace utf8 {
47
/// Exception thrown on encoding/decoding failure
struct exception : public std::exception
{
  /// Possible causes
  enum cause { invalid_utf8 = 1, invalid_wchar, invalid_char32 };

  /// Constructor
  /// \param c condition that triggered the exception
  explicit exception (cause c)
    : code (c)
  {}

  /// Exception message
  /// \return a static, NUL-terminated description of the failure cause
  const char* what () const noexcept override
  {
    switch (code)
    {
    case invalid_utf8:
      return "Invalid UTF-8 encoding";
    case invalid_wchar:
      return "Invalid UTF-16 encoding";
    case invalid_char32:
      return "Invalid code-point value";
    default:
      return "Other UTF-8 exception";
    }
  }

  /// Condition that triggered the exception
  cause code;
};
70
/// Error handling methods
// NOTE(review): this enum was dropped from the listing; the documentation
// index places it just before error_mode with enumerators `replace` then
// `except`. Scoped vs. unscoped is an assumption - confirm against the
// original header.
enum class action {
  replace,  ///< Use replacement character for invalid encodings
  except    ///< Throw an exception on invalid encodings
};

/// Set error handling mode for this thread
action error_mode (action mode);

/// Replacement character used for invalid encodings
const char32_t REPLACEMENT_CHARACTER = 0xfffd;
82
83
/// \name Conversion from wide character (or UTF-32) to UTF-8
/// Overloads taking a pointer also accept a character count `nch`
/// (default 0).
///@{
std::string narrow (const wchar_t* s, size_t nch = 0);
std::string narrow (const std::wstring& s);
std::string narrow (const char32_t* s, size_t nch = 0);
std::string narrow (const std::u32string& s);
std::string narrow (char32_t r);
///@}

/// \name Conversion from UTF-8 to wide character
///@{
std::wstring widen (const char* s, size_t nch = 0);
std::wstring widen (const std::string& s);
std::wstring widen (char32_t r);
///@}

/// \name Conversion from UTF-8 to UTF-32
///@{
std::u32string runes (const char* s, size_t nch = 0);
std::u32string runes (const std::string& s);

char32_t rune (const char* p);
char32_t rune (const std::string::const_iterator& p);
///@}
102
/// \name Validation
///@{
/// Check if pointer (or iterator) points to a valid UTF-8 encoding
bool is_valid (const char* p);
bool is_valid (std::string::const_iterator p, const std::string::const_iterator last);
/// Verifies if string is a valid UTF-8 string
bool valid_str (const char* s, size_t nch = 0);
bool valid_str (const std::string& s);
///@}

/// \name Navigation
///@{
/// Decodes a UTF-8 encoded character and advances iterator (or pointer)
/// to next code point
char32_t next (std::string::const_iterator& ptr, const std::string::const_iterator last);
char32_t next (std::string::iterator& ptr, const std::string::const_iterator last);
char32_t next (const char*& ptr);
char32_t next (char*& p);

/// Decrements a character pointer (or iterator) to previous UTF-8 character
char32_t prev (const char*& ptr);
char32_t prev (char*& ptr);
char32_t prev (std::string::const_iterator& ptr, const std::string::const_iterator first);
char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first);
///@}

/// Counts number of characters in a UTF-8 encoded string
size_t length (const std::string& s);
size_t length (const char* s);
120
/// \name Case folding
///@{
/// In place version converts a UTF-8 encoded string to lowercase
void make_lower (std::string& str);
/// In place version converts a UTF-8 encoded string to uppercase
void make_upper (std::string& str);
/// Convert UTF-8 string to lower case
std::string tolower (const std::string& str);
/// Convert a UTF-8 string to upper case
std::string toupper (const std::string& str);
/// Compare two UTF-8 strings
// NOTE(review): presumably a case-insensitive comparison returning <0, 0, >0;
// the definition is not visible here - confirm against the implementation.
int icompare (const std::string& s1, const std::string& s2);
///@}
131
/// \name Character classification
/// Each predicate has three forms: one taking a code point, one taking a
/// pointer to a UTF-8 encoded character and one taking a string iterator.
///@{

/// Check if character is white space
bool isspace (char32_t r);
bool isspace (const char* p);
bool isspace (std::string::const_iterator p);

/// Check if character is space or tab
bool isblank (char32_t r);
bool isblank (const char* p);
bool isblank (std::string::const_iterator p);

/// Check if character is a decimal digit (0-9)
bool isdigit (char32_t r);
bool isdigit (const char* p);
bool isdigit (std::string::const_iterator p);

/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
bool isalnum (char32_t r);
bool isalnum (const char* p);
bool isalnum (std::string::const_iterator p);

/// Check if character is an alphabetic character (A-Z or a-z)
bool isalpha (char32_t r);
bool isalpha (const char* p);
bool isalpha (std::string::const_iterator p);

/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
bool isxdigit (char32_t r);
bool isxdigit (const char* p);
bool isxdigit (std::string::const_iterator p);

/// Upper-case test; the code point overload is defined out of line
bool isupper (char32_t r);
bool isupper (const char* p);
bool isupper (std::string::const_iterator p);

/// Lower-case test; the code point overload is defined out of line
bool islower (char32_t r);
bool islower (const char* p);
bool islower (std::string::const_iterator p);
///@}
169
171#ifdef _WIN32
172class ifstream : public std::ifstream
173{
174public:
175 ifstream () : std::ifstream () {};
176 explicit ifstream (const char* filename, std::ios_base::openmode mode = ios_base::in)
177 : std::ifstream (utf8::widen (filename), mode) {};
178 explicit ifstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in)
179 : std::ifstream (utf8::widen (filename), mode) {};
180 ifstream (ifstream&& other) noexcept : std::ifstream ((std::ifstream&&)other) {};
181 ifstream (const ifstream& rhs) = delete;
182
183 void open (const char* filename, std::ios_base::openmode mode = ios_base::in)
184 {
185 std::ifstream::open (utf8::widen (filename), mode);
186 }
187 void open (const std::string& filename, ios_base::openmode mode = ios_base::in)
188 {
189 std::ifstream::open (utf8::widen (filename), mode);
190 }
191};
193class ofstream : public std::ofstream
194{
195public:
196 ofstream () : std::ofstream () {};
197 explicit ofstream (const char* filename, std::ios_base::openmode mode = ios_base::out)
198 : std::ofstream (utf8::widen (filename), mode) {};
199 explicit ofstream (const std::string& filename, std::ios_base::openmode mode = ios_base::out)
200 : std::ofstream (utf8::widen (filename), mode) {};
201 ofstream (ofstream&& other) noexcept : std::ofstream ((std::ofstream&&)other) {};
202 ofstream (const ofstream& rhs) = delete;
203
204 void open (const char* filename, ios_base::openmode mode = ios_base::out)
205 {
206 std::ofstream::open (utf8::widen (filename), mode);
207 }
208 void open (const std::string& filename, ios_base::openmode mode = ios_base::out)
209 {
210 std::ofstream::open (utf8::widen (filename), mode);
211 }
212};
213
215class fstream : public std::fstream
216{
217public:
218 fstream () : std::fstream () {};
219 explicit fstream (const char* filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
220 : std::fstream (utf8::widen (filename), mode) {};
221 explicit fstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
222 : std::fstream (utf8::widen (filename), mode) {};
223 fstream (fstream&& other) noexcept : std::fstream ((std::fstream&&)other) {};
224 fstream (const fstream& rhs) = delete;
225
226 void open (const char* filename, ios_base::openmode mode = ios_base::in | ios_base::out)
227 {
228 std::fstream::open (utf8::widen (filename), mode);
229 }
230 void open (const std::string& filename, ios_base::openmode mode = ios_base::in | ios_base::out)
231 {
232 std::fstream::open (utf8::widen (filename), mode);
233 }
234};
235
236#else
// Under Linux file streams already use UTF-8 filenames, so the standard
// stream classes are used directly.

/// Input stream class using UTF-8 filename
using ifstream = std::ifstream;
/// Output stream class using UTF-8 filename
using ofstream = std::ofstream;
/// Bidirectional stream class using UTF-8 filename
using fstream = std::fstream;
241#endif
242
243
244// INLINES --------------------------------------------------------------------
245
252inline
253bool is_valid (const char* p)
254{
255 auto prev_mode = error_mode (action::replace);
256 bool valid = (next (p) != REPLACEMENT_CHARACTER);
257 error_mode (prev_mode);
258 return valid;
259}
260
268inline
269bool is_valid (std::string::const_iterator p, const std::string::const_iterator last)
270{
271 auto len = last - p;
272 auto prev_mode = error_mode (action::replace);
273 bool valid = (next (p, last) != REPLACEMENT_CHARACTER);
274 error_mode (prev_mode);
275 return valid;
276}
277
279inline
280char32_t next (std::string::iterator& ptr, const std::string::const_iterator last)
281{
282 return next (*(std::string::const_iterator*)(&ptr), last);
283}
284
286inline
287char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first)
288{
289 return prev (*(std::string::const_iterator*)(&ptr), first);
290}
291
299inline
300char32_t rune (const char* p)
301{
302 return next (p);
303}
304
305
316inline
317char32_t next (char*& ptr)
318{
319 return next (const_cast<const char*&>(ptr));
320}
321
331inline
332char32_t prev (char*& ptr)
333{
334 return prev (const_cast<const char*&>(ptr));
335}
336
337
343inline
344bool valid_str (const std::string& s)
345{
346 return valid_str (s.c_str (), s.size());
347}
348
350inline
351char32_t rune (const std::string::const_iterator& p)
352{
353 return rune (&(*p));
354}
355
356
365inline
366bool isspace (const char* p)
367{
368 return isspace (rune (p));
369}
370
372inline
373bool isspace (std::string::const_iterator p)
374{
375 return isspace (rune(p));
376}
377
378
391inline
392bool isblank (const char *p)
393{
394 return isblank(rune(p));
395}
396
398inline
399bool isblank (std::string::const_iterator p)
400{
401 return isblank (rune (p));
402}
403
/// Check if character is a decimal digit (0-9)
/// \param r code point to check
/// \return `true` only for the ASCII digits
inline
bool isdigit (char32_t r)
{
  // char32_t literals keep both sides of each comparison the same type.
  return U'0' <= r && r <= U'9';
}
414
420inline
421bool isdigit (const char *p)
422{
423 return isdigit (rune (p));
424}
425
427inline
428bool isdigit (std::string::const_iterator p)
429{
430 return isdigit (rune (p));
431}
432
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
/// \param r code point to check
/// \return `true` only for ASCII letters and digits
inline
bool isalnum (char32_t r)
{
  // char32_t literals keep both sides of each comparison the same type.
  const bool digit = (U'0' <= r && r <= U'9');
  const bool upper = (U'A' <= r && r <= U'Z');
  const bool lower = (U'a' <= r && r <= U'z');
  return digit || upper || lower;
}
443
449inline
450bool isalnum (const char *p)
451{
452 return isalnum (rune (p));
453}
454
456inline
457bool isalnum (std::string::const_iterator p)
458{
459 return isalnum (rune (p));
460}
461
/// Check if character is an alphabetic character (A-Z or a-z)
/// \param r code point to check
/// \return `true` only for ASCII letters
inline
bool isalpha (char32_t r)
{
  // char32_t literals keep both sides of each comparison the same type.
  const bool upper = (U'A' <= r && r <= U'Z');
  const bool lower = (U'a' <= r && r <= U'z');
  return upper || lower;
}
472
478inline
479bool isalpha (const char *p)
480{
481 return isalpha (rune (p));
482}
483
485inline
486bool isalpha (std::string::const_iterator p)
487{
488 return isalpha (&*p);
489}
490
491
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
/// \param r code point to check
/// \return `true` only for ASCII hexadecimal digits
inline
bool isxdigit (char32_t r)
{
  // char32_t literals keep both sides of each comparison the same type.
  const bool digit = (U'0' <= r && r <= U'9');
  const bool upper = (U'A' <= r && r <= U'F');
  const bool lower = (U'a' <= r && r <= U'f');
  return digit || upper || lower;
}
502
508inline
509bool isxdigit (const char *p)
510{
511 return isxdigit(rune(p));
512}
513
515inline
516bool isxdigit (std::string::const_iterator p)
517{
518 return isxdigit (rune(p));
519}
520
522inline
523bool isupper (std::string::const_iterator p)
524{
525 return isupper (rune(p));
526}
527
529inline
530bool islower (std::string::const_iterator p)
531{
532 return islower (rune(p));
533}
534
535// File System functions -----------------------------------------------------
536
/// Open a file
///
/// On Windows the name and mode are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to `::fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode string
/// \return file handle, or `nullptr` if the file could not be opened
inline
FILE* fopen (const std::string& filename, const std::string& mode)
{
  FILE* handle = nullptr;
#ifdef _WIN32
  // The error code is not inspected: on failure `handle` remains nullptr.
  _wfopen_s (&handle, widen (filename).c_str (), widen (mode).c_str ());
#else
  handle = ::fopen (filename.c_str (), mode.c_str ());
#endif
  return handle;
}
555
/// Open a file
///
/// On Windows the name and mode are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to `::fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode string
/// \return file handle, or `nullptr` if the file could not be opened
inline
FILE* fopen (const char* filename, const char* mode)
{
  FILE* handle = nullptr;
#ifdef _WIN32
  // The error code is not inspected: on failure `handle` remains nullptr.
  _wfopen_s (&handle, widen (filename).c_str (), widen (mode).c_str ());
#else
  handle = ::fopen (filename, mode);
#endif
  return handle;
}
568
/// Gets the current working directory
/// \return UTF-8 encoded name of the working directory, or an empty string
///         if it could not be retrieved
inline
std::string getcwd ()
{
#if UTF8_USE_WINDOWS_API
  wchar_t tmp[_MAX_PATH];
  if (!_wgetcwd (tmp, _countof (tmp)))
    return std::string ();
  return narrow (tmp);
#else
  std::error_code ec;
  const std::filesystem::path wd = std::filesystem::current_path (ec);
  if (ec)
    return std::string ();
# ifdef _WIN32
  // Native paths are wide strings on Windows and must be converted.
  return narrow (wd.native ());
# else
  return wd.string ();
# endif
#endif
}
594
/// Changes the current working directory
/// \param dirname UTF-8 encoded name of the new working directory
/// \return `true` if successful, `false` otherwise
inline
bool chdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wchdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::error_code ec;
  std::filesystem::current_path (target, ec);
  return !ec;
#endif
}
617
/// Changes the current working directory
/// \param dirname UTF-8 encoded name of the new working directory
/// \return `true` if successful, `false` otherwise
inline
bool chdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wchdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::error_code ec;
  std::filesystem::current_path (target, ec);
  return !ec;
#endif
}
635
636
/// Creates a new directory
/// \param dirname UTF-8 encoded name of the directory to create
/// \return `true` if the directory was created, `false` otherwise
///         (including when it already exists)
inline
bool mkdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  // create_directory returns false WITHOUT setting `ec` when the directory
  // already exists. Use its result so this branch fails in that case, as the
  // _wmkdir branch does.
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
659
660
/// Creates a new directory
/// \param dirname UTF-8 encoded name of the directory to create
/// \return `true` if the directory was created, `false` otherwise
///         (including when it already exists)
inline
bool mkdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  // create_directory returns false WITHOUT setting `ec` when the directory
  // already exists. Use its result so this branch fails in that case, as the
  // _wmkdir branch does.
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
678
/// Deletes a directory
/// \param dirname UTF-8 encoded name of the directory to delete
/// \return `true` if something was removed, `false` otherwise
// NOTE(review): std::filesystem::remove also deletes non-directory entries,
// which _wrmdir presumably does not - confirm whether that matters to callers.
inline
bool rmdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  // remove returns false WITHOUT setting `ec` when the path does not exist.
  // Use its result so this branch fails in that case, as the _wrmdir branch
  // does.
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
701
/// Deletes a directory
/// \param dirname UTF-8 encoded name of the directory to delete
/// \return `true` if something was removed, `false` otherwise
// NOTE(review): std::filesystem::remove also deletes non-directory entries,
// which _wrmdir presumably does not - confirm whether that matters to callers.
inline
bool rmdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path dir (widen (dirname));
# else
  const std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  // remove returns false WITHOUT setting `ec` when the path does not exist.
  // Use its result so this branch fails in that case, as the _wrmdir branch
  // does.
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
719
/// Rename a file or directory
/// \param oldname current name (UTF-8 encoded)
/// \param newname new name (UTF-8 encoded)
/// \return `true` if successful, `false` otherwise
inline
bool rename (const std::string& oldname, const std::string& newname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path to (widen (newname));
  const std::filesystem::path from (widen (oldname));
# else
  const std::filesystem::path to (newname);
  const std::filesystem::path from (oldname);
# endif
  std::error_code ec;
  std::filesystem::rename (from, to, ec);
  return !ec;
#endif
}
745
747inline
748bool rename (const char* oldname, const char* newname)
749{
750#if UTF8_USE_WINDOWS_API
751 return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
752#else
753# ifdef _WIN32
754 std::filesystem::path fn (widen (newname));
755 std::filesystem::path fo (widen (oldname));
756# else
757 std::filesystem::path fn (newname);
758 std::filesystem::path fo (oldname);
759# endif
760 std::error_code ec;
761 std::filesystem::rename (fo, fn, ec);
762 return !ec;
763#endif
764}
765
/// Delete a file
/// \param filename UTF-8 encoded name of the file to delete
/// \return `true` if the file was removed, `false` otherwise
inline
bool remove (const std::string& filename)
{
#if UTF8_USE_WINDOWS_API
  return (_wremove (widen (filename).c_str ()) == 0);
#else
# ifdef _WIN32
  const std::filesystem::path target (widen (filename));
# else
  const std::filesystem::path target (filename);
# endif
  std::error_code ec;
  // remove returns false WITHOUT setting `ec` when the file does not exist.
  // Use its result so this branch fails in that case, as the _wremove branch
  // does.
  const bool removed = std::filesystem::remove (target, ec);
  return removed && !ec;
#endif
}
788
789
791inline
792bool remove (const char* filename)
793{
794#if UTF8_USE_WINDOWS_API
795 return (_wremove (widen (filename).c_str ()) == 0);
796#else
797# ifdef _WIN32
798 std::filesystem::path f (widen (filename));
799# else
800 std::filesystem::path f (filename);
801# endif
802 std::error_code ec;
803 std::filesystem::remove (f, ec);
804 return !ec;
805#endif
806}
807
809inline
810std::ostream& operator<<(std::ostream& os, const exception& x)
811{
812 os << x.what ();
813 return os;
814}
815
817inline
818bool operator ==(const exception& lhs, const exception rhs)
819{
820 return (lhs.code == rhs.code);
821}
822
824inline
825bool operator !=(const exception& lhs, const exception& rhs)
826{
827 return !operator ==(lhs, rhs);
828}
829
830}; //namespace utf8
831
832#if defined(_WIN32) && !defined(UTF8_KEEP_WIN32_API)
833#include <utf8/winutf8.h>
834#endif
835#include <utf8/ini.h>
836
837#ifdef _MSC_VER
838#pragma comment (lib, "utf8")
839#endif
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:308
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:52
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:300
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:207
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:709
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:439
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:468
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:498
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:692
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:410
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In place version converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:50
const char * what() const noexcept
Exception message.
Definition utf8.h:60
exception(cause c)
Constructor.
Definition utf8.h:55
cause
Possible causes.
Definition utf8.h:52
cause code
Condition that triggered the exception.
Definition utf8.h:68
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:354
action error_mode(action mode)
Set error handling mode for this thread.
Definition utf8.cpp:22
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:380
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:540
size_t length(const std::string &s)
Counts number of characters in a UTF-8 encoded string.
Definition utf8.cpp:630
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:253
std::ifstream ifstream
Input stream class using UTF-8 filename.
Definition utf8.h:238
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:81
std::ostream & operator<<(std::ostream &os, const exception &x)
Insertion operator for exception objects.
Definition utf8.h:810
action
Error handling methods.
Definition utf8.h:72
@ replace
Use replacement character for invalid encodings.
Definition utf8.h:73
@ except
Throw an exception on invalid encodings.
Definition utf8.h:74
bool chdir(const std::string &dirname)
Changes the current working directory.
Definition utf8.h:602
bool rename(const std::string &oldname, const std::string &newname)
Rename a file or directory.
Definition utf8.h:728
bool remove(const std::string &filename)
Delete a file.
Definition utf8.h:773
std::string getcwd()
Gets the current working directory.
Definition utf8.h:574
FILE * fopen(const std::string &filename, const std::string &mode)
Open a file.
Definition utf8.h:545
bool mkdir(const std::string &dirname)
Creates a new directory.
Definition utf8.h:644
bool rmdir(const std::string &dirname)
Deletes a directory.
Definition utf8.h:686
Windows specific parts.