UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1/*
2 Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License.
3 This is part of UTF8 project. See LICENSE file for full license terms.
4*/
5
7#pragma once
8
9#include <string>
10#include <fstream>
11
12// ------------- Global configuration options ---------------------------------
13
19// #define UTF8_USE_WINDOWS_API 0
20
25// #define UTF8_KEEP_WIN32_API
26
27// --------------- end of configuration options -------------------------------
28
29#if defined (_WIN32) && !defined (UTF8_USE_WINDOWS_API)
30#define UTF8_USE_WINDOWS_API 1
31#elif !defined (UTF8_USE_WINDOWS_API)
32#define UTF8_USE_WINDOWS_API 0
33#endif
34
35#if !UTF8_USE_WINDOWS_API
36#include <filesystem>
37
38#if (defined(_MSVC_LANG) && _MSVC_LANG < 201703L) \
39 || (!defined(_MSVC_LANG) && (__cplusplus < 201703L))
40#error "UTF8 requires c++17 or newer if not using Windows API functions"
41#endif
42
43#endif
44
45namespace utf8 {
46
/// Exception thrown on encoding/decoding failure.
struct exception : public std::exception
{
  /// Possible causes
  enum cause { invalid_utf8 = 1, invalid_wchar, invalid_char32 };

  /// Constructor
  /// \param c condition that triggered the exception
  explicit exception (cause c)
    : code (c)
  {}

  /// Exception message.
  /// \return a static, null-terminated description of #code
  const char* what () const noexcept override
  {
    switch (code)
    {
    case invalid_utf8:   return "Invalid UTF-8 encoding";
    case invalid_wchar:  return "Invalid UTF-16 encoding";
    case invalid_char32: return "Invalid code-point value";
    }
    // Reached only if `code` holds a value outside the enumerators above.
    return "Other UTF-8 exception";
  }

  /// Condition that triggered the exception
  cause code;
};
69
75
/// Set error handling mode for this thread.
/// \param mode new error handling mode
/// \return an `action` value; the inline is_valid() helpers in this file save it and
///         pass it back to error_mode() to restore the earlier setting
/// NOTE(review): the `action` enumeration (replace / except) is not visible in this
/// listing - confirm it is declared before this prototype.
77action error_mode (action mode);
78
/// Replacement character used for invalid encodings (U+FFFD).
constexpr char32_t REPLACEMENT_CHARACTER {0xfffd};
81
82
/// Conversion from wide character to UTF-8.
/// NOTE(review): `nch` presumably limits the number of input characters, with 0
/// meaning "up to the terminating null" - confirm against utf8.cpp.
85std::string narrow (const wchar_t* s, size_t nch=0);
/// Same conversion for a std::wstring argument.
86std::string narrow (const std::wstring& s);
/// Overload for char32_t input; presumably the UTF-32 to UTF-8 counterpart of runes().
87std::string narrow (const char32_t* s, size_t nch = 0);
/// Overload for std::u32string input.
88std::string narrow (const std::u32string& s);
/// Overload for a single char32_t value.
89std::string narrow (char32_t r);
90
/// Conversion from UTF-8 to wide character.
91std::wstring widen (const char* s, size_t nch = 0);
/// Same conversion for a std::string argument.
92std::wstring widen (const std::string& s);
/// Overload producing a std::wstring from a single char32_t value.
93std::wstring widen (char32_t r);
94
/// Conversion from UTF-8 to UTF-32.
95std::u32string runes (const char* s, size_t nch = 0);
/// Same conversion for a std::string argument.
96std::u32string runes (const std::string& s);
97
/// Conversion from UTF-8 to UTF-32 for a single character (defined inline below).
98char32_t rune (const char* p);
/// Iterator flavour of rune() (defined inline below).
99char32_t rune (const std::string::const_iterator& p);
101
/// Check if pointer points to a valid UTF-8 encoding (defined inline below).
102bool is_valid (const char* p);
/// Iterator flavour of is_valid(); `last` bounds the decoding (defined inline below).
103bool is_valid (std::string::const_iterator p, const std::string::const_iterator last);
/// Verifies if string is a valid UTF-8 string.
/// NOTE(review): meaning of `nch == 0` is not visible here - confirm against utf8.cpp.
104bool valid_str (const char* s, size_t nch = 0);
/// std::string flavour of valid_str() (defined inline below).
105bool valid_str (const std::string& s);
106
/// Decodes a UTF-8 encoded character and advances iterator to next code point.
107char32_t next (std::string::const_iterator& ptr, const std::string::const_iterator last);
/// Mutable-iterator flavour of next() (defined inline below).
108char32_t next (std::string::iterator& ptr, const std::string::const_iterator last);
/// Pointer flavour of next(); `ptr` is taken by reference so it can be moved.
109char32_t next (const char*& ptr);
/// Mutable-pointer flavour of next() (defined inline below).
110char32_t next (char*& p);
111
/// Decrements a character pointer to previous UTF-8 character.
112char32_t prev (const char*& ptr);
/// Mutable-pointer flavour of prev() (defined inline below).
113char32_t prev (char*& ptr);
/// Iterator flavour of prev(); `first` is presumably the lower bound for the move.
114char32_t prev (std::string::const_iterator& ptr, const std::string::const_iterator first);
/// Mutable-iterator flavour of prev() (defined inline below).
115char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first);
116
/// Counts number of characters in a UTF-8 encoded string.
117size_t length (const std::string& s);
/// Pointer flavour of length().
118size_t length (const char* s);
119
/// In place version converts a UTF-8 encoded string to lowercase.
124void make_lower (std::string& str);
/// In place version converts a UTF-8 encoded string to uppercase.
125void make_upper (std::string& str);
/// Convert UTF-8 string to lower case; the result is returned as a new string.
126std::string tolower (const std::string& str);
/// Convert a UTF-8 string to upper case; the result is returned as a new string.
127std::string toupper (const std::string& str);
/// NOTE(review): presumably a case-insensitive comparison of two UTF-8 strings; the
/// meaning of the returned int is not visible in this header - confirm in casecvt.cpp.
128int icompare (const std::string& s1, const std::string& s2);
130
/// Check if character is white space.
/// The pointer/iterator overloads decode the character at `p` first (see inlines below).
136bool isspace (char32_t r);
137bool isspace (const char* p);
138bool isspace (std::string::const_iterator p);
139
/// Check if character is space or tab.
140bool isblank (char32_t r);
141bool isblank (const char* p);
142bool isblank (std::string::const_iterator p);
143
/// Check if character is a decimal digit (0-9).
144bool isdigit (char32_t r);
145bool isdigit (const char* p);
146bool isdigit (std::string::const_iterator p);
147
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z).
148bool isalnum (char32_t r);
149bool isalnum (const char* p);
150bool isalnum (std::string::const_iterator p);
151
/// Check if character is an alphabetic character (A-Z or a-z).
152bool isalpha (char32_t r);
153bool isalpha (const char* p);
154bool isalpha (std::string::const_iterator p);
155
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f).
156bool isxdigit (char32_t r);
157bool isxdigit (const char* p);
158bool isxdigit (std::string::const_iterator p);
159
/// NOTE(review): presumably tests for an upper case character; implemented in
/// casecvt.cpp, exact character set not visible here.
160bool isupper (char32_t r);
161bool isupper (const char* p);
162bool isupper (std::string::const_iterator p);
163
/// NOTE(review): presumably tests for a lower case character; implemented in
/// casecvt.cpp, exact character set not visible here.
164bool islower (char32_t r);
165bool islower (const char* p);
166bool islower (std::string::const_iterator p);
168
170#ifdef _WIN32
171class ifstream : public std::ifstream
172{
173public:
174 ifstream () : std::ifstream () {};
175 explicit ifstream (const char* filename, std::ios_base::openmode mode = ios_base::in)
176 : std::ifstream (utf8::widen (filename), mode) {};
177 explicit ifstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in)
178 : std::ifstream (utf8::widen (filename), mode) {};
179 ifstream (ifstream&& other) noexcept : std::ifstream ((std::ifstream&&)other) {};
180 ifstream (const ifstream& rhs) = delete;
181
182 void open (const char* filename, std::ios_base::openmode mode = ios_base::in)
183 {
184 std::ifstream::open (utf8::widen (filename), mode);
185 }
186 void open (const std::string& filename, ios_base::openmode mode = ios_base::in)
187 {
188 std::ifstream::open (utf8::widen (filename), mode);
189 }
190};
192class ofstream : public std::ofstream
193{
194public:
195 ofstream () : std::ofstream () {};
196 explicit ofstream (const char* filename, std::ios_base::openmode mode = ios_base::out)
197 : std::ofstream (utf8::widen (filename), mode) {};
198 explicit ofstream (const std::string& filename, std::ios_base::openmode mode = ios_base::out)
199 : std::ofstream (utf8::widen (filename), mode) {};
200 ofstream (ofstream&& other) noexcept : std::ofstream ((std::ofstream&&)other) {};
201 ofstream (const ofstream& rhs) = delete;
202
203 void open (const char* filename, ios_base::openmode mode = ios_base::out)
204 {
205 std::ofstream::open (utf8::widen (filename), mode);
206 }
207 void open (const std::string& filename, ios_base::openmode mode = ios_base::out)
208 {
209 std::ofstream::open (utf8::widen (filename), mode);
210 }
211};
212
214class fstream : public std::fstream
215{
216public:
217 fstream () : std::fstream () {};
218 explicit fstream (const char* filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
219 : std::fstream (utf8::widen (filename), mode) {};
220 explicit fstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
221 : std::fstream (utf8::widen (filename), mode) {};
222 fstream (fstream&& other) noexcept : std::fstream ((std::fstream&&)other) {};
223 fstream (const fstream& rhs) = delete;
224
225 void open (const char* filename, ios_base::openmode mode = ios_base::in | ios_base::out)
226 {
227 std::fstream::open (utf8::widen (filename), mode);
228 }
229 void open (const std::string& filename, ios_base::openmode mode = ios_base::in | ios_base::out)
230 {
231 std::fstream::open (utf8::widen (filename), mode);
232 }
233};
234
235#else
// Non-Windows branch: the standard stream classes are used unchanged
// (original note: under Linux file streams already use UTF-8 filenames).
using ifstream = std::ifstream;
using ofstream = std::ofstream;
using fstream  = std::fstream;
240#endif
241
242
243// INLINES --------------------------------------------------------------------
244
251inline
252bool is_valid (const char* p)
253{
254 auto prev_mode = error_mode (action::replace);
255 bool valid = (next (p) != REPLACEMENT_CHARACTER);
256 error_mode (prev_mode);
257 return valid;
258}
259
267inline
268bool is_valid (std::string::const_iterator p, const std::string::const_iterator last)
269{
270 // auto len = last - p;
271 auto prev_mode = error_mode (action::replace);
272 bool valid = (next (p, last) != REPLACEMENT_CHARACTER);
273 error_mode (prev_mode);
274 return valid;
275}
276
278inline
279char32_t next (std::string::iterator& ptr, const std::string::const_iterator last)
280{
281 return next (*(std::string::const_iterator*)(&ptr), last);
282}
283
285inline
286char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first)
287{
288 return prev (*(std::string::const_iterator*)(&ptr), first);
289}
290
298inline
299char32_t rune (const char* p)
300{
301 return next (p);
302}
303
304
315inline
316char32_t next (char*& ptr)
317{
318 return next (const_cast<const char*&>(ptr));
319}
320
330inline
331char32_t prev (char*& ptr)
332{
333 return prev (const_cast<const char*&>(ptr));
334}
335
336
342inline
343bool valid_str (const std::string& s)
344{
345 return valid_str (s.c_str (), s.size());
346}
347
349inline
350char32_t rune (const std::string::const_iterator& p)
351{
352 return rune (&(*p));
353}
354
355
364inline
365bool isspace (const char* p)
366{
367 return isspace (rune (p));
368}
369
371inline
372bool isspace (std::string::const_iterator p)
373{
374 return isspace (rune(p));
375}
376
377
390inline
391bool isblank (const char *p)
392{
393 return isblank(rune(p));
394}
395
397inline
398bool isblank (std::string::const_iterator p)
399{
400 return isblank (rune (p));
401}
402
/// Check if character is a decimal digit (0-9).
/// \param r character to check
/// \return `true` only for the ASCII digits '0' through '9'
inline
bool isdigit (char32_t r)
{
  return r >= U'0' && r <= U'9';
}
413
419inline
420bool isdigit (const char *p)
421{
422 return isdigit (rune (p));
423}
424
426inline
427bool isdigit (std::string::const_iterator p)
428{
429 return isdigit (rune (p));
430}
431
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z).
/// \param r character to check
/// \return `true` only for ASCII digits and ASCII letters
inline
bool isalnum (char32_t r)
{
  const bool digit = (r >= U'0' && r <= U'9');
  const bool upper = (r >= U'A' && r <= U'Z');
  const bool lower = (r >= U'a' && r <= U'z');
  return digit || upper || lower;
}
442
448inline
449bool isalnum (const char *p)
450{
451 return isalnum (rune (p));
452}
453
455inline
456bool isalnum (std::string::const_iterator p)
457{
458 return isalnum (rune (p));
459}
460
/// Check if character is an alphabetic character (A-Z or a-z).
/// \param r character to check
/// \return `true` only for ASCII letters
inline
bool isalpha (char32_t r)
{
  const bool upper = (r >= U'A' && r <= U'Z');
  const bool lower = (r >= U'a' && r <= U'z');
  return upper || lower;
}
471
477inline
478bool isalpha (const char *p)
479{
480 return isalpha (rune (p));
481}
482
484inline
485bool isalpha (std::string::const_iterator p)
486{
487 return isalpha (&*p);
488}
489
490
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f).
/// \param r character to check
/// \return `true` only for ASCII hexadecimal digits
inline
bool isxdigit (char32_t r)
{
  const bool decimal = (r >= U'0' && r <= U'9');
  const bool upper_hex = (r >= U'A' && r <= U'F');
  const bool lower_hex = (r >= U'a' && r <= U'f');
  return decimal || upper_hex || lower_hex;
}
501
507inline
508bool isxdigit (const char *p)
509{
510 return isxdigit(rune(p));
511}
512
514inline
515bool isxdigit (std::string::const_iterator p)
516{
517 return isxdigit (rune(p));
518}
519
521inline
522bool isupper (std::string::const_iterator p)
523{
524 return isupper (rune(p));
525}
526
528inline
529bool islower (std::string::const_iterator p)
530{
531 return islower (rune(p));
532}
533
534// File System functions -----------------------------------------------------
535
/// Open a file.
///
/// On Windows both arguments are widened and passed to `_wfopen_s`; elsewhere
/// they go straight to the C library `fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode
/// \return stream handle, or null if the file could not be opened
inline
FILE* fopen (const std::string& filename, const std::string& mode)
{
#ifdef _WIN32
  FILE* stream = nullptr;
  _wfopen_s (&stream, widen (filename).c_str (), widen (mode).c_str ());
  return stream;
#else
  return ::fopen (filename.c_str (), mode.c_str ());
#endif
}
554
/// Open a file (null-terminated string flavour).
///
/// On Windows both arguments are widened and passed to `_wfopen_s`; elsewhere
/// they go straight to the C library `fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode
/// \return stream handle, or null if the file could not be opened
inline
FILE* fopen (const char* filename, const char* mode)
{
#ifdef _WIN32
  FILE* stream = nullptr;
  _wfopen_s (&stream, widen (filename).c_str (), widen (mode).c_str ());
  return stream;
#else
  return ::fopen (filename, mode);
#endif
}
567
/// Gets the current working directory.
///
/// Uses `_wgetcwd` when UTF8_USE_WINDOWS_API is set and
/// `std::filesystem::current_path` otherwise.
/// \return UTF-8 encoded directory name, or an empty string on failure
/// NOTE(review): the Windows API branch uses a fixed `_MAX_PATH` buffer;
/// presumably longer paths make `_wgetcwd` fail and yield an empty string.
inline
std::string getcwd ()
{
#if UTF8_USE_WINDOWS_API
  wchar_t buffer[_MAX_PATH];
  if (!_wgetcwd (buffer, _countof (buffer)))
    return std::string ();
  return narrow (buffer);
#else
  std::error_code status;
  std::filesystem::path cwd = std::filesystem::current_path (status);
  if (status)
    return std::string ();
# ifdef _WIN32
  return narrow (cwd.native ());
# else
  return cwd;
# endif
#endif
}
593
/// Changes the current working directory.
/// \param dirname UTF-8 encoded path of the new working directory
/// \return `true` if successful, `false` otherwise
inline
bool chdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  const int rc = _wchdir (widen (dirname).c_str ());
  return rc == 0;
#else
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::error_code status;
  std::filesystem::current_path (target, status);
  return !status;
#endif
}
616
/// Changes the current working directory (null-terminated string flavour).
/// \param dirname UTF-8 encoded path of the new working directory
/// \return `true` if successful, `false` otherwise
inline
bool chdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  const int rc = _wchdir (widen (dirname).c_str ());
  return rc == 0;
#else
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::error_code status;
  std::filesystem::current_path (target, status);
  return !status;
#endif
}
634
635
/// Creates a new directory.
///
/// Both configurations report the same outcome: `true` only when a directory
/// was actually created. `std::filesystem::create_directory` returns `false`
/// without setting an error when the directory already exists, so its return
/// value is checked in addition to the error code.
/// \param dirname UTF-8 encoded path of the directory to create
/// \return `true` if a new directory was created, `false` otherwise
inline
bool mkdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
658
659
/// Creates a new directory (null-terminated string flavour).
///
/// Both configurations report the same outcome: `true` only when a directory
/// was actually created. `std::filesystem::create_directory` returns `false`
/// without setting an error when the directory already exists, so its return
/// value is checked in addition to the error code.
/// \param dirname UTF-8 encoded path of the directory to create
/// \return `true` if a new directory was created, `false` otherwise
inline
bool mkdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
677
/// Deletes a directory.
///
/// Both configurations report the same outcome for a missing directory:
/// `std::filesystem::remove` returns `false` without setting an error when
/// nothing existed at the path, so its return value is checked in addition to
/// the error code.
/// \param dirname UTF-8 encoded path of the directory to delete
/// \return `true` if something was removed, `false` otherwise
/// NOTE(review): in the filesystem branch `std::filesystem::remove` also
/// deletes regular files; confirm whether that should be rejected here.
inline
bool rmdir (const std::string& dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
700
/// Deletes a directory (null-terminated string flavour).
///
/// Both configurations report the same outcome for a missing directory:
/// `std::filesystem::remove` returns `false` without setting an error when
/// nothing existed at the path, so its return value is checked in addition to
/// the error code.
/// \param dirname UTF-8 encoded path of the directory to delete
/// \return `true` if something was removed, `false` otherwise
/// NOTE(review): in the filesystem branch `std::filesystem::remove` also
/// deletes regular files; confirm whether that should be rejected here.
inline
bool rmdir (const char* dirname)
{
#if UTF8_USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
718
/// Rename a file or directory.
/// \param oldname UTF-8 encoded current name
/// \param newname UTF-8 encoded new name
/// \return `true` if successful, `false` otherwise
inline
bool rename (const std::string& oldname, const std::string& newname)
{
#if UTF8_USE_WINDOWS_API
  const int rc = _wrename (widen (oldname).c_str (), widen (newname).c_str ());
  return rc == 0;
#else
  // The destination path is built first, then the source.
# ifdef _WIN32
  const std::filesystem::path to (widen (newname));
  const std::filesystem::path from (widen (oldname));
# else
  const std::filesystem::path to (newname);
  const std::filesystem::path from (oldname);
# endif
  std::error_code status;
  std::filesystem::rename (from, to, status);
  return !status;
#endif
}
744
746inline
747bool rename (const char* oldname, const char* newname)
748{
749#if UTF8_USE_WINDOWS_API
750 return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
751#else
752# ifdef _WIN32
753 std::filesystem::path fn (widen (newname));
754 std::filesystem::path fo (widen (oldname));
755# else
756 std::filesystem::path fn (newname);
757 std::filesystem::path fo (oldname);
758# endif
759 std::error_code ec;
760 std::filesystem::rename (fo, fn, ec);
761 return !ec;
762#endif
763}
764
/// Delete a file.
///
/// Both configurations report the same outcome for a missing file:
/// `std::filesystem::remove` returns `false` without setting an error when
/// nothing existed at the path, so its return value is checked in addition to
/// the error code.
/// \param filename UTF-8 encoded name of the file to delete
/// \return `true` if the file was removed, `false` otherwise
inline
bool remove (const std::string& filename)
{
#if UTF8_USE_WINDOWS_API
  return (_wremove (widen (filename).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path f (widen (filename));
# else
  std::filesystem::path f (filename);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (f, ec);
  return removed && !ec;
#endif
}
787
788
790inline
791bool remove (const char* filename)
792{
793#if UTF8_USE_WINDOWS_API
794 return (_wremove (widen (filename).c_str ()) == 0);
795#else
796# ifdef _WIN32
797 std::filesystem::path f (widen (filename));
798# else
799 std::filesystem::path f (filename);
800# endif
801 std::error_code ec;
802 std::filesystem::remove (f, ec);
803 return !ec;
804#endif
805}
806
808inline
809std::ostream& operator<<(std::ostream& os, const exception& x)
810{
811 os << x.what ();
812 return os;
813}
814
816inline
817bool operator ==(const exception& lhs, const exception rhs)
818{
819 return (lhs.code == rhs.code);
820}
821
823inline
824bool operator !=(const exception& lhs, const exception& rhs)
825{
826 return !operator ==(lhs, rhs);
827}
828
829}; //namespace utf8
830
831#if defined(_WIN32) && !defined(UTF8_KEEP_WIN32_API)
832#include <utf8/winutf8.h>
833#endif
834#include <utf8/ini.h>
835
836#ifdef _MSC_VER
837#pragma comment (lib, "utf8")
838#endif
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:329
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:52
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:299
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:207
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:730
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:438
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:467
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:497
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:713
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:409
void make_lower(std::string &str)
In place version converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In place version converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:49
const char * what() const noexcept
Exception message.
Definition utf8.h:59
exception(cause c)
Constructor.
Definition utf8.h:54
cause
Possible causes.
Definition utf8.h:51
cause code
Condition that triggered the exception.
Definition utf8.h:67
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:375
action error_mode(action mode)
Set error handling mode for this thread.
Definition utf8.cpp:22
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:401
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:561
size_t length(const std::string &s)
Counts the number of characters in a UTF-8 encoded string.
Definition utf8.cpp:651
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:252
std::ifstream ifstream
Input stream class using UTF-8 filename.
Definition utf8.h:237
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:80
std::ostream & operator<<(std::ostream &os, const exception &x)
Insertion operator for exception objects.
Definition utf8.h:809
action
Error handling methods.
Definition utf8.h:71
@ replace
Use replacement character for invalid encodings.
Definition utf8.h:72
@ except
Throw an exception on invalid encodings.
Definition utf8.h:73
bool chdir(const std::string &dirname)
Changes the current working directory.
Definition utf8.h:601
bool rename(const std::string &oldname, const std::string &newname)
Rename a file or directory.
Definition utf8.h:727
bool remove(const std::string &filename)
Delete a file.
Definition utf8.h:772
std::string getcwd()
Gets the current working directory.
Definition utf8.h:573
FILE * fopen(const std::string &filename, const std::string &mode)
Open a file.
Definition utf8.h:544
bool mkdir(const std::string &dirname)
Creates a new directory.
Definition utf8.h:643
bool rmdir(const std::string &dirname)
Deletes a directory.
Definition utf8.h:685
Windows specific parts.