UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h
Go to the documentation of this file.
1/*
2 Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License.
3 This is part of UTF8 project. See LICENSE file for full license terms.
4*/
5
7#pragma once
8
9#include <string>
10#include <vector>
11#include <fstream>
12
// Back-end selection for the file system helpers.
// Define USE_WINDOWS_API as 0 before including this header to select the
// std::filesystem based implementation even when compiling for Windows.
//#define USE_WINDOWS_API 0

#if !defined (USE_WINDOWS_API)
# if defined (_WIN32)
#  define USE_WINDOWS_API 1
# else
#  define USE_WINDOWS_API 0
# endif
#endif

#if !USE_WINDOWS_API

// Verify the language level BEFORE pulling in <filesystem>, so that an older
// toolchain reports this message instead of a missing-header error.
# if (defined(_MSVC_LANG) && _MSVC_LANG < 201703L) \
  || (!defined(_MSVC_LANG) && (__cplusplus < 201703L))
#  error "UTF8 requires c++17 or newer if not using Windows API functions"
# endif

# include <filesystem>

#endif
36
37namespace utf8 {
38
/// Exception thrown on encoding/decoding failure
struct exception : public std::exception
{
  /// Possible causes
  enum cause { invalid_utf8=1, invalid_wchar, invalid_char32 };

  /// Constructor
  /// \param c condition that triggered the exception
  explicit exception (cause c)
    : code (c)
  {}

  /// Exception message
  /// \return a static string describing the stored cause
  const char* what() const noexcept override
  {
    switch (code)
    {
    case cause::invalid_utf8:   return "Invalid UTF-8 encoding";
    case cause::invalid_wchar:  return "Invalid UTF-16 encoding";
    case cause::invalid_char32: return "Invalid code-point value";
    default:                    return "Other UTF-8 exception";
    }
  }

  /// Condition that triggered the exception
  cause code;
};
61
67
/// Error handling methods
enum class action {
  replace,  ///< Use replacement character for invalid encodings
  except    ///< Throw an exception on invalid encodings
};

/// Set error handling mode for this thread
/// \param mode new error handling mode
/// \return an `action` value; the inline helpers in this header pass it back
///         to error_mode() to restore the earlier mode
action error_mode (action mode);
70
/// Replacement character used for invalid encodings (U+FFFD)
constexpr char32_t REPLACEMENT_CHARACTER = U'\uFFFD';
73
74
/// \name Conversion to UTF-8
/// NOTE(review): the meaning of nch == 0 is defined in utf8.cpp; presumably it
/// selects NUL-terminated input - confirm there.
///@{
std::string narrow (const wchar_t* s, size_t nch=0);      // wide character -> UTF-8
std::string narrow (const std::wstring& s);               // wide string -> UTF-8
std::string narrow (const char32_t* s, size_t nch = 0);   // UTF-32 -> UTF-8
std::string narrow (const std::u32string& s);             // UTF-32 string -> UTF-8
std::string narrow (char32_t r);                          // single code point -> UTF-8
///@}

/// \name Conversion from UTF-8
///@{
std::wstring widen (const char* s, size_t nch = 0);       // UTF-8 -> wide character
std::wstring widen (const std::string& s);
std::u32string runes (const char* s, size_t nch = 0);     // UTF-8 -> UTF-32
std::u32string runes (const std::string& s);
///@}

/// \name Decoding of a single UTF-8 character to UTF-32
///@{
char32_t rune (const char* p);
char32_t rune (const std::string::const_iterator& p);
///@}
91
/// \name Validation
///@{
bool is_valid (const char* p);                  // is p pointing to a valid UTF-8 encoding?
bool is_valid (std::string::const_iterator p, const std::string::const_iterator last);
bool valid_str (const char* s, size_t nch = 0); // is the whole string valid UTF-8?
bool valid_str (const std::string& s);
///@}

/// \name Forward traversal
/// Decode a UTF-8 encoded character and advance to the next code point.
///@{
char32_t next (std::string::const_iterator& ptr, const std::string::const_iterator last);
char32_t next (std::string::iterator& ptr, const std::string::const_iterator last);
char32_t next (const char*& ptr);
char32_t next (char*& p);
///@}

/// \name Backward traversal
/// Move back to the previous UTF-8 character.
///@{
char32_t prev (const char*& ptr);
char32_t prev (char*& ptr);
char32_t prev (std::string::const_iterator& ptr, const std::string::const_iterator first);
char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first);
///@}

/// \name Character count of a UTF-8 encoded string
///@{
size_t length (const std::string& s);
size_t length (const char* s);
///@}
109
/// \name Case conversion and case-insensitive comparison
///@{
void make_lower (std::string& str);                 // in place, to lowercase
void make_upper (std::string& str);                 // in place, to uppercase
std::string tolower (const std::string& str);       // lowercase copy
std::string toupper (const std::string& str);       // uppercase copy
int icompare (const std::string& s1, const std::string& s2);
///@}
120
/// \name Character classification
/// Each test comes in three flavours: on a code point, on a pointer to a
/// UTF-8 encoded character and on a string iterator.
///@{

// white space
bool isspace (char32_t r);
bool isspace (const char* p);
bool isspace (std::string::const_iterator p);

// space or tab
bool isblank (char32_t r);
bool isblank (const char* p);
bool isblank (std::string::const_iterator p);

// decimal digit (0-9)
bool isdigit (char32_t r);
bool isdigit (const char* p);
bool isdigit (std::string::const_iterator p);

// alphanumeric (0-9 or A-Z or a-z)
bool isalnum (char32_t r);
bool isalnum (const char* p);
bool isalnum (std::string::const_iterator p);

// alphabetic (A-Z or a-z)
bool isalpha (char32_t r);
bool isalpha (const char* p);
bool isalpha (std::string::const_iterator p);

// hexadecimal digit (0-9 or A-F or a-f)
bool isxdigit (char32_t r);
bool isxdigit (const char* p);
bool isxdigit (std::string::const_iterator p);

// upper case
bool isupper (char32_t r);
bool isupper (const char* p);
bool isupper (std::string::const_iterator p);

// lower case
bool islower (char32_t r);
bool islower (const char* p);
bool islower (std::string::const_iterator p);
///@}
158
160#ifdef _WIN32
161class ifstream : public std::ifstream
162{
163public:
164 ifstream () : std::ifstream () {};
165 explicit ifstream (const char* filename, std::ios_base::openmode mode = ios_base::in)
166 : std::ifstream (utf8::widen (filename), mode) {};
167 explicit ifstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in)
168 : std::ifstream (utf8::widen (filename), mode) {};
169 ifstream (ifstream&& other) noexcept : std::ifstream ((std::ifstream&&)other) {};
170 ifstream (const ifstream& rhs) = delete;
171
172 void open (const char* filename, std::ios_base::openmode mode = ios_base::in)
173 {
174 std::ifstream::open (utf8::widen (filename), mode);
175 }
176 void open (const std::string& filename, ios_base::openmode mode = ios_base::in)
177 {
178 std::ifstream::open (utf8::widen (filename), mode);
179 }
180};
182class ofstream : public std::ofstream
183{
184public:
185 ofstream () : std::ofstream () {};
186 explicit ofstream (const char* filename, std::ios_base::openmode mode = ios_base::out)
187 : std::ofstream (utf8::widen (filename), mode) {};
188 explicit ofstream (const std::string& filename, std::ios_base::openmode mode = ios_base::out)
189 : std::ofstream (utf8::widen (filename), mode) {};
190 ofstream (ofstream&& other) noexcept : std::ofstream ((std::ofstream&&)other) {};
191 ofstream (const ofstream& rhs) = delete;
192
193 void open (const char* filename, ios_base::openmode mode = ios_base::out)
194 {
195 std::ofstream::open (utf8::widen (filename), mode);
196 }
197 void open (const std::string& filename, ios_base::openmode mode = ios_base::out)
198 {
199 std::ofstream::open (utf8::widen (filename), mode);
200 }
201};
202
204class fstream : public std::fstream
205{
206public:
207 fstream () : std::fstream () {};
208 explicit fstream (const char* filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
209 : std::fstream (utf8::widen (filename), mode) {};
210 explicit fstream (const std::string& filename, std::ios_base::openmode mode = ios_base::in | ios_base::out)
211 : std::fstream (utf8::widen (filename), mode) {};
212 fstream (fstream&& other) noexcept : std::fstream ((std::fstream&&)other) {};
213 fstream (const fstream& rhs) = delete;
214
215 void open (const char* filename, ios_base::openmode mode = ios_base::in | ios_base::out)
216 {
217 std::fstream::open (utf8::widen (filename), mode);
218 }
219 void open (const std::string& filename, ios_base::openmode mode = ios_base::in | ios_base::out)
220 {
221 std::fstream::open (utf8::widen (filename), mode);
222 }
223};
224
225#else
// Under Linux file streams already use UTF-8 filenames, so the standard
// stream classes are used directly.
using ifstream = std::ifstream;
using ofstream = std::ofstream;
using fstream = std::fstream;
230#endif
231
232
233// INLINES --------------------------------------------------------------------
234
241inline
242bool is_valid (const char* p)
243{
244 auto prev_mode = error_mode (action::replace);
245 bool valid = (next (p) != REPLACEMENT_CHARACTER);
246 error_mode (prev_mode);
247 return valid;
248}
249
257inline
258bool is_valid (std::string::const_iterator p, const std::string::const_iterator last)
259{
260 auto len = last - p;
261 auto prev_mode = error_mode (action::replace);
262 bool valid = (next (p, last) != REPLACEMENT_CHARACTER);
263 error_mode (prev_mode);
264 return valid;
265}
266
268inline
269char32_t next (std::string::iterator& ptr, const std::string::const_iterator last)
270{
271 return next (*(std::string::const_iterator*)(&ptr), last);
272}
273
275inline
276char32_t prev (std::string::iterator& ptr, const std::string::const_iterator first)
277{
278 return prev (*(std::string::const_iterator*)(&ptr), first);
279}
280
288inline
289char32_t rune (const char* p)
290{
291 return next (p);
292}
293
294
305inline
306char32_t next (char*& ptr)
307{
308 return next (const_cast<const char*&>(ptr));
309}
310
320inline
321char32_t prev (char*& ptr)
322{
323 return prev (const_cast<const char*&>(ptr));
324}
325
326
332inline
333bool valid_str (const std::string& s)
334{
335 return valid_str (s.c_str (), s.size());
336}
337
339inline
340char32_t rune (const std::string::const_iterator& p)
341{
342 return rune (&(*p));
343}
344
345
354inline
355bool isspace (const char* p)
356{
357 return isspace (rune (p));
358}
359
361inline
362bool isspace (std::string::const_iterator p)
363{
364 return isspace (rune(p));
365}
366
367
380inline
381bool isblank (const char *p)
382{
383 return isblank(rune(p));
384}
385
387inline
388bool isblank (std::string::const_iterator p)
389{
390 return isblank (rune (p));
391}
392
/// Check if character is a decimal digit (0-9)
/// \param r code point to check
/// \return `true` only for the ASCII digits '0' through '9'
inline
bool isdigit (char32_t r)
{
  return r >= U'0' && r <= U'9';
}
403
409inline
410bool isdigit (const char *p)
411{
412 return isdigit (rune (p));
413}
414
416inline
417bool isdigit (std::string::const_iterator p)
418{
419 return isdigit (rune (p));
420}
421
/// Check if character is an alphanumeric character (0-9 or A-Z or a-z)
/// \param r code point to check
/// \return `true` only for ASCII digits and ASCII letters
inline
bool isalnum (char32_t r)
{
  const bool digit = (r >= U'0' && r <= U'9');
  const bool upper = (r >= U'A' && r <= U'Z');
  const bool lower = (r >= U'a' && r <= U'z');
  return digit || upper || lower;
}
432
438inline
439bool isalnum (const char *p)
440{
441 return isalnum (rune (p));
442}
443
445inline
446bool isalnum (std::string::const_iterator p)
447{
448 return isalnum (rune (p));
449}
450
/// Check if character is an alphabetic character (A-Z or a-z)
/// \param r code point to check
/// \return `true` only for ASCII letters
inline
bool isalpha (char32_t r)
{
  if (r >= U'A' && r <= U'Z')
    return true;
  return r >= U'a' && r <= U'z';
}
461
467inline
468bool isalpha (const char *p)
469{
470 return isalpha (rune (p));
471}
472
474inline
475bool isalpha (std::string::const_iterator p)
476{
477 return isalpha (&*p);
478}
479
480
/// Check if character is a hexadecimal digit (0-9 or A-F or a-f)
/// \param r code point to check
/// \return `true` only for ASCII hexadecimal digits
inline
bool isxdigit (char32_t r)
{
  const bool decimal = (r >= U'0' && r <= U'9');
  const bool upper_hex = (r >= U'A' && r <= U'F');
  const bool lower_hex = (r >= U'a' && r <= U'f');
  return decimal || upper_hex || lower_hex;
}
491
497inline
498bool isxdigit (const char *p)
499{
500 return isxdigit(rune(p));
501}
502
504inline
505bool isxdigit (std::string::const_iterator p)
506{
507 return isxdigit (rune(p));
508}
509
511inline
512bool isupper (std::string::const_iterator p)
513{
514 return isupper (rune(p));
515}
516
518inline
519bool islower (std::string::const_iterator p)
520{
521 return islower (rune(p));
522}
523
524// File System functions -----------------------------------------------------
525
/// Open a file
///
/// On Windows both arguments are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to the C library `fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode
/// \return stream pointer, or `nullptr` if the file could not be opened
inline
FILE* fopen (const std::string& filename, const std::string& mode)
{
#ifdef _WIN32
  FILE* stream = nullptr;
  _wfopen_s (&stream, widen (filename).c_str (), widen (mode).c_str ());
  return stream;
#else
  return ::fopen (filename.c_str (), mode.c_str ());
#endif
}
544
/// Open a file
///
/// On Windows both arguments are widened and passed to `_wfopen_s`;
/// elsewhere they are passed unchanged to the C library `fopen`.
/// \param filename UTF-8 encoded file name
/// \param mode     access mode
/// \return stream pointer, or `nullptr` if the file could not be opened
inline
FILE* fopen (const char* filename, const char* mode)
{
#ifdef _WIN32
  FILE* stream = nullptr;
  _wfopen_s (&stream, widen (filename).c_str (), widen (mode).c_str ());
  return stream;
#else
  return ::fopen (filename, mode);
#endif
}
557
/// Gets the current working directory
///
/// Uses `_wgetcwd` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// \return UTF-8 encoded name of the working directory, or an empty string
///         if it could not be retrieved
inline
std::string getcwd ()
{
#if USE_WINDOWS_API
  wchar_t buffer[_MAX_PATH];
  return _wgetcwd (buffer, _countof (buffer)) ? narrow (buffer) : std::string ();
#else
  std::error_code failure;
  const std::filesystem::path cwd = std::filesystem::current_path (failure);
  if (failure)
    return std::string ();
# ifdef _WIN32
  // native() is a wide string on Windows and has to be converted
  return narrow (cwd.native ());
# else
  return cwd.string ();
# endif
#endif
}
583
/// Changes the current working directory
///
/// Uses `_wchdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// \param dirname UTF-8 encoded name of the new working directory
/// \return `true` if the working directory was changed
inline
bool chdir (const std::string& dirname)
{
#if USE_WINDOWS_API
  const std::wstring wide_name = widen (dirname);
  return _wchdir (wide_name.c_str ()) == 0;
#else
  std::error_code failure;
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::filesystem::current_path (target, failure);
  return !failure;
#endif
}
606
/// Changes the current working directory
///
/// Uses `_wchdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// \param dirname UTF-8 encoded name of the new working directory
/// \return `true` if the working directory was changed
inline
bool chdir (const char* dirname)
{
#if USE_WINDOWS_API
  const std::wstring wide_name = widen (dirname);
  return _wchdir (wide_name.c_str ()) == 0;
#else
  std::error_code failure;
# ifdef _WIN32
  const std::filesystem::path target (widen (dirname));
# else
  const std::filesystem::path target (dirname);
# endif
  std::filesystem::current_path (target, failure);
  return !failure;
#endif
}
624
625
/// Creates a new directory
///
/// Uses `_wmkdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// Both back-ends report failure when the directory already exists:
/// std::filesystem::create_directory() signals that case by returning
/// `false` without setting an error code, so its return value is checked
/// in addition to the error code.
/// \param dirname UTF-8 encoded name of the directory to create
/// \return `true` only if a directory was newly created
inline
bool mkdir (const std::string& dirname)
{
#if USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
648
649
/// Creates a new directory
///
/// Uses `_wmkdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// Both back-ends report failure when the directory already exists:
/// std::filesystem::create_directory() signals that case by returning
/// `false` without setting an error code, so its return value is checked
/// in addition to the error code.
/// \param dirname UTF-8 encoded name of the directory to create
/// \return `true` only if a directory was newly created
inline
bool mkdir (const char* dirname)
{
#if USE_WINDOWS_API
  return (_wmkdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool created = std::filesystem::create_directory (dir, ec);
  return created && !ec;
#endif
}
667
/// Deletes a directory
///
/// Uses `_wrmdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// Both back-ends report failure when there is nothing to delete:
/// std::filesystem::remove() signals that case by returning `false`
/// without setting an error code, so its return value is checked in
/// addition to the error code.
/// \param dirname UTF-8 encoded name of the directory to delete
/// \return `true` only if something was actually removed
inline
bool rmdir (const std::string& dirname)
{
#if USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
690
/// Deletes a directory
///
/// Uses `_wrmdir` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// Both back-ends report failure when there is nothing to delete:
/// std::filesystem::remove() signals that case by returning `false`
/// without setting an error code, so its return value is checked in
/// addition to the error code.
/// \param dirname UTF-8 encoded name of the directory to delete
/// \return `true` only if something was actually removed
inline
bool rmdir (const char* dirname)
{
#if USE_WINDOWS_API
  return (_wrmdir (widen (dirname).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path dir (widen (dirname));
# else
  std::filesystem::path dir (dirname);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (dir, ec);
  return removed && !ec;
#endif
}
708
/// Rename a file or directory
///
/// Uses `_wrename` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// \param oldname current UTF-8 encoded name
/// \param newname new UTF-8 encoded name
/// \return `true` if the operation reported no error
inline
bool rename (const std::string& oldname, const std::string& newname)
{
#if USE_WINDOWS_API
  const std::wstring wide_old = widen (oldname);
  const std::wstring wide_new = widen (newname);
  return _wrename (wide_old.c_str (), wide_new.c_str ()) == 0;
#else
  std::error_code failure;
# ifdef _WIN32
  const std::filesystem::path to (widen (newname));
  const std::filesystem::path from (widen (oldname));
# else
  const std::filesystem::path to (newname);
  const std::filesystem::path from (oldname);
# endif
  std::filesystem::rename (from, to, failure);
  return !failure;
#endif
}
734
736inline
737bool rename (const char* oldname, const char* newname)
738{
739#if USE_WINDOWS_API
740 return (_wrename (widen (oldname).c_str (), widen (newname).c_str ()) == 0);
741#else
742# ifdef _WIN32
743 std::filesystem::path fn (widen (newname));
744 std::filesystem::path fo (widen (oldname));
745# else
746 std::filesystem::path fn (newname);
747 std::filesystem::path fo (oldname);
748# endif
749 std::error_code ec;
750 std::filesystem::rename (fo, fn, ec);
751 return !ec;
752#endif
753}
754
/// Delete a file
///
/// Uses `_wremove` when USE_WINDOWS_API is set, std::filesystem otherwise.
/// Both back-ends report failure when the file does not exist:
/// std::filesystem::remove() signals that case by returning `false`
/// without setting an error code, so its return value is checked in
/// addition to the error code.
/// \param filename UTF-8 encoded name of the file to delete
/// \return `true` only if something was actually removed
inline
bool remove (const std::string& filename)
{
#if USE_WINDOWS_API
  return (_wremove (widen (filename).c_str ()) == 0);
#else
# ifdef _WIN32
  std::filesystem::path f (widen (filename));
# else
  std::filesystem::path f (filename);
# endif
  std::error_code ec;
  const bool removed = std::filesystem::remove (f, ec);
  return removed && !ec;
#endif
}
777
778
780inline
781bool remove (const char* filename)
782{
783#if USE_WINDOWS_API
784 return (_wremove (widen (filename).c_str ()) == 0);
785#else
786# ifdef _WIN32
787 std::filesystem::path f (widen (filename));
788# else
789 std::filesystem::path f (filename);
790# endif
791 std::error_code ec;
792 std::filesystem::remove (f, ec);
793 return !ec;
794#endif
795}
796
798inline
799std::ostream& operator<<(std::ostream& os, const exception& x)
800{
801 os << x.what ();
802 return os;
803}
804
806inline
807bool operator ==(const exception& lhs, const exception rhs)
808{
809 return (lhs.code == rhs.code);
810}
811
813inline
814bool operator !=(const exception& lhs, const exception& rhs)
815{
816 return !operator ==(lhs, rhs);
817}
818
819}; //namespace utf8
820
821#ifdef _WIN32
822#include <utf8/winutf8.h>
823#endif
824#include <utf8/ini.h>
825
826#ifdef _MSC_VER
827#pragma comment (lib, "utf8")
828#endif
std::u32string runes(const char *s, size_t nch)
Conversion from UTF-8 to UTF-32.
Definition utf8.cpp:287
std::string narrow(const wchar_t *s, size_t nch)
Conversion from wide character to UTF-8.
Definition utf8.cpp:52
char32_t rune(const char *p)
Conversion from UTF-8 to UTF-32.
Definition utf8.h:289
std::wstring widen(const char *s, size_t nch)
Conversion from UTF-8 to wide character.
Definition utf8.cpp:207
bool islower(char32_t r)
Definition casecvt.cpp:48
bool isspace(char32_t r)
Check if character is white space.
Definition utf8.cpp:688
bool isalnum(char32_t r)
Check if character is an alphanumeric character (0-9 or A-Z or a-z)
Definition utf8.h:428
bool isalpha(char32_t r)
Check if character is an alphabetic character (A-Z or a-z)
Definition utf8.h:457
bool isxdigit(char32_t r)
Check if character is a hexadecimal digit (0-9 or A-F or a-f)
Definition utf8.h:487
bool isblank(char32_t r)
Check if character is space or tab.
Definition utf8.cpp:671
bool isupper(char32_t r)
Definition casecvt.cpp:106
bool isdigit(char32_t r)
Check if character is a decimal digit (0-9)
Definition utf8.h:399
void make_lower(std::string &str)
In-place version: converts a UTF-8 encoded string to lowercase.
Definition casecvt.cpp:99
std::string tolower(const std::string &str)
Convert UTF-8 string to lower case.
Definition casecvt.cpp:76
std::string toupper(const std::string &str)
Convert a UTF-8 string to upper case.
Definition casecvt.cpp:133
void make_upper(std::string &str)
In-place version: converts a UTF-8 encoded string to uppercase.
Definition casecvt.cpp:156
Exception thrown on encoding/decoding failure.
Definition utf8.h:41
const char * what() const noexcept
Exception message.
Definition utf8.h:51
exception(cause c)
Constructor.
Definition utf8.h:46
cause
Possible causes.
Definition utf8.h:43
cause code
Condition that triggered the exception.
Definition utf8.h:59
bool valid_str(const char *s, size_t nch)
Verifies if string is a valid UTF-8 string.
Definition utf8.cpp:333
action error_mode(action mode)
Set error handling mode for this thread.
Definition utf8.cpp:22
char32_t next(std::string::const_iterator &ptr, const std::string::const_iterator last)
Decodes a UTF-8 encoded character and advances iterator to next code point.
Definition utf8.cpp:359
char32_t prev(const char *&ptr)
Decrements a character pointer to previous UTF-8 character.
Definition utf8.cpp:519
size_t length(const std::string &s)
Counts the number of characters in a UTF-8 encoded string.
Definition utf8.cpp:609
bool is_valid(const char *p)
Check if pointer points to a valid UTF-8 encoding.
Definition utf8.h:242
std::ifstream ifstream
Input stream class using UTF-8 filename.
Definition utf8.h:227
const char32_t REPLACEMENT_CHARACTER
Replacement character used for invalid encodings.
Definition utf8.h:72
std::ostream & operator<<(std::ostream &os, const exception &x)
Insertion operator for exception objects (writes the exception message to the stream).
Definition utf8.h:799
action
Error handling methods.
Definition utf8.h:63
@ replace
Use replacement character for invalid encodings.
Definition utf8.h:64
@ except
Throw an exception on invalid encodings.
Definition utf8.h:65
bool chdir(const std::string &dirname)
Changes the current working directory.
Definition utf8.h:591
bool rename(const std::string &oldname, const std::string &newname)
Rename a file or directory.
Definition utf8.h:717
bool remove(const std::string &filename)
Delete a file.
Definition utf8.h:762
std::string getcwd()
Gets the current working directory.
Definition utf8.h:563
FILE * fopen(const std::string &filename, const std::string &mode)
Open a file.
Definition utf8.h:534
bool mkdir(const std::string &dirname)
Creates a new directory.
Definition utf8.h:633
bool rmdir(const std::string &dirname)
Deletes a directory.
Definition utf8.h:675
Windows specific parts.