UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.h File Reference

UTF-8 Conversion functions. More...

#include <string>
#include <vector>
#include <fstream>
#include <filesystem>
#include <utf8/ini.h>

Go to the source code of this file.

Data Structures

class  utf8::exception
 Exception thrown on encoding/decoding failure. More...
 

Macros

#define USE_WINDOWS_API   0
 If USE_WINDOWS_API is not zero, the library issues direct Windows API calls.
 

Typedefs

typedef std::ifstream utf8::ifstream
 Input stream class using UTF-8 filename.
 
typedef std::ofstream utf8::ofstream
 
typedef std::fstream utf8::fstream
 

Enumerations

enum  utf8::action { utf8::replace , utf8::except }
 Error handling methods. More...
 

Functions

action utf8::error_mode (action mode)
 Set error handling mode for this thread.
 
std::string utf8::narrow (const wchar_t *s, size_t nch)
 Conversion from wide character to UTF-8.
 
std::string utf8::narrow (const std::wstring &ws)
 Conversion from wide character to UTF-8.
 
std::string utf8::narrow (const char32_t *s, size_t nch)
 Conversion from UTF32 to UTF8.
 
std::string utf8::narrow (const std::u32string &s)
 Conversion from UTF32 to UTF8.
 
std::string utf8::narrow (char32_t r)
 Conversion from UTF32 to UTF8.
 
std::wstring utf8::widen (const char *s, size_t nch)
 Conversion from UTF-8 to wide character.
 
std::wstring utf8::widen (const std::string &s)
 Conversion from UTF-8 to wide character.
 
std::u32string utf8::runes (const char *s, size_t nch)
 Conversion from UTF-8 to UTF-32.
 
std::u32string utf8::runes (const std::string &s)
 Converts a string of characters from UTF-8 to UTF-32.
 
char32_t utf8::rune (const char *p)
 Conversion from UTF-8 to UTF-32.
 
char32_t utf8::rune (const std::string::const_iterator &p)
 Conversion from UTF-8 to UTF-32.
 
bool utf8::is_valid (const char *p)
 Check if pointer points to a valid UTF-8 encoding.
 
bool utf8::is_valid (std::string::const_iterator p, const std::string::const_iterator last)
 Check if iterator points to a valid UTF-8 encoding.
 
bool utf8::valid_str (const char *s, size_t nch)
 Verifies if string is a valid UTF-8 string.
 
bool utf8::valid_str (const std::string &s)
 Verifies if string is a valid UTF-8 encoded string.
 
char32_t utf8::next (std::string::const_iterator &ptr, const std::string::const_iterator last)
 Decodes a UTF-8 encoded character and advances iterator to next code point.
 
char32_t utf8::next (std::string::iterator &ptr, const std::string::const_iterator last)
 Decodes a UTF-8 encoded character and advances iterator to next code point.
 
char32_t utf8::next (const char *&ptr)
 Decodes a UTF-8 encoded character and advances pointer to next character.
 
char32_t utf8::next (char *&ptr)
 Decodes a UTF-8 encoded character and advances pointer to next character.
 
char32_t utf8::prev (const char *&ptr)
 Decrements a character pointer to previous UTF-8 character.
 
char32_t utf8::prev (char *&ptr)
 Decrements a character pointer to previous UTF-8 character.
 
char32_t utf8::prev (std::string::const_iterator &ptr, const std::string::const_iterator first)
 Decrements an iterator to previous UTF-8 character.
 
char32_t utf8::prev (std::string::iterator &ptr, const std::string::const_iterator first)
 Decrements an iterator to previous UTF-8 character.
 
size_t utf8::length (const std::string &s)
 Counts number of characters in a UTF-8 encoded string.
 
size_t utf8::length (const char *s)
 Counts number of characters in a UTF-8 encoded string.
 
void utf8::make_lower (std::string &str)
 In place version converts a UTF-8 encoded string to lowercase.
 
void utf8::make_upper (std::string &str)
 In place version converts a UTF-8 encoded string to uppercase.
 
std::string utf8::tolower (const std::string &str)
 Convert UTF-8 string to lower case.
 
std::string utf8::toupper (const std::string &str)
 Convert a UTF-8 string to upper case.
 
int utf8::icompare (const std::string &s1, const std::string &s2)
 Compare two strings in a case-insensitive way.
 
bool utf8::isspace (char32_t r)
 Check if character is white space.
 
bool utf8::isspace (const char *p)
 Return true if character is blank(-ish).
 
bool utf8::isspace (std::string::const_iterator p)
 Return true if character is blank(-ish).
 
bool utf8::isblank (char32_t r)
 Check if character is space or tab.
 
bool utf8::isblank (const char *p)
 Check if character is space or tab.
 
bool utf8::isblank (std::string::const_iterator p)
 Check if character is space or tab.
 
bool utf8::isdigit (char32_t r)
 Check if character is a decimal digit (0-9)
 
bool utf8::isdigit (const char *p)
 Check if character is a decimal digit (0-9)
 
bool utf8::isdigit (std::string::const_iterator p)
 Check if character is a decimal digit (0-9)
 
bool utf8::isalnum (char32_t r)
 Check if character is an alphanumeric character (0-9 or A-Z or a-z)
 
bool utf8::isalnum (const char *p)
 Check if character is an alphanumeric character (0-9 or A-Z or a-z)
 
bool utf8::isalnum (std::string::const_iterator p)
 Check if character is an alphanumeric character (0-9 or A-Z or a-z)
 
bool utf8::isalpha (char32_t r)
 Check if character is an alphabetic character (A-Z or a-z)
 
bool utf8::isalpha (const char *p)
 Return true if character is an alphabetic character (A-Z or a-z)
 
bool utf8::isalpha (std::string::const_iterator p)
 Return true if character is an alphabetic character (A-Z or a-z)
 
bool utf8::isxdigit (char32_t r)
 Check if character is a hexadecimal digit (0-9 or A-F or a-f)
 
bool utf8::isxdigit (const char *p)
 Check if character is a hexadecimal digit (0-9 or A-F or a-f)
 
bool utf8::isxdigit (std::string::const_iterator p)
 Check if character is a hexadecimal digit (0-9 or A-F or a-f)
 
bool utf8::isupper (char32_t r)
 
bool utf8::isupper (const char *p)
 
bool utf8::isupper (std::string::const_iterator p)
 
bool utf8::islower (char32_t r)
 
bool utf8::islower (const char *p)
 
bool utf8::islower (std::string::const_iterator p)
 
FILE * utf8::fopen (const std::string &filename, const std::string &mode)
 Open a file.
 
FILE * utf8::fopen (const char *filename, const char *mode)
 Open a file.
 
std::string utf8::getcwd ()
 Gets the current working directory.
 
bool utf8::chdir (const std::string &dirname)
 Changes the current working directory.
 
bool utf8::chdir (const char *dirname)
 Changes the current working directory.
 
bool utf8::mkdir (const std::string &dirname)
 Creates a new directory.
 
bool utf8::mkdir (const char *dirname)
 Creates a new directory.
 
bool utf8::rmdir (const std::string &dirname)
 Deletes a directory.
 
bool utf8::rmdir (const char *dirname)
 Deletes a directory.
 
bool utf8::rename (const std::string &oldname, const std::string &newname)
 Rename a file or directory.
 
bool utf8::rename (const char *oldname, const char *newname)
 Rename a file or directory.
 
bool utf8::remove (const std::string &filename)
 Delete a file.
 
bool utf8::remove (const char *filename)
 Delete a file.
 
std::ostream & utf8::operator<< (std::ostream &os, const exception &x)
 Insertion operator for exception objects.
 
bool utf8::operator== (const exception &lhs, const exception rhs)
 Equality operator for exception objects.
 
bool utf8::operator!= (const exception &lhs, const exception &rhs)
 Inequality operator for exception objects.
 

Variables

const char32_t utf8::REPLACEMENT_CHARACTER = 0xfffd
 Replacement character used for invalid encodings.
 

Detailed Description

UTF-8 Conversion functions.

Macro Definition Documentation

◆ USE_WINDOWS_API

#define USE_WINDOWS_API   0

If USE_WINDOWS_API is not zero, the library issues direct Windows API calls.

Otherwise it relies only on standard C++17 functions. If not defined, USE_WINDOWS_API defaults to 1 on Windows platform.

Enumeration Type Documentation

◆ action

Error handling methods.

Enumerator
replace 

Use replacement character for invalid encodings.

except 

Throw an exception on invalid encodings.

Function Documentation

◆ chdir() [1/2]

bool utf8::chdir ( const char *  dirname)
inline

Changes the current working directory.

Parameters
dirname - UTF-8 path of new working directory
Returns
true if successful, false otherwise

◆ chdir() [2/2]

bool utf8::chdir ( const std::string &  dirname)
inline

Changes the current working directory.

Parameters
dirname - UTF-8 path of new working directory
Returns
true if successful, false otherwise

◆ error_mode()

action utf8::error_mode ( action  mode)

Set error handling mode for this thread.

Parameters
mode - new error handling mode
Returns
previous error handling mode for this thread

◆ fopen() [1/2]

FILE * utf8::fopen ( const char *  filename,
const char *  mode 
)
inline

Open a file.

Parameters
filename - UTF-8 encoded file name
mode - access mode
Returns
pointer to the opened file or NULL if an error occurs

◆ fopen() [2/2]

FILE * utf8::fopen ( const std::string &  filename,
const std::string &  mode 
)
inline

Open a file.

Parameters
filename - UTF-8 encoded file name
mode - access mode
Returns
pointer to the opened file or NULL if an error occurs

◆ getcwd()

std::string utf8::getcwd ( )
inline

Gets the current working directory.

Returns
UTF-8 encoded name of working directory

◆ is_valid() [1/2]

bool utf8::is_valid ( const char *  p)
inline

Check if pointer points to a valid UTF-8 encoding.

Parameters
p - pointer to string
Returns
true if there is a valid UTF-8 encoding at the current pointer position, false otherwise.

◆ is_valid() [2/2]

bool utf8::is_valid ( std::string::const_iterator  p,
const std::string::const_iterator  last 
)
inline

Check if iterator points to a valid UTF-8 encoding.

Parameters
p - Iterator
last - Iterator pointing to end of range
Returns
true if there is a valid UTF-8 encoding at the current iterator position, false otherwise.

◆ length() [1/2]

size_t utf8::length ( const char *  s)

Counts number of characters in a UTF-8 encoded string.

Parameters
s - UTF-8 encoded string
Returns
number of characters in string
Note
Algorithm from http://canonical.org/~kragen/strlen-utf8.html

◆ length() [2/2]

size_t utf8::length ( const std::string &  s)

Counts number of characters in a UTF-8 encoded string.

Parameters
s - UTF-8 encoded string
Returns
number of characters in string
Note
Algorithm from http://canonical.org/~kragen/strlen-utf8.html

◆ mkdir() [1/2]

bool utf8::mkdir ( const char *  dirname)
inline

Creates a new directory.

Parameters
dirname - UTF-8 path for new directory
Returns
true if successful, false otherwise

◆ mkdir() [2/2]

bool utf8::mkdir ( const std::string &  dirname)
inline

Creates a new directory.

Parameters
dirname - UTF-8 path for new directory
Returns
true if successful, false otherwise

◆ next() [1/4]

char32_t utf8::next ( char *&  ptr)
inline

Decodes a UTF-8 encoded character and advances pointer to next character.

Parameters
ptr - Reference to character pointer to be advanced
Returns
decoded character

If the string contains an invalid UTF-8 encoding, the function returns utf8::REPLACEMENT_CHARACTER (0xfffd) and advances pointer to beginning of next character or end of string.

◆ next() [2/4]

char32_t utf8::next ( const char *&  ptr)

Decodes a UTF-8 encoded character and advances pointer to next character.

Parameters
ptr - Reference to character pointer to be advanced
Returns
decoded character

If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER (0xfffd) depending on error handling mode. In any case, the pointer is advanced to beginning of next character or end of string.

◆ next() [3/4]

char32_t utf8::next ( std::string::const_iterator &  ptr,
const std::string::const_iterator  last 
)

Decodes a UTF-8 encoded character and advances iterator to next code point.

Parameters
ptr - Reference to iterator to be advanced
last - Iterator pointing to the end of range
Returns
decoded character

If the iterator points to an invalid UTF-8 encoding or is at end, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER (0xfffd) depending on error handling mode. In any case, the iterator is advanced to beginning of next character or end of string.

◆ next() [4/4]

char32_t utf8::next ( std::string::iterator &  ptr,
const std::string::const_iterator  last 
)
inline

Decodes a UTF-8 encoded character and advances iterator to next code point.

Overload for mutable iterators; see utf8::next (std::string::const_iterator& ptr, const std::string::const_iterator last) for details.

◆ prev() [1/4]

char32_t utf8::prev ( char *&  ptr)
inline

Decrements a character pointer to previous UTF-8 character.

Parameters
ptr - Reference to character pointer to be decremented
Returns
previous UTF-8 encoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and pointer remains unchanged.

◆ prev() [2/4]

char32_t utf8::prev ( const char *&  ptr)

Decrements a character pointer to previous UTF-8 character.

Parameters
ptr - Reference to character pointer to be decremented
Returns
previous UTF-8 encoded character

If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER (0xfffd) depending on error handling mode. In this case the pointer remains unchanged.

◆ prev() [3/4]

char32_t utf8::prev ( std::string::const_iterator &  ptr,
const std::string::const_iterator  first 
)

Decrements an iterator to previous UTF-8 character.

Parameters
ptr - iterator to be decremented
first - iterator pointing to beginning of string
Returns
previous UTF-8 encoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and iterator remains unchanged.

◆ prev() [4/4]

char32_t utf8::prev ( std::string::iterator &  ptr,
const std::string::const_iterator  first 
)
inline

Decrements an iterator to previous UTF-8 character.

Overload for mutable iterators; see utf8::prev (std::string::const_iterator& ptr, const std::string::const_iterator first) for details.

◆ remove() [1/2]

bool utf8::remove ( const char *  filename)
inline

Delete a file.

Parameters
filename - UTF-8 name of file to be deleted
Returns
true if successful, false otherwise

◆ remove() [2/2]

bool utf8::remove ( const std::string &  filename)
inline

Delete a file.

Parameters
filename - UTF-8 name of file to be deleted
Returns
true if successful, false otherwise

◆ rename() [1/2]

bool utf8::rename ( const char *  oldname,
const char *  newname 
)
inline

Rename a file or directory.

Parameters
oldname - current UTF-8 encoded name of file or directory
newname - new UTF-8 name
Returns
true if successful, false otherwise

◆ rename() [2/2]

bool utf8::rename ( const std::string &  oldname,
const std::string &  newname 
)
inline

Rename a file or directory.

Parameters
oldname - current UTF-8 encoded name of file or directory
newname - new UTF-8 name
Returns
true if successful, false otherwise

◆ rmdir() [1/2]

bool utf8::rmdir ( const char *  dirname)
inline

Deletes a directory.

Parameters
dirname - UTF-8 path of directory to be removed
Returns
true if successful, false otherwise

◆ rmdir() [2/2]

bool utf8::rmdir ( const std::string &  dirname)
inline

Deletes a directory.

Parameters
dirname - UTF-8 path of directory to be removed
Returns
true if successful, false otherwise

◆ valid_str() [1/2]

bool utf8::valid_str ( const char *  s,
size_t  nch 
)

Verifies if string is a valid UTF-8 string.

Parameters
s - pointer to character string to verify
nch - number of characters to verify or 0 if string is null-terminated
Returns
true if string is a valid UTF-8 encoded string, false otherwise

◆ valid_str() [2/2]

bool utf8::valid_str ( const std::string &  s)
inline

Verifies if string is a valid UTF-8 encoded string.

Parameters
s - character string to verify
Returns
true if string is a valid UTF-8 encoded string, false otherwise