UTF8 - Simple Library for Internationalization
|
Basic UTF-8 Conversion functions. More...
#include <windows.h>
#include <sys/stat.h>
#include <utf8/utf8.h>
#include <vector>
#include <assert.h>
Functions | |
std::string | utf8::narrow (const wchar_t *s, size_t nch) |
Conversion from wide character to UTF-8. | |
std::string | utf8::narrow (const std::wstring &s) |
Conversion from wide character to UTF-8. | |
std::string | utf8::narrow (const char32_t *s, size_t nch) |
Conversion from UTF32 to UTF8. | |
std::string | utf8::narrow (const std::u32string &s) |
Conversion from UTF32 to UTF8. | |
std::string | utf8::narrow (char32_t r) |
Conversion from UTF32 to UTF8. | |
std::wstring | utf8::widen (const char *s, size_t nch) |
Conversion from UTF-8 to wide character. | |
std::wstring | utf8::widen (const std::string &s) |
Conversion from UTF-8 to wide character. | |
std::u32string | utf8::runes (const char *s, size_t nch) |
Conversion from UTF-8 to UTF-32. | |
std::u32string | utf8::runes (const std::string &s) |
Converts a string of characters from UTF-8 to UTF-32. | |
bool | utf8::valid_str (const char *s, size_t nch) |
Verifies if string is a valid UTF-8 string. | |
char32_t | utf8::next (std::string::const_iterator &ptr, const std::string::const_iterator last) |
Decodes a UTF-8 encoded character and advances iterator to next code point. | |
char32_t | utf8::next (const char *&ptr) |
Decodes a UTF-8 encoded character and advances pointer to next character. | |
char32_t | utf8::prev (const char *&ptr) |
Decrements a character pointer to previous UTF-8 character. | |
char32_t | utf8::prev (std::string::const_iterator &ptr, const std::string::const_iterator first) |
Decrements an iterator to previous UTF-8 character. | |
size_t | utf8::length (const std::string &s) |
Counts number of characters in a UTF-8 encoded string. | |
size_t | utf8::length (const char *s) |
Counts number of characters in a UTF-8 encoded string. | |
std::string | utf8::getcwd () |
Gets the current working directory. | |
bool | utf8::splitpath (const std::string &path, char *drive, char *dir, char *fname, char *ext) |
Breaks a path name into components. | |
bool | utf8::splitpath (const std::string &path, std::string &drive, std::string &dir, std::string &fname, std::string &ext) |
Breaks a path name into components. | |
bool | utf8::makepath (std::string &path, const std::string &drive, const std::string &dir, const std::string &fname, const std::string &ext) |
Creates a path from UTF-8 encoded components. | |
std::string | utf8::fullpath (const std::string &relpath) |
Returns the absolute (full) path of a filename. | |
std::string | utf8::getenv (const std::string &var) |
Retrieves the value of an environment variable. | |
char ** | utf8::get_argv (int *argc) |
Converts wide character command arguments to an array of pointers to UTF-8 strings. | |
void | utf8::free_argv (int argc, char **argv) |
Frees the memory allocated by get_argv(int *argc) | |
std::vector< std::string > | utf8::get_argv () |
Converts wide character command arguments to a vector of UTF-8 strings. | |
bool | utf8::isblank (char32_t r) |
Check if character is space or tab. | |
bool | utf8::isspace (char32_t r) |
Check if character is white space. | |
Basic UTF-8 Conversion functions.
void utf8::free_argv | ( | int | argc, |
char ** | argv | ||
) |
Frees the memory allocated by get_argv(int *argc)
argc | number of arguments |
argv | array of pointers to arguments |
std::string utf8::fullpath | ( | const std::string & | relpath | ) |
Returns the absolute (full) path of a filename.
relpath | relative path |
std::vector< std::string > utf8::get_argv | ( | ) |
Converts wide character command arguments to a vector of UTF-8 strings.
char ** utf8::get_argv | ( | int * | argc | ) |
Converts wide character command arguments to an array of pointers to UTF-8 strings.
argc | Pointer to an integer that contains number of parameters |
The space allocated for the strings and the array of pointers should be freed by calling free_argv().
std::string utf8::getcwd | ( | ) |
Gets the current working directory.
std::string utf8::getenv | ( | const std::string & | var | ) |
Retrieves the value of an environment variable.
var | name of environment variable |
size_t utf8::length | ( | const char * | s | ) |
Counts number of characters in a UTF-8 encoded string.
s | UTF8-encoded string |
size_t utf8::length | ( | const std::string & | s | ) |
Counts number of characters in a UTF-8 encoded string.
s | UTF8-encoded string |
bool utf8::makepath | ( | std::string & | path, |
const std::string & | drive, | ||
const std::string & | dir, | ||
const std::string & | fname, | ||
const std::string & | ext | ||
) |
Creates a path from UTF-8 encoded components.
path | Resulting path (UTF-8 encoded) |
drive | drive letter |
dir | directory path |
fname | filename |
ext | extension |
If any required syntactic element (colon after drive letter, '\' at end of directory path, period before extension) is missing, it is automatically added.
char32_t utf8::next | ( | const char *& | ptr | ) |
Decodes a UTF-8 encoded character and advances pointer to next character.
ptr | Reference to character pointer to be advanced |
If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and advances pointer to beginning of next character or end of string.
char32_t utf8::next | ( | std::string::const_iterator & | ptr, |
const std::string::const_iterator | last | ||
) |
Decodes a UTF-8 encoded character and advances iterator to next code point.
ptr | Reference to iterator to be advanced |
last | Iterator pointing to the end of range |
If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and advances iterator to beginning of next character or end of string.
char32_t utf8::prev | ( | const char *& | ptr | ) |
Decrements a character pointer to previous UTF-8 character.
ptr | Reference to character pointer to be decremented |
If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and pointer remains unchanged.
char32_t utf8::prev | ( | std::string::const_iterator & | ptr, |
const std::string::const_iterator | first | ||
) |
Decrements an iterator to previous UTF-8 character.
ptr | iterator to be decremented |
first | iterator pointing to beginning of string |
If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and iterator remains unchanged.
bool utf8::splitpath | ( | const std::string & | path, |
char * | drive, | ||
char * | dir, | ||
char * | fname, | ||
char * | ext | ||
) |
Breaks a path name into components.
path | UTF-8 encoded full path |
drive | drive letter followed by colon (or NULL if not needed) |
dir | directory path (or NULL if not needed) |
fname | base filename (or NULL if not needed) |
ext | file extension including the leading period (.) (or NULL if not needed) |
bool utf8::splitpath | ( | const std::string & | path, |
std::string & | drive, | ||
std::string & | dir, | ||
std::string & | fname, | ||
std::string & | ext | ||
) |
Breaks a path name into components.
path | UTF-8 encoded full path |
drive | drive letter followed by colon |
dir | directory path |
fname | base filename |
ext | file extension including the leading period (.) |
Returned strings are converted to UTF-8.
bool utf8::valid_str | ( | const char * | s, |
size_t | nch | ||
) |
Verifies if string is a valid UTF-8 string.
s | pointer to character string to verify |
nch | number of characters to verify or 0 if string is null-terminated |
Returns true if string is a valid UTF-8 encoded string, false otherwise.