UTF8 - Simple Library for Internationalization
Loading...
Searching...
No Matches
utf8.cpp File Reference

Basic UTF-8 Conversion functions. More...

#include <windows.h>
#include <sys/stat.h>
#include <utf8/utf8.h>
#include <vector>
#include <assert.h>

Functions

std::string utf8::narrow (const wchar_t *s, size_t nch)
 Conversion from wide character to UTF-8.
 
std::string utf8::narrow (const std::wstring &s)
 Conversion from wide character to UTF-8.
 
std::string utf8::narrow (const char32_t *s, size_t nch)
 Conversion from UTF32 to UTF8.
 
std::string utf8::narrow (const std::u32string &s)
 Conversion from UTF32 to UTF8.
 
std::string utf8::narrow (char32_t r)
 Conversion from UTF32 to UTF8.
 
std::wstring utf8::widen (const char *s, size_t nch)
 Conversion from UTF-8 to wide character.
 
std::wstring utf8::widen (const std::string &s)
 Conversion from UTF-8 to wide character.
 
std::u32string utf8::runes (const char *s, size_t nch)
 Conversion from UTF-8 to UTF-32.
 
std::u32string utf8::runes (const std::string &s)
 Converts a string of characters from UTF-8 to UTF-32.
 
bool utf8::valid_str (const char *s, size_t nch)
 Verifies if string is a valid UTF-8 string.
 
char32_t utf8::next (std::string::const_iterator &ptr, const std::string::const_iterator last)
 Decodes a UTF-8 encoded character and advances iterator to next code point.
 
char32_t utf8::next (const char *&ptr)
 Decodes a UTF-8 encoded character and advances pointer to next character.
 
char32_t utf8::prev (const char *&ptr)
 Decrements a character pointer to previous UTF-8 character.
 
char32_t utf8::prev (std::string::const_iterator &ptr, const std::string::const_iterator first)
 Decrements an iterator to previous UTF-8 character.
 
size_t utf8::length (const std::string &s)
 Counts the number of characters in a UTF-8 encoded string.
 
size_t utf8::length (const char *s)
 Counts the number of characters in a UTF-8 encoded string.
 
std::string utf8::getcwd ()
 Gets the current working directory.
 
bool utf8::splitpath (const std::string &path, char *drive, char *dir, char *fname, char *ext)
 Breaks a path name into components.
 
bool utf8::splitpath (const std::string &path, std::string &drive, std::string &dir, std::string &fname, std::string &ext)
 Breaks a path name into components.
 
bool utf8::makepath (std::string &path, const std::string &drive, const std::string &dir, const std::string &fname, const std::string &ext)
 Creates a path from UTF-8 encoded components.
 
std::string utf8::fullpath (const std::string &relpath)
 Returns the absolute (full) path of a filename.
 
std::string utf8::getenv (const std::string &var)
 Retrieves the value of an environment variable.
 
char ** utf8::get_argv (int *argc)
 Converts wide-character command-line arguments to an array of pointers to UTF-8 strings.
 
void utf8::free_argv (int argc, char **argv)
 Frees the memory allocated by get_argv(int *argc)
 
std::vector< std::string > utf8::get_argv ()
 Converts wide-character command-line arguments to a vector of UTF-8 strings.
 
bool utf8::isblank (char32_t r)
 Check if character is space or tab.
 
bool utf8::isspace (char32_t r)
 Check if character is white space.
 

Detailed Description

Basic UTF-8 Conversion functions.

Function Documentation

◆ free_argv()

void utf8::free_argv ( int  argc,
char **  argv 
)

Frees the memory allocated by get_argv(int *argc)

Parameters
argc: number of arguments
argv: array of pointers to arguments

◆ fullpath()

std::string utf8::fullpath ( const std::string &  relpath)

Returns the absolute (full) path of a filename.

Parameters
relpath: relative path

◆ get_argv() [1/2]

std::vector< std::string > utf8::get_argv ( )

Converts wide-character command-line arguments to a vector of UTF-8 strings.

Returns
vector of UTF-8 strings. The vector is empty if an error occurred.

◆ get_argv() [2/2]

char ** utf8::get_argv ( int *  argc)

Converts wide-character command-line arguments to an array of pointers to UTF-8 strings.

Parameters
argc: pointer to an integer that contains the number of parameters
Returns
array of pointers to each command line parameter or NULL if an error occurred.

The space allocated for the strings and the array of pointers should be freed by calling free_argv().

◆ getcwd()

std::string utf8::getcwd ( )

Gets the current working directory.

Returns
UTF-8 encoded name of working directory

◆ getenv()

std::string utf8::getenv ( const std::string &  var)

Retrieves the value of an environment variable.

Parameters
var: name of environment variable
Returns
value of environment variable or an empty string if there is no such environment variable

◆ length() [1/2]

size_t utf8::length ( const char *  s)

Counts the number of characters in a UTF-8 encoded string.

Parameters
s: UTF-8 encoded string
Returns
number of characters in string
Note
Algorithm from http://canonical.org/~kragen/strlen-utf8.html

◆ length() [2/2]

size_t utf8::length ( const std::string &  s)

Counts the number of characters in a UTF-8 encoded string.

Parameters
s: UTF-8 encoded string
Returns
number of characters in string
Note
Algorithm from http://canonical.org/~kragen/strlen-utf8.html

◆ makepath()

bool utf8::makepath ( std::string &  path,
const std::string &  drive,
const std::string &  dir,
const std::string &  fname,
const std::string &  ext 
)

Creates a path from UTF-8 encoded components.

Parameters
path: resulting path (UTF-8 encoded)
drive: drive letter
dir: directory path
fname: filename
ext: extension
Returns
True if successful; false otherwise

If any required syntactic element (colon after drive letter, '\' at end of directory path, period before extension) is missing, it is automatically added.

◆ next() [1/2]

char32_t utf8::next ( const char *&  ptr)

Decodes a UTF-8 encoded character and advances pointer to next character.

Parameters
ptr: reference to character pointer to be advanced
Returns
decoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and advances pointer to beginning of next character or end of string.

◆ next() [2/2]

char32_t utf8::next ( std::string::const_iterator &  ptr,
const std::string::const_iterator  last 
)

Decodes a UTF-8 encoded character and advances iterator to next code point.

Parameters
ptr: reference to iterator to be advanced
last: iterator pointing to the end of range
Returns
decoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and advances pointer to beginning of next character or end of string.

◆ prev() [1/2]

char32_t utf8::prev ( const char *&  ptr)

Decrements a character pointer to previous UTF-8 character.

Parameters
ptr: reference to character pointer to be decremented
Returns
previous UTF-8 encoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and pointer remains unchanged.

◆ prev() [2/2]

char32_t utf8::prev ( std::string::const_iterator &  ptr,
const std::string::const_iterator  first 
)

Decrements an iterator to previous UTF-8 character.

Parameters
ptr: iterator to be decremented
first: iterator pointing to beginning of string
Returns
previous UTF-8 encoded character

If the string contains an invalid UTF-8 encoding, the function returns REPLACEMENT_CHARACTER (0xfffd) and iterator remains unchanged.

◆ splitpath() [1/2]

bool utf8::splitpath ( const std::string &  path,
char *  drive,
char *  dir,
char *  fname,
char *  ext 
)

Breaks a path name into components.

Parameters
path: UTF-8 encoded full path
drive: drive letter followed by colon (or NULL if not needed)
dir: directory path (or NULL if not needed)
fname: base filename (or NULL if not needed)
ext: file extension including the leading period (.) (or NULL if not needed)
Returns
true if successful, false otherwise. Returned strings are converted to UTF-8.

◆ splitpath() [2/2]

bool utf8::splitpath ( const std::string &  path,
std::string &  drive,
std::string &  dir,
std::string &  fname,
std::string &  ext 
)

Breaks a path name into components.

Parameters
path: UTF-8 encoded full path
drive: drive letter followed by colon
dir: directory path
fname: base filename
ext: file extension including the leading period (.)

Returned strings are converted to UTF-8.

◆ valid_str()

bool utf8::valid_str ( const char *  s,
size_t  nch 
)

Verifies if string is a valid UTF-8 string.

Parameters
s: pointer to character string to verify
nch: number of characters to verify or 0 if string is null-terminated
Returns
true if string is a valid UTF-8 encoded string, false otherwise