This class stores C/C++ style null terminated string. More...

#include <cel_string.h>

Classes
class	Transform

Public Types
enum	UnicodeNormalizeForm { U_NFD = 0, U_NFC = 1, U_NFKD = 2, U_NFKC = 3 }

Public Member Functions
	String ()

	String (NULL_STRING null)

	String (const utf8s &inString, size_t length=STR_AUTOSIZE)

	String (const char *inString, size_t length=STR_AUTOSIZE)

	String (const std::string &inString, size_t length=STR_AUTOSIZE)

	String (const String &inString)

	String (const String &inString, size_t start, size_t length)

	String (const ReferableString *inString)

	String (const UChar2 *inString, size_t length=STR_AUTOSIZE)

	String (const UChar4 *inString, size_t length=STR_AUTOSIZE)

String &	operator= (NULL_STRING null)

String &	operator= (const utf8s &inString)

String &	operator= (const char *inString)

String &	operator= (const String &inString)

String &	operator= (const UChar2 *inString)

String &	operator= (const UChar4 *inString)

void	clear ()

void	swap (String &inString)

String	substring (size_t start, size_t length=STR_AUTOSIZE) const

String	substringByChar (size_t startChar, size_t charCount=STR_AUTOSIZE) const

String	removeBoms () const

String	operator+ (const String &str) const

String &	operator+= (const String &str)

bool	isEmpty () const

int	compare (const String &str) const

int	compareI (const String &str) const

bool	operator== (const String &str) const

bool	operator< (const String &str) const

bool	operator> (const String &str) const

bool	operator<= (const String &str) const

bool	operator>= (const String &str) const

bool	operator!= (const String &str) const

UChar1	operator[] (size_t n) const

UChar1 &	operator[] (size_t n)

UChar4	operator() (size_t n) const

size_t	getNumOfChars () const

size_t	getLength () const

const UChar1 *	c_str () const

const utf8s	c_utf8str () const

size_t	getNthCharPos (size_t nCharPos) const

const UChar1 *	getNthCharPtr (size_t nCharPos) const

const utf8s	getNthCharUtf8Ptr (size_t nCharPos) const

UChar4	getNthUnicodeChar (size_t nCharPos) const

int	toInt () const

size_t	toSizeT () const

uint64_t	toUInt64 () const

double	toDouble () const

const UChar1 *	getFirstCharPtr (UChar4 u) const

size_t	getCharPos (UChar4 u, size_t from=0) const

size_t	getFirstCharPos (UChar4 u) const

const UChar1 *	getLastCharPtr (UChar4 u) const

size_t	getLastCharPos (UChar4 u) const

const UChar1 *	pbrk (const String &str, size_t from=0) const

size_t	nbrk (const String &str, size_t from=0) const

size_t	getPatternPos (const String &str, size_t from=0) const

int	indexOf (const String &str) const

int	lastIndexOf (const String &str) const

UChar1 *	allocate (size_t length)

String	trimHeadingSpaces () const

String	trimTrailingSpaces () const

String	trimSpaces () const

String	chomp () const

size_t	findPos (const String &inPattern) const

bool	startWith (const String &inPattern) const

bool	endWith (const String &inPattern) const

bool	split (const String &inPattern, SimpleArray< String > &outSubstrs, bool inRemoveEmptyStrings=false) const

bool	match (const String &inPattern, SimpleArray< String > &outMatches) const

AutoPtr< Region >	match (const String &inPattern) const

size_t	findOneOf (const UChar4 *inChars, size_t inCharCount) const

size_t	findOneOf (const String &inChars) const

const UChar1 *	findPtr (const String &inPattern) const

String	replace (const String &inReplacee, const String &inReplacer, u32 inFlags=0) const

String	erase (size_t inPos, size_t inLength) const

String	erase (const String &inPattern) const

void	serialize (Stream *inStream, size_t inLevel, Endian inEndian) const

void	deserialize (Stream *inStream, size_t inLevel, Endian inEndian)

const UChar1 *	begin () const

const UChar1 *	end () const

const UChar1 *	raw_ptr () const

const UChar1 *	raw_end () const

String	toUpper () const

String	toLower () const

String	applyTransform (Transform &inTransform)

String	normalize (UnicodeNormalizeForm form) const

const char *	toMbs () const

const wchar_t *	toWcs () const

const UChar2 *	toUcs2 () const

const UChar4 *	toUcs4 () const

String	resolveEntityReferences () const

Static Public Member Functions
static int	compare (const String &str1, const String &str2)

static int	compareI (const String &str1, const String &str2)

static String	concat (const utf8s &str1, const Celartem::String &str2)

static String	concat (const SimpleArray< String > &inStrings, const String &inSeparator=NullString)

static const UChar1 *	next (const UChar1 *inStrPtr)

static UChar1 *	next (UChar1 *inStrPtr)

static size_t	getCharSize (const UChar1 *inStrPtr)

static UChar4	getCharcode (const UChar1 *inStrPtr)

static bool	isWhiteSpace (const UChar1 *inStrPtr)

static bool	isLineTerminator (const UChar1 *inStrPtr)

static bool	isValidUTF8Sequence (const u8 *inDataToValidate, size_t inSize)

Detailed Description

This class stores C/C++ style null terminated string.

For security reasons, this class zero-clears the memory block used by the string when it is resized or destroyed.

See Also: Strings Manipulations

Member Enumeration Documentation

enum Celartem::String::UnicodeNormalizeForm

Normalization Form Selector; used with normalize function. For more information, see Unicode Standard Annex #15 Unicode Normalization Forms.

See Also: normalize

Enumerator
U_NFD	Normalization Form D.
U_NFC	Normalization Form C.
U_NFKD	Normalization Form KD.
U_NFKC	Normalization Form KC.

Constructor & Destructor Documentation

Celartem::String::String ( )

This constructor initializes the String instance with "", not NULL; The string contains a '\0'.

See Also: Initializing String instances

Celartem::String::String ( NULL_STRING null )

This constructor accepts NullString and initializes the String instance with "", not NULL; The string contains a '\0'.

Parameters

null	It should be NullString.

See Also: Initializing String instances

Celartem::String::String	(	const utf8s &	inString,
		size_t	length = `STR_AUTOSIZE`
	)

This constructor initializes the String with UTF-8 string.

Parameters

inString	UTF-8 string to be preserved.
length	The length of the string to be preserved; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: utf8s, Initializing String instances

Celartem::String::String	(	const char *	inString,
		size_t	length = `STR_AUTOSIZE`
	)

This constructor initializes the String with a platform-dependent multibyte string. Since String stores strings as UTF-8, this constructor converts the input string into UTF-8. If the input string contains only 7-bit ASCII characters and you want to avoid the conversion cost, use the utf8s version of the constructor. Mac OS X's native multibyte encoding is UTF-8, so this constructor is inexpensive on Mac OS X.

Parameters

inString	Platform/Locale specific multibyte character string.
length	The length of the string to be preserved; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: Initializing String instances

Celartem::String::String	(	const std::string &	inString,
		size_t	length = `STR_AUTOSIZE`
	)

This constructor initializes the String with std::string. Since String stores strings as UTF-8 string, this constructor converts the input string into UTF-8 string.

Parameters

inString	Platform/Locale specific multibyte character string.
length	The length of the string to be preserved; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: Initializing String instances

Celartem::String::String ( const String & inString )

This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.

Parameters

inString string to copy.

See Also: Initializing String instances

Celartem::String::String	(	const String &	inString,
		size_t	start,
		size_t	length
	)

This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.

Parameters

inString	string to copy.
start	The position of the substring to copy.
length	The length of the string to be preserved; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: Initializing String instances

Celartem::String::String ( const ReferableString * inString )

This constructor duplicates the input String.
Since the String class manages the strings by reference count mechanism, this constructor does not cost so much.

Parameters

inString string to copy.

See Also: Initializing String instances

Celartem::String::String	(	const UChar2 *	inString,
		size_t	length = `STR_AUTOSIZE`
	)

This constructor initializes the String with a UTF-16 string. Since the String class stores strings as UTF-8, this constructor is more efficient than the multibyte version.

Parameters

inString	String in UTF-16.
length	The length of the string to be preserved, in characters; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: Initializing String instances

Celartem::String::String	(	const UChar4 *	inString,
		size_t	length = `STR_AUTOSIZE`
	)

This constructor initializes the String with a UTF-32 string. Since the String class stores strings as UTF-8, this constructor is more efficient than the multibyte version.

Parameters

inString	A string in UCS-4(UTF-32).
length	The length of the string to be preserved; you can drop the trailing part of the string by intentionally setting the length to less than the actual size. You can also omit the parameter, in which case String automatically measures the length of the string (assuming the string is terminated by '\0').

See Also: Initializing String instances

Member Function Documentation

UChar1* Celartem::String::allocate ( size_t length )

This method allocates the number of bytes specified by length. You don't have to make room for the trailing null terminator; it is automatically appended and initialized with '\0'.

UChar1 *p = str1.allocate(3);
std::memcpy(p, "123", 3);
// You don't have to add '\0'.
UChar1 *q = str2.allocate(5);
q[0] = '0';
q[1] = '1';
q[2] = '2';
q[3] = '3';
q[4] = '4';

Parameters

length	The buffer length to allocate. It doesn't have to include the terminating '\0'.

Returns: Pointer to the buffer.

Referenced by Celartem::Base64T< Base64Traits >::encode().

String Celartem::String::applyTransform ( Transform & inTransform )

This function enables flexible string manipulation based on visitor pattern.

Parameters

inTransform A Transform instance to apply.

Returns: The result string.

See Also: Transform

const UChar1* Celartem::String::begin ( ) const

inline

This function is provided for compatibility with STL. It is exactly the same as the c_str() function.
Since the String class internally uses a Pascal-style, non-null-terminated string, this function may take some time to convert the raw string into a C/C++ compatible null-terminated string. If you want to avoid this overhead, use the raw_ptr() and raw_end() functions.

Returns: The pointer (iterator) to the first character in the string.

See Also: end, c_str, c_utf8str, raw_ptr, raw_end

const UChar1* Celartem::String::c_str ( ) const

This method returns the pointer to the raw UTF-8 string. Be careful to use this function with String related methods; they may regard the string as the environment native multibyte rather than UTF-8.
Since the String class internally uses a Pascal-style, non-null-terminated string, this function may take some time to convert the raw string into a C/C++ compatible null-terminated string. If you want to avoid this overhead, use the raw_ptr() and raw_end() functions.

Returns: Pointer to the raw string.

See Also: c_utf8str, begin, end, raw_ptr, raw_end

Referenced by begin(), c_utf8str(), Celartem::Base64T< Base64Traits >::decode(), end(), Celartem::RegularExpression::findFirst(), Celartem::DataStore::get(), and Celartem::RegularExpression::RegularExpression().

const utf8s Celartem::String::c_utf8str ( ) const

inline

This method returns the pointer to the raw UTF-8 string. This method is identical to c_str() method except it returns utf8s string proxy object.
Since the String class internally uses a Pascal-style, non-null-terminated string, this function may take some time to convert the raw string into a C/C++ compatible null-terminated string. If you want to avoid this overhead, use the raw_ptr() and raw_end() functions.

Returns: utf8s proxy object that wraps the raw UTF-8 string.

See Also: c_str, begin, end, raw_ptr, raw_end

String Celartem::String::chomp ( ) const

This method removes the trailing CR/LF/CRLF codes if they exist.

Returns: The result string.

void Celartem::String::clear ( )

This method resets the string to "". This method also ensures that the memory block is zero-cleared.

static int Celartem::String::compare	(	const String &	str1,
		const String &	str2
	)

static

This function compares two strings.

Parameters

str1
str2	Strings to compare.

Returns

The return value indicates the lexicographic relation of str1 to str2.

ret < 0 str1 is less than str2.
ret == 0 str1 is identical to str2.
ret > 0 str1 is greater than str2.

int Celartem::String::compare ( const String & str ) const

This method compares this string with another string.

Parameters

str	String to compare with.

Returns

The return value indicates the lexicographic relation of this string to str.

ret < 0 This string is less than str.
ret == 0 This string is identical to str.
ret > 0 This string is greater than str.

static int Celartem::String::compareI	(	const String &	str1,
		const String &	str2
	)

static

This function compares two strings in 7-bit ASCII case insensitive manner.

Parameters

str1
str2	Strings to compare.

Returns

The return value indicates the lexicographic relation of str1 to str2.

ret < 0 str1 is less than str2.
ret == 0 str1 is identical to str2.
ret > 0 str1 is greater than str2.

int Celartem::String::compareI ( const String & str ) const

This method compares this string with another string in 7-bit ASCII case insensitive manner.

Parameters

str	String to compare with.

Returns

The return value indicates the lexicographic relation of this string to str.

ret < 0 This string is less than str.
ret == 0 This string is identical to str.
ret > 0 This string is greater than str.

static String Celartem::String::concat	(	const utf8s &	str1,
		const Celartem::String &	str2
	)

static

This function enables the concatenation of the strings in the faster way than String(str1) + str2.

Parameters

str1
str2	Strings to be combined.

Returns: The result string.

static String Celartem::String::concat	(	const SimpleArray< String > &	inStrings,
		const String &	inSeparator = `NullString`
	)

static

This function enables the concatenation of the strings.

Parameters

inStrings	Strings to be combined.
inSeparator	An optional string which is placed between a string and the next.

Returns: The result string.

void Celartem::String::deserialize	(	Stream *	inStream,
		size_t	inLevel,
		Endian	inEndian
	)

This method is just a helper function to deal with SerializableData template. For more information, see SerializableData.

See Also: SerializableData, Serializable, DataStore

const UChar1* Celartem::String::end ( ) const

inline

This function is provided for compatibility with STL. Since the String class internally uses a Pascal-style, non-null-terminated string, this function may take some time to convert the raw string into a C/C++ compatible null-terminated string. If you want to avoid this overhead, use the raw_ptr() and raw_end() functions.

Returns: The pointer (iterator) to the end of the string.

See Also: begin, c_str, c_utf8str, raw_ptr, raw_end

bool Celartem::String::endWith ( const String & inPattern ) const

This function checks whether the string ends with the specified pattern or not.

Parameters

inPattern The pattern in String.

Returns: true if the string ends with the specified pattern, otherwise false.

String Celartem::String::erase	(	size_t	inPos,
		size_t	inLength
	)		const

This function erases the substring.

Parameters

inPos	The index of the first character in the string to be removed.
inLength	The number of elements that will be removed.

Returns: The result string.

String Celartem::String::erase ( const String & inPattern ) const

This function removes all occurrences of the specified pattern from the string.

Parameters

inPattern The pattern to be removed.

Returns: The result string.

size_t Celartem::String::findOneOf	(	const UChar4 *	inChars,
		size_t	inCharCount
	)		const

This function tries to find the first occurrence of one of the specified characters and returns the position by index.

Parameters

inChars	The characters to find.
inCharCount	The number of characters in the array.

Returns: The index of the first occurrence if found, otherwise notFound.

size_t Celartem::String::findOneOf ( const String & inChars ) const

This function tries to find the first occurrence of one of the specified characters and returns the position by index.

Parameters

inChars The characters to find.

Returns: The index of the first occurrence if found, otherwise notFound.

size_t Celartem::String::findPos ( const String & inPattern ) const

This function tries to find the specified pattern in the string and returns the index of the character.

Parameters

inPattern The string to find.

Returns: The index of the first occurrence if found, otherwise notFound.

const UChar1* Celartem::String::findPtr ( const String & inPattern ) const

This function tries to find the specified pattern from the string and returns the pointer to the character.

Parameters

inPattern The string to find.

Returns: The pointer to the position the string is found.

static UChar4 Celartem::String::getCharcode ( const UChar1 * inStrPtr )

static

This function helps you to determine the UCS-4 character code of the specified location.

Parameters

inStrPtr The pointer to a valid UTF-8 boundary on a string.

Returns: UCS-4 character code.

size_t Celartem::String::getCharPos	(	UChar4	u,
		size_t	from = `0`
	)		const

This method searches the string for the occurrence of a character that matches the specified character.

Parameters

u	UCS-4 character code to search.
from	Where the search starts from. For further information, see the sample code below.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns notFound.

To search for the same character repeatedly, use code like the following:

size_t pos = 0;
for(;;)
{
    pos = str.getCharPos('/', pos);
    if(pos == notFound)
        break; // no more occurrences
    
    // do the task for this occurrence
    // ....
    
    pos++; // prepare for the next search
};

static size_t Celartem::String::getCharSize ( const UChar1 * inStrPtr )

static

This function helps you to determine the number of UChar1 elements to the next character boundary.

Parameters

inStrPtr The pointer on a UTF-8 string.

Returns: The number of UChar1 elements to the next character boundary.

size_t Celartem::String::getFirstCharPos ( UChar4 u ) const

This method searches the string for the first occurrence of a character that matches the specified character. The behavior is almost identical to strchr function except it accepts UCS-4 character code and returns 0-based index.

Parameters

u	UCS-4 character code to search.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns notFound.

const UChar1* Celartem::String::getFirstCharPtr ( UChar4 u ) const

This method searches the string for the first occurrence of a character that matches the specified character. The behavior is almost identical to strchr function except it accepts UCS-4 character code.

Parameters

u	UCS-4 character code to search.

Returns: Pointer to the character. If the character is not found in the string, this function returns NULL.

size_t Celartem::String::getLastCharPos ( UChar4 u ) const

This method searches the string for the last occurrence of a specified character. The behavior is almost identical to strrchr function except it accepts UCS-4 character code and returns 0-based index.

Parameters

u	UCS-4 character code to search.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns notFound.

const UChar1* Celartem::String::getLastCharPtr ( UChar4 u ) const

This method searches the string for the last occurrence of a specified character. The behavior is almost identical to strrchr function except it accepts UCS-4 character code.

Parameters

u	UCS-4 character code to search.

Returns: Pointer to the character. If the character is not found in the string, this function returns NULL.

size_t Celartem::String::getLength ( ) const

This method returns the number of characters in the string.

Returns: The number of characters.

Referenced by Celartem::Base64T< Base64Traits >::decode(), end(), and Celartem::DjVu::Link::getType().

size_t Celartem::String::getNthCharPos ( size_t nCharPos ) const

This method returns the position of n-th UCS-4 character in the string.

Parameters

nCharPos The position of the UCS-4 character.

Returns: The actual position of that UCS-4 character in bytes. If the nCharPos exceeds the end of the string, this function returns notFound.

const UChar1* Celartem::String::getNthCharPtr ( size_t nCharPos ) const

This method returns the pointer to n-th UCS-4 character in the string.

Parameters

nCharPos The position of the UCS-4 character.

Returns: Pointer to the UTF-8 string that represents the specified UCS-4 character. If the nCharPos exceeds the end of the string, this function returns NULL.

Referenced by getNthCharUtf8Ptr(), and getNthUnicodeChar().

const utf8s Celartem::String::getNthCharUtf8Ptr ( size_t nCharPos ) const

inline

This method returns the pointer to n-th UCS-4 character in the string by utf8s proxy object.

Parameters

nCharPos The position of the UCS-4 character.

Returns: An utf8s proxy object that points to the UTF-8 string that represents the specified UCS-4 character. If the nCharPos exceeds the end of the string, this function returns utf8s(NULL).

UChar4 Celartem::String::getNthUnicodeChar ( size_t nCharPos ) const

inline

This method returns n-th UCS-4 character in the string.

Parameters

nCharPos The position of the UCS-4 character.

Returns: The UCS-4 character code of the character. If the nCharPos exceeds the end of the string, this function returns 0.

Referenced by operator()().

size_t Celartem::String::getNumOfChars ( ) const

This method returns the number of UCS-4 characters in the string.

Returns: The number of UCS-4 characters.

size_t Celartem::String::getPatternPos	(	const String &	str,
		size_t	from = `0`
	)		const

inline

This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function except it returns 0-based index. This method is just an alias of nbrk.

Parameters

str	The string that contains the characters for which to search.
from	Where the search starts from.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns notFound.

int Celartem::String::indexOf ( const String & str ) const

This method searches the string for the first occurrence of the specified string.

Parameters

str	The string to search for.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns -1.

bool Celartem::String::isEmpty ( ) const

This method checks whether the string is empty or not.

Returns: true if the string is empty, otherwise false.

Referenced by Celartem::DjVu::Link::getType(), Celartem::DjVu::TextWithFontConfig::isEmpty(), Celartem::DjVu::Link::isEmptyLink(), and Celartem::DjVu::FontConfig::isValid().

static bool Celartem::String::isLineTerminator ( const UChar1 * inStrPtr )

static

This function helps you to determine whether the pointed character sequence is defined as a line terminator or not.

Parameters

inStrPtr The pointer to a valid UTF-8 boundary on a string.

Returns: true if the character sequence is a line terminator; otherwise false.

static bool Celartem::String::isValidUTF8Sequence	(	const u8 *	inDataToValidate,
		size_t	inSize
	)

static

This function helps you to determine whether the specified data is a valid UTF-8 character sequence or not.

Parameters

inDataToValidate	The pointer to a data block to validate.
inSize	The size of the data.

Returns: true if the data is a valid UTF-8 character sequence; otherwise false.

static bool Celartem::String::isWhiteSpace ( const UChar1 * inStrPtr )

static

This function helps you to determine whether the pointed character sequence is defined as a whitespace or not.

Parameters

inStrPtr The pointer to a valid UTF-8 boundary on a string.

Returns: true if the character sequence is a white space; otherwise false.

int Celartem::String::lastIndexOf ( const String & str ) const

This method searches the string for the last occurrence of the specified string.

Parameters

str	The string to search for.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns -1.

bool Celartem::String::match	(	const String &	inPattern,
		SimpleArray< String > &	outMatches
	)		const

This function tries to match the string to the specified regular expression pattern and returns the match strings (a portion of the original string).
For more information, see RegularExpression.

Parameters

inPattern	The regular expression pattern to match.
outMatches	The match result. outMatches[0] is all the matching string and outMatches[n] is the n-th matched substring.

Returns: true if the match is successful, otherwise false.

See Also: RegularExpression

AutoPtr<Region> Celartem::String::match ( const String & inPattern ) const

This function tries to match the string to the specified regular expression pattern and returns the match strings (a portion of the original string).
For more information, see RegularExpression.

Parameters

inPattern The regular expression pattern to match.

Returns: Pointer to a Region instance if the match succeeds; otherwise NULL.

See Also: RegularExpression

size_t Celartem::String::nbrk	(	const String &	str,
		size_t	from = `0`
	)		const

This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function except it returns 0-based index.
This method is just an alias of getPatternPos.

Parameters

str	The string that contains the characters for which to search.
from	Where the search starts from.

Returns: The 0-based index of the character. If the character is not found in the string, this function returns notFound.

Referenced by getPatternPos().

static const UChar1* Celartem::String::next ( const UChar1 * inStrPtr )

static

This function helps you to move to the next valid UCS character position.

Parameters

inStrPtr The pointer on a UTF-8 string.

Returns: Pointer to the next character.

static UChar1* Celartem::String::next ( UChar1 * inStrPtr )

static

This function helps you to move to the next valid UCS character position.

Parameters

inStrPtr The pointer on a UTF-8 string.

Returns: Pointer to the next character.

String Celartem::String::normalize ( UnicodeNormalizeForm form ) const

This function normalizes the input text in the specified manner.
For more information, see Unicode Standard Annex #15 Unicode Normalization Forms (http://www.unicode.org/reports/tr15/).
The following code illustrates how to use the function:

String str = "....";

String normalized = str.normalize(String::U_NFKD);

Parameters

form One of UnicodeNormalizeForm enumeration.

Returns: The normalized string.

See Also: UnicodeNormalizeForm

bool Celartem::String::operator!= ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the strings are not equal, otherwise false.

UChar4 Celartem::String::operator() ( size_t n ) const

inline

This method gets the character at the specified position. It works as if the string were stored as a UCS-4 character array.

Parameters

n	The position of the UCS-4 character.

Returns: The character value.

String Celartem::String::operator+ ( const String & str ) const

This method concatenates the specified string to the tail. This method DOES NOT modify the original.

Parameters

str	A string to concatenate.

Returns: The result string.

String& Celartem::String::operator+= ( const String & str )

This method concatenates the specified string to the tail. This method DOES modify the original. The behavior of this method is almost identical to strcat function.

Parameters

str	A string to concatenate.

Returns: This instance (*this).

bool Celartem::String::operator< ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the string is less than str in the dictionary order, otherwise false.

bool Celartem::String::operator<= ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the string is no more than str in the dictionary order, otherwise false.

String& Celartem::String::operator= ( NULL_STRING null )

This method accepts NullString and clears the string.

Parameters

null	It should be NullString.

Returns: The reference to this instance.

See Also: Initializing String instances

String& Celartem::String::operator= ( const utf8s & inString )

This method duplicates the specified UTF-8 string.

Parameters

inString String to be copied.

Returns: The reference to this instance.

The following is a sample usage:

String s = utf8s("Hello, world!");

See Also: Initializing String instances

String& Celartem::String::operator= ( const char * inString )

This method duplicates the specified multibyte string. Since String stores strings as UTF-8, this method converts the input string into UTF-8. If the input string contains only 7-bit ASCII characters and you want to avoid the conversion cost, use the utf8s version of this operator. Mac OS X's native multibyte encoding is UTF-8, so this method is inexpensive on Mac OS X.

Parameters

inString Platform/Locale dependent multibyte string.

See Also: Initializing String instances

String& Celartem::String::operator= ( const String & inString )

This method duplicates the specified string. Since the String class manages strings with a reference counting mechanism, this operation is inexpensive.

Parameters

inString String to be copied.

Returns: The reference to this instance.

See Also: Initializing String instances

String& Celartem::String::operator= ( const UChar2 * inString )

This method duplicates the specified string.

Parameters

inString String in UTF-16.

Returns: The reference to this instance.

See Also: Initializing String instances

String& Celartem::String::operator= ( const UChar4 * inString )

This method duplicates the specified string.

Parameters

inString String in UCS-4(UTF-32).

Returns: The reference to this instance.

See Also: Initializing String instances

bool Celartem::String::operator== ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the strings are same value, otherwise false.

bool Celartem::String::operator> ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the string is larger than str in the dictionary order, otherwise false.

bool Celartem::String::operator>= ( const String & str ) const

For direct comparison with the other String. This method internally uses compare method.

Parameters

str	A string to compare with.

Returns: true if the string is no less than str in the dictionary order, otherwise false.

UChar1 Celartem::String::operator[] ( size_t n ) const

This method is to get the character of specified position.

Parameters

n	The position of the character.

Returns: The character value.

UChar1& Celartem::String::operator[] ( size_t n )

This method is to get the character of specified position. This method may internally duplicate the string to realize modification of the string and it potentially has heavy overhead.

Parameters

n	The position of the character.

Returns: A non-const reference to the character.

const UChar1* Celartem::String::pbrk	(	const String &	str,
		size_t	from = `0`
	)		const

This method searches the string for the first occurrence of a character contained in a specified string. This search does not include the null terminator. The behavior is almost identical to strpbrk function.

Parameters

str	The string that contains the characters for which to search.
from	Where the search starts from.

Returns: Pointer to the character. If the character is not found in the string, this function returns NULL.

const UChar1* Celartem::String::raw_end ( ) const

This function returns the pointer to the end position of the internal string.

The following sample illustrates how to use the function:

const UChar1 * const end = str.raw_end();
for(const UChar1 *p = str.raw_ptr(); p < end; p++)
{
    // do something on each character.
}

Returns: The pointer to the end of the internal string.

See Also: raw_ptr, c_str, c_utf8str, begin, end

const UChar1* Celartem::String::raw_ptr ( ) const

This function returns the pointer to the internal string. Please note that the string returned by the function may not be null-terminated. To obtain null-terminated string, use c_str() or begin() function instead of this function.

Returns: The pointer to the first character in the internal string.

See Also: raw_end, c_str, c_utf8str, begin, end

String Celartem::String::removeBoms ( ) const

This method removes any heading BOMs from the string. BOM is usually only on the head of Unicode strings but it may be in the middle of the string due to incorrect string operations. This function checks all the string and removes all occurrences of BOM. If the string does not contain BOMs, this function simply returns the string.

Returns: The string without BOMs.

String Celartem::String::replace	(	const String &	inReplacee,
		const String &	inReplacer,
		u32	inFlags = `0`
	)		const

This function replaces strings in a string by another string.

Parameters

inReplacee	The string to be replaced.
inReplacer	The string that replaces the strings.
inFlags	Reserved, must be 0.

Returns: The result string.

String Celartem::String::resolveEntityReferences ( ) const

This method resolves all the entity references in the string.
This method resolves all the HTML 4.01 defined entity references to the actual Unicode entities.

Returns: The result string.

void Celartem::String::serialize	(	Stream *	inStream,
		size_t	inLevel,
		Endian	inEndian
	)		const

This method is just a helper function to deal with SerializableData template. For more information, see SerializableData.

See Also: SerializableData,Serializable, DataStore

bool Celartem::String::split	(	const String &	inPattern,
		SimpleArray< String > &	outSubstrs,
		bool	inRemoveEmptyStrings = `false`
	)		const

This function tries to split the string by the specified regular expression pattern and returns the resulting substrings. For more information, see RegularExpression.

Parameters

inPattern	The regular expression pattern that separates the string.
outSubstrs	The resulting substrings. If there are no matches for the pattern, the original string is stored in the array.
inRemoveEmptyStrings	Whether to remove empty substrings or not.

Returns: true if there are any matches for the regular expression pattern; otherwise false. The return value exists only for backward compatibility and is useless in almost all cases.

See Also: match, RegularExpression

bool Celartem::String::startWith ( const String & inPattern ) const

This function checks whether the string starts from the specified pattern or not.

Parameters

inPattern The pattern in String.

Returns: true if the string starts from the specified pattern, otherwise false.

String Celartem::String::substring	(	size_t	start,
		size_t	length = `STR_AUTOSIZE`
	)		const

This method extracts a portion of the string. There is no guarantee that the result string is a valid UTF-8 string if you specify an invalid position and/or length.

Parameters

start	0 based index from which we extract the sub-string. If this value is larger than the length of the string, this function returns NullString.
length	the number of characters to be extracted. You don't have to think about the room for terminating '\0'. If length exceeds the length of the string, this method fits the length to the original string.

Returns: The result string.

Referenced by Celartem::DjVu::Link::getType().

String Celartem::String::substringByChar	(	size_t	startChar,
		size_t	charCount = `STR_AUTOSIZE`
	)		const

This method extracts a portion of the string. This method works much like as if the string were stored as UCS-4 character array.

Parameters

startChar	0 based index in UCS-4 character array. If this value points beyond the end of the string, this function returns NullString.
charCount	the number of UCS-4 characters to be extracted. You don't have to think about the room for terminating '\0'. If length exceeds the length of the string, this method fits the length to the original string.

Returns: The result string.

void Celartem::String::swap ( String & inString )

This method swaps two strings.

Parameters

inString	A String instance to swap with.

double Celartem::String::toDouble ( ) const

This method converts the string into double value.

Returns: double value; if the string is not suitable to convert into double value, this function returns 0.

int Celartem::String::toInt ( ) const

This method converts the string into int value.

Returns: Integer value; if the string is not suitable to convert into integer value, this function returns 0.

Referenced by Celartem::DjVu::Link::getType().

String Celartem::String::toLower ( ) const

This function converts all upper-case characters into lower-case ones. What this function exactly does is convert [A-Z] to [a-z]; it does not handle any locale-specific issues.

Returns: The result string.

const char* Celartem::String::toMbs ( ) const

This method converts the string into locale specific multi-byte string.

Returns: The converted result.

size_t Celartem::String::toSizeT ( ) const

This method converts the string into size_t value.

Returns: Unsigned integer value.

const UChar2* Celartem::String::toUcs2 ( ) const

This method converts the string into UCS-2 (UTF-16) string.

Returns: The converted result.

const UChar4* Celartem::String::toUcs4 ( ) const

This method converts the string into UCS-4 (UTF-32) string.

Returns: The converted result.

uint64_t Celartem::String::toUInt64 ( ) const

This method converts the string into uint64_t value.

Returns: Integer value; if the string is not suitable to convert into integer value, this function returns 0.

String Celartem::String::toUpper ( ) const

This function converts all lower-case characters into upper-case ones. What this function exactly does is convert [a-z] to [A-Z]; it does not handle any locale-specific issues.

Returns: The result string.

const wchar_t* Celartem::String::toWcs ( ) const

This method converts the string into wchar_t based string.

Returns: The converted result.

String Celartem::String::trimHeadingSpaces ( ) const

This method removes any space characters (' ', \t) at the head of the string, if any exist.

Returns: The result string.

String Celartem::String::trimSpaces ( ) const

This method removes any space characters (' ', \t) at both the head and the tail of the string, if any exist.

Returns: The result string.

String Celartem::String::trimTrailingSpaces ( ) const

This method removes any space characters (' ', \t) at the tail of the string, if any exist.

Returns: The result string.

The documentation for this class was generated from the following file:

cel_string.h

Classes

Public Types

Public Member Functions

Static Public Member Functions

Detailed Description

Member Enumeration Documentation

Constructor & Destructor Documentation

Member Function Documentation