37 return mIter == mString->mData.begin();
42 return mIter == mString->mData.end();
47 return mIter - mString->mData.begin();
52 mIter = mString->mData.begin() + index;
58 return mString->getChar( current_index );
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
72 if ( _test_end() )
return;
78 lead_half = mIter[-1];
88 if ( _test_begin() )
return;
93 lead_half = mIter[-1];
203 return _getCharacter();
208 return _setCharacter( uc );
319 return _getCharacter();
540 assign( str, index, length );
543 #if MYGUI_IS_NATIVE_WCHAR_T
609 return mData.max_size();
614 mData.reserve( size );
619 mData.resize( num, val );
624 mData.swap( from.mData );
629 return mData.empty();
634 return mData.c_str();
644 return mData.capacity();
657 tmp.mData.swap( data );
669 #if MYGUI_IS_NATIVE_WCHAR_T
673 mData.push_back( static_cast<code_point>( val ) );
679 mData.push_back( val );
684 mData.push_back( static_cast<code_point>( val ) );
700 return *m_buffer.mStrBuffer;
706 return m_buffer.mStrBuffer->c_str();
711 _load_buffer_UTF32();
712 return *m_buffer.mUTF32StrBuffer;
717 _load_buffer_UTF32();
718 return m_buffer.mUTF32StrBuffer->c_str();
724 return *m_buffer.mWStrBuffer;
730 return m_buffer.mWStrBuffer->c_str();
735 return mData.at( loc );
740 return mData.at( loc );
753 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
770 if ( newSize > existingSize ) {
772 insert( loc + 1, 1, cp[1] );
775 if ( newSize < existingSize ) {
783 if ( l == 2 )
at( loc + 1 ) = cp[1];
790 i.
mIter = mData.begin();
806 i.
mIter = mData.end();
822 i.
mIter = mData.end();
838 i.
mIter = mData.begin();
859 mData.assign( str.mData );
871 mData.assign( str, num );
877 mData.assign( str.mData, index, len );
883 mData.assign( num, ch );
890 mData.reserve( wstr.length() );
891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
893 std::wstring::const_iterator i, ie = wstr.end();
894 for ( i = wstr.begin(); i != ie; i++ ) {
896 mData.push_back( tmp );
898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
901 std::wstring::const_iterator i, ie = wstr.end();
902 for ( i = wstr.begin(); i != ie; i++ ) {
905 if ( l > 0 ) mData.push_back( cp[0] );
906 if ( l > 1 ) mData.push_back( cp[1] );
912 #if MYGUI_IS_NATIVE_WCHAR_T
938 unsigned char utf8buf[7];
945 std::string::const_iterator i, ie = str.end();
946 for ( i = str.begin(); i != ie; i++ ) {
948 for (
size_t j = 0; j < utf8len; j++ ) {
949 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
951 utf8buf[utf8len] = 0;
956 append( utf16buff, utf16len );
963 std::string tmp( c_str );
976 mData.append( str.mData );
988 mData.append( str.mData, index, len );
994 mData.append( str, num );
1000 mData.append( num, ch );
1010 #if MYGUI_IS_NATIVE_WCHAR_T
1013 std::wstring tmp( w_str, num );
1019 return append( num, static_cast<unicode_char>( ch ) );
1031 append( num, static_cast<code_point>( ch ) );
1061 mData.insert( index, str.mData );
1067 mData.insert( index1, str.mData, index2, num );
1078 mData.insert( index, str, num );
1082 #if MYGUI_IS_NATIVE_WCHAR_T
1100 mData.insert( index, num, ch );
1104 #if MYGUI_IS_NATIVE_WCHAR_T
1107 insert( index, num, static_cast<unicode_char>( ch ) );
1114 insert( index, num, static_cast<code_point>( ch ) );
1123 return insert( index, num, cp[0] );
1127 insert( index, 1, cp[1] );
1128 insert( index, 1, cp[0] );
1135 mData.insert( i.
mIter, num, ch );
1137 #if MYGUI_IS_NATIVE_WCHAR_T
1140 insert( i, num, static_cast<unicode_char>( ch ) );
1146 insert( i, num, static_cast<code_point>( ch ) );
1183 mData.erase( index );
1185 mData.erase( index, num );
1191 mData.replace( index1, num1, str.mData, 0,
npos );
1197 mData.replace( index1, num1, str.mData, 0, num2 );
1203 mData.replace( index1, num1, str.mData, index2, num2 );
1213 return replace( index1, num1, str, 0, num );
1218 mData.replace( index, num1, num2, ch );
1228 return replace( index1, num1, num, ch );
1233 return mData.compare( str.mData );
1238 return mData.compare( str );
1243 return mData.compare( index, length, str.mData );
1248 return mData.compare( index, length, str.mData, index2, length2 );
1253 return mData.compare( index, length, str, length2 );
1256 #if MYGUI_IS_NATIVE_WCHAR_T
1259 UString tmp( w_str, length2 );
1260 return compare( index, length, tmp );
1266 UString tmp( c_str, length2 );
1267 return compare( index, length, tmp );
1272 return mData.find( str.
c_str(), index );
1287 #if MYGUI_IS_NATIVE_WCHAR_T
1291 return mData.find( tmp.c_str(), index,
length );
1297 return find( static_cast<code_point>( ch ), index );
1302 return mData.find( ch, index );
1305 #if MYGUI_IS_NATIVE_WCHAR_T
1308 return find( static_cast<unicode_char>( ch ), index );
1321 return mData.rfind( str.
c_str(), index );
1327 return mData.rfind( tmp.
c_str(), index, num );
1333 return mData.rfind( tmp.
c_str(), index, num );
1336 #if MYGUI_IS_NATIVE_WCHAR_T
1340 return mData.rfind( tmp.c_str(), index, num );
1346 return rfind( static_cast<code_point>( ch ), index );
1351 return mData.rfind( ch, index );
1354 #if MYGUI_IS_NATIVE_WCHAR_T
1357 return rfind( static_cast<unicode_char>( ch ), index );
1372 while ( i < num && ( index + i ) < len ) {
1390 return find_first_of( static_cast<code_point>( ch ), index );
1393 #if MYGUI_IS_NATIVE_WCHAR_T
1396 return find_first_of( static_cast<unicode_char>( ch ), index );
1411 while ( i < num && ( index + i ) < len ) {
1432 #if MYGUI_IS_NATIVE_WCHAR_T
1450 if ( index > len ) index = len - 1;
1452 while ( i < num && ( index - i ) !=
npos ) {
1474 #if MYGUI_IS_NATIVE_WCHAR_T
1477 return find_last_of( static_cast<unicode_char>( ch ), index );
1492 if ( index > len ) index = len - 1;
1494 while ( i < num && ( index - i ) !=
npos ) {
1521 #if MYGUI_IS_NATIVE_WCHAR_T
1562 #if MYGUI_IS_NATIVE_WCHAR_T
1606 UString::operator std::string()
const
1608 return std::string(
asUTF8() );
1612 UString::operator std::wstring()
const
1614 return std::wstring(
asWStr() );
1620 if ( 0xD800 <= cp && cp <= 0xDFFF )
1627 if ( 0xD800 <= cp && cp <= 0xDBFF )
1634 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1641 if ( 0xD800 <= cp && cp <= 0xDBFF )
1657 bool wordPair =
false;
1660 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1662 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1671 unsigned short cU = cp1, cL = cp2;
1675 out_uc = ( cU & 0x03FF ) << 10;
1676 out_uc |= ( cL & 0x03FF );
1684 if ( in_uc <= 0xFFFF ) {
1693 tmp =
static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1698 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1707 return ( cp & ~_cont_mask ) != _cont;
1712 if ( !( cp & 0x80 ) )
return 1;
1713 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1714 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1715 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1716 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1717 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1733 if ( !( uc & ~0x0000007F ) )
return 1;
1734 if ( !( uc & ~0x000007FF ) )
return 2;
1735 if ( !( uc & ~0x0000FFFF ) )
return 3;
1736 if ( !( uc & ~0x001FFFFF ) )
return 4;
1737 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1738 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1756 c = in_cp[i] & _lead5_mask;
1759 c = in_cp[i] & _lead4_mask;
1762 c = in_cp[i] & _lead3_mask;
1765 c = in_cp[i] & _lead2_mask;
1768 c = in_cp[i] & _lead1_mask;
1773 for ( ++i; i <
len; i++ )
1775 if (( in_cp[i] & ~_cont_mask ) != _cont )
1782 c |= ( in_cp[i] & _cont_mask );
1795 for (
size_t i = len - 1; i > 0; i-- ) {
1796 out_cp[i] =
static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1803 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1806 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1809 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1812 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1815 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1819 out_cp[0] =
static_cast<unsigned char>(( c ) & 0x7F);
1829 std::string tmp( reinterpret_cast<const char*>( c_str ) );
1835 std::string::const_iterator i, ie = str.end();
1841 if (( *i ) & 0x80 ) {
1842 unsigned char c = ( *i );
1843 size_t contBytes = 0;
1846 if (( c & ~_lead1_mask ) == _lead1 ) {
1854 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1856 if ( c == _lead2 ) {
1858 if (( c & _lead2 ) == _cont )
1865 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1867 if ( c == _lead3 ) {
1869 if (( c & _lead3 ) == _cont )
1876 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1878 if ( c == _lead4 ) {
1880 if (( c & _lead4 ) == _cont )
1887 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1889 if ( c == _lead5 ) {
1891 if (( c & _lead5 ) == _cont )
1900 while ( contBytes-- ) {
1902 if (( c & ~_cont_mask ) != _cont )
1915 void UString::_init()
1917 m_buffer.mVoidBuffer = 0;
1918 m_bufferType = bt_none;
1922 void UString::_cleanBuffer()
const
1924 if ( m_buffer.mVoidBuffer != 0 ) {
1925 switch ( m_bufferType ) {
1927 delete m_buffer.mStrBuffer;
1930 delete m_buffer.mWStrBuffer;
1932 case bt_utf32string:
1933 delete m_buffer.mUTF32StrBuffer;
1939 assert(
"This should never happen - mVoidBuffer should never contain something if we "
1940 "don't know the type");
1943 m_buffer.mVoidBuffer = 0;
1945 m_bufferType = bt_none;
1949 void UString::_getBufferStr()
const
1951 if ( m_bufferType != bt_string ) {
1953 m_buffer.mStrBuffer =
new std::string();
1954 m_bufferType = bt_string;
1956 m_buffer.mStrBuffer->clear();
1959 void UString::_getBufferWStr()
const
1961 if ( m_bufferType != bt_wstring ) {
1963 m_buffer.mWStrBuffer =
new std::wstring();
1964 m_bufferType = bt_wstring;
1966 m_buffer.mWStrBuffer->clear();
1969 void UString::_getBufferUTF32Str()
const
1971 if ( m_bufferType != bt_utf32string ) {
1974 m_bufferType = bt_utf32string;
1976 m_buffer.mUTF32StrBuffer->clear();
1979 void UString::_load_buffer_UTF8()
const
1982 std::string& buffer = ( *m_buffer.mStrBuffer );
1983 buffer.reserve(
length() );
1985 unsigned char utf8buf[6];
1986 char* charbuf = (
char* )utf8buf;
1992 c = i.getCharacter();
1996 buffer.push_back( charbuf[j++] );
2000 void UString::_load_buffer_WStr()
const
2003 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2004 buffer.reserve(
length() );
2005 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2007 for ( i =
begin(); i != ie; ++i ) {
2008 buffer.push_back((
wchar_t )( *i ) );
2010 #else // wchar_t fits UTF-32
2014 c = i.getCharacter();
2015 buffer.push_back((
wchar_t )c );
2020 void UString::_load_buffer_UTF32()
const
2022 _getBufferUTF32Str();
2023 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2024 buffer.reserve(
length() );
2030 c = i.getCharacter();
2031 buffer.push_back( c );
code_point & operator[](size_type index)
code point dereference operator
std::basic_string< code_point > dstring
base iterator class for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator--()
pre-decrement
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space ...
const value_type & operator*() const
dereference operator
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
void resize(size_type num, const code_point &val=0)
changes the size of the string to num, filling in any new area with val
UString & append(const UString &str)
appends str on to the end of the current string
_rev_iterator & operator--()
pre-decrement
void _seekRev(size_type c)
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
int _setCharacter(unicode_char uc)
bool operator>(const UString &right) const
greater than operator
const code_point * c_str() const
returns a pointer to the first character in the current string
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character ...
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str...
UString()
default constructor, creates an empty string
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator*() const
dereference operator
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
_rev_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
size_t size_type
size type used to indicate string size and character positions within the string
_const_rev_iterator & operator++()
pre-increment
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
_const_fwd_iterator const_iterator
const iterator
static const size_type npos
the usual constant representing: not found, no limit, etc
const forward iterator for UString
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
bool operator<(const UString &right) const
less than operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator*() const
dereference operator
_const_rev_iterator operator+(difference_type n)
addition operator
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding, returns the number of code points used to encode the input (always 1 or 2)
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
void push_back(unicode_char val)
appends val to the end of the string
int compare(const UString &str) const
compare str to the current string
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
void clear()
deletes all of the elements in the string
const reverse iterator for UString
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
iterator begin()
returns an iterator to the first element of the string
_fwd_iterator iterator
iterator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream ...
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
_const_fwd_iterator & operator++()
pre-increment
bool operator==(const UString &right) const
equality operator
uint16 code_point
a single UTF-16 code point
_fwd_iterator operator+(difference_type n)
addition operator
void _seekFwd(size_type c)
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
unicode_char _getCharacter() const
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
void swap(UString &from)
exchanges the elements of the current string with those of from
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc...
_fwd_iterator operator-(difference_type n)
subtraction operator
_rev_iterator operator+(difference_type n)
addition operator
size_type length() const
Returns the number of code points in the current string.
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
code_point value_type
value type typedef for use in iterators
value_type & operator[](difference_type n) const
dereference at offset operator
void reserve(size_type size)
sets the capacity of the string to at least size code points
const value_type & operator[](difference_type n) const
dereference at offset operator
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream ...
bool operator<=(const UString &right) const
less than or equal operator
forward iterator for UString
uint32 unicode_char
a single 32-bit Unicode character
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
_const_rev_iterator & operator--()
pre-decrement
void _become(const _base_iterator &i)
_fwd_iterator & operator--()
pre-decrement
_rev_iterator & operator++()
pre-increment
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream ...
forward iterator for UString
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream ...
size_type length_Characters() const
Returns the number of Unicode characters in the string.
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
size_type _get_index() const
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
_rev_iterator & operator+=(difference_type n)
addition assignment operator
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
value_type & operator[](difference_type n) const
dereference at offset operator
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const code_point * data() const
returns a pointer to the first character in the current string
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
iterator end()
returns an iterator just past the end of the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str ...
size_type size() const
Returns the number of code points in the current string.
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str...
value_type & operator*() const
dereference operator
const value_type & operator[](difference_type n) const
dereference at offset operator
bool operator!=(const UString &right) const
inequality operator
float len(float x, float y)
bool empty() const
returns true if the string has no elements, false otherwise
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator & operator++()
pre-increment
void _jump_to(size_type index)
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array