• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

MyGUI_UString.h

Go to the documentation of this file.
00001 // Modified from OpenGUI under lenient license
00002 // Original copyright details and licensing below:
00003 // OpenGUI (http://opengui.sourceforge.net)
00004 // This source code is released under the BSD License
00005 
00006 // Permission is given to the Ogre project to use the contents of file within its
00007 // source and binary applications, as well as any derivative works, in accordance
00008 // with the terms of any license under which Ogre is or will be distributed.
00009 //
00010 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates
00011 // to this file, under any terms that it deems fit, and is not required to maintain
00012 // the original BSD licensing terms of this file, however OpenGUI retains the right
00013 // to present its copy of this file under the terms of any license under which
00014 // OpenGUI is distributed.
00015 //
00016 // Ogre is not required to release to OpenGUI any future changes that it makes to
00017 // this file, and understands and agrees that any such changes that are released
00018 // back to OpenGUI will become available under the terms of any license under which
00019 // OpenGUI is distributed.
00020 //
00021 // For brevity, this permission text may be removed from this file if desired.
00022 // The original record kept within the SourceForge (http://sourceforge.net/) tracker
00023 // is sufficient.
00024 //
00025 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007]
00026 
00027 #ifndef __MYGUI_U_STRING_H__
00028 #define __MYGUI_U_STRING_H__
00029 
00030 
00031 #include "MyGUI_Prerequest.h"
00032 #include "MyGUI_Diagnostic.h"
00033 
00034 // these are explained later
00035 #include <iterator>
00036 #include <string>
00037 #include <stdexcept>
00038 #include <assert.h>
00039 
00040 // Workaround for VC7:
00041 //      when building with /MD or /MDd, VC7 has both std::basic_string<unsigned short> and
00042 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header
00043 // files tell the compiler that only one of them is there (based on the /Zc:wchar_t compile
00044 // option). Since this file uses both of them, the compiler instantiates the other
00045 // one in user object code, which leads to duplicate symbols with msvcprt.lib/MSVCP71[D].dll.
00046 //
00047 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00048 
00049 # if defined(_DLL_CPPLIB)
00050 
00051 namespace std
00052 {
00053     template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
00054         allocator<unsigned short> >;
00055 
00056     template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
00057         allocator<__wchar_t> >;
00058 }
00059 
00060 # endif // defined(_DLL_CPPLIB)
00061 
00062 #endif  // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
00063 
00064 
00065 namespace MyGUI
00066 {
00067 
00068     /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS
00069     =NOTICE=
00070     This class is not a complete Unicode solution. It purposefully does not
00071     provide certain functionality, such as proper lexical sorting for
00072     Unicode values. It does provide comparison operators for the sole purpose
00073     of using UString as an index with std::map and other operator< sorted
00074     containers, but it should NOT be relied upon for meaningful lexical
00075     operations, such as alphabetical sorts. If you need this type of
00076     functionality, look into using ICU instead (http://icu.sourceforge.net/).
00077 
00078     =REQUIREMENTS=
00079     There are a few requirements for proper operation. They are fairly small,
00080     and shouldn't restrict usage on any reasonable target.
00081     * Compiler must support unsigned 16-bit integer types
00082     * Compiler must support signed 32-bit integer types
00083     * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such
00084         using the WCHAR_UTF16 macro as outlined below.
00085     * You must include <iterator>, <string>, and <wchar.h>. Probably more, but
00086         these are the most obvious.
00087 
00088     =REQUIRED PREPROCESSOR MACROS=
00089     This class requires two preprocessor macros to be defined in order to
00090     work as advertised.
00091     INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int)
00092     UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short)
00093 
00094     Additionally, a third macro should be defined to control the evaluation of wchar_t:
00095     WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points,
00096         such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit
00097         integer representing UTF-32 code points.
00098     */
00099 
00100     // THIS IS A VERY BRIEF AUTO DETECTION. YOU MAY NEED TO TWEAK THIS
// Decide whether wchar_t should be treated as UTF-16 (WCHAR_UTF16 defined) or
// as UTF-32 (WCHAR_UTF16 left undefined).
#if !defined( __STDC_ISO_10646__ )
// Compilers that provide __STDC_ISO_10646__ guarantee that wchar_t can hold any
// Unicode value in a single code point, so the tests below only run without it.
#   if defined( __WIN32__ ) || defined( _WIN32 )
#       define WCHAR_UTF16 // all currently known Windows platforms use UTF-16 in wchar_t
#   elif WCHAR_MAX <= 0xFFFF
// Last-resort fallback; WCHAR_MAX comes from <wchar.h>.
// NOTE(review): this header does not include <wchar.h> itself. If WCHAR_MAX is
// not defined at this point the preprocessor evaluates it as 0 and this branch
// is taken - confirm that the macro arrives through another include.
#       define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
#   endif
#endif // !defined( __STDC_ISO_10646__ )
00113 
00114 
00115 // MYGUI_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of
00116 // uint16 or uint32.
#if MYGUI_COMPILER != MYGUI_COMPILER_MSVC

// wchar_t is assumed to be a native type on every other compiler.
#   define MYGUI_IS_NATIVE_WCHAR_T      1

#elif defined(_NATIVE_WCHAR_T_DEFINED)

// MSVC with a native wchar_t: the wchar_t overloads can be declared.
#   define MYGUI_IS_NATIVE_WCHAR_T      1

#else

// MSVC compiled without /Zc:wchar_t: wchar_t and UString::code_point are both
// typedefs of uint16, so wchar_t overloads would duplicate the code_point
// ones and must not be declared.
#   define MYGUI_IS_NATIVE_WCHAR_T      0

#endif  // MYGUI_COMPILER != MYGUI_COMPILER_MSVC
00135 
00137 
00162     class UString
00163     {
00164         // constants used in UTF-8 conversions
00165         static const unsigned char _lead1 = 0xC0;      //110xxxxx
00166         static const unsigned char _lead1_mask = 0x1F; //00011111
00167         static const unsigned char _lead2 = 0xE0;      //1110xxxx
00168         static const unsigned char _lead2_mask = 0x0F; //00001111
00169         static const unsigned char _lead3 = 0xF0;      //11110xxx
00170         static const unsigned char _lead3_mask = 0x07; //00000111
00171         static const unsigned char _lead4 = 0xF8;      //111110xx
00172         static const unsigned char _lead4_mask = 0x03; //00000011
00173         static const unsigned char _lead5 = 0xFC;      //1111110x
00174         static const unsigned char _lead5_mask = 0x01; //00000001
00175         static const unsigned char _cont = 0x80;       //10xxxxxx
00176         static const unsigned char _cont_mask = 0x3F;  //00111111
00177 
00178     public:
00180         typedef size_t size_type;
00182         static const size_type npos = ~0;
00183 
00185         typedef uint32 unicode_char;
00186 
00188         typedef uint16 code_point;
00189 
00191         typedef code_point value_type;
00192 
00193         typedef std::basic_string<code_point> dstring; // data string
00194 
00196         typedef std::basic_string<unicode_char> utf32string;
00197 
00199         class invalid_data: public std::runtime_error
00200         { /* i don't know why the beautifier is freaking out on this line */
00201         public:
00203             explicit invalid_data( const std::string& _Message ): std::runtime_error( _Message )
00204             {
00205                 /* The thing is, Bob, it's not that I'm lazy, it's that I just don't care. */
00206             }
00207         };
00208 
00209         //#########################################################################
00211         class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type>
00212         { /* i don't know why the beautifier is freaking out on this line */
00213             friend class UString;
00214         protected:
00215             _base_iterator()
00216             {
00217                 mString = 0;
00218             }
00219 
00220             void _seekFwd( size_type c )
00221             {
00222                 mIter += c;
00223             }
00224             void _seekRev( size_type c )
00225             {
00226                 mIter -= c;
00227             }
00228             void _become( const _base_iterator& i )
00229             {
00230                 mIter = i.mIter;
00231                 mString = i.mString;
00232             }
00233             bool _test_begin() const
00234             {
00235                 return mIter == mString->mData.begin();
00236             }
00237             bool _test_end() const
00238             {
00239                 return mIter == mString->mData.end();
00240             }
00241             size_type _get_index() const
00242             {
00243                 return mIter - mString->mData.begin();
00244             }
00245             void _jump_to( size_type index )
00246             {
00247                 mIter = mString->mData.begin() + index;
00248             }
00249 
00250             unicode_char _getCharacter() const
00251             {
00252                 size_type current_index = _get_index();
00253                 return mString->getChar( current_index );
00254             }
00255             int _setCharacter( unicode_char uc )
00256             {
00257                 size_type current_index = _get_index();
00258                 int change = mString->setChar( current_index, uc );
00259                 _jump_to( current_index );
00260                 return change;
00261             }
00262 
00263             void _moveNext()
00264             {
00265                 _seekFwd( 1 ); // move 1 code point forward
00266                 if ( _test_end() ) return; // exit if we hit the end
00267                 if ( _utf16_surrogate_follow( mIter[0] ) )
00268                 {
00269                     // landing on a follow code point means we might be part of a bigger character
00270                     // so we test for that
00271                     code_point lead_half = 0;
00272                     //NB: we can't possibly be at the beginning here, so no need to test
00273                     lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
00274                     if ( _utf16_surrogate_lead( lead_half ) )
00275                     {
00276                         _seekFwd( 1 ); // if so, then advance 1 more code point
00277                     }
00278                 }
00279             }
00280             void _movePrev()
00281             {
00282                 _seekRev( 1 ); // move 1 code point backwards
00283                 if ( _test_begin() ) return; // exit if we hit the beginning
00284                 if ( _utf16_surrogate_follow( mIter[0] ) )
00285                 {
00286                     // landing on a follow code point means we might be part of a bigger character
00287                     // so we test for that
00288                     code_point lead_half = 0;
00289                     lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
00290                     if ( _utf16_surrogate_lead( lead_half ) )
00291                     {
00292                         _seekRev( 1 ); // if so, then rewind 1 more code point
00293                     }
00294                 }
00295             }
00296 
00297             dstring::iterator mIter;
00298             UString* mString;
00299         };
00300 
00301         //#########################################################################
00302         // FORWARD ITERATORS
00303         //#########################################################################
00304         class _const_fwd_iterator; // forward declaration
00305 
00307         class _fwd_iterator: public _base_iterator
00308         { /* i don't know why the beautifier is freaking out on this line */
00309             friend class _const_fwd_iterator;
00310         public:
00311             _fwd_iterator() { }
00312             _fwd_iterator( const _fwd_iterator& i )
00313             {
00314                 _become( i );
00315             }
00316 
00318             _fwd_iterator& operator++()
00319             {
00320                 _seekFwd( 1 );
00321                 return *this;
00322             }
00324             _fwd_iterator operator++( int )
00325             {
00326                 _fwd_iterator tmp( *this );
00327                 _seekFwd( 1 );
00328                 return tmp;
00329             }
00330 
00332             _fwd_iterator& operator--()
00333             {
00334                 _seekRev( 1 );
00335                 return *this;
00336             }
00338             _fwd_iterator operator--( int )
00339             {
00340                 _fwd_iterator tmp( *this );
00341                 _seekRev( 1 );
00342                 return tmp;
00343             }
00344 
00346             _fwd_iterator operator+( size_type n )
00347             {
00348                 _fwd_iterator tmp( *this );
00349                 tmp._seekFwd( n );
00350                 return tmp;
00351             }
00353             _fwd_iterator operator+( difference_type n )
00354             {
00355                 _fwd_iterator tmp( *this );
00356                 if ( n < 0 )
00357                     tmp._seekRev( -n );
00358                 else
00359                     tmp._seekFwd( n );
00360                 return tmp;
00361             }
00363             _fwd_iterator operator-( size_type n )
00364             {
00365                 _fwd_iterator tmp( *this );
00366                 tmp._seekRev( n );
00367                 return tmp;
00368             }
00370             _fwd_iterator operator-( difference_type n )
00371             {
00372                 _fwd_iterator tmp( *this );
00373                 if ( n < 0 )
00374                     tmp._seekFwd( -n );
00375                 else
00376                     tmp._seekRev( n );
00377                 return tmp;
00378             }
00379 
00381             _fwd_iterator& operator+=( size_type n )
00382             {
00383                 _seekFwd( n );
00384                 return *this;
00385             }
00387             _fwd_iterator& operator+=( difference_type n )
00388             {
00389                 if ( n < 0 )
00390                     _seekRev( -n );
00391                 else
00392                     _seekFwd( n );
00393                 return *this;
00394             }
00396             _fwd_iterator& operator-=( size_type n )
00397             {
00398                 _seekRev( n );
00399                 return *this;
00400             }
00402             _fwd_iterator& operator-=( difference_type n )
00403             {
00404                 if ( n < 0 )
00405                     _seekFwd( -n );
00406                 else
00407                     _seekRev( n );
00408                 return *this;
00409             }
00410 
00412             value_type& operator*() const
00413             {
00414                 return *mIter;
00415             }
00416 
00418             value_type& operator[]( size_type n ) const
00419             {
00420                 _fwd_iterator tmp( *this );
00421                 tmp += n;
00422                 return *tmp;
00423             }
00425             value_type& operator[]( difference_type n ) const
00426             {
00427                 _fwd_iterator tmp( *this );
00428                 tmp += n;
00429                 return *tmp;
00430             }
00431 
00433             _fwd_iterator& moveNext()
00434             {
00435                 _moveNext();
00436                 return *this;
00437             }
00439             _fwd_iterator& movePrev()
00440             {
00441                 _movePrev();
00442                 return *this;
00443             }
00445             unicode_char getCharacter() const
00446             {
00447                 return _getCharacter();
00448             }
00450             int setCharacter( unicode_char uc )
00451             {
00452                 return _setCharacter( uc );
00453             }
00454         };
00455 
00456 
00457 
00458         //#########################################################################
00460         class _const_fwd_iterator: public _base_iterator
00461         { /* i don't know why the beautifier is freaking out on this line */
00462         public:
00463             _const_fwd_iterator() { }
00464             _const_fwd_iterator( const _const_fwd_iterator& i )
00465             {
00466                 _become( i );
00467             }
00468             _const_fwd_iterator( const _fwd_iterator& i )
00469             {
00470                 _become( i );
00471             }
00472 
00474             _const_fwd_iterator& operator++()
00475             {
00476                 _seekFwd( 1 );
00477                 return *this;
00478             }
00480             _const_fwd_iterator operator++( int )
00481             {
00482                 _const_fwd_iterator tmp( *this );
00483                 _seekFwd( 1 );
00484                 return tmp;
00485             }
00486 
00488             _const_fwd_iterator& operator--()
00489             {
00490                 _seekRev( 1 );
00491                 return *this;
00492             }
00494             _const_fwd_iterator operator--( int )
00495             {
00496                 _const_fwd_iterator tmp( *this );
00497                 _seekRev( 1 );
00498                 return tmp;
00499             }
00500 
00502             _const_fwd_iterator operator+( size_type n )
00503             {
00504                 _const_fwd_iterator tmp( *this );
00505                 tmp._seekFwd( n );
00506                 return tmp;
00507             }
00509             _const_fwd_iterator operator+( difference_type n )
00510             {
00511                 _const_fwd_iterator tmp( *this );
00512                 if ( n < 0 )
00513                     tmp._seekRev( -n );
00514                 else
00515                     tmp._seekFwd( n );
00516                 return tmp;
00517             }
00519             _const_fwd_iterator operator-( size_type n )
00520             {
00521                 _const_fwd_iterator tmp( *this );
00522                 tmp._seekRev( n );
00523                 return tmp;
00524             }
00526             _const_fwd_iterator operator-( difference_type n )
00527             {
00528                 _const_fwd_iterator tmp( *this );
00529                 if ( n < 0 )
00530                     tmp._seekFwd( -n );
00531                 else
00532                     tmp._seekRev( n );
00533                 return tmp;
00534             }
00535 
00537             _const_fwd_iterator& operator+=( size_type n )
00538             {
00539                 _seekFwd( n );
00540                 return *this;
00541             }
00543             _const_fwd_iterator& operator+=( difference_type n )
00544             {
00545                 if ( n < 0 )
00546                     _seekRev( -n );
00547                 else
00548                     _seekFwd( n );
00549                 return *this;
00550             }
00552             _const_fwd_iterator& operator-=( size_type n )
00553             {
00554                 _seekRev( n );
00555                 return *this;
00556             }
00558             _const_fwd_iterator& operator-=( difference_type n )
00559             {
00560                 if ( n < 0 )
00561                     _seekFwd( -n );
00562                 else
00563                     _seekRev( n );
00564                 return *this;
00565             }
00566 
00568             const value_type& operator*() const
00569             {
00570                 return *mIter;
00571             }
00572 
00574             const value_type& operator[]( size_type n ) const
00575             {
00576                 _const_fwd_iterator tmp( *this );
00577                 tmp += n;
00578                 return *tmp;
00579             }
00581             const value_type& operator[]( difference_type n ) const
00582             {
00583                 _const_fwd_iterator tmp( *this );
00584                 tmp += n;
00585                 return *tmp;
00586             }
00587 
00589             _const_fwd_iterator& moveNext()
00590             {
00591                 _moveNext();
00592                 return *this;
00593             }
00595             _const_fwd_iterator& movePrev()
00596             {
00597                 _movePrev();
00598                 return *this;
00599             }
00601             unicode_char getCharacter() const
00602             {
00603                 return _getCharacter();
00604             }
00605 
00607             friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00609             friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00611             friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00613             friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00615             friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00617             friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00619             friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00620 
00621         };
00622 
00623         //#########################################################################
00624         // REVERSE ITERATORS
00625         //#########################################################################
00626         class _const_rev_iterator; // forward declaration
00628         class _rev_iterator: public _base_iterator
00629         { /* i don't know why the beautifier is freaking out on this line */
00630             friend class _const_rev_iterator;
00631         public:
00632             _rev_iterator() { }
00633             _rev_iterator( const _rev_iterator& i )
00634             {
00635                 _become( i );
00636             }
00637 
00639             _rev_iterator& operator++()
00640             {
00641                 _seekRev( 1 );
00642                 return *this;
00643             }
00645             _rev_iterator operator++( int )
00646             {
00647                 _rev_iterator tmp( *this );
00648                 _seekRev( 1 );
00649                 return tmp;
00650             }
00651 
00653             _rev_iterator& operator--()
00654             {
00655                 _seekFwd( 1 );
00656                 return *this;
00657             }
00659             _rev_iterator operator--( int )
00660             {
00661                 _rev_iterator tmp( *this );
00662                 _seekFwd( 1 );
00663                 return tmp;
00664             }
00665 
00667             _rev_iterator operator+( size_type n )
00668             {
00669                 _rev_iterator tmp( *this );
00670                 tmp._seekRev( n );
00671                 return tmp;
00672             }
00674             _rev_iterator operator+( difference_type n )
00675             {
00676                 _rev_iterator tmp( *this );
00677                 if ( n < 0 )
00678                     tmp._seekFwd( -n );
00679                 else
00680                     tmp._seekRev( n );
00681                 return tmp;
00682             }
00684             _rev_iterator operator-( size_type n )
00685             {
00686                 _rev_iterator tmp( *this );
00687                 tmp._seekFwd( n );
00688                 return tmp;
00689             }
00691             _rev_iterator operator-( difference_type n )
00692             {
00693                 _rev_iterator tmp( *this );
00694                 if ( n < 0 )
00695                     tmp._seekRev( -n );
00696                 else
00697                     tmp._seekFwd( n );
00698                 return tmp;
00699             }
00700 
00702             _rev_iterator& operator+=( size_type n )
00703             {
00704                 _seekRev( n );
00705                 return *this;
00706             }
00708             _rev_iterator& operator+=( difference_type n )
00709             {
00710                 if ( n < 0 )
00711                     _seekFwd( -n );
00712                 else
00713                     _seekRev( n );
00714                 return *this;
00715             }
00717             _rev_iterator& operator-=( size_type n )
00718             {
00719                 _seekFwd( n );
00720                 return *this;
00721             }
00723             _rev_iterator& operator-=( difference_type n )
00724             {
00725                 if ( n < 0 )
00726                     _seekRev( -n );
00727                 else
00728                     _seekFwd( n );
00729                 return *this;
00730             }
00731 
00733             value_type& operator*() const
00734             {
00735                 return mIter[-1];
00736             }
00737 
00739             value_type& operator[]( size_type n ) const
00740             {
00741                 _rev_iterator tmp( *this );
00742                 tmp -= n;
00743                 return *tmp;
00744             }
00746             value_type& operator[]( difference_type n ) const
00747             {
00748                 _rev_iterator tmp( *this );
00749                 tmp -= n;
00750                 return *tmp;
00751             }
00752         };
00753         //#########################################################################
00755         class _const_rev_iterator: public _base_iterator
00756         { /* i don't know why the beautifier is freaking out on this line */
00757         public:
00758             _const_rev_iterator() { }
00759             _const_rev_iterator( const _const_rev_iterator& i )
00760             {
00761                 _become( i );
00762             }
00763             _const_rev_iterator( const _rev_iterator& i )
00764             {
00765                 _become( i );
00766             }
00768             _const_rev_iterator& operator++()
00769             {
00770                 _seekRev( 1 );
00771                 return *this;
00772             }
00774             _const_rev_iterator operator++( int )
00775             {
00776                 _const_rev_iterator tmp( *this );
00777                 _seekRev( 1 );
00778                 return tmp;
00779             }
00780 
00782             _const_rev_iterator& operator--()
00783             {
00784                 _seekFwd( 1 );
00785                 return *this;
00786             }
00788             _const_rev_iterator operator--( int )
00789             {
00790                 _const_rev_iterator tmp( *this );
00791                 _seekFwd( 1 );
00792                 return tmp;
00793             }
00794 
00796             _const_rev_iterator operator+( size_type n )
00797             {
00798                 _const_rev_iterator tmp( *this );
00799                 tmp._seekRev( n );
00800                 return tmp;
00801             }
00803             _const_rev_iterator operator+( difference_type n )
00804             {
00805                 _const_rev_iterator tmp( *this );
00806                 if ( n < 0 )
00807                     tmp._seekFwd( -n );
00808                 else
00809                     tmp._seekRev( n );
00810                 return tmp;
00811             }
00813             _const_rev_iterator operator-( size_type n )
00814             {
00815                 _const_rev_iterator tmp( *this );
00816                 tmp._seekFwd( n );
00817                 return tmp;
00818             }
00820             _const_rev_iterator operator-( difference_type n )
00821             {
00822                 _const_rev_iterator tmp( *this );
00823                 if ( n < 0 )
00824                     tmp._seekRev( -n );
00825                 else
00826                     tmp._seekFwd( n );
00827                 return tmp;
00828             }
00829 
00831             _const_rev_iterator& operator+=( size_type n )
00832             {
00833                 _seekRev( n );
00834                 return *this;
00835             }
00837             _const_rev_iterator& operator+=( difference_type n )
00838             {
00839                 if ( n < 0 )
00840                     _seekFwd( -n );
00841                 else
00842                     _seekRev( n );
00843                 return *this;
00844             }
00846             _const_rev_iterator& operator-=( size_type n )
00847             {
00848                 _seekFwd( n );
00849                 return *this;
00850             }
00852             _const_rev_iterator& operator-=( difference_type n )
00853             {
00854                 if ( n < 0 )
00855                     _seekRev( -n );
00856                 else
00857                     _seekFwd( n );
00858                 return *this;
00859             }
00860 
00862             const value_type& operator*() const
00863             {
00864                 return mIter[-1];
00865             }
00866 
00868             const value_type& operator[]( size_type n ) const
00869             {
00870                 _const_rev_iterator tmp( *this );
00871                 tmp -= n;
00872                 return *tmp;
00873             }
00875             const value_type& operator[]( difference_type n ) const
00876             {
00877                 _const_rev_iterator tmp( *this );
00878                 tmp -= n;
00879                 return *tmp;
00880             }
00881 
00883             friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
00885             friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
00887             friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00889             friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
00891             friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00893             friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
00895             friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00896         };
00897         //#########################################################################
00898 
00899         typedef _fwd_iterator iterator;                     
00900         typedef _rev_iterator reverse_iterator;             
00901         typedef _const_fwd_iterator const_iterator;         
00902         typedef _const_rev_iterator const_reverse_iterator; 
00903 
00904 
00906 
00907 
00908         UString()
00909         {
00910             _init();
00911         }
00913         UString( const UString& copy )
00914         {
00915             _init();
00916             mData = copy.mData;
00917         }
00919         UString( size_type length, const code_point& ch )
00920         {
00921             _init();
00922             assign( length, ch );
00923         }
00925         UString( const code_point* str )
00926         {
00927             _init();
00928             assign( str );
00929         }
00931         UString( const code_point* str, size_type length )
00932         {
00933             _init();
00934             assign( str, length );
00935         }
00937         UString( const UString& str, size_type index, size_type length )
00938         {
00939             _init();
00940             assign( str, index, length );
00941         }
00942 #if MYGUI_IS_NATIVE_WCHAR_T
00943 
00944         UString( const wchar_t* w_str )
00945         {
00946             _init();
00947             assign( w_str );
00948         }
00950         UString( const wchar_t* w_str, size_type length )
00951         {
00952             _init();
00953             assign( w_str, length );
00954         }
00955 #endif
00956 
00957         UString( const std::wstring& wstr )
00958         {
00959             _init();
00960             assign( wstr );
00961         }
00963         UString( const char* c_str )
00964         {
00965             _init();
00966             assign( c_str );
00967         }
00969         UString( const char* c_str, size_type length )
00970         {
00971             _init();
00972             assign( c_str, length );
00973         }
00975         UString( const std::string& str )
00976         {
00977             _init();
00978             assign( str );
00979         }
00981         ~UString()
00982         {
00983             _cleanBuffer();
00984         }
00986 
00988 
00990 
00991 
00992         size_type size() const
00993         {
00994             return mData.size();
00995         }
00997         size_type length() const
00998         {
00999             return size();
01000         }
01002 
01003         size_type length_Characters() const
01004         {
01005             const_iterator i = begin(), ie = end();
01006             size_type c = 0;
01007             while ( i != ie )
01008             {
01009                 i.moveNext();
01010                 ++c;
01011             }
01012             return c;
01013         }
01015         size_type max_size() const
01016         {
01017             return mData.max_size();
01018         }
01020         void reserve( size_type size )
01021         {
01022             mData.reserve( size );
01023         }
01025         void resize( size_type num, const code_point& val = 0 )
01026         {
01027             mData.resize( num, val );
01028         }
01030         void swap( UString& from )
01031         {
01032             mData.swap( from.mData );
01033         }
01035         bool empty() const
01036         {
01037             return mData.empty();
01038         }
01040         const code_point* c_str() const
01041         {
01042             return mData.c_str();
01043         }
01045         const code_point* data() const
01046         {
01047             return c_str();
01048         }
01050         size_type capacity() const
01051         {
01052             return mData.capacity();
01053         }
01055         void clear()
01056         {
01057             mData.clear();
01058         }
01060 
01061         UString substr( size_type index, size_type num = npos ) const
01062         {
01063             // this could avoid the extra copy if we used a private specialty constructor
01064             dstring data = mData.substr( index, num );
01065             UString tmp;
01066             tmp.mData.swap( data );
01067             return tmp;
01068         }
01070         void push_back( unicode_char val )
01071         {
01072             code_point cp[2];
01073             size_t c = _utf32_to_utf16( val, cp );
01074             if ( c > 0 ) push_back( cp[0] );
01075             if ( c > 1 ) push_back( cp[1] );
01076         }
01077 #if MYGUI_IS_NATIVE_WCHAR_T
01078 
01079         void push_back( wchar_t val )
01080         {
01081             // we do this because the Unicode method still preserves UTF-16 code points
01082             mData.push_back( static_cast<unicode_char>( val ) );
01083         }
01084 #endif
01085 
01086 
01088         void push_back( code_point val )
01089         {
01090             mData.push_back( val );
01091         }
01093 
01094         void push_back( char val )
01095         {
01096             mData.push_back( static_cast<code_point>( val ) );
01097         }
01099         bool inString( unicode_char ch ) const
01100         {
01101             const_iterator i, ie = end();
01102             for ( i = begin(); i != ie; i.moveNext() )
01103             {
01104                 if ( i.getCharacter() == ch )
01105                     return true;
01106             }
01107             return false;
01108         }
01110 
01112 
01114 
01115 
01116         const std::string& asUTF8() const
01117         {
01118             _load_buffer_UTF8();
01119             return *m_buffer.mStrBuffer;
01120         }
01122         const char* asUTF8_c_str() const
01123         {
01124             _load_buffer_UTF8();
01125             return m_buffer.mStrBuffer->c_str();
01126         }
01128         const utf32string& asUTF32() const
01129         {
01130             _load_buffer_UTF32();
01131             return *m_buffer.mUTF32StrBuffer;
01132         }
01134         const unicode_char* asUTF32_c_str() const
01135         {
01136             _load_buffer_UTF32();
01137             return m_buffer.mUTF32StrBuffer->c_str();
01138         }
01140         const std::wstring& asWStr() const
01141         {
01142             _load_buffer_WStr();
01143             return *m_buffer.mWStrBuffer;
01144         }
01146         const wchar_t* asWStr_c_str() const
01147         {
01148             _load_buffer_WStr();
01149             return m_buffer.mWStrBuffer->c_str();
01150         }
01152 
01154 
01156 
01157 
01158         code_point& at( size_type loc )
01159         {
01160             return mData.at( loc );
01161         }
01163         const code_point& at( size_type loc ) const
01164         {
01165             return mData.at( loc );
01166         }
01168 
01172         unicode_char getChar( size_type loc ) const
01173         {
01174             const code_point* ptr = c_str();
01175             unicode_char uc;
01176             size_t l = _utf16_char_length( ptr[loc] );
01177             code_point cp[2] = { /* blame the code beautifier */
01178                                    0, 0
01179                                };
01180             cp[0] = ptr[loc];
01181 
01182             if ( l == 2 && ( loc + 1 ) < mData.length() )
01183             {
01184                 cp[1] = ptr[loc+1];
01185             }
01186             _utf16_to_utf32( cp, uc );
01187             return uc;
01188         }
01190 
01198         int setChar( size_type loc, unicode_char ch )
01199         {
01200             code_point cp[2] = { /* blame the code beautifier */
01201                                    0, 0
01202                                };
01203             size_t l = _utf32_to_utf16( ch, cp );
01204             unicode_char existingChar = getChar( loc );
01205             size_t existingSize = _utf16_char_length( existingChar );
01206             size_t newSize = _utf16_char_length( ch );
01207 
01208             if ( newSize > existingSize )
01209             {
01210                 at( loc ) = cp[0];
01211                 insert( loc + 1, 1, cp[1] );
01212                 return 1;
01213             }
01214             if ( newSize < existingSize )
01215             {
01216                 erase( loc, 1 );
01217                 at( loc ) = cp[0];
01218                 return -1;
01219             }
01220 
01221             // newSize == existingSize
01222             at( loc ) = cp[0];
01223             if ( l == 2 ) at( loc + 1 ) = cp[1];
01224             return 0;
01225         }
01227 
01229 
01231 
01232 
01233         iterator begin()
01234         {
01235             iterator i;
01236             i.mIter = mData.begin();
01237             i.mString = this;
01238             return i;
01239         }
01241         const_iterator begin() const
01242         {
01243             const_iterator i;
01244             i.mIter = const_cast<UString*>( this )->mData.begin();
01245             i.mString = const_cast<UString*>( this );
01246             return i;
01247         }
01249         iterator end()
01250         {
01251             iterator i;
01252             i.mIter = mData.end();
01253             i.mString = this;
01254             return i;
01255         }
01257         const_iterator end() const
01258         {
01259             const_iterator i;
01260             i.mIter = const_cast<UString*>( this )->mData.end();
01261             i.mString = const_cast<UString*>( this );
01262             return i;
01263         }
01265         reverse_iterator rbegin()
01266         {
01267             reverse_iterator i;
01268             i.mIter = mData.end();
01269             i.mString = this;
01270             return i;
01271         }
01273         const_reverse_iterator rbegin() const
01274         {
01275             const_reverse_iterator i;
01276             i.mIter = const_cast<UString*>( this )->mData.end();
01277             i.mString = const_cast<UString*>( this );
01278             return i;
01279         }
01281         reverse_iterator rend()
01282         {
01283             reverse_iterator i;
01284             i.mIter = mData.begin();
01285             i.mString = this;
01286             return i;
01287         }
01289         const_reverse_iterator rend() const
01290         {
01291             const_reverse_iterator i;
01292             i.mIter = const_cast<UString*>( this )->mData.begin();
01293             i.mString = const_cast<UString*>( this );
01294             return i;
01295         }
01297 
01299 
01301 
01302 
01303         UString& assign( iterator start, iterator end )
01304         {
01305             mData.assign( start.mIter, end.mIter );
01306             return *this;
01307         }
01309         UString& assign( const UString& str )
01310         {
01311             mData.assign( str.mData );
01312             return *this;
01313         }
01315         UString& assign( const code_point* str )
01316         {
01317             mData.assign( str );
01318             return *this;
01319         }
01321         UString& assign( const code_point* str, size_type num )
01322         {
01323             mData.assign( str, num );
01324             return *this;
01325         }
01327         UString& assign( const UString& str, size_type index, size_type len )
01328         {
01329             mData.assign( str.mData, index, len );
01330             return *this;
01331         }
01333         UString& assign( size_type num, const code_point& ch )
01334         {
01335             mData.assign( num, ch );
01336             return *this;
01337         }
01339         UString& assign( const std::wstring& wstr )
01340         {
01341             mData.clear();
01342             mData.reserve( wstr.length() ); // best guess bulk allocate
01343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
01344             code_point tmp;
01345             std::wstring::const_iterator i, ie = wstr.end();
01346             for ( i = wstr.begin(); i != ie; i++ )
01347             {
01348                 tmp = static_cast<code_point>( *i );
01349                 mData.push_back( tmp );
01350             }
01351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
01352             code_point cp[3] = { 0, 0, 0 };
01353             unicode_char tmp;
01354             std::wstring::const_iterator i, ie = wstr.end();
01355             for ( i = wstr.begin(); i != ie; i++ )
01356             {
01357                 tmp = static_cast<unicode_char>( *i );
01358                 size_t l = _utf32_to_utf16( tmp, cp );
01359                 if ( l > 0 ) mData.push_back( cp[0] );
01360                 if ( l > 1 ) mData.push_back( cp[1] );
01361             }
01362 #endif
01363             return *this;
01364         }
01365 #if MYGUI_IS_NATIVE_WCHAR_T
01366 
01367         UString& assign( const wchar_t* w_str )
01368         {
01369             std::wstring tmp;
01370             tmp.assign( w_str );
01371             return assign( tmp );
01372         }
01374         UString& assign( const wchar_t* w_str, size_type num )
01375         {
01376             std::wstring tmp;
01377             tmp.assign( w_str, num );
01378             return assign( tmp );
01379         }
01380 #endif
01381 
01382         UString& assign( const std::string& str )
01383         {
01384             size_type len = _verifyUTF8( str );
01385             clear(); // empty our contents, if there are any
01386             reserve( len ); // best guess bulk capacity growth
01387 
01388             // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
01389             // then converting it to UTF-16, then finally appending the data buffer
01390 
01391             unicode_char uc;          // temporary Unicode character buffer
01392             unsigned char utf8buf[7]; // temporary UTF-8 buffer
01393             utf8buf[6] = 0;
01394             size_t utf8len;           // UTF-8 length
01395             code_point utf16buff[3];  // temporary UTF-16 buffer
01396             utf16buff[2] = 0;
01397             size_t utf16len;          // UTF-16 length
01398 
01399             std::string::const_iterator i, ie = str.end();
01400             for ( i = str.begin(); i != ie; i++ )
01401             {
01402                 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
01403                 for ( size_t j = 0; j < utf8len; j++ )
01404                 { // load the needed UTF-8 bytes
01405                     utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
01406                 }
01407                 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
01408                 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
01409                 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
01410 
01411                 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
01412                 append( utf16buff, utf16len ); // append the characters to the string
01413             }
01414             return *this;
01415         }
01417         UString& assign( const char* c_str )
01418         {
01419             std::string tmp( c_str );
01420             return assign( tmp );
01421         }
01423         UString& assign( const char* c_str, size_type num )
01424         {
01425             std::string tmp;
01426             tmp.assign( c_str, num );
01427             return assign( tmp );
01428         }
01430 
01432 
01434 
01435 
01436         UString& append( const UString& str )
01437         {
01438             mData.append( str.mData );
01439             return *this;
01440         }
01442         UString& append( const code_point* str )
01443         {
01444             mData.append( str );
01445             return *this;
01446         }
01448         UString& append( const UString& str, size_type index, size_type len )
01449         {
01450             mData.append( str.mData, index, len );
01451             return *this;
01452         }
01454         UString& append( const code_point* str, size_type num )
01455         {
01456             mData.append( str, num );
01457             return *this;
01458         }
01460         UString& append( size_type num, code_point ch )
01461         {
01462             mData.append( num, ch );
01463             return *this;
01464         }
01466         UString& append( iterator start, iterator end )
01467         {
01468             mData.append( start.mIter, end.mIter );
01469             return *this;
01470         }
01471 #if MYGUI_IS_NATIVE_WCHAR_T
01472 
01473         UString& append( const wchar_t* w_str, size_type num )
01474         {
01475             std::wstring tmp( w_str, num );
01476             return append( tmp );
01477         }
01479         UString& append( size_type num, wchar_t ch )
01480         {
01481             return append( num, static_cast<unicode_char>( ch ) );
01482         }
01483 #endif
01484 
01485         UString& append( const char* c_str, size_type num )
01486         {
01487             UString tmp( c_str, num );
01488             append( tmp );
01489             return *this;
01490         }
01492         UString& append( size_type num, char ch )
01493         {
01494             append( num, static_cast<code_point>( ch ) );
01495             return *this;
01496         }
01498         UString& append( size_type num, unicode_char ch )
01499         {
01500             code_point cp[2] = { 0, 0 };
01501             if ( _utf32_to_utf16( ch, cp ) == 2 )
01502             {
01503                 for ( size_type i = 0; i < num; i++ )
01504                 {
01505                     append( 1, cp[0] );
01506                     append( 1, cp[1] );
01507                 }
01508             }
01509             else
01510             {
01511                 for ( size_type i = 0; i < num; i++ )
01512                 {
01513                     append( 1, cp[0] );
01514                 }
01515             }
01516             return *this;
01517         }
01519 
01521 
01523 
01524 
01525         iterator insert( iterator i, const code_point& ch )
01526         {
01527             iterator ret;
01528             ret.mIter = mData.insert( i.mIter, ch );
01529             ret.mString = this;
01530             return ret;
01531         }
01533         UString& insert( size_type index, const UString& str )
01534         {
01535             mData.insert( index, str.mData );
01536             return *this;
01537         }
01539         UString& insert( size_type index, const code_point* str )
01540         {
01541             mData.insert( index, str );
01542             return *this;
01543         }
01545         UString& insert( size_type index1, const UString& str, size_type index2, size_type num )
01546         {
01547             mData.insert( index1, str.mData, index2, num );
01548             return *this;
01549         }
01551         void insert( iterator i, iterator start, iterator end )
01552         {
01553             mData.insert( i.mIter, start.mIter, end.mIter );
01554         }
01556         UString& insert( size_type index, const code_point* str, size_type num )
01557         {
01558             mData.insert( index, str, num );
01559             return *this;
01560         }
01561 #if MYGUI_IS_NATIVE_WCHAR_T
01562 
01563         UString& insert( size_type index, const wchar_t* w_str, size_type num )
01564         {
01565             UString tmp( w_str, num );
01566             insert( index, tmp );
01567             return *this;
01568         }
01569 #endif
01570 
01571         UString& insert( size_type index, const char* c_str, size_type num )
01572         {
01573             UString tmp( c_str, num );
01574             insert( index, tmp );
01575             return *this;
01576         }
01578         UString& insert( size_type index, size_type num, code_point ch )
01579         {
01580             mData.insert( index, num, ch );
01581             return *this;
01582         }
01583 #if MYGUI_IS_NATIVE_WCHAR_T
01584 
01585         UString& insert( size_type index, size_type num, wchar_t ch )
01586         {
01587             insert( index, num, static_cast<unicode_char>( ch ) );
01588             return *this;
01589         }
01590 #endif
01591 
01592         UString& insert( size_type index, size_type num, char ch )
01593         {
01594             insert( index, num, static_cast<code_point>( ch ) );
01595             return *this;
01596         }
01598         UString& insert( size_type index, size_type num, unicode_char ch )
01599         {
01600             code_point cp[3] = { 0, 0, 0 };
01601             size_t l = _utf32_to_utf16( ch, cp );
01602             if ( l == 1 )
01603             {
01604                 return insert( index, num, cp[0] );
01605             }
01606             for ( size_type c = 0; c < num; c++ )
01607             {
01608                 // insert in reverse order to preserve ordering after insert
01609                 insert( index, 1, cp[1] );
01610                 insert( index, 1, cp[0] );
01611             }
01612             return *this;
01613         }
01615         void insert( iterator i, size_type num, const code_point& ch )
01616         {
01617             mData.insert( i.mIter, num, ch );
01618         }
01619 #if MYGUI_IS_NATIVE_WCHAR_T
01620 
01621         void insert( iterator i, size_type num, const wchar_t& ch )
01622         {
01623             insert( i, num, static_cast<unicode_char>( ch ) );
01624         }
01625 #endif
01626 
01627         void insert( iterator i, size_type num, const char& ch )
01628         {
01629             insert( i, num, static_cast<code_point>( ch ) );
01630         }
01632         void insert( iterator i, size_type num, const unicode_char& ch )
01633         {
01634             code_point cp[3] = { 0, 0, 0 };
01635             size_t l = _utf32_to_utf16( ch, cp );
01636             if ( l == 1 )
01637             {
01638                 insert( i, num, cp[0] );
01639             }
01640             else
01641             {
01642                 for ( size_type c = 0; c < num; c++ )
01643                 {
01644                     // insert in reverse order to preserve ordering after insert
01645                     insert( i, 1, cp[1] );
01646                     insert( i, 1, cp[0] );
01647                 }
01648             }
01649         }
01651 
01653 
01655 
01656 
01657         iterator erase( iterator loc )
01658         {
01659             iterator ret;
01660             ret.mIter = mData.erase( loc.mIter );
01661             ret.mString = this;
01662             return ret;
01663         }
01665         iterator erase( iterator start, iterator end )
01666         {
01667             iterator ret;
01668             ret.mIter = mData.erase( start.mIter, end.mIter );
01669             ret.mString = this;
01670             return ret;
01671         }
01673         UString& erase( size_type index = 0, size_type num = npos )
01674         {
01675             if ( num == npos )
01676                 mData.erase( index );
01677             else
01678                 mData.erase( index, num );
01679             return *this;
01680         }
01682 
01684 
01686 
01687 
01688         UString& replace( size_type index1, size_type num1, const UString& str )
01689         {
01690             mData.replace( index1, num1, str.mData, 0, npos );
01691             return *this;
01692         }
01694         UString& replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01695         {
01696             mData.replace( index1, num1, str.mData, 0, num2 );
01697             return *this;
01698         }
01700         UString& replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01701         {
01702             mData.replace( index1, num1, str.mData, index2, num2 );
01703             return *this;
01704         }
01706         UString& replace( iterator start, iterator end, const UString& str, size_type num = npos )
01707         {
01708             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01709 
01710             size_type index1 = begin() - st;
01711             size_type num1 = end - st;
01712             return replace( index1, num1, str, 0, num );
01713         }
01715         UString& replace( size_type index, size_type num1, size_type num2, code_point ch )
01716         {
01717             mData.replace( index, num1, num2, ch );
01718             return *this;
01719         }
01721         UString& replace( iterator start, iterator end, size_type num, code_point ch )
01722         {
01723             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01724 
01725             size_type index1 = begin() - st;
01726             size_type num1 = end - st;
01727             return replace( index1, num1, num, ch );
01728         }
01730 
01732 
01734 
01735 
01736         int compare( const UString& str ) const
01737         {
01738             return mData.compare( str.mData );
01739         }
01741         int compare( const code_point* str ) const
01742         {
01743             return mData.compare( str );
01744         }
01746         int compare( size_type index, size_type length, const UString& str ) const
01747         {
01748             return mData.compare( index, length, str.mData );
01749         }
01751         int compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01752         {
01753             return mData.compare( index, length, str.mData, index2, length2 );
01754         }
01756         int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01757         {
01758             return mData.compare( index, length, str, length2 );
01759         }
01760 #if MYGUI_IS_NATIVE_WCHAR_T
01761 
01762         int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01763         {
01764             UString tmp( w_str, length2 );
01765             return compare( index, length, tmp );
01766         }
01767 #endif
01768 
01769         int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01770         {
01771             UString tmp( c_str, length2 );
01772             return compare( index, length, tmp );
01773         }
01775 
01777 
01779 
01780 
01781 
01782         size_type find( const UString& str, size_type index = 0 ) const
01783         {
01784             return mData.find( str.c_str(), index );
01785         }
01787 
01788         size_type find( const code_point* cp_str, size_type index, size_type length ) const
01789         {
01790             UString tmp( cp_str );
01791             return mData.find( tmp.c_str(), index, length );
01792         }
01794 
01795         size_type find( const char* c_str, size_type index, size_type length ) const
01796         {
01797             UString tmp( c_str );
01798             return mData.find( tmp.c_str(), index, length );
01799         }
01800 #if MYGUI_IS_NATIVE_WCHAR_T
01801 
01802 
01803         size_type find( const wchar_t* w_str, size_type index, size_type length ) const
01804         {
01805             UString tmp( w_str );
01806             return mData.find( tmp.c_str(), index, length );
01807         }
01808 #endif
01809 
01810 
01811         size_type find( char ch, size_type index = 0 ) const
01812         {
01813             return find( static_cast<code_point>( ch ), index );
01814         }
01816 
01817         size_type find( code_point ch, size_type index = 0 ) const
01818         {
01819             return mData.find( ch, index );
01820         }
01821 #if MYGUI_IS_NATIVE_WCHAR_T
01822 
01823 
01824         size_type find( wchar_t ch, size_type index = 0 ) const
01825         {
01826             return find( static_cast<unicode_char>( ch ), index );
01827         }
01828 #endif
01829 
01830 
01831         size_type find( unicode_char ch, size_type index = 0 ) const
01832         {
01833             code_point cp[3] = { 0, 0, 0 };
01834             size_t l = _utf32_to_utf16( ch, cp );
01835             return find( UString( cp, l ), index );
01836         }
01837 
01839         size_type rfind( const UString& str, size_type index = 0 ) const
01840         {
01841             return mData.rfind( str.c_str(), index );
01842         }
01844         size_type rfind( const code_point* cp_str, size_type index, size_type num ) const
01845         {
01846             UString tmp( cp_str );
01847             return mData.rfind( tmp.c_str(), index, num );
01848         }
01850         size_type rfind( const char* c_str, size_type index, size_type num ) const
01851         {
01852             UString tmp( c_str );
01853             return mData.rfind( tmp.c_str(), index, num );
01854         }
01855 #if MYGUI_IS_NATIVE_WCHAR_T
01856 
01857         size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const
01858         {
01859             UString tmp( w_str );
01860             return mData.rfind( tmp.c_str(), index, num );
01861         }
01862 #endif
01863 
01864         size_type rfind( char ch, size_type index = 0 ) const
01865         {
01866             return rfind( static_cast<code_point>( ch ), index );
01867         }
01869         size_type rfind( code_point ch, size_type index ) const
01870         {
01871             return mData.rfind( ch, index );
01872         }
01873 #if MYGUI_IS_NATIVE_WCHAR_T
01874 
01875         size_type rfind( wchar_t ch, size_type index = 0 ) const
01876         {
01877             return rfind( static_cast<unicode_char>( ch ), index );
01878         }
01879 #endif
01880 
01881         size_type rfind( unicode_char ch, size_type index = 0 ) const
01882         {
01883             code_point cp[3] = { 0, 0, 0 };
01884             size_t l = _utf32_to_utf16( ch, cp );
01885             return rfind( UString( cp, l ), index );
01886         }
01888 
01890 
01892 
01893 
01894         size_type find_first_of( const UString &str, size_type index = 0, size_type num = npos ) const
01895         {
01896             size_type i = 0;
01897             const size_type len = length();
01898             while ( i < num && ( index + i ) < len )
01899             {
01900                 unicode_char ch = getChar( index + i );
01901                 if ( str.inString( ch ) )
01902                     return index + i;
01903                 i += _utf16_char_length( ch ); // increment by the Unicode character length
01904             }
01905             return npos;
01906         }
01908         size_type find_first_of( code_point ch, size_type index = 0 ) const
01909         {
01910             UString tmp;
01911             tmp.assign( 1, ch );
01912             return find_first_of( tmp, index );
01913         }
01915         size_type find_first_of( char ch, size_type index = 0 ) const
01916         {
01917             return find_first_of( static_cast<code_point>( ch ), index );
01918         }
01919 #if MYGUI_IS_NATIVE_WCHAR_T
01920 
01921         size_type find_first_of( wchar_t ch, size_type index = 0 ) const
01922         {
01923             return find_first_of( static_cast<unicode_char>( ch ), index );
01924         }
01925 #endif
01926 
01927         size_type find_first_of( unicode_char ch, size_type index = 0 ) const
01928         {
01929             code_point cp[3] = { 0, 0, 0 };
01930             size_t l = _utf32_to_utf16( ch, cp );
01931             return find_first_of( UString( cp, l ), index );
01932         }
01933 
01935         size_type find_first_not_of( const UString& str, size_type index = 0, size_type num = npos ) const
01936         {
                  // Walks forward from index one whole Unicode character at a time and returns the
                  // position of the first character for which str.inString() is false, or npos when
                  // every examined character is in str. At most num code units are examined.
01937             const size_type total = length();
01938             size_type consumed = 0;
01939             while ( consumed < num && ( index + consumed ) < total )
01940             {
01941                 const unicode_char current = getChar( index + consumed );
01942                 const bool listed = str.inString( current );
01943                 if ( !listed )
01944                     return index + consumed;
01945                 consumed += _utf16_char_length( current ); // characters above 0xFFFF occupy two code units
01946             }
01947             return npos;
01948         }
01949         size_type find_first_not_of( code_point ch, size_type index = 0 ) const
01950         {
                  // Builds a one-element UString holding ch and defers to the UString overload.
01951             UString needle;
01952             needle.assign( 1, ch );
01953             return find_first_not_of( needle, index );
01954         }
01956         size_type find_first_not_of( char ch, size_type index = 0 ) const
01957         {
                  // Converts the char to a code_point and forwards to the code_point overload.
01958             return find_first_not_of( static_cast<code_point>( ch ), index );
01959         }
01960 #if MYGUI_IS_NATIVE_WCHAR_T
01961 
01962         size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const
01963         {
                  // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; converts the wchar_t to a
                  // unicode_char and forwards to the unicode_char overload.
01964             return find_first_not_of( static_cast<unicode_char>( ch ), index );
01965         }
01966 #endif
01967 
01968         size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const
01969         {
                  // Encodes ch as one or two UTF-16 words, wraps them in a UString and defers to
                  // the UString overload.
01970             code_point units[3] = { 0, 0, 0 };
01971             const size_t count = _utf32_to_utf16( ch, units );
01972             return find_first_not_of( UString( units, count ), index );
01973         }
01974 
01976         size_type find_last_of( const UString& str, size_type index = npos, size_type num = npos ) const
01977         {
                  // Searches backwards, starting at index, for the last character for which
                  // str.inString() is true and returns its position (the position of the lead word
                  // when the character is a surrogate pair), or npos when nothing matches.
                  // At most num code units are stepped over. An index at or beyond the end of the
                  // string starts the search at the last code unit.
01978             const size_type len = length();
01979             if ( len == 0 ) return npos; // nothing to search, and len - 1 below would wrap around
01980             if ( index >= len ) index = len - 1; // '>=' (was '>'): index == length() used to be read out of range
01981             size_type i = 0;
01982             while ( i < num && ( index - i ) != npos ) // index - i wraps once i passes position 0 (presumably npos is size_type(-1))
01983             {
01984                 size_type j = index - i;
01985                 // careful to step full Unicode characters
01986                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
01987                 {
01988                     j = index - ++i; // move onto the lead word so getChar() sees the whole pair
01989                 }
01990                 // and back to the usual dull test
01991                 unicode_char ch = getChar( j );
01992                 if ( str.inString( ch ) )
01993                     return j;
01994                 i++;
01995             }
01996             return npos;
01997         }
01999         size_type find_last_of( code_point ch, size_type index = npos ) const
02000         {
                  // Builds a one-element UString holding ch and defers to the UString overload.
02001             UString needle;
02002             needle.assign( 1, ch );
02003             return find_last_of( needle, index );
02004         }
02006         size_type find_last_of( char ch, size_type index = npos ) const
02007         {
                  // Converts the char to a code_point and forwards to the code_point overload.
02008             return find_last_of( static_cast<code_point>( ch ), index );
02009         }
02010 #if MYGUI_IS_NATIVE_WCHAR_T
02011 
02012         size_type find_last_of( wchar_t ch, size_type index = npos ) const
02013         {
                  // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; converts the wchar_t to a
                  // unicode_char and forwards to the unicode_char overload.
02014             return find_last_of( static_cast<unicode_char>( ch ), index );
02015         }
02016 #endif
02017 
02018         size_type find_last_of( unicode_char ch, size_type index = npos ) const
02019         {
                  // Encodes ch as one or two UTF-16 words, wraps them in a UString and defers to
                  // the UString overload.
02020             code_point units[3] = { 0, 0, 0 };
02021             const size_t count = _utf32_to_utf16( ch, units );
02022             return find_last_of( UString( units, count ), index );
02023         }
02024 
02026         size_type find_last_not_of( const UString& str, size_type index = npos, size_type num = npos ) const
02027         {
                  // Searches backwards, starting at index, for the last character for which
                  // str.inString() is false and returns its position (the position of the lead word
                  // when the character is a surrogate pair), or npos when every examined character
                  // is in str. At most num code units are stepped over. An index at or beyond the
                  // end of the string starts the search at the last code unit.
02028             const size_type len = length();
02029             if ( len == 0 ) return npos; // nothing to search, and len - 1 below would wrap around
02030             if ( index >= len ) index = len - 1; // '>=' (was '>'): index == length() used to be read out of range
02031             size_type i = 0;
02032             while ( i < num && ( index - i ) != npos ) // index - i wraps once i passes position 0 (presumably npos is size_type(-1))
02033             {
02034                 size_type j = index - i;
02035                 // careful to step full Unicode characters
02036                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) )
02037                 {
02038                     j = index - ++i; // move onto the lead word so getChar() sees the whole pair
02039                 }
02040                 // and back to the usual dull test
02041                 unicode_char ch = getChar( j );
02042                 if ( !str.inString( ch ) )
02043                     return j;
02044                 i++;
02045             }
02046             return npos;
02047         }
02049         size_type find_last_not_of( code_point ch, size_type index = npos ) const
02050         {
                  // Builds a one-element UString holding ch and defers to the UString overload.
02051             UString needle;
02052             needle.assign( 1, ch );
02053             return find_last_not_of( needle, index );
02054         }
02056         size_type find_last_not_of( char ch, size_type index = npos ) const
02057         {
                  // Converts the char to a code_point and forwards to the code_point overload.
02058             return find_last_not_of( static_cast<code_point>( ch ), index );
02059         }
02060 #if MYGUI_IS_NATIVE_WCHAR_T
02061 
02062         size_type find_last_not_of( wchar_t ch, size_type index = npos ) const
02063         {
                  // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; converts the wchar_t to a
                  // unicode_char and forwards to the unicode_char overload.
02064             return find_last_not_of( static_cast<unicode_char>( ch ), index );
02065         }
02066 #endif
02067 
02068         size_type find_last_not_of( unicode_char ch, size_type index = npos ) const
02069         {
                  // Encodes ch as one or two UTF-16 words, wraps them in a UString and defers to
                  // the UString overload.
02070             code_point units[3] = { 0, 0, 0 };
02071             const size_t count = _utf32_to_utf16( ch, units );
02072             return find_last_not_of( UString( units, count ), index );
02073         }
02075 
02077 
02079 
02080 
02081         bool operator<( const UString& right ) const
02082         {
                  // True when compare( right ) yields a negative value.
02083             return compare( right ) < 0;
02084         }
02086         bool operator<=( const UString& right ) const
02087         {
                  // True when compare( right ) yields zero or a negative value.
02088             return compare( right ) <= 0;
02089         }
02091         bool operator>( const UString& right ) const
02092         {
                  // True when compare( right ) yields a positive value.
02093             return compare( right ) > 0;
02094         }
02096         bool operator>=( const UString& right ) const
02097         {
                  // True when compare( right ) yields zero or a positive value.
02098             return compare( right ) >= 0;
02099         }
02101         bool operator==( const UString& right ) const
02102         {
                  // Equality is defined as compare( right ) returning exactly zero.
02103             return compare( right ) == 0;
02104         }
02106         bool operator!=( const UString& right ) const
02107         {
                  // Defined as the negation of operator== so the two can never disagree.
02108             return !operator==( right );
02109         }
02111         UString& operator=( const UString& s )
02112         {
                  // Copy assignment: delegates to assign() and returns whatever reference it returns.
02113             return assign( s );
02114         }
02116         UString& operator=( code_point ch )
02117         {
                  // Replaces the current contents with the single code point ch.
02118             clear();
02119             return append( 1, ch );
02120         }
02122         UString& operator=( char ch )
02123         {
                  // Replaces the current contents with the single char ch.
02124             clear();
02125             return append( 1, ch );
02126         }
02127 #if MYGUI_IS_NATIVE_WCHAR_T
02128 
02129         UString& operator=( wchar_t ch )
02130         {
                  // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; replaces the current contents
                  // with the single wchar_t ch.
02131             clear();
02132             return append( 1, ch );
02133         }
02134 #endif
02135 
02136         UString& operator=( unicode_char ch )
02137         {
                  // Replaces the current contents with the single Unicode character ch.
02138             clear();
02139             return append( 1, ch );
02140         }
02142         code_point& operator[]( size_type index )
02143         {
                  // Mutable element access; simply forwards to at(), so it behaves exactly like at().
02144             return at( index );
02145         }
02147         const code_point& operator[]( size_type index ) const
02148         {
                  // Read-only element access; simply forwards to at(), so it behaves exactly like at().
02149             return at( index );
02150         }
02152 
02154 
02156 
02157 
02158         operator std::string() const
02159         {
                  // Implicit conversion: returns a std::string built from the result of asUTF8().
02160             return std::string( asUTF8() );
02161         }
02163         operator std::wstring() const
02164         {
                  // Implicit conversion: returns a std::wstring built from the result of asWStr().
02165             return std::wstring( asWStr() );
02166         }
02168 
02170 
02172 
02173 
02174         static bool _utf16_independent_char( code_point cp )
02175         {
                  // True when cp is a complete character on its own, i.e. it is not one of the two
                  // words of a surrogate pair.
02176             const bool isSurrogate = ( cp >= 0xD800 && cp <= 0xDFFF ); // lead and trail ranges together
02177             return !isSurrogate;
02178         }
02181         static bool _utf16_surrogate_lead( code_point cp )
02182         {
                  // True when cp lies in 0xD800-0xDBFF, the range of the 1st word of a surrogate pair.
02183             return ( cp >= 0xD800 && cp <= 0xDBFF );
02184         }
02188         static bool _utf16_surrogate_follow( code_point cp )
02189         {
                  // True when cp lies in 0xDC00-0xDFFF, the range of the 2nd word of a surrogate pair.
02190             return ( cp >= 0xDC00 && cp <= 0xDFFF );
02191         }
02195         static size_t _utf16_char_length( code_point cp )
02196         {
                  // Number of UTF-16 words in the character that starts with cp: 2 when cp opens a
                  // surrogate pair (0xD800-0xDBFF), otherwise 1.
02197             const bool opensPair = ( cp >= 0xD800 && cp <= 0xDBFF );
02198             return opensPair ? 2 : 1;
02199         }
02202         static size_t _utf16_char_length( unicode_char uc )
02203         {
                  // Number of UTF-16 words needed to store uc: values above 0xFFFF do not fit into a
                  // single word and need a surrogate pair.
02204             const bool needsPair = ( uc > 0xFFFF );
02205             return needsPair ? 2 : 1;
02206         }
02209 
02213         static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
02214         {
                  // Decodes one character starting at in_cp[0] into out_uc and returns how many
                  // words were consumed (1 or 2). in_cp[1] is only read when in_cp[0] is a lead word.
02215             const code_point& lead = in_cp[0];
02216             const code_point& trail = in_cp[1];
02217 
02218             // a pair needs a lead word directly followed by a trail word
02219             const bool isPair = ( 0xD800 <= lead && lead <= 0xDBFF ) && ( 0xDC00 <= trail && trail <= 0xDFFF );
02220             if ( !isPair )
02221             { // lone or mismatched words are copied through unchanged
02222                 out_uc = lead;
02223                 return 1;
02224             }
02225 
02226             unsigned short hi = lead, lo = trail; // writable copies of both halves
02227             hi -= 0xD800; // strip the encoding markers
02228             lo -= 0xDC00;
02229 
02230             unicode_char decoded = ( hi & 0x03FF ) << 10; // upper 10 bits
02231             decoded |= ( lo & 0x03FF ); // lower 10 bits
02232             decoded += 0x10000; // restore the value offset
02233             out_uc = decoded;
02234 
02235             return 2; // the whole operation takes two words
02236         }
02244 
02249         static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
02250         {
                  // Encodes in_uc into out_cp and returns the number of words written (1 or 2).
                  // Values up to 0xFFFF are stored verbatim, including surrogate-range values.
02251             if ( in_uc <= 0xFFFF )
02252             { // we blindly preserve sentinel values because our decoder understands them
02253                 out_cp[0] = in_uc;
02254                 return 1;
02255             }
02256 
02257             const unicode_char offset = in_uc - 0x10000; // value relative to the first pair-encoded character
02258 
02259             // upper 10 bits plus the lead marker, lower 10 bits plus the trail marker
02260             const unsigned short hi = static_cast<unsigned short>( ( ( offset >> 10 ) & 0x03FF ) + 0xD800 );
02261             const unsigned short lo = static_cast<unsigned short>( ( offset & 0x03FF ) + 0xDC00 );
02262 
02263             out_cp[0] = hi;
02264             out_cp[1] = lo;
02265 
02266             return 2; // surrogate pairs use two words
02267         }
02273 
02275 
02277 
02278 
02279         static bool _utf8_start_char( unsigned char cp )
02280         {
                  // True when cp does not carry the continuation-byte marker, i.e. the bits outside
                  // _cont_mask differ from _cont.
02281             return ( cp & ~_cont_mask ) != _cont;
02282         }
02284         static size_t _utf8_char_length( unsigned char cp )
02285         {
                  // Length in bytes (1-6) of the UTF-8 sequence introduced by header byte cp.
                  // Throws invalid_data when cp matches none of the header patterns.
02286             size_t bytes = 0;
02287             if ( !( cp & 0x80 ) ) bytes = 1; // high bit clear: single-byte character
02288             else if (( cp & ~_lead1_mask ) == _lead1 ) bytes = 2;
02289             else if (( cp & ~_lead2_mask ) == _lead2 ) bytes = 3;
02290             else if (( cp & ~_lead3_mask ) == _lead3 ) bytes = 4;
02291             else if (( cp & ~_lead4_mask ) == _lead4 ) bytes = 5;
02292             else if (( cp & ~_lead5_mask ) == _lead5 ) bytes = 6;
02293             else throw invalid_data( "invalid UTF-8 sequence header value" );
02294             return bytes;
02295         }
02295         static size_t _utf8_char_length( unicode_char uc )
02296         {
02297             /*
02298             Bytes needed to encode uc, chosen by the highest bit that is set:
02299             7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
02300             11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
02301             16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
02302             21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
02303             26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02304             31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02305             */
02306             size_t bytes = 0;
02307             if ( !( uc & ~0x0000007F ) ) bytes = 1;
02308             else if ( !( uc & ~0x000007FF ) ) bytes = 2;
02309             else if ( !( uc & ~0x0000FFFF ) ) bytes = 3;
02310             else if ( !( uc & ~0x001FFFFF ) ) bytes = 4;
02311             else if ( !( uc & ~0x03FFFFFF ) ) bytes = 5;
02312             else if ( !( uc & ~0x7FFFFFFF ) ) bytes = 6;
02313             else throw invalid_data( "invalid UTF-32 value" ); // a bit outside the 31-bit range is set
02314             return bytes;
02315         }
02313 
02315         static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
02316         {
                  // Decodes the UTF-8 sequence starting at in_cp[0] into out_uc and returns the
                  // number of bytes consumed. Throws invalid_data on a bad header byte (raised by
                  // _utf8_char_length) or on a byte that is not a valid continuation byte.
02317             const size_t len = _utf8_char_length( in_cp[0] );
02318             if ( len == 1 )
02319             { // single byte: the value is the byte itself
02320                 out_uc = in_cp[0];
02321                 return 1;
02322             }
02323 
02324             // payload bits of the header byte, selected by the sequence length
02325             unicode_char value = 0;
02326             if ( len == 6 ) value = in_cp[0] & _lead5_mask;
02327             else if ( len == 5 ) value = in_cp[0] & _lead4_mask;
02328             else if ( len == 4 ) value = in_cp[0] & _lead3_mask;
02329             else if ( len == 3 ) value = in_cp[0] & _lead2_mask;
02330             else if ( len == 2 ) value = in_cp[0] & _lead1_mask;
02331 
02332             // each continuation byte contributes six more bits
02333             for ( size_t pos = 1; pos < len; ++pos )
02334             {
02335                 const unsigned char next = in_cp[pos];
02336                 if (( next & ~_cont_mask ) != _cont )
02337                     throw invalid_data( "bad UTF-8 continuation byte" );
02338                 value <<= 6;
02339                 value |= ( next & _cont_mask );
02340             }
02341 
02342             out_uc = value;
02343             return len;
02344         }
02357         static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
02358         {
                  // Encodes in_uc as UTF-8 into out_cp and returns the number of bytes written.
                  // Throws invalid_data (raised by _utf8_char_length) when in_uc cannot be encoded.
02359             const size_t len = _utf8_char_length( in_uc ); // predicted byte length of the sequence
02360             unicode_char rest = in_uc; // bits that still have to be written
02361 
02362             // fill the continuation bytes back to front, six bits at a time
02363             size_t pos = len - 1;
02364             while ( pos > 0 )
02365             {
02366                 out_cp[pos] = (( rest ) & _cont_mask ) | _cont;
02367                 rest >>= 6;
02368                 --pos;
02369             }
02370 
02371             // whatever is left goes into the header byte together with its length marker
02372             if ( len == 6 ) out_cp[0] = (( rest ) & _lead5_mask ) | _lead5;
02373             else if ( len == 5 ) out_cp[0] = (( rest ) & _lead4_mask ) | _lead4;
02374             else if ( len == 4 ) out_cp[0] = (( rest ) & _lead3_mask ) | _lead3;
02375             else if ( len == 3 ) out_cp[0] = (( rest ) & _lead2_mask ) | _lead2;
02376             else if ( len == 2 ) out_cp[0] = (( rest ) & _lead1_mask ) | _lead1;
02377             else out_cp[0] = ( rest ) & 0x7F; // single byte (and any unexpected length)
02378 
02379             return len;
02380         }
02396 
02398         static size_type _verifyUTF8( const unsigned char* c_str )
02399         {
                  // Copies the null-terminated byte string into a std::string and validates that copy
                  // with the std::string overload.
02400             std::string bytes( reinterpret_cast<const char*>( c_str ) );
02401             return _verifyUTF8( bytes );
02402         }
02404         static size_type _verifyUTF8( const std::string& str )
02405         {
                  // Validates str as UTF-8 and returns the number of characters (sequences) it holds.
                  // Throws invalid_data for an overlong encoding, for a byte that is not a valid
                  // continuation byte, and for a multi-byte sequence cut off by the end of the
                  // string. The iterator is never dereferenced at or beyond end(); the previous
                  // code did so on truncated input.
                  // NOTE(review): a byte with the high bit set that matches none of the lead
                  // patterns (e.g. a stray continuation byte) is still counted as one character.
02406             std::string::const_iterator i = str.begin();
02407             const std::string::const_iterator ie = str.end();
02408             size_type length = 0;
02409 
02410             while ( i != ie )
02411             {
02412                 // characters pass until we find an extended sequence
02413                 if (( *i ) & 0x80 )
02414                 {
02415                     unsigned char c = ( *i );
02416                     size_t contBytes = 0;
02417                     const bool hasNext = ( ie - i ) > 1; // the look-ahead below needs a following byte
02418 
02419                     // get continuation byte count and test for overlong sequences
02420                     if (( c & ~_lead1_mask ) == _lead1 )
02421                     { // 1 additional byte
02422                         if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
02423                         contBytes = 1;
02424                     }
02425                     else if (( c & ~_lead2_mask ) == _lead2 )
02426                     { // 2 additional bytes
02427                         contBytes = 2;
02428                         if ( c == _lead2 && hasNext )
02429                         { // possible overlong UTF-8 sequence
02430                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02431                             if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02432                         }
02433                     }
02434                     else if (( c & ~_lead3_mask ) == _lead3 )
02435                     { // 3 additional bytes
02436                         contBytes = 3;
02437                         if ( c == _lead3 && hasNext )
02438                         { // possible overlong UTF-8 sequence
02439                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02440                             if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02441                         }
02442                     }
02443                     else if (( c & ~_lead4_mask ) == _lead4 )
02444                     { // 4 additional bytes
02445                         contBytes = 4;
02446                         if ( c == _lead4 && hasNext )
02447                         { // possible overlong UTF-8 sequence
02448                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02449                             if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02450                         }
02451                     }
02452                     else if (( c & ~_lead5_mask ) == _lead5 )
02453                     { // 5 additional bytes
02454                         contBytes = 5;
02455                         if ( c == _lead5 && hasNext )
02456                         { // possible overlong UTF-8 sequence
02457                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02458                             if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02459                         }
02460                     }
02461 
02462                     // every announced continuation byte must exist and carry the continuation marker
02463                     while ( contBytes-- )
02464                     {
02465                         if ( ++i == ie ) // sequence is cut off by the end of the string
02466                             throw invalid_data( "bad UTF-8 continuation byte" );
02467                         c = ( *i ); // get next byte in sequence
02468                         if (( c & ~_cont_mask ) != _cont )
02469                             throw invalid_data( "bad UTF-8 continuation byte" );
02470                     }
02471                 }
02472                 length++;
02473                 i++;
02474             }
02475             return length;
02476         }
02479 
02480     private:
02481         //template<class ITER_TYPE> friend class _iterator;
02482         dstring mData; // the string's own storage (presumably the UTF-16 code units - confirm against the dstring typedef)
02483 
02485         enum BufferType // tag telling which member of the m_buffer union is currently valid
02486         {
02487             bt_none, // no scratch buffer allocated (state set by _init)
02488             bt_string, // m_buffer.mStrBuffer holds a std::string
02489             bt_wstring, // m_buffer.mWStrBuffer holds a std::wstring
02490             bt_utf32string // m_buffer.mUTF32StrBuffer holds a utf32string
02491         };
02492 
02494         void _init()
02495         {
                  // Puts the scratch-buffer bookkeeping into its empty state: no buffer type,
                  // zero size and a null pointer.
02496             m_bufferType = bt_none;
02497             m_bufferSize = 0;
02498             m_buffer.mVoidBuffer = 0;
02499         }
02500 
02502         // Scratch buffer
02504         void _cleanBuffer() const
02505         {
                  // Releases the scratch buffer, if any, using the delete that matches its recorded
                  // type, and returns the bookkeeping to the same empty state _init() establishes.
                  // The type tag is reset as well: the _getBuffer*() helpers decide from the tag alone
                  // whether a buffer can be reused, so a stale tag paired with a null pointer would
                  // make them call clear() through a null pointer.
02506             if ( m_buffer.mVoidBuffer != 0 )
02507             {
02508                 switch ( m_bufferType )
02509                 {
02510                 case bt_string:
02511                     delete m_buffer.mStrBuffer;
02512                     break;
02513                 case bt_wstring:
02514                     delete m_buffer.mWStrBuffer;
02515                     break;
02516                 case bt_utf32string:
02517                     delete m_buffer.mUTF32StrBuffer;
02518                     break;
02519                 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
02520                 default:
02521                     // delete void* is undefined, so the buffer is deliberately not deleted here
02522                     MYGUI_ASSERT(false, "This should never happen - mVoidBuffer should never contain something if we "
02523                         "don't know the type");
02524                     break;
02525                 }
02526                 m_buffer.mVoidBuffer = 0;
02527                 m_bufferSize = 0;
02528             }
02529             m_bufferType = bt_none; // keep the tag in step with the (now null) pointer
02530         }
02531 
02533         void _getBufferStr() const
02534         {
                  // Makes the scratch buffer an empty std::string: an existing std::string buffer is
                  // reused, any other buffer is released and replaced by a new std::string.
02535             const bool reusable = ( m_bufferType == bt_string );
02536             if ( !reusable )
02537             {
02538                 _cleanBuffer();
02539                 m_buffer.mStrBuffer = new std::string();
02540                 m_bufferType = bt_string;
02541             }
02542             m_buffer.mStrBuffer->clear();
02543         }
02544         void _getBufferWStr() const
02545         {
                  // Makes the scratch buffer an empty std::wstring: an existing std::wstring buffer is
                  // reused, any other buffer is released and replaced by a new std::wstring.
02546             const bool reusable = ( m_bufferType == bt_wstring );
02547             if ( !reusable )
02548             {
02549                 _cleanBuffer();
02550                 m_buffer.mWStrBuffer = new std::wstring();
02551                 m_bufferType = bt_wstring;
02552             }
02553             m_buffer.mWStrBuffer->clear();
02554         }
02555         void _getBufferUTF32Str() const
02556         {
                  // Makes the scratch buffer an empty utf32string: an existing utf32string buffer is
                  // reused, any other buffer is released and replaced by a new utf32string.
02557             const bool reusable = ( m_bufferType == bt_utf32string );
02558             if ( !reusable )
02559             {
02560                 _cleanBuffer();
02561                 m_buffer.mUTF32StrBuffer = new utf32string();
02562                 m_bufferType = bt_utf32string;
02563             }
02564             m_buffer.mUTF32StrBuffer->clear();
02565         }
02565 
02566         void _load_buffer_UTF8() const
02567         {
                  // Fills the std::string scratch buffer with the UTF-8 encoding of this string,
                  // converting one whole Unicode character per iteration.
02568             _getBufferStr();
02569             std::string& out = ( *m_buffer.mStrBuffer );
02570             out.reserve( length() ); // lower bound only: a character may need several bytes
02571 
02572             unsigned char encoded[6]; // longest sequence _utf32_to_utf8 can produce
02573             const char* bytes = reinterpret_cast<const char*>( encoded );
02574 
02575             const const_iterator last = end();
02576             for ( const_iterator it = begin(); it != last; it.moveNext() )
02577             {
02578                 const unicode_char c = it.getCharacter();
02579                 const size_t len = _utf32_to_utf8( c, encoded );
02580                 for ( size_t j = 0; j < len; ++j )
02581                     out.push_back( bytes[j] );
02582             }
02583         }
02587         void _load_buffer_WStr() const
02588         {
                  // Fills the std::wstring scratch buffer with this string's contents. With
                  // WCHAR_UTF16 defined the stored words are copied one by one; otherwise each
                  // whole Unicode character becomes one wchar_t.
02589             _getBufferWStr();
02590             std::wstring& out = ( *m_buffer.mWStrBuffer );
02591             out.reserve( length() ); // may over reserve, but should be close enough
02592             const const_iterator last = end();
02593 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02594             for ( const_iterator it = begin(); it != last; ++it )
02595             {
02596                 out.push_back(( wchar_t )( *it ) );
02597             }
02598 #else // wchar_t fits UTF-32
02599             for ( const_iterator it = begin(); it != last; it.moveNext() )
02600             {
02601                 const unicode_char c = it.getCharacter();
02602                 out.push_back(( wchar_t )c );
02603             }
02604 #endif
02605         }
02608         void _load_buffer_UTF32() const
02609         {
                  // Fills the utf32string scratch buffer with one element per whole Unicode
                  // character of this string.
02610             _getBufferUTF32Str();
02611             utf32string& out = ( *m_buffer.mUTF32StrBuffer );
02612             out.reserve( length() ); // may over reserve, but should be close enough
02613 
02614             const const_iterator last = end();
02615             for ( const_iterator it = begin(); it != last; it.moveNext() )
02616             {
02617                 const unicode_char c = it.getCharacter();
02618                 out.push_back( c );
02619             }
02620         }
02623 
02624         mutable BufferType m_bufferType; // identifies the data type held in m_buffer
02625         mutable size_t m_bufferSize; // size of the CString buffer
02626 
02627         // multi-purpose buffer used everywhere we need a throw-away buffer
        // (all members are mutable because the const buffer helpers above allocate and refill it)
02628         union
02629         {
02630             mutable void* mVoidBuffer; // untyped view of the pointer, used for null checks and resets
02631             mutable std::string* mStrBuffer; // valid while m_bufferType == bt_string
02632             mutable std::wstring* mWStrBuffer; // valid while m_bufferType == bt_wstring
02633             mutable utf32string* mUTF32StrBuffer; // valid while m_bufferType == bt_utf32string
02634         }
02635         m_buffer;
02636     };
02637 
02639     inline UString operator+( const UString& s1, const UString& s2 )
02640     {
              // Concatenation: copies s1, appends s2 to the copy and returns the result.
02641         return UString( s1 ).append( s2 );
02642     }
02644     inline UString operator+( const UString& s1, UString::code_point c )
02645     {
              // Copies s1 and appends one code point c to the copy.
02646         return UString( s1 ).append( 1, c );
02647     }
02649     inline UString operator+( const UString& s1, UString::unicode_char c )
02650     {
              // Copies s1 and appends one Unicode character c to the copy.
02651         return UString( s1 ).append( 1, c );
02652     }
02654     inline UString operator+( const UString& s1, char c )
02655     {
              // Copies s1 and appends one char c to the copy.
02656         return UString( s1 ).append( 1, c );
02657     }
02658 #if MYGUI_IS_NATIVE_WCHAR_T
02659 
02660     inline UString operator+( const UString& s1, wchar_t c )
02661     {
              // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; copies s1 and appends one wchar_t c.
02662         return UString( s1 ).append( 1, c );
02663     }
02664 #endif
02665 
02666     inline UString operator+( UString::code_point c, const UString& s2 )
02667     {
              // Prefix form: starts from an empty UString, appends c, then appends s2.
02668         return UString().append( 1, c ).append( s2 );
02669     }
02671     inline UString operator+( UString::unicode_char c, const UString& s2 )
02672     {
              // Prefix form: starts from an empty UString, appends c, then appends s2.
02673         return UString().append( 1, c ).append( s2 );
02674     }
02676     inline UString operator+( char c, const UString& s2 )
02677     {
              // Prefix form: starts from an empty UString, appends c, then appends s2.
02678         return UString().append( 1, c ).append( s2 );
02679     }
02680 #if MYGUI_IS_NATIVE_WCHAR_T
02681 
02682     inline UString operator+( wchar_t c, const UString& s2 )
02683     {
              // Only compiled when MYGUI_IS_NATIVE_WCHAR_T is set; prefix form that starts from an
              // empty UString, appends c, then appends s2.
02684         return UString().append( 1, c ).append( s2 );
02685     }
02686 #endif
02687 
02688     // (const) forward iterator common operators
02689     inline UString::size_type operator-( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02690     {
              // Distance between two forward iterators, taken from their underlying mIter members.
              // The difference is converted to the unsigned size_type on return.
02691         return ( left.mIter - right.mIter );
02692     }
02693     inline bool operator==( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02694     {
              // Two forward iterators are equal when their underlying mIter members are equal.
02695         return left.mIter == right.mIter;
02696     }
02697     inline bool operator!=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02698     {
              // Inequality of the underlying mIter members.
02699         return left.mIter != right.mIter;
02700     }
02701     inline bool operator<( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02702     {
              // Ordering follows the underlying mIter members directly.
02703         return left.mIter < right.mIter;
02704     }
02705     inline bool operator<=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02706     {
              // Ordering follows the underlying mIter members directly.
02707         return left.mIter <= right.mIter;
02708     }
02709     inline bool operator>( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02710     {
              // Ordering follows the underlying mIter members directly.
02711         return left.mIter > right.mIter;
02712     }
02713     inline bool operator>=( const UString::_const_fwd_iterator& left, const UString::_const_fwd_iterator& right )
02714     {
              // Ordering follows the underlying mIter members directly.
02715         return left.mIter >= right.mIter;
02716     }
02717 
02718     // (const) reverse iterator common operators
02719     // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator
02720     inline UString::size_type operator-( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02721     {
              // Distance between two reverse iterators. The operands are swapped because the
              // reverse iterator wraps a forward iterator that runs in the opposite direction.
02722         return ( right.mIter - left.mIter );
02723     }
02724     inline bool operator==( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02725     {
              // Equality does not depend on direction, so the mIter members are compared as-is.
02726         return left.mIter == right.mIter;
02727     }
02728     inline bool operator!=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02729     {
              // Inequality does not depend on direction, so the mIter members are compared as-is.
02730         return left.mIter != right.mIter;
02731     }
02732     inline bool operator<( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02733     {
              // Operands are swapped: the wrapped forward iterators run in the opposite direction.
02734         return right.mIter < left.mIter;
02735     }
02736     inline bool operator<=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02737     {
              // Operands are swapped: the wrapped forward iterators run in the opposite direction.
02738         return right.mIter <= left.mIter;
02739     }
02740     inline bool operator>( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02741     {
              // Operands are swapped: the wrapped forward iterators run in the opposite direction.
02742         return right.mIter > left.mIter;
02743     }
02744     inline bool operator>=( const UString::_const_rev_iterator& left, const UString::_const_rev_iterator& right )
02745     {
              // Operands are swapped: the wrapped forward iterators run in the opposite direction.
02746         return right.mIter >= left.mIter;
02747     }
02748 
02750     inline std::ostream& operator << ( std::ostream& os, const UString& s )
02751     {
              // Writes the result of s.asUTF8() to the narrow stream and returns the stream for chaining.
02752         return os << s.asUTF8();
02753     }
02754 
02756     //inline std::wostream& operator << ( std::wostream& os, const UString& s )
02757     //{
02758     //  return os << s.asWStr();
02759     //}
02760 
02761 
02762 
02763 }
02764 
02765 #endif // __MYGUI_U_STRING_H__

Generated on Sun Jan 30 2011 for MyGUI by  doxygen 1.7.1