31 #if defined( DEBUG_PARSER )
32 # if defined( DEBUG ) && defined( _MSC_VER )
34 # define TIXML_LOG OutputDebugString
36 # define TIXML_LOG printf
43 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
48 {
""", 6,
'\"' },
// Byte values 0xEF, 0xBB and 0xBF. Elsewhere in this file the first three
// bytes of the input are compared against these constants; when the encoding
// is still unknown, a full match selects TIXML_ENCODING_UTF8 and sets
// useMicrosoftBOM.
62 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
63 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
64 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
// Lookup table with one entry per possible byte value (declared size 256;
// the sixteen rows below hold sixteen entries each). Stamp reads the entry
// for the byte at its current position into a local named step when the
// encoding is TIXML_ENCODING_UTF8.
66 const int TiXmlBase::utf8ByteTable[256] =
// Byte values 0x00-0xBF (twelve rows): every entry is 1.
69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// Byte values 0xC0-0xCF: 0xC0 and 0xC1 map to 1, the remaining entries to 2.
81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
// Byte values 0xD0-0xDF: every entry is 2.
82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
// Byte values 0xE0-0xEF: every entry is 3.
83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
// Byte values 0xF0-0xFF: 0xF0-0xF4 map to 4, 0xF5-0xFF map to 1.
84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
// Converts the value in input to bytes written through output (UTF-32 to
// UTF-8, going by the function name) and reports a byte count through
// *length. Only part of the body is visible in this excerpt; in particular
// the statements that assign *length for each input range are not shown.
88 void TiXmlBase::ConvertUTF32ToUTF8(
unsigned long input,
char* output,
int* length )
// OR-ing with BYTE_MARK (0x80) sets bit 7 and AND-ing with BYTE_MASK (0xBF)
// clears bit 6, so a byte produced with this pair has the bit form 10xxxxxx.
90 const unsigned long BYTE_MASK = 0xBF;
91 const unsigned long BYTE_MARK = 0x80;
// Indexed by *length when the final byte is written below.
92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
// Range tests on input: below 0x800, below 0x10000, below 0x200000.
96 else if ( input < 0x800 )
98 else if ( input < 0x10000 )
100 else if ( input < 0x200000 )
// NOTE(review): presumably the final else of the chain above - reports a
// zero length and returns without writing to output. Confirm against the
// full source.
103 { *length = 0;
return; }
// Three writes using the BYTE_MARK / BYTE_MASK combination described above.
112 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
116 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
120 *output = (char)((input | BYTE_MARK) & BYTE_MASK);
// Write that ORs input with the FIRST_BYTE_MARK entry selected by *length.
124 *output = (char)(input | FIRST_BYTE_MARK[*length]);
// Classifies a single byte. The visible return statement forwards to the C
// library isalpha; the encoding parameter is unnamed in this signature.
// Other lines of the body are not shown in this excerpt.
129 int TiXmlBase::IsAlpha(
unsigned char anyByte, TiXmlEncoding )
139 return isalpha( anyByte );
// Classifies a single byte. The visible return statement forwards to the C
// library isalnum; the encoding parameter is unnamed in this signature.
// Other lines of the body are not shown in this excerpt.
150 int TiXmlBase::IsAlphaNum(
unsigned char anyByte, TiXmlEncoding )
160 return isalnum( anyByte );
// Helper that carries a TiXmlCursor for the parser. Callers in this file
// call Stamp() with a position in the input and then read Cursor() to obtain
// a location. Member declarations and access specifiers are not visible in
// this excerpt.
171 class TiXmlParsingData
// Declared here, defined further down in this file.
175 void Stamp(
const char* now, TiXmlEncoding encoding );
// Returns a const reference to the member named cursor.
177 const TiXmlCursor& Cursor() {
return cursor; }
// Constructor taking a start pointer, a tab size and an initial row / col.
// The initializer list and body are not shown in this excerpt.
181 TiXmlParsingData(
const char* start,
int _tabsize,
int row,
int col )
// Works on local copies of cursor.row and cursor.col while walking the input
// from the member named stamp. Only parts of the body are visible here; the
// loop header, most switch cases and the write-back to cursor are not shown.
196 void TiXmlParsingData::Stamp(
const char* now, TiXmlEncoding encoding )
207 int row = cursor.row;
208 int col = cursor.col;
209 const char* p = stamp;
// Unsigned view of the same position, used for the byte comparisons below.
215 const unsigned char* pU = (
const unsigned char*)p;
// With integer operands this yields the smallest multiple of tabsize that is
// strictly greater than col (for non-negative col and positive tabsize).
// NOTE(review): presumably the tab-character case - confirm.
258 col = (col / tabsize + 1) * tabsize;
// Switch case for a byte equal to TIXML_UTF_LEAD_0 (0xEF). Under UTF-8, and
// only when the next two bytes are non-zero, the following two bytes are
// compared against three specific pairs: LEAD_1/LEAD_2, 0xBF/0xBE and
// 0xBF/0xBF. The action taken for each match is not visible in this excerpt.
261 case TIXML_UTF_LEAD_0:
262 if ( encoding == TIXML_ENCODING_UTF8 )
264 if ( *(p+1) && *(p+2) )
268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
// Under UTF-8 the table entry for the current byte is read into step; how
// step is used afterwards is not shown here.
286 if ( encoding == TIXML_ENCODING_UTF8 )
289 int step = TiXmlBase::utf8ByteTable[*((
const unsigned char*)p)];
// Sanity checks on the stored cursor: neither field may be below -1.
307 assert( cursor.row >= -1 );
308 assert( cursor.col >= -1 );
// Takes a position in the input and an encoding and returns a const char*.
// The body has a UTF-8 specific path and a second path with a plain loop.
// Loop bodies, return statements and the null checks on p are not visible in
// this excerpt.
314 const char* TiXmlBase::SkipWhiteSpace(
const char* p, TiXmlEncoding encoding )
320 if ( encoding == TIXML_ENCODING_UTF8 )
324 const unsigned char* pU = (
const unsigned char*)p;
// First test: the three bytes at p equal TIXML_UTF_LEAD_0/1/2.
327 if ( *(pU+0)==TIXML_UTF_LEAD_0
328 && *(pU+1)==TIXML_UTF_LEAD_1
329 && *(pU+2)==TIXML_UTF_LEAD_2 )
// Two further tests that also start with TIXML_UTF_LEAD_0; the remaining
// operands of these conditions are not shown here.
334 else if(*(pU+0)==TIXML_UTF_LEAD_0
341 else if(*(pU+0)==TIXML_UTF_LEAD_0
// Newline and carriage return are tested explicitly in addition to
// IsWhiteSpace.
349 if ( IsWhiteSpace( *p ) || *p ==
'\n' || *p ==
'\r' )
// Operator precedence: && binds tighter than ||, so the *p guard applies only
// to the IsWhiteSpace term. The newline and carriage-return comparisons are
// false for a zero byte, so they do not keep the loop running at a
// terminator.
357 while ( *p && IsWhiteSpace( *p ) || *p ==
'\n' || *p ==
'\r' )
// Stream-based helper: returns false when the stream is not in a good state,
// tests a character c against IsWhiteSpace and c <= 0, and appends the
// character obtained from in->get() to *tag. The loop structure and the
// origin of c are not visible in this excerpt.
365 bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
369 if ( !in->good() )
return false;
373 if ( !IsWhiteSpace( c ) || c <= 0 )
376 *tag += (char) in->get();
// Stream-based helper that compares a character c with the requested
// character. Everything else in the body, including the return statements,
// is not visible in this excerpt.
380 bool TiXmlBase::StreamTo( std::istream * in,
int character, TIXML_STRING * tag )
386 if ( c == character )
// Reads a name starting at p into *name. The visible conditions require the
// first character to satisfy IsAlpha or be an underscore, then scan forward
// while IsAlphaNum (further alternatives of that condition are not shown),
// and finally assign the scanned range to *name. Return statements are not
// visible in this excerpt.
401 const char* TiXmlBase::ReadName(
const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
417 && ( IsAlpha( (
unsigned char) *p, encoding ) || *p ==
'_' ) )
// Remember where the name begins so its length can be computed afterwards.
419 const char* start = p;
421 && ( IsAlphaNum( (
unsigned char ) *p, encoding )
// Copies the p-start characters beginning at start into *name.
431 name->assign( start, p-start );
// Decodes an entity starting at p into value / *length and returns a pointer
// into the input. Two mechanisms are visible: a numeric form introduced by
// a hash character at p+1, and a lookup in the entity table. Several lines of
// the body (declarations of q, mult and delta, loop headers, the final
// fallback) are not visible in this excerpt.
438 const char* TiXmlBase::GetEntity(
const char* p,
char* value,
int* length, TiXmlEncoding encoding )
// Numeric form: requires a hash at p+1 and a non-zero byte at p+2.
445 if ( *(p+1) && *(p+1) ==
'#' && *(p+2) )
// Accumulator for the decoded numeric value.
447 unsigned long ucs = 0;
// First numeric branch: accepts digits 0-9, a-f and A-F.
// NOTE(review): presumably the hexadecimal form - the test that selects this
// branch is not shown here.
454 if ( !*(p+3) )
return 0;
// Locate the terminating semicolon; give up with a null return if absent.
457 q = strchr( q,
';' );
459 if ( !q || !*q )
return 0;
// Each accepted digit adds its value times mult to ucs.
466 if ( *q >=
'0' && *q <=
'9' )
467 ucs += mult * (*q -
'0');
468 else if ( *q >=
'a' && *q <=
'f' )
469 ucs += mult * (*q -
'a' + 10);
470 else if ( *q >=
'A' && *q <=
'F' )
471 ucs += mult * (*q -
'A' + 10 );
// Second numeric branch: accepts digits 0-9 only.
481 if ( !*(p+2) )
return 0;
484 q = strchr( q,
';' );
486 if ( !q || !*q )
return 0;
493 if ( *q >=
'0' && *q <=
'9' )
494 ucs += mult * (*q -
'0');
// Under UTF-8 the accumulated value is handed to ConvertUTF32ToUTF8, which
// fills value and *length.
501 if ( encoding == TIXML_ENCODING_UTF8 )
504 ConvertUTF32ToUTF8( ucs, value, length );
511 return p + delta + 1;
// Table lookup: the input at p is compared with each entity string over
// exactly strLength characters; on a match the entity character is stored in
// *value and the returned pointer is strLength characters past p.
515 for( i=0; i<NUM_ENTITY; ++i )
517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
// Debug check that the stored length agrees with the string in the table.
519 assert( strlen( entity[i].str ) == entity[i].strLength );
520 *value = entity[i].chr;
522 return ( p + entity[i].strLength );
// Compares the input at p with a tag. Two scanning loops are visible: one
// compares characters after passing both through ToLower, the other compares
// them unchanged. Both stop at the first zero byte on either side or at the
// first mismatch. The remaining parameters, the choice between the loops and
// the return statements are not visible in this excerpt.
534 bool TiXmlBase::StringEqual(
const char* p,
537 TiXmlEncoding encoding )
551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
562 while ( *q && *tag && *q == *tag )
// Reads text starting at p into *text until endTag is matched (StringEqual
// is used as the stop test in both visible loops). Two paths are visible:
// one taken when condenseWhiteSpace is false (possibly among other
// conditions not shown), and one that starts with SkipWhiteSpace and tracks
// a local flag named whitespace. Parts of the signature and body are not
// visible in this excerpt.
574 const char* TiXmlBase::ReadText(
const char* p,
578 bool caseInsensitive,
579 TiXmlEncoding encoding )
583 || !condenseWhiteSpace )
587 && !StringEqual( p, endTag, caseInsensitive, encoding )
// GetChar fills cArr and len; exactly len bytes are appended to *text.
591 char cArr[4] = { 0, 0, 0, 0 };
592 p = GetChar( p, cArr, &len, encoding );
593 text->append( cArr, len );
// Second path.
598 bool whitespace =
false;
601 p = SkipWhiteSpace( p, encoding );
603 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
// Carriage return and newline are tested separately from IsWhiteSpace; the
// statements executed in each branch are not shown here.
605 if ( *p ==
'\r' || *p ==
'\n' )
610 else if ( IsWhiteSpace( *p ) )
625 char cArr[4] = { 0, 0, 0, 0 };
626 p = GetChar( p, cArr, &len, encoding );
630 text->append( cArr, len );
// Advances p by the length of endTag.
635 p += strlen( endTag );
// Stream-based reader for a document. Visible steps: advance to an opening
// angle bracket with StreamTo (TIXML_ERROR_PARSING_EMPTY is set when that
// fails), remember the current length of *tag as tagIndex, read while the
// stream is good and the next character is not a closing angle bracket,
// identify the node from the text starting at tagIndex and delegate to that
// node. Loop bodies and return statements are not visible in this excerpt.
641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
650 if ( !StreamTo( in,
'<', tag ) )
652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
// Offset in *tag at which the text of the upcoming node begins.
658 int tagIndex = (int) tag->length();
659 while ( in->good() && in->peek() !=
'>' )
664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
679 node->StreamIn( in, tag );
// Two generic error reports; the conditions leading to them are not shown.
693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
723 location.row = prevData->cursor.row;
724 location.col = prevData->cursor.col;
731 TiXmlParsingData data( p, TabSize(), location.row, location.col );
732 location = data.Cursor();
734 if ( encoding == TIXML_ENCODING_UNKNOWN )
737 const unsigned char* pU = (
const unsigned char*)p;
738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
742 encoding = TIXML_ENCODING_UTF8;
743 useMicrosoftBOM =
true;
747 p = SkipWhiteSpace( p, encoding );
750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
756 TiXmlNode* node = Identify( p, encoding );
759 p = node->Parse( p, &data, encoding );
768 if ( encoding == TIXML_ENCODING_UNKNOWN
776 encoding = TIXML_ENCODING_UTF8;
777 else if ( StringEqual( enc,
"UTF-8",
true, TIXML_ENCODING_UNKNOWN ) )
778 encoding = TIXML_ENCODING_UTF8;
779 else if ( StringEqual( enc,
"UTF8",
true, TIXML_ENCODING_UNKNOWN ) )
780 encoding = TIXML_ENCODING_UTF8;
782 encoding = TIXML_ENCODING_LEGACY;
785 p = SkipWhiteSpace( p, encoding );
790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
// Records an error on the document. The error code must be greater than 0
// and less than TIXML_ERROR_STRING_COUNT (checked by assert). The description
// is taken from errorString, the stored location is cleared, and when both
// pError and data are non-null the location is recomputed by stamping data
// at pError. The assignment of errorId is not visible in this excerpt.
798 void TiXmlDocument::SetError(
int err,
const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
807 errorDesc = errorString[ errorId ];
809 errorLocation.Clear();
// A location is only available when the caller supplies both a position and
// parsing data; otherwise errorLocation stays cleared.
810 if ( pError && data )
812 data->Stamp( pError, encoding );
813 errorLocation = data->Cursor();
// Decides which kind of node begins at p. After skipping whitespace the text
// must start with an opening angle bracket. The text is then tested against
// a series of headers in this order: the XML declaration header, the comment
// header, the CDATA header, the generic header "<!", and finally an
// alphabetic character after the bracket (further alternatives of that last
// condition are not shown). Anything else is logged as Unknown(2). The node
// allocations and return statements are not visible in this excerpt.
818 TiXmlNode* TiXmlNode::Identify(
const char* p, TiXmlEncoding encoding )
822 p = SkipWhiteSpace( p, encoding );
823 if( !p || !*p || *p !=
'<' )
829 p = SkipWhiteSpace( p, encoding );
843 const char* xmlHeader = {
"<?xml" };
844 const char* commentHeader = {
"<!--" };
845 const char* dtdHeader = {
"<!" };
846 const char* cdataHeader = {
"<![CDATA[" };
// The declaration header is the only one compared with the third StringEqual
// argument set to true; the others pass false.
848 if ( StringEqual( p, xmlHeader,
true, encoding ) )
851 TIXML_LOG(
"XML parsing Declaration\n" );
855 else if ( StringEqual( p, commentHeader,
false, encoding ) )
858 TIXML_LOG(
"XML parsing Comment\n" );
862 else if ( StringEqual( p, cdataHeader,
false, encoding ) )
865 TIXML_LOG(
"XML parsing CDATA\n" );
// The dtdHeader string is a leading substring of both commentHeader and
// cdataHeader, and it is tested after them.
871 else if ( StringEqual( p, dtdHeader,
false, encoding ) )
874 TIXML_LOG(
"XML parsing Unknown(1)\n" );
878 else if ( IsAlpha( *(p+1), encoding )
882 TIXML_LOG(
"XML parsing Element\n" );
889 TIXML_LOG(
"XML parsing Unknown(2)\n" );
// The created node is attached to this node as its parent.
897 returnNode->parent =
this;
902 doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
// Stream-based reader for an element. Visible steps: give up when *tag holds
// fewer than three characters; inspect the last two characters of *tag to
// distinguish a tag ending in "/>" from one ending in ">"; in the latter
// case read whitespace, then text content (when the next character is not an
// opening angle bracket), then a following tag whose text is identified and
// delegated to the matching node. Embedded-null conditions are reported with
// TIXML_ERROR_EMBEDDED_NULL. Many lines of the body are not visible in this
// excerpt.
909 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
920 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
// The tests below index length()-1 and length()-2, so very short tags are
// rejected first.
929 if ( tag->length() < 3 )
return;
934 if ( tag->at( tag->length() - 1 ) ==
'>'
935 && tag->at( tag->length() - 2 ) ==
'/' )
940 else if ( tag->at( tag->length() - 1 ) ==
'>' )
949 StreamWhiteSpace( in, tag );
// Anything that does not start a new tag is streamed in as text.
952 if ( in->good() && in->peek() !=
'<' )
956 text.StreamIn( in, tag );
965 if ( !in->good() )
return;
966 assert( in->peek() ==
'<' );
// Offset in *tag at which the upcoming tag begins.
967 int tagIndex = (int) tag->length();
969 bool closingTag =
false;
970 bool firstCharFound =
false;
982 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
// When an opening square bracket arrives and *tag holds at least nine
// characters, the last nine are compared with the CDATA header.
993 if ( c ==
'[' && tag->size() >= 9 )
995 size_t len = tag->size();
996 const char* start = tag->c_str() + len - 9;
997 if ( strcmp( start,
"<![CDATA[" ) == 0 ) {
998 assert( !closingTag );
// Detects the first character that is neither an opening angle bracket nor
// whitespace.
1003 if ( !firstCharFound && c !=
'<' && !IsWhiteSpace( c ) )
1005 firstCharFound =
true;
1022 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1034 const char* tagloc = tag->c_str() + tagIndex;
1035 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1038 node->StreamIn( in, tag );
// Parses an element starting at p. Visible steps: skip whitespace, stamp the
// location, skip whitespace after the first character, read the element name
// into value, build an end tag string that starts with "</", then handle
// attributes and the element value. Each failure is reported on the document
// (when one is attached) with a specific error code. Conditions, loop
// headers and return statements are largely not visible in this excerpt.
1049 const char*
TiXmlElement::Parse(
const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1051 p = SkipWhiteSpace( p, encoding );
1056 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
// Record where this element starts.
1062 data->Stamp( p, encoding );
1063 location = data->Cursor();
1068 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1072 p = SkipWhiteSpace( p+1, encoding );
// Position kept for error reporting.
1075 const char* pErr = p;
1077 p = ReadName( p, &value, encoding );
1080 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1084 TIXML_STRING endTag (
"</");
1093 p = SkipWhiteSpace( p, encoding );
1096 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1105 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
// A closing angle bracket ends the start tag; the element value follows and
// must be terminated by the end tag built above.
1110 else if ( *p ==
'>' )
1116 p = ReadValue( p, data, encoding );
1120 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1125 if ( StringEqual( p, endTag.c_str(),
false, encoding ) )
1127 p += endTag.length();
1132 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
// Attribute handling: an attribute object is parsed and added to the set.
1142 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1146 attrib->SetDocument( document );
1148 p = attrib->Parse( p, data, encoding );
1152 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1158 #ifdef TIXML_USE_STL
1170 attributeSet.Add( attrib );
// Reads the content of an element. The position before whitespace skipping
// is kept in pWithWhiteSpace so that a text node can be parsed either from
// the skipped position or from the original one; both calls are visible
// below, the condition choosing between them is not. Text nodes are tested
// with Blank(). Content starting with "</" is tested separately; other
// content is identified and parsed as a child node. Many lines of the body
// are not visible in this excerpt.
1177 const char* TiXmlElement::ReadValue(
const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1182 const char* pWithWhiteSpace = p;
1183 p = SkipWhiteSpace( p, encoding );
1194 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1200 p = textNode->Parse( p, data, encoding );
1206 p = textNode->Parse( pWithWhiteSpace, data, encoding );
1209 if ( !textNode->Blank() )
1219 if ( StringEqual( p,
"</",
false, encoding ) )
1225 TiXmlNode* node = Identify( p, encoding );
1228 p = node->Parse( p, data, encoding );
// Refresh both positions before the next round.
1237 pWithWhiteSpace = p;
1238 p = SkipWhiteSpace( p, encoding );
1243 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
// Stream-based reader for an unknown node, compiled only when TIXML_USE_STL
// is defined. Loops while the stream is good and reports
// TIXML_ERROR_EMBEDDED_NULL on the document; the loop body and its exit
// condition are not visible in this excerpt.
1249 #ifdef TIXML_USE_STL
1250 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1252 while ( in->good() )
1259 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
// Parses an unknown node. Visible steps: skip whitespace, stamp the
// location, require an opening angle bracket at p (otherwise
// TIXML_ERROR_PARSING_UNKNOWN is reported with a location), then scan
// forward until a closing angle bracket or the end of the input. A second
// TIXML_ERROR_PARSING_UNKNOWN report without location follows; its condition
// is not visible in this excerpt.
1274 const char*
TiXmlUnknown::Parse(
const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1277 p = SkipWhiteSpace( p, encoding );
1281 data->Stamp( p, encoding );
1282 location = data->Cursor();
1284 if ( !p || !*p || *p !=
'<' )
1286 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1292 while ( p && *p && *p !=
'>' )
1300 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
// Stream-based reader for a comment, compiled only when TIXML_USE_STL is
// defined. Loops while the stream is good, reports TIXML_ERROR_EMBEDDED_NULL
// on the document, and tests whether the second-to-last and third-to-last
// characters of *tag are both hyphens. The first part of that condition and
// the loop body are not visible in this excerpt.
1307 #ifdef TIXML_USE_STL
1308 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1310 while ( in->good() )
1317 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1324 && tag->at( tag->length() - 2 ) ==
'-'
1325 && tag->at( tag->length() - 3 ) ==
'-' )
// Parses a comment. Visible steps: skip whitespace, stamp the location,
// require the start tag at p, advance past it, append one character at a
// time to value until the end tag is matched or the input ends, then advance
// past the end tag. Return statements and some guards are not visible in
// this excerpt.
1335 const char*
TiXmlComment::Parse(
const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1340 p = SkipWhiteSpace( p, encoding );
1344 data->Stamp( p, encoding );
1345 location = data->Cursor();
1347 const char* startTag =
"<!--";
1348 const char* endTag =
"-->";
1350 if ( !StringEqual( p, startTag,
false, encoding ) )
// NOTE(review): unlike most SetError calls in this file, this line carries no
// if ( document ) test of its own - confirm document cannot be null here.
1352 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1355 p += strlen( startTag );
// Copies the comment text one character per iteration.
1377 while ( p && *p && !StringEqual( p, endTag,
false, encoding ) )
1379 value.append( p, 1 );
1383 p += strlen( endTag );
1391 p = SkipWhiteSpace( p, encoding );
1392 if ( !p || !*p )
return 0;
1400 data->Stamp( p, encoding );
1401 location = data->Cursor();
1404 const char* pErr = p;
1405 p = ReadName( p, &name, encoding );
1408 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1411 p = SkipWhiteSpace( p, encoding );
1412 if ( !p || !*p || *p !=
'=' )
1414 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1419 p = SkipWhiteSpace( p, encoding );
1422 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1427 const char SINGLE_QUOTE =
'\'';
1428 const char DOUBLE_QUOTE =
'\"';
1430 if ( *p == SINGLE_QUOTE )
1434 p = ReadText( p, &value,
false, end,
false, encoding );
1436 else if ( *p == DOUBLE_QUOTE )
1440 p = ReadText( p, &value,
false, end,
false, encoding );
1449 && !IsWhiteSpace( *p ) && *p !=
'\n' && *p !=
'\r'
1450 && *p !=
'/' && *p !=
'>' )
1452 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1456 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
// Stream-based reader for a text node, compiled only when TIXML_USE_STL is
// defined. Loops while the stream is good. For non-CDATA text an opening
// angle bracket is tested; for CDATA text a closing angle bracket is tested
// together with the two characters before the last one in *tag both being
// closing square brackets. The statements executed for these tests are not
// visible in this excerpt.
1466 #ifdef TIXML_USE_STL
1467 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1469 while ( in->good() )
1472 if ( !cdata && (c ==
'<' ) )
1480 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
// The size test keeps the len-2 and len-3 indices below in range.
1487 if ( cdata && c ==
'>' && tag->size() >= 3 ) {
1488 size_t len = tag->size();
1489 if ( (*tag)[len-2] ==
']' && (*tag)[len-3] ==
']' ) {
// Parses a text node. The location is stamped first. When the node is
// already flagged as cdata, or the input begins with the CDATA start tag,
// the start tag is required and skipped, the text is scanned up to the CDATA
// end tag, and ReadText is called with a string named dummy and the end tag.
// Otherwise ReadText fills value up to the next opening angle bracket with
// ignoreWhite set to true. Return statements and several body lines are not
// visible in this excerpt.
1498 const char*
TiXmlText::Parse(
const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1505 data->Stamp( p, encoding );
1506 location = data->Cursor();
1509 const char*
const startTag =
"<![CDATA[";
1510 const char*
const endTag =
"]]>";
1512 if ( cdata || StringEqual( p, startTag,
false, encoding ) )
1516 if ( !StringEqual( p, startTag,
false, encoding ) )
// NOTE(review): unlike most SetError calls in this file, this line carries no
// if ( document ) test of its own - confirm document cannot be null here.
1518 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1521 p += strlen( startTag );
1525 && !StringEqual( p, endTag,
false, encoding )
1533 p = ReadText( p, &dummy,
false, endTag,
false, encoding );
// Non-CDATA path.
1538 bool ignoreWhite =
true;
1540 const char* end =
"<";
1541 p = ReadText( p, &value, ignoreWhite, end,
false, encoding );
// Stream-based reader for a declaration, compiled only when TIXML_USE_STL is
// defined. Loops while the stream is good and reports
// TIXML_ERROR_EMBEDDED_NULL on the document; the loop body and its exit
// condition are not visible in this excerpt.
1548 #ifdef TIXML_USE_STL
1549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1551 while ( in->good() )
1558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1574 p = SkipWhiteSpace( p, _encoding );
1578 if ( !p || !*p || !StringEqual( p,
"<?xml",
true, _encoding ) )
1580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1585 data->Stamp( p, _encoding );
1586 location = data->Cursor();
1602 p = SkipWhiteSpace( p, _encoding );
1603 if ( StringEqual( p,
"version",
true, _encoding ) )
1606 p = attrib.Parse( p, data, _encoding );
1607 version = attrib.
Value();
1609 else if ( StringEqual( p,
"encoding",
true, _encoding ) )
1612 p = attrib.Parse( p, data, _encoding );
1613 encoding = attrib.
Value();
1615 else if ( StringEqual( p,
"standalone",
true, _encoding ) )
1618 p = attrib.Parse( p, data, _encoding );
1619 standalone = attrib.
Value();
1624 while( p && *p && *p !=
'>' && !IsWhiteSpace( *p ) )
// Const member that walks every character of value and tests each one with
// IsWhiteSpace. The return statements are not visible in this excerpt.
1631 bool TiXmlText::Blank()
const
1633 for (
unsigned i=0; i<value.length(); i++ )
1634 if ( !IsWhiteSpace( value[i] ) )