00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "qjson_debug.h"
00022 #include "json_scanner.h"
00023 #include "json_parser.hh"
00024
00025 #include <ctype.h>
00026
00027 #include <QtCore/QDebug>
00028 #include <QtCore/QRegExp>
00029
00030 #include <cassert>
00031
00032 bool ishexnstring(const QString& string) {
00033 for (int i = 0; i < string.length(); i++) {
00034 if (isxdigit(string[i] == 0))
00035 return false;
00036 }
00037 return true;
00038 }
00039
00040 JSonScanner::JSonScanner(QIODevice* io)
00041 : m_io (io)
00042 {
00043 m_quotmarkClosed = true;
00044 m_quotmarkCount = 0;
00045 }
00046
00047 static QString unescape( const QByteArray& ba, bool* ok ) {
00048 assert( ok );
00049 *ok = false;
00050 QString res;
00051 QByteArray seg;
00052 bool bs = false;
00053 for ( int i = 0, size = ba.size(); i < size; ++i ) {
00054 const char ch = ba[i];
00055 if ( !bs ) {
00056 if ( ch == '\\' )
00057 bs = true;
00058 else
00059 seg += ch;
00060 } else {
00061 bs = false;
00062 switch ( ch ) {
00063 case 'b':
00064 seg += '\b';
00065 break;
00066 case 'f':
00067 seg += '\f';
00068 break;
00069 case 'n':
00070 seg += '\n';
00071 break;
00072 case 'r':
00073 seg += '\r';
00074 break;
00075 case 't':
00076 seg += '\t';
00077 break;
00078 case 'u':
00079 {
00080 res += QString::fromUtf8( seg );
00081 seg.clear();
00082
00083 if ( i > size - 5 ) {
00084
00085 return QString();
00086 }
00087
00088 const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) );
00089 const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) );
00090 i += 4;
00091
00092 if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) {
00093 qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2;
00094 return QString();
00095 }
00096 bool hexOk;
00097 const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 );
00098 if (!hexOk) {
00099 qCritical() << "error converting hex value to short:" << hex_digit1;
00100 return QString();
00101 }
00102 const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 );
00103 if (!hexOk) {
00104 qCritical() << "error converting hex value to short:" << hex_digit2;
00105 return QString();
00106 }
00107
00108 res += QChar(hex_code2, hex_code1);
00109 break;
00110 }
00111 case '\\':
00112 seg += '\\';
00113 break;
00114 default:
00115 seg += ch;
00116 break;
00117 }
00118 }
00119 }
00120 res += QString::fromUtf8( seg );
00121 *ok = true;
00122 return res;
00123 }
00124
00125 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc)
00126 {
00127 char ch;
00128
00129 if (!m_io->isOpen()) {
00130 qCritical() << "JSonScanner::yylex - io device is not open";
00131 return -1;
00132 }
00133
00134 yylloc->step();
00135
00136 do {
00137 bool ret;
00138 if (m_io->atEnd()) {
00139 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END";
00140 return yy::json_parser::token::END;
00141 }
00142 else
00143 ret = m_io->getChar(&ch);
00144
00145 if (!ret) {
00146 qCritical() << "JSonScanner::yylex - error reading from io device";
00147 return -1;
00148 }
00149
00150 qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|";
00151
00152 yylloc->columns();
00153
00154 if (ch == '\n' || ch == '\r')
00155 yylloc->lines();
00156
00157 } while (m_quotmarkClosed && (isspace(ch) != 0));
00158
00159 if (m_quotmarkClosed && ((ch == 't') || (ch == 'T')
00160 || (ch == 'n') || (ch == 'N'))) {
00161
00162 const QByteArray buf = m_io->peek(3).toLower();
00163
00164 if (buf.length() == 3) {
00165 if (buf == "rue") {
00166 m_io->read (3);
00167 yylloc->columns(3);
00168 qjsonDebug() << "JSonScanner::yylex - TRUE_VAL";
00169 return yy::json_parser::token::TRUE_VAL;
00170 }
00171 else if (buf == "ull") {
00172 m_io->read (3);
00173 yylloc->columns(3);
00174 qjsonDebug() << "JSonScanner::yylex - NULL_VAL";
00175 return yy::json_parser::token::NULL_VAL;
00176 }
00177 }
00178 }
00179 else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) {
00180
00181 const QByteArray buf = m_io->peek(4).toLower();
00182 if (buf.length() == 4) {
00183 if (buf == "alse") {
00184 m_io->read (4);
00185 yylloc->columns(4);
00186 qjsonDebug() << "JSonScanner::yylex - FALSE_VAL";
00187 return yy::json_parser::token::FALSE_VAL;
00188 }
00189 }
00190 }
00191 else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) {
00192 QByteArray ret(1, ch);
00193 const QByteArray buf = m_io->peek(1);
00194 if (!buf.isEmpty()) {
00195 if ((buf[0] == '+' ) || (buf[0] == '-' )) {
00196 ret += m_io->read (1);
00197 yylloc->columns();
00198 }
00199 }
00200 *yylval = QVariant(QString::fromUtf8(ret));
00201 return yy::json_parser::token::E;
00202 }
00203
00204 if (ch != '"' && !m_quotmarkClosed) {
00205
00206 QByteArray raw;
00207 raw += ch;
00208 char prevCh = ch;
00209 bool escape_on = (ch == '\\') ? true : false;
00210
00211 while ( true ) {
00212 char nextCh;
00213 qint64 ret = m_io->peek(&nextCh, 1);
00214 if (ret != 1) {
00215 if (m_io->atEnd())
00216 return yy::json_parser::token::END;
00217 else
00218 return -1;
00219 } else if ( !escape_on && nextCh == '\"' ) {
00220 bool ok;
00221 const QString str = unescape( raw, &ok );
00222 *yylval = ok ? str : QString();
00223 return ok ? yy::json_parser::token::STRING : -1;
00224 }
00225 #if 0
00226 if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' &&
00227 nextCh != 'b' && nextCh != 'f' && nextCh != 'n' &&
00228 nextCh != 'r' && nextCh != 't' && nextCh != 'u') {
00229 qjsonDebug() << "Just read" << nextCh;
00230 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00231 return -1;
00232 }
00233 #endif
00234 m_io->read(1);
00235 raw += nextCh;
00236 prevCh = nextCh;
00237 if (escape_on)
00238 escape_on = false;
00239 else
00240 escape_on = (prevCh == '\\') ? true : false;
00241 #if 0
00242 if (nextCh == '\\') {
00243 char buf;
00244 if (m_io->getChar (&buf)) {
00245 yylloc->columns();
00246 if (((buf != '"') && (buf != '\\') && (buf != '/') &&
00247 (buf != 'b') && (buf != 'f') && (buf != 'n') &&
00248 (buf != 'r') && (buf != 't') && (buf != 'u'))) {
00249 qjsonDebug() << "Just read" << buf;
00250 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00251 return -1;
00252 }
00253 } else {
00254 qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error";
00255 return -1;
00256 }
00257 }
00258 #endif
00259 }
00260 }
00261 else if (isdigit(ch) != 0 && m_quotmarkClosed) {
00262 *yylval = QVariant(QString::fromLatin1(QByteArray(&ch,1)));
00263 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
00264 return yy::json_parser::token::DIGIT;
00265 }
00266 else if (isalnum(ch) != 0) {
00267 *yylval = QVariant(QString(QChar::fromLatin1(ch)));
00268 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD ("
00269 << ch << ")";
00270 return yy::json_parser::token::STRING;
00271 }
00272 else if (ch == ':') {
00273
00274 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON";
00275 return yy::json_parser::token::COLON;
00276 }
00277 else if (ch == '"') {
00278
00279
00280
00281 m_quotmarkCount++;
00282 if (m_quotmarkCount %2 == 0) {
00283 m_quotmarkClosed = true;
00284 m_quotmarkCount = 0;
00285 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE";
00286 return yy::json_parser::token::QUOTMARKCLOSE;
00287 }
00288 else {
00289 m_quotmarkClosed = false;
00290 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN";
00291 return yy::json_parser::token::QUOTMARKOPEN;
00292 }
00293 }
00294 else if (ch == ',') {
00295 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA";
00296 return yy::json_parser::token::COMMA;
00297 }
00298 else if (ch == '.') {
00299 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT";
00300 return yy::json_parser::token::DOT;
00301 }
00302 else if (ch == '-') {
00303 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS";
00304 return yy::json_parser::token::MINUS;
00305 }
00306 else if (ch == '[') {
00307 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN";
00308 return yy::json_parser::token::SQUARE_BRACKET_OPEN;
00309 }
00310 else if (ch == ']') {
00311 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE";
00312 return yy::json_parser::token::SQUARE_BRACKET_CLOSE;
00313 }
00314 else if (ch == '{') {
00315 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN";
00316 return yy::json_parser::token::CURLY_BRACKET_OPEN;
00317 }
00318 else if (ch == '}') {
00319 qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE";
00320 return yy::json_parser::token::CURLY_BRACKET_CLOSE;
00321 }
00322
00323
00324
00325 qCritical() << "JSonScanner::yylex - unknown char, returning -1";
00326 return -1;
00327 }
00328
00329