QJson project page QJson home page

/builddir/build/BUILD/qjson/src/json_scanner.cpp
00001 /* This file is part of QJson
00002  *
00003  * Copyright (C) 2008 Flavio Castelli <flavio.castelli@gmail.com>
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Library General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Library General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Library General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
00020 
00021 #include "qjson_debug.h"
00022 #include "json_scanner.h"
00023 #include "json_parser.hh"
00024 
00025 #include <ctype.h>
00026 
00027 #include <QtCore/QDebug>
00028 #include <QtCore/QRegExp>
00029 
00030 #include <cassert>
00031 
00032 bool ishexnstring(const QString& string) {
00033   for (int i = 0; i < string.length(); i++) {
00034     if (isxdigit(string[i] == 0))
00035       return false;
00036   }
00037   return true;
00038 }
00039 
00040 JSonScanner::JSonScanner(QIODevice* io)
00041   : m_io (io)
00042 {
00043   m_quotmarkClosed = true;
00044   m_quotmarkCount = 0;
00045 }
00046 
00047 static QString unescape( const QByteArray& ba, bool* ok ) {
00048   assert( ok );
00049   *ok = false;
00050   QString res;
00051   QByteArray seg;
00052   bool bs = false;
00053   for ( int i = 0, size = ba.size(); i < size; ++i ) {
00054     const char ch = ba[i];
00055     if ( !bs ) {
00056       if ( ch == '\\' )
00057         bs = true;
00058       else
00059         seg += ch;
00060     } else {
00061       bs = false;
00062       switch ( ch ) {
00063         case 'b':
00064           seg += '\b';
00065           break;
00066         case 'f':
00067           seg += '\f';
00068           break;
00069         case 'n':
00070           seg += '\n';
00071           break;
00072         case 'r':
00073           seg += '\r';
00074           break;
00075         case 't':
00076           seg += '\t';
00077           break;
00078         case 'u':
00079         {
00080           res += QString::fromUtf8( seg );
00081           seg.clear();
00082 
00083           if ( i > size - 5 ) {
00084             //error
00085             return QString();
00086           }
00087 
00088           const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) );
00089           const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) );
00090           i += 4;
00091 
00092           if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) {
00093             qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2;
00094             return QString();
00095           }
00096           bool hexOk;
00097           const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 );
00098           if (!hexOk) {
00099             qCritical() << "error converting hex value to short:" << hex_digit1;
00100             return QString();
00101           }
00102           const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 );
00103           if (!hexOk) {
00104             qCritical() << "error converting hex value to short:" << hex_digit2;
00105             return QString();
00106           }
00107 
00108           res += QChar(hex_code2, hex_code1);
00109           break;
00110         }
00111         case '\\':
00112           seg  += '\\';
00113           break;
00114         default:
00115           seg += ch;
00116           break;
00117       }
00118     }
00119   }
00120   res += QString::fromUtf8( seg );
00121   *ok = true;
00122   return res;
00123 }
00124 
00125 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc)
00126 {
00127   char ch;
00128   
00129   if (!m_io->isOpen()) {
00130     qCritical() << "JSonScanner::yylex - io device is not open";
00131     return -1;
00132   }
00133 
00134   yylloc->step();
00135 
00136   do {
00137     bool ret;
00138     if (m_io->atEnd()) {
00139       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END";
00140       return yy::json_parser::token::END;
00141     }
00142     else
00143       ret = m_io->getChar(&ch);
00144 
00145     if (!ret) {
00146       qCritical() << "JSonScanner::yylex - error reading from io device";
00147       return -1;
00148     }
00149 
00150     qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|";
00151     
00152     yylloc->columns();
00153     
00154     if (ch == '\n' || ch == '\r')
00155       yylloc->lines();
00156       
00157   } while (m_quotmarkClosed && (isspace(ch) != 0));
00158 
00159   if (m_quotmarkClosed && ((ch == 't') || (ch == 'T')
00160       || (ch == 'n') || (ch == 'N'))) {
00161     // check true & null value
00162     const QByteArray buf = m_io->peek(3).toLower();
00163 
00164     if (buf.length() == 3) {
00165       if (buf == "rue") {
00166         m_io->read (3);
00167         yylloc->columns(3);
00168         qjsonDebug() << "JSonScanner::yylex - TRUE_VAL";
00169         return yy::json_parser::token::TRUE_VAL;
00170       }
00171       else if (buf == "ull") {
00172         m_io->read (3);
00173         yylloc->columns(3);
00174         qjsonDebug() << "JSonScanner::yylex - NULL_VAL";
00175         return yy::json_parser::token::NULL_VAL;
00176       }
00177     }
00178   }
00179   else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) {
00180     // check false value
00181     const QByteArray buf = m_io->peek(4).toLower();
00182     if (buf.length() == 4) {
00183       if (buf == "alse") {
00184         m_io->read (4);
00185         yylloc->columns(4);
00186         qjsonDebug() << "JSonScanner::yylex - FALSE_VAL";
00187         return yy::json_parser::token::FALSE_VAL;
00188       }
00189     }
00190   }
00191   else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) {
00192     QByteArray ret(1, ch);
00193     const QByteArray buf = m_io->peek(1);
00194     if (!buf.isEmpty()) {
00195       if ((buf[0] == '+' ) || (buf[0] == '-' )) {
00196         ret += m_io->read (1);  
00197         yylloc->columns();
00198       }
00199     }
00200     *yylval = QVariant(QString::fromUtf8(ret));
00201     return yy::json_parser::token::E;
00202   }
00203   
00204   if (ch != '"' && !m_quotmarkClosed) {
00205     // we're inside a " " block
00206     QByteArray raw;
00207     raw += ch;
00208     char prevCh = ch;
00209     bool escape_on = (ch == '\\') ? true : false;
00210 
00211     while ( true ) {
00212       char nextCh;
00213       qint64 ret = m_io->peek(&nextCh, 1);
00214       if (ret != 1) {
00215         if (m_io->atEnd())
00216           return yy::json_parser::token::END;
00217         else
00218           return -1;
00219       } else if ( !escape_on && nextCh == '\"' ) {
00220         bool ok;
00221         const QString str = unescape( raw, &ok );
00222         *yylval = ok ? str : QString();
00223         return ok ? yy::json_parser::token::STRING : -1;
00224       }
00225 #if 0
00226       if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' &&
00227            nextCh != 'b' && nextCh != 'f' && nextCh != 'n' &&
00228            nextCh != 'r' && nextCh != 't' && nextCh != 'u') {
00229         qjsonDebug() << "Just read" << nextCh;
00230         qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00231         return -1;
00232        }
00233 #endif
00234       m_io->read(1); // consume
00235       raw += nextCh;
00236       prevCh = nextCh;
00237       if (escape_on)
00238         escape_on = false;
00239       else
00240         escape_on = (prevCh == '\\') ? true : false;
00241 #if 0
00242       if (nextCh == '\\') {
00243         char buf;
00244         if (m_io->getChar (&buf)) {
00245           yylloc->columns();
00246           if (((buf != '"') && (buf != '\\') && (buf != '/') &&
00247               (buf != 'b') && (buf != 'f') && (buf != 'n') &&
00248               (buf != 'r') && (buf != 't') && (buf != 'u'))) {
00249                 qjsonDebug() << "Just read" << buf;
00250                 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00251                 return -1;
00252           }
00253         } else {
00254           qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error";
00255           return -1;
00256         }
00257       }
00258 #endif
00259     }
00260   }
00261   else if (isdigit(ch) != 0 && m_quotmarkClosed) {
00262     *yylval = QVariant(QString::fromLatin1(QByteArray(&ch,1)));
00263     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
00264     return yy::json_parser::token::DIGIT;
00265   }
00266   else if (isalnum(ch) != 0) {
00267     *yylval = QVariant(QString(QChar::fromLatin1(ch)));
00268     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD ("
00269              << ch << ")";
00270     return yy::json_parser::token::STRING;
00271   }
00272   else if (ch == ':') {
00273     // set yylval
00274     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON";
00275     return yy::json_parser::token::COLON;
00276   }
00277   else if (ch == '"') {
00278     // yy::json_parser::token::QUOTMARK (")
00279 
00280     // set yylval
00281     m_quotmarkCount++;
00282     if (m_quotmarkCount %2 == 0) {
00283       m_quotmarkClosed = true;
00284       m_quotmarkCount = 0;
00285       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE";
00286       return yy::json_parser::token::QUOTMARKCLOSE;
00287     }
00288     else {
00289       m_quotmarkClosed = false;
00290       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN";
00291       return yy::json_parser::token::QUOTMARKOPEN;
00292     }
00293   }
00294   else if (ch == ',') {
00295     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA";
00296     return yy::json_parser::token::COMMA;
00297   }
00298   else if (ch == '.') {
00299     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT";
00300     return yy::json_parser::token::DOT;
00301   }
00302   else if (ch == '-') {
00303     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS";
00304     return yy::json_parser::token::MINUS;
00305   }
00306   else if (ch == '[') {
00307     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN";
00308     return yy::json_parser::token::SQUARE_BRACKET_OPEN;
00309   }
00310   else if (ch == ']') {
00311     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE";
00312     return yy::json_parser::token::SQUARE_BRACKET_CLOSE;
00313   }
00314   else if (ch == '{') {
00315     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN";
00316     return yy::json_parser::token::CURLY_BRACKET_OPEN;
00317   }
00318   else if (ch == '}') {
00319     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE";
00320     return yy::json_parser::token::CURLY_BRACKET_CLOSE;
00321   }
00322 
00323   //unknown char!
00324   //TODO yyerror?
00325   qCritical() << "JSonScanner::yylex - unknown char, returning -1";
00326   return -1;
00327 }
00328 
00329 

SourceForge Logo hosts this site. Send comments to:
QJson Developers