KMIME Library
kmime_charfreq.cpp
Go to the documentation of this file.
00001 /* 00002 kmime_charfreq.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Library General Public 00009 License as published by the Free Software Foundation; either 00010 version 2 of the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Library General Public License for more details. 00016 00017 You should have received a copy of the GNU Library General Public License 00018 along with this library; see the file COPYING.LIB. If not, write to 00019 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00020 Boston, MA 02110-1301, USA. 00021 */ 00022 00034 #include "kmime_charfreq.h" 00035 00036 using namespace KMime; 00037 00042 //@cond PRIVATE 00043 //class KMime::CharFreq::Private 00044 //{ 00045 // public: 00046 //}; 00047 //@endcond 00048 00049 CharFreq::CharFreq( const QByteArray &buf ) 00050 : mNUL( 0 ), 00051 mCTL( 0 ), 00052 mCR( 0 ), mLF( 0 ), 00053 mCRLF( 0 ), 00054 mPrintable( 0 ), 00055 mEightBit( 0 ), 00056 mTotal( 0 ), 00057 mLineMin( 0xffffffff ), 00058 mLineMax( 0 ), 00059 mTrailingWS( false ), 00060 mLeadingFrom( false ) 00061 { 00062 if ( !buf.isEmpty() ) { 00063 count( buf.data(), buf.size() ); 00064 } 00065 } 00066 00067 CharFreq::CharFreq( const char *buf, size_t len ) 00068 : mNUL( 0 ), 00069 mCTL( 0 ), 00070 mCR( 0 ), mLF( 0 ), 00071 mCRLF( 0 ), 00072 mPrintable( 0 ), 00073 mEightBit( 0 ), 00074 mTotal( 0 ), 00075 mLineMin( 0xffffffff ), 00076 mLineMax( 0 ), 00077 mTrailingWS( false ), 00078 mLeadingFrom( false ) 00079 { 00080 if ( buf && len > 0 ) { 00081 count( buf, len ); 00082 } 00083 } 00084 00085 //@cond PRIVATE 00086 static inline bool isWS( char ch ) 00087 { 00088 return ( ch == '\t' || ch == ' ' ); 00089 } 00090 //@endcond 00091 00092 void CharFreq::count( const char *it, size_t len ) 00093 { 00094 const char *end = it + len; 00095 uint currentLineLength = 0; 00096 // initialize the prevChar with LF so that From_ detection works w/o 00097 // special-casing: 00098 char prevChar = '\n'; 00099 char prevPrevChar = 0; 00100 00101 for ( ; it != end ; ++it ) { 00102 ++currentLineLength; 00103 switch ( *it ) { 00104 case '\0': ++mNUL; break; 00105 case '\r': ++mCR; break; 00106 case '\n': ++mLF; 00107 if ( prevChar == '\r' ) { 00108 --currentLineLength; ++mCRLF; 00109 } 00110 if ( currentLineLength >= mLineMax ) { 00111 mLineMax = currentLineLength-1; 00112 } 00113 if ( currentLineLength <= mLineMin ) { 00114 mLineMin = currentLineLength-1; 00115 } 00116 if ( !mTrailingWS ) { 00117 if ( isWS( prevChar ) || 00118 ( prevChar == '\r' && isWS( prevPrevChar ) ) ) { 00119 mTrailingWS = true; 00120 } 00121 } 00122 currentLineLength = 0; 00123 break; 00124 case 'F': // check for lines starting with From_ if not found already: 00125 if ( !mLeadingFrom ) { 00126 if ( prevChar == '\n' && end - it >= 5 && 00127 !qstrncmp( "From ", it, 5 ) ) { 00128 mLeadingFrom = true; 00129 } 00130 } 00131 ++mPrintable; 00132 break; 00133 default: 00134 { 00135 uchar c = *it; 00136 if ( c == '\t' || ( c >= ' ' && c <= '~' ) ) { 00137 ++mPrintable; 00138 } else if ( c == 127 || c < ' ' ) { 00139 ++mCTL; 00140 } else { 00141 ++mEightBit; 00142 } 00143 } 00144 } 00145 prevPrevChar = prevChar; 00146 prevChar = *it; 00147 } 00148 00149 // consider the length of the last line 00150 if ( currentLineLength >= mLineMax ) { 00151 mLineMax = currentLineLength; 00152 } 00153 if ( currentLineLength <= mLineMin ) { 00154 mLineMin = currentLineLength; 00155 } 00156 00157 // check whether the last character is tab or space 00158 if ( isWS( prevChar ) ) { 00159 mTrailingWS = true; 00160 } 00161 00162 mTotal = len; 00163 } 00164 00165 bool CharFreq::isEightBitData() const 00166 { 00167 return type() == EightBitData; 00168 } 00169 00170 bool CharFreq::isEightBitText() const 00171 { 00172 return type() == EightBitText; 00173 } 00174 00175 bool CharFreq::isSevenBitData() const 00176 { 00177 return type() == SevenBitData; 00178 } 00179 00180 bool CharFreq::isSevenBitText() const 00181 { 00182 return type() == SevenBitText; 00183 } 00184 00185 bool CharFreq::hasTrailingWhitespace() const 00186 { 00187 return mTrailingWS; 00188 } 00189 00190 bool CharFreq::hasLeadingFrom() const 00191 { 00192 return mLeadingFrom; 00193 } 00194 00195 CharFreq::Type CharFreq::type() const 00196 { 00197 #if 0 00198 qDebug( "Total: %d; NUL: %d; CTL: %d;\n" 00199 "CR: %d; LF: %d; CRLF: %d;\n" 00200 "lineMin: %d; lineMax: %d;\n" 00201 "printable: %d; eightBit: %d;\n" 00202 "trailing whitespace: %s;\n" 00203 "leading 'From ': %s;\n", 00204 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax, 00205 printable, eightBit, 00206 mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" ); 00207 #endif 00208 if ( mNUL ) { // must be binary 00209 return Binary; 00210 } 00211 00212 // doesn't contain NUL's: 00213 if ( mEightBit ) { 00214 if ( mLineMax > 988 ) { 00215 return EightBitData; // not allowed in 8bit 00216 } 00217 if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) { 00218 return EightBitData; 00219 } 00220 return EightBitText; 00221 } 00222 00223 // doesn't contain NUL's, nor 8bit chars: 00224 if ( mLineMax > 988 ) { 00225 return SevenBitData; 00226 } 00227 if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) { 00228 return SevenBitData; 00229 } 00230 00231 // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars: 00232 return SevenBitText; 00233 } 00234 00235 float CharFreq::printableRatio() const 00236 { 00237 if ( mTotal ) { 00238 return float(mPrintable) / float(mTotal); 00239 } else { 00240 return 0; 00241 } 00242 } 00243 00244 float CharFreq::controlCodesRatio() const 00245 { 00246 if ( mTotal ) { 00247 return float(mCTL) / float(mTotal); 00248 } else { 00249 return 0; 00250 } 00251 } 00252
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 7 2012 23:55:58 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 7 2012 23:55:58 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.