• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.8.3 API Reference
  • KDE Home
  • Contact Us
 

KMIME Library

kmime_charfreq.cpp
Go to the documentation of this file.
00001 /*
00002   kmime_charfreq.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007   This library is free software; you can redistribute it and/or
00008   modify it under the terms of the GNU Library General Public
00009   License as published by the Free Software Foundation; either
00010   version 2 of the License, or (at your option) any later version.
00011 
00012   This library is distributed in the hope that it will be useful,
00013   but WITHOUT ANY WARRANTY; without even the implied warranty of
00014   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015   Library General Public License for more details.
00016 
00017   You should have received a copy of the GNU Library General Public License
00018   along with this library; see the file COPYING.LIB.  If not, write to
00019   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020   Boston, MA 02110-1301, USA.
00021 */
00022 
00034 #include "kmime_charfreq.h"
00035 
00036 using namespace KMime;
00037 
00042 //@cond PRIVATE
00043 //class KMime::CharFreq::Private
00044 //{
00045 //  public:
00046 //};
00047 //@endcond
00048 
00049 CharFreq::CharFreq( const QByteArray &buf )
00050   : mNUL( 0 ),
00051     mCTL( 0 ),
00052     mCR( 0 ), mLF( 0 ),
00053     mCRLF( 0 ),
00054     mPrintable( 0 ),
00055     mEightBit( 0 ),
00056     mTotal( 0 ),
00057     mLineMin( 0xffffffff ),
00058     mLineMax( 0 ),
00059     mTrailingWS( false ),
00060     mLeadingFrom( false )
00061 {
00062   if ( !buf.isEmpty() ) {
00063     count( buf.data(), buf.size() );
00064   }
00065 }
00066 
00067 CharFreq::CharFreq( const char *buf, size_t len )
00068   : mNUL( 0 ),
00069     mCTL( 0 ),
00070     mCR( 0 ), mLF( 0 ),
00071     mCRLF( 0 ),
00072     mPrintable( 0 ),
00073     mEightBit( 0 ),
00074     mTotal( 0 ),
00075     mLineMin( 0xffffffff ),
00076     mLineMax( 0 ),
00077     mTrailingWS( false ),
00078     mLeadingFrom( false )
00079 {
00080   if ( buf && len > 0 ) {
00081     count( buf, len );
00082   }
00083 }
00084 
00085 //@cond PRIVATE
// Returns true only for a space or a horizontal tab; every other byte,
// including CR and LF, yields false.
static inline bool isWS( char ch )
{
  switch ( ch ) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}
00090 //@endcond
00091 
00092 void CharFreq::count( const char *it, size_t len )
00093 {
00094   const char *end = it + len;
00095   uint currentLineLength = 0;
00096   // initialize the prevChar with LF so that From_ detection works w/o
00097   // special-casing:
00098   char prevChar = '\n';
00099   char prevPrevChar = 0;
00100 
00101   for ( ; it != end ; ++it ) {
00102     ++currentLineLength;
00103     switch ( *it ) {
00104     case '\0': ++mNUL; break;
00105     case '\r': ++mCR;  break;
00106     case '\n': ++mLF;
00107       if ( prevChar == '\r' ) {
00108         --currentLineLength; ++mCRLF;
00109       }
00110       if ( currentLineLength >= mLineMax ) {
00111         mLineMax = currentLineLength-1;
00112       }
00113       if ( currentLineLength <= mLineMin ) {
00114         mLineMin = currentLineLength-1;
00115       }
00116       if ( !mTrailingWS ) {
00117         if ( isWS( prevChar ) ||
00118              ( prevChar == '\r' && isWS( prevPrevChar ) ) ) {
00119           mTrailingWS = true;
00120         }
00121       }
00122       currentLineLength = 0;
00123       break;
00124     case 'F': // check for lines starting with From_ if not found already:
00125       if ( !mLeadingFrom ) {
00126         if ( prevChar == '\n' && end - it >= 5 &&
00127              !qstrncmp( "From ", it, 5 ) ) {
00128           mLeadingFrom = true;
00129         }
00130       }
00131       ++mPrintable;
00132       break;
00133     default:
00134     {
00135       uchar c = *it;
00136       if ( c == '\t' || ( c >= ' ' && c <= '~' ) ) {
00137         ++mPrintable;
00138       } else if ( c == 127 || c < ' ' ) {
00139         ++mCTL;
00140       } else {
00141         ++mEightBit;
00142       }
00143     }
00144     }
00145     prevPrevChar = prevChar;
00146     prevChar = *it;
00147   }
00148 
00149   // consider the length of the last line
00150   if ( currentLineLength >= mLineMax ) {
00151     mLineMax = currentLineLength;
00152   }
00153   if ( currentLineLength <= mLineMin ) {
00154     mLineMin = currentLineLength;
00155   }
00156 
00157   // check whether the last character is tab or space
00158   if ( isWS( prevChar ) ) {
00159     mTrailingWS = true;
00160   }
00161 
00162   mTotal = len;
00163 }
00164 
00165 bool CharFreq::isEightBitData() const
00166 {
00167   return type() == EightBitData;
00168 }
00169 
00170 bool CharFreq::isEightBitText() const
00171 {
00172   return type() == EightBitText;
00173 }
00174 
00175 bool CharFreq::isSevenBitData() const
00176 {
00177   return type() == SevenBitData;
00178 }
00179 
00180 bool CharFreq::isSevenBitText() const
00181 {
00182   return type() == SevenBitText;
00183 }
00184 
00185 bool CharFreq::hasTrailingWhitespace() const
00186 {
00187   return mTrailingWS;
00188 }
00189 
00190 bool CharFreq::hasLeadingFrom() const
00191 {
00192   return mLeadingFrom;
00193 }
00194 
00195 CharFreq::Type CharFreq::type() const
00196 {
00197 #if 0
00198   qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
00199           "CR: %d; LF: %d; CRLF: %d;\n"
00200           "lineMin: %d; lineMax: %d;\n"
00201           "printable: %d; eightBit: %d;\n"
00202           "trailing whitespace: %s;\n"
00203           "leading 'From ': %s;\n",
00204           total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
00205           printable, eightBit,
00206           mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" );
00207 #endif
00208   if ( mNUL ) { // must be binary
00209     return Binary;
00210   }
00211 
00212   // doesn't contain NUL's:
00213   if ( mEightBit ) {
00214     if ( mLineMax > 988 ) {
00215       return EightBitData; // not allowed in 8bit
00216     }
00217     if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
00218       return EightBitData;
00219     }
00220     return EightBitText;
00221   }
00222 
00223   // doesn't contain NUL's, nor 8bit chars:
00224   if ( mLineMax > 988 ) {
00225     return SevenBitData;
00226   }
00227   if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
00228     return SevenBitData;
00229   }
00230 
00231   // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
00232   return SevenBitText;
00233 }
00234 
00235 float CharFreq::printableRatio() const
00236 {
00237   if ( mTotal ) {
00238     return float(mPrintable) / float(mTotal);
00239   } else {
00240     return 0;
00241   }
00242 }
00243 
00244 float CharFreq::controlCodesRatio() const
00245 {
00246   if ( mTotal ) {
00247     return float(mCTL) / float(mTotal);
00248   } else {
00249     return 0;
00250   }
00251 }
00252 
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 7 2012 23:55:58 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

kdepimlibs-4.8.3 API Reference

Skip menu "kdepimlibs-4.8.3 API Reference"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal