KMIME Library
kmime_util.cpp
00001 /* 00002 kmime_util.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001 the KMime authors. 00006 See file AUTHORS for details 00007 00008 This library is free software; you can redistribute it and/or 00009 modify it under the terms of the GNU Library General Public 00010 License as published by the Free Software Foundation; either 00011 version 2 of the License, or (at your option) any later version. 00012 00013 This library is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Library General Public License for more details. 00017 00018 You should have received a copy of the GNU Library General Public License 00019 along with this library; see the file COPYING.LIB. If not, write to 00020 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00021 Boston, MA 02110-1301, USA. 00022 */ 00023 00024 #include "kmime_util.h" 00025 #include "kmime_util_p.h" 00026 00027 #include "kmime_charfreq.h" 00028 #include "kmime_codecs.h" 00029 #include "kmime_header_parsing.h" 00030 #include "kmime_message.h" 00031 #include "kmime_warning.h" 00032 00033 #include <config-kmime.h> 00034 #include <kdefakes.h> // for strcasestr 00035 #include <kglobal.h> 00036 #include <klocale.h> 00037 #include <kcharsets.h> 00038 #include <kcodecs.h> 00039 #include <kdebug.h> 00040 00041 #include <QtCore/QList> 00042 #include <QtCore/QString> 00043 #include <QtCore/QTextCodec> 00044 00045 #include <ctype.h> 00046 #include <time.h> 00047 #include <stdlib.h> 00048 #include <unistd.h> 00049 #include <boost/concept_check.hpp> 00050 00051 using namespace KMime; 00052 00053 namespace KMime { 00054 00055 QList<QByteArray> c_harsetCache; 00056 QList<QByteArray> l_anguageCache; 00057 QString f_allbackCharEnc; 00058 bool u_seOutlookEncoding = false; 00059 00060 QByteArray cachedCharset( const QByteArray &name ) 00061 { 00062 foreach ( const QByteArray& charset, c_harsetCache ) { 00063 if ( qstricmp( name.data(), charset.data() ) == 0 ) { 00064 return charset; 00065 } 00066 } 00067 00068 c_harsetCache.append( name.toUpper() ); 00069 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00070 return c_harsetCache.last(); 00071 } 00072 00073 QByteArray cachedLanguage( const QByteArray &name ) 00074 { 00075 foreach ( const QByteArray& language, l_anguageCache ) { 00076 if ( qstricmp( name.data(), language.data() ) == 0 ) { 00077 return language; 00078 } 00079 } 00080 00081 l_anguageCache.append( name.toUpper() ); 00082 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); 00083 return l_anguageCache.last(); 00084 } 00085 00086 bool isUsAscii( const QString &s ) 00087 { 00088 uint sLength = s.length(); 00089 for ( uint i=0; i<sLength; i++ ) { 00090 if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii 00091 return false; 00092 } 00093 } 00094 return true; 00095 } 00096 00097 QString nameForEncoding( Headers::contentEncoding enc ) 00098 { 00099 switch( enc ) { 00100 case Headers::CE7Bit: return QString::fromLatin1( "7bit" ); 00101 case Headers::CE8Bit: return QString::fromLatin1( "8bit" ); 00102 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" ); 00103 case Headers::CEbase64: return QString::fromLatin1( "base64" ); 00104 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" ); 00105 case Headers::CEbinary: return QString::fromLatin1( "binary" ); 00106 default: return QString::fromLatin1( "unknown" ); 00107 } 00108 } 00109 00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data ) 00111 { 00112 QList<Headers::contentEncoding> allowed; 00113 CharFreq cf( data ); 00114 00115 switch ( cf.type() ) { 00116 case CharFreq::SevenBitText: 00117 allowed << Headers::CE7Bit; 00118 case CharFreq::EightBitText: 00119 allowed << Headers::CE8Bit; 00120 case CharFreq::SevenBitData: 00121 if ( cf.printableRatio() > 5.0/6.0 ) { 00122 // let n the length of data and p the number of printable chars. 00123 // Then base64 \approx 4n/3; qp \approx p + 3(n-p) 00124 // => qp < base64 iff p > 5n/6. 00125 allowed << Headers::CEquPr; 00126 allowed << Headers::CEbase64; 00127 } else { 00128 allowed << Headers::CEbase64; 00129 allowed << Headers::CEquPr; 00130 } 00131 break; 00132 case CharFreq::EightBitData: 00133 allowed << Headers::CEbase64; 00134 break; 00135 case CharFreq::None: 00136 default: 00137 Q_ASSERT( false ); 00138 } 00139 00140 return allowed; 00141 } 00142 00143 // "(),.:;<>@[\] 00144 const uchar specialsMap[16] = { 00145 0x00, 0x00, 0x00, 0x00, // CTLs 00146 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' 00147 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00148 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00149 }; 00150 00151 // "(),:;<>@[\]/=? 00152 const uchar tSpecialsMap[16] = { 00153 0x00, 0x00, 0x00, 0x00, // CTLs 00154 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' 00155 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' 00156 0x00, 0x00, 0x00, 0x00 // '`' ... DEL 00157 }; 00158 00159 // all except specials, CTLs, SPACE. 00160 const uchar aTextMap[16] = { 00161 0x00, 0x00, 0x00, 0x00, 00162 0x5F, 0x35, 0xFF, 0xC5, 00163 0x7F, 0xFF, 0xFF, 0xE3, 00164 0xFF, 0xFF, 0xFF, 0xFE 00165 }; 00166 00167 // all except tspecials, CTLs, SPACE. 00168 const uchar tTextMap[16] = { 00169 0x00, 0x00, 0x00, 0x00, 00170 0x5F, 0x36, 0xFF, 0xC0, 00171 0x7F, 0xFF, 0xFF, 0xE3, 00172 0xFF, 0xFF, 0xFF, 0xFE 00173 }; 00174 00175 // none except a-zA-Z0-9!*+-/ 00176 const uchar eTextMap[16] = { 00177 0x00, 0x00, 0x00, 0x00, 00178 0x40, 0x35, 0xFF, 0xC0, 00179 0x7F, 0xFF, 0xFF, 0xE0, 00180 0x7F, 0xFF, 0xFF, 0xE0 00181 }; 00182 00183 void setFallbackCharEncoding(const QString& fallbackCharEnc) 00184 { 00185 f_allbackCharEnc = fallbackCharEnc; 00186 } 00187 00188 QString fallbackCharEncoding() 00189 { 00190 return f_allbackCharEnc; 00191 } 00192 00193 void setUseOutlookAttachmentEncoding( bool violateStandard ) 00194 { 00195 u_seOutlookEncoding = violateStandard; 00196 } 00197 00198 bool useOutlookAttachmentEncoding() 00199 { 00200 return u_seOutlookEncoding; 00201 } 00202 00203 00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, 00205 const QByteArray &defaultCS, bool forceCS ) 00206 { 00207 QByteArray result; 00208 QByteArray spaceBuffer; 00209 const char *scursor = src.constData(); 00210 const char *send = scursor + src.length(); 00211 bool onlySpacesSinceLastWord = false; 00212 00213 while ( scursor != send ) { 00214 // space 00215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { 00216 spaceBuffer += *scursor++; 00217 continue; 00218 } 00219 00220 // possible start of an encoded word 00221 if ( *scursor == '=' ) { 00222 QByteArray language; 00223 QString decoded; 00224 ++scursor; 00225 const char *start = scursor; 00226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { 00227 result += decoded.toUtf8(); 00228 onlySpacesSinceLastWord = true; 00229 spaceBuffer.clear(); 00230 } else { 00231 if ( onlySpacesSinceLastWord ) { 00232 result += spaceBuffer; 00233 onlySpacesSinceLastWord = false; 00234 } 00235 result += '='; 00236 scursor = start; // reset cursor after parsing failure 00237 } 00238 continue; 00239 } else { 00240 // unencoded data 00241 if ( onlySpacesSinceLastWord ) { 00242 result += spaceBuffer; 00243 onlySpacesSinceLastWord = false; 00244 } 00245 result += *scursor; 00246 ++scursor; 00247 } 00248 } 00249 // If there are any chars that couldn't be decoded in UTF-8, 00250 // use the fallback charset if it exists 00251 const QString tryUtf8 = QString::fromUtf8( result ); 00252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) { 00253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc ); 00254 return codec->toUnicode( result ); 00255 } else { 00256 return tryUtf8; 00257 } 00258 } 00259 00260 QString decodeRFC2047String( const QByteArray &src ) 00261 { 00262 QByteArray usedCS; 00263 return decodeRFC2047String( src, usedCS, "utf-8", false ); 00264 } 00265 00266 static const char *reservedCharacters = "\"()<>@,.;:\\[]="; 00267 00268 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, 00269 bool addressHeader, bool allow8BitHeaders ) 00270 { 00271 QByteArray result; 00272 int start=0, end=0; 00273 bool nonAscii=false, ok=true, useQEncoding=false; 00274 00275 // fromLatin1() is safe here, codecForName() uses toLatin1() internally 00276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok ); 00277 00278 QByteArray usedCS; 00279 if ( !ok ) { 00280 //no codec available => try local8Bit and hope the best ;-) 00281 usedCS = KGlobal::locale()->encoding(); 00282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok ); 00283 } 00284 else { 00285 Q_ASSERT( codec ); 00286 if ( charset.isEmpty() ) 00287 usedCS = codec->name(); 00288 else 00289 usedCS = charset; 00290 } 00291 00292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader ); 00293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState ); 00294 if ( converterState.invalidChars > 0 ) { 00295 usedCS = "utf-8"; 00296 codec = QTextCodec::codecForName( usedCS ); 00297 encoded8Bit = codec->fromUnicode( src ); 00298 } 00299 00300 if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets 00301 useQEncoding = true; 00302 } 00303 00304 if ( allow8BitHeaders ) { 00305 return encoded8Bit; 00306 } 00307 00308 uint encoded8BitLength = encoded8Bit.length(); 00309 for ( unsigned int i=0; i<encoded8BitLength; i++ ) { 00310 if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries 00311 start = i + 1; 00312 } 00313 00314 // encode escape character, for japanese encodings... 00315 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) || 00316 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) { 00317 end = start; // non us-ascii char found, now we determine where to stop encoding 00318 nonAscii = true; 00319 break; 00320 } 00321 } 00322 00323 if ( nonAscii ) { 00324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00325 // we encode complete words 00326 end++; 00327 } 00328 00329 for ( int x=end; x<encoded8Bit.length(); x++ ) { 00330 if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) || 00331 ( addressHeader && ( strchr(reservedCharacters, encoded8Bit[x]) != 0 ) ) ) { 00332 end = x; // we found another non-ascii word 00333 00334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { 00335 // we encode complete words 00336 end++; 00337 } 00338 } 00339 } 00340 00341 result = encoded8Bit.left( start ) + "=?" + usedCS; 00342 00343 if ( useQEncoding ) { 00344 result += "?Q?"; 00345 00346 char c, hexcode;// "Q"-encoding implementation described in RFC 2047 00347 for ( int i=start; i<end; i++ ) { 00348 c = encoded8Bit[i]; 00349 if ( c == ' ' ) { // make the result readable with not MIME-capable readers 00350 result += '_'; 00351 } else { 00352 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems 00353 ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers 00354 ( ( c >= '0' ) && ( c <= '9' ) ) ) { 00355 result += c; 00356 } else { 00357 result += '='; // "stolen" from KMail ;-) 00358 hexcode = ((c & 0xF0) >> 4) + 48; 00359 if ( hexcode >= 58 ) { 00360 hexcode += 7; 00361 } 00362 result += hexcode; 00363 hexcode = (c & 0x0F) + 48; 00364 if ( hexcode >= 58 ) { 00365 hexcode += 7; 00366 } 00367 result += hexcode; 00368 } 00369 } 00370 } 00371 } else { 00372 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64(); 00373 } 00374 00375 result +="?="; 00376 result += encoded8Bit.right( encoded8Bit.length() - end ); 00377 } else { 00378 result = encoded8Bit; 00379 } 00380 00381 return result; 00382 } 00383 00384 QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset ) 00385 { 00386 QByteArray result; 00387 QList<QChar> splitChars; 00388 splitChars << QLatin1Char(',') << QLatin1Char('\"') << QLatin1Char(';') << QLatin1Char('\\'); 00389 const QChar *ch = src.constData(); 00390 const int length = src.length(); 00391 int pos = 0; 00392 int wordStart = 0; 00393 00394 //qDebug() << "Input:" << src; 00395 // Loop over all characters of the string. 00396 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. 00397 while (pos < length) { 00398 //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toAscii(); 00399 const bool isAscii = ch->unicode() < 127; 00400 const bool isReserved = (strchr( reservedCharacters, ch->toAscii() ) != 0); 00401 if ( isAscii && isReserved ) { 00402 const int wordSize = pos - wordStart; 00403 if (wordSize > 0) { 00404 const QString word = src.mid( wordStart, wordSize ); 00405 result += encodeRFC2047String( word, charset ); 00406 } 00407 00408 result += ch->toAscii(); 00409 wordStart = pos + 1; 00410 } 00411 ch++; 00412 pos++; 00413 } 00414 00415 // Encode the last word 00416 const int wordSize = pos - wordStart; 00417 if (wordSize > 0) { 00418 const QString word = src.mid( wordStart, pos - wordStart ); 00419 result += encodeRFC2047String( word, charset ); 00420 } 00421 00422 return result; 00423 } 00424 00425 00426 00427 //----------------------------------------------------------------------------- 00428 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset ) 00429 { 00430 if ( str.isEmpty() ) 00431 return QByteArray(); 00432 00433 00434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00435 QByteArray latin; 00436 if ( charset == "us-ascii" ) 00437 latin = str.toAscii(); 00438 else if ( codec ) 00439 latin = codec->fromUnicode( str ); 00440 else 00441 latin = str.toLocal8Bit(); 00442 00443 char *l; 00444 for ( l = latin.data(); *l; ++l ) { 00445 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) 00446 // *l is control character or 8-bit char 00447 break; 00448 } 00449 if ( !*l ) 00450 return latin; 00451 00452 QByteArray result = charset + "''"; 00453 for ( l = latin.data(); *l; ++l ) { 00454 bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' ); 00455 if( !needsQuoting ) { 00456 const QByteArray especials = "()<>@,;:\"/[]?.= \033"; 00457 int len = especials.length(); 00458 for ( int i = 0; i < len; i++ ) 00459 if ( *l == especials[i] ) { 00460 needsQuoting = true; 00461 break; 00462 } 00463 } 00464 if ( needsQuoting ) { 00465 result += '%'; 00466 unsigned char hexcode; 00467 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48; 00468 if ( hexcode >= 58 ) 00469 hexcode += 7; 00470 result += hexcode; 00471 hexcode = ( *l & 0x0F ) + 48; 00472 if ( hexcode >= 58 ) 00473 hexcode += 7; 00474 result += hexcode; 00475 } else { 00476 result += *l; 00477 } 00478 } 00479 return result; 00480 } 00481 00482 00483 //----------------------------------------------------------------------------- 00484 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, 00485 bool forceCS ) 00486 { 00487 int p = str.indexOf('\''); 00488 if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ))->toUnicode( str ); 00489 00490 00491 QByteArray charset = str.left(p); 00492 00493 QByteArray st = str.mid( str.lastIndexOf('\'') + 1 ); 00494 00495 char ch, ch2; 00496 p = 0; 00497 while (p < (int)st.length()) 00498 { 00499 if (st.at(p) == 37) 00500 { 00501 // Only try to decode the percent-encoded character if the percent sign 00502 // is really followed by two other characters, see testcase at bug 163024 00503 if ( p + 2 < st.length() ) { 00504 ch = st.at(p+1) - 48; 00505 if (ch > 16) 00506 ch -= 7; 00507 ch2 = st.at(p+2) - 48; 00508 if (ch2 > 16) 00509 ch2 -= 7; 00510 st[p] = ch * 16 + ch2; 00511 st.remove( p+1, 2 ); 00512 } 00513 } 00514 p++; 00515 } 00516 kDebug() << "Got pre-decoded:" << st; 00517 QString result; 00518 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); 00519 if ( !charsetcodec || forceCS ) 00520 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) ); 00521 00522 usedCS = charsetcodec->name(); 00523 return charsetcodec->toUnicode( st ); 00524 } 00525 00526 QString decodeRFC2231String( const QByteArray &src ) 00527 { 00528 QByteArray usedCS; 00529 return decodeRFC2231String( src, usedCS, "utf-8", false ); 00530 } 00531 00532 QByteArray uniqueString() 00533 { 00534 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; 00535 time_t now; 00536 char p[11]; 00537 int pos, ran; 00538 unsigned int timeval; 00539 00540 p[10] = '\0'; 00541 now = time( 0 ); 00542 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0)); 00543 timeval = (now / ran) + getpid(); 00544 00545 for ( int i=0; i<10; i++ ) { 00546 pos = (int) (61.0*rand() / (RAND_MAX + 1.0)); 00547 //kDebug() << pos; 00548 p[i] = chars[pos]; 00549 } 00550 00551 QByteArray ret; 00552 ret.setNum( timeval ); 00553 ret += '.'; 00554 ret += p; 00555 00556 return ret; 00557 } 00558 00559 QByteArray multiPartBoundary() 00560 { 00561 return "nextPart" + uniqueString(); 00562 } 00563 00564 QByteArray unfoldHeader( const QByteArray &header ) 00565 { 00566 QByteArray result; 00567 if ( header.isEmpty() ) { 00568 return result; 00569 } 00570 00571 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; 00572 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { 00573 foldBegin = foldEnd = foldMid; 00574 // find the first space before the line-break 00575 while ( foldBegin > 0 ) { 00576 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) { 00577 break; 00578 } 00579 --foldBegin; 00580 } 00581 // find the first non-space after the line-break 00582 while ( foldEnd <= header.length() - 1 ) { 00583 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) { 00584 ++foldEnd; 00585 } 00586 else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' && 00587 header[foldEnd] == '=' && foldEnd + 2 < header.length() && 00588 ( ( header[foldEnd + 1] == '0' && 00589 header[foldEnd + 2] == '9' ) || 00590 ( header[foldEnd + 1] == '2' && 00591 header[foldEnd + 2] == '0' ) ) ) { 00592 // bug #86302: malformed header continuation starting with =09/=20 00593 foldEnd += 3; 00594 } 00595 else { 00596 break; 00597 } 00598 } 00599 00600 result += header.mid( pos, foldBegin - pos ); 00601 if ( foldEnd < header.length() -1 ) 00602 result += ' '; 00603 pos = foldEnd; 00604 } 00605 const int len = header.length(); 00606 if ( len > pos ) { 00607 result += header.mid( pos, len - pos ); 00608 } 00609 return result; 00610 } 00611 00612 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded ) 00613 { 00614 int end = dataBegin; 00615 int len = src.length() - 1; 00616 00617 if ( folded ) 00618 *folded = false; 00619 00620 if ( dataBegin < 0 ) { 00621 // Not found 00622 return -1; 00623 } 00624 00625 if ( dataBegin > len ) { 00626 // No data available 00627 return len + 1; 00628 } 00629 00630 // If the first line contains nothing, but the next line starts with a space 00631 // or a tab, that means a stupid mail client has made the first header field line 00632 // entirely empty, and has folded the rest to the next line(s). 00633 if ( src.at(end) == '\n' && end + 1 < len && 00634 ( src[end+1] == ' ' || src[end+1] == '\t' ) ) { 00635 00636 // Skip \n and first whitespace 00637 dataBegin += 2; 00638 end += 2; 00639 } 00640 00641 if ( src.at(end) != '\n' ) { // check if the header is not empty 00642 while ( true ) { 00643 end = src.indexOf( '\n', end + 1 ); 00644 if ( end == -1 || end == len ) { 00645 // end of string 00646 break; 00647 } 00648 else if ( src[end+1] == ' ' || src[end+1] == '\t' || 00649 ( src[end+1] == '=' && end+3 <= len && 00650 ( ( src[end+2] == '0' && src[end+3] == '9' ) || 00651 ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) { 00652 // next line is header continuation or starts with =09/=20 (bug #86302) 00653 if ( folded ) 00654 *folded = true; 00655 } else { 00656 // end of header (no header continuation) 00657 break; 00658 } 00659 } 00660 } 00661 00662 if ( end < 0 ) { 00663 end = len + 1; //take the rest of the string 00664 } 00665 return end; 00666 } 00667 00668 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) 00669 { 00670 QByteArray n = name; 00671 n.append( ':' ); 00672 int begin = -1; 00673 00674 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { 00675 begin = 0; 00676 } else { 00677 n.prepend('\n'); 00678 const char *p = strcasestr( src.constData(), n.constData() ); 00679 if ( !p ) { 00680 begin = -1; 00681 } else { 00682 begin = p - src.constData(); 00683 ++begin; 00684 } 00685 } 00686 00687 if ( begin > -1) { //there is a header with the given name 00688 dataBegin = begin + name.length() + 1; //skip the name 00689 // skip the usual space after the colon 00690 if ( src.at( dataBegin ) == ' ' ) { 00691 ++dataBegin; 00692 } 00693 end = findHeaderLineEnd( src, dataBegin, folded ); 00694 return begin; 00695 00696 } else { 00697 end = -1; 00698 dataBegin = -1; 00699 return -1; //header not found 00700 } 00701 } 00702 00703 QByteArray extractHeader( const QByteArray &src, const QByteArray &name ) 00704 { 00705 int begin, end; 00706 bool folded; 00707 QByteArray result; 00708 00709 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) { 00710 return result; 00711 } 00712 00713 if ( begin >= 0 ) { 00714 if ( !folded ) { 00715 result = src.mid( begin, end - begin ); 00716 } else { 00717 if ( end > begin ) { 00718 QByteArray hdrValue = src.mid( begin, end - begin ); 00719 result = unfoldHeader( hdrValue ); 00720 } 00721 } 00722 } 00723 return result; 00724 } 00725 00726 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name ) 00727 { 00728 int begin, end; 00729 bool folded; 00730 QList<QByteArray> result; 00731 QByteArray copySrc( src ); 00732 00733 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) { 00734 return result; 00735 } 00736 00737 while ( begin >= 0 ) { 00738 if ( !folded ) { 00739 result.append( copySrc.mid( begin, end - begin ) ); 00740 } else { 00741 QByteArray hdrValue = copySrc.mid( begin, end - begin ); 00742 result.append( unfoldHeader( hdrValue ) ); 00743 } 00744 00745 // get the next one, a tiny bit ugly, but we don't want the previous to be found again... 00746 copySrc = copySrc.mid( end ); 00747 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) { 00748 break; 00749 } 00750 } 00751 00752 return result; 00753 } 00754 00755 void removeHeader( QByteArray &header, const QByteArray &name ) 00756 { 00757 int begin, end, dummy; 00758 begin = indexOfHeader( header, name, end, dummy ); 00759 if ( begin >= 0 ) { 00760 header.remove( begin, end - begin + 1 ); 00761 } 00762 } 00763 00764 QByteArray CRLFtoLF( const QByteArray &s ) 00765 { 00766 QByteArray ret = s; 00767 ret.replace( "\r\n", "\n" ); 00768 return ret; 00769 } 00770 00771 QByteArray CRLFtoLF( const char *s ) 00772 { 00773 QByteArray ret = s; 00774 return CRLFtoLF( ret ); 00775 } 00776 00777 QByteArray LFtoCRLF( const QByteArray &s ) 00778 { 00779 QByteArray ret = s; 00780 ret.replace( '\n', "\r\n" ); 00781 return ret; 00782 } 00783 00784 QByteArray LFtoCRLF( const char *s ) 00785 { 00786 QByteArray ret = s; 00787 return LFtoCRLF( ret ); 00788 } 00789 00790 namespace { 00791 template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str ) 00792 { 00793 bool inQuote = false; 00794 for ( int i = 0; i < str.length(); ++i ) { 00795 if ( str[i] == CharType( '"' ) ) { 00796 str.remove( i, 1 ); 00797 i--; 00798 inQuote = !inQuote; 00799 } else { 00800 if ( inQuote && ( str[i] == CharType( '\\' ) ) ) { 00801 str.remove( i, 1 ); 00802 } 00803 } 00804 } 00805 } 00806 } 00807 00808 void removeQuots( QByteArray &str ) 00809 { 00810 removeQuotesGeneric<QByteArray,char>( str ); 00811 } 00812 00813 void removeQuots( QString &str ) 00814 { 00815 removeQuotesGeneric<QString,QLatin1Char>( str ); 00816 } 00817 00818 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString> 00819 void addQuotes_impl( StringType &str, bool forceQuotes ) 00820 { 00821 bool needsQuotes=false; 00822 for ( int i=0; i < str.length(); i++ ) { 00823 const CharType cur = str.at( i ); 00824 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) { 00825 needsQuotes = true; 00826 } 00827 if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) { 00828 str.insert( i, CharConverterType( '\\' ) ); 00829 i++; 00830 } 00831 } 00832 00833 if ( needsQuotes || forceQuotes ) { 00834 str.insert( 0, CharConverterType( '\"' ) ); 00835 str.append( StringConverterType( "\"" ) ); 00836 } 00837 } 00838 00839 void addQuotes( QByteArray &str, bool forceQuotes ) 00840 { 00841 addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes ); 00842 } 00843 00844 void addQuotes( QString &str, bool forceQuotes ) 00845 { 00846 addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes ); 00847 } 00848 00849 KMIME_EXPORT QString balanceBidiState( const QString &input ) 00850 { 00851 const int LRO = 0x202D; 00852 const int RLO = 0x202E; 00853 const int LRE = 0x202A; 00854 const int RLE = 0x202B; 00855 const int PDF = 0x202C; 00856 00857 QString result = input; 00858 00859 int openDirChangers = 0; 00860 int numPDFsRemoved = 0; 00861 for ( int i = 0; i < input.length(); i++ ) { 00862 const ushort &code = input.at( i ).unicode(); 00863 if ( code == LRO || code == RLO || code == LRE || code == RLE ) { 00864 openDirChangers++; 00865 } 00866 else if ( code == PDF ) { 00867 if ( openDirChangers > 0 ) { 00868 openDirChangers--; 00869 } 00870 else { 00871 // One PDF too much, remove it 00872 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input; 00873 result.remove( i - numPDFsRemoved, 1 ); 00874 numPDFsRemoved++; 00875 } 00876 } 00877 } 00878 00879 if ( openDirChangers > 0 ) { 00880 kWarning() << "Possible Unicode spoofing detected in" << input; 00881 00882 // At PDF chars to the end until the correct state is restored. 00883 // As a special exception, when encountering quoted strings, place the PDF before 00884 // the last quote. 00885 for ( int i = openDirChangers; i > 0; i-- ) { 00886 if ( result.endsWith( QLatin1Char( '"' ) ) ) 00887 result.insert( result.length() - 1, QChar( PDF ) ); 00888 else 00889 result += QChar( PDF ); 00890 } 00891 } 00892 00893 return result; 00894 } 00895 00896 QString removeBidiControlChars( const QString &input ) 00897 { 00898 const int LRO = 0x202D; 00899 const int RLO = 0x202E; 00900 const int LRE = 0x202A; 00901 const int RLE = 0x202B; 00902 QString result = input; 00903 result.remove( LRO ); 00904 result.remove( RLO ); 00905 result.remove( LRE ); 00906 result.remove( RLE ); 00907 return result; 00908 } 00909 00910 static bool isCryptoPart( Content* content ) 00911 { 00912 if( !content->contentType( false ) ) 00913 return false; 00914 00915 if( content->contentType()->subType().toLower() == "octet-stream" && 00916 !content->contentDisposition( false ) ) 00917 return false; 00918 00919 const Headers::ContentType *contentType = content->contentType(); 00920 const QByteArray lowerSubType = contentType->subType().toLower(); 00921 return ( contentType->mediaType().toLower() == "application" && 00922 ( lowerSubType == "pgp-encrypted" || 00923 lowerSubType == "pgp-signature" || 00924 lowerSubType == "pkcs7-mime" || 00925 lowerSubType == "pkcs7-signature" || 00926 lowerSubType == "x-pkcs7-signature" || 00927 ( lowerSubType == "octet-stream" && 00928 content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) ); 00929 } 00930 00931 bool hasAttachment( Content* content ) 00932 { 00933 if( !content ) 00934 return false; 00935 00936 bool emptyFilename = true; 00937 if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() ) 00938 emptyFilename = false; 00939 00940 if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() ) 00941 emptyFilename = false; 00942 00943 // ignore crypto parts 00944 if( !emptyFilename && !isCryptoPart( content ) ) 00945 return true; 00946 00947 // Ok, content itself is not an attachment. now we deal with multiparts 00948 if( content->contentType()->isMultipart() ) { 00949 Q_FOREACH( Content* child, content->contents() ) { 00950 if( hasAttachment( child ) ) 00951 return true; 00952 } 00953 } 00954 00955 return false; 00956 } 00957 00958 bool isSigned( Message *message ) 00959 { 00960 if ( !message ) 00961 return false; 00962 00963 const KMime::Headers::ContentType* const contentType = message->contentType(); 00964 if ( contentType->isSubtype( "signed" ) || 00965 contentType->isSubtype( "pgp-signature" ) || 00966 contentType->isSubtype( "pkcs7-signature" ) || 00967 contentType->isSubtype( "x-pkcs7-signature" ) || 00968 message->mainBodyPart( "multipart/signed" ) || 00969 message->mainBodyPart( "application/pgp-signature" ) || 00970 message->mainBodyPart( "application/pkcs7-signature" ) || 00971 message->mainBodyPart( "application/x-pkcs7-signature" ) ) { 00972 return true; 00973 } 00974 00975 return false; 00976 } 00977 00978 bool isEncrypted( Message *message ) 00979 { 00980 if ( !message ) 00981 return false; 00982 00983 const KMime::Headers::ContentType* const contentType = message->contentType(); 00984 if ( contentType->isSubtype( "encrypted" ) || 00985 contentType->isSubtype( "pgp-encrypted" ) || 00986 contentType->isSubtype( "pkcs7-mime" ) || 00987 message->mainBodyPart( "multipart/encrypted" ) || 00988 message->mainBodyPart( "application/pgp-encrypted" ) || 00989 message->mainBodyPart( "application/pkcs7-mime" ) ) { 00990 return true; 00991 } 00992 00993 return false; 00994 } 00995 00996 bool isInvitation( Content *content ) 00997 { 00998 if ( !content ) 00999 return false; 01000 01001 const KMime::Headers::ContentType* const contentType = content->contentType( false ); 01002 01003 if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) 01004 return true; 01005 01006 return false; 01007 } 01008 01009 } // namespace KMime
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:41:33 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:41:33 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.