KIMAP Library
rfccodecs.cpp
Go to the documentation of this file.
00001 /********************************************************************** 00002 * 00003 * rfccodecs.cpp - handler for various rfc/mime encodings 00004 * Copyright (C) 2000 s.carstens@gmx.de 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Library General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Library General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Library General Public License 00017 * along with this library; see the file COPYING.LIB. If not, write to 00018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00019 * Boston, MA 02110-1301, USA. 00020 * 00021 *********************************************************************/ 00033 #include "rfccodecs.h" 00034 00035 #include <ctype.h> 00036 #include <sys/types.h> 00037 00038 #include <stdio.h> 00039 #include <stdlib.h> 00040 00041 #include <QtCore/QTextCodec> 00042 #include <QtCore/QBuffer> 00043 #include <QtCore/QRegExp> 00044 #include <QtCore/QByteArray> 00045 #include <QtCore/QLatin1Char> 00046 #include <kcodecs.h> 00047 00048 using namespace KIMAP; 00049 00050 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997. 00051 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000 00052 00053 //@cond PRIVATE 00054 static const unsigned char base64chars[] = 00055 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; 00056 #define UNDEFINED 64 00057 #define MAXLINE 76 00058 static const char especials[17] = "()<>@,;:\"/[]?.= "; 00059 00060 /* UTF16 definitions */ 00061 #define UTF16MASK 0x03FFUL 00062 #define UTF16SHIFT 10 00063 #define UTF16BASE 0x10000UL 00064 #define UTF16HIGHSTART 0xD800UL 00065 #define UTF16HIGHEND 0xDBFFUL 00066 #define UTF16LOSTART 0xDC00UL 00067 #define UTF16LOEND 0xDFFFUL 00068 //@endcond 00069 00070 //----------------------------------------------------------------------------- 00071 QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc ) 00072 { 00073 unsigned char c, i, bitcount; 00074 unsigned long ucs4, utf16, bitbuf; 00075 unsigned char base64[256], utf8[6]; 00076 unsigned int srcPtr = 0; 00077 QByteArray dst; 00078 QByteArray src = inSrc; 00079 uint srcLen = inSrc.length(); 00080 00081 /* initialize modified base64 decoding table */ 00082 memset( base64, UNDEFINED, sizeof( base64 ) ); 00083 for ( i = 0; i < sizeof( base64chars ); ++i ) { 00084 base64[(int)base64chars[i]] = i; 00085 } 00086 00087 /* loop until end of string */ 00088 while ( srcPtr < srcLen ) { 00089 c = src[srcPtr++]; 00090 /* deal with literal characters and &- */ 00091 if ( c != '&' || src[srcPtr] == '-' ) { 00092 /* encode literally */ 00093 dst += c; 00094 /* skip over the '-' if this is an &- sequence */ 00095 if ( c == '&' ) { 00096 srcPtr++; 00097 } 00098 } else { 00099 /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */ 00100 bitbuf = 0; 00101 bitcount = 0; 00102 ucs4 = 0; 00103 while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) { 00104 ++srcPtr; 00105 bitbuf = ( bitbuf << 6 ) | c; 00106 bitcount += 6; 00107 /* enough bits for a UTF-16 character? */ 00108 if ( bitcount >= 16 ) { 00109 bitcount -= 16; 00110 utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff; 00111 /* convert UTF16 to UCS4 */ 00112 if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) { 00113 ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT; 00114 continue; 00115 } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) { 00116 ucs4 += utf16 - UTF16LOSTART + UTF16BASE; 00117 } else { 00118 ucs4 = utf16; 00119 } 00120 /* convert UTF-16 range of UCS4 to UTF-8 */ 00121 if ( ucs4 <= 0x7fUL ) { 00122 utf8[0] = ucs4; 00123 i = 1; 00124 } else if ( ucs4 <= 0x7ffUL ) { 00125 utf8[0] = 0xc0 | ( ucs4 >> 6 ); 00126 utf8[1] = 0x80 | ( ucs4 & 0x3f ); 00127 i = 2; 00128 } else if ( ucs4 <= 0xffffUL ) { 00129 utf8[0] = 0xe0 | ( ucs4 >> 12 ); 00130 utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f ); 00131 utf8[2] = 0x80 | ( ucs4 & 0x3f ); 00132 i = 3; 00133 } else { 00134 utf8[0] = 0xf0 | ( ucs4 >> 18 ); 00135 utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f ); 00136 utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f ); 00137 utf8[3] = 0x80 | ( ucs4 & 0x3f ); 00138 i = 4; 00139 } 00140 /* copy it */ 00141 for ( c = 0; c < i; ++c ) { 00142 dst += utf8[c]; 00143 } 00144 } 00145 } 00146 /* skip over trailing '-' in modified UTF-7 encoding */ 00147 if ( src[srcPtr] == '-' ) { 00148 ++srcPtr; 00149 } 00150 } 00151 } 00152 return dst; 00153 } 00154 00155 QString KIMAP::decodeImapFolderName( const QString &inSrc ) 00156 { 00157 return QString::fromUtf8( decodeImapFolderName( inSrc.toUtf8() ).data() ); 00158 } 00159 00160 //----------------------------------------------------------------------------- 00161 00162 QByteArray KIMAP::quoteIMAP( const QByteArray &src ) 00163 { 00164 uint len = src.length(); 00165 QByteArray result; 00166 result.reserve( 2 * len ); 00167 for ( unsigned int i = 0; i < len; i++ ) { 00168 if ( src[i] == '"' || src[i] == '\\' ) { 00169 result += '\\'; 00170 } 00171 result += src[i]; 00172 } 00173 result.squeeze(); 00174 return result; 00175 } 00176 00177 QString KIMAP::quoteIMAP( const QString &src ) 00178 { 00179 uint len = src.length(); 00180 QString result; 00181 result.reserve( 2 * len ); 00182 for ( unsigned int i = 0; i < len; i++ ) { 00183 if ( src[i] == '"' || src[i] == '\\' ) { 00184 result += '\\'; 00185 } 00186 result += src[i]; 00187 } 00188 //result.squeeze(); - unnecessary and slow 00189 return result; 00190 } 00191 00192 //----------------------------------------------------------------------------- 00193 QString KIMAP::encodeImapFolderName( const QString &inSrc ) 00194 { 00195 return QString::fromUtf8( encodeImapFolderName( inSrc.toUtf8() ).data() ); 00196 } 00197 00198 QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc ) 00199 { 00200 unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag; 00201 unsigned int ucs4, bitbuf; 00202 QByteArray src = inSrc; 00203 QByteArray dst; 00204 00205 int srcPtr = 0; 00206 utf7mode = 0; 00207 utf8total = 0; 00208 bitstogo = 0; 00209 utf8pos = 0; 00210 bitbuf = 0; 00211 ucs4 = 0; 00212 while ( srcPtr < src.length () ) { 00213 c = (unsigned char)src[srcPtr++]; 00214 /* normal character? */ 00215 if ( c >= ' ' && c <= '~' ) { 00216 /* switch out of UTF-7 mode */ 00217 if ( utf7mode ) { 00218 if ( bitstogo ) { 00219 dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F]; 00220 bitstogo = 0; 00221 } 00222 dst += '-'; 00223 utf7mode = 0; 00224 } 00225 dst += c; 00226 /* encode '&' as '&-' */ 00227 if ( c == '&' ) { 00228 dst += '-'; 00229 } 00230 continue; 00231 } 00232 /* switch to UTF-7 mode */ 00233 if ( !utf7mode ) { 00234 dst += '&'; 00235 utf7mode = 1; 00236 } 00237 /* Encode US-ASCII characters as themselves */ 00238 if ( c < 0x80 ) { 00239 ucs4 = c; 00240 utf8total = 1; 00241 } else if ( utf8total ) { 00242 /* save UTF8 bits into UCS4 */ 00243 ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL ); 00244 if ( ++utf8pos < utf8total ) { 00245 continue; 00246 } 00247 } else { 00248 utf8pos = 1; 00249 if ( c < 0xE0 ) { 00250 utf8total = 2; 00251 ucs4 = c & 0x1F; 00252 } else if ( c < 0xF0 ) { 00253 utf8total = 3; 00254 ucs4 = c & 0x0F; 00255 } else { 00256 /* NOTE: can't convert UTF8 sequences longer than 4 */ 00257 utf8total = 4; 00258 ucs4 = c & 0x03; 00259 } 00260 continue; 00261 } 00262 /* loop to split ucs4 into two utf16 chars if necessary */ 00263 utf8total = 0; 00264 do 00265 { 00266 if ( ucs4 >= UTF16BASE ) { 00267 ucs4 -= UTF16BASE; 00268 bitbuf = 00269 ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART ); 00270 ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART; 00271 utf16flag = 1; 00272 } else { 00273 bitbuf = ( bitbuf << 16 ) | ucs4; 00274 utf16flag = 0; 00275 } 00276 bitstogo += 16; 00277 /* spew out base64 */ 00278 while ( bitstogo >= 6 ) { 00279 bitstogo -= 6; 00280 dst += 00281 base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F]; 00282 } 00283 } 00284 while ( utf16flag ); 00285 } 00286 /* if in UTF-7 mode, finish in ASCII */ 00287 if ( utf7mode ) { 00288 if ( bitstogo ) { 00289 dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F]; 00290 } 00291 dst += '-'; 00292 } 00293 return quoteIMAP( dst ); 00294 } 00295 00296 //----------------------------------------------------------------------------- 00297 QTextCodec *KIMAP::codecForName( const QString &str ) 00298 { 00299 if ( str.isEmpty () ) { 00300 return 0; 00301 } 00302 return QTextCodec::codecForName ( str.toLower (). 00303 replace ( "windows", "cp" ).toLatin1 () ); 00304 } 00305 00306 //----------------------------------------------------------------------------- 00307 const QString KIMAP::decodeRFC2047String( const QString &str ) 00308 { 00309 QString throw_away; 00310 00311 return decodeRFC2047String( str, throw_away ); 00312 } 00313 00314 //----------------------------------------------------------------------------- 00315 const QString KIMAP::decodeRFC2047String( const QString &str, 00316 QString &charset ) 00317 { 00318 QString throw_away; 00319 00320 return decodeRFC2047String( str, charset, throw_away ); 00321 } 00322 00323 //----------------------------------------------------------------------------- 00324 const QString KIMAP::decodeRFC2047String( const QString &str, 00325 QString &charset, 00326 QString &language ) 00327 { 00328 //do we have a rfc string 00329 if ( !str.contains( "=?" ) ) { 00330 return str; 00331 } 00332 00333 // FIXME get rid of the conversion? 00334 QByteArray aStr = str.toAscii (); // QString.length() means Unicode chars 00335 QByteArray result; 00336 char *pos, *beg, *end, *mid = 0; 00337 QByteArray cstr; 00338 char encoding = 0, ch; 00339 bool valid; 00340 const int maxLen = 200; 00341 int i; 00342 00343 // result.truncate(aStr.length()); 00344 for ( pos = aStr.data (); *pos; pos++ ) { 00345 if ( pos[0] != '=' || pos[1] != '?' ) { 00346 result += *pos; 00347 continue; 00348 } 00349 beg = pos + 2; 00350 end = beg; 00351 valid = true; 00352 // parse charset name 00353 for ( i = 2, pos += 2; 00354 i < maxLen && 00355 ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) ); 00356 i++ ) 00357 pos++; 00358 if ( *pos != '?' || i < 4 || i >= maxLen ) { 00359 valid = false; 00360 } else { 00361 charset = QByteArray( beg, i - 1 ); // -2 + 1 for the zero 00362 int pt = charset.lastIndexOf( '*' ); 00363 if ( pt != -1 ) { 00364 // save language for later usage 00365 language = charset.right( charset.length () - pt - 1 ); 00366 00367 // tie off language as defined in rfc2047 00368 charset.truncate( pt ); 00369 } 00370 // get encoding and check delimiting question marks 00371 encoding = toupper( pos[1] ); 00372 if ( pos[2] != '?' || 00373 ( encoding != 'Q' && encoding != 'B' && 00374 encoding != 'q' && encoding != 'b' ) ) { 00375 valid = false; 00376 } 00377 pos += 3; 00378 i += 3; 00379 // kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'"; 00380 } 00381 if ( valid ) { 00382 mid = pos; 00383 // search for end of encoded part 00384 while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) { 00385 i++; 00386 pos++; 00387 } 00388 end = pos + 2;//end now points to the first char after the encoded string 00389 if ( i >= maxLen || !*pos ) { 00390 valid = false; 00391 } 00392 } 00393 if ( valid ) { 00394 ch = *pos; 00395 *pos = '\0'; 00396 cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) ); 00397 if ( encoding == 'Q' ) { 00398 // decode quoted printable text 00399 for ( i = cstr.length () - 1; i >= 0; --i ) { 00400 if ( cstr[i] == '_' ) { 00401 cstr[i] = ' '; 00402 } 00403 } 00404 // kDebug() << "before QP '" 00405 // << cstr << "'"; 00406 cstr = KCodecs::quotedPrintableDecode( cstr ); 00407 // kDebug() << "after QP '" 00408 // << cstr << "'"; 00409 } else { 00410 // decode base64 text 00411 cstr = QByteArray::fromBase64( cstr ); 00412 } 00413 *pos = ch; 00414 int len = cstr.length(); 00415 for ( i = 0; i < len; ++i ) { 00416 result += cstr[i]; 00417 } 00418 00419 pos = end - 1; 00420 } else { 00421 // kDebug() << "invalid"; 00422 //result += "=?"; 00423 //pos = beg -1; // because pos gets increased shortly afterwards 00424 pos = beg - 2; 00425 result += *pos++; 00426 result += *pos; 00427 } 00428 } 00429 if ( !charset.isEmpty () ) { 00430 QTextCodec *aCodec = codecForName( charset.toAscii () ); 00431 if ( aCodec ) { 00432 // kDebug() << "Codec is" << aCodec->name(); 00433 return aCodec->toUnicode( result ); 00434 } 00435 } 00436 return result; 00437 } 00438 00439 //----------------------------------------------------------------------------- 00440 const QString KIMAP::encodeRFC2047String( const QString &str ) 00441 { 00442 return encodeRFC2047String( str.toLatin1() ); 00443 } 00444 00445 //----------------------------------------------------------------------------- 00446 const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str ) 00447 { 00448 if ( str.isEmpty () ) { 00449 return str; 00450 } 00451 00452 const signed char *latin = 00453 reinterpret_cast<const signed char *> 00454 ( str.data() ), *l, *start, *stop; 00455 char hexcode; 00456 int numQuotes, i; 00457 int rptr = 0; 00458 // My stats show this number results in 12 resize() out of 73,000 00459 int resultLen = 3 * str.length() / 2; 00460 QByteArray result( resultLen, '\0' ); 00461 00462 while ( *latin ) { 00463 l = latin; 00464 start = latin; 00465 while ( *l ) { 00466 if ( *l == 32 ) { 00467 start = l + 1; 00468 } 00469 if ( *l < 0 ) { 00470 break; 00471 } 00472 l++; 00473 } 00474 if ( *l ) { 00475 numQuotes = 1; 00476 while ( *l ) { 00477 /* The encoded word must be limited to 75 character */ 00478 for ( i = 0; i < 16; ++i ) { 00479 if ( *l == especials[i] ) { 00480 numQuotes++; 00481 } 00482 } 00483 if ( *l < 0 ) { 00484 numQuotes++; 00485 } 00486 /* Stop after 58 = 75 - 17 characters or at "<user@host..." */ 00487 if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) { 00488 break; 00489 } 00490 l++; 00491 } 00492 if ( *l ) { 00493 stop = l - 1; 00494 while ( stop >= start && *stop != 32 ) { 00495 stop--; 00496 } 00497 if ( stop <= start ) { 00498 stop = l; 00499 } 00500 } else { 00501 stop = l; 00502 } 00503 if ( resultLen - rptr - 1 <= start - latin + 1 + 16 ) { 00504 // =?iso-88... 00505 resultLen += ( start - latin + 1 ) * 2 + 20; // more space 00506 result.resize( resultLen ); 00507 } 00508 while ( latin < start ) { 00509 result[rptr++] = *latin; 00510 latin++; 00511 } 00512 result.replace( rptr, 15, "=?iso-8859-1?q?" ); 00513 rptr += 15; 00514 if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) { 00515 resultLen += ( stop - latin + 1 ) * 4 + 20; // more space 00516 result.resize( resultLen ); 00517 } 00518 while ( latin < stop ) { 00519 // can add up to 3 chars/iteration 00520 numQuotes = 0; 00521 for ( i = 0; i < 16; ++i ) { 00522 if ( *latin == especials[i] ) { 00523 numQuotes = 1; 00524 } 00525 } 00526 if ( *latin < 0 ) { 00527 numQuotes = 1; 00528 } 00529 if ( numQuotes ) { 00530 result[rptr++] = '='; 00531 hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48; 00532 if ( hexcode >= 58 ) { 00533 hexcode += 7; 00534 } 00535 result[rptr++] = hexcode; 00536 hexcode = ( *latin & 0x0F ) + 48; 00537 if ( hexcode >= 58 ) { 00538 hexcode += 7; 00539 } 00540 result[rptr++] = hexcode; 00541 } else { 00542 result[rptr++] = *latin; 00543 } 00544 latin++; 00545 } 00546 result[rptr++] = '?'; 00547 result[rptr++] = '='; 00548 } else { 00549 while ( *latin ) { 00550 if ( rptr == resultLen - 1 ) { 00551 resultLen += 30; 00552 result.resize( resultLen ); 00553 } 00554 result[rptr++] = *latin; 00555 latin++; 00556 } 00557 } 00558 } 00559 result[rptr] = 0; 00560 return result; 00561 } 00562 00563 //----------------------------------------------------------------------------- 00564 const QString KIMAP::encodeRFC2231String( const QString &str ) 00565 { 00566 if ( str.isEmpty () ) { 00567 return str; 00568 } 00569 00570 signed char *latin = (signed char *)calloc( 1, str.length () + 1 ); 00571 char *latin_us = (char *)latin; 00572 strcpy( latin_us, str.toLatin1 () ); 00573 signed char *l = latin; 00574 char hexcode; 00575 int i; 00576 bool quote; 00577 while ( *l ) { 00578 if ( *l < 0 ) { 00579 break; 00580 } 00581 l++; 00582 } 00583 if ( !*l ) { 00584 free( latin ); 00585 return str; 00586 } 00587 QByteArray result; 00588 l = latin; 00589 while ( *l ) { 00590 quote = *l < 0; 00591 for ( i = 0; i < 16; ++i ) { 00592 if ( *l == especials[i] ) { 00593 quote = true; 00594 } 00595 } 00596 if ( quote ) { 00597 result += '%'; 00598 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48; 00599 if ( hexcode >= 58 ) { 00600 hexcode += 7; 00601 } 00602 result += hexcode; 00603 hexcode = ( *l & 0x0F ) + 48; 00604 if ( hexcode >= 58 ) { 00605 hexcode += 7; 00606 } 00607 result += hexcode; 00608 } else { 00609 result += *l; 00610 } 00611 l++; 00612 } 00613 free( latin ); 00614 return result; 00615 } 00616 00617 //----------------------------------------------------------------------------- 00618 const QString KIMAP::decodeRFC2231String( const QString &str ) 00619 { 00620 int p = str.indexOf ( '\'' ); 00621 00622 //see if it is an rfc string 00623 if ( p < 0 ) { 00624 return str; 00625 } 00626 00627 int l = str.lastIndexOf( '\'' ); 00628 00629 //second is language 00630 if ( p >= l ) { 00631 return str; 00632 } 00633 00634 //first is charset or empty 00635 QString charset = str.left ( p ); 00636 QString st = str.mid ( l + 1 ); 00637 QString language = str.mid ( p + 1, l - p - 1 ); 00638 00639 //kDebug() << "Charset:" << charset << "Language:" << language; 00640 00641 char ch, ch2; 00642 p = 0; 00643 while ( p < (int) st.length () ) { 00644 if ( st.at( p ) == 37 ) { 00645 ch = st.at( p + 1 ).toLatin1 () - 48; 00646 if ( ch > 16 ) { 00647 ch -= 7; 00648 } 00649 ch2 = st.at( p + 2 ).toLatin1 () - 48; 00650 if ( ch2 > 16 ) { 00651 ch2 -= 7; 00652 } 00653 st.replace( p, 1, ch * 16 + ch2 ); 00654 st.remove ( p + 1, 2 ); 00655 } 00656 p++; 00657 } 00658 return st; 00659 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:39:10 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:39:10 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.