KMIME Library
kmime_parsers.cpp
00001 /* 00002 kmime_parsers.cpp 00003 00004 KMime, the KDE Internet mail/usenet news message library. 00005 Copyright (c) 2001 the KMime authors. 00006 See file AUTHORS for details 00007 00008 This library is free software; you can redistribute it and/or 00009 modify it under the terms of the GNU Library General Public 00010 License as published by the Free Software Foundation; either 00011 version 2 of the License, or (at your option) any later version. 00012 00013 This library is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Library General Public License for more details. 00017 00018 You should have received a copy of the GNU Library General Public License 00019 along with this library; see the file COPYING.LIB. If not, write to 00020 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00021 Boston, MA 02110-1301, USA. 00022 */ 00023 #include "kmime_parsers.h" 00024 00025 #include <QtCore/QRegExp> 00026 #include <QtCore/QByteArray> 00027 00028 using namespace KMime::Parser; 00029 00030 namespace KMime { 00031 namespace Parser { 00032 00033 MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary ) 00034 { 00035 s_rc=src; 00036 b_oundary=boundary; 00037 } 00038 00039 bool MultiPart::parse() 00040 { 00041 QByteArray b = "--" + b_oundary, part; 00042 int pos1=0, pos2=0, blen=b.length(); 00043 00044 p_arts.clear(); 00045 00046 //find the first valid boundary 00047 while ( 1 ) { 00048 if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 || 00049 s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all 00050 break; 00051 } 00052 pos1 += blen; //boundary found but not valid => skip it; 00053 } 00054 00055 if ( pos1 > -1 ) { 00056 pos1 += blen; 00057 if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) { 00058 // the only valid boundary is the end-boundary 00059 // this message is *really* broken 00060 pos1 = -1; //we give up 00061 } else if ( ( pos1 - blen ) > 1 ) { //preamble present 00062 p_reamble = s_rc.left( pos1 - blen - 1 ); 00063 } 00064 } 00065 00066 while ( pos1 > -1 && pos2 > -1 ) { 00067 00068 //skip the rest of the line for the first boundary - the message-part starts here 00069 if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { 00070 //now search the next linebreak 00071 //now find the next valid boundary 00072 pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary 00073 while ( 1 ) { 00074 if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 || 00075 s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found 00076 break; 00077 } 00078 pos2 += blen; //boundary is invalid => skip it; 00079 } 00080 00081 if ( pos2 == -1 ) { // no more boundaries found 00082 part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string 00083 p_arts.append( part ); 00084 pos1 = -1; 00085 pos2 = -1; //break; 00086 } else { 00087 part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1) 00088 p_arts.append( part ); 00089 pos2 += blen; //pos2 points now to the first character after the boundary 00090 if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary 00091 pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary 00092 00093 if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line 00094 //everything after the end-boundary is considered as the epilouge 00095 e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 ); 00096 } 00097 pos1 = -1; 00098 pos2 = -1; //break 00099 } else { 00100 pos1 = pos2; //the search continues ... 00101 } 00102 } 00103 } 00104 } 00105 00106 return !p_arts.isEmpty(); 00107 } 00108 00109 //============================================================================= 00110 00111 NonMimeParser::NonMimeParser( const QByteArray &src ) : 00112 s_rc( src ), p_artNr( -1 ), t_otalNr( -1 ) 00113 { 00114 } 00115 00119 QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName ) 00120 { 00121 QByteArray tmp, mimeType; 00122 int pos; 00123 00124 if ( !fileName.isEmpty() ) { 00125 pos = fileName.lastIndexOf( '.' ); 00126 if ( pos++ != -1 ) { 00127 tmp = fileName.mid( pos, fileName.length() - pos).toUpper(); 00128 if ( tmp == "JPG" || tmp=="JPEG" ) { 00129 mimeType = "image/jpeg"; 00130 } else if ( tmp == "GIF") { 00131 mimeType = "image/gif"; 00132 } else if ( tmp == "PNG") { 00133 mimeType = "image/png"; 00134 } else if ( tmp == "TIFF" || tmp == "TIF") { 00135 mimeType = "image/tiff"; 00136 } else if ( tmp == "XPM") { 00137 mimeType = "image/x-xpixmap"; 00138 } else if ( tmp == "XBM") { 00139 mimeType = "image/x-xbitmap"; 00140 } else if ( tmp == "BMP") { 00141 mimeType = "image/bmp"; 00142 } else if ( tmp == "TXT" || 00143 tmp == "ASC" || 00144 tmp == "H" || 00145 tmp == "C" || 00146 tmp == "CC" || 00147 tmp == "CPP") { 00148 mimeType = "text/plain"; 00149 } else if ( tmp == "HTML" || tmp == "HTM" ) { 00150 mimeType = "text/html"; 00151 } else { 00152 mimeType = "application/octet-stream"; 00153 } 00154 } else { 00155 mimeType = "application/octet-stream"; 00156 } 00157 } else { 00158 mimeType = "application/octet-stream"; 00159 } 00160 00161 return mimeType; 00162 } 00163 00164 //============================================================================== 00165 00166 UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) : 00167 NonMimeParser( src ), s_ubject( subject ) 00168 {} 00169 00170 bool UUEncoded::parse() 00171 { 00172 int currentPos=0; 00173 bool success=true, firstIteration=true; 00174 00175 while ( success ) { 00176 int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0; 00177 bool containsBegin=false, containsEnd=false; 00178 QByteArray tmp, fileName; 00179 00180 if ( ( beginPos = QString::fromLatin1( s_rc ).indexOf( QRegExp( QLatin1String( "begin [0-9][0-9][0-9]" ) ), 00181 currentPos ) ) > -1 && 00182 ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n') ) { 00183 containsBegin = true; 00184 uuStart = s_rc.indexOf( '\n', beginPos ); 00185 if ( uuStart == -1 ) {//no more line breaks found, we give up 00186 success = false; 00187 break; 00188 } else { 00189 uuStart++; //points now at the beginning of the next line 00190 } 00191 } else { 00192 beginPos=currentPos; 00193 } 00194 00195 if ( ( endPos = s_rc. 00196 indexOf( "\nend", ( uuStart > 0 ) ? uuStart-1:0 ) ) == -1 ) { 00197 endPos = s_rc.length(); //no end found 00198 } else { 00199 containsEnd = true; 00200 } 00201 00202 if ( ( containsBegin && containsEnd ) || firstIteration ) { 00203 00204 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos); 00205 //all lines in a uuencoded text start with 'M' 00206 for ( int idx=uuStart; idx<endPos; idx++ ) { 00207 if ( s_rc[idx] == '\n' ) { 00208 lineCount++; 00209 if ( idx+1 < endPos && s_rc[idx+1] == 'M') { 00210 idx++; 00211 MCount++; 00212 } 00213 } 00214 } 00215 00216 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount); 00217 if ( MCount == 0 || ( lineCount - MCount ) > 10 || 00218 ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) { 00219 // harder check for split-articles 00220 success = false; 00221 break; //too many "non-M-Lines" found, we give up 00222 } 00223 00224 if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) { 00225 // message may be split up => parse subject 00226 QRegExp rx( QLatin1String( "[0-9]+/[0-9]+") ); 00227 pos = rx.indexIn( QLatin1String( s_ubject ), 0 ); 00228 len = rx.matchedLength(); 00229 if ( pos != -1 ) { 00230 tmp = s_ubject.mid( pos, len ); 00231 pos = tmp.indexOf( '/' ); 00232 p_artNr = tmp.left( pos ).toInt(); 00233 t_otalNr = tmp.right( tmp.length() - pos - 1).toInt(); 00234 } else { 00235 success = false; 00236 break; //no "part-numbers" found in the subject, we give up 00237 } 00238 } 00239 00240 //everything before "begin" is text 00241 if ( beginPos > 0 ) { 00242 t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); 00243 } 00244 00245 if ( containsBegin ) { 00246 //everything between "begin ### " and the next LF is considered as the filename 00247 fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 ); 00248 } else { 00249 fileName = ""; 00250 } 00251 f_ilenames.append( fileName ); 00252 //everything beetween "begin" and "end" is uuencoded 00253 b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) ); 00254 m_imeTypes.append( guessMimeType( fileName ) ); 00255 firstIteration = false; 00256 00257 int next = s_rc.indexOf( '\n', endPos + 1 ); 00258 if ( next == -1 ) { //no more line breaks found, we give up 00259 success = false; 00260 break; 00261 } else { 00262 next++; //points now at the beginning of the next line 00263 } 00264 currentPos = next; 00265 00266 } else { 00267 success = false; 00268 } 00269 } 00270 00271 // append trailing text part of the article 00272 t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); 00273 00274 return ( ( b_ins.count() > 0 ) || isPartial() ); 00275 } 00276 00277 //============================================================================== 00278 00279 YENCEncoded::YENCEncoded( const QByteArray &src ) : 00280 NonMimeParser( src ) 00281 { 00282 } 00283 00284 bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value ) 00285 { 00286 bool found = false; 00287 QByteArray sought=name + '='; 00288 00289 int iPos = src.indexOf( sought ); 00290 if ( iPos > -1 ) { 00291 int pos1 = src.indexOf( ' ', iPos ); 00292 int pos2 = src.indexOf( '\r', iPos ); 00293 int pos3 = src.indexOf( '\t', iPos ); 00294 int pos4 = src.indexOf( '\n', iPos ); 00295 if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) { 00296 pos1 = pos2; 00297 } 00298 if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) { 00299 pos1 = pos3; 00300 } 00301 if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) { 00302 pos1 = pos4; 00303 } 00304 iPos=src.lastIndexOf( '=', pos1 ) + 1; 00305 if ( iPos < pos1 ) { 00306 char c = src.at( iPos ); 00307 if ( c>='0' && c<='9' ) { 00308 found = true; 00309 *value = src.mid( iPos, pos1 - iPos ).toInt(); 00310 } 00311 } 00312 } 00313 return found; 00314 } 00315 00316 bool YENCEncoded::parse() 00317 { 00318 int currentPos=0; 00319 bool success=true; 00320 00321 while ( success ) { 00322 int beginPos=currentPos, yencStart=currentPos; 00323 bool containsPart=false; 00324 QByteArray fileName, mimeType; 00325 00326 if ( ( beginPos = s_rc. 00327 indexOf( "=ybegin ", currentPos ) ) > -1 && 00328 ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) { 00329 yencStart = s_rc.indexOf( '\n', beginPos ); 00330 if ( yencStart == -1 ) { // no more line breaks found, give up 00331 success = false; 00332 break; 00333 } else { 00334 yencStart++; 00335 if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) { 00336 containsPart = true; 00337 yencStart = s_rc.indexOf( '\n', yencStart ); 00338 if ( yencStart == -1 ) { 00339 success = false; 00340 break; 00341 } 00342 yencStart++; 00343 } 00344 } 00345 // Try to identify yenc meta data 00346 00347 // Filenames can contain any embedded chars until end of line 00348 QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos ); 00349 int namePos = meta.indexOf( "name=" ); 00350 if ( namePos == -1 ) { 00351 success = false; 00352 break; 00353 } 00354 int eolPos = meta.indexOf( '\r', namePos ); 00355 if ( eolPos == -1 ) { 00356 eolPos = meta.indexOf( '\n', namePos ); 00357 } 00358 if ( eolPos == -1 ) { 00359 success = false; 00360 break; 00361 } 00362 fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) ); 00363 00364 // Other metadata is integer 00365 int yencLine; 00366 if ( !yencMeta( meta, "line", ¥cLine ) ) { 00367 success = false; 00368 break; 00369 } 00370 int yencSize; 00371 if ( !yencMeta( meta, "size", ¥cSize ) ) { 00372 success = false; 00373 break; 00374 } 00375 00376 int partBegin, partEnd; 00377 if ( containsPart ) { 00378 if ( !yencMeta( meta, "part", &p_artNr ) ) { 00379 success = false; 00380 break; 00381 } 00382 if ( !yencMeta( meta, "begin", &partBegin ) || 00383 !yencMeta( meta, "end", &partEnd ) ) { 00384 success = false; 00385 break; 00386 } 00387 if ( !yencMeta( meta, "total", &t_otalNr ) ) { 00388 t_otalNr = p_artNr + 1; 00389 } 00390 if ( yencSize == partEnd - partBegin + 1 ) { 00391 t_otalNr = 1; 00392 } else { 00393 yencSize = partEnd - partBegin + 1; 00394 } 00395 } 00396 00397 // We have a valid yenc header; now we extract the binary data 00398 int totalSize = 0; 00399 int pos = yencStart; 00400 int len = s_rc.length(); 00401 bool lineStart = true; 00402 int lineLength = 0; 00403 bool containsEnd = false; 00404 QByteArray binary; 00405 binary.resize( yencSize ); 00406 while ( pos < len ) { 00407 int ch = s_rc.at( pos ); 00408 if ( ch < 0 ) { 00409 ch += 256; 00410 } 00411 if ( ch == '\r' ) { 00412 if ( lineLength != yencLine && totalSize != yencSize ) { 00413 break; 00414 } 00415 pos++; 00416 } 00417 else if ( ch == '\n' ) { 00418 lineStart = true; 00419 lineLength = 0; 00420 pos++; 00421 } else { 00422 if ( ch == '=' ) { 00423 if ( pos + 1 < len ) { 00424 ch = s_rc.at( pos + 1 ); 00425 if ( lineStart && ch == 'y' ) { 00426 containsEnd = true; 00427 break; 00428 } 00429 pos += 2; 00430 ch -= 64+42; 00431 if ( ch < 0 ) { 00432 ch += 256; 00433 } 00434 if ( totalSize >= yencSize ) { 00435 break; 00436 } 00437 binary[totalSize++] = ch; 00438 lineLength++; 00439 } else { 00440 break; 00441 } 00442 } else { 00443 ch -= 42; 00444 if ( ch < 0 ) { 00445 ch += 256; 00446 } 00447 if ( totalSize >= yencSize ) { 00448 break; 00449 } 00450 binary[totalSize++] = ch; 00451 lineLength++; 00452 pos++; 00453 } 00454 lineStart = false; 00455 } 00456 } 00457 00458 if ( !containsEnd ) { 00459 success = false; 00460 break; 00461 } 00462 if ( totalSize != yencSize ) { 00463 success = false; 00464 break; 00465 } 00466 00467 // pos now points to =yend; get end data 00468 eolPos = s_rc.indexOf( '\n', pos ); 00469 if ( eolPos == -1 ) { 00470 success = false; 00471 break; 00472 } 00473 meta = s_rc.mid( pos, eolPos - pos ); 00474 if ( !yencMeta( meta, "size", &totalSize ) ) { 00475 success = false; 00476 break; 00477 } 00478 if ( totalSize != yencSize ) { 00479 success = false; 00480 break; 00481 } 00482 00483 f_ilenames.append( fileName ); 00484 m_imeTypes.append( guessMimeType( fileName ) ); 00485 b_ins.append( binary ); 00486 00487 //everything before "begin" is text 00488 if ( beginPos > 0 ) { 00489 t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); 00490 } 00491 currentPos = eolPos + 1; 00492 00493 } else { 00494 success = false; 00495 } 00496 } 00497 00498 // append trailing text part of the article 00499 t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); 00500 00501 return b_ins.count()>0; 00502 } 00503 00504 } // namespace Parser 00505 00506 } // namespace KMime
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:41:33 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon May 14 2012 04:41:33 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.