// kmime_header_parsing.cpp
00001 /* -*- c++ -*- 00002 kmime_header_parsing.cpp 00003 00004 This file is part of KMime, the KDE internet mail/usenet news message library. 00005 Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> 00006 00007 KMime is free software; you can redistribute it and/or modify it 00008 under the terms of the GNU General Public License, version 2, as 00009 published by the Free Software Foundation. 00010 00011 KMime is distributed in the hope that it will be useful, but 00012 WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this library; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00019 00020 In addition, as a special exception, the copyright holders give 00021 permission to link the code of this library with any edition of 00022 the TQt library by Trolltech AS, Norway (or with modified versions 00023 of TQt that use the same license as TQt), and distribute linked 00024 combinations including the two. You must obey the GNU General 00025 Public License in all respects for all of the code used other than 00026 TQt. If you modify this file, you may extend this exception to 00027 your version of the file, but you are not obligated to do so. If 00028 you do not wish to do so, delete this exception statement from 00029 your version. 
00030 */ 00031 00032 #include <config.h> 00033 #include "kmime_header_parsing.h" 00034 00035 #include "kmime_codecs.h" 00036 #include "kmime_util.h" 00037 #include "kmime_warning.h" 00038 00039 #include <tdeglobal.h> 00040 #include <kcharsets.h> 00041 00042 #include <tqtextcodec.h> 00043 #include <tqmap.h> 00044 #include <tqcstring.h> 00045 #include <tqstringlist.h> 00046 00047 #include <ctype.h> // for isdigit 00048 #include <cassert> 00049 00050 using namespace KMime; 00051 using namespace KMime::Types; 00052 00053 namespace KMime { 00054 00055 namespace Types { 00056 00057 TQString AddrSpec::asString() const { 00058 bool needsQuotes = false; 00059 TQString result; 00060 result.reserve( localPart.length() + domain.length() + 1 ); 00061 for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) { 00062 const char ch = localPart[i].latin1(); 00063 if ( ch == '.' || isAText( ch ) ) 00064 result += ch; 00065 else { 00066 needsQuotes = true; 00067 if ( ch == '\\' || ch == '"' ) 00068 result += '\\'; 00069 result += ch; 00070 } 00071 } 00072 if ( needsQuotes ) 00073 return '"' + result + "\"@" + domain; 00074 else 00075 return result + '@' + domain; 00076 } 00077 00078 } 00079 00080 namespace HeaderParsing { 00081 00082 // parse the encoded-word (scursor points to after the initial '=') 00083 bool parseEncodedWord( const char* & scursor, const char * const send, 00084 TQString & result, TQCString & language ) { 00085 00086 // make sure the caller already did a bit of the work. 00087 assert( *(scursor-1) == '=' ); 00088 00089 // 00090 // STEP 1: 00091 // scan for the charset/language portion of the encoded-word 00092 // 00093 00094 char ch = *scursor++; 00095 00096 if ( ch != '?' ) { 00097 kdDebug() << "first" << endl; 00098 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00099 return false; 00100 } 00101 00102 // remember start of charset (ie. 
just after the initial "=?") and 00103 // language (just after the first '*') fields: 00104 const char * charsetStart = scursor; 00105 const char * languageStart = 0; 00106 00107 // find delimiting '?' (and the '*' separating charset and language 00108 // tags, if any): 00109 for ( ; scursor != send ; scursor++ ) 00110 if ( *scursor == '?') 00111 break; 00112 else if ( *scursor == '*' && !languageStart ) 00113 languageStart = scursor + 1; 00114 00115 // not found? can't be an encoded-word! 00116 if ( scursor == send || *scursor != '?' ) { 00117 kdDebug() << "second" << endl; 00118 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00119 return false; 00120 } 00121 00122 // extract the language information, if any (if languageStart is 0, 00123 // language will be null, too): 00124 TQCString maybeLanguage( languageStart, scursor - languageStart + 1 /*for NUL*/); 00125 // extract charset information (keep in mind: the size given to the 00126 // ctor is one off due to the \0 terminator): 00127 TQCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart ); 00128 00129 // 00130 // STEP 2: 00131 // scan for the encoding portion of the encoded-word 00132 // 00133 00134 00135 // remember start of encoding (just _after_ the second '?'): 00136 scursor++; 00137 const char * encodingStart = scursor; 00138 00139 // find next '?' (ending the encoding tag): 00140 for ( ; scursor != send ; scursor++ ) 00141 if ( *scursor == '?' ) break; 00142 00143 // not found? Can't be an encoded-word! 00144 if ( scursor == send || *scursor != '?' 
) { 00145 kdDebug() << "third" << endl; 00146 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00147 return false; 00148 } 00149 00150 // extract the encoding information: 00151 TQCString maybeEncoding( encodingStart, scursor - encodingStart + 1 ); 00152 00153 00154 kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset 00155 << "\"; language == \"" << maybeLanguage 00156 << "\"; encoding == \"" << maybeEncoding << "\"" << endl; 00157 00158 // 00159 // STEP 3: 00160 // scan for encoded-text portion of encoded-word 00161 // 00162 00163 00164 // remember start of encoded-text (just after the third '?'): 00165 scursor++; 00166 const char * encodedTextStart = scursor; 00167 00168 // find next '?' (ending the encoded-text): 00169 for ( ; scursor != send ; scursor++ ) 00170 if ( *scursor == '?' ) break; 00171 00172 // not found? Can't be an encoded-word! 00173 // ### maybe evaluate it nonetheless if the rest is OK? 00174 if ( scursor == send || *scursor != '?' ) { 00175 kdDebug() << "fourth" << endl; 00176 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00177 return false; 00178 } 00179 scursor++; 00180 // check for trailing '=': 00181 if ( scursor == send || *scursor != '=' ) { 00182 kdDebug() << "fifth" << endl; 00183 KMIME_WARN_PREMATURE_END_OF(EncodedWord); 00184 return false; 00185 } 00186 scursor++; 00187 00188 // set end sentinel for encoded-text: 00189 const char * const encodedTextEnd = scursor - 2; 00190 00191 // 00192 // STEP 4: 00193 // setup decoders for the transfer encoding and the charset 00194 // 00195 00196 00197 // try if there's a codec for the encoding found: 00198 Codec * codec = Codec::codecForName( maybeEncoding ); 00199 if ( !codec ) { 00200 KMIME_WARN_UNKNOWN(Encoding,maybeEncoding); 00201 return false; 00202 } 00203 00204 // get an instance of a corresponding decoder: 00205 Decoder * dec = codec->makeDecoder(); 00206 assert( dec ); 00207 00208 // try if there's a (text)codec for the charset found: 00209 bool matchOK = false; 00210 TQTextCodec 
00211 *textCodec = TDEGlobal::charsets()->codecForName( maybeCharset, matchOK ); 00212 00213 if ( !matchOK || !textCodec ) { 00214 KMIME_WARN_UNKNOWN(Charset,maybeCharset); 00215 delete dec; 00216 return false; 00217 }; 00218 00219 kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl; 00220 00221 // allocate a temporary buffer to store the 8bit text: 00222 int encodedTextLength = encodedTextEnd - encodedTextStart; 00223 TQByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) ); 00224 TQByteArray::Iterator bit = buffer.begin(); 00225 TQByteArray::ConstIterator bend = buffer.end(); 00226 00227 // 00228 // STEP 5: 00229 // do the actual decoding 00230 // 00231 00232 if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) ) 00233 KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( " 00234 << encodedTextLength << " )\nresult may be truncated" << endl; 00235 00236 result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() ); 00237 00238 kdDebug() << "result now: \"" << result << "\"" << endl; 00239 // cleanup: 00240 delete dec; 00241 language = maybeLanguage; 00242 00243 return true; 00244 } 00245 00246 static inline void eatWhiteSpace( const char* & scursor, const char * const send ) { 00247 while ( scursor != send 00248 && ( *scursor == ' ' || *scursor == '\n' || 00249 *scursor == '\t' || *scursor == '\r' ) ) 00250 scursor++; 00251 } 00252 00253 bool parseAtom( const char * & scursor, const char * const send, 00254 TQString & result, bool allow8Bit ) 00255 { 00256 TQPair<const char*,int> maybeResult; 00257 00258 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { 00259 result += TQString::fromLatin1( maybeResult.first, maybeResult.second ); 00260 return true; 00261 } 00262 00263 return false; 00264 } 00265 00266 bool parseAtom( const char * & scursor, const char * const send, 00267 TQPair<const char*,int> & result, bool allow8Bit ) { 00268 bool success = false; 00269 const char * start = 
scursor; 00270 00271 while ( scursor != send ) { 00272 signed char ch = *scursor++; 00273 if ( ch > 0 && isAText(ch) ) { 00274 // AText: OK 00275 success = true; 00276 } else if ( allow8Bit && ch < 0 ) { 00277 // 8bit char: not OK, but be tolerant. 00278 KMIME_WARN_8BIT(ch); 00279 success = true; 00280 } else { 00281 // CTL or special - marking the end of the atom: 00282 // re-set sursor to point to the offending 00283 // char and return: 00284 scursor--; 00285 break; 00286 } 00287 } 00288 result.first = start; 00289 result.second = scursor - start; 00290 return success; 00291 } 00292 00293 bool parseToken( const char * & scursor, const char * const send, 00294 TQString & result, bool allow8Bit ) 00295 { 00296 TQPair<const char*,int> maybeResult; 00297 00298 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { 00299 result += TQString::fromLatin1( maybeResult.first, maybeResult.second ); 00300 return true; 00301 } 00302 00303 return false; 00304 } 00305 00306 bool parseToken( const char * & scursor, const char * const send, 00307 TQPair<const char*,int> & result, bool allow8Bit ) 00308 { 00309 bool success = false; 00310 const char * start = scursor; 00311 00312 while ( scursor != send ) { 00313 signed char ch = *scursor++; 00314 if ( ch > 0 && isTText(ch) ) { 00315 // TText: OK 00316 success = true; 00317 } else if ( allow8Bit && ch < 0 ) { 00318 // 8bit char: not OK, but be tolerant. 
00319 KMIME_WARN_8BIT(ch); 00320 success = true; 00321 } else { 00322 // CTL or tspecial - marking the end of the atom: 00323 // re-set sursor to point to the offending 00324 // char and return: 00325 scursor--; 00326 break; 00327 } 00328 } 00329 result.first = start; 00330 result.second = scursor - start; 00331 return success; 00332 } 00333 00334 #define READ_ch_OR_FAIL if ( scursor == send ) { \ 00335 KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \ 00336 return false; \ 00337 } else { \ 00338 ch = *scursor++; \ 00339 } 00340 00341 // known issues: 00342 // 00343 // - doesn't handle quoted CRLF 00344 00345 bool parseGenericQuotedString( const char* & scursor, const char * const send, 00346 TQString & result, bool isCRLF, 00347 const char openChar, const char closeChar ) 00348 { 00349 char ch; 00350 // We are in a quoted-string or domain-literal or comment and the 00351 // cursor points to the first char after the openChar. 00352 // We will apply unfolding and quoted-pair removal. 00353 // We return when we either encounter the end or unescaped openChar 00354 // or closeChar. 00355 00356 assert( *(scursor-1) == openChar || *(scursor-1) == closeChar ); 00357 00358 while ( scursor != send ) { 00359 ch = *scursor++; 00360 00361 if ( ch == closeChar || ch == openChar ) { 00362 // end of quoted-string or another opening char: 00363 // let caller decide what to do. 00364 return true; 00365 } 00366 00367 switch( ch ) { 00368 case '\\': // quoted-pair 00369 // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 00370 READ_ch_OR_FAIL; 00371 KMIME_WARN_IF_8BIT(ch); 00372 result += TQChar(ch); 00373 break; 00374 case '\r': 00375 // ### 00376 // The case of lonely '\r' is easy to solve, as they're 00377 // not part of Unix Line-ending conventions. 00378 // But I see a problem if we are given Unix-native 00379 // line-ending-mails, where we cannot determine anymore 00380 // whether a given '\n' was part of a CRLF or was occurring 00381 // on it's own. 
00382 READ_ch_OR_FAIL; 00383 if ( ch != '\n' ) { 00384 // CR on it's own... 00385 KMIME_WARN_LONE(CR); 00386 result += TQChar('\r'); 00387 scursor--; // points to after the '\r' again 00388 } else { 00389 // CRLF encountered. 00390 // lookahead: check for folding 00391 READ_ch_OR_FAIL; 00392 if ( ch == ' ' || ch == '\t' ) { 00393 // correct folding; 00394 // position cursor behind the CRLF WSP (unfolding) 00395 // and add the WSP to the result 00396 result += TQChar(ch); 00397 } else { 00398 // this is the "shouldn't happen"-case. There is a CRLF 00399 // inside a quoted-string without it being part of FWS. 00400 // We take it verbatim. 00401 KMIME_WARN_NON_FOLDING(CRLF); 00402 result += "\r\n"; 00403 // the cursor is decremented again, so's we need not 00404 // duplicate the whole switch here. "ch" could've been 00405 // everything (incl. openChar or closeChar). 00406 scursor--; 00407 } 00408 } 00409 break; 00410 case '\n': 00411 // Note: CRLF has been handled above already! 00412 // ### LF needs special treatment, depending on whether isCRLF 00413 // is true (we can be sure a lonely '\n' was meant this way) or 00414 // false ('\n' alone could have meant LF or CRLF in the original 00415 // message. This parser assumes CRLF iff the LF is followed by 00416 // either WSP (folding) or NULL (premature end of quoted-string; 00417 // Should be fixed, since NULL is allowed as per rfc822). 00418 READ_ch_OR_FAIL; 00419 if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { 00420 // folding 00421 // correct folding 00422 result += TQChar(ch); 00423 } else { 00424 // non-folding 00425 KMIME_WARN_LONE(LF); 00426 result += TQChar('\n'); 00427 // pos is decremented, so's we need not duplicate the whole 00428 // switch here. ch could've been everything (incl. <">, "\"). 
00429 scursor--; 00430 } 00431 break; 00432 default: 00433 KMIME_WARN_IF_8BIT(ch); 00434 result += TQChar(ch); 00435 } 00436 } 00437 00438 return false; 00439 } 00440 00441 // known issues: 00442 // 00443 // - doesn't handle encoded-word inside comments. 00444 00445 bool parseComment( const char* & scursor, const char * const send, 00446 TQString & result, bool isCRLF, bool reallySave ) 00447 { 00448 int commentNestingDepth = 1; 00449 const char * afterLastClosingParenPos = 0; 00450 TQString maybeCmnt; 00451 const char * oldscursor = scursor; 00452 00453 assert( *(scursor-1) == '(' ); 00454 00455 while ( commentNestingDepth ) { 00456 TQString cmntPart; 00457 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { 00458 assert( *(scursor-1) == ')' || *(scursor-1) == '(' ); 00459 // see the kdoc for above function for the possible conditions 00460 // we have to check: 00461 switch ( *(scursor-1) ) { 00462 case ')': 00463 if ( reallySave ) { 00464 // add the chunk that's now surely inside the comment. 00465 result += maybeCmnt; 00466 result += cmntPart; 00467 if ( commentNestingDepth > 1 ) // don't add the outermost ')'... 00468 result += TQChar(')'); 00469 maybeCmnt = TQString(); 00470 } 00471 afterLastClosingParenPos = scursor; 00472 --commentNestingDepth; 00473 break; 00474 case '(': 00475 if ( reallySave ) { 00476 // don't add to "result" yet, because we might find that we 00477 // are already outside the (broken) comment... 00478 maybeCmnt += cmntPart; 00479 maybeCmnt += TQChar('('); 00480 } 00481 ++commentNestingDepth; 00482 break; 00483 default: assert( 0 ); 00484 } // switch 00485 } else { 00486 // !parseGenericQuotedString, ie. premature end 00487 if ( afterLastClosingParenPos ) 00488 scursor = afterLastClosingParenPos; 00489 else 00490 scursor = oldscursor; 00491 return false; 00492 } 00493 } // while 00494 00495 return true; 00496 } 00497 00498 00499 // known issues: none. 
00500 00501 bool parsePhrase( const char* & scursor, const char * const send, 00502 TQString & result, bool isCRLF ) 00503 { 00504 enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None; 00505 TQString tmp; 00506 TQCString lang; 00507 const char * successfullyParsed = 0; 00508 // only used by the encoded-word branch 00509 const char * oldscursor; 00510 // used to suppress whitespace between adjacent encoded-words 00511 // (rfc2047, 6.2): 00512 bool lastWasEncodedWord = false; 00513 00514 while ( scursor != send ) { 00515 char ch = *scursor++; 00516 switch ( ch ) { 00517 case '.': // broken, but allow for intorop's sake 00518 if ( found == None ) { 00519 --scursor; 00520 return false; 00521 } else { 00522 if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) 00523 result += ". "; 00524 else 00525 result += '.'; 00526 successfullyParsed = scursor; 00527 } 00528 break; 00529 case '"': // quoted-string 00530 tmp = TQString(); 00531 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { 00532 successfullyParsed = scursor; 00533 assert( *(scursor-1) == '"' ); 00534 switch ( found ) { 00535 case None: 00536 found = QuotedString; 00537 break; 00538 case Phrase: 00539 case Atom: 00540 case EncodedWord: 00541 case QuotedString: 00542 found = Phrase; 00543 result += TQChar(' '); // rfc822, 3.4.4 00544 break; 00545 default: 00546 assert( 0 ); 00547 } 00548 lastWasEncodedWord = false; 00549 result += tmp; 00550 } else { 00551 // premature end of quoted string. 00552 // What to do? Return leading '"' as special? Return as quoted-string? 00553 // We do the latter if we already found something, else signal failure. 
00554 if ( found == None ) { 00555 return false; 00556 } else { 00557 result += TQChar(' '); // rfc822, 3.4.4 00558 result += tmp; 00559 return true; 00560 } 00561 } 00562 break; 00563 case '(': // comment 00564 // parse it, but ignore content: 00565 tmp = TQString(); 00566 if ( parseComment( scursor, send, tmp, isCRLF, 00567 false /*don't bother with the content*/ ) ) { 00568 successfullyParsed = scursor; 00569 lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 00570 } else { 00571 if ( found == None ) 00572 return false; 00573 else { 00574 scursor = successfullyParsed; 00575 return true; 00576 } 00577 } 00578 break; 00579 case '=': // encoded-word 00580 tmp = TQString(); 00581 oldscursor = scursor; 00582 lang = 0; 00583 if ( parseEncodedWord( scursor, send, tmp, lang ) ) { 00584 successfullyParsed = scursor; 00585 switch ( found ) { 00586 case None: 00587 found = EncodedWord; 00588 break; 00589 case Phrase: 00590 case EncodedWord: 00591 case Atom: 00592 case QuotedString: 00593 if ( !lastWasEncodedWord ) 00594 result += TQChar(' '); // rfc822, 3.4.4 00595 found = Phrase; 00596 break; 00597 default: assert( 0 ); 00598 } 00599 lastWasEncodedWord = true; 00600 result += tmp; 00601 break; 00602 } else 00603 // parse as atom: 00604 scursor = oldscursor; 00605 // fall though... 
00606 00607 default: //atom 00608 tmp = TQString(); 00609 scursor--; 00610 if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { 00611 successfullyParsed = scursor; 00612 switch ( found ) { 00613 case None: 00614 found = Atom; 00615 break; 00616 case Phrase: 00617 case Atom: 00618 case EncodedWord: 00619 case QuotedString: 00620 found = Phrase; 00621 result += TQChar(' '); // rfc822, 3.4.4 00622 break; 00623 default: 00624 assert( 0 ); 00625 } 00626 lastWasEncodedWord = false; 00627 result += tmp; 00628 } else { 00629 if ( found == None ) 00630 return false; 00631 else { 00632 scursor = successfullyParsed; 00633 return true; 00634 } 00635 } 00636 } 00637 eatWhiteSpace( scursor, send ); 00638 } 00639 00640 return ( found != None ); 00641 } 00642 00643 00644 bool parseDotAtom( const char* & scursor, const char * const send, 00645 TQString & result, bool isCRLF ) 00646 { 00647 // always points to just after the last atom parsed: 00648 const char * successfullyParsed; 00649 00650 TQString tmp; 00651 if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) 00652 return false; 00653 result += tmp; 00654 successfullyParsed = scursor; 00655 00656 while ( scursor != send ) { 00657 eatCFWS( scursor, send, isCRLF ); 00658 00659 // end of header or no '.' -> return 00660 if ( scursor == send || *scursor != '.' ) return true; 00661 scursor++; // eat '.' 
00662 00663 eatCFWS( scursor, send, isCRLF ); 00664 00665 if ( scursor == send || !isAText( *scursor ) ) { 00666 // end of header or no AText, but this time following a '.'!: 00667 // reset cursor to just after last successfully parsed char and 00668 // return: 00669 scursor = successfullyParsed; 00670 return true; 00671 } 00672 00673 // try to parse the next atom: 00674 TQString maybeAtom; 00675 if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { 00676 scursor = successfullyParsed; 00677 return true; 00678 } 00679 00680 result += TQChar('.'); 00681 result += maybeAtom; 00682 successfullyParsed = scursor; 00683 } 00684 00685 scursor = successfullyParsed; 00686 return true; 00687 } 00688 00689 00690 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) { 00691 TQString dummy; 00692 00693 while ( scursor != send ) { 00694 const char * oldscursor = scursor; 00695 00696 char ch = *scursor++; 00697 00698 switch( ch ) { 00699 case ' ': 00700 case '\t': // whitespace 00701 case '\r': 00702 case '\n': // folding 00703 continue; 00704 00705 case '(': // comment 00706 if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) 00707 continue; 00708 scursor = oldscursor; 00709 return; 00710 00711 default: 00712 scursor = oldscursor; 00713 return; 00714 } 00715 00716 } 00717 } 00718 00719 bool parseDomain( const char* & scursor, const char * const send, 00720 TQString & result, bool isCRLF ) { 00721 eatCFWS( scursor, send, isCRLF ); 00722 if ( scursor == send ) return false; 00723 00724 // domain := dot-atom / domain-literal / atom *("." 
atom) 00725 // 00726 // equivalent to: 00727 // domain = dot-atom / domain-literal, 00728 // since parseDotAtom does allow CFWS between atoms and dots 00729 00730 if ( *scursor == '[' ) { 00731 // domain-literal: 00732 TQString maybeDomainLiteral; 00733 // eat '[': 00734 scursor++; 00735 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, 00736 isCRLF, '[', ']' ) ) { 00737 if ( scursor == send ) { 00738 // end of header: check for closing ']': 00739 if ( *(scursor-1) == ']' ) { 00740 // OK, last char was ']': 00741 result = maybeDomainLiteral; 00742 return true; 00743 } else { 00744 // not OK, domain-literal wasn't closed: 00745 return false; 00746 } 00747 } 00748 // we hit openChar in parseGenericQuotedString. 00749 // include it in maybeDomainLiteral and keep on parsing: 00750 if ( *(scursor-1) == '[' ) { 00751 maybeDomainLiteral += TQChar('['); 00752 continue; 00753 } 00754 // OK, real end of domain-literal: 00755 result = maybeDomainLiteral; 00756 return true; 00757 } 00758 } else { 00759 // dot-atom: 00760 TQString maybeDotAtom; 00761 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { 00762 result = maybeDotAtom; 00763 return true; 00764 } 00765 } 00766 return false; 00767 } 00768 00769 bool parseObsRoute( const char* & scursor, const char* const send, 00770 TQStringList & result, bool isCRLF, bool save ) { 00771 while ( scursor != send ) { 00772 eatCFWS( scursor, send, isCRLF ); 00773 if ( scursor == send ) return false; 00774 00775 // empty entry: 00776 if ( *scursor == ',' ) { 00777 scursor++; 00778 if ( save ) result.append( TQString() ); 00779 continue; 00780 } 00781 00782 // empty entry ending the list: 00783 if ( *scursor == ':' ) { 00784 scursor++; 00785 if ( save ) result.append( TQString() ); 00786 return true; 00787 } 00788 00789 // each non-empty entry must begin with '@': 00790 if ( *scursor != '@' ) 00791 return false; 00792 else 00793 scursor++; 00794 00795 TQString maybeDomain; 00796 if ( !parseDomain( scursor, send, 
maybeDomain, isCRLF ) ) return false; 00797 if ( save ) result.append( maybeDomain ); 00798 00799 // eat the following (optional) comma: 00800 eatCFWS( scursor, send, isCRLF ); 00801 if ( scursor == send ) return false; 00802 if ( *scursor == ':' ) { scursor++; return true; } 00803 if ( *scursor == ',' ) scursor++; 00804 00805 } 00806 00807 return false; 00808 } 00809 00810 bool parseAddrSpec( const char* & scursor, const char * const send, 00811 AddrSpec & result, bool isCRLF ) { 00812 // 00813 // STEP 1: 00814 // local-part := dot-atom / quoted-string / word *("." word) 00815 // 00816 // this is equivalent to: 00817 // local-part := word *("." word) 00818 00819 TQString maybeLocalPart; 00820 TQString tmp; 00821 00822 while ( scursor != send ) { 00823 // first, eat any whitespace 00824 eatCFWS( scursor, send, isCRLF ); 00825 00826 char ch = *scursor++; 00827 switch ( ch ) { 00828 case '.': // dot 00829 maybeLocalPart += TQChar('.'); 00830 break; 00831 00832 case '@': 00833 goto SAW_AT_SIGN; 00834 break; 00835 00836 case '"': // quoted-string 00837 tmp = TQString(); 00838 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) 00839 maybeLocalPart += tmp; 00840 else 00841 return false; 00842 break; 00843 00844 default: // atom 00845 scursor--; // re-set scursor to point to ch again 00846 tmp = TQString(); 00847 if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) 00848 maybeLocalPart += tmp; 00849 else 00850 return false; // parseAtom can only fail if the first char is non-atext. 
00851 break; 00852 } 00853 } 00854 00855 return false; 00856 00857 00858 // 00859 // STEP 2: 00860 // domain 00861 // 00862 00863 SAW_AT_SIGN: 00864 00865 assert( *(scursor-1) == '@' ); 00866 00867 TQString maybeDomain; 00868 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) 00869 return false; 00870 00871 result.localPart = maybeLocalPart; 00872 result.domain = maybeDomain; 00873 00874 return true; 00875 } 00876 00877 00878 bool parseAngleAddr( const char* & scursor, const char * const send, 00879 AddrSpec & result, bool isCRLF ) { 00880 // first, we need an opening angle bracket: 00881 eatCFWS( scursor, send, isCRLF ); 00882 if ( scursor == send || *scursor != '<' ) return false; 00883 scursor++; // eat '<' 00884 00885 eatCFWS( scursor, send, isCRLF ); 00886 if ( scursor == send ) return false; 00887 00888 if ( *scursor == '@' || *scursor == ',' ) { 00889 // obs-route: parse, but ignore: 00890 KMIME_WARN << "obsolete source route found! ignoring." << endl; 00891 TQStringList dummy; 00892 if ( !parseObsRoute( scursor, send, dummy, 00893 isCRLF, false /* don't save */ ) ) 00894 return false; 00895 // angle-addr isn't complete until after the '>': 00896 if ( scursor == send ) return false; 00897 } 00898 00899 // parse addr-spec: 00900 AddrSpec maybeAddrSpec; 00901 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false; 00902 00903 eatCFWS( scursor, send, isCRLF ); 00904 if ( scursor == send || *scursor != '>' ) return false; 00905 scursor++; 00906 00907 result = maybeAddrSpec; 00908 return true; 00909 00910 } 00911 00912 bool parseMailbox( const char* & scursor, const char * const send, 00913 Mailbox & result, bool isCRLF ) { 00914 00915 // rfc: 00916 // mailbox := addr-spec / ([ display-name ] angle-addr) 00917 // us: 00918 // mailbox := addr-spec / ([ display-name ] angle-addr) 00919 // / (angle-addr "(" display-name ")") 00920 00921 eatCFWS( scursor, send, isCRLF ); 00922 if ( scursor == send ) return false; 00923 00924 AddrSpec 
maybeAddrSpec; 00925 00926 // first, try if it's a vanilla addr-spec: 00927 const char * oldscursor = scursor; 00928 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { 00929 result.displayName = TQString(); 00930 result.addrSpec = maybeAddrSpec; 00931 return true; 00932 } 00933 scursor = oldscursor; 00934 00935 // second, see if there's a display-name: 00936 TQString maybeDisplayName; 00937 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { 00938 // failed: reset cursor, note absent display-name 00939 maybeDisplayName = TQString(); 00940 scursor = oldscursor; 00941 } else { 00942 // succeeded: eat CFWS 00943 eatCFWS( scursor, send, isCRLF ); 00944 if ( scursor == send ) return false; 00945 } 00946 00947 // third, parse the angle-addr: 00948 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) 00949 return false; 00950 00951 if ( maybeDisplayName.isNull() ) { 00952 // check for the obsolete form of display-name (as comment): 00953 eatWhiteSpace( scursor, send ); 00954 if ( scursor != send && *scursor == '(' ) { 00955 scursor++; 00956 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) 00957 return false; 00958 } 00959 } 00960 00961 result.displayName = maybeDisplayName; 00962 result.addrSpec = maybeAddrSpec; 00963 return true; 00964 } 00965 00966 bool parseGroup( const char* & scursor, const char * const send, 00967 Address & result, bool isCRLF ) { 00968 // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] 00969 // 00970 // equivalent to: 00971 // group := display-name ":" [ obs-mbox-list ] ";" 00972 00973 eatCFWS( scursor, send, isCRLF ); 00974 if ( scursor == send ) return false; 00975 00976 // get display-name: 00977 TQString maybeDisplayName; 00978 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) 00979 return false; 00980 00981 // get ":": 00982 eatCFWS( scursor, send, isCRLF ); 00983 if ( scursor == send || *scursor != ':' ) return false; 00984 00985 result.displayName = 
maybeDisplayName; 00986 00987 // get obs-mbox-list (may contain empty entries): 00988 scursor++; 00989 while ( scursor != send ) { 00990 eatCFWS( scursor, send, isCRLF ); 00991 if ( scursor == send ) return false; 00992 00993 // empty entry: 00994 if ( *scursor == ',' ) { scursor++; continue; } 00995 00996 // empty entry ending the list: 00997 if ( *scursor == ';' ) { scursor++; return true; } 00998 00999 Mailbox maybeMailbox; 01000 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) 01001 return false; 01002 result.mailboxList.append( maybeMailbox ); 01003 01004 eatCFWS( scursor, send, isCRLF ); 01005 // premature end: 01006 if ( scursor == send ) return false; 01007 // regular end of the list: 01008 if ( *scursor == ';' ) { scursor++; return true; } 01009 // eat regular list entry separator: 01010 if ( *scursor == ',' ) scursor++; 01011 } 01012 return false; 01013 } 01014 01015 01016 bool parseAddress( const char* & scursor, const char * const send, 01017 Address & result, bool isCRLF ) { 01018 // address := mailbox / group 01019 01020 eatCFWS( scursor, send, isCRLF ); 01021 if ( scursor == send ) return false; 01022 01023 // first try if it's a single mailbox: 01024 Mailbox maybeMailbox; 01025 const char * oldscursor = scursor; 01026 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { 01027 // yes, it is: 01028 result.displayName = TQString(); 01029 result.mailboxList.append( maybeMailbox ); 01030 return true; 01031 } 01032 scursor = oldscursor; 01033 01034 Address maybeAddress; 01035 01036 // no, it's not a single mailbox. Try if it's a group: 01037 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) 01038 return false; 01039 01040 result = maybeAddress; 01041 return true; 01042 } 01043 01044 bool parseAddressList( const char* & scursor, const char * const send, 01045 AddressList & result, bool isCRLF ) { 01046 while ( scursor != send ) { 01047 eatCFWS( scursor, send, isCRLF ); 01048 // end of header: this is OK. 
01049 if ( scursor == send ) return true; 01050 // empty entry: ignore: 01051 if ( *scursor == ',' ) { scursor++; continue; } 01052 01053 // parse one entry 01054 Address maybeAddress; 01055 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false; 01056 result.append( maybeAddress ); 01057 01058 eatCFWS( scursor, send, isCRLF ); 01059 // end of header: this is OK. 01060 if ( scursor == send ) return true; 01061 // comma separating entries: eat it. 01062 if ( *scursor == ',' ) scursor++; 01063 } 01064 return true; 01065 } 01066 01067 01068 static TQString asterisk = TQString::fromLatin1("*0*",1); 01069 static TQString asteriskZero = TQString::fromLatin1("*0*",2); 01070 //static TQString asteriskZeroAsterisk = TQString::fromLatin1("*0*",3); 01071 01072 bool parseParameter( const char* & scursor, const char * const send, 01073 TQPair<TQString,TQStringOrTQPair> & result, bool isCRLF ) { 01074 // parameter = regular-parameter / extended-parameter 01075 // regular-parameter = regular-parameter-name "=" value 01076 // extended-parameter = 01077 // value = token / quoted-string 01078 // 01079 // note that rfc2231 handling is out of the scope of this function. 01080 // Therefore we return the attribute as TQString and the value as 01081 // (start,length) tupel if we see that the value is encoded 01082 // (trailing asterisk), for parseParameterList to decode... 01083 01084 eatCFWS( scursor, send, isCRLF ); 01085 if ( scursor == send ) return false; 01086 01087 // 01088 // parse the parameter name: 01089 // 01090 TQString maybeAttribute; 01091 if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) 01092 return false; 01093 01094 eatCFWS( scursor, send, isCRLF ); 01095 // premature end: not OK (haven't seen '=' yet). 
01096 if ( scursor == send || *scursor != '=' ) return false; 01097 scursor++; // eat '=' 01098 01099 eatCFWS( scursor, send, isCRLF ); 01100 if ( scursor == send ) { 01101 // don't choke on attribute=, meaning the value was omitted: 01102 if ( maybeAttribute.endsWith( asterisk ) ) { 01103 KMIME_WARN << "attribute ends with \"*\", but value is empty! " 01104 "Chopping away \"*\"." << endl; 01105 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01106 } 01107 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() ); 01108 return true; 01109 } 01110 01111 const char * oldscursor = scursor; 01112 01113 // 01114 // parse the parameter value: 01115 // 01116 TQStringOrTQPair maybeValue; 01117 if ( *scursor == '"' ) { 01118 // value is a quoted-string: 01119 scursor++; 01120 if ( maybeAttribute.endsWith( asterisk ) ) { 01121 // attributes ending with "*" designate extended-parameters, 01122 // which cannot have quoted-strings as values. So we remove the 01123 // trailing "*" to not confuse upper layers. 01124 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! " 01125 "Chopping away \"*\"." << endl; 01126 maybeAttribute.truncate( maybeAttribute.length() - 1 ); 01127 } 01128 01129 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { 01130 scursor = oldscursor; 01131 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() ); 01132 return false; // this case needs further processing by upper layers!! 01133 } 01134 } else { 01135 // value is a token: 01136 if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { 01137 scursor = oldscursor; 01138 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() ); 01139 return false; // this case needs further processing by upper layers!! 
01140 } 01141 } 01142 01143 result = tqMakePair( maybeAttribute.lower(), maybeValue ); 01144 return true; 01145 } 01146 01147 01148 01149 bool parseRawParameterList( const char* & scursor, const char * const send, 01150 TQMap<TQString,TQStringOrTQPair> & result, 01151 bool isCRLF ) { 01152 // we use parseParameter() consecutively to obtain a map of raw 01153 // attributes to raw values. "Raw" here means that we don't do 01154 // rfc2231 decoding and concatenation. This is left to 01155 // parseParameterList(), which will call this function. 01156 // 01157 // The main reason for making this chunk of code a separate 01158 // (private) method is that we can deal with broken parameters 01159 // _here_ and leave the rfc2231 handling solely to 01160 // parseParameterList(), which will still be enough work. 01161 01162 while ( scursor != send ) { 01163 eatCFWS( scursor, send, isCRLF ); 01164 // empty entry ending the list: OK. 01165 if ( scursor == send ) return true; 01166 // empty list entry: ignore. 01167 if ( *scursor == ';' ) { scursor++; continue; } 01168 01169 TQPair<TQString,TQStringOrTQPair> maybeParameter; 01170 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { 01171 // we need to do a bit of work if the attribute is not 01172 // NULL. These are the cases marked with "needs further 01173 // processing" in parseParameter(). Specifically, parsing of the 01174 // token or the quoted-string, which should represent the value, 01175 // failed. We take the easy way out and simply search for the 01176 // next ';' to start parsing again. (Another option would be to 01177 // take the text between '=' and ';' as value) 01178 if ( maybeParameter.first.isNull() ) return false; 01179 while ( scursor != send ) { 01180 if ( *scursor++ == ';' ) goto IS_SEMICOLON; 01181 } 01182 // scursor == send case: end of list. 01183 return true; 01184 IS_SEMICOLON: 01185 // *scursor == ';' case: parse next entry. 
01186 continue; 01187 } 01188 // successful parsing brings us here: 01189 result.insert( maybeParameter.first, maybeParameter.second ); 01190 01191 eatCFWS( scursor, send, isCRLF ); 01192 // end of header: ends list. 01193 if ( scursor == send ) return true; 01194 // regular separator: eat it. 01195 if ( *scursor == ';' ) scursor++; 01196 } 01197 return true; 01198 } 01199 01200 01201 static void decodeRFC2231Value( Codec* & rfc2231Codec, 01202 TQTextCodec* & textcodec, 01203 bool isContinuation, TQString & value, 01204 TQPair<const char*,int> & source ) { 01205 01206 // 01207 // parse the raw value into (charset,language,text): 01208 // 01209 01210 const char * decBegin = source.first; 01211 const char * decCursor = decBegin; 01212 const char * decEnd = decCursor + source.second; 01213 01214 if ( !isContinuation ) { 01215 // find the first single quote 01216 while ( decCursor != decEnd ) { 01217 if ( *decCursor == '\'' ) break; 01218 else decCursor++; 01219 } 01220 01221 if ( decCursor == decEnd ) { 01222 // there wasn't a single single quote at all! 01223 // take the whole value to be in latin-1: 01224 KMIME_WARN << "No charset in extended-initial-value. " 01225 "Assuming \"iso-8859-1\"." << endl; 01226 value += TQString::fromLatin1( decBegin, source.second ); 01227 return; 01228 } 01229 01230 TQCString charset( decBegin, decCursor - decBegin + 1 ); 01231 01232 const char * oldDecCursor = ++decCursor; 01233 // find the second single quote (we ignore the language tag): 01234 while ( decCursor != decEnd ) { 01235 if ( *decCursor == '\'' ) break; 01236 else decCursor++; 01237 } 01238 if ( decCursor == decEnd ) { 01239 KMIME_WARN << "No language in extended-initial-value. " 01240 "Trying to recover." 
<< endl; 01241 decCursor = oldDecCursor; 01242 } else 01243 decCursor++; 01244 01245 // decCursor now points to the start of the 01246 // "extended-other-values": 01247 01248 // 01249 // get the decoders: 01250 // 01251 01252 bool matchOK = false; 01253 textcodec = TDEGlobal::charsets()->codecForName( charset, matchOK ); 01254 if ( !matchOK ) { 01255 textcodec = 0; 01256 KMIME_WARN_UNKNOWN(Charset,charset); 01257 } 01258 } 01259 01260 if ( !rfc2231Codec ) { 01261 rfc2231Codec = Codec::codecForName("x-kmime-rfc2231"); 01262 assert( rfc2231Codec ); 01263 } 01264 01265 if ( !textcodec ) { 01266 value += TQString::fromLatin1( decCursor, decEnd - decCursor ); 01267 return; 01268 } 01269 01270 Decoder * dec = rfc2231Codec->makeDecoder(); 01271 assert( dec ); 01272 01273 // 01274 // do the decoding: 01275 // 01276 01277 TQByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); 01278 TQByteArray::Iterator bit = buffer.begin(); 01279 TQByteArray::ConstIterator bend = buffer.end(); 01280 01281 if ( !dec->decode( decCursor, decEnd, bit, bend ) ) 01282 KMIME_WARN << rfc2231Codec->name() 01283 << " codec lies about it's maxDecodedSizeFor()\n" 01284 "result may be truncated" << endl; 01285 01286 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); 01287 01288 kdDebug() << "value now: \"" << value << "\"" << endl; 01289 // cleanup: 01290 delete dec; 01291 } 01292 01293 // known issues: 01294 // - permutes rfc2231 continuations when the total number of parts 01295 // exceeds 10 (other-sections then becomes *xy, ie. 
two digits) 01296 01297 bool parseParameterList( const char* & scursor, const char * const send, 01298 TQMap<TQString,TQString> & result, bool isCRLF ) { 01299 // parse the list into raw attribute-value pairs: 01300 TQMap<TQString,TQStringOrTQPair> rawParameterList; 01301 if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) 01302 return false; 01303 01304 if ( rawParameterList.isEmpty() ) return true; 01305 01306 // decode rfc 2231 continuations and alternate charset encoding: 01307 01308 // NOTE: this code assumes that what TQMapIterator delivers is sorted 01309 // by the key! 01310 01311 Codec * rfc2231Codec = 0; 01312 TQTextCodec * textcodec = 0; 01313 TQString attribute; 01314 TQString value; 01315 enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode; 01316 01317 TQMapIterator<TQString,TQStringOrTQPair> it, end = rawParameterList.end(); 01318 01319 for ( it = rawParameterList.begin() ; it != end ; ++it ) { 01320 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { 01321 // 01322 // new attribute: 01323 // 01324 01325 // store the last attribute/value pair in the result map now: 01326 if ( !attribute.isNull() ) result.insert( attribute, value ); 01327 // and extract the information from the new raw attribute: 01328 value = TQString(); 01329 attribute = it.key(); 01330 mode = NoMode; 01331 // is the value encoded? 01332 if ( attribute.endsWith( asterisk ) ) { 01333 attribute.truncate( attribute.length() - 1 ); 01334 mode = (Modes) ((int) mode | Encoded); 01335 } 01336 // is the value continued? 01337 if ( attribute.endsWith( asteriskZero ) ) { 01338 attribute.truncate( attribute.length() - 2 ); 01339 mode = (Modes) ((int) mode | Continued); 01340 } 01341 // 01342 // decode if necessary: 01343 // 01344 if ( mode & Encoded ) { 01345 decodeRFC2231Value( rfc2231Codec, textcodec, 01346 false, /* isn't continuation */ 01347 value, (*it).qpair ); 01348 } else { 01349 // not encoded. 
01350 if ( (*it).qpair.first ) 01351 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01352 else 01353 value += (*it).qstring; 01354 } 01355 01356 // 01357 // shortcut-processing when the value isn't encoded: 01358 // 01359 01360 if ( !(mode & Continued) ) { 01361 // save result already: 01362 result.insert( attribute, value ); 01363 // force begin of a new attribute: 01364 attribute = TQString(); 01365 } 01366 } else /* it.key().startsWith( attribute ) */ { 01367 // 01368 // continuation 01369 // 01370 01371 // ignore the section and trust TQMap to have sorted the keys: 01372 if ( it.key().endsWith( asterisk ) ) { 01373 // encoded 01374 decodeRFC2231Value( rfc2231Codec, textcodec, 01375 true, /* is continuation */ 01376 value, (*it).qpair ); 01377 } else { 01378 // not encoded 01379 if ( (*it).qpair.first ) 01380 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second ); 01381 else 01382 value += (*it).qstring; 01383 } 01384 } 01385 } 01386 01387 // write last attr/value pair: 01388 if ( !attribute.isNull() ) 01389 result.insert( attribute, value ); 01390 01391 return true; 01392 } 01393 01394 static const char * stdDayNames[] = { 01395 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 01396 }; 01397 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames; 01398 01399 static bool parseDayName( const char* & scursor, const char * const send ) 01400 { 01401 // check bounds: 01402 if ( send - scursor < 3 ) return false; 01403 01404 for ( int i = 0 ; i < stdDayNamesLen ; ++i ) 01405 if ( tqstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { 01406 scursor += 3; 01407 kdDebug() << "found " << stdDayNames[i] << endl; 01408 return true; 01409 } 01410 01411 return false; 01412 } 01413 01414 01415 static const char * stdMonthNames[] = { 01416 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 01417 "Jul", "Aug", "Sep", "Oct", "Nov", "Dez" 01418 }; 01419 static const int stdMonthNamesLen = 01420 sizeof stdMonthNames / sizeof 
*stdMonthNames; 01421 01422 static bool parseMonthName( const char* & scursor, const char * const send, 01423 int & result ) 01424 { 01425 // check bounds: 01426 if ( send - scursor < 3 ) return false; 01427 01428 for ( result = 0 ; result < stdMonthNamesLen ; ++result ) 01429 if ( tqstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { 01430 scursor += 3; 01431 return true; 01432 } 01433 01434 // not found: 01435 return false; 01436 } 01437 01438 static const struct { 01439 const char * tzName; 01440 long int secsEastOfGMT; 01441 } timeZones[] = { 01442 // rfc 822 timezones: 01443 { "GMT", 0 }, 01444 { "UT", 0 }, 01445 { "EDT", -4*3600 }, 01446 { "EST", -5*3600 }, 01447 { "MST", -5*3600 }, 01448 { "CST", -6*3600 }, 01449 { "MDT", -6*3600 }, 01450 { "MST", -7*3600 }, 01451 { "PDT", -7*3600 }, 01452 { "PST", -8*3600 }, 01453 // common, non-rfc-822 zones: 01454 { "CET", 1*3600 }, 01455 { "MET", 1*3600 }, 01456 { "UTC", 0 }, 01457 { "CEST", 2*3600 }, 01458 { "BST", 1*3600 }, 01459 // rfc 822 military timezones: 01460 { "Z", 0 }, 01461 { "A", -1*3600 }, 01462 { "B", -2*3600 }, 01463 { "C", -3*3600 }, 01464 { "D", -4*3600 }, 01465 { "E", -5*3600 }, 01466 { "F", -6*3600 }, 01467 { "G", -7*3600 }, 01468 { "H", -8*3600 }, 01469 { "I", -9*3600 }, 01470 // J is not used! 
01471 { "K", -10*3600 }, 01472 { "L", -11*3600 }, 01473 { "M", -12*3600 }, 01474 { "N", 1*3600 }, 01475 { "O", 2*3600 }, 01476 { "P", 3*3600 }, 01477 { "Q", 4*3600 }, 01478 { "R", 5*3600 }, 01479 { "S", 6*3600 }, 01480 { "T", 7*3600 }, 01481 { "U", 8*3600 }, 01482 { "V", 9*3600 }, 01483 { "W", 10*3600 }, 01484 { "X", 11*3600 }, 01485 { "Y", 12*3600 }, 01486 }; 01487 static const int timeZonesLen = sizeof timeZones / sizeof *timeZones; 01488 01489 static bool parseAlphaNumericTimeZone( const char* & scursor, 01490 const char * const send, 01491 long int & secsEastOfGMT, 01492 bool & timeZoneKnown ) 01493 { 01494 TQPair<const char*,int> maybeTimeZone(0,0); 01495 if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) 01496 return false; 01497 for ( int i = 0 ; i < timeZonesLen ; ++i ) 01498 if ( tqstrnicmp( timeZones[i].tzName, 01499 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { 01500 scursor += maybeTimeZone.second; 01501 secsEastOfGMT = timeZones[i].secsEastOfGMT; 01502 timeZoneKnown = true; 01503 return true; 01504 } 01505 01506 // don't choke just because we don't happen to know the time zone 01507 KMIME_WARN_UNKNOWN(time zone,TQCString( maybeTimeZone.first, maybeTimeZone.second+1 )); 01508 secsEastOfGMT = 0; 01509 timeZoneKnown = false; 01510 return true; 01511 } 01512 01513 // parse a number and return the number of digits parsed: 01514 static int parseDigits( const char* & scursor, const char * const send, 01515 int & result ) 01516 { 01517 result = 0; 01518 int digits = 0; 01519 for ( ; scursor != send && isdigit( *scursor ) ; scursor++, digits++ ) { 01520 result *= 10; 01521 result += int( *scursor - '0' ); 01522 } 01523 return digits; 01524 } 01525 01526 static bool parseTimeOfDay( const char* & scursor, const char * const send, 01527 int & hour, int & min, int & sec, bool isCRLF=false ) 01528 { 01529 // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] 01530 01531 // 01532 // 2DIGIT representing "hour": 01533 // 
01534 if ( !parseDigits( scursor, send, hour ) ) return false; 01535 01536 eatCFWS( scursor, send, isCRLF ); 01537 if ( scursor == send || *scursor != ':' ) return false; 01538 scursor++; // eat ':' 01539 01540 eatCFWS( scursor, send, isCRLF ); 01541 if ( scursor == send ) return false; 01542 01543 // 01544 // 2DIGIT representing "minute": 01545 // 01546 if ( !parseDigits( scursor, send, min ) ) return false; 01547 01548 eatCFWS( scursor, send, isCRLF ); 01549 if ( scursor == send ) return true; // seconds are optional 01550 01551 // 01552 // let's see if we have a 2DIGIT representing "second": 01553 // 01554 if ( *scursor == ':' ) { 01555 // yepp, there are seconds: 01556 scursor++; // eat ':' 01557 eatCFWS( scursor, send, isCRLF ); 01558 if ( scursor == send ) return false; 01559 01560 if ( !parseDigits( scursor, send, sec ) ) return false; 01561 } else { 01562 sec = 0; 01563 } 01564 01565 return true; 01566 } 01567 01568 01569 bool parseTime( const char* & scursor, const char * send, 01570 int & hour, int & min, int & sec, long int & secsEastOfGMT, 01571 bool & timeZoneKnown, bool isCRLF ) 01572 { 01573 // time := time-of-day CFWS ( zone / obs-zone ) 01574 // 01575 // obs-zone := "UT" / "GMT" / 01576 // "EST" / "EDT" / ; -0500 / -0400 01577 // "CST" / "CDT" / ; -0600 / -0500 01578 // "MST" / "MDT" / ; -0700 / -0600 01579 // "PST" / "PDT" / ; -0800 / -0700 01580 // "A"-"I" / "a"-"i" / 01581 // "K"-"Z" / "k"-"z" 01582 01583 eatCFWS( scursor, send, isCRLF ); 01584 if ( scursor == send ) return false; 01585 01586 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) 01587 return false; 01588 01589 eatCFWS( scursor, send, isCRLF ); 01590 if ( scursor == send ) { 01591 timeZoneKnown = false; 01592 secsEastOfGMT = 0; 01593 return true; // allow missing timezone 01594 } 01595 01596 timeZoneKnown = true; 01597 if ( *scursor == '+' || *scursor == '-' ) { 01598 // remember and eat '-'/'+': 01599 const char sign = *scursor++; 01600 // numerical timezone: 01601 int 
maybeTimeZone; 01602 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false; 01603 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); 01604 if ( sign == '-' ) { 01605 secsEastOfGMT *= -1; 01606 if ( secsEastOfGMT == 0 ) 01607 timeZoneKnown = false; // -0000 means indetermined tz 01608 } 01609 } else { 01610 // maybe alphanumeric timezone: 01611 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) 01612 return false; 01613 } 01614 return true; 01615 } 01616 01617 01618 bool parseDateTime( const char* & scursor, const char * const send, 01619 Types::DateTime & result, bool isCRLF ) 01620 { 01621 // Parsing date-time; strict mode: 01622 // 01623 // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday 01624 // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date 01625 // time 01626 // 01627 // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" 01628 // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / 01629 // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dez" 01630 01631 struct tm maybeDateTime = { 01632 #ifdef HAVE_TM_GMTOFF 01633 0, 0, // initializers for members tm_gmtoff and tm_zone 01634 #endif 01635 0, 0, 0, 0, 0, 0, 0, 0, 0 01636 }; 01637 01638 eatCFWS( scursor, send, isCRLF ); 01639 if ( scursor == send ) return false; 01640 01641 // 01642 // let's see if there's a day-of-week: 01643 // 01644 if ( parseDayName( scursor, send ) ) { 01645 eatCFWS( scursor, send, isCRLF ); 01646 if ( scursor == send ) return false; 01647 // day-name should be followed by ',' but we treat it as optional: 01648 if ( *scursor == ',' ) { 01649 scursor++; // eat ',' 01650 eatCFWS( scursor, send, isCRLF ); 01651 } 01652 } 01653 01654 // 01655 // 1*2DIGIT representing "day" (of month): 01656 // 01657 int maybeDay; 01658 if ( !parseDigits( scursor, send, maybeDay ) ) return false; 01659 01660 eatCFWS( scursor, send, isCRLF ); 01661 if ( scursor == send ) return false; 01662 01663 // 
success: store maybeDay in maybeDateTime: 01664 maybeDateTime.tm_mday = maybeDay; 01665 01666 // 01667 // month-name: 01668 // 01669 int maybeMonth = 0; 01670 if ( !parseMonthName( scursor, send, maybeMonth ) ) return false; 01671 if ( scursor == send ) return false; 01672 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); 01673 01674 eatCFWS( scursor, send, isCRLF ); 01675 if ( scursor == send ) return false; 01676 01677 // success: store maybeMonth in maybeDateTime: 01678 maybeDateTime.tm_mon = maybeMonth; 01679 01680 // 01681 // 2*DIGIT representing "year": 01682 // 01683 int maybeYear; 01684 if ( !parseDigits( scursor, send, maybeYear ) ) return false; 01685 // RFC 2822 4.3 processing: 01686 if ( maybeYear < 50 ) 01687 maybeYear += 2000; 01688 else if ( maybeYear < 1000 ) 01689 maybeYear += 1900; 01690 // else keep as is 01691 if ( maybeYear < 1900 ) return false; // rfc2822, 3.3 01692 01693 eatCFWS( scursor, send, isCRLF ); 01694 if ( scursor == send ) return false; 01695 01696 // success: store maybeYear in maybeDateTime: 01697 maybeDateTime.tm_year = maybeYear - 1900; 01698 01699 // 01700 // time 01701 // 01702 int maybeHour, maybeMinute, maybeSecond; 01703 long int secsEastOfGMT; 01704 bool timeZoneKnown = true; 01705 01706 if ( !parseTime( scursor, send, 01707 maybeHour, maybeMinute, maybeSecond, 01708 secsEastOfGMT, timeZoneKnown, isCRLF ) ) 01709 return false; 01710 01711 // success: store everything in maybeDateTime: 01712 maybeDateTime.tm_hour = maybeHour; 01713 maybeDateTime.tm_min = maybeMinute; 01714 maybeDateTime.tm_sec = maybeSecond; 01715 maybeDateTime.tm_isdst = DateFormatter::isDaylight(); 01716 // now put everything together and check if mktime(3) likes it: 01717 result.time = mktime( &maybeDateTime ); 01718 if ( result.time == (time_t)(-1) ) return false; 01719 01720 // adjust to UTC/GMT: 01721 //result.time -= secsEastOfGMT; 01722 result.secsEastOfGMT = secsEastOfGMT; 01723 result.timeZoneKnown = timeZoneKnown; 01724 01725 return true; 
01726 } 01727 01728 #if 0 01729 bool tryToMakeAnySenseOfDateString( const char* & scursor, 01730 const char * const send, 01731 time_t & result, bool isCRLF ) 01732 { 01733 return false; 01734 } 01735 #endif 01736 01737 } // namespace HeaderParsing 01738 01739 } // namespace KMime