00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <config.h>
00033 #include "kmime_header_parsing.h"
00034
00035 #include "kmime_codecs.h"
00036 #include "kmime_util.h"
00037 #include "kmime_warning.h"
00038
00039 #include <kglobal.h>
00040 #include <kcharsets.h>
00041
00042 #include <tqtextcodec.h>
00043 #include <tqmap.h>
00044 #include <tqcstring.h>
00045 #include <tqstringlist.h>
00046
00047 #include <ctype.h>
00048 #include <cassert>
00049
00050 using namespace KMime;
00051 using namespace KMime::Types;
00052
00053 namespace KMime {
00054
00055 namespace Types {
00056
00057 TQString AddrSpec::asString() const {
00058 bool needsQuotes = false;
00059 TQString result;
00060 result.reserve( localPart.length() + domain.length() + 1 );
00061 for ( unsigned int i = 0 ; i < localPart.length() ; ++i ) {
00062 const char ch = localPart[i].latin1();
00063 if ( ch == '.' || isAText( ch ) )
00064 result += ch;
00065 else {
00066 needsQuotes = true;
00067 if ( ch == '\\' || ch == '"' )
00068 result += '\\';
00069 result += ch;
00070 }
00071 }
00072 if ( needsQuotes )
00073 return '"' + result + "\"@" + domain;
00074 else
00075 return result + '@' + domain;
00076 }
00077
00078 }
00079
00080 namespace HeaderParsing {
00081
00082
00083 bool parseEncodedWord( const char* & scursor, const char * const send,
00084 TQString & result, TQCString & language ) {
00085
00086
00087 assert( *(scursor-1) == '=' );
00088
00089
00090
00091
00092
00093
00094 char ch = *scursor++;
00095
00096 if ( ch != '?' ) {
00097 kdDebug() << "first" << endl;
00098 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00099 return false;
00100 }
00101
00102
00103
00104 const char * charsetStart = scursor;
00105 const char * languageStart = 0;
00106
00107
00108
00109 for ( ; scursor != send ; scursor++ )
00110 if ( *scursor == '?')
00111 break;
00112 else if ( *scursor == '*' && !languageStart )
00113 languageStart = scursor + 1;
00114
00115
00116 if ( scursor == send || *scursor != '?' ) {
00117 kdDebug() << "second" << endl;
00118 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00119 return false;
00120 }
00121
00122
00123
00124 TQCString maybeLanguage( languageStart, scursor - languageStart + 1 );
00125
00126
00127 TQCString maybeCharset( charsetStart, ( languageStart ? languageStart : scursor + 1 ) - charsetStart );
00128
00129
00130
00131
00132
00133
00134
00135
00136 scursor++;
00137 const char * encodingStart = scursor;
00138
00139
00140 for ( ; scursor != send ; scursor++ )
00141 if ( *scursor == '?' ) break;
00142
00143
00144 if ( scursor == send || *scursor != '?' ) {
00145 kdDebug() << "third" << endl;
00146 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00147 return false;
00148 }
00149
00150
00151 TQCString maybeEncoding( encodingStart, scursor - encodingStart + 1 );
00152
00153
00154 kdDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
00155 << "\"; language == \"" << maybeLanguage
00156 << "\"; encoding == \"" << maybeEncoding << "\"" << endl;
00157
00158
00159
00160
00161
00162
00163
00164
00165 scursor++;
00166 const char * encodedTextStart = scursor;
00167
00168
00169 for ( ; scursor != send ; scursor++ )
00170 if ( *scursor == '?' ) break;
00171
00172
00173
00174 if ( scursor == send || *scursor != '?' ) {
00175 kdDebug() << "fourth" << endl;
00176 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00177 return false;
00178 }
00179 scursor++;
00180
00181 if ( scursor == send || *scursor != '=' ) {
00182 kdDebug() << "fifth" << endl;
00183 KMIME_WARN_PREMATURE_END_OF(EncodedWord);
00184 return false;
00185 }
00186 scursor++;
00187
00188
00189 const char * const encodedTextEnd = scursor - 2;
00190
00191
00192
00193
00194
00195
00196
00197
00198 Codec * codec = Codec::codecForName( maybeEncoding );
00199 if ( !codec ) {
00200 KMIME_WARN_UNKNOWN(Encoding,maybeEncoding);
00201 return false;
00202 }
00203
00204
00205 Decoder * dec = codec->makeDecoder();
00206 assert( dec );
00207
00208
00209 bool matchOK = false;
00210 TQTextCodec
00211 *textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00212
00213 if ( !matchOK || !textCodec ) {
00214 KMIME_WARN_UNKNOWN(Charset,maybeCharset);
00215 delete dec;
00216 return false;
00217 };
00218
00219 kdDebug() << "mimeName(): \"" << textCodec->mimeName() << "\"" << endl;
00220
00221
00222 int encodedTextLength = encodedTextEnd - encodedTextStart;
00223 TQByteArray buffer( codec->maxDecodedSizeFor( encodedTextLength ) );
00224 TQByteArray::Iterator bit = buffer.begin();
00225 TQByteArray::ConstIterator bend = buffer.end();
00226
00227
00228
00229
00230
00231
00232 if ( !dec->decode( encodedTextStart, encodedTextEnd, bit, bend ) )
00233 KMIME_WARN << codec->name() << " codec lies about it's maxDecodedSizeFor( "
00234 << encodedTextLength << " )\nresult may be truncated" << endl;
00235
00236 result = textCodec->toUnicode( buffer.begin(), bit - buffer.begin() );
00237
00238 kdDebug() << "result now: \"" << result << "\"" << endl;
00239
00240 delete dec;
00241 language = maybeLanguage;
00242
00243 return true;
00244 }
00245
// Advances scursor over any run of space, tab, CR and LF characters,
// stopping at the first other character or at send.
static inline void eatWhiteSpace( const char* & scursor, const char * const send ) {
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      break;       // still whitespace, keep going
    default:
      return;      // first non-whitespace character: leave the cursor on it
    }
  }
}
00252
00253 bool parseAtom( const char * & scursor, const char * const send,
00254 TQString & result, bool allow8Bit )
00255 {
00256 TQPair<const char*,int> maybeResult;
00257
00258 if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00259 result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
00260 return true;
00261 }
00262
00263 return false;
00264 }
00265
00266 bool parseAtom( const char * & scursor, const char * const send,
00267 TQPair<const char*,int> & result, bool allow8Bit ) {
00268 bool success = false;
00269 const char * start = scursor;
00270
00271 while ( scursor != send ) {
00272 signed char ch = *scursor++;
00273 if ( ch > 0 && isAText(ch) ) {
00274
00275 success = true;
00276 } else if ( allow8Bit && ch < 0 ) {
00277
00278 KMIME_WARN_8BIT(ch);
00279 success = true;
00280 } else {
00281
00282
00283
00284 scursor--;
00285 break;
00286 }
00287 }
00288 result.first = start;
00289 result.second = scursor - start;
00290 return success;
00291 }
00292
00293 bool parseToken( const char * & scursor, const char * const send,
00294 TQString & result, bool allow8Bit )
00295 {
00296 TQPair<const char*,int> maybeResult;
00297
00298 if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00299 result += TQString::fromLatin1( maybeResult.first, maybeResult.second );
00300 return true;
00301 }
00302
00303 return false;
00304 }
00305
00306 bool parseToken( const char * & scursor, const char * const send,
00307 TQPair<const char*,int> & result, bool allow8Bit )
00308 {
00309 bool success = false;
00310 const char * start = scursor;
00311
00312 while ( scursor != send ) {
00313 signed char ch = *scursor++;
00314 if ( ch > 0 && isTText(ch) ) {
00315
00316 success = true;
00317 } else if ( allow8Bit && ch < 0 ) {
00318
00319 KMIME_WARN_8BIT(ch);
00320 success = true;
00321 } else {
00322
00323
00324
00325 scursor--;
00326 break;
00327 }
00328 }
00329 result.first = start;
00330 result.second = scursor - start;
00331 return success;
00332 }
00333
00334 #define READ_ch_OR_FAIL if ( scursor == send ) { \
00335 KMIME_WARN_PREMATURE_END_OF(GenericQuotedString); \
00336 return false; \
00337 } else { \
00338 ch = *scursor++; \
00339 }
00340
00341
00342
00343
00344
00345 bool parseGenericQuotedString( const char* & scursor, const char * const send,
00346 TQString & result, bool isCRLF,
00347 const char openChar, const char closeChar )
00348 {
00349 char ch;
00350
00351
00352
00353
00354
00355
00356 assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00357
00358 while ( scursor != send ) {
00359 ch = *scursor++;
00360
00361 if ( ch == closeChar || ch == openChar ) {
00362
00363
00364 return true;
00365 }
00366
00367 switch( ch ) {
00368 case '\\':
00369
00370 READ_ch_OR_FAIL;
00371 KMIME_WARN_IF_8BIT(ch);
00372 result += TQChar(ch);
00373 break;
00374 case '\r':
00375
00376
00377
00378
00379
00380
00381
00382 READ_ch_OR_FAIL;
00383 if ( ch != '\n' ) {
00384
00385 KMIME_WARN_LONE(CR);
00386 result += TQChar('\r');
00387 scursor--;
00388 } else {
00389
00390
00391 READ_ch_OR_FAIL;
00392 if ( ch == ' ' || ch == '\t' ) {
00393
00394
00395
00396 result += TQChar(ch);
00397 } else {
00398
00399
00400
00401 KMIME_WARN_NON_FOLDING(CRLF);
00402 result += "\r\n";
00403
00404
00405
00406 scursor--;
00407 }
00408 }
00409 break;
00410 case '\n':
00411
00412
00413
00414
00415
00416
00417
00418 READ_ch_OR_FAIL;
00419 if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00420
00421
00422 result += TQChar(ch);
00423 } else {
00424
00425 KMIME_WARN_LONE(LF);
00426 result += TQChar('\n');
00427
00428
00429 scursor--;
00430 }
00431 break;
00432 default:
00433 KMIME_WARN_IF_8BIT(ch);
00434 result += TQChar(ch);
00435 }
00436 }
00437
00438 return false;
00439 }
00440
00441
00442
00443
00444
00445 bool parseComment( const char* & scursor, const char * const send,
00446 TQString & result, bool isCRLF, bool reallySave )
00447 {
00448 int commentNestingDepth = 1;
00449 const char * afterLastClosingParenPos = 0;
00450 TQString maybeCmnt;
00451 const char * oldscursor = scursor;
00452
00453 assert( *(scursor-1) == '(' );
00454
00455 while ( commentNestingDepth ) {
00456 TQString cmntPart;
00457 if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00458 assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00459
00460
00461 switch ( *(scursor-1) ) {
00462 case ')':
00463 if ( reallySave ) {
00464
00465 result += maybeCmnt;
00466 result += cmntPart;
00467 if ( commentNestingDepth > 1 )
00468 result += TQChar(')');
00469 maybeCmnt = TQString();
00470 }
00471 afterLastClosingParenPos = scursor;
00472 --commentNestingDepth;
00473 break;
00474 case '(':
00475 if ( reallySave ) {
00476
00477
00478 maybeCmnt += cmntPart;
00479 maybeCmnt += TQChar('(');
00480 }
00481 ++commentNestingDepth;
00482 break;
00483 default: assert( 0 );
00484 }
00485 } else {
00486
00487 if ( afterLastClosingParenPos )
00488 scursor = afterLastClosingParenPos;
00489 else
00490 scursor = oldscursor;
00491 return false;
00492 }
00493 }
00494
00495 return true;
00496 }
00497
00498
00499
00500
00501 bool parsePhrase( const char* & scursor, const char * const send,
00502 TQString & result, bool isCRLF )
00503 {
00504 enum { None, Phrase, Atom, EncodedWord, QuotedString } found = None;
00505 TQString tmp;
00506 TQCString lang;
00507 const char * successfullyParsed = 0;
00508
00509 const char * oldscursor;
00510
00511
00512 bool lastWasEncodedWord = false;
00513
00514 while ( scursor != send ) {
00515 char ch = *scursor++;
00516 switch ( ch ) {
00517 case '.':
00518 if ( found == None ) {
00519 --scursor;
00520 return false;
00521 } else {
00522 if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) )
00523 result += ". ";
00524 else
00525 result += '.';
00526 successfullyParsed = scursor;
00527 }
00528 break;
00529 case '"':
00530 tmp = TQString();
00531 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00532 successfullyParsed = scursor;
00533 assert( *(scursor-1) == '"' );
00534 switch ( found ) {
00535 case None:
00536 found = QuotedString;
00537 break;
00538 case Phrase:
00539 case Atom:
00540 case EncodedWord:
00541 case QuotedString:
00542 found = Phrase;
00543 result += TQChar(' ');
00544 break;
00545 default:
00546 assert( 0 );
00547 }
00548 lastWasEncodedWord = false;
00549 result += tmp;
00550 } else {
00551
00552
00553
00554 if ( found == None ) {
00555 return false;
00556 } else {
00557 result += TQChar(' ');
00558 result += tmp;
00559 return true;
00560 }
00561 }
00562 break;
00563 case '(':
00564
00565 tmp = TQString();
00566 if ( parseComment( scursor, send, tmp, isCRLF,
00567 false ) ) {
00568 successfullyParsed = scursor;
00569 lastWasEncodedWord = false;
00570 } else {
00571 if ( found == None )
00572 return false;
00573 else {
00574 scursor = successfullyParsed;
00575 return true;
00576 }
00577 }
00578 break;
00579 case '=':
00580 tmp = TQString();
00581 oldscursor = scursor;
00582 lang = 0;
00583 if ( parseEncodedWord( scursor, send, tmp, lang ) ) {
00584 successfullyParsed = scursor;
00585 switch ( found ) {
00586 case None:
00587 found = EncodedWord;
00588 break;
00589 case Phrase:
00590 case EncodedWord:
00591 case Atom:
00592 case QuotedString:
00593 if ( !lastWasEncodedWord )
00594 result += TQChar(' ');
00595 found = Phrase;
00596 break;
00597 default: assert( 0 );
00598 }
00599 lastWasEncodedWord = true;
00600 result += tmp;
00601 break;
00602 } else
00603
00604 scursor = oldscursor;
00605
00606
00607 default:
00608 tmp = TQString();
00609 scursor--;
00610 if ( parseAtom( scursor, send, tmp, true ) ) {
00611 successfullyParsed = scursor;
00612 switch ( found ) {
00613 case None:
00614 found = Atom;
00615 break;
00616 case Phrase:
00617 case Atom:
00618 case EncodedWord:
00619 case QuotedString:
00620 found = Phrase;
00621 result += TQChar(' ');
00622 break;
00623 default:
00624 assert( 0 );
00625 }
00626 lastWasEncodedWord = false;
00627 result += tmp;
00628 } else {
00629 if ( found == None )
00630 return false;
00631 else {
00632 scursor = successfullyParsed;
00633 return true;
00634 }
00635 }
00636 }
00637 eatWhiteSpace( scursor, send );
00638 }
00639
00640 return ( found != None );
00641 }
00642
00643
00644 bool parseDotAtom( const char* & scursor, const char * const send,
00645 TQString & result, bool isCRLF )
00646 {
00647
00648 const char * successfullyParsed;
00649
00650 TQString tmp;
00651 if ( !parseAtom( scursor, send, tmp, false ) )
00652 return false;
00653 result += tmp;
00654 successfullyParsed = scursor;
00655
00656 while ( scursor != send ) {
00657 eatCFWS( scursor, send, isCRLF );
00658
00659
00660 if ( scursor == send || *scursor != '.' ) return true;
00661 scursor++;
00662
00663 eatCFWS( scursor, send, isCRLF );
00664
00665 if ( scursor == send || !isAText( *scursor ) ) {
00666
00667
00668
00669 scursor = successfullyParsed;
00670 return true;
00671 }
00672
00673
00674 TQString maybeAtom;
00675 if ( !parseAtom( scursor, send, maybeAtom, false ) ) {
00676 scursor = successfullyParsed;
00677 return true;
00678 }
00679
00680 result += TQChar('.');
00681 result += maybeAtom;
00682 successfullyParsed = scursor;
00683 }
00684
00685 scursor = successfullyParsed;
00686 return true;
00687 }
00688
00689
00690 void eatCFWS( const char* & scursor, const char * const send, bool isCRLF ) {
00691 TQString dummy;
00692
00693 while ( scursor != send ) {
00694 const char * oldscursor = scursor;
00695
00696 char ch = *scursor++;
00697
00698 switch( ch ) {
00699 case ' ':
00700 case '\t':
00701 case '\r':
00702 case '\n':
00703 continue;
00704
00705 case '(':
00706 if ( parseComment( scursor, send, dummy, isCRLF, false ) )
00707 continue;
00708 scursor = oldscursor;
00709 return;
00710
00711 default:
00712 scursor = oldscursor;
00713 return;
00714 }
00715
00716 }
00717 }
00718
00719 bool parseDomain( const char* & scursor, const char * const send,
00720 TQString & result, bool isCRLF ) {
00721 eatCFWS( scursor, send, isCRLF );
00722 if ( scursor == send ) return false;
00723
00724
00725
00726
00727
00728
00729
00730 if ( *scursor == '[' ) {
00731
00732 TQString maybeDomainLiteral;
00733
00734 scursor++;
00735 while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00736 isCRLF, '[', ']' ) ) {
00737 if ( scursor == send ) {
00738
00739 if ( *(scursor-1) == ']' ) {
00740
00741 result = maybeDomainLiteral;
00742 return true;
00743 } else {
00744
00745 return false;
00746 }
00747 }
00748
00749
00750 if ( *(scursor-1) == '[' ) {
00751 maybeDomainLiteral += TQChar('[');
00752 continue;
00753 }
00754
00755 result = maybeDomainLiteral;
00756 return true;
00757 }
00758 } else {
00759
00760 TQString maybeDotAtom;
00761 if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00762 result = maybeDotAtom;
00763 return true;
00764 }
00765 }
00766 return false;
00767 }
00768
00769 bool parseObsRoute( const char* & scursor, const char* const send,
00770 TQStringList & result, bool isCRLF, bool save ) {
00771 while ( scursor != send ) {
00772 eatCFWS( scursor, send, isCRLF );
00773 if ( scursor == send ) return false;
00774
00775
00776 if ( *scursor == ',' ) {
00777 scursor++;
00778 if ( save ) result.append( TQString() );
00779 continue;
00780 }
00781
00782
00783 if ( *scursor == ':' ) {
00784 scursor++;
00785 if ( save ) result.append( TQString() );
00786 return true;
00787 }
00788
00789
00790 if ( *scursor != '@' )
00791 return false;
00792 else
00793 scursor++;
00794
00795 TQString maybeDomain;
00796 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) return false;
00797 if ( save ) result.append( maybeDomain );
00798
00799
00800 eatCFWS( scursor, send, isCRLF );
00801 if ( scursor == send ) return false;
00802 if ( *scursor == ':' ) { scursor++; return true; }
00803 if ( *scursor == ',' ) scursor++;
00804
00805 }
00806
00807 return false;
00808 }
00809
00810 bool parseAddrSpec( const char* & scursor, const char * const send,
00811 AddrSpec & result, bool isCRLF ) {
00812
00813
00814
00815
00816
00817
00818
00819 TQString maybeLocalPart;
00820 TQString tmp;
00821
00822 while ( scursor != send ) {
00823
00824 eatCFWS( scursor, send, isCRLF );
00825
00826 char ch = *scursor++;
00827 switch ( ch ) {
00828 case '.':
00829 maybeLocalPart += TQChar('.');
00830 break;
00831
00832 case '@':
00833 goto SAW_AT_SIGN;
00834 break;
00835
00836 case '"':
00837 tmp = TQString();
00838 if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) )
00839 maybeLocalPart += tmp;
00840 else
00841 return false;
00842 break;
00843
00844 default:
00845 scursor--;
00846 tmp = TQString();
00847 if ( parseAtom( scursor, send, tmp, false ) )
00848 maybeLocalPart += tmp;
00849 else
00850 return false;
00851 break;
00852 }
00853 }
00854
00855 return false;
00856
00857
00858
00859
00860
00861
00862
00863 SAW_AT_SIGN:
00864
00865 assert( *(scursor-1) == '@' );
00866
00867 TQString maybeDomain;
00868 if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) )
00869 return false;
00870
00871 result.localPart = maybeLocalPart;
00872 result.domain = maybeDomain;
00873
00874 return true;
00875 }
00876
00877
00878 bool parseAngleAddr( const char* & scursor, const char * const send,
00879 AddrSpec & result, bool isCRLF ) {
00880
00881 eatCFWS( scursor, send, isCRLF );
00882 if ( scursor == send || *scursor != '<' ) return false;
00883 scursor++;
00884
00885 eatCFWS( scursor, send, isCRLF );
00886 if ( scursor == send ) return false;
00887
00888 if ( *scursor == '@' || *scursor == ',' ) {
00889
00890 KMIME_WARN << "obsolete source route found! ignoring." << endl;
00891 TQStringList dummy;
00892 if ( !parseObsRoute( scursor, send, dummy,
00893 isCRLF, false ) )
00894 return false;
00895
00896 if ( scursor == send ) return false;
00897 }
00898
00899
00900 AddrSpec maybeAddrSpec;
00901 if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) return false;
00902
00903 eatCFWS( scursor, send, isCRLF );
00904 if ( scursor == send || *scursor != '>' ) return false;
00905 scursor++;
00906
00907 result = maybeAddrSpec;
00908 return true;
00909
00910 }
00911
00912 bool parseMailbox( const char* & scursor, const char * const send,
00913 Mailbox & result, bool isCRLF ) {
00914
00915
00916
00917
00918
00919
00920
00921 eatCFWS( scursor, send, isCRLF );
00922 if ( scursor == send ) return false;
00923
00924 AddrSpec maybeAddrSpec;
00925
00926
00927 const char * oldscursor = scursor;
00928 if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
00929 result.displayName = TQString();
00930 result.addrSpec = maybeAddrSpec;
00931 return true;
00932 }
00933 scursor = oldscursor;
00934
00935
00936 TQString maybeDisplayName;
00937 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
00938
00939 maybeDisplayName = TQString();
00940 scursor = oldscursor;
00941 } else {
00942
00943 eatCFWS( scursor, send, isCRLF );
00944 if ( scursor == send ) return false;
00945 }
00946
00947
00948 if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) )
00949 return false;
00950
00951 if ( maybeDisplayName.isNull() ) {
00952
00953 eatWhiteSpace( scursor, send );
00954 if ( scursor != send && *scursor == '(' ) {
00955 scursor++;
00956 if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true ) )
00957 return false;
00958 }
00959 }
00960
00961 result.displayName = maybeDisplayName;
00962 result.addrSpec = maybeAddrSpec;
00963 return true;
00964 }
00965
00966 bool parseGroup( const char* & scursor, const char * const send,
00967 Address & result, bool isCRLF ) {
00968
00969
00970
00971
00972
00973 eatCFWS( scursor, send, isCRLF );
00974 if ( scursor == send ) return false;
00975
00976
00977 TQString maybeDisplayName;
00978 if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) )
00979 return false;
00980
00981
00982 eatCFWS( scursor, send, isCRLF );
00983 if ( scursor == send || *scursor != ':' ) return false;
00984
00985 result.displayName = maybeDisplayName;
00986
00987
00988 scursor++;
00989 while ( scursor != send ) {
00990 eatCFWS( scursor, send, isCRLF );
00991 if ( scursor == send ) return false;
00992
00993
00994 if ( *scursor == ',' ) { scursor++; continue; }
00995
00996
00997 if ( *scursor == ';' ) { scursor++; return true; }
00998
00999 Mailbox maybeMailbox;
01000 if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) )
01001 return false;
01002 result.mailboxList.append( maybeMailbox );
01003
01004 eatCFWS( scursor, send, isCRLF );
01005
01006 if ( scursor == send ) return false;
01007
01008 if ( *scursor == ';' ) { scursor++; return true; }
01009
01010 if ( *scursor == ',' ) scursor++;
01011 }
01012 return false;
01013 }
01014
01015
01016 bool parseAddress( const char* & scursor, const char * const send,
01017 Address & result, bool isCRLF ) {
01018
01019
01020 eatCFWS( scursor, send, isCRLF );
01021 if ( scursor == send ) return false;
01022
01023
01024 Mailbox maybeMailbox;
01025 const char * oldscursor = scursor;
01026 if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01027
01028 result.displayName = TQString();
01029 result.mailboxList.append( maybeMailbox );
01030 return true;
01031 }
01032 scursor = oldscursor;
01033
01034 Address maybeAddress;
01035
01036
01037 if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) )
01038 return false;
01039
01040 result = maybeAddress;
01041 return true;
01042 }
01043
01044 bool parseAddressList( const char* & scursor, const char * const send,
01045 AddressList & result, bool isCRLF ) {
01046 while ( scursor != send ) {
01047 eatCFWS( scursor, send, isCRLF );
01048
01049 if ( scursor == send ) return true;
01050
01051 if ( *scursor == ',' ) { scursor++; continue; }
01052
01053
01054 Address maybeAddress;
01055 if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) return false;
01056 result.append( maybeAddress );
01057
01058 eatCFWS( scursor, send, isCRLF );
01059
01060 if ( scursor == send ) return true;
01061
01062 if ( *scursor == ',' ) scursor++;
01063 }
01064 return true;
01065 }
01066
01067
01068 static TQString asterisk = TQString::fromLatin1("*0*",1);
01069 static TQString asteriskZero = TQString::fromLatin1("*0*",2);
01070
01071
01072 bool parseParameter( const char* & scursor, const char * const send,
01073 TQPair<TQString,TQStringOrTQPair> & result, bool isCRLF ) {
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084 eatCFWS( scursor, send, isCRLF );
01085 if ( scursor == send ) return false;
01086
01087
01088
01089
01090 TQString maybeAttribute;
01091 if ( !parseToken( scursor, send, maybeAttribute, false ) )
01092 return false;
01093
01094 eatCFWS( scursor, send, isCRLF );
01095
01096 if ( scursor == send || *scursor != '=' ) return false;
01097 scursor++;
01098
01099 eatCFWS( scursor, send, isCRLF );
01100 if ( scursor == send ) {
01101
01102 if ( maybeAttribute.endsWith( asterisk ) ) {
01103 KMIME_WARN << "attribute ends with \"*\", but value is empty! "
01104 "Chopping away \"*\"." << endl;
01105 maybeAttribute.truncate( maybeAttribute.length() - 1 );
01106 }
01107 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
01108 return true;
01109 }
01110
01111 const char * oldscursor = scursor;
01112
01113
01114
01115
01116 TQStringOrTQPair maybeValue;
01117 if ( *scursor == '"' ) {
01118
01119 scursor++;
01120 if ( maybeAttribute.endsWith( asterisk ) ) {
01121
01122
01123
01124 KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string! "
01125 "Chopping away \"*\"." << endl;
01126 maybeAttribute.truncate( maybeAttribute.length() - 1 );
01127 }
01128
01129 if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01130 scursor = oldscursor;
01131 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
01132 return false;
01133 }
01134 } else {
01135
01136 if ( !parseToken( scursor, send, maybeValue.qpair, false ) ) {
01137 scursor = oldscursor;
01138 result = tqMakePair( maybeAttribute.lower(), TQStringOrTQPair() );
01139 return false;
01140 }
01141 }
01142
01143 result = tqMakePair( maybeAttribute.lower(), maybeValue );
01144 return true;
01145 }
01146
01147
01148
01149 bool parseRawParameterList( const char* & scursor, const char * const send,
01150 TQMap<TQString,TQStringOrTQPair> & result,
01151 bool isCRLF ) {
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162 while ( scursor != send ) {
01163 eatCFWS( scursor, send, isCRLF );
01164
01165 if ( scursor == send ) return true;
01166
01167 if ( *scursor == ';' ) { scursor++; continue; }
01168
01169 TQPair<TQString,TQStringOrTQPair> maybeParameter;
01170 if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01171
01172
01173
01174
01175
01176
01177
01178 if ( maybeParameter.first.isNull() ) return false;
01179 while ( scursor != send ) {
01180 if ( *scursor++ == ';' ) goto IS_SEMICOLON;
01181 }
01182
01183 return true;
01184 IS_SEMICOLON:
01185
01186 continue;
01187 }
01188
01189 result.insert( maybeParameter.first, maybeParameter.second );
01190
01191 eatCFWS( scursor, send, isCRLF );
01192
01193 if ( scursor == send ) return true;
01194
01195 if ( *scursor == ';' ) scursor++;
01196 }
01197 return true;
01198 }
01199
01200
01201 static void decodeRFC2231Value( Codec* & rfc2231Codec,
01202 TQTextCodec* & textcodec,
01203 bool isContinuation, TQString & value,
01204 TQPair<const char*,int> & source ) {
01205
01206
01207
01208
01209
01210 const char * decBegin = source.first;
01211 const char * decCursor = decBegin;
01212 const char * decEnd = decCursor + source.second;
01213
01214 if ( !isContinuation ) {
01215
01216 while ( decCursor != decEnd ) {
01217 if ( *decCursor == '\'' ) break;
01218 else decCursor++;
01219 }
01220
01221 if ( decCursor == decEnd ) {
01222
01223
01224 KMIME_WARN << "No charset in extended-initial-value. "
01225 "Assuming \"iso-8859-1\"." << endl;
01226 value += TQString::fromLatin1( decBegin, source.second );
01227 return;
01228 }
01229
01230 TQCString charset( decBegin, decCursor - decBegin + 1 );
01231
01232 const char * oldDecCursor = ++decCursor;
01233
01234 while ( decCursor != decEnd ) {
01235 if ( *decCursor == '\'' ) break;
01236 else decCursor++;
01237 }
01238 if ( decCursor == decEnd ) {
01239 KMIME_WARN << "No language in extended-initial-value. "
01240 "Trying to recover." << endl;
01241 decCursor = oldDecCursor;
01242 } else
01243 decCursor++;
01244
01245
01246
01247
01248
01249
01250
01251
01252 bool matchOK = false;
01253 textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01254 if ( !matchOK ) {
01255 textcodec = 0;
01256 KMIME_WARN_UNKNOWN(Charset,charset);
01257 }
01258 }
01259
01260 if ( !rfc2231Codec ) {
01261 rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01262 assert( rfc2231Codec );
01263 }
01264
01265 if ( !textcodec ) {
01266 value += TQString::fromLatin1( decCursor, decEnd - decCursor );
01267 return;
01268 }
01269
01270 Decoder * dec = rfc2231Codec->makeDecoder();
01271 assert( dec );
01272
01273
01274
01275
01276
01277 TQByteArray buffer( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01278 TQByteArray::Iterator bit = buffer.begin();
01279 TQByteArray::ConstIterator bend = buffer.end();
01280
01281 if ( !dec->decode( decCursor, decEnd, bit, bend ) )
01282 KMIME_WARN << rfc2231Codec->name()
01283 << " codec lies about it's maxDecodedSizeFor()\n"
01284 "result may be truncated" << endl;
01285
01286 value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01287
01288 kdDebug() << "value now: \"" << value << "\"" << endl;
01289
01290 delete dec;
01291 }
01292
01293
01294
01295
01296
01297 bool parseParameterList( const char* & scursor, const char * const send,
01298 TQMap<TQString,TQString> & result, bool isCRLF ) {
01299
01300 TQMap<TQString,TQStringOrTQPair> rawParameterList;
01301 if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) )
01302 return false;
01303
01304 if ( rawParameterList.isEmpty() ) return true;
01305
01306
01307
01308
01309
01310
01311 Codec * rfc2231Codec = 0;
01312 TQTextCodec * textcodec = 0;
01313 TQString attribute;
01314 TQString value;
01315 enum Modes { NoMode = 0x0, Continued = 0x1, Encoded = 0x2 } mode;
01316
01317 TQMapIterator<TQString,TQStringOrTQPair> it, end = rawParameterList.end();
01318
01319 for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01320 if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01321
01322
01323
01324
01325
01326 if ( !attribute.isNull() ) result.insert( attribute, value );
01327
01328 value = TQString();
01329 attribute = it.key();
01330 mode = NoMode;
01331
01332 if ( attribute.endsWith( asterisk ) ) {
01333 attribute.truncate( attribute.length() - 1 );
01334 mode = (Modes) ((int) mode | Encoded);
01335 }
01336
01337 if ( attribute.endsWith( asteriskZero ) ) {
01338 attribute.truncate( attribute.length() - 2 );
01339 mode = (Modes) ((int) mode | Continued);
01340 }
01341
01342
01343
01344 if ( mode & Encoded ) {
01345 decodeRFC2231Value( rfc2231Codec, textcodec,
01346 false,
01347 value, (*it).qpair );
01348 } else {
01349
01350 if ( (*it).qpair.first )
01351 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01352 else
01353 value += (*it).qstring;
01354 }
01355
01356
01357
01358
01359
01360 if ( !(mode & Continued) ) {
01361
01362 result.insert( attribute, value );
01363
01364 attribute = TQString();
01365 }
01366 } else {
01367
01368
01369
01370
01371
01372 if ( it.key().endsWith( asterisk ) ) {
01373
01374 decodeRFC2231Value( rfc2231Codec, textcodec,
01375 true,
01376 value, (*it).qpair );
01377 } else {
01378
01379 if ( (*it).qpair.first )
01380 value += TQString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01381 else
01382 value += (*it).qstring;
01383 }
01384 }
01385 }
01386
01387
01388 if ( !attribute.isNull() )
01389 result.insert( attribute, value );
01390
01391 return true;
01392 }
01393
// Three-letter English day-of-week abbreviations, Sunday first.
static const char * stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// number of entries in stdDayNames
static const int stdDayNamesLen = sizeof( stdDayNames ) / sizeof( stdDayNames[0] );
01398
01399 static bool parseDayName( const char* & scursor, const char * const send )
01400 {
01401
01402 if ( send - scursor < 3 ) return false;
01403
01404 for ( int i = 0 ; i < stdDayNamesLen ; ++i )
01405 if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01406 scursor += 3;
01407 kdDebug() << "found " << stdDayNames[i] << endl;
01408 return true;
01409 }
01410
01411 return false;
01412 }
01413
01414
// Three-letter English month abbreviations as used in RFC 822/2822 dates,
// January first. The index of a name is the zero-based month number.
// (December is "Dec"; the previous "Dez" spelling never matched.)
static const char * stdMonthNames[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
// number of entries in stdMonthNames
static const int stdMonthNamesLen =
  sizeof stdMonthNames / sizeof *stdMonthNames;
01421
01422 static bool parseMonthName( const char* & scursor, const char * const send,
01423 int & result )
01424 {
01425
01426 if ( send - scursor < 3 ) return false;
01427
01428 for ( result = 0 ; result < stdMonthNamesLen ; ++result )
01429 if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01430 scursor += 3;
01431 return true;
01432 }
01433
01434
01435 return false;
01436 }
01437
// Table of alphabetic time zone designators and their offset from GMT in
// seconds (positive = east of Greenwich). The table is searched front to back
// by parseAlphaNumericTimeZone(), so the first matching entry wins.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // RFC 822 time zones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },  // was a second "MST" entry, which shadowed the real one
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common zones that are not part of RFC 822:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // single-letter military zones ("J" is not assigned).
  // NOTE: the signs follow the table in RFC 822, which RFC 2822 section 4.3
  // describes as non-standard; the values are kept unchanged here.
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// Number of entries in timeZones.
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01488
01489 static bool parseAlphaNumericTimeZone( const char* & scursor,
01490 const char * const send,
01491 long int & secsEastOfGMT,
01492 bool & timeZoneKnown )
01493 {
01494 TQPair<const char*,int> maybeTimeZone(0,0);
01495 if ( !parseToken( scursor, send, maybeTimeZone, false ) )
01496 return false;
01497 for ( int i = 0 ; i < timeZonesLen ; ++i )
01498 if ( qstrnicmp( timeZones[i].tzName,
01499 maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01500 scursor += maybeTimeZone.second;
01501 secsEastOfGMT = timeZones[i].secsEastOfGMT;
01502 timeZoneKnown = true;
01503 return true;
01504 }
01505
01506
01507 KMIME_WARN_UNKNOWN(time zone,TQCString( maybeTimeZone.first, maybeTimeZone.second+1 ));
01508 secsEastOfGMT = 0;
01509 timeZoneKnown = false;
01510 return true;
01511 }
01512
01513
// Parses a run of ASCII decimal digits starting at scursor.
//
// result receives the numeric value of the digits (0 if there are none) and
// the cursor is left on the first character that is not a digit, or on send.
// The return value is the number of digits consumed, so a return of 0 means
// "no number here".
//
// Values too large for an int saturate at the largest int value instead of
// overflowing; all digits are still consumed and counted.
static int parseDigits( const char* & scursor, const char * const send,
                        int & result )
{
  // Largest value an int can hold, computed without needing <limits.h>
  // (assumes int and unsigned int have the same width).
  const unsigned int maxValue = ~0u >> 1;

  unsigned int value = 0;
  int digits = 0;
  // Explicit range check instead of isdigit(): header data may contain bytes
  // with the high bit set, and passing a negative char to isdigit() is
  // undefined. It also keeps the parser independent of the current locale.
  for ( ; scursor != send && *scursor >= '0' && *scursor <= '9' ;
        ++scursor, ++digits ) {
    const unsigned int digit = static_cast<unsigned int>( *scursor - '0' );
    if ( value > ( maxValue - digit ) / 10 )
      value = maxValue;  // would overflow: saturate
    else
      value = value * 10 + digit;
  }
  result = static_cast<int>( value );
  return digits;
}
01525
01526 static bool parseTimeOfDay( const char* & scursor, const char * const send,
01527 int & hour, int & min, int & sec, bool isCRLF=false )
01528 {
01529
01530
01531
01532
01533
01534 if ( !parseDigits( scursor, send, hour ) ) return false;
01535
01536 eatCFWS( scursor, send, isCRLF );
01537 if ( scursor == send || *scursor != ':' ) return false;
01538 scursor++;
01539
01540 eatCFWS( scursor, send, isCRLF );
01541 if ( scursor == send ) return false;
01542
01543
01544
01545
01546 if ( !parseDigits( scursor, send, min ) ) return false;
01547
01548 eatCFWS( scursor, send, isCRLF );
01549 if ( scursor == send ) return true;
01550
01551
01552
01553
01554 if ( *scursor == ':' ) {
01555
01556 scursor++;
01557 eatCFWS( scursor, send, isCRLF );
01558 if ( scursor == send ) return false;
01559
01560 if ( !parseDigits( scursor, send, sec ) ) return false;
01561 } else {
01562 sec = 0;
01563 }
01564
01565 return true;
01566 }
01567
01568
01569 bool parseTime( const char* & scursor, const char * send,
01570 int & hour, int & min, int & sec, long int & secsEastOfGMT,
01571 bool & timeZoneKnown, bool isCRLF )
01572 {
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583 eatCFWS( scursor, send, isCRLF );
01584 if ( scursor == send ) return false;
01585
01586 if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) )
01587 return false;
01588
01589 eatCFWS( scursor, send, isCRLF );
01590 if ( scursor == send ) {
01591 timeZoneKnown = false;
01592 secsEastOfGMT = 0;
01593 return true;
01594 }
01595
01596 timeZoneKnown = true;
01597 if ( *scursor == '+' || *scursor == '-' ) {
01598
01599 const char sign = *scursor++;
01600
01601 int maybeTimeZone;
01602 if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) return false;
01603 secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01604 if ( sign == '-' ) {
01605 secsEastOfGMT *= -1;
01606 if ( secsEastOfGMT == 0 )
01607 timeZoneKnown = false;
01608 }
01609 } else {
01610
01611 if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) )
01612 return false;
01613 }
01614 return true;
01615 }
01616
01617
01618 bool parseDateTime( const char* & scursor, const char * const send,
01619 Types::DateTime & result, bool isCRLF )
01620 {
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631 struct tm maybeDateTime = {
01632 #ifdef HAVE_TM_GMTOFF
01633 0, 0,
01634 #endif
01635 0, 0, 0, 0, 0, 0, 0, 0, 0
01636 };
01637
01638 eatCFWS( scursor, send, isCRLF );
01639 if ( scursor == send ) return false;
01640
01641
01642
01643
01644 if ( parseDayName( scursor, send ) ) {
01645 eatCFWS( scursor, send, isCRLF );
01646 if ( scursor == send ) return false;
01647
01648 if ( *scursor == ',' ) {
01649 scursor++;
01650 eatCFWS( scursor, send, isCRLF );
01651 }
01652 }
01653
01654
01655
01656
01657 int maybeDay;
01658 if ( !parseDigits( scursor, send, maybeDay ) ) return false;
01659
01660 eatCFWS( scursor, send, isCRLF );
01661 if ( scursor == send ) return false;
01662
01663
01664 maybeDateTime.tm_mday = maybeDay;
01665
01666
01667
01668
01669 int maybeMonth = 0;
01670 if ( !parseMonthName( scursor, send, maybeMonth ) ) return false;
01671 if ( scursor == send ) return false;
01672 assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01673
01674 eatCFWS( scursor, send, isCRLF );
01675 if ( scursor == send ) return false;
01676
01677
01678 maybeDateTime.tm_mon = maybeMonth;
01679
01680
01681
01682
01683 int maybeYear;
01684 if ( !parseDigits( scursor, send, maybeYear ) ) return false;
01685
01686 if ( maybeYear < 50 )
01687 maybeYear += 2000;
01688 else if ( maybeYear < 1000 )
01689 maybeYear += 1900;
01690
01691 if ( maybeYear < 1900 ) return false;
01692
01693 eatCFWS( scursor, send, isCRLF );
01694 if ( scursor == send ) return false;
01695
01696
01697 maybeDateTime.tm_year = maybeYear - 1900;
01698
01699
01700
01701
01702 int maybeHour, maybeMinute, maybeSecond;
01703 long int secsEastOfGMT;
01704 bool timeZoneKnown = true;
01705
01706 if ( !parseTime( scursor, send,
01707 maybeHour, maybeMinute, maybeSecond,
01708 secsEastOfGMT, timeZoneKnown, isCRLF ) )
01709 return false;
01710
01711
01712 maybeDateTime.tm_hour = maybeHour;
01713 maybeDateTime.tm_min = maybeMinute;
01714 maybeDateTime.tm_sec = maybeSecond;
01715 maybeDateTime.tm_isdst = DateFormatter::isDaylight();
01716
01717 result.time = mktime( &maybeDateTime );
01718 if ( result.time == (time_t)(-1) ) return false;
01719
01720
01721
01722 result.secsEastOfGMT = secsEastOfGMT;
01723 result.timeZoneKnown = timeZoneKnown;
01724
01725 return true;
01726 }
01727
#if 0
// Disabled placeholder (excluded from the build by the surrounding #if 0).
// As written it does not look at its input at all and always reports failure.
bool tryToMakeAnySenseOfDateString( const char* & scursor,
                                    const char * const send,
                                    time_t & result, bool isCRLF )
{
  // nothing is implemented; mark the parameters as intentionally unused
  (void) scursor;
  (void) send;
  (void) result;
  (void) isCRLF;
  return false;
}
#endif
01736
01737 }
01738
01739 }