linklocator.cpp
00001 00023 #include "linklocator.h" 00024 #include "pimemoticons.h" 00025 #include <tdeglobal.h> 00026 #include <kstandarddirs.h> 00027 #include <kstaticdeleter.h> 00028 #include <kmdcodec.h> 00029 #include <kdebug.h> 00030 00031 #include <tqstylesheet.h> 00032 #include <tqfile.h> 00033 #include <tqregexp.h> 00034 00035 #include <limits.h> 00036 00037 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0; 00038 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0; 00039 00040 static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter; 00041 static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter; 00042 00043 LinkLocator::LinkLocator(const TQString& text, int pos) 00044 : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255) 00045 { 00046 // If you change either of the above values for maxUrlLen or 00047 // maxAddressLen, then please also update the documentation for 00048 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the 00049 // default values used for the maxUrlLen/maxAddressLen parameters 00050 // of convertToHtml(). 00051 00052 if ( !s_smileyEmoticonNameMap ) { 00053 smileyMapDeleter.setObject( s_smileyEmoticonNameMap, 00054 new TQMap<TQString, TQString>() ); 00055 for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) { 00056 TQString imageName( EmotIcons::EnumSindex::enumToString[i] ); 00057 imageName.truncate( imageName.length() - 2 ); //remove the _0 bit 00058 s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName ); 00059 } 00060 } 00061 00062 if ( !s_smileyEmoticonHTMLCache ) 00063 smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache, 00064 new TQMap<TQString, TQString>() ); 00065 } 00066 00067 void LinkLocator::setMaxUrlLen(int length) 00068 { 00069 mMaxUrlLen = length; 00070 } 00071 00072 int LinkLocator::maxUrlLen() const 00073 { 00074 return mMaxUrlLen; 00075 } 00076 00077 void LinkLocator::setMaxAddressLen(int length) 00078 { 00079 mMaxAddressLen = length; 00080 } 00081 00082 int LinkLocator::maxAddressLen() const 00083 { 00084 return mMaxAddressLen; 00085 } 00086 00087 TQString LinkLocator::getUrl() 00088 { 00089 TQString url; 00090 if(atUrl()) 00091 { 00092 // handle cases like this: <link>http://foobar.org/</link> 00093 int start = mPos; 00094 while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' && 00095 TQString("<>()[]").find(mText[mPos]) == -1) 00096 { 00097 ++mPos; 00098 } 00099 /* some URLs really end with: # / & - _ */ 00100 const TQString allowedSpecialChars = TQString("#/&-_"); 00101 while(mPos > start && mText[mPos-1].isPunct() && 00102 allowedSpecialChars.find(mText[mPos-1]) == -1 ) 00103 { 00104 --mPos; 00105 } 00106 00107 url = mText.mid(start, mPos - start); 00108 if(isEmptyUrl(url) || mPos - start > maxUrlLen()) 00109 { 00110 mPos = start; 00111 url = ""; 00112 } 00113 else 00114 { 00115 --mPos; 00116 } 00117 } 00118 return url; 00119 } 00120 00121 // keep this in sync with KMMainWin::slotUrlClicked() 00122 bool LinkLocator::atUrl() const 00123 { 00124 // the following characters are allowed in a dot-atom (RFC 2822): 00125 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ 00126 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~"); 00127 00128 // the character directly before the URL must not be a letter, a number or 00129 // any other character allowed in a dot-atom (RFC 2822). 00130 if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() || 00131 ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) ) 00132 return false; 00133 00134 TQChar ch = mText[mPos]; 00135 return (ch=='h' && ( mText.mid(mPos, 7) == "http://" || 00136 mText.mid(mPos, 8) == "https://") ) || 00137 (ch=='v' && mText.mid(mPos, 6) == "vnc://") || 00138 (ch=='f' && ( mText.mid(mPos, 7) == "fish://" || 00139 mText.mid(mPos, 6) == "ftp://" || 00140 mText.mid(mPos, 7) == "ftps://") ) || 00141 (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" || 00142 mText.mid(mPos, 6) == "smb://") ) || 00143 (ch=='m' && mText.mid(mPos, 7) == "mailto:") || 00144 (ch=='w' && mText.mid(mPos, 4) == "www.") || 00145 (ch=='f' && mText.mid(mPos, 4) == "ftp.") || 00146 (ch=='n' && mText.mid(mPos, 5) == "news:"); 00147 // note: no "file:" for security reasons 00148 } 00149 00150 bool LinkLocator::isEmptyUrl(const TQString& url) 00151 { 00152 return url.isEmpty() || 00153 url == "http://" || 00154 url == "https://" || 00155 url == "fish://" || 00156 url == "ftp://" || 00157 url == "ftps://" || 00158 url == "sftp://" || 00159 url == "smb://" || 00160 url == "vnc://" || 00161 url == "mailto" || 00162 url == "www" || 00163 url == "ftp" || 00164 url == "news" || 00165 url == "news://"; 00166 } 00167 00168 TQString LinkLocator::getEmailAddress() 00169 { 00170 TQString address; 00171 00172 if ( mText[mPos] == '@' ) { 00173 // the following characters are allowed in a dot-atom (RFC 2822): 00174 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ 00175 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~"); 00176 00177 // determine the local part of the email address 00178 int start = mPos - 1; 00179 while ( start >= 0 && mText[start].unicode() < 128 && 00180 ( mText[start].isLetterOrNumber() || 00181 mText[start] == '@' || // allow @ to find invalid email addresses 00182 allowedSpecialChars.find( mText[start] ) != -1 ) ) { 00183 if ( mText[start] == '@' ) 00184 return TQString(); // local part contains '@' -> no email address 00185 --start; 00186 } 00187 ++start; 00188 // we assume that an email address starts with a letter or a digit 00189 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) 00190 ++start; 00191 if ( start == mPos ) 00192 return TQString(); // local part is empty -> no email address 00193 00194 // determine the domain part of the email address 00195 int dotPos = INT_MAX; 00196 int end = mPos + 1; 00197 while ( end < (int)mText.length() && 00198 ( mText[end].isLetterOrNumber() || 00199 mText[end] == '@' || // allow @ to find invalid email addresses 00200 mText[end] == '.' || 00201 mText[end] == '-' ) ) { 00202 if ( mText[end] == '@' ) 00203 return TQString(); // domain part contains '@' -> no email address 00204 if ( mText[end] == '.' ) 00205 dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain 00206 ++end; 00207 } 00208 // we assume that an email address ends with a letter or a digit 00209 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) 00210 --end; 00211 if ( end == mPos ) 00212 return TQString(); // domain part is empty -> no email address 00213 if ( dotPos >= end ) 00214 return TQString(); // domain part doesn't contain a dot 00215 00216 if ( end - start > maxAddressLen() ) 00217 return TQString(); // too long -> most likely no email address 00218 address = mText.mid( start, end - start ); 00219 00220 mPos = end - 1; 00221 } 00222 return address; 00223 } 00224 00225 TQString LinkLocator::convertToHtml(const TQString& plainText, int flags, 00226 int maxUrlLen, int maxAddressLen) 00227 { 00228 LinkLocator locator(plainText); 00229 locator.setMaxUrlLen(maxUrlLen); 00230 locator.setMaxAddressLen(maxAddressLen); 00231 00232 TQString str; 00233 TQString result((TQChar*)0, (int)locator.mText.length() * 2); 00234 TQChar ch; 00235 int x; 00236 bool startOfLine = true; 00237 TQString emoticon; 00238 00239 for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++) 00240 { 00241 ch = locator.mText[locator.mPos]; 00242 if ( flags & PreserveSpaces ) 00243 { 00244 if (ch==' ') 00245 { 00246 if (startOfLine) { 00247 result += " "; 00248 locator.mPos++, x++; 00249 startOfLine = false; 00250 } 00251 while (locator.mText[locator.mPos] == ' ') 00252 { 00253 result += " "; 00254 locator.mPos++, x++; 00255 if (locator.mText[locator.mPos] == ' ') { 00256 result += " "; 00257 locator.mPos++, x++; 00258 } 00259 } 00260 locator.mPos--, x--; 00261 continue; 00262 } 00263 else if (ch=='\t') 00264 { 00265 do 00266 { 00267 result += " "; 00268 x++; 00269 } 00270 while((x&7) != 0); 00271 x--; 00272 startOfLine = false; 00273 continue; 00274 } 00275 } 00276 if (ch=='\n') 00277 { 00278 result += "<br />"; 00279 startOfLine = true; 00280 x = -1; 00281 continue; 00282 } 00283 00284 startOfLine = false; 00285 if (ch=='&') 00286 result += "&"; 00287 else if (ch=='"') 00288 result += """; 00289 else if (ch=='<') 00290 result += "<"; 00291 else if (ch=='>') 00292 result += ">"; 00293 else 00294 { 00295 const int start = locator.mPos; 00296 if ( !(flags & IgnoreUrls) ) { 00297 str = locator.getUrl(); 00298 if (!str.isEmpty()) 00299 { 00300 TQString hyperlink; 00301 if(str.left(4) == "www.") 00302 hyperlink = "http://" + str; 00303 else if(str.left(4) == "ftp.") 00304 hyperlink = "ftp://" + str; 00305 else 00306 hyperlink = str; 00307 00308 str = str.replace('&', "&"); 00309 result += "<a href=\"" + hyperlink + "\">" + str + "</a>"; 00310 x += locator.mPos - start; 00311 continue; 00312 } 00313 str = locator.getEmailAddress(); 00314 if(!str.isEmpty()) 00315 { 00316 // len is the length of the local part 00317 int len = str.find('@'); 00318 TQString localPart = str.left(len); 00319 00320 // remove the local part from the result (as '&'s have been expanded to 00321 // & we have to take care of the 4 additional characters per '&') 00322 result.truncate(result.length() - len - (localPart.contains('&')*4)); 00323 x -= len; 00324 00325 result += "<a href=\"mailto:" + str + "\">" + str + "</a>"; 00326 x += str.length() - 1; 00327 continue; 00328 } 00329 } 00330 if ( flags & ReplaceSmileys ) { 00331 str = locator.getEmoticon(); 00332 if ( ! str.isEmpty() ) { 00333 result += str; 00334 x += locator.mPos - start; 00335 continue; 00336 } 00337 } 00338 if ( flags & HighlightText ) { 00339 str = locator.highlightedText(); 00340 if ( !str.isEmpty() ) { 00341 result += str; 00342 x += locator.mPos - start; 00343 continue; 00344 } 00345 } 00346 result += ch; 00347 } 00348 } 00349 00350 return result; 00351 } 00352 00353 TQString LinkLocator::pngToDataUrl( const TQString & iconPath ) 00354 { 00355 if ( iconPath.isEmpty() ) 00356 return TQString(); 00357 00358 TQFile pngFile( iconPath ); 00359 if ( !pngFile.open( IO_ReadOnly | IO_Raw ) ) 00360 return TQString(); 00361 00362 TQByteArray ba = pngFile.readAll(); 00363 pngFile.close(); 00364 return TQString::fromLatin1("data:image/png;base64,%1") 00365 .arg( KCodecs::base64Encode( ba ).data() ); 00366 } 00367 00368 00369 TQString LinkLocator::getEmoticon() 00370 { 00371 // smileys have to be prepended by whitespace 00372 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) 00373 return TQString(); 00374 00375 // since smileys start with ':', ';', '(' or '8' short circuit method 00376 const TQChar ch = mText[mPos]; 00377 if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' ) 00378 return TQString(); 00379 00380 // find the end of the smiley (a smiley is at most 4 chars long and ends at 00381 // lineend or whitespace) 00382 const int MinSmileyLen = 2; 00383 const int MaxSmileyLen = 4; 00384 int smileyLen = 1; 00385 while ( ( smileyLen <= MaxSmileyLen ) && 00386 ( mPos+smileyLen < (int)mText.length() ) && 00387 !mText[mPos+smileyLen].isSpace() ) 00388 smileyLen++; 00389 if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen ) 00390 return TQString(); 00391 00392 const TQString smiley = mText.mid( mPos, smileyLen ); 00393 if ( !s_smileyEmoticonNameMap->contains( smiley ) ) 00394 return TQString(); // that's not a (known) smiley 00395 00396 TQString htmlRep; 00397 if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) { 00398 htmlRep = (*s_smileyEmoticonHTMLCache)[smiley]; 00399 } 00400 else { 00401 const TQString imageName = (*s_smileyEmoticonNameMap)[smiley]; 00402 00403 #if KDE_IS_VERSION( 3, 3, 91 ) 00404 const TQString iconPath = locate( "emoticons", 00405 EmotIcons::theme() + 00406 TQString::fromLatin1( "/" ) + 00407 imageName + TQString::fromLatin1(".png") ); 00408 #else 00409 const TQString iconPath = locate( "data", 00410 TQString::fromLatin1( "kopete/pics/emoticons/" )+ 00411 EmotIcons::theme() + 00412 TQString::fromLatin1( "/" ) + 00413 imageName + TQString::fromLatin1(".png") ); 00414 #endif 00415 00416 const TQString dataUrl = pngToDataUrl( iconPath ); 00417 if ( dataUrl.isEmpty() ) { 00418 htmlRep = TQString(); 00419 } 00420 else { 00421 // create an image tag (the text in attribute alt is used 00422 // for copy & paste) representing the smiley 00423 htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" " 00424 "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>") 00425 .arg( dataUrl, 00426 TQStyleSheet::escape( smiley ), 00427 TQStyleSheet::escape( smiley ) ); 00428 } 00429 s_smileyEmoticonHTMLCache->insert( smiley, htmlRep ); 00430 } 00431 00432 if ( !htmlRep.isEmpty() ) 00433 mPos += smileyLen - 1; 00434 00435 return htmlRep; 00436 } 00437 00438 TQString LinkLocator::highlightedText() 00439 { 00440 // formating symbols must be prepended with a whitespace 00441 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) 00442 return TQString(); 00443 00444 const TQChar ch = mText[mPos]; 00445 if ( ch != '/' && ch != '*' && ch != '_' ) 00446 return TQString(); 00447 00448 TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) ); 00449 if ( re.search( mText, mPos ) == mPos ) { 00450 uint length = re.matchedLength(); 00451 // there must be a whitespace after the closing formating symbol 00452 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) 00453 return TQString(); 00454 mPos += length - 1; 00455 switch ( ch.latin1() ) { 00456 case '*': 00457 return "<b>" + re.cap( 1 ) + "</b>"; 00458 case '_': 00459 return "<u>" + re.cap( 1 ) + "</u>"; 00460 case '/': 00461 return "<i>" + re.cap( 1 ) + "</i>"; 00462 } 00463 } 00464 return TQString(); 00465 } 00466