libtdepim

linklocator.cpp
1 
23 #include "linklocator.h"
24 #include "pimemoticons.h"
25 #include <tdeglobal.h>
26 #include <kstandarddirs.h>
27 #include <kstaticdeleter.h>
28 #include <kmdcodec.h>
29 #include <kdebug.h>
30 
31 #include <tqstylesheet.h>
32 #include <tqfile.h>
33 #include <tqregexp.h>
34 
35 #include <limits.h>
36 
37 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
38 TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;
39 
40 static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
41 static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
42 
43 LinkLocator::LinkLocator(const TQString& text, int pos)
44  : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
45 {
46  // If you change either of the above values for maxUrlLen or
47  // maxAddressLen, then please also update the documentation for
48  // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
49  // default values used for the maxUrlLen/maxAddressLen parameters
50  // of convertToHtml().
51 
52  if ( !s_smileyEmoticonNameMap ) {
53  smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
54  new TQMap<TQString, TQString>() );
55  for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
56  TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
57  imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
58  s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
59  }
60  }
61 
62  if ( !s_smileyEmoticonHTMLCache )
63  smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
64  new TQMap<TQString, TQString>() );
65 }
66 
67 void LinkLocator::setMaxUrlLen(int length)
68 {
69  mMaxUrlLen = length;
70 }
71 
73 {
74  return mMaxUrlLen;
75 }
76 
78 {
79  mMaxAddressLen = length;
80 }
81 
83 {
84  return mMaxAddressLen;
85 }
86 
88 {
89  TQString url;
90  if(atUrl())
91  {
92  // handle cases like this: <link>http://foobar.org/</link>
93  int start = mPos;
94  while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
95  TQString("<>()[]").find(mText[mPos]) == -1)
96  {
97  ++mPos;
98  }
99  /* some URLs really end with: # / & - _ */
100  const TQString allowedSpecialChars = TQString("#/&-_");
101  while(mPos > start && mText[mPos-1].isPunct() &&
102  allowedSpecialChars.find(mText[mPos-1]) == -1 )
103  {
104  --mPos;
105  }
106 
107  url = mText.mid(start, mPos - start);
108  if(isEmptyUrl(url) || mPos - start > maxUrlLen())
109  {
110  mPos = start;
111  url = "";
112  }
113  else
114  {
115  --mPos;
116  }
117  }
118  return url;
119 }
120 
121 // keep this in sync with KMMainWin::slotUrlClicked()
122 bool LinkLocator::atUrl() const
123 {
124  // the following characters are allowed in a dot-atom (RFC 2822):
125  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
126  const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
127 
128  // the character directly before the URL must not be a letter, a number or
129  // any other character allowed in a dot-atom (RFC 2822).
130  if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
131  ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
132  return false;
133 
134  TQChar ch = mText[mPos];
135  return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
136  mText.mid(mPos, 8) == "https://") ) ||
137  (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
138  (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
139  mText.mid(mPos, 6) == "ftp://" ||
140  mText.mid(mPos, 7) == "ftps://") ) ||
141  (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
142  mText.mid(mPos, 6) == "smb://") ) ||
143  (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
144  (ch=='w' && mText.mid(mPos, 4) == "www.") ||
145  (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
146  (ch=='n' && mText.mid(mPos, 5) == "news:");
147  // note: no "file:" for security reasons
148 }
149 
150 bool LinkLocator::isEmptyUrl(const TQString& url)
151 {
152  return url.isEmpty() ||
153  url == "http://" ||
154  url == "https://" ||
155  url == "fish://" ||
156  url == "ftp://" ||
157  url == "ftps://" ||
158  url == "sftp://" ||
159  url == "smb://" ||
160  url == "vnc://" ||
161  url == "mailto" ||
162  url == "www" ||
163  url == "ftp" ||
164  url == "news" ||
165  url == "news://";
166 }
167 
169 {
170  TQString address;
171 
172  if ( mText[mPos] == '@' ) {
173  // the following characters are allowed in a dot-atom (RFC 2822):
174  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
175  const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
176 
177  // determine the local part of the email address
178  int start = mPos - 1;
179  while ( start >= 0 && mText[start].unicode() < 128 &&
180  ( mText[start].isLetterOrNumber() ||
181  mText[start] == '@' || // allow @ to find invalid email addresses
182  allowedSpecialChars.find( mText[start] ) != -1 ) ) {
183  if ( mText[start] == '@' )
184  return TQString(); // local part contains '@' -> no email address
185  --start;
186  }
187  ++start;
188  // we assume that an email address starts with a letter or a digit
189  while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
190  ++start;
191  if ( start == mPos )
192  return TQString(); // local part is empty -> no email address
193 
194  // determine the domain part of the email address
195  int dotPos = INT_MAX;
196  int end = mPos + 1;
197  while ( end < (int)mText.length() &&
198  ( mText[end].isLetterOrNumber() ||
199  mText[end] == '@' || // allow @ to find invalid email addresses
200  mText[end] == '.' ||
201  mText[end] == '-' ) ) {
202  if ( mText[end] == '@' )
203  return TQString(); // domain part contains '@' -> no email address
204  if ( mText[end] == '.' )
205  dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
206  ++end;
207  }
208  // we assume that an email address ends with a letter or a digit
209  while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
210  --end;
211  if ( end == mPos )
212  return TQString(); // domain part is empty -> no email address
213  if ( dotPos >= end )
214  return TQString(); // domain part doesn't contain a dot
215 
216  if ( end - start > maxAddressLen() )
217  return TQString(); // too long -> most likely no email address
218  address = mText.mid( start, end - start );
219 
220  mPos = end - 1;
221  }
222  return address;
223 }
224 
225 TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
226  int maxUrlLen, int maxAddressLen)
227 {
228  LinkLocator locator(plainText);
229  locator.setMaxUrlLen(maxUrlLen);
230  locator.setMaxAddressLen(maxAddressLen);
231 
232  TQString str;
233  TQString result((TQChar*)0, (int)locator.mText.length() * 2);
234  TQChar ch;
235  int x;
236  bool startOfLine = true;
237  TQString emoticon;
238 
239  for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
240  {
241  ch = locator.mText[locator.mPos];
242  if ( flags & PreserveSpaces )
243  {
244  if (ch==' ')
245  {
246  if (startOfLine) {
247  result += "&nbsp;";
248  locator.mPos++, x++;
249  startOfLine = false;
250  }
251  while (locator.mText[locator.mPos] == ' ')
252  {
253  result += " ";
254  locator.mPos++, x++;
255  if (locator.mText[locator.mPos] == ' ') {
256  result += "&nbsp;";
257  locator.mPos++, x++;
258  }
259  }
260  locator.mPos--, x--;
261  continue;
262  }
263  else if (ch=='\t')
264  {
265  do
266  {
267  result += "&nbsp;";
268  x++;
269  }
270  while((x&7) != 0);
271  x--;
272  startOfLine = false;
273  continue;
274  }
275  }
276  if (ch=='\n')
277  {
278  result += "<br />";
279  startOfLine = true;
280  x = -1;
281  continue;
282  }
283 
284  startOfLine = false;
285  if (ch=='&')
286  result += "&amp;";
287  else if (ch=='"')
288  result += "&quot;";
289  else if (ch=='<')
290  result += "&lt;";
291  else if (ch=='>')
292  result += "&gt;";
293  else
294  {
295  const int start = locator.mPos;
296  if ( !(flags & IgnoreUrls) ) {
297  str = locator.getUrl();
298  if (!str.isEmpty())
299  {
300  TQString hyperlink;
301  if(str.left(4) == "www.")
302  hyperlink = "http://" + str;
303  else if(str.left(4) == "ftp.")
304  hyperlink = "ftp://" + str;
305  else
306  hyperlink = str;
307 
308  str = str.replace('&', "&amp;");
309  result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
310  x += locator.mPos - start;
311  continue;
312  }
313  str = locator.getEmailAddress();
314  if(!str.isEmpty())
315  {
316  // len is the length of the local part
317  int len = str.find('@');
318  TQString localPart = str.left(len);
319 
320  // remove the local part from the result (as '&'s have been expanded to
321  // &amp; we have to take care of the 4 additional characters per '&')
322  result.truncate(result.length() - len - (localPart.contains('&')*4));
323  x -= len;
324 
325  result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
326  x += str.length() - 1;
327  continue;
328  }
329  }
330  if ( flags & ReplaceSmileys ) {
331  str = locator.getEmoticon();
332  if ( ! str.isEmpty() ) {
333  result += str;
334  x += locator.mPos - start;
335  continue;
336  }
337  }
338  if ( flags & HighlightText ) {
339  str = locator.highlightedText();
340  if ( !str.isEmpty() ) {
341  result += str;
342  x += locator.mPos - start;
343  continue;
344  }
345  }
346  result += ch;
347  }
348  }
349 
350  return result;
351 }
352 
353 TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
354 {
355  if ( iconPath.isEmpty() )
356  return TQString();
357 
358  TQFile pngFile( iconPath );
359  if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
360  return TQString();
361 
362  TQByteArray ba = pngFile.readAll();
363  pngFile.close();
364  return TQString::fromLatin1("data:image/png;base64,%1")
365  .arg( KCodecs::base64Encode( ba ).data() );
366 }
367 
368 
369 TQString LinkLocator::getEmoticon()
370 {
371  // smileys have to be prepended by whitespace
372  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
373  return TQString();
374 
375  // since smileys start with ':', ';', '(' or '8' short circuit method
376  const TQChar ch = mText[mPos];
377  if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
378  return TQString();
379 
380  // find the end of the smiley (a smiley is at most 4 chars long and ends at
381  // lineend or whitespace)
382  const int MinSmileyLen = 2;
383  const int MaxSmileyLen = 4;
384  int smileyLen = 1;
385  while ( ( smileyLen <= MaxSmileyLen ) &&
386  ( mPos+smileyLen < (int)mText.length() ) &&
387  !mText[mPos+smileyLen].isSpace() )
388  smileyLen++;
389  if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
390  return TQString();
391 
392  const TQString smiley = mText.mid( mPos, smileyLen );
393  if ( !s_smileyEmoticonNameMap->contains( smiley ) )
394  return TQString(); // that's not a (known) smiley
395 
396  TQString htmlRep;
397  if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
398  htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
399  }
400  else {
401  const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
402 
403 #if KDE_IS_VERSION( 3, 3, 91 )
404  const TQString iconPath = locate( "emoticons",
405  EmotIcons::theme() +
406  TQString::fromLatin1( "/" ) +
407  imageName + TQString::fromLatin1(".png") );
408 #else
409  const TQString iconPath = locate( "data",
410  TQString::fromLatin1( "kopete/pics/emoticons/" )+
411  EmotIcons::theme() +
412  TQString::fromLatin1( "/" ) +
413  imageName + TQString::fromLatin1(".png") );
414 #endif
415 
416  const TQString dataUrl = pngToDataUrl( iconPath );
417  if ( dataUrl.isEmpty() ) {
418  htmlRep = TQString();
419  }
420  else {
421  // create an image tag (the text in attribute alt is used
422  // for copy & paste) representing the smiley
423  htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
424  "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
425  .arg( dataUrl,
426  TQStyleSheet::escape( smiley ),
427  TQStyleSheet::escape( smiley ) );
428  }
429  s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
430  }
431 
432  if ( !htmlRep.isEmpty() )
433  mPos += smileyLen - 1;
434 
435  return htmlRep;
436 }
437 
438 TQString LinkLocator::highlightedText()
439 {
440  // formating symbols must be prepended with a whitespace
441  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
442  return TQString();
443 
444  const TQChar ch = mText[mPos];
445  if ( ch != '/' && ch != '*' && ch != '_' )
446  return TQString();
447 
448  TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
449  if ( re.search( mText, mPos ) == mPos ) {
450  uint length = re.matchedLength();
451  // there must be a whitespace after the closing formating symbol
452  if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
453  return TQString();
454  mPos += length - 1;
455  switch ( ch.latin1() ) {
456  case '*':
457  return "<b>" + re.cap( 1 ) + "</b>";
458  case '_':
459  return "<u>" + re.cap( 1 ) + "</u>";
460  case '/':
461  return "<i>" + re.cap( 1 ) + "</i>";
462  }
463  }
464  return TQString();
465 }
466 
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
Definition: linklocator.cpp:67
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
TQString getUrl()
Attempts to grab a URL starting at the current scan position.
Definition: linklocator.cpp:87
int maxUrlLen() const
Definition: linklocator.cpp:72
TQString mText
The plaintext string being scanned for URLs and email addresses.
Definition: linklocator.h:157
int maxAddressLen() const
Definition: linklocator.cpp:82
int mPos
The current scan position.
Definition: linklocator.h:161
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
Definition: linklocator.cpp:77
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point...
Definition: linklocator.cpp:43
TQString getEmailAddress()
Attempts to grab an email address.
LinkLocator assists in identifying sections of text that can usefully be converted into hyperlinks in h...
Definition: linklocator.h:41