akregator/src

article.cpp
1 /*
2  This file is part of Akregator.
3 
4  Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
5  2005 Frank Osterfeld <frank.osterfeld at kdemail.net>
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program; if not, write to the Free Software
18  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20  As a special exception, permission is given to link this program
21  with any edition of TQt, and distribute the resulting executable,
22  without including the source code for TQt in the source distribution.
23 */
24 
25 #include "article.h"
26 #include "feed.h"
27 #include "feedstorage.h"
28 #include "storage.h"
29 #include "librss/librss.h"
30 #include "shared.h"
31 #include "utils.h"
32 
33 #include <tqdatetime.h>
34 #include <tqdom.h>
35 #include <tqregexp.h>
36 #include <tqstringlist.h>
37 #include <tqvaluelist.h>
38 
39 #include <krfcdate.h>
40 #include <kdebug.h>
41 #include <kurl.h>
42 
43 
44 namespace Akregator {
45 
46 struct Article::Private : public Shared
47 {
58  enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};
59 
60  TQString guid;
61  Backend::FeedStorage* archive;
62  Feed* feed;
63 
64  // the variables below are initialized to null values in the Article constructor
65  // and then loaded on demand instead.
66  //
67  // to read their values, you should therefore use the accessor methods of the Article
68  // hash(), pubDate(), statusBits() rather than accessing them directly.
69  uint hash;
70  TQDateTime pubDate;
71  int status;
72 };
73 
74 Article::Article() : d(new Private)
75 {
76  d->hash = 0;
77  d->status = 0;
78  d->feed = 0;
79  d->archive = 0;
80 }
81 
82 Article::Article(const TQString& guid, Feed* feed) : d(new Private)
83 {
84  // this constructor should be as cheap as possible, so avoid calls to
85  // read information from the archive in here if possible
86  //
87  // d->hash, d->pubDate and d->status are loaded on-demand by
88  // the hash(), pubDate() and statusBits() methods respectively
89 
90  d->feed = feed;
91  d->guid = guid;
92  d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
93  d->status = 0;
94 }
95 
96 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
97 {
98  d->archive = archive;
99  d->status = Private::New;
100  d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url()
101  + article.commentsLink().url() );
102 
103  d->guid = article.guid();
104 
105  if (!d->archive->contains(d->guid))
106  {
107  d->archive->addEntry(d->guid);
108 
109  if (article.meta("deleted") == "true")
110  { // if article is in deleted state, we just add the status and omit the rest
111  d->status = Private::Read | Private::Deleted;
112  d->archive->setStatus(d->guid, d->status);
113  }
114  else
115  { // article is not deleted, let's add it to the archive
116 
117  d->archive->setHash(d->guid, hash() );
118  TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
119  d->archive->setTitle(d->guid, title);
120  d->archive->setDescription(d->guid, article.description());
121  d->archive->setLink(d->guid, article.link().url());
122  d->archive->setComments(d->guid, article.comments());
123  d->archive->setCommentsLink(d->guid, article.commentsLink().url());
124  d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
125  d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
126  d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
127  d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
128  d->archive->setAuthor(d->guid, article.author());
129 
130  TQValueList<RSS::Category> cats = article.categories();
131  TQValueList<RSS::Category>::ConstIterator end = cats.end();
132 
133  for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
134  {
135  Backend::Category cat;
136 
137  cat.term = (*it).category();
138  cat.scheme = (*it).domain();
139  cat.name = (*it).category();
140 
141  d->archive->addCategory(d->guid, cat);
142  }
143 
144  if (!article.enclosure().isNull())
145  {
146  d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
147  }
148  else
149  {
150  d->archive->removeEnclosure(d->guid);
151  }
152 
153  TQString status = article.meta("status");
154 
155  if (!status.isEmpty())
156  {
157  int statusInt = status.toInt();
158  if (statusInt == New)
159  statusInt = Unread;
160  setStatus(statusInt);
161  }
162  setKeep(article.meta("keep") == "true");
163  }
164  }
165  else
166  {
167  // always update comments count, as it's not used for hash calculation
168  d->archive->setComments(d->guid, article.comments());
169  if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
170  { // if yes, update
171  d->pubDate.setTime_t(d->archive->pubDate(d->guid));
172  d->archive->setHash(d->guid, hash() );
173  TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title();
174  d->archive->setTitle(d->guid, title);
175  d->archive->setDescription(d->guid, article.description());
176  d->archive->setLink(d->guid, article.link().url());
177  d->archive->setCommentsLink(d->guid, article.commentsLink().url());
178  d->archive->setAuthor(d->guid, article.author());
179  }
180  }
181 }
182 
183 Article::Article(RSS::Article article, Feed* feed) : d(new Private)
184 {
185  //assert(feed)
186  d->feed = feed;
187  initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
188 }
189 
190 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
191 {
192  d->feed = 0;
193  initialize(article, archive);
194 }
195 
196 bool Article::isNull() const
197 {
198  return d->archive == 0; // TODO: use proper null state
199 }
200 
201 void Article::offsetPubDate(int secs)
202 {
203  d->pubDate = pubDate().addSecs(secs);
204  d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
205 
206 }
207 
208 void Article::setDeleted()
209 {
210  if (isDeleted())
211  return;
212 
213  setStatus(Read);
214  d->status = Private::Deleted | Private::Read;
215  d->archive->setStatus(d->guid, d->status);
216  d->archive->setDeleted(d->guid);
217 
218  if (d->feed)
219  d->feed->setArticleDeleted(*this);
220 }
221 
222 bool Article::isDeleted() const
223 {
224  return (statusBits() & Private::Deleted) != 0;
225 }
226 
227 Article::Article(const Article &other) : d(new Private)
228 {
229  *this = other;
230 }
231 
232 Article::~Article()
233 {
234  if (d->deref())
235  {
236  delete d;
237  d = 0;
238  }
239 }
240 
241 Article &Article::operator=(const Article &other)
242 {
243  if (this != &other) {
244  other.d->ref();
245  if (d && d->deref())
246  delete d;
247  d = other.d;
248  }
249  return *this;
250 }
251 
252 
253 bool Article::operator<(const Article &other) const
254 {
255  return pubDate() > other.pubDate() ||
256  (pubDate() == other.pubDate() && guid() < other.guid() );
257 }
258 
259 bool Article::operator<=(const Article &other) const
260 {
261  return (pubDate() > other.pubDate() || *this == other);
262 }
263 
264 bool Article::operator>(const Article &other) const
265 {
266  return pubDate() < other.pubDate() ||
267  (pubDate() == other.pubDate() && guid() > other.guid() );
268 }
269 
270 bool Article::operator>=(const Article &other) const
271 {
272  return (pubDate() > other.pubDate() || *this == other);
273 }
274 
275 bool Article::operator==(const Article &other) const
276 {
277  return d->guid == other.guid();
278 }
279 
280 int Article::statusBits() const
281 {
282  // delayed loading of status information from archive
283  if ( d->status == 0 )
284  {
285  d->status = d->archive->status(d->guid);
286  }
287 
288  return d->status;
289 }
290 
291 int Article::status() const
292 {
293  if ((statusBits() & Private::Read) != 0)
294  return Read;
295 
296  if ((statusBits() & Private::New) != 0)
297  return New;
298  else
299  return Unread;
300 }
301 
302 void Article::setStatus(int stat)
303 {
304  // use status() rather than statusBits() here to filter out status flags that we are not
305  // interested in
306  int oldStatus = status();
307 
308  if (oldStatus != stat)
309  {
310  switch (stat)
311  {
312  case Read:
313  d->status = ( d->status | Private::Read) & ~Private::New;
314  break;
315  case Unread:
316  d->status = ( d->status & ~Private::Read) & ~Private::New;
317  break;
318  case New:
319  d->status = ( d->status | Private::New) & ~Private::Read;
320  break;
321  }
322  d->archive->setStatus(d->guid, d->status);
323  if (d->feed)
324  d->feed->setArticleChanged(*this, oldStatus);
325  }
326 }
327 
328 TQString Article::title() const
329 {
330  return d->archive->title(d->guid);
331 }
332 
333 TQString Article::author() const
334 {
335  return d->archive->author(d->guid);
336 }
337 
338 KURL Article::link() const
339 {
340  return d->archive->link(d->guid);
341 }
342 
343 TQString Article::description() const
344 {
345  return d->archive->description(d->guid);
346 }
347 
348 TQString Article::guid() const
349 {
350  return d->guid;
351 }
352 
353 KURL Article::commentsLink() const
354 {
355  return d->archive->commentsLink(d->guid);
356 }
357 
358 
359 int Article::comments() const
360 {
361 
362  return d->archive->comments(d->guid);
363 }
364 
365 
366 bool Article::guidIsPermaLink() const
367 {
368  return d->archive->guidIsPermaLink(d->guid);
369 }
370 
372 {
373  return d->archive->guidIsHash(d->guid);
374 }
375 
376 uint Article::hash() const
377 {
378  // delayed loading of hash from archive
379  if ( d->hash == 0 )
380  {
381  d->hash = d->archive->hash(d->guid);
382  }
383 
384  return d->hash;
385 }
386 
387 bool Article::keep() const
388 {
389  return ( statusBits() & Private::Keep) != 0;
390 }
391 
392 RSS::Enclosure Article::enclosure() const
393 {
394  bool hasEnc;
395  TQString url, type;
396  int length;
397  d->archive->enclosure(d->guid, hasEnc, url, type, length);
398  return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
399 
400 
401 }
402 
403 
404 void Article::setKeep(bool keep)
405 {
406  d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
407  d->archive->setStatus(d->guid, d->status);
408  if (d->feed)
409  d->feed->setArticleChanged(*this);
410 }
411 
412 void Article::addTag(const TQString& tag)
413 {
414  d->archive->addTag(d->guid, tag);
415  if (d->feed)
416  d->feed->setArticleChanged(*this);
417 }
418 
419 void Article::removeTag(const TQString& tag)
420 {
421  d->archive->removeTag(d->guid, tag);
422  if (d->feed)
423  d->feed->setArticleChanged(*this);
424 }
425 
426 bool Article::hasTag(const TQString& tag) const
427 {
428  return d->archive->tags(d->guid).contains(tag);
429 }
430 
431 TQStringList Article::tags() const
432 {
433  return d->archive->tags(d->guid);
434 }
435 
436 Feed* Article::feed() const
437 { return d->feed; }
438 
439 const TQDateTime& Article::pubDate() const
440 {
441  // delayed loading of publication date information from archive
442  if ( d->pubDate.isNull() )
443  {
444  d->pubDate.setTime_t(d->archive->pubDate(d->guid));
445  }
446 
447  return d->pubDate;
448 }
449 
450 TQString Article::buildTitle(const TQString& description)
451 {
452  TQString s = description;
453  if (description.stripWhiteSpace().isEmpty())
454  return "";
455 
456  int i = s.find('>',500); /*avoid processing too much */
457  if (i != -1)
458  s = s.left(i+1);
459  TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);
460  TQString tagName, toReplace, replaceWith;
461  while (rx.search(s) != -1 )
462  {
463  tagName=rx.cap(2);
464  if (tagName=="SCRIPT"||tagName=="script")
465  toReplace=rx.cap(0); // strip tag AND tag contents
466  else if (tagName.startsWith("br") || tagName.startsWith("BR"))
467  {
468  toReplace=rx.cap(1);
469  replaceWith=" ";
470  }
471  else
472  toReplace=rx.cap(1); // strip just tag
473  s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed
474  }
475  if (s.length()> 90)
476  s=s.left(90)+"...";
477  return s.simplifyWhiteSpace();
478 }
479 } // namespace Akregator