akregator/src

article.cpp
00001 /*
00002     This file is part of Akregator.
00003 
00004     Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
00005                   2005 Frank Osterfeld <frank.osterfeld at kdemail.net>
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this program; if not, write to the Free Software
00018     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00019 
00020     As a special exception, permission is given to link this program
00021     with any edition of TQt, and distribute the resulting executable,
00022     without including the source code for TQt in the source distribution.
00023 */
00024 
00025 #include "article.h"
00026 #include "feed.h"
00027 #include "feedstorage.h"
00028 #include "storage.h"
00029 #include "librss/librss.h"
00030 #include "shared.h"
00031 #include "utils.h"
00032 
00033 #include <tqdatetime.h>
00034 #include <tqdom.h>
00035 #include <tqregexp.h>
00036 #include <tqstringlist.h>
00037 #include <tqvaluelist.h>
00038 
00039 #include <krfcdate.h>
00040 #include <kdebug.h>
00041 #include <kurl.h>
00042 
00043 
00044 namespace Akregator {
00045 
00046 struct Article::Private : public Shared
00047 {
00058     enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};
00059 
00060     TQString guid;
00061     Backend::FeedStorage* archive;
00062     Feed* feed;
00063 
00064     // the variables below are initialized to null values in the Article constructor 
00065     // and then loaded on demand instead.
00066     //
00067     // to read their values, you should therefore use the accessor methods of the Article
00068     // hash(), pubDate(), statusBits() rather than accessing them directly.
00069     uint hash;
00070     TQDateTime pubDate;  
00071     int status;
00072 };
00073 
00074 Article::Article() : d(new Private)
00075 {
00076     d->hash = 0;
00077     d->status = 0;
00078     d->feed = 0;
00079     d->archive = 0;
00080 }
00081 
00082 Article::Article(const TQString& guid, Feed* feed) : d(new Private)
00083 {
00084     // this constructor should be as cheap as possible, so avoid calls to 
00085     // read information from the archive in here if possible
00086     //
00087     // d->hash, d->pubDate and d->status are loaded on-demand by
00088     // the hash(), pubDate() and statusBits() methods respectively
00089 
00090     d->feed = feed;
00091     d->guid = guid;
00092     d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
00093     d->status = 0;
00094     d->hash = 0;
00095 }
00096 
00097 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
00098 {
00099     d->archive = archive;
00100     d->status = Private::New;
00101     d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url() 
00102                               + article.commentsLink().url() );
00103 
00104     d->guid = article.guid();
00105 
00106     if (!d->archive->contains(d->guid))
00107     {
00108         d->archive->addEntry(d->guid);
00109 
00110         if (article.meta("deleted") == "true") 
00111         { // if article is in deleted state, we just add the status and omit the rest
00112             d->status = Private::Read | Private::Deleted;
00113             d->archive->setStatus(d->guid, d->status);
00114         }
00115         else
00116         { // article is not deleted, let's add it to the archive
00117 
00118             d->archive->setHash(d->guid, hash() );
00119             TQString title = article.title().isEmpty() ? buildTitle(article.description()) :  article.title();
00120             d->archive->setTitle(d->guid, title);
00121             d->archive->setDescription(d->guid, article.description());
00122             d->archive->setLink(d->guid, article.link().url());
00123             d->archive->setComments(d->guid, article.comments());
00124             d->archive->setCommentsLink(d->guid, article.commentsLink().url());
00125             d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
00126             d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
00127             d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
00128             d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
00129             d->archive->setAuthor(d->guid, article.author());
00130                         
00131             TQValueList<RSS::Category> cats = article.categories();
00132             TQValueList<RSS::Category>::ConstIterator end = cats.end();
00133 
00134             for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
00135             {
00136                 Backend::Category cat;
00137 
00138                 cat.term = (*it).category();
00139                 cat.scheme = (*it).domain();
00140                 cat.name = (*it).category();
00141 
00142                 d->archive->addCategory(d->guid, cat);
00143             }
00144 
00145             if (!article.enclosure().isNull())
00146             {
00147                 d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
00148             }
00149             else
00150             {
00151                 d->archive->removeEnclosure(d->guid);
00152             }
00153 
00154             TQString status = article.meta("status");
00155             
00156             if (!status.isEmpty())
00157             {
00158                 int statusInt = status.toInt();
00159                 if (statusInt == New)
00160                     statusInt = Unread;
00161                 setStatus(statusInt);
00162             }
00163             setKeep(article.meta("keep") == "true");
00164         }
00165     }
00166     else
00167     {
00168         // always update comments count, as it's not used for hash calculation
00169         d->archive->setComments(d->guid, article.comments());
00170         if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
00171         { // if yes, update
00172             d->pubDate.setTime_t(d->archive->pubDate(d->guid));
00173             d->archive->setHash(d->guid, hash() );
00174             TQString title = article.title().isEmpty() ? buildTitle(article.description()) :  article.title();
00175             d->archive->setTitle(d->guid, title);
00176             d->archive->setDescription(d->guid, article.description());
00177             d->archive->setLink(d->guid, article.link().url());
00178             d->archive->setCommentsLink(d->guid, article.commentsLink().url());
00179             d->archive->setAuthor(d->guid, article.author());
00180         }
00181     }
00182 }
00183 
00184 Article::Article(RSS::Article article, Feed* feed) : d(new Private)
00185 {
00186     //assert(feed)
00187     d->feed = feed;
00188     initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
00189 }
00190 
00191 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
00192 {
00193     d->feed = 0;
00194     initialize(article, archive);
00195 }
00196 
00197 bool Article::isNull() const
00198 {
00199     return d->archive == 0; // TODO: use proper null state
00200 }
00201 
00202 void Article::offsetPubDate(int secs)
00203 {
00204    d->pubDate = pubDate().addSecs(secs);
00205    d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
00206 
00207 }
00208 
00209 void Article::setDeleted()
00210 {
00211     if (isDeleted())
00212         return;
00213   
00214     setStatus(Read);
00215     d->status = Private::Deleted | Private::Read;
00216     d->archive->setStatus(d->guid, d->status);
00217     d->archive->setDeleted(d->guid);
00218 
00219     if (d->feed)
00220         d->feed->setArticleDeleted(*this);
00221 }
00222 
00223 bool Article::isDeleted() const
00224 {
00225     return (statusBits() & Private::Deleted) != 0;
00226 }
00227 
00228 Article::Article(const Article &other) : d(new Private)
00229 {
00230     *this = other;
00231 }
00232 
00233 Article::~Article()
00234 {
00235     if (d->deref())
00236     {
00237         delete d;
00238         d = 0;
00239     }
00240 }
00241 
00242 Article &Article::operator=(const Article &other)
00243 {
00244     if (this != &other) {
00245         other.d->ref();
00246         if (d && d->deref())
00247             delete d;
00248         d = other.d;
00249     }
00250     return *this;
00251 }
00252 
00253 
00254 bool Article::operator<(const Article &other) const
00255 {
00256     return pubDate() > other.pubDate() || 
00257             (pubDate() == other.pubDate() && guid() < other.guid() );
00258 }
00259 
00260 bool Article::operator<=(const Article &other) const
00261 {
00262     return (pubDate() > other.pubDate() || *this == other);
00263 }
00264 
00265 bool Article::operator>(const Article &other) const
00266 {
00267     return pubDate() < other.pubDate() || 
00268             (pubDate() == other.pubDate() && guid() > other.guid() );
00269 }
00270 
00271 bool Article::operator>=(const Article &other) const
00272 {
00273     return (pubDate() > other.pubDate() || *this == other);
00274 }
00275 
00276 bool Article::operator==(const Article &other) const
00277 {
00278     return d->guid == other.guid();
00279 }
00280 
00281 int Article::statusBits() const
00282 {
00283     // delayed loading of status information from archive
00284     if ( d->status == 0 ) 
00285     {
00286         d->status = d->archive->status(d->guid);
00287     }
00288 
00289     return d->status;
00290 }
00291 
00292 int Article::status() const
00293 {
00294     if ((statusBits() & Private::Read) != 0)
00295         return Read;
00296 
00297     if ((statusBits() & Private::New) != 0)
00298         return New;
00299     else
00300         return Unread;
00301 }
00302 
00303 void Article::setStatus(int stat)
00304 {
00305     // use status() rather than statusBits() here to filter out status flags that we are not
00306     // interested in
00307     int oldStatus = status();
00308 
00309     if (oldStatus != stat)
00310     {
00311         switch (stat)
00312         {
00313             case Read:
00314                 d->status = ( d->status | Private::Read) & ~Private::New;
00315                 break;
00316             case Unread:
00317                 d->status = ( d->status & ~Private::Read) & ~Private::New;
00318                 break;
00319             case New:
00320                 d->status = ( d->status | Private::New) & ~Private::Read;
00321                 break;
00322         }
00323         d->archive->setStatus(d->guid, d->status);
00324         if (d->feed)
00325             d->feed->setArticleChanged(*this, oldStatus);
00326      }
00327 }
00328 
00329 TQString Article::title() const
00330 {
00331     return d->archive->title(d->guid);
00332 }
00333 
00334 TQString Article::author() const
00335 {
00336     return d->archive->author(d->guid);
00337 }
00338 
00339 KURL Article::link() const
00340 {
00341     return d->archive->link(d->guid);
00342 }
00343 
00344 TQString Article::description() const
00345 {
00346     return d->archive->description(d->guid);
00347 }
00348 
00349 TQString Article::guid() const
00350 {
00351     return d->guid;
00352 }
00353 
00354 KURL Article::commentsLink() const
00355 {
00356     return d->archive->commentsLink(d->guid);
00357 }
00358 
00359 
00360 int Article::comments() const
00361 {
00362     return d->archive->comments(d->guid);
00363 }
00364 
00365 
00366 bool Article::guidIsPermaLink() const
00367 {
00368     return d->archive->guidIsPermaLink(d->guid);
00369 }
00370 
00371 bool Article::guidIsHash() const
00372 {
00373     return d->archive->guidIsHash(d->guid);
00374 }
00375 
00376 uint Article::hash() const
00377 {
00378     // delayed loading of hash from archive
00379     if ( d->hash == 0 )
00380     {
00381         d->hash = d->archive->hash(d->guid);
00382     }
00383 
00384     return d->hash;
00385 }
00386 
00387 bool Article::keep() const
00388 {
00389     return ( statusBits() & Private::Keep) != 0;
00390 }
00391 
00392 RSS::Enclosure Article::enclosure() const
00393 {
00394     bool hasEnc;
00395     TQString url, type;
00396     int length;
00397     d->archive->enclosure(d->guid, hasEnc, url, type, length);
00398     return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
00399 
00400     
00401 }
00402 
00403 
00404 void Article::setKeep(bool keep)
00405 {
00406     d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
00407     d->archive->setStatus(d->guid, d->status);
00408     if (d->feed)
00409         d->feed->setArticleChanged(*this);
00410 }
00411 
00412 void Article::addTag(const TQString& tag)
00413 {
00414     d->archive->addTag(d->guid, tag);
00415     if (d->feed)
00416         d->feed->setArticleChanged(*this);
00417 }
00418 
00419 void Article::removeTag(const TQString& tag)
00420 {
00421     d->archive->removeTag(d->guid, tag);
00422     if (d->feed)
00423         d->feed->setArticleChanged(*this);
00424 }
00425 
00426 bool Article::hasTag(const TQString& tag) const
00427 {
00428     return d->archive->tags(d->guid).contains(tag);
00429 }
00430 
00431 TQStringList Article::tags() const
00432 {
00433     return d->archive->tags(d->guid);
00434 }
00435             
00436 Feed* Article::feed() const
00437 { return d->feed; }
00438 
00439 const TQDateTime& Article::pubDate() const
00440 {
00441     // delayed loading of publication date information from archive
00442     if ( d->pubDate.isNull() )
00443     {
00444         d->pubDate.setTime_t(d->archive->pubDate(d->guid));
00445     }
00446 
00447     return d->pubDate;
00448 }
00449 
00450 TQString Article::buildTitle(const TQString& description)
00451 {
00452     TQString s = description;
00453     if (description.stripWhiteSpace().isEmpty())
00454         return "";
00455         
00456     int i = s.find('>',500); /*avoid processing too much */
00457     if (i != -1)
00458         s = s.left(i+1);
00459     TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);
00460     TQString tagName, toReplace, replaceWith;
00461     while (rx.search(s) != -1 )
00462     {
00463         tagName=rx.cap(2);
00464         if (tagName=="SCRIPT"||tagName=="script")
00465             toReplace=rx.cap(0); // strip tag AND tag contents
00466         else if (tagName.startsWith("br") || tagName.startsWith("BR"))
00467         {
00468             toReplace=rx.cap(1);
00469             replaceWith=" ";
00470         }
00471         else
00472             toReplace=rx.cap(1);  // strip just tag
00473         s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed
00474     }
00475     if (s.length()> 90)
00476         s=s.left(90)+"...";
00477     return s.simplifyWhiteSpace();
00478 }
00479 } // namespace Akregator