akregator/src

article.cpp

00001 /*
00002     This file is part of Akregator.
00003 
00004     Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net>
00005                   2005 Frank Osterfeld <frank.osterfeld at kdemail.net>
00006     This program is free software; you can redistribute it and/or modify
00007     it under the terms of the GNU General Public License as published by
00008     the Free Software Foundation; either version 2 of the License, or
00009     (at your option) any later version.
00010 
00011     This program is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00014     GNU General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License
00017     along with this program; if not, write to the Free Software
00018     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00019 
00020     As a special exception, permission is given to link this program
00021     with any edition of Qt, and distribute the resulting executable,
00022     without including the source code for Qt in the source distribution.
00023 */
00024 
00025 #include "article.h"
00026 #include "feed.h"
00027 #include "feedstorage.h"
00028 #include "storage.h"
00029 #include "librss/librss.h"
00030 #include "shared.h"
00031 #include "utils.h"
00032 
00033 #include <tqdatetime.h>
00034 #include <tqdom.h>
00035 #include <tqregexp.h>
00036 #include <tqstringlist.h>
00037 #include <tqvaluelist.h>
00038 
00039 #include <krfcdate.h>
00040 #include <kdebug.h>
00041 #include <kurl.h>
00042 
00043 
00044 namespace Akregator {
00045 
00046 struct Article::Private : public Shared
00047 {
00058     enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10};
00059 
00060     TQString guid;
00061     Backend::FeedStorage* archive;
00062     Feed* feed;
00063 
00064     // the variables below are initialized to null values in the Article constructor 
00065     // and then loaded on demand instead.
00066     //
00067     // to read their values, you should therefore use the accessor methods of the Article
00068     // hash(), pubDate(), statusBits() rather than accessing them directly.
00069     uint hash;
00070     TQDateTime pubDate;  
00071     int status;
00072 };
00073 
00074 Article::Article() : d(new Private)
00075 {
00076     d->hash = 0;
00077     d->status = 0;
00078     d->feed = 0;
00079     d->archive = 0;
00080 }
00081 
00082 Article::Article(const TQString& guid, Feed* feed) : d(new Private)
00083 {
00084     // this constructor should be as cheap as possible, so avoid calls to 
00085     // read information from the archive in here if possible
00086     //
00087     // d->hash, d->pubDate and d->status are loaded on-demand by
00088     // the hash(), pubDate() and statusBits() methods respectively
00089 
00090     d->feed = feed;
00091     d->guid = guid;
00092     d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl());
00093     d->status = 0;
00094 }
00095 
00096 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive)
00097 {
00098     d->archive = archive;
00099     d->status = Private::New;
00100     d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url() 
00101                               + article.commentsLink().url() );
00102 
00103     d->guid = article.guid();
00104     
00105     if (!d->archive->contains(d->guid))
00106     {
00107         d->archive->addEntry(d->guid);
00108 
00109         if (article.meta("deleted") == "true") 
00110         { // if article is in deleted state, we just add the status and omit the rest
00111             d->status = Private::Read | Private::Deleted;
00112             d->archive->setStatus(d->guid, d->status);
00113         }
00114         else
00115         { // article is not deleted, let's add it to the archive
00116         
00117             d->archive->setHash(d->guid, hash() );
00118             TQString title = article.title().isEmpty() ? buildTitle(article.description()) :  article.title();
00119             d->archive->setTitle(d->guid, title);
00120             d->archive->setDescription(d->guid, article.description());
00121             d->archive->setLink(d->guid, article.link().url());
00122             d->archive->setComments(d->guid, article.comments());
00123             d->archive->setCommentsLink(d->guid, article.commentsLink().url());
00124             d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink());
00125             d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true");
00126             d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime();
00127             d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
00128             d->archive->setAuthor(d->guid, article.author());
00129                         
00130             TQValueList<RSS::Category> cats = article.categories();
00131             TQValueList<RSS::Category>::ConstIterator end = cats.end();
00132 
00133             for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it)
00134             {
00135                 Backend::Category cat;
00136 
00137                 cat.term = (*it).category();
00138                 cat.scheme = (*it).domain();
00139                 cat.name = (*it).category();
00140 
00141                 d->archive->addCategory(d->guid, cat);
00142             }
00143 
00144             if (!article.enclosure().isNull())
00145             {
00146                 d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length());
00147             }
00148             else
00149             {
00150                 d->archive->removeEnclosure(d->guid);
00151             }
00152 
00153             TQString status = article.meta("status");
00154             
00155             if (!status.isEmpty())
00156             {
00157                 int statusInt = status.toInt();
00158                 if (statusInt == New)
00159                     statusInt = Unread;
00160                 setStatus(statusInt);
00161             }
00162             setKeep(article.meta("keep") == "true");
00163         }
00164     }
00165     else
00166     {
00167         // always update comments count, as it's not used for hash calculation
00168         d->archive->setComments(d->guid, article.comments());
00169         if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified?
00170         { // if yes, update
00171             d->pubDate.setTime_t(d->archive->pubDate(d->guid));
00172             d->archive->setHash(d->guid, hash() );
00173             TQString title = article.title().isEmpty() ? buildTitle(article.description()) :  article.title();
00174             d->archive->setTitle(d->guid, title);
00175             d->archive->setDescription(d->guid, article.description());
00176             d->archive->setLink(d->guid, article.link().url());
00177             d->archive->setCommentsLink(d->guid, article.commentsLink().url());
00178             d->archive->setAuthor(d->guid, article.author());
00179         }
00180     }
00181 }
00182 
00183 Article::Article(RSS::Article article, Feed* feed) : d(new Private)
00184 {
00185     //assert(feed)
00186     d->feed = feed;
00187     initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()));
00188 }
00189 
00190 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private)
00191 {
00192     d->feed = 0;
00193     initialize(article, archive);
00194 }
00195 
00196 bool Article::isNull() const
00197 {
00198     return d->archive == 0; // TODO: use proper null state
00199 }
00200 
00201 void Article::offsetPubDate(int secs)
00202 {
00203    d->pubDate = pubDate().addSecs(secs);
00204    d->archive->setPubDate(d->guid, d->pubDate.toTime_t());
00205 
00206 }
00207 
00208 void Article::setDeleted()
00209 {
00210     if (isDeleted())
00211         return;
00212   
00213     setStatus(Read);
00214     d->status = Private::Deleted | Private::Read;
00215     d->archive->setStatus(d->guid, d->status);
00216     d->archive->setDeleted(d->guid);
00217 
00218     if (d->feed)
00219         d->feed->setArticleDeleted(*this);
00220 }
00221 
00222 bool Article::isDeleted() const
00223 {
00224     return (statusBits() & Private::Deleted) != 0;
00225 }
00226 
00227 Article::Article(const Article &other) : d(new Private)
00228 {
00229     *this = other;
00230 }
00231 
00232 Article::~Article()
00233 {
00234     if (d->deref())
00235     {
00236         delete d;
00237         d = 0;
00238     }
00239 }
00240 
00241 Article &Article::operator=(const Article &other)
00242 {
00243     if (this != &other) {
00244         other.d->ref();
00245         if (d && d->deref())
00246             delete d;
00247         d = other.d;
00248     }
00249     return *this;
00250 }
00251 
00252 
00253 bool Article::operator<(const Article &other) const
00254 {
00255     return pubDate() > other.pubDate() || 
00256             (pubDate() == other.pubDate() && guid() < other.guid() );
00257 }
00258 
00259 bool Article::operator<=(const Article &other) const
00260 {
00261     return (pubDate() > other.pubDate() || *this == other);
00262 }
00263 
00264 bool Article::operator>(const Article &other) const
00265 {
00266     return pubDate() < other.pubDate() || 
00267             (pubDate() == other.pubDate() && guid() > other.guid() );
00268 }
00269 
00270 bool Article::operator>=(const Article &other) const
00271 {
00272     return (pubDate() > other.pubDate() || *this == other);
00273 }
00274 
00275 bool Article::operator==(const Article &other) const
00276 {
00277     return d->guid == other.guid();
00278 }
00279 
00280 int Article::statusBits() const
00281 {
00282     // delayed loading of status information from archive
00283     if ( d->status == 0 ) 
00284     {
00285         d->status = d->archive->status(d->guid);
00286     }
00287 
00288     return d->status;
00289 }
00290 
00291 int Article::status() const
00292 {
00293     if ((statusBits() & Private::Read) != 0)
00294         return Read;
00295 
00296     if ((statusBits() & Private::New) != 0)
00297         return New;
00298     else
00299         return Unread;
00300 }
00301 
00302 void Article::setStatus(int stat)
00303 {
00304     // use status() rather than statusBits() here to filter out status flags that we are not
00305     // interested in
00306     int oldStatus = status();
00307 
00308     if (oldStatus != stat)
00309     {
00310         switch (stat)
00311         {
00312             case Read:
00313                 d->status = ( d->status | Private::Read) & ~Private::New;
00314                 break;
00315             case Unread:
00316                 d->status = ( d->status & ~Private::Read) & ~Private::New;
00317                 break;
00318             case New:
00319                 d->status = ( d->status | Private::New) & ~Private::Read;
00320                 break;
00321         }
00322         d->archive->setStatus(d->guid, d->status);
00323         if (d->feed)
00324             d->feed->setArticleChanged(*this, oldStatus);
00325      }
00326 }
00327 
00328 TQString Article::title() const
00329 {
00330     return d->archive->title(d->guid);
00331 }
00332 
00333 TQString Article::author() const
00334 {
00335     return d->archive->author(d->guid);
00336 }
00337 
00338 KURL Article::link() const
00339 {
00340     return d->archive->link(d->guid);
00341 }
00342 
00343 TQString Article::description() const
00344 {
00345     return d->archive->description(d->guid);
00346 }
00347 
00348 TQString Article::guid() const
00349 {
00350     return d->guid;
00351 }
00352 
00353 KURL Article::commentsLink() const
00354 {
00355     return d->archive->commentsLink(d->guid);
00356 }
00357 
00358 
00359 int Article::comments() const
00360 {
00361     
00362     return d->archive->comments(d->guid);
00363 }
00364 
00365 
00366 bool Article::guidIsPermaLink() const
00367 {
00368     return d->archive->guidIsPermaLink(d->guid);
00369 }
00370 
00371 bool Article::guidIsHash() const
00372 {
00373     return d->archive->guidIsHash(d->guid);
00374 }
00375 
00376 uint Article::hash() const
00377 {
00378     // delayed loading of hash from archive
00379     if ( d->hash == 0 )
00380     {
00381         d->hash = d->archive->hash(d->guid);
00382     }
00383 
00384     return d->hash;
00385 }
00386 
00387 bool Article::keep() const
00388 {
00389     return ( statusBits() & Private::Keep) != 0;
00390 }
00391 
00392 RSS::Enclosure Article::enclosure() const
00393 {
00394     bool hasEnc;
00395     TQString url, type;
00396     int length;
00397     d->archive->enclosure(d->guid, hasEnc, url, type, length);
00398     return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure();
00399 
00400     
00401 }
00402 
00403 
00404 void Article::setKeep(bool keep)
00405 {
00406     d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep);
00407     d->archive->setStatus(d->guid, d->status);
00408     if (d->feed)
00409         d->feed->setArticleChanged(*this);
00410 }
00411 
00412 void Article::addTag(const TQString& tag)
00413 {
00414     d->archive->addTag(d->guid, tag);
00415     if (d->feed)
00416         d->feed->setArticleChanged(*this);
00417 }
00418 
00419 void Article::removeTag(const TQString& tag)
00420 {
00421     d->archive->removeTag(d->guid, tag);
00422     if (d->feed)
00423         d->feed->setArticleChanged(*this);
00424 }
00425 
00426 bool Article::hasTag(const TQString& tag) const
00427 {
00428     return d->archive->tags(d->guid).contains(tag);
00429 }
00430 
00431 TQStringList Article::tags() const
00432 {
00433     return d->archive->tags(d->guid);
00434 }
00435             
00436 Feed* Article::feed() const
00437 { return d->feed; }
00438 
00439 const TQDateTime& Article::pubDate() const
00440 {
00441     // delayed loading of publication date information from archive
00442     if ( d->pubDate.isNull() )
00443     {
00444         d->pubDate.setTime_t(d->archive->pubDate(d->guid));
00445     }
00446 
00447     return d->pubDate;
00448 }
00449 
00450 TQString Article::buildTitle(const TQString& description)
00451 {
00452     TQString s = description;
00453     if (description.stripWhiteSpace().isEmpty())
00454         return "";
00455         
00456     int i = s.find('>',500); /*avoid processing too much */
00457     if (i != -1)
00458         s = s.left(i+1);
00459     TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false);
00460     TQString tagName, toReplace, replaceWith;
00461     while (rx.search(s) != -1 )
00462     {
00463         tagName=rx.cap(2);
00464         if (tagName=="SCRIPT"||tagName=="script")
00465             toReplace=rx.cap(0); // strip tag AND tag contents
00466         else if (tagName.startsWith("br") || tagName.startsWith("BR"))
00467         {
00468             toReplace=rx.cap(1);
00469             replaceWith=" ";
00470         }
00471         else
00472             toReplace=rx.cap(1);  // strip just tag
00473         s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed
00474     }
00475     if (s.length()> 90)
00476         s=s.left(90)+"...";
00477     return s.simplifyWhiteSpace();
00478 }
00479 } // namespace Akregator