article.cpp
00001 /* 00002 This file is part of Akregator. 00003 00004 Copyright (C) 2004 Stanislav Karchebny <Stanislav.Karchebny@kdemail.net> 00005 2005 Frank Osterfeld <frank.osterfeld at kdemail.net> 00006 This program is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 This program is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with this program; if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 00019 00020 As a special exception, permission is given to link this program 00021 with any edition of TQt, and distribute the resulting executable, 00022 without including the source code for TQt in the source distribution. 00023 */ 00024 00025 #include "article.h" 00026 #include "feed.h" 00027 #include "feedstorage.h" 00028 #include "storage.h" 00029 #include "librss/librss.h" 00030 #include "shared.h" 00031 #include "utils.h" 00032 00033 #include <tqdatetime.h> 00034 #include <tqdom.h> 00035 #include <tqregexp.h> 00036 #include <tqstringlist.h> 00037 #include <tqvaluelist.h> 00038 00039 #include <krfcdate.h> 00040 #include <kdebug.h> 00041 #include <kurl.h> 00042 00043 00044 namespace Akregator { 00045 00046 struct Article::Private : public Shared 00047 { 00058 enum Status {Deleted=0x01, Trash=0x02, New=0x04, Read=0x08, Keep=0x10}; 00059 00060 TQString guid; 00061 Backend::FeedStorage* archive; 00062 Feed* feed; 00063 00064 // the variables below are initialized to null values in the Article constructor 00065 // and then loaded on demand instead. 00066 // 00067 // to read their values, you should therefore use the accessor methods of the Article 00068 // hash(), pubDate(), statusBits() rather than accessing them directly. 00069 uint hash; 00070 TQDateTime pubDate; 00071 int status; 00072 }; 00073 00074 Article::Article() : d(new Private) 00075 { 00076 d->hash = 0; 00077 d->status = 0; 00078 d->feed = 0; 00079 d->archive = 0; 00080 } 00081 00082 Article::Article(const TQString& guid, Feed* feed) : d(new Private) 00083 { 00084 // this constructor should be as cheap as possible, so avoid calls to 00085 // read information from the archive in here if possible 00086 // 00087 // d->hash, d->pubDate and d->status are loaded on-demand by 00088 // the hash(), pubDate() and statusBits() methods respectively 00089 00090 d->feed = feed; 00091 d->guid = guid; 00092 d->archive = Backend::Storage::getInstance()->archiveFor(feed->xmlUrl()); 00093 d->status = 0; 00094 d->hash = 0; 00095 } 00096 00097 void Article::initialize(RSS::Article article, Backend::FeedStorage* archive) 00098 { 00099 d->archive = archive; 00100 d->status = Private::New; 00101 d->hash = Utils::calcHash(article.title() + article.description() + article.author() + article.link().url() 00102 + article.commentsLink().url() ); 00103 00104 d->guid = article.guid(); 00105 00106 if (!d->archive->contains(d->guid)) 00107 { 00108 d->archive->addEntry(d->guid); 00109 00110 if (article.meta("deleted") == "true") 00111 { // if article is in deleted state, we just add the status and omit the rest 00112 d->status = Private::Read | Private::Deleted; 00113 d->archive->setStatus(d->guid, d->status); 00114 } 00115 else 00116 { // article is not deleted, let's add it to the archive 00117 00118 d->archive->setHash(d->guid, hash() ); 00119 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title(); 00120 d->archive->setTitle(d->guid, title); 00121 d->archive->setDescription(d->guid, article.description()); 00122 d->archive->setLink(d->guid, article.link().url()); 00123 d->archive->setComments(d->guid, article.comments()); 00124 d->archive->setCommentsLink(d->guid, article.commentsLink().url()); 00125 d->archive->setGuidIsPermaLink(d->guid, article.guidIsPermaLink()); 00126 d->archive->setGuidIsHash(d->guid, article.meta("guidIsHash") == "true"); 00127 d->pubDate = article.pubDate().isValid() ? article.pubDate() : TQDateTime::currentDateTime(); 00128 d->archive->setPubDate(d->guid, d->pubDate.toTime_t()); 00129 d->archive->setAuthor(d->guid, article.author()); 00130 00131 TQValueList<RSS::Category> cats = article.categories(); 00132 TQValueList<RSS::Category>::ConstIterator end = cats.end(); 00133 00134 for (TQValueList<RSS::Category>::ConstIterator it = cats.begin(); it != end; ++it) 00135 { 00136 Backend::Category cat; 00137 00138 cat.term = (*it).category(); 00139 cat.scheme = (*it).domain(); 00140 cat.name = (*it).category(); 00141 00142 d->archive->addCategory(d->guid, cat); 00143 } 00144 00145 if (!article.enclosure().isNull()) 00146 { 00147 d->archive->setEnclosure(d->guid, article.enclosure().url(), article.enclosure().type(), article.enclosure().length()); 00148 } 00149 else 00150 { 00151 d->archive->removeEnclosure(d->guid); 00152 } 00153 00154 TQString status = article.meta("status"); 00155 00156 if (!status.isEmpty()) 00157 { 00158 int statusInt = status.toInt(); 00159 if (statusInt == New) 00160 statusInt = Unread; 00161 setStatus(statusInt); 00162 } 00163 setKeep(article.meta("keep") == "true"); 00164 } 00165 } 00166 else 00167 { 00168 // always update comments count, as it's not used for hash calculation 00169 d->archive->setComments(d->guid, article.comments()); 00170 if ( hash() != d->archive->hash(d->guid)) //article is in archive, was it modified? 00171 { // if yes, update 00172 d->pubDate.setTime_t(d->archive->pubDate(d->guid)); 00173 d->archive->setHash(d->guid, hash() ); 00174 TQString title = article.title().isEmpty() ? buildTitle(article.description()) : article.title(); 00175 d->archive->setTitle(d->guid, title); 00176 d->archive->setDescription(d->guid, article.description()); 00177 d->archive->setLink(d->guid, article.link().url()); 00178 d->archive->setCommentsLink(d->guid, article.commentsLink().url()); 00179 d->archive->setAuthor(d->guid, article.author()); 00180 } 00181 } 00182 } 00183 00184 Article::Article(RSS::Article article, Feed* feed) : d(new Private) 00185 { 00186 //assert(feed) 00187 d->feed = feed; 00188 initialize(article, Backend::Storage::getInstance()->archiveFor(feed->xmlUrl())); 00189 } 00190 00191 Article::Article(RSS::Article article, Backend::FeedStorage* archive) : d(new Private) 00192 { 00193 d->feed = 0; 00194 initialize(article, archive); 00195 } 00196 00197 bool Article::isNull() const 00198 { 00199 return d->archive == 0; // TODO: use proper null state 00200 } 00201 00202 void Article::offsetPubDate(int secs) 00203 { 00204 d->pubDate = pubDate().addSecs(secs); 00205 d->archive->setPubDate(d->guid, d->pubDate.toTime_t()); 00206 00207 } 00208 00209 void Article::setDeleted() 00210 { 00211 if (isDeleted()) 00212 return; 00213 00214 setStatus(Read); 00215 d->status = Private::Deleted | Private::Read; 00216 d->archive->setStatus(d->guid, d->status); 00217 d->archive->setDeleted(d->guid); 00218 00219 if (d->feed) 00220 d->feed->setArticleDeleted(*this); 00221 } 00222 00223 bool Article::isDeleted() const 00224 { 00225 return (statusBits() & Private::Deleted) != 0; 00226 } 00227 00228 Article::Article(const Article &other) : d(new Private) 00229 { 00230 *this = other; 00231 } 00232 00233 Article::~Article() 00234 { 00235 if (d->deref()) 00236 { 00237 delete d; 00238 d = 0; 00239 } 00240 } 00241 00242 Article &Article::operator=(const Article &other) 00243 { 00244 if (this != &other) { 00245 other.d->ref(); 00246 if (d && d->deref()) 00247 delete d; 00248 d = other.d; 00249 } 00250 return *this; 00251 } 00252 00253 00254 bool Article::operator<(const Article &other) const 00255 { 00256 return pubDate() > other.pubDate() || 00257 (pubDate() == other.pubDate() && guid() < other.guid() ); 00258 } 00259 00260 bool Article::operator<=(const Article &other) const 00261 { 00262 return (pubDate() > other.pubDate() || *this == other); 00263 } 00264 00265 bool Article::operator>(const Article &other) const 00266 { 00267 return pubDate() < other.pubDate() || 00268 (pubDate() == other.pubDate() && guid() > other.guid() ); 00269 } 00270 00271 bool Article::operator>=(const Article &other) const 00272 { 00273 return (pubDate() > other.pubDate() || *this == other); 00274 } 00275 00276 bool Article::operator==(const Article &other) const 00277 { 00278 return d->guid == other.guid(); 00279 } 00280 00281 int Article::statusBits() const 00282 { 00283 // delayed loading of status information from archive 00284 if ( d->status == 0 ) 00285 { 00286 d->status = d->archive->status(d->guid); 00287 } 00288 00289 return d->status; 00290 } 00291 00292 int Article::status() const 00293 { 00294 if ((statusBits() & Private::Read) != 0) 00295 return Read; 00296 00297 if ((statusBits() & Private::New) != 0) 00298 return New; 00299 else 00300 return Unread; 00301 } 00302 00303 void Article::setStatus(int stat) 00304 { 00305 // use status() rather than statusBits() here to filter out status flags that we are not 00306 // interested in 00307 int oldStatus = status(); 00308 00309 if (oldStatus != stat) 00310 { 00311 switch (stat) 00312 { 00313 case Read: 00314 d->status = ( d->status | Private::Read) & ~Private::New; 00315 break; 00316 case Unread: 00317 d->status = ( d->status & ~Private::Read) & ~Private::New; 00318 break; 00319 case New: 00320 d->status = ( d->status | Private::New) & ~Private::Read; 00321 break; 00322 } 00323 d->archive->setStatus(d->guid, d->status); 00324 if (d->feed) 00325 d->feed->setArticleChanged(*this, oldStatus); 00326 } 00327 } 00328 00329 TQString Article::title() const 00330 { 00331 return d->archive->title(d->guid); 00332 } 00333 00334 TQString Article::author() const 00335 { 00336 return d->archive->author(d->guid); 00337 } 00338 00339 KURL Article::link() const 00340 { 00341 return d->archive->link(d->guid); 00342 } 00343 00344 TQString Article::description() const 00345 { 00346 return d->archive->description(d->guid); 00347 } 00348 00349 TQString Article::guid() const 00350 { 00351 return d->guid; 00352 } 00353 00354 KURL Article::commentsLink() const 00355 { 00356 return d->archive->commentsLink(d->guid); 00357 } 00358 00359 00360 int Article::comments() const 00361 { 00362 return d->archive->comments(d->guid); 00363 } 00364 00365 00366 bool Article::guidIsPermaLink() const 00367 { 00368 return d->archive->guidIsPermaLink(d->guid); 00369 } 00370 00371 bool Article::guidIsHash() const 00372 { 00373 return d->archive->guidIsHash(d->guid); 00374 } 00375 00376 uint Article::hash() const 00377 { 00378 // delayed loading of hash from archive 00379 if ( d->hash == 0 ) 00380 { 00381 d->hash = d->archive->hash(d->guid); 00382 } 00383 00384 return d->hash; 00385 } 00386 00387 bool Article::keep() const 00388 { 00389 return ( statusBits() & Private::Keep) != 0; 00390 } 00391 00392 RSS::Enclosure Article::enclosure() const 00393 { 00394 bool hasEnc; 00395 TQString url, type; 00396 int length; 00397 d->archive->enclosure(d->guid, hasEnc, url, type, length); 00398 return hasEnc ? RSS::Enclosure(url, length, type) : RSS::Enclosure(); 00399 00400 00401 } 00402 00403 00404 void Article::setKeep(bool keep) 00405 { 00406 d->status = keep ? ( statusBits() | Private::Keep) : ( statusBits() & ~Private::Keep); 00407 d->archive->setStatus(d->guid, d->status); 00408 if (d->feed) 00409 d->feed->setArticleChanged(*this); 00410 } 00411 00412 void Article::addTag(const TQString& tag) 00413 { 00414 d->archive->addTag(d->guid, tag); 00415 if (d->feed) 00416 d->feed->setArticleChanged(*this); 00417 } 00418 00419 void Article::removeTag(const TQString& tag) 00420 { 00421 d->archive->removeTag(d->guid, tag); 00422 if (d->feed) 00423 d->feed->setArticleChanged(*this); 00424 } 00425 00426 bool Article::hasTag(const TQString& tag) const 00427 { 00428 return d->archive->tags(d->guid).contains(tag); 00429 } 00430 00431 TQStringList Article::tags() const 00432 { 00433 return d->archive->tags(d->guid); 00434 } 00435 00436 Feed* Article::feed() const 00437 { return d->feed; } 00438 00439 const TQDateTime& Article::pubDate() const 00440 { 00441 // delayed loading of publication date information from archive 00442 if ( d->pubDate.isNull() ) 00443 { 00444 d->pubDate.setTime_t(d->archive->pubDate(d->guid)); 00445 } 00446 00447 return d->pubDate; 00448 } 00449 00450 TQString Article::buildTitle(const TQString& description) 00451 { 00452 TQString s = description; 00453 if (description.stripWhiteSpace().isEmpty()) 00454 return ""; 00455 00456 int i = s.find('>',500); /*avoid processing too much */ 00457 if (i != -1) 00458 s = s.left(i+1); 00459 TQRegExp rx("(<([^\\s>]*)(?:[^>]*)>)[^<]*", false); 00460 TQString tagName, toReplace, replaceWith; 00461 while (rx.search(s) != -1 ) 00462 { 00463 tagName=rx.cap(2); 00464 if (tagName=="SCRIPT"||tagName=="script") 00465 toReplace=rx.cap(0); // strip tag AND tag contents 00466 else if (tagName.startsWith("br") || tagName.startsWith("BR")) 00467 { 00468 toReplace=rx.cap(1); 00469 replaceWith=" "; 00470 } 00471 else 00472 toReplace=rx.cap(1); // strip just tag 00473 s=s.replace(s.find(toReplace),toReplace.length(),replaceWith); // do the deed 00474 } 00475 if (s.length()> 90) 00476 s=s.left(90)+"..."; 00477 return s.simplifyWhiteSpace(); 00478 } 00479 } // namespace Akregator