// loader.cpp
00001 /* 00002 * loader.cpp 00003 * 00004 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> 00005 * 00006 * This program is distributed in the hope that it will be useful, but WITHOUT 00007 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00008 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the 00009 * accompanying file 'COPYING'. 00010 */ 00011 #include "loader.h" 00012 #include "document.h" 00013 #include "feeddetector.h" 00014 00015 #include <tdeio/job.h> 00016 #include <kprocess.h> 00017 #include <kstaticdeleter.h> 00018 #include <kurl.h> 00019 #include <kdebug.h> 00020 00021 #include <tqdom.h> 00022 #include <tqbuffer.h> 00023 #include <tqregexp.h> 00024 #include <tqstring.h> 00025 #include <tqstringlist.h> 00026 #include <tqtimer.h> 00027 00028 using namespace RSS; 00029 00030 DataRetriever::DataRetriever() 00031 { 00032 } 00033 00034 DataRetriever::~DataRetriever() 00035 { 00036 } 00037 00038 class FileRetriever::Private 00039 { 00040 public: 00041 00042 Private() 00043 : buffer(NULL), 00044 lastError(0), job(NULL) 00045 { 00046 } 00047 00048 ~Private() 00049 { 00050 delete buffer; 00051 } 00052 00053 TQBuffer *buffer; 00054 int lastError; 00055 TDEIO::Job *job; 00056 static KStaticDeleter<TQString> userAgentsd; 00057 static TQString* userAgent; 00058 }; 00059 00060 KStaticDeleter<TQString> FileRetriever::Private::userAgentsd; 00061 TQString* FileRetriever::Private::userAgent = 0L; 00062 FileRetriever::FileRetriever() 00063 : d(new Private) 00064 { 00065 } 00066 00067 FileRetriever::~FileRetriever() 00068 { 00069 delete d; 00070 } 00071 00072 bool FileRetriever::m_useCache = true; 00073 00074 TQString FileRetriever::userAgent() 00075 { 00076 if (Private::userAgent == 0L) 00077 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString); 00078 return *Private::userAgent; 00079 } 00080 00081 void FileRetriever::setUserAgent(const TQString &ua) 00082 { 00083 if 
(Private::userAgent == 0L) 00084 FileRetriever::Private::userAgentsd.setObject(Private::userAgent, new TQString); 00085 (*Private::userAgent) = ua; 00086 } 00087 00088 void FileRetriever::setUseCache(bool enabled) 00089 { 00090 m_useCache = enabled; 00091 } 00092 00093 void FileRetriever::retrieveData(const KURL &url) 00094 { 00095 if (d->buffer) 00096 return; 00097 00098 d->buffer = new TQBuffer; 00099 d->buffer->open(IO_WriteOnly); 00100 00101 KURL u=url; 00102 00103 if (u.protocol()=="feed") 00104 u.setProtocol("http"); 00105 00106 d->job = TDEIO::get(u, false, false); 00107 d->job->addMetaData("cache", m_useCache ? "refresh" : "reload"); 00108 00109 TQString ua = userAgent(); 00110 if (!ua.isEmpty()) 00111 d->job->addMetaData("UserAgent", ua); 00112 00113 00114 TQTimer::singleShot(1000*90, this, TQT_SLOT(slotTimeout())); 00115 00116 connect(d->job, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)), 00117 TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &))); 00118 connect(d->job, TQT_SIGNAL(result(TDEIO::Job *)), TQT_SLOT(slotResult(TDEIO::Job *))); 00119 connect(d->job, TQT_SIGNAL(permanentRedirection(TDEIO::Job *, const KURL &, const KURL &)), 00120 TQT_SLOT(slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &))); 00121 } 00122 00123 void FileRetriever::slotTimeout() 00124 { 00125 abort(); 00126 00127 delete d->buffer; 00128 d->buffer = NULL; 00129 00130 d->lastError = TDEIO::ERR_SERVER_TIMEOUT; 00131 00132 emit dataRetrieved(TQByteArray(), false); 00133 } 00134 00135 int FileRetriever::errorCode() const 00136 { 00137 return d->lastError; 00138 } 00139 00140 void FileRetriever::slotData(TDEIO::Job *, const TQByteArray &data) 00141 { 00142 d->buffer->writeBlock(data.data(), data.size()); 00143 } 00144 00145 void FileRetriever::slotResult(TDEIO::Job *job) 00146 { 00147 TQByteArray data = d->buffer->buffer(); 00148 data.detach(); 00149 00150 delete d->buffer; 00151 d->buffer = NULL; 00152 00153 d->lastError = job->error(); 00154 emit 
dataRetrieved(data, d->lastError == 0); 00155 } 00156 00157 void FileRetriever::slotPermanentRedirection(TDEIO::Job *, const KURL &, const KURL &newUrl) 00158 { 00159 emit permanentRedirection(newUrl); 00160 } 00161 00162 void FileRetriever::abort() 00163 { 00164 if (d->job) 00165 { 00166 d->job->kill(true); 00167 d->job = NULL; 00168 } 00169 } 00170 00171 struct OutputRetriever::Private 00172 { 00173 Private() : process(NULL), 00174 buffer(NULL), 00175 lastError(0) 00176 { 00177 } 00178 00179 ~Private() 00180 { 00181 delete process; 00182 delete buffer; 00183 } 00184 00185 KShellProcess *process; 00186 TQBuffer *buffer; 00187 int lastError; 00188 }; 00189 00190 OutputRetriever::OutputRetriever() : 00191 d(new Private) 00192 { 00193 } 00194 00195 OutputRetriever::~OutputRetriever() 00196 { 00197 delete d; 00198 } 00199 00200 void OutputRetriever::retrieveData(const KURL &url) 00201 { 00202 // Ignore subsequent calls if we didn't finish the previous job yet. 00203 if (d->buffer || d->process) 00204 return; 00205 00206 d->buffer = new TQBuffer; 00207 d->buffer->open(IO_WriteOnly); 00208 00209 d->process = new KShellProcess(); 00210 connect(d->process, TQT_SIGNAL(processExited(TDEProcess *)), 00211 TQT_SLOT(slotExited(TDEProcess *))); 00212 connect(d->process, TQT_SIGNAL(receivedStdout(TDEProcess *, char *, int)), 00213 TQT_SLOT(slotOutput(TDEProcess *, char *, int))); 00214 *d->process << url.path(); 00215 d->process->start(TDEProcess::NotifyOnExit, TDEProcess::Stdout); 00216 } 00217 00218 int OutputRetriever::errorCode() const 00219 { 00220 return d->lastError; 00221 } 00222 00223 void OutputRetriever::slotOutput(TDEProcess *, char *data, int length) 00224 { 00225 d->buffer->writeBlock(data, length); 00226 } 00227 00228 void OutputRetriever::slotExited(TDEProcess *p) 00229 { 00230 if (!p->normalExit()) 00231 d->lastError = p->exitStatus(); 00232 00233 TQByteArray data = d->buffer->buffer(); 00234 data.detach(); 00235 00236 delete d->buffer; 00237 d->buffer = NULL; 
00238 00239 delete d->process; 00240 d->process = NULL; 00241 00242 emit dataRetrieved(data, p->normalExit() && p->exitStatus() == 0); 00243 } 00244 00245 struct Loader::Private 00246 { 00247 Private() : retriever(NULL), 00248 lastError(0) 00249 { 00250 } 00251 00252 ~Private() 00253 { 00254 delete retriever; 00255 } 00256 00257 DataRetriever *retriever; 00258 int lastError; 00259 KURL discoveredFeedURL; 00260 KURL url; 00261 }; 00262 00263 Loader *Loader::create() 00264 { 00265 return new Loader; 00266 } 00267 00268 Loader *Loader::create(TQObject *object, const char *slot) 00269 { 00270 Loader *loader = create(); 00271 connect(loader, TQT_SIGNAL(loadingComplete(Loader *, Document, Status)), 00272 object, slot); 00273 return loader; 00274 } 00275 00276 Loader::Loader() : d(new Private) 00277 { 00278 } 00279 00280 Loader::~Loader() 00281 { 00282 delete d; 00283 } 00284 00285 void Loader::loadFrom(const KURL &url, DataRetriever *retriever) 00286 { 00287 if (d->retriever != NULL) 00288 return; 00289 00290 d->url=url; 00291 d->retriever = retriever; 00292 00293 connect(d->retriever, TQT_SIGNAL(dataRetrieved(const TQByteArray &, bool)), 00294 this, TQT_SLOT(slotRetrieverDone(const TQByteArray &, bool))); 00295 00296 d->retriever->retrieveData(url); 00297 } 00298 00299 int Loader::errorCode() const 00300 { 00301 return d->lastError; 00302 } 00303 00304 void Loader::abort() 00305 { 00306 if (d && d->retriever) 00307 { 00308 d->retriever->abort(); 00309 delete d->retriever; 00310 d->retriever=NULL; 00311 } 00312 emit loadingComplete(this, TQDomDocument(), Aborted); 00313 delete this; 00314 } 00315 00316 const KURL &Loader::discoveredFeedURL() const 00317 { 00318 return d->discoveredFeedURL; 00319 } 00320 00321 void Loader::slotRetrieverDone(const TQByteArray &data, bool success) 00322 { 00323 d->lastError = d->retriever->errorCode(); 00324 00325 delete d->retriever; 00326 d->retriever = NULL; 00327 00328 Document rssDoc; 00329 Status status = Success; 00330 00331 if 
(success) { 00332 TQDomDocument doc; 00333 00334 /* Some servers insert whitespace before the <?xml...?> declaration. 00335 * TQDom doesn't tolerate that (and it's right, that's invalid XML), 00336 * so we strip that. 00337 */ 00338 00339 const char *charData = data.data(); 00340 int len = data.count(); 00341 00342 while (len && TQChar(*charData).isSpace()) { 00343 --len; 00344 ++charData; 00345 } 00346 00347 if ( len > 3 && TQChar(*charData) == TQChar(0357) ) { // 0357 0273 0277 00348 len -= 3; 00349 charData += 3; 00350 } 00351 TQByteArray tmpData; 00352 tmpData.setRawData(charData, len); 00353 00354 if (doc.setContent(tmpData)) 00355 { 00356 rssDoc = Document(doc); 00357 if (!rssDoc.isValid()) 00358 { 00359 discoverFeeds(tmpData); 00360 status = ParseError; 00361 } 00362 } 00363 else 00364 { 00365 discoverFeeds(tmpData); 00366 status = ParseError; 00367 } 00368 00369 tmpData.resetRawData(charData, len); 00370 } else 00371 status = RetrieveError; 00372 00373 emit loadingComplete(this, rssDoc, status); 00374 00375 delete this; 00376 } 00377 00378 void Loader::discoverFeeds(const TQByteArray &data) 00379 { 00380 TQString str = TQString(data).simplifyWhiteSpace(); 00381 00382 TQStringList feeds; 00383 00384 FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str); 00385 00386 for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it) 00387 { 00388 feeds += (*it).url(); 00389 } 00390 00391 if (list.isEmpty()) 00392 feeds = FeedDetector::extractBruteForce(str); 00393 00394 TQString feed = feeds.first(); 00395 TQString host = d->url.host(); 00396 KURL testURL; 00397 // loop through, prefer feeds on same host 00398 TQStringList::Iterator end( feeds.end() ); 00399 for ( TQStringList::Iterator it = feeds.begin(); it != end; ++it) 00400 { 00401 testURL=*it; 00402 if (testURL.host() == host) 00403 { 00404 feed = *it; 00405 break; 00406 } 00407 } 00408 00409 d->discoveredFeedURL = feed.isNull() ? 
TQString() : FeedDetector::fixRelativeURL(feed, d->url); 00410 } 00411 00412 #include "loader.moc" 00413 // vim:noet:ts=4