document.cpp
00001 /* 00002 * document.cpp 00003 * 00004 * Copyright (c) 2001, 2002, 2003 Frerich Raabe <raabe@kde.org> 00005 * 00006 * This program is distributed in the hope that it will be useful, but WITHOUT 00007 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00008 * FOR A PARTICULAR PURPOSE. For licensing and distribution details, check the 00009 * accompanying file 'COPYING'. 00010 * 00011 */ 00012 #include "document.h" 00013 #include "article.h" 00014 #include "image.h" 00015 #include "textinput.h" 00016 #include "tools_p.h" 00017 00018 #include <krfcdate.h> 00019 #include <kurl.h> 00020 00021 #include <tqdatetime.h> 00022 #include <tqdom.h> 00023 #include <tqptrlist.h> 00024 00025 #include <kdebug.h> 00026 00027 using namespace RSS; 00028 00029 struct Document::Private : public Shared 00030 { 00031 Private() : version(v0_90), image(NULL), textInput(NULL), language(en) 00032 { 00033 format=UnknownFormat; 00034 valid=false; 00035 ttl=-1; 00036 } 00037 00038 ~Private() 00039 { 00040 delete textInput; 00041 delete image; 00042 } 00043 00044 Version version; 00045 TQString title; 00046 TQString description; 00047 KURL link; 00048 Image *image; 00049 TextInput *textInput; 00050 Article::List articles; 00051 Language language; 00052 Format format; 00053 TQString copyright; 00054 TQDateTime pubDate; 00055 TQDateTime lastBuildDate; 00056 TQString rating; 00057 KURL docs; 00058 int ttl; 00059 TQString managingEditor; 00060 TQString webMaster; 00061 HourList skipHours; 00062 DayList skipDays; 00063 bool valid; 00064 }; 00065 00066 Document::Document() : d(new Private) 00067 { 00068 } 00069 00070 Document::Document(const Document &other) : d(0) 00071 { 00072 *this = other; 00073 } 00074 00075 static TQString extractLink(const TQDomNode& node, Format format) 00076 { 00077 if (format == AtomFeed) 00078 { 00079 TQDomNode n; 00080 for (n = node.firstChild(); !n.isNull(); n = n.nextSibling()) { 00081 const TQDomElement e = n.toElement(); 00082 if ( (e.tagName() == TQString::fromLatin1("link")) 00083 && (e.attribute(TQString::fromLatin1("rel"), TQString::fromLatin1("alternate")) == TQString::fromLatin1("alternate"))) 00084 { 00085 return n.toElement().attribute(TQString::fromLatin1("href")); 00086 } 00087 } 00088 } 00089 00090 return extractNode(node, TQString::fromLatin1("link")); 00091 00092 } 00093 00094 Document::Document(const TQDomDocument &doc) : d(new Private) 00095 { 00096 TQString elemText; 00097 TQDomNode rootNode = doc.documentElement(); 00098 00099 // Determine the version of the present RSS markup. 00100 TQString attr; 00101 00102 // we should probably check that it ISN'T feed or rss, rather than check if it is xhtml 00103 if (rootNode.toElement().tagName()==TQString::fromLatin1("html")) 00104 d->valid=false; 00105 else 00106 d->valid=true; 00107 00108 attr = rootNode.toElement().attribute(TQString::fromLatin1("version"), TQString()); 00109 if (rootNode.toElement().tagName()==TQString::fromLatin1("feed")) 00110 { 00111 d->format=AtomFeed; 00112 if (attr == TQString::fromLatin1("0.3")) 00113 d->version = vAtom_0_3; 00114 else if (attr == TQString::fromLatin1("0.2")) /* smt -> review */ 00115 d->version = vAtom_0_2; 00116 else if (attr == TQString::fromLatin1("0.1")) /* smt -> review */ 00117 d->version = vAtom_0_1; 00118 else 00119 d->version = vAtom_1_0; 00120 } 00121 else 00122 { 00123 d->format=RSSFeed; 00124 if (attr == TQString::fromLatin1("0.91")) 00125 d->version = v0_91; 00126 else if (attr == TQString::fromLatin1("0.92")) 00127 d->version = v0_92; 00128 else if (attr == TQString::fromLatin1("0.93")) 00129 d->version = v0_93; 00130 else if (attr == TQString::fromLatin1("0.94")) 00131 d->version = v0_94; 00132 else // otherwise, we just assume a RSS2 compatible feed. As rss2 is generally 00133 // backward-compatible, this should work 00134 d->version = v2_0; 00135 } 00136 00137 00138 if (d->format==UnknownFormat) 00139 { 00140 attr = rootNode.toElement().attribute(TQString::fromLatin1("xmlns"), TQString()); 00141 if (!attr.isNull()) { 00142 /* 00143 * Hardcoding these URLs is actually a bad idea, since the DTD doesn't 00144 * dictate a specific namespace. Still, most RSS files seem to use 00145 * these two, so I'll go for them now. If it turns out that many 00146 * mirrors of this RSS namespace are in use, I'll probably have to 00147 * distinguish the RSS versions by analyzing the relationship between 00148 * the nodes. 00149 */ 00150 if (attr == TQString::fromLatin1("http://my.netscape.com/rdf/simple/0.9/")) { 00151 d->format=RSSFeed; 00152 d->version = v0_90; 00153 } 00154 else if (attr == TQString::fromLatin1("http://purl.org/rss/1.0/")) { 00155 d->format=RSSFeed; 00156 d->version = v1_0; 00157 } 00158 } 00159 } 00160 00161 TQDomNode channelNode; 00162 00163 if (d->format == AtomFeed) 00164 channelNode=rootNode; 00165 else 00166 channelNode=rootNode.namedItem(TQString::fromLatin1("channel")); 00167 00168 if (!(elemText = extractTitle(channelNode)).isNull()) 00169 d->title = elemText; 00170 TQString descriptionTagName = "description"; 00171 00172 if (d->format == AtomFeed) 00173 { 00174 if (d->version == vAtom_1_0) 00175 descriptionTagName = "subtitle"; 00176 else 00177 descriptionTagName = "tagline"; 00178 } 00179 00180 if (!(elemText = extractNode(channelNode, descriptionTagName)).isNull()) 00181 d->description = elemText; 00182 00183 d->link = extractLink(channelNode, d->format); 00184 00185 00186 /* This is ugly but necessary since RSS 0.90 and 1.0 have a different parent 00187 * node for <image>, <textinput> and <item> than RSS 0.91-0.94 and RSS 2.0. 00188 */ 00189 TQDomNode parentNode; 00190 if (d->version == v0_90 || d->version == v1_0 || d->format == AtomFeed) 00191 parentNode = rootNode; 00192 else 00193 { 00194 // following is a HACK for broken 0.91 feeds like xanga.com's 00195 if (!rootNode.namedItem(TQString::fromLatin1("item")).isNull()) 00196 parentNode = rootNode; 00197 else 00198 parentNode = channelNode; 00199 } 00200 00201 // image and textinput aren't supported by Atom.. handle in case feed provides 00202 TQDomNode n = parentNode.namedItem(TQString::fromLatin1("image")); 00203 if (!n.isNull()) 00204 d->image = new Image(n); 00205 00206 n = parentNode.namedItem(TQString::fromLatin1("textinput")); 00207 if (!n.isNull()) 00208 d->textInput = new TextInput(n); 00209 00210 // Our (hopefully faster) version of elementsByTagName() 00211 TQString tagName; 00212 if (d->format == AtomFeed) 00213 tagName=TQString::fromLatin1("entry"); 00214 else 00215 tagName=TQString::fromLatin1("item"); 00216 00217 for (n = parentNode.firstChild(); !n.isNull(); n = n.nextSibling()) { 00218 const TQDomElement e = n.toElement(); 00219 if (e.tagName() == tagName) 00220 d->articles.append(Article(e, d->format, d->version)); 00221 } 00222 00223 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("copyright"))).isNull()) 00224 d->copyright = elemText; 00225 00226 if (d->format == AtomFeed) 00227 elemText = rootNode.toElement().attribute(TQString::fromLatin1("xml:lang"), TQString()); 00228 else 00229 elemText = extractNode(channelNode, TQString::fromLatin1("language")); 00230 00231 if (!elemText.isNull()){ 00232 if (elemText == TQString::fromLatin1("af")) 00233 d->language = af; 00234 else if (elemText == TQString::fromLatin1("sq")) 00235 d->language = sq; 00236 else if (elemText == TQString::fromLatin1("eu")) 00237 d->language = eu; 00238 else if (elemText == TQString::fromLatin1("be")) 00239 d->language = be; 00240 else if (elemText == TQString::fromLatin1("bg")) 00241 d->language = bg; 00242 else if (elemText == TQString::fromLatin1("ca")) 00243 d->language = ca; 00244 else if (elemText == TQString::fromLatin1("zh-cn")) 00245 d->language = zh_cn; 00246 else if (elemText == TQString::fromLatin1("zh-tw")) 00247 d->language = zh_tw; 00248 else if (elemText == TQString::fromLatin1("hr")) 00249 d->language = hr; 00250 else if (elemText == TQString::fromLatin1("cs")) 00251 d->language = cs; 00252 else if (elemText == TQString::fromLatin1("da")) 00253 d->language = da; 00254 else if (elemText == TQString::fromLatin1("nl")) 00255 d->language = nl; 00256 else if (elemText == TQString::fromLatin1("nl-be")) 00257 d->language = nl_be; 00258 else if (elemText == TQString::fromLatin1("nl-nl")) 00259 d->language = nl_nl; 00260 else if (elemText == TQString::fromLatin1("en")) 00261 d->language = en; 00262 else if (elemText == TQString::fromLatin1("en-au")) 00263 d->language = en_au; 00264 else if (elemText == TQString::fromLatin1("en-bz")) 00265 d->language = en_bz; 00266 else if (elemText == TQString::fromLatin1("en-ca")) 00267 d->language = en_ca; 00268 else if (elemText == TQString::fromLatin1("en-ie")) 00269 d->language = en_ie; 00270 else if (elemText == TQString::fromLatin1("en-jm")) 00271 d->language = en_jm; 00272 else if (elemText == TQString::fromLatin1("en-nz")) 00273 d->language = en_nz; 00274 else if (elemText == TQString::fromLatin1("en-ph")) 00275 d->language = en_ph; 00276 else if (elemText == TQString::fromLatin1("en-za")) 00277 d->language = en_za; 00278 else if (elemText == TQString::fromLatin1("en-tt")) 00279 d->language = en_tt; 00280 else if (elemText == TQString::fromLatin1("en-gb")) 00281 d->language = en_gb; 00282 else if (elemText == TQString::fromLatin1("en-us")) 00283 d->language = en_us; 00284 else if (elemText == TQString::fromLatin1("en-zw")) 00285 d->language = en_zw; 00286 else if (elemText == TQString::fromLatin1("fo")) 00287 d->language = fo; 00288 else if (elemText == TQString::fromLatin1("fi")) 00289 d->language = fi; 00290 else if (elemText == TQString::fromLatin1("fr")) 00291 d->language = fr; 00292 else if (elemText == TQString::fromLatin1("fr-be")) 00293 d->language = fr_be; 00294 else if (elemText == TQString::fromLatin1("fr-ca")) 00295 d->language = fr_ca; 00296 else if (elemText == TQString::fromLatin1("fr-fr")) 00297 d->language = fr_fr; 00298 else if (elemText == TQString::fromLatin1("fr-lu")) 00299 d->language = fr_lu; 00300 else if (elemText == TQString::fromLatin1("fr-mc")) 00301 d->language = fr_mc; 00302 else if (elemText == TQString::fromLatin1("fr-ch")) 00303 d->language = fr_ch; 00304 else if (elemText == TQString::fromLatin1("gl")) 00305 d->language = gl; 00306 else if (elemText == TQString::fromLatin1("gd")) 00307 d->language = gd; 00308 else if (elemText == TQString::fromLatin1("de")) 00309 d->language = de; 00310 else if (elemText == TQString::fromLatin1("de-at")) 00311 d->language = de_at; 00312 else if (elemText == TQString::fromLatin1("de-de")) 00313 d->language = de_de; 00314 else if (elemText == TQString::fromLatin1("de-li")) 00315 d->language = de_li; 00316 else if (elemText == TQString::fromLatin1("de-lu")) 00317 d->language = de_lu; 00318 else if (elemText == TQString::fromLatin1("de-ch")) 00319 d->language = de_ch; 00320 else if (elemText == TQString::fromLatin1("el")) 00321 d->language = el; 00322 else if (elemText == TQString::fromLatin1("hu")) 00323 d->language = hu; 00324 else if (elemText == TQString::fromLatin1("is")) 00325 d->language = is; 00326 else if (elemText == TQString::fromLatin1("id")) 00327 d->language = id; 00328 else if (elemText == TQString::fromLatin1("ga")) 00329 d->language = ga; 00330 else if (elemText == TQString::fromLatin1("it")) 00331 d->language = it; 00332 else if (elemText == TQString::fromLatin1("it-it")) 00333 d->language = it_it; 00334 else if (elemText == TQString::fromLatin1("it-ch")) 00335 d->language = it_ch; 00336 else if (elemText == TQString::fromLatin1("ja")) 00337 d->language = ja; 00338 else if (elemText == TQString::fromLatin1("ko")) 00339 d->language = ko; 00340 else if (elemText == TQString::fromLatin1("mk")) 00341 d->language = mk; 00342 else if (elemText == TQString::fromLatin1("no")) 00343 d->language = no; 00344 else if (elemText == TQString::fromLatin1("pl")) 00345 d->language = pl; 00346 else if (elemText == TQString::fromLatin1("pt")) 00347 d->language = pt; 00348 else if (elemText == TQString::fromLatin1("pt-br")) 00349 d->language = pt_br; 00350 else if (elemText == TQString::fromLatin1("pt-pt")) 00351 d->language = pt_pt; 00352 else if (elemText == TQString::fromLatin1("ro")) 00353 d->language = ro; 00354 else if (elemText == TQString::fromLatin1("ro-mo")) 00355 d->language = ro_mo; 00356 else if (elemText == TQString::fromLatin1("ro-ro")) 00357 d->language = ro_ro; 00358 else if (elemText == TQString::fromLatin1("ru")) 00359 d->language = ru; 00360 else if (elemText == TQString::fromLatin1("ru-mo")) 00361 d->language = ru_mo; 00362 else if (elemText == TQString::fromLatin1("ru-ru")) 00363 d->language = ru_ru; 00364 else if (elemText == TQString::fromLatin1("sr")) 00365 d->language = sr; 00366 else if (elemText == TQString::fromLatin1("sk")) 00367 d->language = sk; 00368 else if (elemText == TQString::fromLatin1("sl")) 00369 d->language = sl; 00370 else if (elemText == TQString::fromLatin1("es")) 00371 d->language = es; 00372 else if (elemText == TQString::fromLatin1("es-ar")) 00373 d->language = es_ar; 00374 else if (elemText == TQString::fromLatin1("es-bo")) 00375 d->language = es_bo; 00376 else if (elemText == TQString::fromLatin1("es-cl")) 00377 d->language = es_cl; 00378 else if (elemText == TQString::fromLatin1("es-co")) 00379 d->language = es_co; 00380 else if (elemText == TQString::fromLatin1("es-cr")) 00381 d->language = es_cr; 00382 else if (elemText == TQString::fromLatin1("es-do")) 00383 d->language = es_do; 00384 else if (elemText == TQString::fromLatin1("es-ec")) 00385 d->language = es_ec; 00386 else if (elemText == TQString::fromLatin1("es-sv")) 00387 d->language = es_sv; 00388 else if (elemText == TQString::fromLatin1("es-gt")) 00389 d->language = es_gt; 00390 else if (elemText == TQString::fromLatin1("es-hn")) 00391 d->language = es_hn; 00392 else if (elemText == TQString::fromLatin1("es-mx")) 00393 d->language = es_mx; 00394 else if (elemText == TQString::fromLatin1("es-ni")) 00395 d->language = es_ni; 00396 else if (elemText == TQString::fromLatin1("es-pa")) 00397 d->language = es_pa; 00398 else if (elemText == TQString::fromLatin1("es-py")) 00399 d->language = es_py; 00400 else if (elemText == TQString::fromLatin1("es-pe")) 00401 d->language = es_pe; 00402 else if (elemText == TQString::fromLatin1("es-pr")) 00403 d->language = es_pr; 00404 else if (elemText == TQString::fromLatin1("es-es")) 00405 d->language = es_es; 00406 else if (elemText == TQString::fromLatin1("es-uy")) 00407 d->language = es_uy; 00408 else if (elemText == TQString::fromLatin1("es-ve")) 00409 d->language = es_ve; 00410 else if (elemText == TQString::fromLatin1("sv")) 00411 d->language = sv; 00412 else if (elemText == TQString::fromLatin1("sv-fi")) 00413 d->language = sv_fi; 00414 else if (elemText == TQString::fromLatin1("sv-se")) 00415 d->language = sv_se; 00416 else if (elemText == TQString::fromLatin1("tr")) 00417 d->language = tr; 00418 else if (elemText == TQString::fromLatin1("uk")) 00419 d->language = uk; 00420 else 00421 d->language = UndefinedLanguage; 00422 } 00423 00424 if (d->format == AtomFeed) 00425 tagName=TQString::fromLatin1("issued"); // atom doesn't specify this for feeds 00426 // but some broken feeds do this 00427 else 00428 tagName=TQString::fromLatin1("pubDate"); 00429 00430 if (!(elemText = extractNode(channelNode, tagName)).isNull()) { 00431 time_t _time; 00432 00433 if (d->format == AtomFeed) 00434 _time=parseISO8601Date(elemText); 00435 else 00436 _time=KRFCDate::parseDate(elemText); 00437 /* \bug This isn't really the right way since it will set the date to 00438 * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that 00439 * we cannot distinguish between that date, and invalid values. :-/ 00440 */ 00441 d->pubDate.setTime_t(_time); 00442 } 00443 00444 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("dc:date"))).isNull()) { 00445 time_t _time = parseISO8601Date(elemText); 00446 /* \bug This isn't really the right way since it will set the date to 00447 * Jan 1 1970, 1:00:00 if the passed date was invalid; this means that 00448 * we cannot distinguish between that date, and invalid values. :-/ 00449 */ 00450 d->pubDate.setTime_t(_time); 00451 } 00452 00453 if (d->format == AtomFeed) 00454 tagName=TQString::fromLatin1("modified"); 00455 else 00456 tagName=TQString::fromLatin1("lastBuildDate"); 00457 if (!(elemText = extractNode(channelNode, tagName)).isNull()) { 00458 time_t _time; 00459 if (d->format == AtomFeed) 00460 _time = parseISO8601Date(elemText); 00461 else 00462 _time = KRFCDate::parseDate(elemText); 00463 d->lastBuildDate.setTime_t(_time); 00464 } 00465 00466 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("rating"))).isNull()) 00467 d->rating = elemText; 00468 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("docs"))).isNull()) 00469 d->docs = elemText; 00470 if (!(elemText = extractNode(channelNode, TQString::fromLatin1((d->format == AtomFeed) ? "author" : "managingEditor"))).isNull()) 00471 d->managingEditor = elemText; 00472 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("webMaster"))).isNull()) 00473 d->webMaster = elemText; 00474 00475 if (!(elemText = extractNode(channelNode, TQString::fromLatin1("ttl"))).isNull()) 00476 d->ttl = elemText.toUInt(); 00477 00478 n = channelNode.namedItem(TQString::fromLatin1("skipHours")); 00479 if (!n.isNull()) 00480 for (TQDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement()) 00481 if (e.tagName() == TQString::fromLatin1("hour")) 00482 d->skipHours.append(e.text().toUInt()); 00483 00484 n = channelNode.namedItem(TQString::fromLatin1("skipDays")); 00485 if (!n.isNull()) { 00486 Day day; 00487 TQString elemText; 00488 for (TQDomElement e = n.firstChild().toElement(); !e.isNull(); e = e.nextSibling().toElement()) 00489 if (e.tagName() == TQString::fromLatin1("day")) { 00490 elemText = e.text().lower(); 00491 if (elemText == TQString::fromLatin1("monday")) 00492 day = Monday; 00493 else if (elemText == TQString::fromLatin1("tuesday")) 00494 day = Tuesday; 00495 else if (elemText == TQString::fromLatin1("wednesday")) 00496 day = Wednesday; 00497 else if (elemText == TQString::fromLatin1("thursday")) 00498 day = Thursday; 00499 else if (elemText == TQString::fromLatin1("friday")) 00500 day = Friday; 00501 else if (elemText == TQString::fromLatin1("saturday")) 00502 day = Saturday; 00503 else if (elemText == TQString::fromLatin1("sunday")) 00504 day = Sunday; 00505 else 00506 day = UndefinedDay; 00507 if (day != UndefinedDay) 00508 d->skipDays.append(day); 00509 } 00510 } 00511 } 00512 00513 Document::~Document() 00514 { 00515 if (d->deref()) 00516 delete d; 00517 } 00518 00519 bool Document::isValid() const 00520 { 00521 return d->valid; 00522 } 00523 00524 Version Document::version() const 00525 { 00526 return d->version; 00527 } 00528 00529 TQString Document::verbVersion() const 00530 { 00531 switch (d->version) { 00532 case v0_90: return TQString::fromLatin1("0.90"); 00533 case v0_91: return TQString::fromLatin1("0.91"); 00534 case v0_92: return TQString::fromLatin1("0.92"); 00535 case v0_93: return TQString::fromLatin1("0.93"); 00536 case v0_94: return TQString::fromLatin1("0.94"); 00537 case v1_0: return TQString::fromLatin1("1.0"); 00538 case v2_0: return TQString::fromLatin1("2.0"); 00539 case vAtom_0_3: return TQString::fromLatin1("0.3"); 00540 case vAtom_0_2: return TQString::fromLatin1("0.2"); 00541 case vAtom_0_1: return TQString::fromLatin1("0.1"); 00542 case vAtom_1_0: return TQString::fromLatin1("1.0"); 00543 } 00544 return TQString(); 00545 } 00546 00547 TQString Document::title() const 00548 { 00549 return d->title; 00550 } 00551 00552 TQString Document::description() const 00553 { 00554 return d->description; 00555 } 00556 00557 const KURL &Document::link() const 00558 { 00559 return d->link; 00560 } 00561 00562 Image *Document::image() 00563 { 00564 return d->image; 00565 } 00566 00567 const Image *Document::image() const 00568 { 00569 return d->image; 00570 } 00571 00572 TextInput *Document::textInput() 00573 { 00574 return d->textInput; 00575 } 00576 00577 const TextInput *Document::textInput() const 00578 { 00579 return d->textInput; 00580 } 00581 00582 const Article::List &Document::articles() const 00583 { 00584 return d->articles; 00585 } 00586 00587 Language Document::language() const 00588 { 00589 return d->language; 00590 } 00591 00592 TQString Document::copyright() const 00593 { 00594 return d->copyright; 00595 } 00596 00597 const TQDateTime &Document::pubDate() const 00598 { 00599 return d->pubDate; 00600 } 00601 00602 const TQDateTime &Document::lastBuildDate() const 00603 { 00604 return d->lastBuildDate; 00605 } 00606 00607 TQString Document::rating() const 00608 { 00609 return d->rating; 00610 } 00611 00612 const KURL &Document::docs() const 00613 { 00614 return d->docs; 00615 } 00616 00617 TQString Document::managingEditor() const 00618 { 00619 return d->managingEditor; 00620 } 00621 00622 TQString Document::webMaster() const 00623 { 00624 return d->webMaster; 00625 } 00626 00627 const HourList &Document::skipHours() const 00628 { 00629 return d->skipHours; 00630 } 00631 00632 const DayList &Document::skipDays() const 00633 { 00634 return d->skipDays; 00635 } 00636 00637 int Document::ttl() const 00638 { 00639 return d->ttl; 00640 } 00641 00642 Document &Document::operator=(const Document &other) 00643 { 00644 if (this != &other) { 00645 other.d->ref(); 00646 if (d && d->deref()) 00647 delete d; 00648 d = other.d; 00649 } 00650 return *this; 00651 } 00652 00653 // vim:noet:ts=4