00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "tools_p.h"
00012
00013 #include <krfcdate.h>
00014 #include <tqdom.h>
00015 #include <kcharsets.h>
00016 #include <tqregexp.h>
00017
00018 namespace RSS {
00019
00020 time_t parseISO8601Date(const TQString &s)
00021 {
00022
00023 if (s.stripWhiteSpace().left(4).toInt() < 1000)
00024 return 0;
00025
00026
00027 if (s.find('T') != -1)
00028 return KRFCDate::parseDateISO8601(s);
00029 else
00030 return KRFCDate::parseDateISO8601(s + "T12:00:00");
00031 }
00032
00033 TQString childNodesAsXML(const TQDomNode& parent)
00034 {
00035 TQDomNodeList list = parent.childNodes();
00036 TQString str;
00037 TQTextStream ts( &str, IO_WriteOnly );
00038 for (uint i = 0; i < list.count(); ++i)
00039 ts << list.item(i);
00040 return str.stripWhiteSpace();
00041 }
00042
00043 static TQString plainTextToHtml(const TQString& plainText)
00044 {
00045 TQString str(plainText);
00046 str.replace("&", "&");
00047 str.replace("\"", """);
00048 str.replace("<", "<");
00049
00050 str.replace("\n", "<br/>");
00051 return str;
00052 }
00053
/**
 * Content categories returned by mapTypeToFormat() and dispatched on by
 * extractAtomContent().
 */
enum ContentFormat
{
    Text,   // escaped via plainTextToHtml() by extractAtomContent()
    HTML,   // entities are resolved by extractAtomContent()
    XML,    // child nodes are serialized by extractAtomContent()
    Binary  // extractAtomContent() returns a null string for this
};
00055
00056 static ContentFormat mapTypeToFormat(const TQString& modep, const TQString& typep, const TQString& src)
00057 {
00058 TQString mode = modep.isNull() ? "escaped" : modep;
00059 TQString type = typep;
00060
00061
00062
00063
00064 if (type.isNull() && src.isEmpty())
00065 type = TQString::fromUtf8("text");
00066
00067 if (type == TQString::fromUtf8("html")
00068 || type == TQString::fromUtf8("text/html"))
00069 return HTML;
00070
00071 if (type == TQString::fromUtf8("text")
00072 || (type.startsWith(TQString::fromUtf8("text/"), false)
00073 && !type.startsWith(TQString::fromUtf8("text/xml"), false))
00074 )
00075 return Text;
00076
00077 TQStringList xmltypes;
00078 xmltypes.append(TQString::fromUtf8("xhtml"));
00079
00080 xmltypes.append(TQString::fromUtf8("text/xml"));
00081 xmltypes.append(TQString::fromUtf8("application/xml"));
00082 xmltypes.append(TQString::fromUtf8("text/xml-external-parsed-entity"));
00083 xmltypes.append(TQString::fromUtf8("application/xml-external-parsed-entity"));
00084 xmltypes.append(TQString::fromUtf8("application/xml-dtd"));
00085
00086
00087 if (xmltypes.contains(type)
00088 || type.endsWith(TQString::fromUtf8("+xml"), false)
00089 || type.endsWith(TQString::fromUtf8("/xml"), false))
00090 return XML;
00091
00092 return Binary;
00093 }
00094
00095 static TQString extractAtomContent(const TQDomElement& e)
00096 {
00097 ContentFormat format = mapTypeToFormat(e.attribute("mode"),
00098 e.attribute("type"),
00099 e.attribute("src"));
00100
00101 switch (format)
00102 {
00103 case HTML:
00104 {
00105 const bool hasPre = e.text().contains( "<pre>", false ) || e.text().contains( "<pre ", false );
00106 return KCharsets::resolveEntities( hasPre ? e.text() : e.text().simplifyWhiteSpace() );
00107 }
00108 case Text:
00109 return plainTextToHtml(e.text().stripWhiteSpace());
00110 case XML:
00111 return childNodesAsXML(e).simplifyWhiteSpace();
00112 case Binary:
00113 default:
00114 return TQString();
00115 }
00116
00117 return TQString();
00118 }
00119
00120 TQString extractNode(const TQDomNode &parent, const TQString &elemName, bool isInlined)
00121 {
00122 TQDomNode node = parent.namedItem(elemName);
00123 if (node.isNull())
00124 return TQString();
00125
00126 TQDomElement e = node.toElement();
00127 TQString result = e.text().stripWhiteSpace();
00128
00129 if (elemName == "content")
00130 {
00131 result = extractAtomContent(e);
00132 }
00133 else
00134 {
00135 bool hasPre = result.contains("<pre>", false) || result.contains("<pre ", false);
00136 bool hasHtml = hasPre || result.contains("<");
00137 if(!isInlined && !hasHtml)
00138 result = result = result.replace(TQChar('\n'), "<br />");
00139 if(!hasPre)
00140 result = result.simplifyWhiteSpace();
00141 }
00142
00143 return result.isEmpty() ? TQString() : result;
00144 }
00145
00146 TQString extractTitle(const TQDomNode & parent)
00147 {
00148 TQDomNode node = parent.namedItem(TQString::fromLatin1("title"));
00149 if (node.isNull())
00150 return TQString();
00151
00152 TQString result = node.toElement().text();
00153
00154 result = KCharsets::resolveEntities(KCharsets::resolveEntities(result).replace(TQRegExp("<[^>]*>"), "").remove("\\"));
00155 result = result.simplifyWhiteSpace();
00156
00157 if (result.isEmpty())
00158 return TQString();
00159
00160 return result;
00161 }
00162
00163 static void authorFromString(const TQString& strp, TQString& name, TQString& email)
00164 {
00165 TQString str = strp.stripWhiteSpace();
00166 if (str.isEmpty())
00167 return;
00168
00169
00170
00171
00172 TQRegExp remail("<?([^@\\s<]+@[^>\\s]+)>?");
00173
00174
00175 int pos = remail.search(str);
00176 if (pos != -1)
00177 {
00178 TQString all = remail.cap(0);
00179 email = remail.cap(1);
00180 str.replace(all, "");
00181 }
00182
00183
00184
00185 name = str.simplifyWhiteSpace();
00186
00187
00188
00189
00190
00191
00192
00193 TQRegExp rename("^\\(([^\\)]*)\\)");
00194
00195 pos = rename.search(name);
00196
00197 if (pos != -1)
00198 {
00199 name = rename.cap(1);
00200 }
00201
00202 name = name.isEmpty() ? TQString() : name;
00203 email = email.isEmpty() ? TQString() : email;
00204 }
00205
00206 TQString parseItemAuthor(const TQDomElement& element, Format format, Version version)
00207 {
00208 TQString name;
00209 TQString email;
00210
00211 TQDomElement dcCreator = element.namedItem("dc:creator").toElement();
00212
00213 if (!dcCreator.isNull())
00214 authorFromString(dcCreator.text(), name, email);
00215 else if (format == AtomFeed)
00216 {
00217 TQDomElement atomAuthor = element.namedItem("author").toElement();
00218 if (atomAuthor.isNull())
00219 atomAuthor = element.namedItem("atom:author").toElement();
00220 if (!atomAuthor.isNull())
00221 {
00222 TQDomElement atomName = atomAuthor.namedItem("name").toElement();
00223 if (atomName.isNull())
00224 atomName = atomAuthor.namedItem("atom:name").toElement();
00225 name = atomName.text().stripWhiteSpace();
00226
00227 TQDomElement atomEmail = atomAuthor.namedItem("email").toElement();
00228 if (atomEmail.isNull())
00229 atomEmail = atomAuthor.namedItem("atom:email").toElement();
00230 email = atomEmail.text().stripWhiteSpace();
00231 }
00232 }
00233 else if (format == RSSFeed)
00234 {
00235 authorFromString(element.namedItem("author").toElement().text(), name, email);
00236 }
00237
00238 if (name.isNull())
00239 name = email;
00240
00241 if (!email.isNull())
00242 return TQString("<a href=\"mailto:%1\">%2</a>").arg(email).arg(name);
00243 else
00244 return name;
00245 }
00246
00247 }
00248
00249