00001 #include <config.h>
00002 #include <string.h>
00003 #include <sys/time.h>
00004 #include <unistd.h>
00005 #include <libxml/xmlversion.h>
00006 #include <libxml/xmlmemory.h>
00007 #include <libxml/debugXML.h>
00008 #include <libxml/HTMLtree.h>
00009 #include <libxml/xmlIO.h>
00010 #include <libxml/parserInternals.h>
00011 #include <libxslt/xsltconfig.h>
00012 #include <libxslt/xsltInternals.h>
00013 #include <libxslt/transform.h>
00014 #include <libxslt/xsltutils.h>
00015 #include <tqstring.h>
00016 #include <kstandarddirs.h>
00017 #include <kinstance.h>
00018 #include <xslt.h>
00019 #include <tqfile.h>
00020 #include <tqdir.h>
00021 #include <tdecmdlineargs.h>
00022 #include <tdelocale.h>
00023 #include <tdeaboutdata.h>
00024 #include <stdlib.h>
00025 #include <kdebug.h>
00026 #include <tqtextcodec.h>
00027 #include <tqfileinfo.h>
00028 #include <kprocess.h>
00029 #include <tqvaluevector.h>
00030
00031 extern int xmlLoadExtDtdDefaultValue;
00032
00033 class MyPair {
00034 public:
00035 TQString word;
00036 int base;};
00037
00038 typedef TQValueList<MyPair> PairList;
00039
00040 void parseEntry(PairList &list, xmlNodePtr cur, int base)
00041 {
00042 if ( !cur )
00043 return;
00044
00045 base += atoi( ( const char* )xmlGetProp(cur, ( const xmlChar* )"header") );
00046 if ( base > 10 )
00047 base = 10;
00048
00049
00050 cur = cur->xmlChildrenNode;
00051 while (cur != NULL) {
00052
00053 if ( cur->type == XML_TEXT_NODE ) {
00054 TQString words = TQString::fromUtf8( ( char* )cur->content );
00055 TQStringList wlist = TQStringList::split( ' ', words.simplifyWhiteSpace() );
00056 for ( TQStringList::ConstIterator it = wlist.begin();
00057 it != wlist.end(); ++it )
00058 {
00059 MyPair m;
00060 m.word = *it;
00061 m.base = base;
00062 list.append( m );
00063 }
00064 } else if ( !xmlStrcmp( cur->name, (const xmlChar *) "entry") )
00065 parseEntry( list, cur, base );
00066
00067 cur = cur->next;
00068 }
00069
00070 }
00071
00072 static TDECmdLineOptions options[] =
00073 {
00074 { "stylesheet <xsl>", I18N_NOOP( "Stylesheet to use" ), 0 },
00075 { "stdout", I18N_NOOP( "Output whole document to stdout" ), 0 },
00076 { "o", 0, 0 },
00077 { "output <file>", I18N_NOOP("Output whole document to file" ), 0 },
00078 { "htdig", I18N_NOOP( "Create a ht://dig compatible index" ), 0 },
00079 { "check", I18N_NOOP( "Check the document for validity" ), 0 },
00080 { "cache <file>", I18N_NOOP( "Create a cache file for the document" ), 0},
00081 { "srcdir <dir>", I18N_NOOP( "Set the srcdir, for tdelibs" ), 0},
00082 { "param <key>=<value>", I18N_NOOP( "Parameters to pass to the stylesheet" ), 0},
00083 { "+xml", I18N_NOOP("The file to transform"), 0},
00084 TDECmdLineLastOption
00085 };
00086
00087
00088
00089
00090 int main(int argc, char **argv) {
00091
00092
00093
00094 TDEAboutData aboutData( "meinproc", I18N_NOOP("XML-Translator" ),
00095 "$Revision$",
00096 I18N_NOOP("TDE Translator for XML"));
00097
00098 TDECmdLineArgs::init(argc, argv, &aboutData);
00099 TDECmdLineArgs::addCmdLineOptions( options );
00100
00101 TDELocale::setMainCatalogue("tdeio_help");
00102 TDEInstance ins("meinproc");
00103 TDEGlobal::locale();
00104
00105
00106 TDECmdLineArgs *args = TDECmdLineArgs::parsedArgs();
00107 if ( args->count() != 1 ) {
00108 args->usage();
00109 return ( 1 );
00110 }
00111
00112
00113 TQString srcdir;
00114 if ( args->isSet( "srcdir" ) )
00115 srcdir = TQDir( TQFile::decodeName( args->getOption( "srcdir" ) ) ).absPath();
00116 fillInstance(ins,srcdir);
00117
00118 LIBXML_TEST_VERSION
00119
00120 TQString checkFilename = TQFile::decodeName(args->arg( 0 ));
00121 TQFileInfo checkFile(checkFilename);
00122 if (!checkFile.exists())
00123 {
00124 kdError() << "File '" << checkFilename << "' does not exist." << endl;
00125 return ( 2 );
00126 }
00127 if (!checkFile.isFile())
00128 {
00129 kdError() << "'" << checkFilename << "' is not a file." << endl;
00130 return ( 2 );
00131 }
00132 if (!checkFile.isReadable())
00133 {
00134 kdError() << "File '" << checkFilename << "' is not readable." << endl;
00135 return ( 2 );
00136 }
00137
00138 if ( args->isSet( "check" ) ) {
00139 #if !defined(PATH_MAX) && defined(__GLIBC__)
00140 char *pwd_buffer;
00141 #else
00142 char pwd_buffer[PATH_MAX];
00143 #endif
00144 TQFileInfo file( TQFile::decodeName(args->arg( 0 )) );
00145 #if !defined(PATH_MAX) && defined(__GLIBC__)
00146 if ( !(pwd_buffer = getcwd( NULL, 0 ) ) )
00147 #else
00148 if ( !getcwd( pwd_buffer, sizeof(pwd_buffer) - 1 ) )
00149 #endif
00150 {
00151 kdError() << "getcwd failed." << endl;
00152 return 2;
00153 }
00154
00155 TQString catalogs;
00156 catalogs += locate( "dtd", "customization/catalog.xml" );
00157 catalogs += " ";
00158 catalogs += locate( "dtd", "docbook/xml-dtd-4.1.2/catalog.xml" );
00159
00160 setenv( "XML_CATALOG_FILES", TQFile::encodeName( catalogs ).data(), 1);
00161 TQString exe;
00162 #if defined( XMLLINT )
00163 exe = XMLLINT;
00164 #endif
00165 if ( (::access( TQFile::encodeName( exe ), X_OK )!=0) ) {
00166 exe = TDEStandardDirs::findExe( "xmllint" );
00167 if (exe.isEmpty())
00168 exe = locate( "exe", "xmllint" );
00169 }
00170 if ( ::access( TQFile::encodeName( exe ), X_OK )==0 ) {
00171 chdir( TQFile::encodeName( file.dirPath( true ) ) );
00172 TQString cmd = exe;
00173 cmd += " --valid --noout ";
00174 cmd += TDEProcess::quote(file.fileName());
00175 cmd += " 2>&1";
00176 FILE *xmllint = popen( TQFile::encodeName( cmd ), "r");
00177 char buf[ 512 ];
00178 bool noout = true;
00179 unsigned int n;
00180 while ( ( n = fread(buf, 1, sizeof( buf ), xmllint ) ) ) {
00181 noout = false;
00182 buf[ n ] = '\0';
00183 fputs( buf, stderr );
00184 }
00185 pclose( xmllint );
00186 chdir( pwd_buffer );
00187 if ( !noout ) {
00188 #if !defined(PATH_MAX) && defined(__GLIBC__)
00189 free( pwd_buffer );
00190 #endif
00191 return 1;
00192 }
00193 } else {
00194 kdWarning() << "couldn't find xmllint" << endl;
00195 }
00196 #if !defined(PATH_MAX) && defined(__GLIBC__)
00197 free( pwd_buffer );
00198 #endif
00199 }
00200
00201 xmlSubstituteEntitiesDefault(1);
00202 xmlLoadExtDtdDefaultValue = 1;
00203
00204 TQValueVector<const char *> params;
00205 if (args->isSet( "output" ) ) {
00206 params.append( tqstrdup( "outputFile" ) );
00207 params.append( tqstrdup( TQString(TQFile::decodeName( args->getOption( "output" ) )).latin1() ) );
00208 }
00209 {
00210 const QCStringList paramList = args->getOptionList( "param" );
00211 QCStringList::ConstIterator it = paramList.begin();
00212 QCStringList::ConstIterator end = paramList.end();
00213 for ( ; it != end; ++it ) {
00214 const TQCString tuple = *it;
00215 const int ch = tuple.find( '=' );
00216 if ( ch == -1 ) {
00217 kdError() << "Key-Value tuple '" << tuple << "' lacks a '='!" << endl;
00218 return( 2 );
00219 }
00220 params.append( tqstrdup( tuple.left( ch ) ) );
00221 params.append( tqstrdup( tuple.mid( ch + 1 ) ) );
00222 }
00223 }
00224 params.append( NULL );
00225
00226 bool index = args->isSet( "htdig" );
00227 TQString tss = args->getOption( "stylesheet" );
00228 if ( tss.isEmpty() )
00229 tss = "customization/tde-chunk.xsl";
00230 if ( index )
00231 tss = "customization/htdig_index.xsl" ;
00232
00233 tss = locate( "dtd", tss );
00234
00235 if ( index ) {
00236 xsltStylesheetPtr style_sheet =
00237 xsltParseStylesheetFile((const xmlChar *)tss.latin1());
00238
00239 if (style_sheet != NULL) {
00240
00241 xmlDocPtr doc = xmlParseFile( TQFile::encodeName( args->arg( 0 ) ) );
00242
00243 xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, ¶ms[0]);
00244
00245 xmlFreeDoc(doc);
00246 xsltFreeStylesheet(style_sheet);
00247 if (res != NULL) {
00248 xmlNodePtr cur = xmlDocGetRootElement(res);
00249 if (!cur || xmlStrcmp(cur->name, (const xmlChar *) "entry")) {
00250 fprintf(stderr,"document of the wrong type, root node != entry");
00251 xmlFreeDoc(res);
00252 return(1);
00253 }
00254 PairList list;
00255 parseEntry( list, cur, 0 );
00256 int wi = 0;
00257 for ( PairList::ConstIterator it = list.begin(); it != list.end();
00258 ++it, ++wi )
00259 fprintf( stdout, "w\t%s\t%d\t%d\n", ( *it ).word.utf8().data(),
00260 1000*wi/(int)list.count(), ( *it ).base );
00261
00262 xmlFreeDoc(res);
00263 } else {
00264 kdDebug() << "couldn't parse document " << args->arg( 0 ) << endl;
00265 }
00266 } else {
00267 kdDebug() << "couldn't parse style sheet " << tss << endl;
00268 }
00269
00270 } else {
00271 TQString output = transform(args->arg( 0 ) , tss, params);
00272 if (output.isEmpty()) {
00273 fprintf(stderr, "unable to parse %s\n", args->arg( 0 ));
00274 return(1);
00275 }
00276
00277 TQString cache = args->getOption( "cache" );
00278 if ( !cache.isEmpty() ) {
00279 if ( !saveToCache( output, cache ) ) {
00280 kdError() << TQString(i18n( "Could not write to cache file %1." ).arg( cache )) << endl;
00281 }
00282 goto end;
00283 }
00284
00285 if (output.find( "<FILENAME " ) == -1 || args->isSet( "stdout" ) || args->isSet("output") )
00286 {
00287 TQFile file;
00288 if (args->isSet( "stdout" ) ) {
00289 file.open( IO_WriteOnly, stdout );
00290 } else {
00291 if (args->isSet( "output" ) )
00292 file.setName( TQFile::decodeName(args->getOption( "output" )));
00293 else
00294 file.setName( "index.html" );
00295 file.open(IO_WriteOnly);
00296 }
00297 replaceCharsetHeader( output );
00298
00299 TQCString data = output.local8Bit();
00300 file.writeBlock(data.data(), data.length());
00301 file.close();
00302 } else {
00303 int index = 0;
00304 while (true) {
00305 index = output.find("<FILENAME ", index);
00306 if (index == -1)
00307 break;
00308 int filename_index = index + strlen("<FILENAME filename=\"");
00309
00310 TQString filename = output.mid(filename_index,
00311 output.find("\"", filename_index) -
00312 filename_index);
00313
00314 TQString filedata = splitOut(output, index);
00315 TQFile file(filename);
00316 file.open(IO_WriteOnly);
00317 replaceCharsetHeader( filedata );
00318 TQCString data = fromUnicode( filedata );
00319 file.writeBlock(data.data(), data.length());
00320 file.close();
00321
00322 index += 8;
00323 }
00324 }
00325 }
00326 end:
00327 xmlCleanupParser();
00328 xmlMemoryDump();
00329 return(0);
00330 }
00331