• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • tdespell2
 

tdespell2

ispell_checker.cpp

00001 /* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* tdespell2 - adopted from Enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  * Copyright (C) 2004 Zack Rusin <zack@kde.org>
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the
00018  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  * Boston, MA 02110-1301, USA.
00020  *
00021  * In addition, as a special exception, Dom Lachowicz
00022  * gives permission to link the code of this program with
00023  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00024  * spell checker backend) and distribute linked combinations including
00025  * the two.  You must obey the GNU Lesser General Public License in all
00026  * respects for all of the code used other than said providers.  If you modify
00027  * this file, you may extend this exception to your version of the
00028  * file, but you are not obligated to do so.  If you do not wish to
00029  * do so, delete this exception statement from your version.
00030  */
00031 
00032 #include <config.h>
00033 
00034 #include <stdio.h>
00035 #include <stdlib.h>
00036 #include <string.h>
00037 
00038 #include <string>
00039 #include <vector>
00040 
00041 #include "sp_spell.h"
00042 #include "ispell_checker.h"
00043 
00044 #include <tqmap.h>
00045 #include <tqdir.h>
00046 #include <tqfileinfo.h>
00047 
00048 /***************************************************************************/
00049 
/* One row of the language lookup table: a language tag, the ispell hash
 * file used for it, and the name of the encoding listed alongside. */
struct str_ispell_map
{
    const char * lang;   /* language tag, e.g. "en_US" */
    const char * dict;   /* hash file name, e.g. "american.hash" */
    const char * enc;    /* encoding name, e.g. "iso-8859-1" */
};
typedef struct str_ispell_map IspellMap;
00056 
/* Directories searched for ispell hash files, in the order they are tried.
 * The trailing 0 entry ends the list: s_buildHashNames() and s_allDics()
 * walk the array until they reach it.  SYSTEM_LIBDIR is a string macro
 * defined outside this file (presumably by config.h -- confirm). */
00057 static const char *ispell_dirs [] = {
00058     "/usr/" SYSTEM_LIBDIR "/ispell",
00059     "/usr/lib/ispell",
00060     "/usr/local/" SYSTEM_LIBDIR "/ispell",
00061     "/usr/local/lib/ispell",
00062     "/usr/local/share/ispell",
00063     "/usr/share/ispell",
00064     "/usr/pkg/lib",
00065     0
00066 };
/* Language table: each row gives a language tag, the ispell hash file used
 * for it, and the encoding name handed to setDictionaryEncoding().
 * loadDictionaryForLanguage() scans the rows in order and stops at the first
 * whose tag matches exactly; s_allDics() matches rows by hash file name. */
00067 static const IspellMap ispell_map [] = {
00068     {"ca"    ,"catala.hash"         ,"iso-8859-1" },
00069     {"ca_ES" ,"catala.hash"         ,"iso-8859-1" },
00070     {"cs"    ,"czech.hash"          ,"iso-8859-2" },
00071     {"cs_CZ" ,"czech.hash"          ,"iso-8859-2" },
00072     {"da"    ,"dansk.hash"          ,"iso-8859-1" },
00073     {"da_DK" ,"dansk.hash"          ,"iso-8859-1" },
00074     {"de"    ,"deutsch.hash"        ,"iso-8859-1" },
00075     {"de_CH" ,"swiss.hash"          ,"iso-8859-1" },
00076     {"de_AT" ,"deutsch.hash"        ,"iso-8859-1" },
00077     {"de_DE" ,"deutsch.hash"        ,"iso-8859-1" },
00078     {"el"    ,"ellhnika.hash"       ,"iso-8859-7" },
00079     {"el_GR" ,"ellhnika.hash"       ,"iso-8859-7" },
00080     {"en"    ,"british.hash"        ,"iso-8859-1" },
00081     {"en_AU" ,"british.hash"        ,"iso-8859-1" },
00082     {"en_BZ" ,"british.hash"        ,"iso-8859-1" },
00083     {"en_CA" ,"british.hash"        ,"iso-8859-1" },
00084     {"en_GB" ,"british.hash"        ,"iso-8859-1" },
00085     {"en_IE" ,"british.hash"        ,"iso-8859-1" },
00086     {"en_JM" ,"british.hash"        ,"iso-8859-1" },
00087     {"en_NZ" ,"british.hash"        ,"iso-8859-1" },
00088     {"en_TT" ,"british.hash"        ,"iso-8859-1" },
00089     {"en_ZA" ,"british.hash"        ,"iso-8859-1" },
00090     {"en_ZW" ,"british.hash"        ,"iso-8859-1" },
00091     {"en_PH" ,"american.hash"       ,"iso-8859-1" },
00092     {"en_US" ,"american.hash"       ,"iso-8859-1" },
00093     {"eo"    ,"esperanto.hash"      ,"iso-8859-3" },
00094     {"es"    ,"espanol.hash"        ,"iso-8859-1" },
00095     {"es_AR" ,"espanol.hash"        ,"iso-8859-1" },
00096     {"es_BO" ,"espanol.hash"        ,"iso-8859-1" },
00097     {"es_CL" ,"espanol.hash"        ,"iso-8859-1" },
00098     {"es_CO" ,"espanol.hash"        ,"iso-8859-1" },
00099     {"es_CR" ,"espanol.hash"        ,"iso-8859-1" },
00100     {"es_DO" ,"espanol.hash"        ,"iso-8859-1" },
00101     {"es_EC" ,"espanol.hash"        ,"iso-8859-1" },
00102     {"es_ES" ,"espanol.hash"        ,"iso-8859-1" },
00103     {"es_GT" ,"espanol.hash"        ,"iso-8859-1" },
00104     {"es_HN" ,"espanol.hash"        ,"iso-8859-1" },
00105     {"es_MX" ,"espanol.hash"        ,"iso-8859-1" },
00106     {"es_NI" ,"espanol.hash"        ,"iso-8859-1" },
00107     {"es_PA" ,"espanol.hash"        ,"iso-8859-1" },
00108     {"es_PE" ,"espanol.hash"        ,"iso-8859-1" },
00109     {"es_PR" ,"espanol.hash"        ,"iso-8859-1" },
00110     {"es_PY" ,"espanol.hash"        ,"iso-8859-1" },
00111     {"es_SV" ,"espanol.hash"        ,"iso-8859-1" },
00112     {"es_UY" ,"espanol.hash"        ,"iso-8859-1" },
00113     {"es_VE" ,"espanol.hash"        ,"iso-8859-1" },
00114     {"fi"    ,"finnish.hash"        ,"iso-8859-1" },
00115     {"fi_FI" ,"finnish.hash"        ,"iso-8859-1" },
00116     {"fr"    ,"francais.hash"       ,"iso-8859-1" },
00117     {"fr_BE" ,"francais.hash"       ,"iso-8859-1" },
00118     {"fr_CA" ,"francais.hash"       ,"iso-8859-1" },
00119     {"fr_CH" ,"francais.hash"       ,"iso-8859-1" },
00120     {"fr_FR" ,"francais.hash"       ,"iso-8859-1" },
00121     {"fr_LU" ,"francais.hash"       ,"iso-8859-1" },
00122     {"fr_MC" ,"francais.hash"       ,"iso-8859-1" },
00123     {"hu"    ,"hungarian.hash"      ,"iso-8859-2" },
00124     {"hu_HU" ,"hungarian.hash"      ,"iso-8859-2" },
00125     {"ga"    ,"irish.hash"          ,"iso-8859-1" },
00126     {"ga_IE" ,"irish.hash"          ,"iso-8859-1" },
00127     {"gl"    ,"galician.hash"       ,"iso-8859-1" },
00128     {"gl_ES" ,"galician.hash"       ,"iso-8859-1" },
00129     {"ia"    ,"interlingua.hash"    ,"iso-8859-1" },
00130     {"it"    ,"italian.hash"        ,"iso-8859-1" },
00131     {"it_IT" ,"italian.hash"        ,"iso-8859-1" },
00132     {"it_CH" ,"italian.hash"        ,"iso-8859-1" },
00133     {"la"    ,"mlatin.hash"         ,"iso-8859-1" },
00134     {"la_IT" ,"mlatin.hash"         ,"iso-8859-1" },
00135     {"lt"    ,"lietuviu.hash"       ,"iso-8859-13" },
00136     {"lt_LT" ,"lietuviu.hash"       ,"iso-8859-13" },
00137     {"nl"    ,"nederlands.hash"     ,"iso-8859-1" },
00138     {"nl_NL" ,"nederlands.hash"     ,"iso-8859-1" },
00139     {"nl_BE" ,"nederlands.hash"     ,"iso-8859-1" },
00140     {"nb"    ,"norsk.hash"          ,"iso-8859-1" },
00141     {"nb_NO" ,"norsk.hash"          ,"iso-8859-1" },
00142     {"nn"    ,"nynorsk.hash"        ,"iso-8859-1" },
00143     {"nn_NO" ,"nynorsk.hash"        ,"iso-8859-1" },
00144     {"no"    ,"norsk.hash"          ,"iso-8859-1" },
00145     {"no_NO" ,"norsk.hash"          ,"iso-8859-1" },
00146     {"pl"    ,"polish.hash"         ,"iso-8859-2" },
00147     {"pl_PL" ,"polish.hash"         ,"iso-8859-2" },
00148     {"pt"    ,"brazilian.hash"      ,"iso-8859-1" },
00149     {"pt_BR" ,"brazilian.hash"      ,"iso-8859-1" },
00150     {"pt_PT" ,"portugues.hash"      ,"iso-8859-1" },
00151     {"ru"    ,"russian.hash"        ,"koi8-r" },
00152     {"ru_MD" ,"russian.hash"        ,"koi8-r" },
00153     {"ru_RU" ,"russian.hash"        ,"koi8-r" },
00154     {"sc"    ,"sardinian.hash"      ,"iso-8859-1" },
00155     {"sc_IT" ,"sardinian.hash"      ,"iso-8859-1" },
00156     {"sk"    ,"slovak.hash"         ,"iso-8859-2" },
00157     {"sk_SK" ,"slovak.hash"         ,"iso-8859-2" },
00158     {"sl"    ,"slovensko.hash"      ,"iso-8859-2" },
00159     {"sl_SI" ,"slovensko.hash"      ,"iso-8859-2" },
00160     {"sv"    ,"svenska.hash"        ,"iso-8859-1" },
00161     {"sv_SE" ,"svenska.hash"        ,"iso-8859-1" },
00162     {"uk"    ,"ukrainian.hash"      ,"koi8-u" },
00163     {"uk_UA" ,"ukrainian.hash"      ,"koi8-u" },
00164     {"yi"    ,"yiddish-yivo.hash"   ,"utf-8" }
00165 };
00166 
00167 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
00168 static TQMap<TQString, TQString> ispell_dict_map;
00169 
00170 
00171 void
00172 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
00173 {
00174     if (inEncoding && strlen(inEncoding))
00175         {
00176             m_translate_in = TQTextCodec::codecForName(inEncoding);
00177         }
00178 }
00179 
00180 /***************************************************************************/
00181 /***************************************************************************/
00182 
00183 ISpellChecker::ISpellChecker()
00184     : deftflag(-1),
00185      prefstringchar(-1),
00186      m_bSuccessfulInit(false),
00187      m_BC(NULL),
00188      m_cd(NULL),
00189      m_cl(NULL),
00190      m_cm(NULL),
00191      m_ho(NULL),
00192      m_nd(NULL),
00193      m_so(NULL),
00194      m_se(NULL),
00195      m_ti(NULL),
00196      m_te(NULL),
00197      m_hashstrings(NULL),
00198      m_hashtbl(NULL),
00199      m_pflaglist(NULL),
00200      m_sflaglist(NULL),
00201      m_chartypes(NULL),
00202      m_infile(NULL),
00203      m_outfile(NULL),
00204      m_askfilename(NULL),
00205      m_Trynum(0),
00206      m_translate_in(0)
00207 {
00208     memset(m_sflagindex,0,sizeof(m_sflagindex));
00209     memset(m_pflagindex,0,sizeof(m_pflagindex));
00210 }
00211 
/* Frees a malloc'ed pointer; a null pointer is skipped without calling
 * free().  Wrapped in do/while so it behaves as a single statement. */
#ifndef FREEP
#define FREEP(p) \
    do { \
        if ((p) != NULL) \
            free(p); \
    } while (0)
#endif
00215 
00216 ISpellChecker::~ISpellChecker()
00217 {
00218     if (m_bSuccessfulInit) {
00219         // only cleanup our mess if we were successfully initialized
00220 
00221         clearindex (m_pflagindex);
00222         clearindex (m_sflagindex);
00223     }
00224 
00225     FREEP(m_hashtbl);
00226     FREEP(m_hashstrings);
00227     FREEP(m_sflaglist);
00228     FREEP(m_chartypes);
00229 
00230     delete m_translate_in;
00231     m_translate_in = 0;
00232 }
00233 
00234 bool
00235 ISpellChecker::checkWord( const TQString& utf8Word )
00236 {
00237     ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00238     if (!m_bSuccessfulInit)
00239         return false;
00240 
00241     if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
00242         return false;
00243 
00244     bool retVal = false;
00245     TQCString out;
00246     if (!m_translate_in)
00247         return false;
00248     else {
00249         /* convert to 8bit string and null terminate */
00250         int len_out = utf8Word.length();
00251 
00252         out = m_translate_in->fromUnicode( utf8Word, len_out );
00253     }
00254 
00255     if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00256         {
00257             if (good(iWord, 0, 0, 1, 0) == 1 ||
00258                 compoundgood(iWord, 1) == 1)
00259                 {
00260                     retVal = true;
00261                 }
00262         }
00263 
00264     return retVal;
00265 }
00266 
00267 TQStringList
00268 ISpellChecker::suggestWord(const TQString& utf8Word)
00269 {
00270     ichar_t  iWord[INPUTWORDLEN + MAXAFFIXLEN];
00271     int  c;
00272 
00273     if (!m_bSuccessfulInit)
00274         return TQStringList();
00275 
00276     if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
00277             utf8Word.length() == 0)
00278         return TQStringList();
00279 
00280     TQCString out;
00281     if (!m_translate_in)
00282         return TQStringList();
00283     else
00284         {
00285             /* convert to 8bit string and null terminate */
00286 
00287             int len_out = utf8Word.length();
00288             out = m_translate_in->fromUnicode( utf8Word, len_out );
00289         }
00290 
00291     if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00292         makepossibilities(iWord);
00293     else
00294         return TQStringList();
00295 
00296     TQStringList sugg_arr;
00297     for (c = 0; c < m_pcount; c++)
00298     {
00299         TQString utf8Word;
00300 
00301         if (!m_translate_in)
00302         {
00303             /* copy to 8bit string and null terminate */
00304             utf8Word = TQString::fromUtf8( m_possibilities[c] );
00305         }
00306         else
00307         {
00308             /* convert to 32bit string and null terminate */
00309             utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
00310         }
00311 
00312         sugg_arr.append( utf8Word );
00313     }
00314 
00315     return sugg_arr;
00316 }
00317 
00318 static void
00319 s_buildHashNames (std::vector<std::string> & names, const char * dict)
00320 {
00321     const char * tmp = 0;
00322     int i = 0;
00323 
00324     names.clear ();
00325 
00326     while ( (tmp = ispell_dirs[i++]) ) {
00327         TQCString maybeFile = TQCString( tmp ) + '/';
00328         maybeFile += dict;
00329         names.push_back( maybeFile.data() );
00330     }
00331 }
00332 
00333 static void
00334 s_allDics()
00335 {
00336     const char * tmp = 0;
00337     int i = 0;
00338 
00339     while ( (tmp = ispell_dirs[i++]) ) {
00340         TQDir dir( tmp );
00341         TQStringList lst = dir.entryList( "*.hash" );
00342         for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
00343             TQFileInfo info( *it );
00344             for (size_t i = 0; i < size_ispell_map; i++)
00345             {
00346                 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00347                 if (!strcmp (info.fileName().latin1(), mapping->dict))
00348                 {
00349                     ispell_dict_map.insert( mapping->lang, *it );
00350                 }
00351             }
00352         }
00353     }
00354 }
00355 
00356 TQValueList<TQString>
00357 ISpellChecker::allDics()
00358 {
00359     if ( ispell_dict_map.empty() )
00360         s_allDics();
00361 
00362     return ispell_dict_map.keys();
00363 }
00364 
00365 TQString
00366 ISpellChecker::loadDictionary (const char * szdict)
00367 {
00368     std::vector<std::string> dict_names;
00369 
00370     s_buildHashNames (dict_names, szdict);
00371 
00372     for (size_t i = 0; i < dict_names.size(); i++)
00373         {
00374             if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
00375                 return dict_names[i].c_str();
00376         }
00377 
00378     return TQString::null;
00379 }
00380 
00387 bool
00388 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
00389 {
00390     TQString hashname;
00391 
00392     const char * encoding = NULL;
00393     const char * szFile = NULL;
00394 
00395     for (size_t i = 0; i < size_ispell_map; i++)
00396         {
00397             const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00398             if (!strcmp (szLang, mapping->lang))
00399                 {
00400                     szFile = mapping->dict;
00401                     encoding = mapping->enc;
00402                     break;
00403                 }
00404         }
00405 
00406     if (!szFile || !strlen(szFile))
00407         return false;
00408 
00409     alloc_ispell_struct();
00410 
00411     hashname = loadDictionary(szFile);
00412     if (hashname.isEmpty())
00413         return false;
00414 
00415     // one of the two above calls succeeded
00416     setDictionaryEncoding (hashname, encoding);
00417 
00418     return true;
00419 }
00420 
00421 void
00422 ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
00423 {
00424     /* Get Hash encoding from XML file. This should always work! */
00425     try_autodetect_charset(encoding);
00426 
00427     if (m_translate_in)
00428         {
00429             /* We still have to setup prefstringchar*/
00430             prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
00431                               : static_cast<int *>(NULL));
00432 
00433             if (prefstringchar < 0)
00434                 {
00435                     std::string teststring;
00436                     for(int n1 = 1; n1 <= 15; n1++)
00437                         {
00438                             teststring = "latin" + n1;
00439                             prefstringchar = findfiletype(teststring.c_str(), 1,
00440                                               deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00441                             if (prefstringchar >= 0)
00442                                 break;
00443                         }
00444                 }
00445 
00446             return; /* success */
00447         }
00448 
00449     /* Test for UTF-8 first */
00450     prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00451     if (prefstringchar >= 0)
00452         {
00453             m_translate_in = TQTextCodec::codecForName("utf8");
00454         }
00455 
00456     if (m_translate_in)
00457         return; /* success */
00458 
00459     /* Test for "latinN" */
00460     if (!m_translate_in)
00461         {
00462             /* Look for "altstringtype" names from latin1 to latin15 */
00463             for(int n1 = 1; n1 <= 15; n1++)
00464                 {
00465                     TQString teststring = TQString("latin%1").arg(n1);
00466                     prefstringchar = findfiletype(teststring.latin1(), 1,
00467                                       deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00468                     if (prefstringchar >= 0)
00469                         {
00470                             //FIXME: latin1 might be wrong
00471                             m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
00472                             break;
00473                         }
00474                 }
00475         }
00476 
00477     /* If nothing found, use latin1 */
00478     if (!m_translate_in)
00479         {
00480             m_translate_in = TQTextCodec::codecForName("latin1");
00481         }
00482 }
00483 
00484 bool
00485 ISpellChecker::requestDictionary(const char *szLang)
00486 {
00487     if (!loadDictionaryForLanguage (szLang))
00488         {
00489             // handle a shortened version of the language tag: en_US => en
00490             std::string shortened_dict (szLang);
00491             size_t uscore_pos;
00492 
00493             if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
00494                 shortened_dict = shortened_dict.substr(0, uscore_pos);
00495                 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
00496                     return false;
00497             } else
00498                 return false;
00499         }
00500 
00501     m_bSuccessfulInit = true;
00502 
00503     if (prefstringchar < 0)
00504         m_defdupchar = 0;
00505     else
00506         m_defdupchar = prefstringchar;
00507 
00508     return true;
00509 }

tdespell2

Skip menu "tdespell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

tdespell2

Skip menu "tdespell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •   kspeech
  •     interface
  •     library
  •   tdetexteditor
  • kate
  • kded
  • kdoctools
  • kimgio
  • kjs
  • libtdemid
  • libtdescreensaver
  • tdeabc
  • tdecmshell
  • tdecore
  • tdefx
  • tdehtml
  • tdeinit
  • tdeio
  •   bookmarks
  •   httpfilter
  •   kpasswdserver
  •   kssl
  •   tdefile
  •   tdeio
  •   tdeioexec
  • tdeioslave
  •   http
  • tdemdi
  •   tdemdi
  • tdenewstuff
  • tdeparts
  • tdeprint
  • tderandr
  • tderesources
  • tdespell2
  • tdesu
  • tdeui
  • tdeunittest
  • tdeutils
  • tdewallet
Generated for tdespell2 by doxygen 1.7.1
This website is maintained by Timothy Pearson.