00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <config.h>
00033
00034 #include <stdio.h>
00035 #include <stdlib.h>
00036 #include <string.h>
00037
00038 #include <string>
00039 #include <vector>
00040
00041 #include "sp_spell.h"
00042 #include "ispell_checker.h"
00043
00044 #include <tqmap.h>
00045 #include <tqdir.h>
00046 #include <tqfileinfo.h>
00047
00048
00049
/**
 * One row of the language table: ties a language/locale tag to the ispell
 * hash file used for it and to the name of the encoding used with that file.
 */
typedef struct str_ispell_map
{
    const char * lang;   /**< language or locale tag, e.g. "en_US" */
    const char * dict;   /**< hash file name, e.g. "american.hash" */
    const char * enc;    /**< encoding name, e.g. "iso-8859-1" */
} IspellMap;
00056
00057 static const char *ispell_dirs [] = {
00058 "/usr/" SYSTEM_LIBDIR "/ispell",
00059 "/usr/lib/ispell",
00060 "/usr/local/" SYSTEM_LIBDIR "/ispell",
00061 "/usr/local/lib/ispell",
00062 "/usr/local/share/ispell",
00063 "/usr/share/ispell",
00064 "/usr/pkg/lib",
00065 0
00066 };
00067 static const IspellMap ispell_map [] = {
00068 {"ca" ,"catala.hash" ,"iso-8859-1" },
00069 {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
00070 {"cs" ,"czech.hash" ,"iso-8859-2" },
00071 {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
00072 {"da" ,"dansk.hash" ,"iso-8859-1" },
00073 {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
00074 {"de" ,"deutsch.hash" ,"iso-8859-1" },
00075 {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
00076 {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
00077 {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
00078 {"el" ,"ellhnika.hash" ,"iso-8859-7" },
00079 {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
00080 {"en" ,"british.hash" ,"iso-8859-1" },
00081 {"en_AU" ,"british.hash" ,"iso-8859-1" },
00082 {"en_BZ" ,"british.hash" ,"iso-8859-1" },
00083 {"en_CA" ,"british.hash" ,"iso-8859-1" },
00084 {"en_GB" ,"british.hash" ,"iso-8859-1" },
00085 {"en_IE" ,"british.hash" ,"iso-8859-1" },
00086 {"en_JM" ,"british.hash" ,"iso-8859-1" },
00087 {"en_NZ" ,"british.hash" ,"iso-8859-1" },
00088 {"en_TT" ,"british.hash" ,"iso-8859-1" },
00089 {"en_ZA" ,"british.hash" ,"iso-8859-1" },
00090 {"en_ZW" ,"british.hash" ,"iso-8859-1" },
00091 {"en_PH" ,"american.hash" ,"iso-8859-1" },
00092 {"en_US" ,"american.hash" ,"iso-8859-1" },
00093 {"eo" ,"esperanto.hash" ,"iso-8859-3" },
00094 {"es" ,"espanol.hash" ,"iso-8859-1" },
00095 {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
00096 {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
00097 {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
00098 {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
00099 {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
00100 {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
00101 {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
00102 {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
00103 {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
00104 {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
00105 {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
00106 {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
00107 {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
00108 {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
00109 {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
00110 {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
00111 {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
00112 {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
00113 {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
00114 {"fi" ,"finnish.hash" ,"iso-8859-1" },
00115 {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
00116 {"fr" ,"francais.hash" ,"iso-8859-1" },
00117 {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
00118 {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
00119 {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
00120 {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
00121 {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
00122 {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
00123 {"hu" ,"hungarian.hash" ,"iso-8859-2" },
00124 {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
00125 {"ga" ,"irish.hash" ,"iso-8859-1" },
00126 {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
00127 {"gl" ,"galician.hash" ,"iso-8859-1" },
00128 {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
00129 {"ia" ,"interlingua.hash" ,"iso-8859-1" },
00130 {"it" ,"italian.hash" ,"iso-8859-1" },
00131 {"it_IT" ,"italian.hash" ,"iso-8859-1" },
00132 {"it_CH" ,"italian.hash" ,"iso-8859-1" },
00133 {"la" ,"mlatin.hash" ,"iso-8859-1" },
00134 {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
00135 {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
00136 {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
00137 {"nl" ,"nederlands.hash" ,"iso-8859-1" },
00138 {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
00139 {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
00140 {"nb" ,"norsk.hash" ,"iso-8859-1" },
00141 {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
00142 {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
00143 {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
00144 {"no" ,"norsk.hash" ,"iso-8859-1" },
00145 {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
00146 {"pl" ,"polish.hash" ,"iso-8859-2" },
00147 {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
00148 {"pt" ,"brazilian.hash" ,"iso-8859-1" },
00149 {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
00150 {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
00151 {"ru" ,"russian.hash" ,"koi8-r" },
00152 {"ru_MD" ,"russian.hash" ,"koi8-r" },
00153 {"ru_RU" ,"russian.hash" ,"koi8-r" },
00154 {"sc" ,"sardinian.hash" ,"iso-8859-1" },
00155 {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
00156 {"sk" ,"slovak.hash" ,"iso-8859-2" },
00157 {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
00158 {"sl" ,"slovensko.hash" ,"iso-8859-2" },
00159 {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
00160 {"sv" ,"svenska.hash" ,"iso-8859-1" },
00161 {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
00162 {"uk" ,"ukrainian.hash" ,"koi8-u" },
00163 {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
00164 {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
00165 };
00166
00167 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
00168 static TQMap<TQString, TQString> ispell_dict_map;
00169
00170
00171 void
00172 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
00173 {
00174 if (inEncoding && strlen(inEncoding))
00175 {
00176 m_translate_in = TQTextCodec::codecForName(inEncoding);
00177 }
00178 }
00179
00180
00181
00182
00183 ISpellChecker::ISpellChecker()
00184 : deftflag(-1),
00185 prefstringchar(-1),
00186 m_bSuccessfulInit(false),
00187 m_BC(NULL),
00188 m_cd(NULL),
00189 m_cl(NULL),
00190 m_cm(NULL),
00191 m_ho(NULL),
00192 m_nd(NULL),
00193 m_so(NULL),
00194 m_se(NULL),
00195 m_ti(NULL),
00196 m_te(NULL),
00197 m_hashstrings(NULL),
00198 m_hashtbl(NULL),
00199 m_pflaglist(NULL),
00200 m_sflaglist(NULL),
00201 m_chartypes(NULL),
00202 m_infile(NULL),
00203 m_outfile(NULL),
00204 m_askfilename(NULL),
00205 m_Trynum(0),
00206 m_translate_in(0)
00207 {
00208 memset(m_sflagindex,0,sizeof(m_sflagindex));
00209 memset(m_pflagindex,0,sizeof(m_pflagindex));
00210 }
00211
00212 #ifndef FREEP
00213 #define FREEP(p) do { if (p) free(p); } while (0)
00214 #endif
00215
00216 ISpellChecker::~ISpellChecker()
00217 {
00218 if (m_bSuccessfulInit) {
00219
00220
00221 clearindex (m_pflagindex);
00222 clearindex (m_sflagindex);
00223 }
00224
00225 FREEP(m_hashtbl);
00226 FREEP(m_hashstrings);
00227 FREEP(m_sflaglist);
00228 FREEP(m_chartypes);
00229
00230 delete m_translate_in;
00231 m_translate_in = 0;
00232 }
00233
00234 bool
00235 ISpellChecker::checkWord( const TQString& utf8Word )
00236 {
00237 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00238 if (!m_bSuccessfulInit)
00239 return false;
00240
00241 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
00242 return false;
00243
00244 bool retVal = false;
00245 TQCString out;
00246 if (!m_translate_in)
00247 return false;
00248 else {
00249
00250 int len_out = utf8Word.length();
00251
00252 out = m_translate_in->fromUnicode( utf8Word, len_out );
00253 }
00254
00255 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00256 {
00257 if (good(iWord, 0, 0, 1, 0) == 1 ||
00258 compoundgood(iWord, 1) == 1)
00259 {
00260 retVal = true;
00261 }
00262 }
00263
00264 return retVal;
00265 }
00266
00267 TQStringList
00268 ISpellChecker::suggestWord(const TQString& utf8Word)
00269 {
00270 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
00271 int c;
00272
00273 if (!m_bSuccessfulInit)
00274 return TQStringList();
00275
00276 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
00277 utf8Word.length() == 0)
00278 return TQStringList();
00279
00280 TQCString out;
00281 if (!m_translate_in)
00282 return TQStringList();
00283 else
00284 {
00285
00286
00287 int len_out = utf8Word.length();
00288 out = m_translate_in->fromUnicode( utf8Word, len_out );
00289 }
00290
00291 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
00292 makepossibilities(iWord);
00293 else
00294 return TQStringList();
00295
00296 TQStringList sugg_arr;
00297 for (c = 0; c < m_pcount; c++)
00298 {
00299 TQString utf8Word;
00300
00301 if (!m_translate_in)
00302 {
00303
00304 utf8Word = TQString::fromUtf8( m_possibilities[c] );
00305 }
00306 else
00307 {
00308
00309 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
00310 }
00311
00312 sugg_arr.append( utf8Word );
00313 }
00314
00315 return sugg_arr;
00316 }
00317
00318 static void
00319 s_buildHashNames (std::vector<std::string> & names, const char * dict)
00320 {
00321 const char * tmp = 0;
00322 int i = 0;
00323
00324 names.clear ();
00325
00326 while ( (tmp = ispell_dirs[i++]) ) {
00327 TQCString maybeFile = TQCString( tmp ) + '/';
00328 maybeFile += dict;
00329 names.push_back( maybeFile.data() );
00330 }
00331 }
00332
00333 static void
00334 s_allDics()
00335 {
00336 const char * tmp = 0;
00337 int i = 0;
00338
00339 while ( (tmp = ispell_dirs[i++]) ) {
00340 TQDir dir( tmp );
00341 TQStringList lst = dir.entryList( "*.hash" );
00342 for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
00343 TQFileInfo info( *it );
00344 for (size_t i = 0; i < size_ispell_map; i++)
00345 {
00346 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00347 if (!strcmp (info.fileName().latin1(), mapping->dict))
00348 {
00349 ispell_dict_map.insert( mapping->lang, *it );
00350 }
00351 }
00352 }
00353 }
00354 }
00355
00356 TQValueList<TQString>
00357 ISpellChecker::allDics()
00358 {
00359 if ( ispell_dict_map.empty() )
00360 s_allDics();
00361
00362 return ispell_dict_map.keys();
00363 }
00364
00365 TQString
00366 ISpellChecker::loadDictionary (const char * szdict)
00367 {
00368 std::vector<std::string> dict_names;
00369
00370 s_buildHashNames (dict_names, szdict);
00371
00372 for (size_t i = 0; i < dict_names.size(); i++)
00373 {
00374 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
00375 return dict_names[i].c_str();
00376 }
00377
00378 return TQString::null;
00379 }
00380
00387 bool
00388 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
00389 {
00390 TQString hashname;
00391
00392 const char * encoding = NULL;
00393 const char * szFile = NULL;
00394
00395 for (size_t i = 0; i < size_ispell_map; i++)
00396 {
00397 const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
00398 if (!strcmp (szLang, mapping->lang))
00399 {
00400 szFile = mapping->dict;
00401 encoding = mapping->enc;
00402 break;
00403 }
00404 }
00405
00406 if (!szFile || !strlen(szFile))
00407 return false;
00408
00409 alloc_ispell_struct();
00410
00411 hashname = loadDictionary(szFile);
00412 if (hashname.isEmpty())
00413 return false;
00414
00415
00416 setDictionaryEncoding (hashname, encoding);
00417
00418 return true;
00419 }
00420
00421 void
00422 ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
00423 {
00424
00425 try_autodetect_charset(encoding);
00426
00427 if (m_translate_in)
00428 {
00429
00430 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
00431 : static_cast<int *>(NULL));
00432
00433 if (prefstringchar < 0)
00434 {
00435 std::string teststring;
00436 for(int n1 = 1; n1 <= 15; n1++)
00437 {
00438 teststring = "latin" + n1;
00439 prefstringchar = findfiletype(teststring.c_str(), 1,
00440 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00441 if (prefstringchar >= 0)
00442 break;
00443 }
00444 }
00445
00446 return;
00447 }
00448
00449
00450 prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00451 if (prefstringchar >= 0)
00452 {
00453 m_translate_in = TQTextCodec::codecForName("utf8");
00454 }
00455
00456 if (m_translate_in)
00457 return;
00458
00459
00460 if (!m_translate_in)
00461 {
00462
00463 for(int n1 = 1; n1 <= 15; n1++)
00464 {
00465 TQString teststring = TQString("latin%1").arg(n1);
00466 prefstringchar = findfiletype(teststring.latin1(), 1,
00467 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
00468 if (prefstringchar >= 0)
00469 {
00470
00471 m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
00472 break;
00473 }
00474 }
00475 }
00476
00477
00478 if (!m_translate_in)
00479 {
00480 m_translate_in = TQTextCodec::codecForName("latin1");
00481 }
00482 }
00483
00484 bool
00485 ISpellChecker::requestDictionary(const char *szLang)
00486 {
00487 if (!loadDictionaryForLanguage (szLang))
00488 {
00489
00490 std::string shortened_dict (szLang);
00491 size_t uscore_pos;
00492
00493 if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
00494 shortened_dict = shortened_dict.substr(0, uscore_pos);
00495 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
00496 return false;
00497 } else
00498 return false;
00499 }
00500
00501 m_bSuccessfulInit = true;
00502
00503 if (prefstringchar < 0)
00504 m_defdupchar = 0;
00505 else
00506 m_defdupchar = prefstringchar;
00507
00508 return true;
00509 }