• Skip to content
  • Skip to link menu
Trinity API Reference
  • Trinity API Reference
  • kspell2
 

kspell2

  • kspell2
  • plugins
  • ispell
ispell_checker.cpp
1 /* vim: set sw=8: -*- Mode: C++; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* kspell2 - adopted from Enchant
3  * Copyright (C) 2003 Dom Lachowicz
4  * Copyright (C) 2004 Zack Rusin <zack@kde.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the
18  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  *
21  * In addition, as a special exception, Dom Lachowicz
22  * gives permission to link the code of this program with
23  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
24  * spell checker backend) and distribute linked combinations including
25  * the two. You must obey the GNU Lesser General Public License in all
26  * respects for all of the code used other than said providers. If you modify
27  * this file, you may extend this exception to your version of the
28  * file, but you are not obligated to do so. If you do not wish to
29  * do so, delete this exception statement from your version.
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #include <string>
37 #include <vector>
38 
39 #include "sp_spell.h"
40 #include "ispell_checker.h"
41 
42 #include <tqmap.h>
43 #include <tqdir.h>
44 #include <tqfileinfo.h>
45 
46 /***************************************************************************/
47 
/* One row of the language lookup table: a language tag, the name of the
 * ispell hash file used for it, and the character encoding name that is
 * handed to the text codec lookup for that hash file. */
struct str_ispell_map
{
	const char * lang;	/* language tag, e.g. "en" or "en_US" */
	const char * dict;	/* hash file name, e.g. "american.hash" */
	const char * enc;	/* encoding name, e.g. "iso-8859-1" */
};
typedef struct str_ispell_map IspellMap;
54 
/* Directories that are scanned, in this order, for ispell hash files.
 * The list ends with a null pointer sentinel. */
static const char *ispell_dirs[] =
{
	"/usr/lib/ispell",
	"/usr/local/lib/ispell",
	"/usr/local/share/ispell",
	"/usr/share/ispell",
	"/usr/pkg/lib",
	NULL
};
63 static const IspellMap ispell_map [] = {
64  {"ca" ,"catala.hash" ,"iso-8859-1" },
65  {"ca_ES" ,"catala.hash" ,"iso-8859-1" },
66  {"cs" ,"czech.hash" ,"iso-8859-2" },
67  {"cs_CZ" ,"czech.hash" ,"iso-8859-2" },
68  {"da" ,"dansk.hash" ,"iso-8859-1" },
69  {"da_DK" ,"dansk.hash" ,"iso-8859-1" },
70  {"de" ,"deutsch.hash" ,"iso-8859-1" },
71  {"de_CH" ,"swiss.hash" ,"iso-8859-1" },
72  {"de_AT" ,"deutsch.hash" ,"iso-8859-1" },
73  {"de_DE" ,"deutsch.hash" ,"iso-8859-1" },
74  {"el" ,"ellhnika.hash" ,"iso-8859-7" },
75  {"el_GR" ,"ellhnika.hash" ,"iso-8859-7" },
76  {"en" ,"british.hash" ,"iso-8859-1" },
77  {"en_AU" ,"british.hash" ,"iso-8859-1" },
78  {"en_BZ" ,"british.hash" ,"iso-8859-1" },
79  {"en_CA" ,"british.hash" ,"iso-8859-1" },
80  {"en_GB" ,"british.hash" ,"iso-8859-1" },
81  {"en_IE" ,"british.hash" ,"iso-8859-1" },
82  {"en_JM" ,"british.hash" ,"iso-8859-1" },
83  {"en_NZ" ,"british.hash" ,"iso-8859-1" },
84  {"en_TT" ,"british.hash" ,"iso-8859-1" },
85  {"en_ZA" ,"british.hash" ,"iso-8859-1" },
86  {"en_ZW" ,"british.hash" ,"iso-8859-1" },
87  {"en_PH" ,"american.hash" ,"iso-8859-1" },
88  {"en_US" ,"american.hash" ,"iso-8859-1" },
89  {"eo" ,"esperanto.hash" ,"iso-8859-3" },
90  {"es" ,"espanol.hash" ,"iso-8859-1" },
91  {"es_AR" ,"espanol.hash" ,"iso-8859-1" },
92  {"es_BO" ,"espanol.hash" ,"iso-8859-1" },
93  {"es_CL" ,"espanol.hash" ,"iso-8859-1" },
94  {"es_CO" ,"espanol.hash" ,"iso-8859-1" },
95  {"es_CR" ,"espanol.hash" ,"iso-8859-1" },
96  {"es_DO" ,"espanol.hash" ,"iso-8859-1" },
97  {"es_EC" ,"espanol.hash" ,"iso-8859-1" },
98  {"es_ES" ,"espanol.hash" ,"iso-8859-1" },
99  {"es_GT" ,"espanol.hash" ,"iso-8859-1" },
100  {"es_HN" ,"espanol.hash" ,"iso-8859-1" },
101  {"es_MX" ,"espanol.hash" ,"iso-8859-1" },
102  {"es_NI" ,"espanol.hash" ,"iso-8859-1" },
103  {"es_PA" ,"espanol.hash" ,"iso-8859-1" },
104  {"es_PE" ,"espanol.hash" ,"iso-8859-1" },
105  {"es_PR" ,"espanol.hash" ,"iso-8859-1" },
106  {"es_PY" ,"espanol.hash" ,"iso-8859-1" },
107  {"es_SV" ,"espanol.hash" ,"iso-8859-1" },
108  {"es_UY" ,"espanol.hash" ,"iso-8859-1" },
109  {"es_VE" ,"espanol.hash" ,"iso-8859-1" },
110  {"fi" ,"finnish.hash" ,"iso-8859-1" },
111  {"fi_FI" ,"finnish.hash" ,"iso-8859-1" },
112  {"fr" ,"francais.hash" ,"iso-8859-1" },
113  {"fr_BE" ,"francais.hash" ,"iso-8859-1" },
114  {"fr_CA" ,"francais.hash" ,"iso-8859-1" },
115  {"fr_CH" ,"francais.hash" ,"iso-8859-1" },
116  {"fr_FR" ,"francais.hash" ,"iso-8859-1" },
117  {"fr_LU" ,"francais.hash" ,"iso-8859-1" },
118  {"fr_MC" ,"francais.hash" ,"iso-8859-1" },
119  {"hu" ,"hungarian.hash" ,"iso-8859-2" },
120  {"hu_HU" ,"hungarian.hash" ,"iso-8859-2" },
121  {"ga" ,"irish.hash" ,"iso-8859-1" },
122  {"ga_IE" ,"irish.hash" ,"iso-8859-1" },
123  {"gl" ,"galician.hash" ,"iso-8859-1" },
124  {"gl_ES" ,"galician.hash" ,"iso-8859-1" },
125  {"ia" ,"interlingua.hash" ,"iso-8859-1" },
126  {"it" ,"italian.hash" ,"iso-8859-1" },
127  {"it_IT" ,"italian.hash" ,"iso-8859-1" },
128  {"it_CH" ,"italian.hash" ,"iso-8859-1" },
129  {"la" ,"mlatin.hash" ,"iso-8859-1" },
130  {"la_IT" ,"mlatin.hash" ,"iso-8859-1" },
131  {"lt" ,"lietuviu.hash" ,"iso-8859-13" },
132  {"lt_LT" ,"lietuviu.hash" ,"iso-8859-13" },
133  {"nl" ,"nederlands.hash" ,"iso-8859-1" },
134  {"nl_NL" ,"nederlands.hash" ,"iso-8859-1" },
135  {"nl_BE" ,"nederlands.hash" ,"iso-8859-1" },
136  {"nb" ,"norsk.hash" ,"iso-8859-1" },
137  {"nb_NO" ,"norsk.hash" ,"iso-8859-1" },
138  {"nn" ,"nynorsk.hash" ,"iso-8859-1" },
139  {"nn_NO" ,"nynorsk.hash" ,"iso-8859-1" },
140  {"no" ,"norsk.hash" ,"iso-8859-1" },
141  {"no_NO" ,"norsk.hash" ,"iso-8859-1" },
142  {"pl" ,"polish.hash" ,"iso-8859-2" },
143  {"pl_PL" ,"polish.hash" ,"iso-8859-2" },
144  {"pt" ,"brazilian.hash" ,"iso-8859-1" },
145  {"pt_BR" ,"brazilian.hash" ,"iso-8859-1" },
146  {"pt_PT" ,"portugues.hash" ,"iso-8859-1" },
147  {"ru" ,"russian.hash" ,"koi8-r" },
148  {"ru_MD" ,"russian.hash" ,"koi8-r" },
149  {"ru_RU" ,"russian.hash" ,"koi8-r" },
150  {"sc" ,"sardinian.hash" ,"iso-8859-1" },
151  {"sc_IT" ,"sardinian.hash" ,"iso-8859-1" },
152  {"sk" ,"slovak.hash" ,"iso-8859-2" },
153  {"sk_SK" ,"slovak.hash" ,"iso-8859-2" },
154  {"sl" ,"slovensko.hash" ,"iso-8859-2" },
155  {"sl_SI" ,"slovensko.hash" ,"iso-8859-2" },
156  {"sv" ,"svenska.hash" ,"iso-8859-1" },
157  {"sv_SE" ,"svenska.hash" ,"iso-8859-1" },
158  {"uk" ,"ukrainian.hash" ,"koi8-u" },
159  {"uk_UA" ,"ukrainian.hash" ,"koi8-u" },
160  {"yi" ,"yiddish-yivo.hash" ,"utf-8" }
161 };
162 
163 static const size_t size_ispell_map = ( sizeof(ispell_map) / sizeof((ispell_map)[0]) );
164 static TQMap<TQString, TQString> ispell_dict_map;
165 
166 
167 void
168 ISpellChecker::try_autodetect_charset(const char * const inEncoding)
169 {
170  if (inEncoding && strlen(inEncoding))
171  {
172  m_translate_in = TQTextCodec::codecForName(inEncoding);
173  }
174 }
175 
176 /***************************************************************************/
177 /***************************************************************************/
178 
179 ISpellChecker::ISpellChecker()
180  : deftflag(-1),
181  prefstringchar(-1),
182  m_bSuccessfulInit(false),
183  m_BC(NULL),
184  m_cd(NULL),
185  m_cl(NULL),
186  m_cm(NULL),
187  m_ho(NULL),
188  m_nd(NULL),
189  m_so(NULL),
190  m_se(NULL),
191  m_ti(NULL),
192  m_te(NULL),
193  m_hashstrings(NULL),
194  m_hashtbl(NULL),
195  m_pflaglist(NULL),
196  m_sflaglist(NULL),
197  m_chartypes(NULL),
198  m_infile(NULL),
199  m_outfile(NULL),
200  m_askfilename(NULL),
201  m_Trynum(0),
202  m_translate_in(0)
203 {
204  memset(m_sflagindex,0,sizeof(m_sflagindex));
205  memset(m_pflagindex,0,sizeof(m_pflagindex));
206 }
207 
/* free() a pointer only when it is non-null.  Wrapped in do/while(0) so
 * the macro behaves as a single statement, e.g. inside an if/else. */
#if !defined(FREEP)
#define FREEP(p)		\
	do {			\
		if (p)		\
			free(p);\
	} while (0)
#endif
211 
212 ISpellChecker::~ISpellChecker()
213 {
214  if (m_bSuccessfulInit) {
215  // only cleanup our mess if we were successfully initialized
216 
217  clearindex (m_pflagindex);
218  clearindex (m_sflagindex);
219  }
220 
221  FREEP(m_hashtbl);
222  FREEP(m_hashstrings);
223  FREEP(m_sflaglist);
224  FREEP(m_chartypes);
225 
226  delete m_translate_in;
227  m_translate_in = 0;
228 }
229 
230 bool
231 ISpellChecker::checkWord( const TQString& utf8Word )
232 {
233  ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
234  if (!m_bSuccessfulInit)
235  return false;
236 
237  if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
238  return false;
239 
240  bool retVal = false;
241  TQCString out;
242  if (!m_translate_in)
243  return false;
244  else {
245  /* convert to 8bit string and null terminate */
246  int len_out = utf8Word.length();
247 
248  out = m_translate_in->fromUnicode( utf8Word, len_out );
249  }
250 
251  if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
252  {
253  if (good(iWord, 0, 0, 1, 0) == 1 ||
254  compoundgood(iWord, 1) == 1)
255  {
256  retVal = true;
257  }
258  }
259 
260  return retVal;
261 }
262 
263 TQStringList
264 ISpellChecker::suggestWord(const TQString& utf8Word)
265 {
266  ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
267  int c;
268 
269  if (!m_bSuccessfulInit)
270  return TQStringList();
271 
272  if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
273  utf8Word.length() == 0)
274  return TQStringList();
275 
276  TQCString out;
277  if (!m_translate_in)
278  return TQStringList();
279  else
280  {
281  /* convert to 8bit string and null terminate */
282 
283  int len_out = utf8Word.length();
284  out = m_translate_in->fromUnicode( utf8Word, len_out );
285  }
286 
287  if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
288  makepossibilities(iWord);
289  else
290  return TQStringList();
291 
292  TQStringList sugg_arr;
293  for (c = 0; c < m_pcount; c++)
294  {
295  TQString utf8Word;
296 
297  if (!m_translate_in)
298  {
299  /* copy to 8bit string and null terminate */
300  utf8Word = TQString::fromUtf8( m_possibilities[c] );
301  }
302  else
303  {
304  /* convert to 32bit string and null terminate */
305  utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
306  }
307 
308  sugg_arr.append( utf8Word );
309  }
310 
311  return sugg_arr;
312 }
313 
314 static void
315 s_buildHashNames (std::vector<std::string> & names, const char * dict)
316 {
317  const char * tmp = 0;
318  int i = 0;
319 
320  names.clear ();
321 
322  while ( (tmp = ispell_dirs[i++]) ) {
323  TQCString maybeFile = TQCString( tmp ) + '/';
324  maybeFile += dict;
325  names.push_back( maybeFile.data() );
326  }
327 }
328 
329 static void
330 s_allDics()
331 {
332  const char * tmp = 0;
333  int i = 0;
334 
335  while ( (tmp = ispell_dirs[i++]) ) {
336  TQDir dir( tmp );
337  TQStringList lst = dir.entryList( "*.hash" );
338  for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
339  TQFileInfo info( *it );
340  for (size_t i = 0; i < size_ispell_map; i++)
341  {
342  const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
343  if (!strcmp (info.fileName().latin1(), mapping->dict))
344  {
345  ispell_dict_map.insert( mapping->lang, *it );
346  }
347  }
348  }
349  }
350 }
351 
352 TQValueList<TQString>
353 ISpellChecker::allDics()
354 {
355  if ( ispell_dict_map.empty() )
356  s_allDics();
357 
358  return ispell_dict_map.keys();
359 }
360 
361 TQString
362 ISpellChecker::loadDictionary (const char * szdict)
363 {
364  std::vector<std::string> dict_names;
365 
366  s_buildHashNames (dict_names, szdict);
367 
368  for (size_t i = 0; i < dict_names.size(); i++)
369  {
370  if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
371  return dict_names[i].c_str();
372  }
373 
374  return TQString::null;
375 }
376 
383 bool
384 ISpellChecker::loadDictionaryForLanguage ( const char * szLang )
385 {
386  TQString hashname;
387 
388  const char * encoding = NULL;
389  const char * szFile = NULL;
390 
391  for (size_t i = 0; i < size_ispell_map; i++)
392  {
393  const IspellMap * mapping = (const IspellMap *)(&(ispell_map[i]));
394  if (!strcmp (szLang, mapping->lang))
395  {
396  szFile = mapping->dict;
397  encoding = mapping->enc;
398  break;
399  }
400  }
401 
402  if (!szFile || !strlen(szFile))
403  return false;
404 
405  alloc_ispell_struct();
406 
407  hashname = loadDictionary(szFile);
408  if (hashname.isEmpty())
409  return false;
410 
411  // one of the two above calls succeeded
412  setDictionaryEncoding (hashname, encoding);
413 
414  return true;
415 }
416 
417 void
418 ISpellChecker::setDictionaryEncoding( const TQString& hashname, const char * encoding )
419 {
420  /* Get Hash encoding from XML file. This should always work! */
421  try_autodetect_charset(encoding);
422 
423  if (m_translate_in)
424  {
425  /* We still have to setup prefstringchar*/
426  prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag
427  : static_cast<int *>(NULL));
428 
429  if (prefstringchar < 0)
430  {
431  std::string teststring;
432  for(int n1 = 1; n1 <= 15; n1++)
433  {
434  teststring = "latin" + n1;
435  prefstringchar = findfiletype(teststring.c_str(), 1,
436  deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
437  if (prefstringchar >= 0)
438  break;
439  }
440  }
441 
442  return; /* success */
443  }
444 
445  /* Test for UTF-8 first */
446  prefstringchar = findfiletype("utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
447  if (prefstringchar >= 0)
448  {
449  m_translate_in = TQTextCodec::codecForName("utf8");
450  }
451 
452  if (m_translate_in)
453  return; /* success */
454 
455  /* Test for "latinN" */
456  if (!m_translate_in)
457  {
458  /* Look for "altstringtype" names from latin1 to latin15 */
459  for(int n1 = 1; n1 <= 15; n1++)
460  {
461  TQString teststring = TQString("latin%1").arg(n1);
462  prefstringchar = findfiletype(teststring.latin1(), 1,
463  deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
464  if (prefstringchar >= 0)
465  {
466  //FIXME: latin1 might be wrong
467  m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
468  break;
469  }
470  }
471  }
472 
473  /* If nothing found, use latin1 */
474  if (!m_translate_in)
475  {
476  m_translate_in = TQTextCodec::codecForName("latin1");
477  }
478 }
479 
480 bool
481 ISpellChecker::requestDictionary(const char *szLang)
482 {
483  if (!loadDictionaryForLanguage (szLang))
484  {
485  // handle a shortened version of the language tag: en_US => en
486  std::string shortened_dict (szLang);
487  size_t uscore_pos;
488 
489  if ((uscore_pos = shortened_dict.rfind ('_')) != ((size_t)-1)) {
490  shortened_dict = shortened_dict.substr(0, uscore_pos);
491  if (!loadDictionaryForLanguage (shortened_dict.c_str()))
492  return false;
493  } else
494  return false;
495  }
496 
497  m_bSuccessfulInit = true;
498 
499  if (prefstringchar < 0)
500  m_defdupchar = 0;
501  else
502  m_defdupchar = prefstringchar;
503 
504  return true;
505 }

kspell2

Skip menu "kspell2"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members

kspell2

Skip menu "kspell2"
  • arts
  • dcop
  • dnssd
  • interfaces
  •     interface
  •     library
  •   kspeech
  •   ktexteditor
  • kabc
  • kate
  • kcmshell
  • kdecore
  • kded
  • kdefx
  • kdeprint
  • kdesu
  • kdeui
  • kdoctools
  • khtml
  • kimgio
  • kinit
  • kio
  •   bookmarks
  •   httpfilter
  •   kfile
  •   kio
  •   kioexec
  •   kpasswdserver
  •   kssl
  • kioslave
  •   http
  • kjs
  • kmdi
  •   kmdi
  • knewstuff
  • kparts
  • krandr
  • kresources
  • kspell2
  • kunittest
  • kutils
  • kwallet
  • libkmid
  • libkscreensaver
Generated for kspell2 by doxygen 1.8.3.1
This website is maintained by Timothy Pearson.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. |