40 #include "ispell_checker.h"
44 #include <tqfileinfo.h>
// Record type for one row of the language table below (presumably the struct
// behind the IspellMap typedef that ispell_map uses). The code in this file
// reads rows through the fields `lang`, `dict` and `enc`; the member
// declarations themselves are not part of this excerpt.
48 typedef struct str_ispell_map
// Directories that are searched for ispell hash files.
// NOTE(review): the loops over this array run until they read a null entry,
// so the list is presumably closed by a 0 sentinel on a line not shown here —
// confirm before adding or removing entries.
55 static const char *ispell_dirs [] = {
57 "/usr/local/lib/ispell",
58 "/usr/local/share/ispell",
// Language table. Each row holds three strings: a locale tag, the name of the
// ispell hash file for it, and the name of the character encoding of that
// dictionary (apparently the `lang`, `dict` and `enc` fields, in that order —
// the struct declaration is not visible here).
// Lookups compare the tag with strcmp, so every spelling that should be
// accepted needs its own row; most languages are therefore listed both bare
// ("de") and with a country suffix ("de_DE").
63 static const IspellMap ispell_map [] = {
64 {
"ca" ,
"catala.hash" ,
"iso-8859-1" },
65 {
"ca_ES" ,
"catala.hash" ,
"iso-8859-1" },
66 {
"cs" ,
"czech.hash" ,
"iso-8859-2" },
67 {
"cs_CZ" ,
"czech.hash" ,
"iso-8859-2" },
68 {
"da" ,
"dansk.hash" ,
"iso-8859-1" },
69 {
"da_DK" ,
"dansk.hash" ,
"iso-8859-1" },
// German: de_CH has its own hash file (swiss.hash); the others share deutsch.hash.
70 {
"de" ,
"deutsch.hash" ,
"iso-8859-1" },
71 {
"de_CH" ,
"swiss.hash" ,
"iso-8859-1" },
72 {
"de_AT" ,
"deutsch.hash" ,
"iso-8859-1" },
73 {
"de_DE" ,
"deutsch.hash" ,
"iso-8859-1" },
74 {
"el" ,
"ellhnika.hash" ,
"iso-8859-7" },
75 {
"el_GR" ,
"ellhnika.hash" ,
"iso-8859-7" },
// English: bare "en" and every variant use british.hash, except en_PH and
// en_US which use american.hash.
76 {
"en" ,
"british.hash" ,
"iso-8859-1" },
77 {
"en_AU" ,
"british.hash" ,
"iso-8859-1" },
78 {
"en_BZ" ,
"british.hash" ,
"iso-8859-1" },
79 {
"en_CA" ,
"british.hash" ,
"iso-8859-1" },
80 {
"en_GB" ,
"british.hash" ,
"iso-8859-1" },
81 {
"en_IE" ,
"british.hash" ,
"iso-8859-1" },
82 {
"en_JM" ,
"british.hash" ,
"iso-8859-1" },
83 {
"en_NZ" ,
"british.hash" ,
"iso-8859-1" },
84 {
"en_TT" ,
"british.hash" ,
"iso-8859-1" },
85 {
"en_ZA" ,
"british.hash" ,
"iso-8859-1" },
86 {
"en_ZW" ,
"british.hash" ,
"iso-8859-1" },
87 {
"en_PH" ,
"american.hash" ,
"iso-8859-1" },
88 {
"en_US" ,
"american.hash" ,
"iso-8859-1" },
89 {
"eo" ,
"esperanto.hash" ,
"iso-8859-3" },
// Spanish: all variants share espanol.hash.
90 {
"es" ,
"espanol.hash" ,
"iso-8859-1" },
91 {
"es_AR" ,
"espanol.hash" ,
"iso-8859-1" },
92 {
"es_BO" ,
"espanol.hash" ,
"iso-8859-1" },
93 {
"es_CL" ,
"espanol.hash" ,
"iso-8859-1" },
94 {
"es_CO" ,
"espanol.hash" ,
"iso-8859-1" },
95 {
"es_CR" ,
"espanol.hash" ,
"iso-8859-1" },
96 {
"es_DO" ,
"espanol.hash" ,
"iso-8859-1" },
97 {
"es_EC" ,
"espanol.hash" ,
"iso-8859-1" },
98 {
"es_ES" ,
"espanol.hash" ,
"iso-8859-1" },
99 {
"es_GT" ,
"espanol.hash" ,
"iso-8859-1" },
100 {
"es_HN" ,
"espanol.hash" ,
"iso-8859-1" },
101 {
"es_MX" ,
"espanol.hash" ,
"iso-8859-1" },
102 {
"es_NI" ,
"espanol.hash" ,
"iso-8859-1" },
103 {
"es_PA" ,
"espanol.hash" ,
"iso-8859-1" },
104 {
"es_PE" ,
"espanol.hash" ,
"iso-8859-1" },
105 {
"es_PR" ,
"espanol.hash" ,
"iso-8859-1" },
106 {
"es_PY" ,
"espanol.hash" ,
"iso-8859-1" },
107 {
"es_SV" ,
"espanol.hash" ,
"iso-8859-1" },
108 {
"es_UY" ,
"espanol.hash" ,
"iso-8859-1" },
109 {
"es_VE" ,
"espanol.hash" ,
"iso-8859-1" },
110 {
"fi" ,
"finnish.hash" ,
"iso-8859-1" },
111 {
"fi_FI" ,
"finnish.hash" ,
"iso-8859-1" },
112 {
"fr" ,
"francais.hash" ,
"iso-8859-1" },
113 {
"fr_BE" ,
"francais.hash" ,
"iso-8859-1" },
114 {
"fr_CA" ,
"francais.hash" ,
"iso-8859-1" },
115 {
"fr_CH" ,
"francais.hash" ,
"iso-8859-1" },
116 {
"fr_FR" ,
"francais.hash" ,
"iso-8859-1" },
117 {
"fr_LU" ,
"francais.hash" ,
"iso-8859-1" },
118 {
"fr_MC" ,
"francais.hash" ,
"iso-8859-1" },
119 {
"hu" ,
"hungarian.hash" ,
"iso-8859-2" },
120 {
"hu_HU" ,
"hungarian.hash" ,
"iso-8859-2" },
121 {
"ga" ,
"irish.hash" ,
"iso-8859-1" },
122 {
"ga_IE" ,
"irish.hash" ,
"iso-8859-1" },
123 {
"gl" ,
"galician.hash" ,
"iso-8859-1" },
124 {
"gl_ES" ,
"galician.hash" ,
"iso-8859-1" },
125 {
"ia" ,
"interlingua.hash" ,
"iso-8859-1" },
126 {
"it" ,
"italian.hash" ,
"iso-8859-1" },
127 {
"it_IT" ,
"italian.hash" ,
"iso-8859-1" },
128 {
"it_CH" ,
"italian.hash" ,
"iso-8859-1" },
129 {
"la" ,
"mlatin.hash" ,
"iso-8859-1" },
130 {
"la_IT" ,
"mlatin.hash" ,
"iso-8859-1" },
131 {
"lt" ,
"lietuviu.hash" ,
"iso-8859-13" },
132 {
"lt_LT" ,
"lietuviu.hash" ,
"iso-8859-13" },
133 {
"nl" ,
"nederlands.hash" ,
"iso-8859-1" },
134 {
"nl_NL" ,
"nederlands.hash" ,
"iso-8859-1" },
135 {
"nl_BE" ,
"nederlands.hash" ,
"iso-8859-1" },
// Norwegian: "nb" and "no" both use norsk.hash; "nn" uses nynorsk.hash.
136 {
"nb" ,
"norsk.hash" ,
"iso-8859-1" },
137 {
"nb_NO" ,
"norsk.hash" ,
"iso-8859-1" },
138 {
"nn" ,
"nynorsk.hash" ,
"iso-8859-1" },
139 {
"nn_NO" ,
"nynorsk.hash" ,
"iso-8859-1" },
140 {
"no" ,
"norsk.hash" ,
"iso-8859-1" },
141 {
"no_NO" ,
"norsk.hash" ,
"iso-8859-1" },
142 {
"pl" ,
"polish.hash" ,
"iso-8859-2" },
143 {
"pl_PL" ,
"polish.hash" ,
"iso-8859-2" },
// Portuguese: bare "pt" resolves to brazilian.hash; only pt_PT uses portugues.hash.
144 {
"pt" ,
"brazilian.hash" ,
"iso-8859-1" },
145 {
"pt_BR" ,
"brazilian.hash" ,
"iso-8859-1" },
146 {
"pt_PT" ,
"portugues.hash" ,
"iso-8859-1" },
// Russian rows name koi8-r as their encoding instead of an ISO-8859 set.
147 {
"ru" ,
"russian.hash" ,
"koi8-r" },
148 {
"ru_MD" ,
"russian.hash" ,
"koi8-r" },
149 {
"ru_RU" ,
"russian.hash" ,
"koi8-r" },
150 {
"sc" ,
"sardinian.hash" ,
"iso-8859-1" },
151 {
"sc_IT" ,
"sardinian.hash" ,
"iso-8859-1" },
152 {
"sk" ,
"slovak.hash" ,
"iso-8859-2" },
153 {
"sk_SK" ,
"slovak.hash" ,
"iso-8859-2" },
154 {
"sl" ,
"slovensko.hash" ,
"iso-8859-2" },
155 {
"sl_SI" ,
"slovensko.hash" ,
"iso-8859-2" },
156 {
"sv" ,
"svenska.hash" ,
"iso-8859-1" },
157 {
"sv_SE" ,
"svenska.hash" ,
"iso-8859-1" },
// Ukrainian rows name koi8-u as their encoding.
158 {
"uk" ,
"ukrainian.hash" ,
"koi8-u" },
159 {
"uk_UA" ,
"ukrainian.hash" ,
"koi8-u" },
// The only row whose encoding is utf-8.
160 {
"yi" ,
"yiddish-yivo.hash" ,
"utf-8" }
// Number of rows in ispell_map: total byte size of the array divided by the
// byte size of its first element.
163 static const size_t size_ispell_map = (
sizeof(ispell_map) /
sizeof((ispell_map)[0]) );
// File-level map from language tag to the "*.hash" directory entry that was
// matched for it. It is filled by the directory scan further down and its
// keys are what allDics() returns.
164 static TQMap<TQString, TQString> ispell_dict_map;
// Chooses the input codec from an explicit encoding name.
//   inEncoding - encoding name; may be null or empty.
// When inEncoding is non-null and non-empty, m_translate_in is set to whatever
// TQTextCodec::codecForName() returns for it. What happens for a null or empty
// name is on lines that are not part of this excerpt.
168 ISpellChecker::try_autodetect_charset(
const char *
const inEncoding)
170 if (inEncoding && strlen(inEncoding))
172 m_translate_in = TQTextCodec::codecForName(inEncoding);
// Constructor. The object starts with m_bSuccessfulInit == false, and the
// arrays m_sflagindex and m_pflagindex are zero-filled over their full size.
// The remaining member initialisers are on lines not shown in this excerpt.
179 ISpellChecker::ISpellChecker()
182 m_bSuccessfulInit(false),
204 memset(m_sflagindex,0,
sizeof(m_sflagindex));
205 memset(m_pflagindex,0,
sizeof(m_pflagindex));
// Calls free(p) only when p is non-null. The do { ... } while (0) wrapper lets
// the macro be used like a single statement followed by a semicolon.
209 #define FREEP(p) do { if (p) free(p); } while (0)
// Destructor. clearindex() is run on both flag indexes after the
// m_bSuccessfulInit check, m_hashstrings is released through FREEP, and the
// codec pointer is deleted. Other cleanup steps, and the exact extent of the
// `if` body, are on lines not shown in this excerpt.
212 ISpellChecker::~ISpellChecker()
214 if (m_bSuccessfulInit) {
217 clearindex (m_pflagindex);
218 clearindex (m_sflagindex);
222 FREEP(m_hashstrings);
// NOTE(review): every assignment to m_translate_in visible in this file comes
// from TQTextCodec::codecForName(). Qt's QTextCodec documentation warns
// against deleting codecs yourself — confirm who owns this pointer.
226 delete m_translate_in;
// Checks a single word against the loaded dictionary.
//   utf8Word - the word to check (a TQString, despite the parameter name).
// Guards visible here: the checker must have been initialised, and the word
// must be non-null, non-empty and shorter than INPUTWORDLEN + MAXAFFIXLEN
// characters. The outcome of each guard is on lines not shown in this excerpt.
231 ISpellChecker::checkWord(
const TQString& utf8Word )
// Scratch buffer for the word in ispell's ichar_t representation.
233 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
234 if (!m_bSuccessfulInit)
237 if (!utf8Word || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) || utf8Word.isEmpty())
// Encode the word with the dictionary codec; len_out is passed to
// fromUnicode() initialised with the word length.
246 int len_out = utf8Word.length();
248 out = m_translate_in->fromUnicode( utf8Word, len_out );
// Convert the encoded bytes into iWord, then test the word with good() and
// compoundgood(); a result of 1 from either is what the condition accepts.
251 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
253 if (good(iWord, 0, 0, 1, 0) == 1 ||
254 compoundgood(iWord, 1) == 1)
// Produces spelling suggestions for a word.
//   utf8Word - the word to find suggestions for (a TQString, despite the name).
// Returns an empty TQStringList when the checker is not initialised or when
// the word is empty or not shorter than INPUTWORDLEN + MAXAFFIXLEN characters;
// otherwise the collected suggestions (sugg_arr) are built below.
264 ISpellChecker::suggestWord(
const TQString& utf8Word)
// Scratch buffer for the word in ispell's ichar_t representation.
266 ichar_t iWord[INPUTWORDLEN + MAXAFFIXLEN];
269 if (!m_bSuccessfulInit)
270 return TQStringList();
// NOTE(review): the trailing length() == 0 test appears to repeat isEmpty().
272 if (utf8Word.isEmpty() || utf8Word.length() >= (INPUTWORDLEN + MAXAFFIXLEN) ||
273 utf8Word.length() == 0)
274 return TQStringList();
278 return TQStringList();
// Encode the word with the dictionary codec before handing it to ispell.
283 int len_out = utf8Word.length();
284 out = m_translate_in->fromUnicode( utf8Word, len_out );
// makepossibilities() runs when strtoichar() returns 0.
287 if (!strtoichar(iWord, out.data(), INPUTWORDLEN + MAXAFFIXLEN, 0))
288 makepossibilities(iWord);
290 return TQStringList();
// Turn each of the m_pcount entries of m_possibilities into a TQString and
// collect them.
292 TQStringList sugg_arr;
293 for (c = 0; c < m_pcount; c++)
// Two decoding paths: raw UTF-8, or the dictionary codec. The condition that
// selects between them is on a line not shown here.
// NOTE(review): `utf8Word` assigned below is presumably a local declared on a
// line not shown, since the parameter of the same name is a const reference.
300 utf8Word = TQString::fromUtf8( m_possibilities[c] );
305 utf8Word = m_translate_in->toUnicode( m_possibilities[c] );
308 sugg_arr.append( utf8Word );
// Builds candidate paths for a dictionary file and appends them to `names`.
//   names - output list; one entry is pushed per directory in ispell_dirs.
//   dict  - dictionary file name (presumably appended to each directory
//           prefix on a line not shown in this excerpt).
315 s_buildHashNames (std::vector<std::string> & names,
const char * dict)
317 const char * tmp = 0;
// Walk ispell_dirs until a null entry is read.
322 while ( (tmp = ispell_dirs[i++]) ) {
// Start each candidate with "<directory>/".
323 TQCString maybeFile = TQCString( tmp ) +
'/';
325 names.push_back( maybeFile.data() );
// Interior of the helper that fills ispell_dict_map; its signature is on a
// line that is not part of this excerpt. For each entry of ispell_dirs it
// lists the "*.hash" entries of `dir` (presumably opened on that directory on
// a line not shown) and, for every listed entry whose file name equals the
// `dict` field of a table row, inserts row.lang -> that entry into the map.
332 const char * tmp = 0;
// Walk ispell_dirs until a null entry is read.
335 while ( (tmp = ispell_dirs[i++]) ) {
337 TQStringList lst = dir.entryList(
"*.hash" );
338 for ( TQStringList::Iterator it = lst.begin(); it != lst.end(); ++it ) {
339 TQFileInfo info( *it );
// Note: this `i` is a new variable that shadows the index used for
// ispell_dirs in the enclosing while loop.
340 for (
size_t i = 0; i < size_ispell_map; i++)
342 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
343 if (!strcmp (info.fileName().latin1(), mapping->dict))
345 ispell_dict_map.insert( mapping->lang, *it );
// Returns the keys of ispell_dict_map, i.e. the language tags recorded there.
// The map is tested for emptiness first; the step taken when it is empty is on
// a line not shown in this excerpt (presumably the directory scan that fills
// the map).
352 TQValueList<TQString>
353 ISpellChecker::allDics()
355 if ( ispell_dict_map.empty() )
358 return ispell_dict_map.keys();
// Tries to load the ispell hash file named `szdict`.
//   szdict - hash file name, e.g. one of the `dict` values of ispell_map.
// Candidate paths come from s_buildHashNames(); they are tried in order with
// linit(), and the first path for which linit() returns >= 0 is returned.
// TQString::null is returned at the end of the function.
362 ISpellChecker::loadDictionary (
const char * szdict)
364 std::vector<std::string> dict_names;
366 s_buildHashNames (dict_names, szdict);
368 for (
size_t i = 0; i < dict_names.size(); i++)
// linit() takes a non-const char*, hence the const_cast on the path.
370 if (linit(const_cast<char*>(dict_names[i].c_str())) >= 0)
371 return dict_names[i].c_str();
374 return TQString::null;
// Loads the dictionary registered for a language tag.
//   szLang - tag to look up in ispell_map. It is passed straight to strcmp,
//            so it must not be null.
// Steps visible here: find the table row whose `lang` equals szLang and take
// its hash file name and encoding; call alloc_ispell_struct(); load the file
// with loadDictionary(); then configure the encoding with
// setDictionaryEncoding(). The outcomes of the two failure checks (no file
// name, empty hashname) are on lines not shown in this excerpt.
384 ISpellChecker::loadDictionaryForLanguage (
const char * szLang )
388 const char * encoding = NULL;
389 const char * szFile = NULL;
// Search of the language table; the tag must match exactly.
391 for (
size_t i = 0; i < size_ispell_map; i++)
393 const IspellMap * mapping = (
const IspellMap *)(&(ispell_map[i]));
394 if (!strcmp (szLang, mapping->lang))
396 szFile = mapping->dict;
397 encoding = mapping->enc;
// No row matched, or the row names an empty file.
402 if (!szFile || !strlen(szFile))
405 alloc_ispell_struct();
407 hashname = loadDictionary(szFile);
408 if (hashname.isEmpty())
412 setDictionaryEncoding (hashname, encoding);
// Selects the text codec (m_translate_in) and the ispell string type
// (prefstringchar) for the dictionary that was just loaded.
//   hashname - path of the loaded hash file (not referenced in the lines
//              visible in this excerpt).
//   encoding - encoding name from the language table; forwarded to
//              try_autodetect_charset(), which accepts null or empty.
// Attempts visible here, in order: the explicit encoding, then the "utf8"
// string type, then "latin1" .. "latin15", and finally the "latin1" codec.
// The conditions and early exits that separate these stages are on lines not
// shown in this excerpt.
418 ISpellChecker::setDictionaryEncoding(
const TQString& hashname,
const char * encoding )
421 try_autodetect_charset(encoding);
// First stage: only prefstringchar is looked up, "utf8" first.
426 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag
427 : static_cast<int *>(NULL));
429 if (prefstringchar < 0)
431 std::string teststring;
432 for(
int n1 = 1; n1 <= 15; n1++)
// Build the name "latin<N>" by formatting the number into the string. The
// previous expression `"latin" + n1` was pointer arithmetic on the literal: it
// produced "atin", "tin", ... and pointed past the end of the array for
// n1 > 6. The formatting idiom is the one the second loop below already uses.
434 teststring =
TQString("latin%1").arg(n1).latin1();
435 prefstringchar = findfiletype(teststring.c_str(), 1,
436 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
437 if (prefstringchar >= 0)
// Second stage: look up the string type again and also pick a matching codec.
446 prefstringchar = findfiletype(
"utf8", 1, deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
447 if (prefstringchar >= 0)
449 m_translate_in = TQTextCodec::codecForName(
"utf8");
// Try the string types "latin1" .. "latin15"; on a hit the codec of the same
// name is requested.
459 for(
int n1 = 1; n1 <= 15; n1++)
461 TQString teststring = TQString(
"latin%1").arg(n1);
462 prefstringchar = findfiletype(teststring.latin1(), 1,
463 deftflag < 0 ? &deftflag : static_cast<int *>(NULL));
464 if (prefstringchar >= 0)
467 m_translate_in = TQTextCodec::codecForName( teststring.latin1() );
// Last resort: the "latin1" codec.
476 m_translate_in = TQTextCodec::codecForName(
"latin1");
// Loads the dictionary for a locale tag, falling back to the bare language.
//   szLang - locale tag such as "en_GB".
// If loadDictionaryForLanguage(szLang) fails and the tag contains an
// underscore, the part before the last underscore is tried ("en_GB" -> "en").
// Further down m_bSuccessfulInit is set to true and m_defdupchar is taken from
// prefstringchar. The outcomes of the failure checks and of the
// `prefstringchar < 0` test are on lines not shown in this excerpt.
481 ISpellChecker::requestDictionary(
const char *szLang)
483 if (!loadDictionaryForLanguage (szLang))
486 std::string shortened_dict (szLang);
// (size_t)-1 is the value of std::string::npos, i.e. "no underscore found".
489 if ((uscore_pos = shortened_dict.rfind (
'_')) != ((
size_t)-1)) {
490 shortened_dict = shortened_dict.substr(0, uscore_pos);
491 if (!loadDictionaryForLanguage (shortened_dict.c_str()))
497 m_bSuccessfulInit =
true;
499 if (prefstringchar < 0)
502 m_defdupchar = prefstringchar;