--- a/extensions/spellcheck/hunspell/src/Makefile.in
+++ b/extensions/spellcheck/hunspell/src/Makefile.in
@@ -65,16 +65,18 @@ CPPSRCS += affentry.cpp \
filemgr.cpp \
hashmgr.cpp \
hunspell.cpp \
hunzip.cpp \
phonet.cpp \
replist.cpp \
suggestmgr.cpp \
$(NULL)
+
+DEFINES += -DHUNSPELL_STATIC
endif
EXTRA_DSO_LDOPTS = \
$(LIBS_DIR) \
$(XPCOM_LIBS) \
$(NSPR_LIBS) \
$(MOZ_UNICHARUTIL_LIBS) \
$(MOZ_HUNSPELL_LIBS) \
--- a/extensions/spellcheck/hunspell/src/README.hunspell
+++ b/extensions/spellcheck/hunspell/src/README.hunspell
@@ -14,32 +14,34 @@
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Ryan VanderMeulen (ryanvm@gmail.com)
+* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******
-Hunspell Version: 1.2.8
+Hunspell Version: 1.2.12
+Additional Patches: 583582
Hunspell Author: László Németh
MySpell Author: Kevin Hendricks & David Einstein
Hunspell is a spell checker and morphological analyser library. Hunspell
is based on OpenOffice.org's Myspell. Documentation, tests, and examples
are available at http://hunspell.sourceforge.net.
--- a/extensions/spellcheck/hunspell/src/affentry.cpp
+++ b/extensions/spellcheck/hunspell/src/affentry.cpp
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,44 +50,30 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cctype>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
#include "affentry.hxx"
#include "csutil.hxx"
-#ifndef MOZILLA_CLIENT
-#ifndef W32
-using namespace std;
-#endif
-#endif
-
-
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
// register affix manager
pmyMgr = pmgr;
- // set up its intial values
+ // set up its initial values
aflag = dp->aflag; // flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
numconds = dp->numconds; // length of the condition
opts = dp->opts; // cross product flag
@@ -261,17 +248,17 @@ struct hentry * PfxEntry::checkword(cons
}
// prefix matched but no root word was found
// if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
//if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
- he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
+ he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL,
0, NULL, FLAG_NULL, needflag, in_compound);
if (he) return he;
}
}
}
return NULL;
}
@@ -310,17 +297,17 @@ struct hentry * PfxEntry::check_twosfx(c
if (test_condition(tmpword)) {
tmpl += stripl;
// prefix matched but no root word was found
// if aeXPRODUCT is allowed, try again but now
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag);
+ he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, needflag);
if (he) return he;
}
}
}
return NULL;
}
// check if this prefix entry matches
@@ -358,17 +345,17 @@ char * PfxEntry::check_twosfx_morph(cons
tmpl += stripl;
// prefix matched but no root word was found
// if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
- aeXPRODUCT, (AffEntry *)this, needflag);
+ aeXPRODUCT, this, needflag);
}
}
}
return NULL;
}
// check if this prefix entry matches
char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
@@ -442,17 +429,17 @@ char * PfxEntry::check_morph(const char
} while (he);
}
// prefix matched but no root word was found
// if aeXPRODUCT is allowed, try again but now
// ross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
- st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
+ st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
}
}
}
}
@@ -461,17 +448,17 @@ char * PfxEntry::check_morph(const char
return NULL;
}
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
// register affix manager
pmyMgr = pmgr;
- // set up its intial values
+ // set up its initial values
aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
numconds = dp->numconds; // length of the condition
opts = dp->opts; // cross product flag
@@ -623,24 +610,24 @@ inline int SfxEntry::test_condition(cons
}
}
if (!p) return 1;
}
}
// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
- AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
+ PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
const FLAG badflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
return NULL;
// upon entry suffix is 0 length or already matches the end of the word.
@@ -681,19 +668,19 @@ struct hentry * SfxEntry::checkword(cons
fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
#endif
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
// check conditional suffix (enabled by prefix)
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
(((optflags & aeXPRODUCT) == 0) ||
- TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+ (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
// enabled by prefix
- ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ ((contclass) && (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))
) &&
// handle cont. class
((!cclass) ||
((contclass) && TESTAFF(contclass, cclass, contclasslen))
) &&
// check only in compound homonyms (bad flags)
(!badflag || !TESTAFF(he->astr, badflag, he->alen)
) &&
@@ -725,23 +712,23 @@ struct hentry * SfxEntry::checkword(cons
}
}
}
return NULL;
}
// see if two-level suffix is present in the word
struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
- AffEntry* ppfx, const FLAG needflag)
+ PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
return NULL;
@@ -787,22 +774,22 @@ struct hentry * SfxEntry::check_twosfx(c
if (he) return he;
}
}
return NULL;
}
// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
- AffEntry* ppfx, const FLAG needflag)
+ PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
char * st;
char result[MAXLNLEN];
*result = '\0';
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
@@ -840,18 +827,18 @@ char * SfxEntry::check_twosfx_morph(cons
// if all conditions are met then recall suffix_check
if (test_condition((char *) cp, (char *) tmpword)) {
if (ppfx) {
// handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
- if (((PfxEntry *) ppfx)->getMorph()) {
- mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
}
mystrcat(result,st, MAXLNLEN);
free(st);
mychomp(result);
}
} else {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
@@ -871,20 +858,20 @@ char * SfxEntry::check_twosfx_morph(cons
}
if (*result) return mystrdup(result);
}
}
return NULL;
}
// get next homonym with same affix
-struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, PfxEntry* ppfx,
const FLAG cclass, const FLAG needflag)
{
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
while (he->next_homonym) {
he = he->next_homonym;
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
((optflags & aeXPRODUCT) == 0 ||
TESTAFF(he->astr, eFlag, he->alen) ||
// handle conditional suffix
@@ -1021,8 +1008,9 @@ first two affentries for the suffix D de
where X is all characters *but* a, e, i, o, or u
conds['y'] = (1 << 1) (the last char must be a y)
all other bits for all other entries in the conds array are zero
#endif
+
--- a/extensions/spellcheck/hunspell/src/affentry.hxx
+++ b/extensions/spellcheck/hunspell/src/affentry.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,23 +53,25 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _AFFIX_HXX_
#define _AFFIX_HXX_
-#include "affixmgr.hxx"
+#include "hunvisapi.h"
+
#include "atypes.hxx"
#include "baseaffix.hxx"
+#include "affixmgr.hxx"
/* A Prefix Entry */
-class PfxEntry : public AffEntry
+class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
{
AffixMgr* pmyMgr;
PfxEntry * next;
PfxEntry * nexteq;
PfxEntry * nextne;
PfxEntry * flgnxt;
@@ -114,17 +117,17 @@ public:
inline int test_condition(const char * st);
};
/* A Suffix Entry */
-class SfxEntry : public AffEntry
+class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
{
AffixMgr* pmyMgr;
char * rappnd;
SfxEntry * next;
SfxEntry * nexteq;
SfxEntry * nextne;
SfxEntry * flgnxt;
@@ -135,26 +138,26 @@ class SfxEntry : public AffEntry
public:
SfxEntry(AffixMgr* pmgr, affentry* dp );
~SfxEntry();
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
struct hentry * checkword(const char * word, int len, int optflags,
- AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
+ PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
- struct hentry * check_twosfx(const char * word, int len, int optflags, AffEntry* ppfx, const FLAG needflag = NULL);
+ struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL); // FLAG is an integer flag type, so default to FLAG_NULL rather than the pointer macro NULL
char * check_twosfx_morph(const char * word, int len, int optflags,
- AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+ PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
struct hentry * get_next_homonym(struct hentry * he);
- struct hentry * get_next_homonym(struct hentry * word, int optflags, AffEntry* ppfx,
+ struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
const FLAG cclass, const FLAG needflag);
inline FLAG getFlag() { return aflag; }
inline const char * getKey() { return rappnd; }
char * add(const char * word, int len);
@@ -181,8 +184,10 @@ public:
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
inline char * nextchar(char * p);
inline int test_condition(const char * st, const char * begin);
};
#endif
+
+
--- a/extensions/spellcheck/hunspell/src/affixmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/affixmgr.cpp
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,48 +50,39 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cctype>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
-
+
+#include <vector>
+
+#include "affixmgr.hxx"
#include "affentry.hxx"
-#include "affixmgr.hxx"
-#include "csutil.hxx"
#include "langnum.hxx"
-#ifndef MOZILLA_CLIENT
-#ifndef W32
-using namespace std;
-#endif
-#endif
+#include "csutil.hxx"
AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
{
// register hash manager and load affix data from aff file
pHMgr = ptr[0];
alldic = ptr;
maxdic = md;
keystring = NULL;
trystring = NULL;
encoding=NULL;
+ csconv=NULL;
utf8 = 0;
complexprefixes = 0;
maptable = NULL;
nummap = 0;
breaktable = NULL;
numbreak = 0;
reptable = NULL;
numrep = 0;
@@ -171,34 +163,33 @@ AffixMgr::AffixMgr(const char * affpath,
if (cpdmin == -1) cpdmin = MINCPDLEN;
}
AffixMgr::~AffixMgr()
{
-
// pass through linked prefix entries and clean up
for (int i=0; i < SETSIZE ;i++) {
pFlag[i] = NULL;
- PfxEntry * ptr = (PfxEntry *)pStart[i];
+ PfxEntry * ptr = pStart[i];
PfxEntry * nptr = NULL;
while (ptr) {
nptr = ptr->getNext();
delete(ptr);
ptr = nptr;
nptr = NULL;
}
}
// pass through linked suffix entries and clean up
for (int j=0; j < SETSIZE ; j++) {
sFlag[j] = NULL;
- SfxEntry * ptr = (SfxEntry *)sStart[j];
+ SfxEntry * ptr = sStart[j];
SfxEntry * nptr = NULL;
while (ptr) {
nptr = ptr->getNext();
delete(ptr);
ptr = nptr;
nptr = NULL;
}
sStart[j] = NULL;
@@ -207,18 +198,20 @@ AffixMgr::~AffixMgr()
if (keystring) free(keystring);
keystring=NULL;
if (trystring) free(trystring);
trystring=NULL;
if (encoding) free(encoding);
encoding=NULL;
if (maptable) {
for (int j=0; j < nummap; j++) {
- if (maptable[j].set) free(maptable[j].set);
- if (maptable[j].set_utf16) free(maptable[j].set_utf16);
+ for (int k=0; k < maptable[j].len; k++) {
+ if (maptable[j].set[k]) free(maptable[j].set[k]);
+ }
+ free(maptable[j].set);
maptable[j].set = NULL;
maptable[j].len = 0;
}
free(maptable);
maptable = NULL;
}
nummap = 0;
if (breaktable) {
@@ -296,16 +289,19 @@ AffixMgr::~AffixMgr()
free_utf_tbl();
if (lang) free(lang);
if (wordchars) free(wordchars);
if (wordchars_utf16) free(wordchars_utf16);
if (ignorechars) free(ignorechars);
if (ignorechars_utf16) free(ignorechars_utf16);
if (version) free(version);
checknum=0;
+#ifdef MOZILLA_CLIENT
+ delete [] csconv;
+#endif
}
// read in aff file and build up prefix and suffix entry objects
int AffixMgr::parse_file(const char * affpath, const char * key)
{
char * line; // io buffers
char ft; // affix type
@@ -330,19 +326,19 @@ int AffixMgr::parse_file(const char * a
// read in each line ignoring any that do not
// start with a known line type indicator
while ((line = afflst->getline())) {
mychomp(line);
/* remove byte order mark */
if (firstline) {
firstline = 0;
+ // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
if (strncmp(line,"\xEF\xBB\xBF",3) == 0) {
memmove(line, line+3, strlen(line+3)+1);
- HUNSPELL_WARNING(stderr, "warning: affix file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
}
}
/* parse in the keyboard string */
if (strncmp(line,"KEY",3) == 0) {
if (parse_string(line, &keystring, afflst->getlinenum())) {
delete afflst;
return 1;
@@ -693,17 +689,17 @@ int AffixMgr::parse_file(const char * a
}
/* parse this affix: P - prefix, S - suffix */
ft = ' ';
if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
if (ft != ' ') {
if (dupflags_ini) {
- for (int i = 0; i < CONTSIZE; i++) dupflags[i] = 0;
+ memset(dupflags, 0, sizeof(dupflags));
dupflags_ini = 0;
}
if (parse_affix(line, ft, afflst, dupflags)) {
delete afflst;
process_pfx_tree_to_list();
process_sfx_tree_to_list();
return 1;
}
@@ -731,17 +727,17 @@ int AffixMgr::parse_file(const char * a
// Then to top this off why not examine the first char of the word to quickly
// limit the set of prefixes to examine (i.e. the prefixes to examine must
// be leading supersets of the first character of the word (if they exist)
// To take advantage of this "subset" relationship, we need to add two links
// from entry. One to take next if the current prefix is found (call it nexteq)
// and one to take next if the current prefix is not found (call it nextne).
- // Since we have built ordered lists, all that remains is to properly intialize
+ // Since we have built ordered lists, all that remains is to properly initialize
// the nextne and nexteq pointers that relate them
process_pfx_order();
process_sfx_order();
/* get encoding for CHECKCOMPOUNDCASE */
if (!utf8) {
char * enc = get_encoding();
@@ -778,51 +774,51 @@ int AffixMgr::parse_file(const char * a
return 0;
}
// we want to be able to quickly access prefix information
// both by prefix flag, and sorted by prefix string itself
// so we need to set up two indexes
-int AffixMgr::build_pfxtree(AffEntry* pfxptr)
+int AffixMgr::build_pfxtree(PfxEntry* pfxptr)
{
PfxEntry * ptr;
PfxEntry * pptr;
- PfxEntry * ep = (PfxEntry*) pfxptr;
+ PfxEntry * ep = pfxptr;
// get the right starting points
const char * key = ep->getKey();
const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
// first index by flag which must exist
- ptr = (PfxEntry*)pFlag[flg];
+ ptr = pFlag[flg];
ep->setFlgNxt(ptr);
- pFlag[flg] = (AffEntry *) ep;
+ pFlag[flg] = ep;
// handle the special case of null affix string
if (strlen(key) == 0) {
// always inset them at head of list at element 0
- ptr = (PfxEntry*)pStart[0];
+ ptr = pStart[0];
ep->setNext(ptr);
- pStart[0] = (AffEntry*)ep;
+ pStart[0] = ep;
return 0;
}
// now handle the normal case
ep->setNextEQ(NULL);
ep->setNextNE(NULL);
unsigned char sp = *((const unsigned char *)key);
- ptr = (PfxEntry*)pStart[sp];
+ ptr = pStart[sp];
// handle the first insert
if (!ptr) {
- pStart[sp] = (AffEntry*)ep;
+ pStart[sp] = ep;
return 0;
}
// otherwise use binary tree insertion so that a sorted
// list can easily be generated later
pptr = NULL;
for (;;) {
@@ -842,52 +838,52 @@ int AffixMgr::build_pfxtree(AffEntry* pf
}
}
return 0;
}
// we want to be able to quickly access suffix information
// both by suffix flag, and sorted by the reverse of the
// suffix string itself; so we need to set up two indexes
-int AffixMgr::build_sfxtree(AffEntry* sfxptr)
+int AffixMgr::build_sfxtree(SfxEntry* sfxptr)
{
SfxEntry * ptr;
SfxEntry * pptr;
- SfxEntry * ep = (SfxEntry *) sfxptr;
+ SfxEntry * ep = sfxptr;
/* get the right starting point */
const char * key = ep->getKey();
const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
// first index by flag which must exist
- ptr = (SfxEntry*)sFlag[flg];
+ ptr = sFlag[flg];
ep->setFlgNxt(ptr);
- sFlag[flg] = (AffEntry *) ep;
+ sFlag[flg] = ep;
// next index by affix string
// handle the special case of null affix string
if (strlen(key) == 0) {
// always inset them at head of list at element 0
- ptr = (SfxEntry*)sStart[0];
+ ptr = sStart[0];
ep->setNext(ptr);
- sStart[0] = (AffEntry*)ep;
+ sStart[0] = ep;
return 0;
}
// now handle the normal case
ep->setNextEQ(NULL);
ep->setNextNE(NULL);
unsigned char sp = *((const unsigned char *)key);
- ptr = (SfxEntry*)sStart[sp];
+ ptr = sStart[sp];
// handle the first insert
if (!ptr) {
- sStart[sp] = (AffEntry*)ep;
+ sStart[sp] = ep;
return 0;
}
// otherwise use binary tree insertion so that a sorted
// list can easily be generated later
pptr = NULL;
for (;;) {
pptr = ptr;
@@ -913,57 +909,57 @@ int AffixMgr::process_pfx_tree_to_list()
{
for (int i=1; i< SETSIZE; i++) {
pStart[i] = process_pfx_in_order(pStart[i],NULL);
}
return 0;
}
-AffEntry* AffixMgr::process_pfx_in_order(AffEntry* ptr, AffEntry* nptr)
+PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr)
{
if (ptr) {
- nptr = process_pfx_in_order(((PfxEntry*) ptr)->getNextNE(), nptr);
- ((PfxEntry*) ptr)->setNext((PfxEntry*) nptr);
- nptr = process_pfx_in_order(((PfxEntry*) ptr)->getNextEQ(), ptr);
+ nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
+ ptr->setNext(nptr);
+ nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
}
return nptr;
}
// convert from binary tree to sorted list
int AffixMgr:: process_sfx_tree_to_list()
{
for (int i=1; i< SETSIZE; i++) {
sStart[i] = process_sfx_in_order(sStart[i],NULL);
}
return 0;
}
-AffEntry* AffixMgr::process_sfx_in_order(AffEntry* ptr, AffEntry* nptr)
+SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr)
{
if (ptr) {
- nptr = process_sfx_in_order(((SfxEntry*) ptr)->getNextNE(), nptr);
- ((SfxEntry*) ptr)->setNext((SfxEntry*) nptr);
- nptr = process_sfx_in_order(((SfxEntry*) ptr)->getNextEQ(), ptr);
+ nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
+ ptr->setNext(nptr);
+ nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
}
return nptr;
}
// reinitialize the PfxEntry links NextEQ and NextNE to speed searching
// using the idea of leading subsets this time
int AffixMgr::process_pfx_order()
{
PfxEntry* ptr;
// loop through each prefix list starting point
for (int i=1; i < SETSIZE; i++) {
- ptr = (PfxEntry*)pStart[i];
+ ptr = pStart[i];
// look through the remainder of the list
// and find next entry with affix that
// the current one is not a subset of
// mark that as destination for NextNE
// use next in list that you are a subset
// of as NextEQ
@@ -979,17 +975,17 @@ int AffixMgr::process_pfx_order()
ptr->setNextEQ(ptr->getNext());
}
// now clean up by adding smart search termination strings:
// if you are already a superset of the previous prefix
// but not a subset of the next, search can end here
// so set NextNE properly
- ptr = (PfxEntry *) pStart[i];
+ ptr = pStart[i];
for (; ptr != NULL; ptr = ptr->getNext()) {
PfxEntry * nptr = ptr->getNext();
PfxEntry * mptr = NULL;
for (; nptr != NULL; nptr = nptr->getNext()) {
if (! isSubset(ptr->getKey(),nptr->getKey())) break;
mptr = nptr;
}
if (mptr) mptr->setNextNE(NULL);
@@ -1002,17 +998,17 @@ int AffixMgr::process_pfx_order()
// using the idea of leading subsets this time
int AffixMgr::process_sfx_order()
{
SfxEntry* ptr;
// loop through each prefix list starting point
for (int i=1; i < SETSIZE; i++) {
- ptr = (SfxEntry *) sStart[i];
+ ptr = sStart[i];
// look through the remainder of the list
// and find next entry with affix that
// the current one is not a subset of
// mark that as destination for NextNE
// use next in list that you are a subset
// of as NextEQ
@@ -1028,17 +1024,17 @@ int AffixMgr::process_sfx_order()
}
// now clean up by adding smart search termination strings:
// if you are already a superset of the previous suffix
// but not a subset of the next, search can end here
// so set NextNE properly
- ptr = (SfxEntry *) sStart[i];
+ ptr = sStart[i];
for (; ptr != NULL; ptr = ptr->getNext()) {
SfxEntry * nptr = ptr->getNext();
SfxEntry * mptr = NULL;
for (; nptr != NULL; nptr = nptr->getNext()) {
if (! isSubset(ptr->getKey(),nptr->getKey())) break;
mptr = nptr;
}
if (mptr) mptr->setNextNE(NULL);
@@ -1069,30 +1065,30 @@ int AffixMgr::condlen(char * st)
l++;
} else if (*st == ']') group = false;
else if (!group && (!utf8 ||
(!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) l++;
}
return l;
}
-int AffixMgr::encodeit(struct affentry * ptr, char * cs)
+int AffixMgr::encodeit(affentry &entry, char * cs)
{
if (strcmp(cs,".") != 0) {
- ptr->numconds = (char) condlen(cs);
- strncpy(ptr->c.conds, cs, MAXCONDLEN);
+ entry.numconds = (char) condlen(cs);
+ strncpy(entry.c.conds, cs, MAXCONDLEN);
// long condition (end of conds padded by strncpy)
- if (ptr->c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
- ptr->opts += aeLONGCOND;
- ptr->c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
- if (!ptr->c.l.conds2) return 1;
+ if (entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
+ entry.opts += aeLONGCOND;
+ entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
+ if (!entry.c.l.conds2) return 1;
}
} else {
- ptr->numconds = 0;
- ptr->c.conds[0] = '\0';
+ entry.numconds = 0;
+ entry.c.conds[0] = '\0';
}
return 0;
}
// return 1 if s1 is a leading subset of s2 (dots are for infixes)
inline int AffixMgr::isSubset(const char * s1, const char * s2)
{
while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
@@ -1109,54 +1105,54 @@ struct hentry * AffixMgr::prefix_check(c
{
struct hentry * rv= NULL;
pfx = NULL;
pfxappnd = NULL;
sfxappnd = NULL;
// first handle the special case of 0 length prefixes
- PfxEntry * pe = (PfxEntry *) pStart[0];
+ PfxEntry * pe = pStart[0];
while (pe) {
if (
// fogemorpheme
((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
(TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
// permit prefixes in compounds
((in_compound != IN_CPD_END) || (pe->getCont() &&
(TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
) {
// check prefix
rv = pe->checkword(word, len, in_compound, needflag);
if (rv) {
- pfx=(AffEntry *)pe; // BUG: pfx not stateless
+ pfx=pe; // BUG: pfx not stateless
return rv;
}
}
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
- PfxEntry * pptr = (PfxEntry *)pStart[sp];
+ PfxEntry * pptr = pStart[sp];
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
if (
// fogemorpheme
((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
(TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
// permit prefixes in compounds
((in_compound != IN_CPD_END) || (pptr->getCont() &&
(TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()))))
) {
// check prefix
rv = pptr->checkword(word, len, in_compound, needflag);
if (rv) {
- pfx=(AffEntry *)pptr; // BUG: pfx not stateless
+ pfx=pptr; // BUG: pfx not stateless
return rv;
}
}
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
}
}
@@ -1169,33 +1165,33 @@ struct hentry * AffixMgr::prefix_check_t
char in_compound, const FLAG needflag)
{
struct hentry * rv= NULL;
pfx = NULL;
sfxappnd = NULL;
// first handle the special case of 0 length prefixes
- PfxEntry * pe = (PfxEntry *) pStart[0];
+ PfxEntry * pe = pStart[0];
while (pe) {
rv = pe->check_twosfx(word, len, in_compound, needflag);
if (rv) return rv;
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
- PfxEntry * pptr = (PfxEntry *)pStart[sp];
+ PfxEntry * pptr = pStart[sp];
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
rv = pptr->check_twosfx(word, len, in_compound, needflag);
if (rv) {
- pfx = (AffEntry *)pptr;
+ pfx = pptr;
return rv;
}
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
}
}
@@ -1210,40 +1206,40 @@ char * AffixMgr::prefix_check_morph(cons
char result[MAXLNLEN];
result[0] = '\0';
pfx = NULL;
sfxappnd = NULL;
// first handle the special case of 0 length prefixes
- PfxEntry * pe = (PfxEntry *) pStart[0];
+ PfxEntry * pe = pStart[0];
while (pe) {
st = pe->check_morph(word,len,in_compound, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
}
// if (rv) return rv;
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
- PfxEntry * pptr = (PfxEntry *)pStart[sp];
+ PfxEntry * pptr = pStart[sp];
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
st = pptr->check_morph(word,len,in_compound, needflag);
if (st) {
// fogemorpheme
if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() &&
(TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
mystrcat(result, st, MAXLNLEN);
- pfx = (AffEntry *)pptr;
+ pfx = pptr;
}
free(st);
}
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
}
}
@@ -1261,37 +1257,37 @@ char * AffixMgr::prefix_check_twosfx_mor
char result[MAXLNLEN];
result[0] = '\0';
pfx = NULL;
sfxappnd = NULL;
// first handle the special case of 0 length prefixes
- PfxEntry * pe = (PfxEntry *) pStart[0];
+ PfxEntry * pe = pStart[0];
while (pe) {
st = pe->check_twosfx_morph(word,len,in_compound, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
}
pe = pe->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)word);
- PfxEntry * pptr = (PfxEntry *)pStart[sp];
+ PfxEntry * pptr = pStart[sp];
while (pptr) {
if (isSubset(pptr->getKey(),word)) {
st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
- pfx = (AffEntry *)pptr;
+ pfx = pptr;
}
pptr = pptr->getNextEQ();
} else {
pptr = pptr->getNextNE();
}
}
if (*result) return mystrdup(result);
@@ -1371,16 +1367,21 @@ int AffixMgr::defcpd_check(hentry *** wo
int i, j;
int ok;
int w = 0;
if (!*words) {
w = 1;
*words = def;
}
+
+ if (!*words) {
+ return 0;
+ }
+
(*words)[wnum] = rv;
// has the last word COMPOUNDRULE flag?
if (rv->alen == 0) {
(*words)[wnum] = NULL;
if (w) *words = NULL;
return 0;
}
@@ -1606,21 +1607,21 @@ struct hentry * AffixMgr::compound_check
rv = rv->next_homonym;
}
if (!rv) {
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
- ((SfxEntry*)sfx)->getCont() &&
- ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
- ((SfxEntry*)sfx)->getContLen())))) {
+ sfx->getCont() &&
+ ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) || (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend,
+ sfx->getContLen())))) {
rv = NULL;
}
}
if (rv ||
(((wordnum == 0) && compoundbegin &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
@@ -1634,44 +1635,44 @@ struct hentry * AffixMgr::compound_check
(is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
)) {
st[i] = ch;
continue;
}
// check non_compound flag in suffix and prefix
if ((rv) && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
rv = NULL;
}
// check compoundend flag in suffix and prefix
if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend,
+ sfx->getContLen())))) {
rv = NULL;
}
// check compoundmiddle flag in suffix and prefix
if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle,
+ sfx->getContLen())))) {
rv = NULL;
}
// check forbiddenwords
if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
(is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
return NULL;
}
@@ -1711,31 +1712,31 @@ struct hentry * AffixMgr::compound_check
)
) ||
(
checkcompoundcase && scpd == 0 && !words && cpdcase_check(word, i)
))
)
// LANG_hu section: spec. Hungarian rule
|| ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
- (sfx && ((SfxEntry*)sfx)->getCont() && ( // XXX hardwired Hungarian dic. codes
- TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
- TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
+ (sfx && sfx->getCont() && ( // XXX hardwired Hungarian dic. codes
+ TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
+ TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
)
)
)
) { // first word is ok condition
// LANG_hu section: spec. Hungarian rule
if (langnum == LANG_hu) {
// calculate syllable number of the word
numsyllable += get_syllable(st, i);
// + 1 word, if syllable number of the prefix > 1 (hungarian convention)
- if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+ if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
}
// END of LANG_hu section
// NEXT WORD(S)
rv_first = rv;
st[i] = ch;
@@ -1837,22 +1838,22 @@ struct hentry * AffixMgr::compound_check
if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
// test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv)) rv = NULL;
// check non_compound flag in suffix and prefix
if ((rv) &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
rv = NULL;
}
// check forbiddenwords
if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
(is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
// pfxappnd = prefix of word+i, or NULL
@@ -1868,17 +1869,17 @@ struct hentry * AffixMgr::compound_check
// XXX only second suffix (inflections, not derivations)
if (sfxappnd) {
char * tmp = myrevstrdup(sfxappnd);
numsyllable -= get_syllable(tmp, strlen(tmp));
free(tmp);
}
// + 1 word, if syllable number of the prefix > 1 (hungarian convention)
- if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+ if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
// increment syllable num, if last word has a SYLLABLENUM flag
// and the suffix is beginning `s'
if (cpdsyllablenum) {
switch (sfxflag) {
case 'c': { numsyllable+=2; break; }
case 'J': { numsyllable += 1; break; }
@@ -1913,18 +1914,18 @@ struct hentry * AffixMgr::compound_check
numsyllable = oldnumsyllable2;
wordnum = oldwordnum2;
// perhaps second word is a compound word (recursive call)
if (wordnum < maxwordnum) {
rv = compound_check((st+i),strlen(st+i), wordnum+1,
numsyllable, maxwordnum, wnum + 1, words, 0, is_sug);
- if (rv && numcheckcpd && (scpd == 0 && cpdpat_check(word, i, rv_first, rv) ||
- scpd != 0 && !cpdpat_check(word, i, rv_first, rv))) rv = NULL;
+ if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv)) ||
+ (scpd != 0 && !cpdpat_check(word, i, rv_first, rv)))) rv = NULL;
} else {
rv=NULL;
}
if (rv) {
// forbid compound word, if it is a non compound word with typical fault
if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
return rv_first;
}
@@ -2001,17 +2002,17 @@ int AffixMgr::compound_check_morph(const
}
ch = st[i];
st[i] = '\0';
sfx = NULL;
// FIRST WORD
*presult = '\0';
- if (partresult) strcat(presult, partresult);
+ if (partresult) mystrcat(presult, partresult, MAXLNLEN);
rv = lookup(st); // perhaps without prefix
// search homonym with compound flag
while ((rv) && !hu_mov_rule &&
((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundbegin && !wordnum &&
@@ -2036,21 +2037,21 @@ int AffixMgr::compound_check_morph(const
sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
}
}
if (!rv) {
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
- ((SfxEntry*)sfx)->getCont() &&
- ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
- ((SfxEntry*)sfx)->getContLen())))) {
+ sfx->getCont() &&
+ ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) || (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend,
+ sfx->getContLen())))) {
rv = NULL;
}
}
if (rv ||
(((wordnum == 0) && compoundbegin &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
@@ -2081,44 +2082,44 @@ int AffixMgr::compound_check_morph(const
} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
TESTAFF(rv->astr, needaffix, rv->alen))) {
st[i] = ch;
continue;
}
// check non_compound flag in suffix and prefix
if ((rv) && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
continue;
}
// check compoundend flag in suffix and prefix
if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend,
+ sfx->getContLen())))) {
continue;
}
// check compoundmiddle flag in suffix and prefix
if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle,
+ sfx->getContLen())))) {
rv = NULL;
}
// check forbiddenwords
if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) continue;
// increment word number, if the second root has a compoundroot flag
if ((rv) && (compoundroot) &&
@@ -2153,65 +2154,65 @@ int AffixMgr::compound_check_morph(const
numcheckcpd && !words && cpdpat_check(word, i, rv, NULL)
) ||
(
checkcompoundcase && !words && cpdcase_check(word, i)
))
)
// LANG_hu section: spec. Hungarian rule
|| ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
- (sfx && ((SfxEntry*)sfx)->getCont() && (
- TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
- TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
+ (sfx && sfx->getCont() && (
+ TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
+ TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
)
)
)
// END of LANG_hu section
) {
// LANG_hu section: spec. Hungarian rule
if (langnum == LANG_hu) {
// calculate syllable number of the word
numsyllable += get_syllable(st, i);
// + 1 word, if syllable number of the prefix > 1 (hungarian convention)
- if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+ if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
}
// END of LANG_hu section
// NEXT WORD(S)
rv_first = rv;
rv = lookup((word+i)); // perhaps without prefix
// search homonym with compound flag
while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
- (numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
+ (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
rv = rv->next_homonym;
}
if (rv && words && words[wnum + 1]) {
- strcat(*result, presult);
- strcat(*result, " ");
- strcat(*result, MORPH_PART);
- strcat(*result, word+i);
- if (complexprefixes && HENTRY_DATA(rv)) strcat(*result, HENTRY_DATA2(rv));
+ mystrcat(*result, presult, MAXLNLEN);
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_PART, MAXLNLEN);
+ mystrcat(*result, word+i, MAXLNLEN);
+ if (complexprefixes && HENTRY_DATA(rv)) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
if (!HENTRY_FIND(rv, MORPH_STEM)) {
- strcat(*result, " ");
- strcat(*result, MORPH_STEM);
- strcat(*result, HENTRY_WORD(rv));
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_STEM, MAXLNLEN);
+ mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
}
// store the pointer of the hash entry
// sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
if (!complexprefixes && HENTRY_DATA(rv)) {
- strcat(*result, " ");
- strcat(*result, HENTRY_DATA2(rv));
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
}
- strcat(*result, "\n");
+ mystrcat(*result, "\n", MAXLNLEN);
ok = 1;
return 0;
}
oldnumsyllable2 = numsyllable;
oldwordnum2 = wordnum;
// LANG_hu section: spec. Hungarian rule
@@ -2245,36 +2246,36 @@ int AffixMgr::compound_check_morph(const
(numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
)
&& (
(!checkcompounddup || (rv != rv_first))
)
)
{
// bad compound word
- strcat(*result, presult);
- strcat(*result, " ");
- strcat(*result, MORPH_PART);
- strcat(*result, word+i);
+ mystrcat(*result, presult, MAXLNLEN);
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_PART, MAXLNLEN);
+ mystrcat(*result, word+i, MAXLNLEN);
if (HENTRY_DATA(rv)) {
- if (complexprefixes) strcat(*result, HENTRY_DATA2(rv));
+ if (complexprefixes) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
if (! HENTRY_FIND(rv, MORPH_STEM)) {
- strcat(*result, " ");
- strcat(*result, MORPH_STEM);
- strcat(*result, HENTRY_WORD(rv));
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, MORPH_STEM, MAXLNLEN);
+ mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
}
// store the pointer of the hash entry
// sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
if (!complexprefixes) {
- strcat(*result, " ");
- strcat(*result, HENTRY_DATA2(rv));
+ mystrcat(*result, " ", MAXLNLEN);
+ mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
}
}
- strcat(*result, "\n");
+ mystrcat(*result, "\n", MAXLNLEN);
ok = 1;
}
numsyllable = oldnumsyllable2 ;
wordnum = oldwordnum2;
// perhaps second word has prefix or/and suffix
sfx = NULL;
@@ -2292,35 +2293,35 @@ int AffixMgr::compound_check_morph(const
rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
char * m = NULL;
if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
if ((!m || *m == '\0') && compoundend) {
if (m) free(m);
m = affix_check_morph((word+i),strlen(word+i), compoundend);
}
- strcat(*result, presult);
- if (m || (*m != '\0')) {
+ mystrcat(*result, presult, MAXLNLEN);
+ if (m && (*m != '\0')) {
sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
}
if (m) free(m);
- strcat(*result, "\n");
+ mystrcat(*result, "\n", MAXLNLEN);
ok = 1;
}
}
// check non_compound flag in suffix and prefix
if ((rv) &&
- ((pfx && ((PfxEntry*)pfx)->getCont() &&
- TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
- ((PfxEntry*)pfx)->getContLen())) ||
- (sfx && ((SfxEntry*)sfx)->getCont() &&
- TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
- ((SfxEntry*)sfx)->getContLen())))) {
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
rv = NULL;
}
// check forbiddenwords
if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
&& (! TESTAFF(rv->astr, needaffix, rv->alen))) {
st[i] = ch;
continue;
@@ -2334,17 +2335,17 @@ int AffixMgr::compound_check_morph(const
// XXX only second suffix (inflections, not derivations)
if (sfxappnd) {
char * tmp = myrevstrdup(sfxappnd);
numsyllable -= get_syllable(tmp, strlen(tmp));
free(tmp);
}
// + 1 word, if syllable number of the prefix > 1 (hungarian convention)
- if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+ if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
// increment syllable num, if last word has a SYLLABLENUM flag
// and the suffix is beginning `s'
if (cpdsyllablenum) {
switch (sfxflag) {
case 'c': { numsyllable+=2; break; }
case 'J': { numsyllable += 1; break; }
@@ -2372,17 +2373,17 @@ int AffixMgr::compound_check_morph(const
(!checkcompounddup || (rv != rv_first))
)) {
char * m = NULL;
if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
if ((!m || *m == '\0') && compoundend) {
if (m) free(m);
m = affix_check_morph((word+i),strlen(word+i), compoundend);
}
- strcat(*result, presult);
+ mystrcat(*result, presult, MAXLNLEN);
if (m && (*m != '\0')) {
sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
}
if (m) free(m);
sprintf(*result + strlen(*result), "%c", MSEP_REC);
ok = 1;
}
@@ -2425,24 +2426,24 @@ inline int AffixMgr::isRevSubset(const c
len--;
}
return (*s1 == '\0');
}
// check word for suffixes
struct hentry * AffixMgr::suffix_check (const char * word, int len,
- int sfxopts, AffEntry * ppfx, char ** wlst, int maxSug, int * ns,
+ int sfxopts, PfxEntry * ppfx, char ** wlst, int maxSug, int * ns,
const FLAG cclass, const FLAG needflag, char in_compound)
{
struct hentry * rv = NULL;
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// first handle the special case of 0 length suffixes
- SfxEntry * se = (SfxEntry *) sStart[0];
+ SfxEntry * se = sStart[0];
while (se) {
if (!cclass || se->getCont()) {
// suffixes are not allowed in beginning of compounds
if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
// except when signed with compoundpermitflag flag
(se->getCont() && compoundpermitflag &&
TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
@@ -2463,27 +2464,27 @@ struct hentry * AffixMgr::suffix_check (
(ppfx && !((ep->getCont()) &&
TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
) {
rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass,
needflag, (in_compound ? 0 : onlyincompound));
if (rv) {
- sfx=(AffEntry *)se; // BUG: sfx not stateless
+ sfx=se; // BUG: sfx not stateless
return rv;
}
}
}
se = se->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)(word + len - 1));
- SfxEntry * sptr = (SfxEntry *) sStart[sp];
+ SfxEntry * sptr = sStart[sp];
while (sptr) {
if (isRevSubset(sptr->getKey(), word + len - 1, len)
) {
// suffixes are not allowed in beginning of compounds
if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
// except when signed with compoundpermitflag flag
(sptr->getCont() && compoundpermitflag &&
@@ -2505,17 +2506,17 @@ struct hentry * AffixMgr::suffix_check (
(ppfx && !((ep->getCont()) &&
TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
) {
rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
if (rv) {
- sfx=(AffEntry *)sptr; // BUG: sfx not stateless
+ sfx=sptr; // BUG: sfx not stateless
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
return rv;
}
}
sptr = sptr->getNextEQ();
} else {
sptr = sptr->getNextNE();
@@ -2523,34 +2524,34 @@ struct hentry * AffixMgr::suffix_check (
}
return NULL;
}
// check word for two-level suffixes
struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
- int sfxopts, AffEntry * ppfx, const FLAG needflag)
+ int sfxopts, PfxEntry * ppfx, const FLAG needflag)
{
struct hentry * rv = NULL;
// first handle the special case of 0 length suffixes
- SfxEntry * se = (SfxEntry *) sStart[0];
+ SfxEntry * se = sStart[0];
while (se) {
if (contclasses[se->getFlag()])
{
rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
if (rv) return rv;
}
se = se->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)(word + len - 1));
- SfxEntry * sptr = (SfxEntry *) sStart[sp];
+ SfxEntry * sptr = sStart[sp];
while (sptr) {
if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
if (contclasses[sptr->getFlag()])
{
rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
if (rv) {
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
@@ -2563,56 +2564,56 @@ struct hentry * AffixMgr::suffix_check_t
sptr = sptr->getNextNE();
}
}
return NULL;
}
char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
- int sfxopts, AffEntry * ppfx, const FLAG needflag)
+ int sfxopts, PfxEntry * ppfx, const FLAG needflag)
{
char result[MAXLNLEN];
char result2[MAXLNLEN];
char result3[MAXLNLEN];
char * st;
result[0] = '\0';
result2[0] = '\0';
result3[0] = '\0';
// first handle the special case of 0 length suffixes
- SfxEntry * se = (SfxEntry *) sStart[0];
+ SfxEntry * se = sStart[0];
while (se) {
if (contclasses[se->getFlag()])
{
st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
if (st) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) {
- mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
- } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
+ } else debugflag(result, ppfx->getFlag());
}
mystrcat(result, st, MAXLNLEN);
free(st);
if (se->getMorph()) {
mystrcat(result, " ", MAXLNLEN);
mystrcat(result, se->getMorph(), MAXLNLEN);
} else debugflag(result, se->getFlag());
mystrcat(result, "\n", MAXLNLEN);
}
}
se = se->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)(word + len - 1));
- SfxEntry * sptr = (SfxEntry *) sStart[sp];
+ SfxEntry * sptr = sStart[sp];
while (sptr) {
if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
if (contclasses[sptr->getFlag()])
{
st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
if (st) {
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
@@ -2636,28 +2637,28 @@ char * AffixMgr::suffix_check_twosfx_mor
sptr = sptr->getNextNE();
}
}
if (*result) return mystrdup(result);
return NULL;
}
char * AffixMgr::suffix_check_morph(const char * word, int len,
- int sfxopts, AffEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
+ int sfxopts, PfxEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
{
char result[MAXLNLEN];
struct hentry * rv = NULL;
result[0] = '\0';
- PfxEntry* ep = (PfxEntry *) ppfx;
+ PfxEntry* ep = ppfx;
// first handle the special case of 0 length suffixes
- SfxEntry * se = (SfxEntry *) sStart[0];
+ SfxEntry * se = sStart[0];
while (se) {
if (!cclass || se->getCont()) {
// suffixes are not allowed in beginning of compounds
if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
// except when signed with compoundpermitflag flag
(se->getCont() && compoundpermitflag &&
TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
// no circumfix flag in prefix and suffix
@@ -2674,23 +2675,23 @@ char * AffixMgr::suffix_check_morph(cons
// needaffix on prefix or first suffix
(cclass ||
!(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
(ppfx && !((ep->getCont()) &&
TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
))
- rv = se->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
+ rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
while (rv) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) {
- mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
- } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
+ } else debugflag(result, ppfx->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
if (! HENTRY_FIND(rv, MORPH_STEM)) {
mystrcat(result, " ", MAXLNLEN);
mystrcat(result, MORPH_STEM, MAXLNLEN);
mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
}
// store the pointer of the hash entry
@@ -2708,17 +2709,17 @@ char * AffixMgr::suffix_check_morph(cons
rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
}
se = se->getNext();
}
// now handle the general case
unsigned char sp = *((const unsigned char *)(word + len - 1));
- SfxEntry * sptr = (SfxEntry *) sStart[sp];
+ SfxEntry * sptr = sStart[sp];
while (sptr) {
if (isRevSubset(sptr->getKey(), word + len - 1, len)
) {
// suffixes are not allowed in beginning of compounds
if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
// except when signed with compoundpermitflag flag
(sptr->getCont() && compoundpermitflag &&
@@ -2735,20 +2736,20 @@ char * AffixMgr::suffix_check_morph(cons
(in_compound ||
!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
// needaffix on first suffix
(cclass || !(sptr->getCont() &&
TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
)) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
while (rv) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) {
- mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
+ if (ppfx->getMorph()) {
+ mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
- } else debugflag(result, ((PfxEntry *) ppfx)->getFlag());
+ } else debugflag(result, ppfx->getFlag());
}
if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
if (! HENTRY_FIND(rv, MORPH_STEM)) {
mystrcat(result, " ", MAXLNLEN);
mystrcat(result, MORPH_STEM, MAXLNLEN);
mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
}
// store the pointer of the hash entry
@@ -2847,39 +2848,39 @@ char * AffixMgr::affix_check_morph(const
char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
unsigned short al, char * morph, char * targetmorph, int level)
{
// handle suffixes
char * stemmorph;
char * stemmorphcatpos;
char mymorph[MAXLNLEN];
- if (!morph && !targetmorph) return NULL;
+ if (!morph) return NULL;
// check substandard flag
if (TESTAFF(ap, substandard, al)) return NULL;
if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
// int targetcount = get_sfxcount(targetmorph);
// use input suffix fields, if exist
if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
stemmorph = mymorph;
strcpy(stemmorph, morph);
- strcat(stemmorph, " ");
+ mystrcat(stemmorph, " ", MAXLNLEN);
stemmorphcatpos = stemmorph + strlen(stemmorph);
} else {
stemmorph = morph;
stemmorphcatpos = NULL;
}
for (int i = 0; i < al; i++) {
const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
- SfxEntry * sptr = (SfxEntry *)sFlag[c];
+ SfxEntry * sptr = sFlag[c];
while (sptr) {
if (sptr->getFlag() == ap[i] && sptr->getMorph() && ((sptr->getContLen() == 0) ||
// don't generate forms with substandard affixes
!TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
else stemmorph = (char *) sptr->getMorph();
@@ -2910,17 +2911,17 @@ char * AffixMgr::morphgen(char * ts, int
free(newword);
return newword2;
}
free(newword);
newword = NULL;
}
}
}
- sptr = (SfxEntry *)sptr ->getFlgNxt();
+ sptr = sptr->getFlgNxt();
}
}
return NULL;
}
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
@@ -2944,17 +2945,17 @@ int AffixMgr::expand_rootword(struct gue
if (!wlst[nh].orig) return nh - 1;
nh++;
}
}
// handle suffixes
for (int i = 0; i < al; i++) {
const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
- SfxEntry * sptr = (SfxEntry *)sFlag[c];
+ SfxEntry * sptr = sFlag[c];
while (sptr) {
if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
(strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
// check needaffix flag
!(sptr->getCont() && ((needaffix &&
TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
(circumfix &&
TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
@@ -2981,54 +2982,54 @@ int AffixMgr::expand_rootword(struct gue
if (!wlst[nh].orig) return nh - 1;
nh++;
}
} else {
free(newword);
}
}
}
- sptr = (SfxEntry *)sptr ->getFlgNxt();
+ sptr = sptr->getFlgNxt();
}
}
int n = nh;
// handle cross products of prefixes and suffixes
for (int j=1;j<n ;j++)
if (wlst[j].allow) {
for (int k = 0; k < al; k++) {
const unsigned char c = (unsigned char) (ap[k] & 0x00FF);
- PfxEntry * cptr = (PfxEntry *) pFlag[c];
+ PfxEntry * cptr = pFlag[c];
while (cptr) {
if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
(strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
int l1 = strlen(wlst[j].word);
char * newword = cptr->add(wlst[j].word, l1);
if (newword) {
if (nh < maxn) {
wlst[nh].word = newword;
wlst[nh].allow = cptr->allowCross();
wlst[nh].orig = NULL;
nh++;
} else {
free(newword);
}
}
}
- cptr = (PfxEntry *)cptr ->getFlgNxt();
+ cptr = cptr->getFlgNxt();
}
}
}
// now handle pure prefixes
for (int m = 0; m < al; m ++) {
const unsigned char c = (unsigned char) (ap[m] & 0x00FF);
- PfxEntry * ptr = (PfxEntry *) pFlag[c];
+ PfxEntry * ptr = pFlag[c];
while (ptr) {
if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
(strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
// check needaffix flag
!(ptr->getCont() && ((needaffix &&
TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
(circumfix &&
TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
@@ -3042,280 +3043,280 @@ int AffixMgr::expand_rootword(struct gue
wlst[nh].allow = ptr->allowCross();
wlst[nh].orig = NULL;
nh++;
} else {
free(newword);
}
}
}
- ptr = (PfxEntry *)ptr ->getFlgNxt();
+ ptr = ptr->getFlgNxt();
}
}
return nh;
}
// return length of replacing table
-int AffixMgr::get_numrep()
+int AffixMgr::get_numrep() const
{
return numrep;
}
// return replacing table
-struct replentry * AffixMgr::get_reptable()
+struct replentry * AffixMgr::get_reptable() const
{
if (! reptable ) return NULL;
return reptable;
}
// return iconv table
-RepList * AffixMgr::get_iconvtable()
+RepList * AffixMgr::get_iconvtable() const
{
if (! iconvtable ) return NULL;
return iconvtable;
}
// return oconv table
-RepList * AffixMgr::get_oconvtable()
+RepList * AffixMgr::get_oconvtable() const
{
if (! oconvtable ) return NULL;
return oconvtable;
}
// return replacing table
-struct phonetable * AffixMgr::get_phonetable()
+struct phonetable * AffixMgr::get_phonetable() const
{
if (! phone ) return NULL;
return phone;
}
// return length of character map table
-int AffixMgr::get_nummap()
+int AffixMgr::get_nummap() const
{
return nummap;
}
// return character map table
-struct mapentry * AffixMgr::get_maptable()
+struct mapentry * AffixMgr::get_maptable() const
{
if (! maptable ) return NULL;
return maptable;
}
// return length of word break table
-int AffixMgr::get_numbreak()
+int AffixMgr::get_numbreak() const
{
return numbreak;
}
// return character map table
-char ** AffixMgr::get_breaktable()
+char ** AffixMgr::get_breaktable() const
{
if (! breaktable ) return NULL;
return breaktable;
}
// return text encoding of dictionary
char * AffixMgr::get_encoding()
{
if (! encoding ) encoding = mystrdup(SPELL_ENCODING);
return mystrdup(encoding);
}
// return text encoding of dictionary
-int AffixMgr::get_langnum()
+int AffixMgr::get_langnum() const
{
return langnum;
}
// return double prefix option
-int AffixMgr::get_complexprefixes()
+int AffixMgr::get_complexprefixes() const
{
return complexprefixes;
}
// return FULLSTRIP option
-int AffixMgr::get_fullstrip()
+int AffixMgr::get_fullstrip() const
{
return fullstrip;
}
-FLAG AffixMgr::get_keepcase()
+FLAG AffixMgr::get_keepcase() const
{
return keepcase;
}
-int AffixMgr::get_checksharps()
+int AffixMgr::get_checksharps() const
{
return checksharps;
}
-char * AffixMgr::encode_flag(unsigned short aflag)
+char * AffixMgr::encode_flag(unsigned short aflag) const
{
return pHMgr->encode_flag(aflag);
}
// return the preferred ignore string for suggestions
-char * AffixMgr::get_ignore()
+char * AffixMgr::get_ignore() const
{
if (!ignorechars) return NULL;
return ignorechars;
}
// return the preferred ignore string for suggestions
-unsigned short * AffixMgr::get_ignore_utf16(int * len)
+unsigned short * AffixMgr::get_ignore_utf16(int * len) const
{
*len = ignorechars_utf16_len;
return ignorechars_utf16;
}
// return the keyboard string for suggestions
char * AffixMgr::get_key_string()
{
if (! keystring ) keystring = mystrdup(SPELL_KEYSTRING);
return mystrdup(keystring);
}
// return the preferred try string for suggestions
-char * AffixMgr::get_try_string()
+char * AffixMgr::get_try_string() const
{
if (! trystring ) return NULL;
return mystrdup(trystring);
}
// return the preferred try string for suggestions
-const char * AffixMgr::get_wordchars()
+const char * AffixMgr::get_wordchars() const
{
return wordchars;
}
-unsigned short * AffixMgr::get_wordchars_utf16(int * len)
+unsigned short * AffixMgr::get_wordchars_utf16(int * len) const
{
*len = wordchars_utf16_len;
return wordchars_utf16;
}
// is there compounding?
-int AffixMgr::get_compound()
+int AffixMgr::get_compound() const
{
return compoundflag || compoundbegin || numdefcpd;
}
// return the compound words control flag
-FLAG AffixMgr::get_compoundflag()
+FLAG AffixMgr::get_compoundflag() const
{
return compoundflag;
}
// return the forbidden words control flag
-FLAG AffixMgr::get_forbiddenword()
+FLAG AffixMgr::get_forbiddenword() const
{
return forbiddenword;
}
// return the forbidden words control flag
-FLAG AffixMgr::get_nosuggest()
+FLAG AffixMgr::get_nosuggest() const
{
return nosuggest;
}
// return the forbidden words flag modify flag
-FLAG AffixMgr::get_needaffix()
+FLAG AffixMgr::get_needaffix() const
{
return needaffix;
}
// return the onlyincompound flag
-FLAG AffixMgr::get_onlyincompound()
+FLAG AffixMgr::get_onlyincompound() const
{
return onlyincompound;
}
// return the compound word signal flag
-FLAG AffixMgr::get_compoundroot()
+FLAG AffixMgr::get_compoundroot() const
{
return compoundroot;
}
// return the compound begin signal flag
-FLAG AffixMgr::get_compoundbegin()
+FLAG AffixMgr::get_compoundbegin() const
{
return compoundbegin;
}
// return the value of checknum
-int AffixMgr::get_checknum()
+int AffixMgr::get_checknum() const
{
return checknum;
}
// return the value of prefix
-const char * AffixMgr::get_prefix()
+const char * AffixMgr::get_prefix() const
{
- if (pfx) return ((PfxEntry *)pfx)->getKey();
+ if (pfx) return pfx->getKey();
return NULL;
}
// return the value of suffix
-const char * AffixMgr::get_suffix()
+const char * AffixMgr::get_suffix() const
{
return sfxappnd;
}
// return the value of suffix
-const char * AffixMgr::get_version()
+const char * AffixMgr::get_version() const
{
return version;
}
// return lemma_present flag
-FLAG AffixMgr::get_lemma_present()
+FLAG AffixMgr::get_lemma_present() const
{
return lemma_present;
}
// utility method to look up root words in hash table
struct hentry * AffixMgr::lookup(const char * word)
{
int i;
struct hentry * he = NULL;
for (i = 0; i < *maxdic && !he; i++) {
he = (alldic[i])->lookup(word);
}
return he;
}
// return the value of suffix
-const int AffixMgr::have_contclass()
+int AffixMgr::have_contclass() const
{
return havecontclass;
}
// return utf8
-int AffixMgr::get_utf8()
+int AffixMgr::get_utf8() const
{
return utf8;
}
// return nosplitsugs
-int AffixMgr::get_maxngramsugs(void)
+int AffixMgr::get_maxngramsugs(void) const
{
return maxngramsugs;
}
// return nosplitsugs
-int AffixMgr::get_nosplitsugs(void)
+int AffixMgr::get_nosplitsugs(void) const
{
return nosplitsugs;
}
// return sugswithdots
-int AffixMgr::get_sugswithdots(void)
+int AffixMgr::get_sugswithdots(void) const
{
return sugswithdots;
}
/* parse flag */
int AffixMgr::parse_flag(char * line, unsigned short * out, FileMgr * af) {
char * s = NULL;
if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
@@ -3480,17 +3481,17 @@ int AffixMgr::parse_convtable(char * li
case 0: { np++; break; }
case 1: {
numrl = atoi(piece);
if (numrl < 1) {
HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
return 1;
}
*rl = new RepList(numrl);
- if (!rl) return 1;
+ if (!*rl) return 1;
np++;
break;
}
default: break;
}
i++;
}
piece = mystrsep(&tp, 0);
@@ -3556,26 +3557,30 @@ int AffixMgr::parse_phonetable(char * l
int np = 0;
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: { np++; break; }
case 1: {
phone = (phonetable *) malloc(sizeof(struct phonetable));
+ if (!phone) return 1;
phone->num = atoi(piece);
phone->rules = NULL;
phone->utf8 = (char) utf8;
- if (!phone) return 1;
if (phone->num < 1) {
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
return 1;
}
phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
- if (!phone->rules) return 1;
+ if (!phone->rules) {
+ free(phone);
+ phone = NULL;
+ return 1;
+ }
np++;
break;
}
default: break;
}
i++;
}
piece = mystrsep(&tp, 0);
@@ -3776,17 +3781,17 @@ int AffixMgr::parse_defcpdtable(char *
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
numdefcpd = 0;
return 1;
}
break;
}
case 1: { // handle parenthesized flags
if (strchr(piece, '(')) {
- defcpdtable[j].def = (FLAG *) malloc(sizeof(piece) * sizeof(FLAG));
+ defcpdtable[j].def = (FLAG *) malloc(strlen(piece) * sizeof(FLAG));
defcpdtable[j].len = 0;
int end = 0;
FLAG * conv;
while (!end) {
char * par = piece + 1;
while (*par != '(' && *par != ')' && *par != '\0') par++;
if (*par == '\0') end = 1; else *par = '\0';
if (*piece == '(') piece++;
@@ -3875,41 +3880,52 @@ int AffixMgr::parse_maptable(char * lin
if (strncmp(piece,"MAP",3) != 0) {
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
nummap = 0;
return 1;
}
break;
}
case 1: {
- maptable[j].len = 0;
- maptable[j].set = NULL;
- maptable[j].set_utf16 = NULL;
- if (!utf8) {
- maptable[j].set = mystrdup(piece);
- maptable[j].len = strlen(maptable[j].set);
- } else {
- w_char w[MAXWORDLEN];
- int n = u8_u16(w, MAXWORDLEN, piece);
- if (n > 0) {
- flag_qsort((unsigned short *) w, 0, n);
- maptable[j].set_utf16 = (w_char *) malloc(n * sizeof(w_char));
- if (!maptable[j].set_utf16) return 1;
- memcpy(maptable[j].set_utf16, w, n * sizeof(w_char));
- }
- maptable[j].len = n;
- }
+ int setn = 0;
+ maptable[j].len = strlen(piece);
+ maptable[j].set = (char **) malloc(maptable[j].len * sizeof(char*));
+ if (!maptable[j].set) return 1;
+ for (int k = 0; k < maptable[j].len; k++) {
+ int chl = 1;
+ int chb = k;
+ if (piece[k] == '(') {
+ char * parpos = strchr(piece + k, ')');
+ if (parpos != NULL) {
+ chb = k + 1;
+ chl = (int)(parpos - piece) - k - 1;
+ k = k + chl + 1;
+ }
+ } else {
+ if (utf8 && (piece[k] & 0xc0) == 0xc0) {
+ for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++);
+ chl = k - chb;
+ k--;
+ }
+ }
+ maptable[j].set[setn] = (char *) malloc(chl + 1);
+ if (!maptable[j].set[setn]) return 1;
+ strncpy(maptable[j].set[setn], piece + chb, chl);
+ maptable[j].set[setn][chl] = '\0';
+ setn++;
+ }
+ maptable[j].len = setn;
break; }
default: break;
}
i++;
}
piece = mystrsep(&tp, 0);
}
- if ((!(maptable[j].set || maptable[j].set_utf16)) || (!(maptable[j].len))) {
+ if (!maptable[j].set || !maptable[j].len) {
HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
nummap = 0;
return 1;
}
}
return 0;
}
@@ -4016,18 +4032,17 @@ void AffixMgr::reverse_condition(char *
int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
{
int numents = 0; // number of affentry structures to parse
unsigned short aflag = 0; // affix char identifier
char ff=0;
- struct affentry * ptr= NULL;
- struct affentry * nptr= NULL;
+ std::vector<affentry> affentries;
char * tp = line;
char * nl = line;
char * piece;
int i = 0;
// checking lines with bad syntax
#ifdef DEBUG
@@ -4069,62 +4084,59 @@ int AffixMgr::parse_affix(char * line,
char * err = pHMgr->encode_flag(aflag);
if (err) {
HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
af->getlinenum());
free(err);
}
return 1;
}
- ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
- if (!ptr) return 1;
- ptr->opts = ff;
- if (utf8) ptr->opts += aeUTF8;
- if (pHMgr->is_aliasf()) ptr->opts += aeALIASF;
- if (pHMgr->is_aliasm()) ptr->opts += aeALIASM;
- ptr->aflag = aflag;
+ affentries.resize(numents);
+ affentries[0].opts = ff;
+ if (utf8) affentries[0].opts += aeUTF8;
+ if (pHMgr->is_aliasf()) affentries[0].opts += aeALIASF;
+ if (pHMgr->is_aliasm()) affentries[0].opts += aeALIASM;
+ affentries[0].aflag = aflag;
}
default: break;
}
i++;
}
piece = mystrsep(&tp, 0);
}
// check to make sure we parsed enough pieces
if (np != 4) {
char * err = pHMgr->encode_flag(aflag);
if (err) {
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
free(err);
}
- free(ptr);
return 1;
}
- // store away ptr to first affentry
- nptr = ptr;
-
// now parse numents affentries for this affix
- for (int j=0; j < numents; j++) {
+ std::vector<affentry>::iterator start = affentries.begin();
+ std::vector<affentry>::iterator end = affentries.end();
+ for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
if (!(nl = af->getline())) return 1;
mychomp(nl);
tp = nl;
i = 0;
np = 0;
// split line into pieces
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch(i) {
// piece 1 - is type
case 0: {
np++;
- if (nptr != ptr) nptr->opts = ptr->opts &
+ if (entry != start) entry->opts = start->opts &
(char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
break;
}
// piece 2 - is affix char
case 1: {
np++;
if (pHMgr->decode_flag(piece) != aflag) {
@@ -4132,132 +4144,132 @@ int AffixMgr::parse_affix(char * line,
if (err) {
HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
af->getlinenum(), err);
free(err);
}
return 1;
}
- if (nptr != ptr) nptr->aflag = ptr->aflag;
+ if (entry != start) entry->aflag = start->aflag;
break;
}
// piece 3 - is string to strip or 0 for null
case 2: {
np++;
if (complexprefixes) {
if (utf8) reverseword_utf(piece); else reverseword(piece);
}
- nptr->strip = mystrdup(piece);
- nptr->stripl = (unsigned char) strlen(nptr->strip);
- if (strcmp(nptr->strip,"0") == 0) {
- free(nptr->strip);
- nptr->strip=mystrdup("");
- nptr->stripl = 0;
+ entry->strip = mystrdup(piece);
+ entry->stripl = (unsigned char) strlen(entry->strip);
+ if (strcmp(entry->strip,"0") == 0) {
+ free(entry->strip);
+ entry->strip=mystrdup("");
+ entry->stripl = 0;
}
break;
}
// piece 4 - is affix string or 0 for null
case 3: {
char * dash;
- nptr->morphcode = NULL;
- nptr->contclass = NULL;
- nptr->contclasslen = 0;
+ entry->morphcode = NULL;
+ entry->contclass = NULL;
+ entry->contclasslen = 0;
np++;
dash = strchr(piece, '/');
if (dash) {
*dash = '\0';
if (ignorechars) {
if (utf8) {
remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
} else {
remove_ignored_chars(piece,ignorechars);
}
}
if (complexprefixes) {
if (utf8) reverseword_utf(piece); else reverseword(piece);
}
- nptr->appnd = mystrdup(piece);
+ entry->appnd = mystrdup(piece);
if (pHMgr->is_aliasf()) {
int index = atoi(dash + 1);
- nptr->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(nptr->contclass), af);
- if (!nptr->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
+ entry->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(entry->contclass), af);
+ if (!entry->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
} else {
- nptr->contclasslen = (unsigned short) pHMgr->decode_flags(&(nptr->contclass), dash + 1, af);
- flag_qsort(nptr->contclass, 0, nptr->contclasslen);
+ entry->contclasslen = (unsigned short) pHMgr->decode_flags(&(entry->contclass), dash + 1, af);
+ flag_qsort(entry->contclass, 0, entry->contclasslen);
}
*dash = '/';
havecontclass = 1;
- for (unsigned short _i = 0; _i < nptr->contclasslen; _i++) {
- contclasses[(nptr->contclass)[_i]] = 1;
+ for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
+ contclasses[(entry->contclass)[_i]] = 1;
}
} else {
if (ignorechars) {
if (utf8) {
remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
} else {
remove_ignored_chars(piece,ignorechars);
}
}
if (complexprefixes) {
if (utf8) reverseword_utf(piece); else reverseword(piece);
}
- nptr->appnd = mystrdup(piece);
+ entry->appnd = mystrdup(piece);
}
- nptr->appndl = (unsigned char) strlen(nptr->appnd);
- if (strcmp(nptr->appnd,"0") == 0) {
- free(nptr->appnd);
- nptr->appnd=mystrdup("");
- nptr->appndl = 0;
+ entry->appndl = (unsigned char) strlen(entry->appnd);
+ if (strcmp(entry->appnd,"0") == 0) {
+ free(entry->appnd);
+ entry->appnd=mystrdup("");
+ entry->appndl = 0;
}
break;
}
// piece 5 - is the conditions descriptions
case 4: {
np++;
if (complexprefixes) {
if (utf8) reverseword_utf(piece); else reverseword(piece);
reverse_condition(piece);
}
- if (nptr->stripl && (strcmp(piece, ".") != 0) &&
- redundant_condition(at, nptr->strip, nptr->stripl, piece, af->getlinenum()))
+ if (entry->stripl && (strcmp(piece, ".") != 0) &&
+ redundant_condition(at, entry->strip, entry->stripl, piece, af->getlinenum()))
strcpy(piece, ".");
if (at == 'S') {
reverseword(piece);
reverse_condition(piece);
}
- if (encodeit(nptr, piece)) return 1;
+ if (encodeit(*entry, piece)) return 1;
break;
}
case 5: {
np++;
if (pHMgr->is_aliasm()) {
int index = atoi(piece);
- nptr->morphcode = pHMgr->get_aliasm(index);
+ entry->morphcode = pHMgr->get_aliasm(index);
} else {
if (complexprefixes) { // XXX - fix me for morph. gen.
if (utf8) reverseword_utf(piece); else reverseword(piece);
}
// add the remaining of the line
if (*tp) {
*(tp - 1) = ' ';
tp = tp + strlen(tp);
}
- nptr->morphcode = mystrdup(piece);
- if (!nptr->morphcode) return 1;
+ entry->morphcode = mystrdup(piece);
+ if (!entry->morphcode) return 1;
}
break;
}
default: break;
}
i++;
}
piece = mystrsep(&tp, 0);
@@ -4265,47 +4277,42 @@ int AffixMgr::parse_affix(char * line,
// check to make sure we parsed enough pieces
if (np < 4) {
char * err = pHMgr->encode_flag(aflag);
if (err) {
HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
af->getlinenum(), err);
free(err);
}
- free(ptr);
return 1;
}
#ifdef DEBUG
// detect unnecessary fields, excepting comments
if (basefieldnum) {
- int fieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
+ int fieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
if (fieldnum != basefieldnum)
HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", af->getlinenum());
} else {
- basefieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
+ basefieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
}
#endif
- nptr++;
}
// now create SfxEntry or PfxEntry objects and use links to
// build an ordered (sorted by affix string) list
- nptr = ptr;
- for (int k = 0; k < numents; k++) {
+ for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
if (at == 'P') {
- PfxEntry * pfxptr = new PfxEntry(this,nptr);
- build_pfxtree((AffEntry *)pfxptr);
+ PfxEntry * pfxptr = new PfxEntry(this,&(*entry));
+ build_pfxtree(pfxptr);
} else {
- SfxEntry * sfxptr = new SfxEntry(this,nptr);
- build_sfxtree((AffEntry *)sfxptr);
+ SfxEntry * sfxptr = new SfxEntry(this,&(*entry));
+ build_sfxtree(sfxptr);
}
- nptr++;
}
- free(ptr);
return 0;
}
int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, int linenum) {
int condl = strlen(cond);
int i;
int j;
int neg;
@@ -4323,17 +4330,17 @@ int AffixMgr::redundant_condition(char f
} else {
neg = (cond[j+1] == '^') ? 1 : 0;
in = 0;
do {
j++;
if (strip[i] == cond[j]) in = 1;
} while ((j < (condl - 1)) && (cond[j] != ']'));
if (j == (condl - 1) && (cond[j] != ']')) {
- HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum);
+ HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum, cond);
return 0;
}
if ((!neg && !in) || (neg && in)) {
HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
return 0;
}
}
}
@@ -4351,17 +4358,17 @@ int AffixMgr::redundant_condition(char f
}
} else {
in = 0;
do {
j--;
if (strip[i] == cond[j]) in = 1;
} while ((j > 0) && (cond[j] != '['));
if ((j == 0) && (cond[j] != '[')) {
- HUNSPELL_WARNING(stderr, "error: error: %d: missing ] in condition:\n%s\n", linenum);
+ HUNSPELL_WARNING(stderr, "error: line %d: missing [ in condition:\n%s\n", linenum, cond); // backward scan reached cond[0] without finding the opening '['
return 0;
}
neg = (cond[j+1] == '^') ? 1 : 0;
if ((!neg && !in) || (neg && in)) {
HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
return 0;
}
}
--- a/extensions/spellcheck/hunspell/src/affixmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,42 +53,40 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _AFFIXMGR_HXX_
#define _AFFIXMGR_HXX_
-#ifdef MOZILLA_CLIENT
-#ifdef __SUNPRO_CC // for SunONE Studio compiler
-using namespace std;
-#endif
+#include "hunvisapi.h"
+
#include <stdio.h>
-#else
-#include <cstdio>
-#endif
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "hashmgr.hxx"
#include "phonet.hxx"
#include "replist.hxx"
// check flag duplication
#define dupSFX (1 << 0)
#define dupPFX (1 << 1)
-class AffixMgr
+class PfxEntry;
+class SfxEntry;
+
+class LIBHUNSPELL_DLL_EXPORTED AffixMgr
{
- AffEntry * pStart[SETSIZE];
- AffEntry * sStart[SETSIZE];
- AffEntry * pFlag[SETSIZE];
- AffEntry * sFlag[SETSIZE];
+ PfxEntry * pStart[SETSIZE];
+ SfxEntry * sStart[SETSIZE];
+ PfxEntry * pFlag[SETSIZE];
+ SfxEntry * sFlag[SETSIZE];
HashMgr * pHMgr;
HashMgr ** alldic;
int * maxdic;
char * keystring;
char * trystring;
char * encoding;
struct cs_info * csconv;
int utf8;
@@ -130,18 +129,18 @@ class AffixMgr
char * cpdvowels;
w_char * cpdvowels_utf16;
int cpdvowels_utf16_len;
char * cpdsyllablenum;
const char * pfxappnd; // BUG: not stateless
const char * sfxappnd; // BUG: not stateless
FLAG sfxflag; // BUG: not stateless
char * derived; // BUG: not stateless
- AffEntry * sfx; // BUG: not stateless
- AffEntry * pfx; // BUG: not stateless
+ SfxEntry * sfx; // BUG: not stateless
+ PfxEntry * pfx; // BUG: not stateless
int checknum;
char * wordchars;
unsigned short * wordchars_utf16;
int wordchars_utf16_len;
char * ignorechars;
unsigned short * ignorechars_utf16;
int ignorechars_utf16_len;
char * version;
@@ -168,34 +167,34 @@ public:
char in_compound = IN_CPD_NOT);
struct hentry * prefix_check(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isSubset(const char * s1, const char * s2);
struct hentry * prefix_check_twosfx(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
struct hentry * suffix_check(const char * word, int len, int sfxopts,
- AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
+ PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
struct hentry * suffix_check_twosfx(const char * word, int len,
- int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+ int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
char * affix_check_morph(const char * word, int len,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_morph (const char * word, int len, int sfxopts,
- AffEntry * ppfx, const FLAG cclass = FLAG_NULL,
+ PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_twosfx_morph(const char * word, int len,
- int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
+ int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
char * morphgen(char * ts, int wl, const unsigned short * ap,
unsigned short al, char * morph, char * targetmorph, int level);
int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad,
int, char *);
@@ -211,59 +210,58 @@ public:
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char is_sug);
int compound_check_morph(const char * word, int len, short wordnum,
short numsyllable, short maxwordnum, short wnum, hentry ** words,
char hu_mov_rule, char ** result, char * partresult);
struct hentry * lookup(const char * word);
- int get_numrep();
- struct replentry * get_reptable();
- RepList * get_iconvtable();
- RepList * get_oconvtable();
- struct phonetable * get_phonetable();
- int get_nummap();
- struct mapentry * get_maptable();
- int get_numbreak();
- char ** get_breaktable();
+ int get_numrep() const;
+ struct replentry * get_reptable() const;
+ RepList * get_iconvtable() const;
+ RepList * get_oconvtable() const;
+ struct phonetable * get_phonetable() const;
+ int get_nummap() const;
+ struct mapentry * get_maptable() const;
+ int get_numbreak() const;
+ char ** get_breaktable() const;
char * get_encoding();
- int get_langnum();
+ int get_langnum() const;
char * get_key_string();
- char * get_try_string();
- const char * get_wordchars();
- unsigned short * get_wordchars_utf16(int * len);
- char * get_ignore();
- unsigned short * get_ignore_utf16(int * len);
- int get_compound();
- FLAG get_compoundflag();
- FLAG get_compoundbegin();
- FLAG get_forbiddenword();
- FLAG get_nosuggest();
- FLAG get_needaffix();
- FLAG get_onlyincompound();
- FLAG get_compoundroot();
- FLAG get_lemma_present();
- int get_checknum();
- char * get_possible_root();
- const char * get_prefix();
- const char * get_suffix();
- const char * get_derived();
- const char * get_version();
- const int have_contclass();
- int get_utf8();
- int get_complexprefixes();
- char * get_suffixed(char );
- int get_maxngramsugs();
- int get_nosplitsugs();
- int get_sugswithdots(void);
- FLAG get_keepcase(void);
- int get_checksharps(void);
- char * encode_flag(unsigned short aflag);
- int get_fullstrip();
+ char * get_try_string() const;
+ const char * get_wordchars() const;
+ unsigned short * get_wordchars_utf16(int * len) const;
+ char * get_ignore() const;
+ unsigned short * get_ignore_utf16(int * len) const;
+ int get_compound() const;
+ FLAG get_compoundflag() const;
+ FLAG get_compoundbegin() const;
+ FLAG get_forbiddenword() const;
+ FLAG get_nosuggest() const;
+ FLAG get_needaffix() const;
+ FLAG get_onlyincompound() const;
+ FLAG get_compoundroot() const;
+ FLAG get_lemma_present() const;
+ int get_checknum() const;
+ const char * get_prefix() const;
+ const char * get_suffix() const;
+ const char * get_derived() const;
+ const char * get_version() const;
+ int have_contclass() const;
+ int get_utf8() const;
+ int get_complexprefixes() const;
+ char * get_suffixed(char ) const;
+ int get_maxngramsugs() const;
+ int get_nosplitsugs() const;
+ int get_sugswithdots(void) const;
+ FLAG get_keepcase(void) const;
+ int get_checksharps(void) const;
+ char * encode_flag(unsigned short aflag) const;
+ int get_fullstrip() const;
private:
int parse_file(const char * affpath, const char * key);
int parse_flag(char * line, unsigned short * out, FileMgr * af);
int parse_num(char * line, int * out, FileMgr * af);
int parse_cpdsyllable(char * line, FileMgr * af);
int parse_reptable(char * line, FileMgr * af);
int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
@@ -272,22 +270,23 @@ private:
int parse_breaktable(char * line, FileMgr * af);
int parse_checkcpdtable(char * line, FileMgr * af);
int parse_defcpdtable(char * line, FileMgr * af);
int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
void reverse_condition(char *);
void debugflag(char * result, unsigned short flag);
int condlen(char *);
- int encodeit(struct affentry * ptr, char * cs);
- int build_pfxtree(AffEntry* pfxptr);
- int build_sfxtree(AffEntry* sfxptr);
+ int encodeit(affentry &entry, char * cs);
+ int build_pfxtree(PfxEntry* pfxptr);
+ int build_sfxtree(SfxEntry* sfxptr);
int process_pfx_order();
int process_sfx_order();
- AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
- AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
+ PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
+ SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
int process_pfx_tree_to_list();
int process_sfx_tree_to_list();
int redundant_condition(char, char * strip, int stripl,
const char * cond, int);
};
#endif
+
--- a/extensions/spellcheck/hunspell/src/atypes.hxx
+++ b/extensions/spellcheck/hunspell/src/atypes.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -53,16 +54,17 @@
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _ATYPES_HXX_
#define _ATYPES_HXX_
#ifndef HUNSPELL_WARNING
+#include <stdio.h>
#ifdef HUNSPELL_WARNING_ON
#define HUNSPELL_WARNING fprintf
#else
// empty inline function to switch off warnings (instead of the C99 standard variadic macros)
static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
#endif
#endif
@@ -128,18 +130,17 @@ struct affentry
struct guessword {
char * word;
bool allow;
char * orig;
};
struct mapentry {
- char * set;
- w_char * set_utf16;
+ char ** set;
int len;
};
struct flagentry {
FLAG * def;
int len;
};
--- a/extensions/spellcheck/hunspell/src/baseaffix.hxx
+++ b/extensions/spellcheck/hunspell/src/baseaffix.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,20 +53,20 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
-class AffEntry
+#include "hunvisapi.h"
+
+class LIBHUNSPELL_DLL_EXPORTED AffEntry
{
-public:
-
protected:
char * appnd;
char * strip;
unsigned char appndl;
unsigned char stripl;
char numconds;
char opts;
unsigned short aflag;
--- a/extensions/spellcheck/hunspell/src/csutil.cpp
+++ b/extensions/spellcheck/hunspell/src/csutil.cpp
@@ -13,16 +13,18 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * L. David Baron (dbaron@dbaron.org)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,65 +51,47 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
+#include "csutil.hxx"
#include "atypes.hxx"
-#include "csutil.hxx"
#include "langnum.hxx"
#ifdef OPENOFFICEORG
# include <unicode/uchar.h>
#else
# ifndef MOZILLA_CLIENT
# include "utf_info.cxx"
# define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
# endif
#endif
#ifdef MOZILLA_CLIENT
#include "nsCOMPtr.h"
#include "nsServiceManagerUtils.h"
#include "nsIUnicodeEncoder.h"
#include "nsIUnicodeDecoder.h"
-#include "nsICaseConversion.h"
#include "nsICharsetConverterManager.h"
#include "nsUnicharUtilCIID.h"
#include "nsUnicharUtils.h"
static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
#endif
-#ifdef MOZILLA_CLIENT
-#ifdef __SUNPRO_CC // for SunONE Studio compiler
-using namespace std;
-#endif
-#else
-#ifndef W32
-using namespace std;
-#endif
-#endif
-
static struct unicode_info2 * utf_tbl = NULL;
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
signed char * u8 = (signed char *)dest;
signed char * u8_max = (signed char *)(u8 + size);
const w_char * u2 = src;
@@ -220,17 +204,17 @@ int u8_u16(w_char * dest, int size, cons
u2->h = 0xff;
u2->l = 0xfd;
return -1;
}
}
u8++;
u2++;
}
- return u2 - dest;
+ return (int)(u2 - dest);
}
void flag_qsort(unsigned short flags[], int begin, int end) {
unsigned short reg;
if (end > begin) {
unsigned short pivot = flags[begin];
int l = begin + 1;
int r = end;
@@ -282,39 +266,37 @@ int flag_bsearch(unsigned short flags[],
} else {
// don't use isspace() here, the string can be in some random charset
// that's way different than the locale's
for (dp = mp; (*dp && *dp != ' ' && *dp != '\t'); dp++);
if (!*dp) dp = NULL;
}
if (dp) {
*stringp = dp+1;
- int nc = (int)((unsigned long)dp - (unsigned long)mp);
- *(mp+nc) = '\0';
- return mp;
+ *dp = '\0';
} else {
*stringp = mp + strlen(mp);
- return mp;
}
+ return mp;
}
return NULL;
}
// replaces strdup with ansi version
char * mystrdup(const char * s)
{
char * d = NULL;
if (s) {
- int sl = strlen(s);
- d = (char *) malloc(((sl+1) * sizeof(char)));
+ size_t sl = strlen(s)+1; // byte count including the terminating NUL
+ d = (char *) malloc(sl);
if (d) {
- memcpy(d,s,((sl+1)*sizeof(char)));
- return d;
+ memcpy(d,s,sl); // copies the terminator as well, since sl already counts it
+ } else {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); // d is still NULL and is returned as such below
}
- HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
}
return d;
}
// strcat for limited length destination string
char * mystrcat(char * dest, const char * st, int max) {
int len;
int len2;
@@ -324,43 +306,48 @@ int flag_bsearch(unsigned short flags[],
if (len + len2 + 1 > max) return dest;
strcpy(dest + len, st);
return dest;
}
// remove cross-platform text line end characters
void mychomp(char * s)
{
- int k = strlen(s);
+ size_t k = strlen(s);
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
// does an ansi strdup of the reverse of a string
char * myrevstrdup(const char * s)
{
char * d = NULL;
if (s) {
- int sl = strlen(s);
- d = (char *) malloc((sl+1) * sizeof(char));
+ size_t sl = strlen(s); // NOTE(review): when sl == 0 the "s + sl - 1" below forms s - 1 (before the buffer); the loop is skipped, but confirm this is acceptable
+ d = (char *) malloc(sl+1); // room for the reversed text plus the terminating NUL
if (d) {
const char * p = s + sl - 1;
char * q = d;
while (p >= s) *q++ = *p--;
*q = '\0';
+ } else {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); // allocation failed: d stays NULL and NULL is returned
}
}
return d;
}
// break text to lines
// return number of lines
int line_tok(const char * text, char *** lines, char breakchar) {
int linenum = 0;
+ if (!text) {
+ return linenum;
+ }
char * dup = mystrdup(text);
char * p = strchr(dup, breakchar);
while (p) {
linenum++;
*p = '\0';
p++;
p = strchr(p, breakchar);
}
@@ -577,17 +564,17 @@ int get_sfxcount(const char * morph)
}
return n;
}
int fieldlen(const char * r)
{
int n = 0;
- while (r && *r != '\t' && *r != '\0' && *r != '\n' && *r != ' ') {
+ while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') {
r++;
n++;
}
return n;
}
char * copy_field(char * dest, const char * morph, const char * var)
{
@@ -716,30 +703,30 @@ void mkallcap_utf(w_char * u, int nc, in
unsigned short idx = (u[i].h << 8) + u[i].l;
if (idx != unicodetoupper(idx, langnum)) {
u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
}
}
}
- // convert null terminated string to have intial capital
+ // convert null terminated string to have initial capital
void mkinitcap(char * p, const struct cs_info * csconv)
{
if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
}
// conversion function for protected memory
void store_pointer(char * dest, char * source)
{
memcpy(dest, &source, sizeof(char *));
}
// conversion function for protected memory
- char * get_stored_pointer(char * s)
+ char * get_stored_pointer(const char * s)
{
char * p;
memcpy(&p, s, sizeof(char *));
return p;
}
#ifndef MOZILLA_CLIENT
// convert null terminated string to all caps using encoding
@@ -760,29 +747,29 @@ void mkallcap_utf(w_char * u, int nc, in
struct cs_info * csconv = get_current_cs(encoding);
while (*p != '\0') {
*d++ = csconv[((unsigned char) *p)].clower;
p++;
}
*d = '\0';
}
- // convert null terminated string to have intial capital using encoding
+ // convert null terminated string to have initial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding)
{
struct cs_info * csconv = get_current_cs(encoding);
memcpy(d,p,(strlen(p)+1));
if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
}
// these are simple character mappings for the
// encodings supported
// supplying isupper, tolower, and toupper
-struct cs_info iso1_tbl[] = {
+static struct cs_info iso1_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -1032,17 +1019,17 @@ struct cs_info iso1_tbl[] = {
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso2_tbl[] = {
+static struct cs_info iso2_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -1292,17 +1279,17 @@ struct cs_info iso2_tbl[] = {
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso3_tbl[] = {
+static struct cs_info iso3_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -1551,17 +1538,17 @@ struct cs_info iso3_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso4_tbl[] = {
+static struct cs_info iso4_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -1810,17 +1797,17 @@ struct cs_info iso4_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso5_tbl[] = {
+static struct cs_info iso5_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -2069,17 +2056,17 @@ struct cs_info iso5_tbl[] = {
{ 0x00, 0xfa, 0xaa },
{ 0x00, 0xfb, 0xab },
{ 0x00, 0xfc, 0xac },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xae },
{ 0x00, 0xff, 0xaf }
};
-struct cs_info iso6_tbl[] = {
+static struct cs_info iso6_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -2328,17 +2315,17 @@ struct cs_info iso6_tbl[] = {
{ 0x00, 0xfa, 0xfa },
{ 0x00, 0xfb, 0xfb },
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso7_tbl[] = {
+static struct cs_info iso7_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -2587,17 +2574,17 @@ struct cs_info iso7_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xbc },
{ 0x00, 0xfd, 0xbe },
{ 0x00, 0xfe, 0xbf },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso8_tbl[] = {
+static struct cs_info iso8_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -2846,17 +2833,17 @@ struct cs_info iso8_tbl[] = {
{ 0x00, 0xfa, 0xfa },
{ 0x00, 0xfb, 0xfb },
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso9_tbl[] = {
+static struct cs_info iso9_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -3105,17 +3092,17 @@ struct cs_info iso9_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0x49 },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso10_tbl[] = {
+static struct cs_info iso10_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -3364,17 +3351,17 @@ struct cs_info iso10_tbl[] = {
{ 0x00, 0xfa, 0xfa },
{ 0x00, 0xfb, 0xfb },
{ 0x00, 0xfc, 0xfc },
{ 0x00, 0xfd, 0xfd },
{ 0x00, 0xfe, 0xfe },
{ 0x00, 0xff, 0xff }
};
-struct cs_info koi8r_tbl[] = {
+static struct cs_info koi8r_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -3623,17 +3610,17 @@ struct cs_info koi8r_tbl[] = {
{ 0x01, 0xda, 0xfa },
{ 0x01, 0xdb, 0xfb },
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
{ 0x01, 0xdf, 0xff }
};
-struct cs_info koi8u_tbl[] = {
+static struct cs_info koi8u_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -3882,17 +3869,17 @@ struct cs_info koi8u_tbl[] = {
{ 0x01, 0xda, 0xfa },
{ 0x01, 0xdb, 0xfb },
{ 0x01, 0xdc, 0xfc },
{ 0x01, 0xdd, 0xfd },
{ 0x01, 0xde, 0xfe },
{ 0x01, 0xdf, 0xff }
};
-struct cs_info cp1251_tbl[] = {
+static struct cs_info cp1251_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -4141,17 +4128,17 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xdf }
};
-struct cs_info iso13_tbl[] = {
+static struct cs_info iso13_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -4401,17 +4388,17 @@ struct cs_info iso13_tbl[] = {
{ 0x00, 0xFB, 0xDB },
{ 0x00, 0xFC, 0xDC },
{ 0x00, 0xFD, 0xDD },
{ 0x00, 0xFE, 0xDE },
{ 0x00, 0xFF, 0xFF }
};
-struct cs_info iso14_tbl[] = {
+static struct cs_info iso14_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -4660,17 +4647,17 @@ struct cs_info iso14_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xff }
};
-struct cs_info iso15_tbl[] = {
+static struct cs_info iso15_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -4919,17 +4906,17 @@ struct cs_info iso15_tbl[] = {
{ 0x00, 0xfa, 0xda },
{ 0x00, 0xfb, 0xdb },
{ 0x00, 0xfc, 0xdc },
{ 0x00, 0xfd, 0xdd },
{ 0x00, 0xfe, 0xde },
{ 0x00, 0xff, 0xbe }
};
-struct cs_info iscii_devanagari_tbl[] = {
+static struct cs_info iscii_devanagari_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
{ 0x00, 0x03, 0x03 },
{ 0x00, 0x04, 0x04 },
{ 0x00, 0x05, 0x05 },
{ 0x00, 0x06, 0x06 },
{ 0x00, 0x07, 0x07 },
@@ -5218,37 +5205,32 @@ struct cs_info * get_current_cs(const ch
// XXX This function was rewritten for mozilla. Instead of storing the
// conversion tables static in this file, create them when needed
// with help the mozilla backend.
struct cs_info * get_current_cs(const char * es) {
struct cs_info *ccs;
nsCOMPtr<nsIUnicodeEncoder> encoder;
nsCOMPtr<nsIUnicodeDecoder> decoder;
- nsCOMPtr<nsICaseConversion> caseConv;
nsresult rv;
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
if (NS_FAILED(rv))
return nsnull;
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
if (NS_FAILED(rv))
return nsnull;
encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
if (NS_FAILED(rv))
return nsnull;
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
- caseConv = do_GetService(kUnicharUtilCID, &rv);
- if (NS_FAILED(rv))
- return nsnull;
-
- ccs = (struct cs_info *) malloc(256 * sizeof(cs_info));
+ ccs = new cs_info[256];
for (unsigned int i = 0; i <= 0xff; ++i) {
PRBool success = PR_FALSE;
// We want to find the upper/lowercase equivalents of each byte
// in this 1-byte character encoding. Call our encoding/decoding
// APIs separately for each byte since they may reject some of the
// bytes, and we want to handle errors separately for each byte.
char lower, upper;
@@ -5259,28 +5241,24 @@ struct cs_info * get_current_cs(const ch
PRUnichar uni, uniCased;
PRInt32 charLength = 1, uniLength = 1;
rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
// Explicitly check NS_OK because we don't want to allow
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
if (rv != NS_OK || charLength != 1 || uniLength != 1)
break;
- rv = caseConv->ToLower(uni, &uniCased);
- if (NS_FAILED(rv))
- break;
+ uniCased = ToLowerCase(uni);
rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength);
// Explicitly check NS_OK because we don't want to allow
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
if (rv != NS_OK || charLength != 1 || uniLength != 1)
break;
- rv = caseConv->ToUpper(uni, &uniCased);
- if (NS_FAILED(rv))
- break;
+ uniCased = ToUpperCase(uni);
rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength);
// Explicitly check NS_OK because we don't want to allow
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
if (rv != NS_OK || charLength != 1 || uniLength != 1)
break;
success = PR_TRUE;
} while (0);
@@ -5311,24 +5289,24 @@ char * get_casechars(const char * enc) {
for (int i = 0; i <= 255; i++) {
if ((csconv[i].cupper != csconv[i].clower)) {
*p = (char) i;
p++;
}
}
*p = '\0';
#ifdef MOZILLA_CLIENT
- delete csconv;
+ delete [] csconv;
#endif
return mystrdup(expw);
}
-struct lang_map lang2enc[] = {
+static struct lang_map lang2enc[] = {
{"ar", "UTF-8", LANG_ar},
{"az", "UTF-8", LANG_az},
{"bg", "microsoft-cp1251", LANG_bg},
{"ca", "ISO8859-1", LANG_ca},
{"cs", "ISO8859-2", LANG_cs},
{"da", "ISO8859-1", LANG_da},
{"de", "ISO8859-1", LANG_de},
{"el", "ISO8859-7", LANG_el},
@@ -5374,17 +5352,17 @@ int get_lang_num(const char * lang) {
#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
int initialize_utf_tbl() {
utf_tbl_count++;
if (utf_tbl) return 0;
utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));
if (utf_tbl) {
- int j;
+ size_t j;
for (j = 0; j < CONTSIZE; j++) {
utf_tbl[j].cletter = 0;
utf_tbl[j].clower = (unsigned short) j;
utf_tbl[j].cupper = (unsigned short) j;
}
for (j = 0; j < UTF_LST_LEN; j++) {
utf_tbl[utf_lst[j].c].cletter = 1;
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
@@ -5399,39 +5377,28 @@ int initialize_utf_tbl() {
void free_utf_tbl() {
if (utf_tbl_count > 0) utf_tbl_count--;
if (utf_tbl && (utf_tbl_count == 0)) {
free(utf_tbl);
utf_tbl = NULL;
}
}
-#ifdef MOZILLA_CLIENT
-static nsCOMPtr<nsICaseConversion>& getcaseConv()
-{
- nsresult rv;
- static nsCOMPtr<nsICaseConversion> caseConv = do_GetService(kUnicharUtilCID, &rv);
- return caseConv;
-}
-#endif
-
unsigned short unicodetoupper(unsigned short c, int langnum)
{
// In Azeri and Turkish, I and i dictinct letters:
// There are a dotless lower case i pair of upper `I',
// and an upper I with dot pair of lower `i'.
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
return 0x0130;
#ifdef OPENOFFICEORG
return u_toupper(c);
#else
#ifdef MOZILLA_CLIENT
- PRUnichar ch2;
- getcaseConv()->ToUpper((PRUnichar) c, &ch2);
- return ch2;
+ return ToUpperCase((PRUnichar) c);
#else
return (utf_tbl) ? utf_tbl[c].cupper : c;
#endif
#endif
}
unsigned short unicodetolower(unsigned short c, int langnum)
{
@@ -5439,19 +5406,17 @@ unsigned short unicodetolower(unsigned s
// There are a dotless lower case i pair of upper `I',
// and an upper I with dot pair of lower `i'.
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
return 0x0131;
#ifdef OPENOFFICEORG
return u_tolower(c);
#else
#ifdef MOZILLA_CLIENT
- PRUnichar ch2;
- getcaseConv()->ToLower((PRUnichar) c, &ch2);
- return ch2;
+ return ToLowerCase((PRUnichar) c);
#else
return (utf_tbl) ? utf_tbl[c].clower : c;
#endif
#endif
}
int unicodeisalpha(unsigned short c)
{
--- a/extensions/spellcheck/hunspell/src/csutil.hxx
+++ b/extensions/spellcheck/hunspell/src/csutil.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,19 +53,27 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef __CSUTILHXX__
#define __CSUTILHXX__
+#include "hunvisapi.h"
+
// First some base level utility routines
+#include <string.h>
#include "w_char.hxx"
+#include "htypes.hxx"
+
+#ifdef MOZILLA_CLIENT
+#include "nscore.h" // for mozalloc headers
+#endif
// casing
#define NOCAP 0
#define INITCAP 1
#define ALLCAP 2
#define HUHCAP 3
#define HUHINITCAP 4
@@ -95,82 +104,72 @@
#define MSEP_REC '\n'
#define MSEP_ALT '\v'
// default flags
#define DEFAULTFLAGS 65510
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
-// hash entry macros
-#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
- get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL)
-// NULL-free version for warning-free OOo build
-#define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
- get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "")
-#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
-
-#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
-
// convert UTF-16 characters to UTF-8
-char * u16_u8(char * dest, int size, const w_char * src, int srclen);
+LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
// convert UTF-8 characters to UTF-16
-int u8_u16(w_char * dest, int size, const char * src);
+LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
// sort 2-byte vector
-void flag_qsort(unsigned short flags[], int begin, int end);
+LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
// binary search in 2-byte vector
-int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
+LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
// remove end of line char(s)
-void mychomp(char * s);
+LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
// duplicate string
-char * mystrdup(const char * s);
+LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
// strcat for limited length destination string
-char * mystrcat(char * dest, const char * st, int max);
+LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
// duplicate reverse of string
-char * myrevstrdup(const char * s);
+LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
// parse into tokens with char delimiter
-char * mystrsep(char ** sptr, const char delim);
+LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
// parse into tokens with char delimiter
-char * mystrsep2(char ** sptr, const char delim);
+LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
// parse into tokens with char delimiter
-char * mystrrep(char *, const char *, const char *);
+LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
// append s to ends of every lines in text
-void strlinecat(char * lines, const char * s);
+LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
// tokenize into lines with new line
- int line_tok(const char * text, char *** lines, char breakchar);
+LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
// tokenize into lines with new line and uniq in place
- char * line_uniq(char * text, char breakchar);
- char * line_uniq_app(char ** text, char breakchar);
+LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
+LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
// change oldchar to newchar in place
- char * tr(char * text, char oldc, char newc);
+LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
// reverse word
- int reverseword(char *);
+LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
// reverse word
- int reverseword_utf(char *);
+LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
// remove duplicates
- int uniqlist(char ** list, int n);
+LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
// free character array list
- void freelist(char *** list, int n);
+LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
// character encoding information
struct cs_info {
unsigned char ccase;
unsigned char clower;
unsigned char cupper;
};
@@ -182,92 +181,125 @@ struct unicode_info {
};
struct unicode_info2 {
char cletter;
unsigned short cupper;
unsigned short clower;
};
-int initialize_utf_tbl();
-void free_utf_tbl();
-unsigned short unicodetoupper(unsigned short c, int langnum);
-unsigned short unicodetolower(unsigned short c, int langnum);
-int unicodeisalpha(unsigned short c);
+LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
+LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
struct enc_entry {
const char * enc_name;
struct cs_info * cs_table;
};
// language to encoding default map
struct lang_map {
const char * lang;
const char * def_enc;
int num;
};
-struct cs_info * get_current_cs(const char * es);
+LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
-const char * get_default_enc(const char * lang);
+LIBHUNSPELL_DLL_EXPORTED const char * get_default_enc(const char * lang);
// get language identifiers of language codes
-int get_lang_num(const char * lang);
+LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
// get characters of the given 8bit encoding with lower- and uppercase forms
-char * get_casechars(const char * enc);
+LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
// convert null terminated string to all caps using encoding
-void enmkallcap(char * d, const char * p, const char * encoding);
+LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all little using encoding
-void enmkallsmall(char * d, const char * p, const char * encoding);
+LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
-// convert null terminated string to have intial capital using encoding
-void enmkinitcap(char * d, const char * p, const char * encoding);
+// convert null terminated string to have initial capital using encoding
+LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all caps
-void mkallcap(char * p, const struct cs_info * csconv);
+LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
// convert null terminated string to all little
-void mkallsmall(char * p, const struct cs_info * csconv);
+LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
-// convert null terminated string to have intial capital
-void mkinitcap(char * p, const struct cs_info * csconv);
+// convert null terminated string to have initial capital
+LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
// convert first nc characters of UTF-8 string to little
-void mkallsmall_utf(w_char * u, int nc, int langnum);
+LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
// convert first nc characters of UTF-8 string to capital
-void mkallcap_utf(w_char * u, int nc, int langnum);
+LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
// get type of capitalization
-int get_captype(char * q, int nl, cs_info *);
+LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
// get type of capitalization (UTF-8)
-int get_captype_utf8(w_char * q, int nl, int langnum);
+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
// strip all ignored characters in the string
-void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
+LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
// strip all ignored characters in the string
-void remove_ignored_chars(char * word, char * ignored_chars);
+LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
-int parse_string(char * line, char ** out, int ln);
+LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
-int parse_array(char * line, char ** out, unsigned short ** out_utf16,
+LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
int * out_utf16_len, int utf8, int ln);
-int fieldlen(const char * r);
-char * copy_field(char * dest, const char * morph, const char * var);
+LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
+LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
+
+LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
-int morphcmp(const char * s, const char * t);
+LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
-int get_sfxcount(const char * morph);
+// conversion function for protected memory
+LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
// conversion function for protected memory
-void store_pointer(char * dest, char * source);
+LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
+
+// hash entry accessors (inline functions that replace the former HENTRY_* macros)
+LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
+{
+ char *ret;
+ if (!h->var) // var == 0: the entry is treated as having no data
+ ret = NULL;
+ else if (h->var & H_OPT_ALIASM) // alias flag set: read a pointer value stored after the word
+ ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
+ else
+ ret = &(h->word[0]) + h->blen + 1; // data is inline, blen + 1 bytes past the start of word
+ return ret;
+}
-// conversion function for protected memory
-char * get_stored_pointer(char * s);
+// NULL-free version for warning-free OOo build
+LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
+{
+ const char *ret;
+ if (!h->var)
+ ret = ""; // empty string here, where HENTRY_DATA yields NULL
+ else if (h->var & H_OPT_ALIASM) // alias flag set: read a pointer value stored after the word
+ ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
+ else
+ ret = &(h->word[0]) + h->blen + 1; // data is inline, blen + 1 bytes past the start of word
+ return ret;
+}
+
+LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p) { // first occurrence of p in h's data, or NULL
+ char *data = HENTRY_DATA(h); // resolve once: the alias path memcpy's a stored pointer on every call
+ return data ? strstr(data, p) : NULL; // NULL both when the entry has no data and when p is not found
+}
+
+#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
#endif
--- a/extensions/spellcheck/hunspell/src/dictmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/dictmgr.cpp
@@ -11,63 +11,48 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-#else
-#include <stdlib.h>
+#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
#include <ctype.h>
-#endif
+#include <stdio.h>
#include "dictmgr.hxx"
-#ifndef MOZILLA_CLIENT
-#ifndef W32
-using namespace std;
-#endif
-#endif
-
-DictMgr::DictMgr(const char * dictpath, const char * etype)
+DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0) // starts at 0, so a failed malloc below needs no else branch
{
// load list of etype entries
- numdict = 0;
pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
if (pdentry) {
if (parse_file(dictpath, etype)) {
numdict = 0;
// no dictionary.lst found is okay
}
- } else {
- numdict = 0;
}
}
DictMgr::~DictMgr()
{
dictentry * pdict = NULL;
if (pdentry) {
@@ -139,16 +124,26 @@ int DictMgr::parse_file(const char * di
i++;
}
free(piece);
}
if (i == 4) {
numdict++;
pdict++;
} else {
+ switch (i) {
+ case 3:
+ free(pdict->region);
+ pdict->region=NULL;
+ case 2: //deliberate fallthrough
+ free(pdict->lang);
+ pdict->lang=NULL;
+ default:
+ break;
+ }
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
fflush(stderr);
}
}
}
}
fclose(dictlst);
return 0;
@@ -166,54 +161,53 @@ int DictMgr::get_list(dictentry ** ppent
// strip strings into token based on single char delimiter
// acts like strsep() but only uses a delim char and not
// a delim string
char * DictMgr::mystrsep(char ** stringp, const char delim)
{
char * rv = NULL;
char * mp = *stringp;
- int n = strlen(mp);
+ size_t n = strlen(mp); // length of the still-unparsed remainder
if (n > 0) {
char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
if (dp) {
*stringp = dp+1;
- int nc = (int)((unsigned long)dp - (unsigned long)mp);
+ size_t nc = dp - mp; // bytes before the delimiter; dp comes from memchr over mp, so dp >= mp
rv = (char *) malloc(nc+1);
if (rv) {
memcpy(rv,mp,nc);
*(rv+nc) = '\0';
- return rv;
}
} else {
rv = (char *) malloc(n+1);
if (rv) {
memcpy(rv, mp, n);
*(rv+n) = '\0';
*stringp = mp + n;
- return rv;
}
}
}
- return NULL;
+ return rv; // single exit: NULL for empty input or failed malloc, otherwise the malloc'd token
}
// replaces strdup with ansi version
char * DictMgr::mystrdup(const char * s)
{
char * d = NULL;
if (s) {
- int sl = strlen(s);
- d = (char *) malloc(((sl+1) * sizeof(char)));
- if (d) memcpy(d,s,((sl+1)*sizeof(char)));
+ size_t sl = strlen(s)+1; // size_t, not int: strlen's own type, and what malloc/memcpy take; counts the NUL
+ d = (char *) malloc(sl);
+ if (d) memcpy(d,s,sl); // on allocation failure d stays NULL and NULL is returned
}
return d;
}
// remove cross-platform text line end characters
void DictMgr:: mychomp(char * s)
{
int k = strlen(s);
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}
+
--- a/extensions/spellcheck/hunspell/src/dictmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/dictmgr.hxx
@@ -11,16 +11,17 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -29,27 +30,29 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _DICTMGR_HXX_
#define _DICTMGR_HXX_
+#include "hunvisapi.h"
+
#define MAXDICTIONARIES 100
#define MAXDICTENTRYLEN 1024
struct dictentry {
char * filename;
char * lang;
char * region;
};
-class DictMgr
+class LIBHUNSPELL_DLL_EXPORTED DictMgr
{
int numdict;
dictentry * pdentry;
public:
DictMgr(const char * dictpath, const char * etype);
--- a/extensions/spellcheck/hunspell/src/filemgr.cpp
+++ b/extensions/spellcheck/hunspell/src/filemgr.cpp
@@ -11,59 +11,55 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#else
-#include <stdlib.h>
+#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
-#endif
+#include <stdio.h>
#include "filemgr.hxx"
int FileMgr::fail(const char * err, const char * par) {
fprintf(stderr, err, par);
return -1;
}
FileMgr::FileMgr(const char * file, const char * key) {
linenum = 0;
hin = NULL;
fin = fopen(file, "r");
if (!fin) {
// check hzipped file
- char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION));
+ char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
if (st) {
strcpy(st, file);
strcat(st, HZIP_EXTENSION);
hin = new Hunzip(st, key);
+ free(st);
}
}
if (!fin && !hin) fail(MSG_OPEN, file);
}
FileMgr::~FileMgr()
{
if (fin) fclose(fin);
--- a/extensions/spellcheck/hunspell/src/filemgr.hxx
+++ b/extensions/spellcheck/hunspell/src/filemgr.hxx
@@ -11,16 +11,17 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -29,19 +30,22 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _FILEMGR_HXX_
#define _FILEMGR_HXX_
-#include "hunzip.hxx"
+#include "hunvisapi.h"
-class FileMgr
+#include "hunzip.hxx"
+#include <stdio.h>
+
+class LIBHUNSPELL_DLL_EXPORTED FileMgr
{
protected:
FILE * fin;
Hunzip * hin;
char in[BUFSIZE + 50]; // input buffer
int fail(const char * err, const char * par);
int linenum;
--- a/extensions/spellcheck/hunspell/src/hashmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/hashmgr.cpp
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,41 +50,24 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
-#include "atypes.hxx"
-#include "csutil.hxx"
#include "hashmgr.hxx"
-
-#ifdef MOZILLA_CLIENT
-#ifdef __SUNPRO_CC // for SunONE Studio compiler
-using namespace std;
-#endif
-#else
-#ifndef W32
-using namespace std;
-#endif
-#endif
+#include "csutil.hxx"
+#include "atypes.hxx"
// build a hash table from a munched word list
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
{
tablesize = 0;
tableptr = NULL;
flag_mode = FLAG_CHAR;
@@ -155,44 +139,48 @@ HashMgr::~HashMgr()
#endif
#endif
if (enc) free(enc);
if (lang) free(lang);
if (ignorechars) free(ignorechars);
if (ignorechars_utf16) free(ignorechars_utf16);
+
+#ifdef MOZILLA_CLIENT
+ delete [] csconv;
+#endif
}
// lookup a root word in the hashtable
struct hentry * HashMgr::lookup(const char *word) const
{
struct hentry * dp;
if (tableptr) {
dp = tableptr[hash(word)];
if (!dp) return NULL;
for ( ; dp != NULL; dp = dp->next) {
- if (strcmp(word,&(dp->word)) == 0) return dp;
+ if (strcmp(word, dp->word) == 0) return dp;
}
}
return NULL;
}
// add a word to the hash table (private)
int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
{
bool upcasehomonym = false;
int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
struct hentry* hp =
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
if (!hp) return 1;
- char * hpw = &(hp->word);
+ char * hpw = hp->word;
strcpy(hpw, word);
if (ignorechars != NULL) {
if (utf8) {
remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
} else {
remove_ignored_chars(hpw, ignorechars);
}
}
@@ -226,17 +214,17 @@ int HashMgr::add_word(const char * word,
} else hp->var = 0;
struct hentry * dp = tableptr[i];
if (!dp) {
tableptr[i] = hp;
return 0;
}
while (dp->next != NULL) {
- if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) {
+ if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
free(dp->astr);
dp->astr = hp->astr;
dp->alen = hp->alen;
free(hp);
return 0;
@@ -244,17 +232,17 @@ int HashMgr::add_word(const char * word,
dp->next_homonym = hp;
}
} else {
upcasehomonym = true;
}
}
dp=dp->next;
}
- if (strcmp(&(hp->word), &(dp->word)) == 0) {
+ if (strcmp(hp->word, dp->word) == 0) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
free(dp->astr);
dp->astr = hp->astr;
dp->alen = hp->alen;
free(hp);
return 0;
@@ -321,17 +309,17 @@ int HashMgr::get_clen_and_captype(const
// remove word (personal dictionary function for standalone applications)
int HashMgr::remove(const char * word)
{
struct hentry * dp = lookup(word);
while (dp) {
if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
unsigned short * flags =
- (unsigned short *) malloc(sizeof(short *) * (dp->alen + 1));
+ (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
if (!flags) return 1;
for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
flags[dp->alen] = forbiddenword;
dp->astr = flags;
dp->alen++;
flag_qsort(flags, 0, dp->alen);
}
dp = dp->next_homonym;
@@ -343,17 +331,17 @@ int HashMgr::remove(const char * word)
int HashMgr::remove_forbidden_flag(const char * word) {
struct hentry * dp = lookup(word);
if (!dp) return 1;
while (dp) {
if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) {
if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
else {
unsigned short * flags2 =
- (unsigned short *) malloc(sizeof(short *) * (dp->alen - 1));
+ (unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
if (!flags2) return 1;
int i, j = 0;
for (i = 0; i < dp->alen; i++) {
if (dp->astr[i] != forbiddenword) flags2[j++] = dp->astr[i];
}
dp->alen--;
dp->astr = flags2; // XXX allowed forbidden words
}
@@ -434,17 +422,17 @@ int HashMgr::load_tables(const char * tp
delete dict;
return 2;
}
mychomp(ts);
/* remove byte order mark */
if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
memmove(ts, ts+3, strlen(ts+3)+1);
- HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
+ // warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
}
tablesize = atoi(ts);
if (tablesize == 0) {
HUNSPELL_WARNING(stderr, "error: line 1: missing or bad word count in the dic file\n");
delete dict;
return 4;
}
@@ -507,16 +495,20 @@ int HashMgr::load_tables(const char * tp
int index = atoi(ap + 1);
al = get_aliasf(index, &flags, dict);
if (!al) {
HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", dict->getlinenum());
*ap = '\0';
}
} else {
al = decode_flags(&flags, ap + 1, dict);
+ if (al == -1) {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+ delete dict; return 6; // release the FileMgr first, as the other early returns of this function do
+ }
flag_qsort(flags, 0, al);
}
} else {
al = 0;
ap = NULL;
flags = NULL;
}
@@ -547,16 +539,21 @@ int HashMgr::hash(const char * word) con
ROTATE(hv,ROTATE_LEN);
hv ^= (*word++);
}
return (unsigned long) hv % tablesize;
}
int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
int len;
+ if (*flags == '\0') {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
+ *result = NULL;
+ return 0;
+ }
switch (flag_mode) {
case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
len = strlen(flags);
if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
len /= 2;
*result = (unsigned short *) malloc(len * sizeof(short));
if (!*result) return -1;
for (int i = 0; i < len; i++) {
--- a/extensions/spellcheck/hunspell/src/hashmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,28 +53,26 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _HASHMGR_HXX_
#define _HASHMGR_HXX_
-#ifndef MOZILLA_CLIENT
-#include <cstdio>
-#else
+#include "hunvisapi.h"
+
#include <stdio.h>
-#endif
+#include "htypes.hxx"
#include "filemgr.hxx"
-#include "htypes.hxx"
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
-class HashMgr
+class LIBHUNSPELL_DLL_EXPORTED HashMgr
{
int tablesize;
struct hentry ** tableptr;
int userword;
flag flag_mode;
int complexprefixes;
int utf8;
unsigned short forbiddenword;
--- a/extensions/spellcheck/hunspell/src/htypes.hxx
+++ b/extensions/spellcheck/hunspell/src/htypes.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -63,26 +64,26 @@
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
// hentry options
#define H_OPT (1 << 0)
#define H_OPT_ALIASM (1 << 1)
#define H_OPT_PHON (1 << 2)
// see also csutil.hxx
-#define HENTRY_WORD(h) &(h->word)
+#define HENTRY_WORD(h) &(h->word[0])
// approx. number of user defined words
#define USERWORD 1000
struct hentry
{
unsigned char blen; // word length in bytes
unsigned char clen; // word length in characters (different for UTF-8 enc.)
short alen; // length of affix flag vector
unsigned short * astr; // affix flag vector
struct hentry * next; // next word with same hash code
struct hentry * next_homonym; // next homonym word (with same hash code)
char var; // variable fields (only for special pronounciation yet)
- char word; // variable-length word (8-bit or UTF-8 encoding)
+ char word[1]; // variable-length word (8-bit or UTF-8 encoding)
};
#endif
--- a/extensions/spellcheck/hunspell/src/hunspell.cpp
+++ b/extensions/spellcheck/hunspell/src/hunspell.cpp
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,35 +50,26 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#endif
-#include "csutil.hxx"
-#include "hunspell.h"
#include "hunspell.hxx"
-
+#include "hunspell.h"
#ifndef MOZILLA_CLIENT
-#ifndef W32
-using namespace std;
+# include "config.h"
#endif
-#endif
+#include "csutil.hxx"
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
{
encoding = NULL;
csconv = NULL;
utf8 = 0;
complexprefixes = 0;
affixpath = mystrdup(affpath);
@@ -110,17 +102,17 @@ Hunspell::~Hunspell()
{
if (pSMgr) delete pSMgr;
if (pAMgr) delete pAMgr;
for (int i = 0; i < maxdic; i++) delete pHMgr[i];
maxdic = 0;
pSMgr = NULL;
pAMgr = NULL;
#ifdef MOZILLA_CLIENT
- free(csconv);
+ delete [] csconv;
#endif
csconv= NULL;
if (encoding) free(encoding);
encoding = NULL;
if (affixpath) free(affixpath);
affixpath = NULL;
}
@@ -449,31 +441,34 @@ int Hunspell::spell(const char * word, i
*(wspace+wl+1) = '\0';
rv = checkword(wspace, info, root);
if (rv) break;
}
// Spec. prefix handling for Catalan, French, Italian:
// prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
if (pAMgr && strchr(cw, '\'')) {
wl = mkallsmall2(cw, unicw, nc);
- char * apostrophe = strchr(cw, '\'');
- if (utf8) {
- w_char tmpword[MAXWORDLEN];
- *apostrophe = '\0';
- wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
- *apostrophe = '\'';
- if (wl2 < nc) {
- mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
- rv = checkword(cw, info, root);
- if (rv) break;
+ //There are no really sane circumstances where this could fail,
+ //but anyway...
+ if (char * apostrophe = strchr(cw, '\'')) {
+ if (utf8) {
+ w_char tmpword[MAXWORDLEN];
+ *apostrophe = '\0';
+ wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
+ *apostrophe = '\'';
+ if (wl2 < nc) {
+ mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
+ rv = checkword(cw, info, root);
+ if (rv) break;
+ }
+ } else {
+ mkinitcap2(apostrophe + 1, unicw, nc);
+ rv = checkword(cw, info, root);
+ if (rv) break;
}
- } else {
- mkinitcap2(apostrophe + 1, unicw, nc);
- rv = checkword(cw, info, root);
- if (rv) break;
}
mkinitcap2(cw, unicw, nc);
rv = checkword(cw, info, root);
if (rv) break;
}
if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
char tmpword[MAXWORDUTF8LEN];
wl = mkallsmall2(cw, unicw, nc);
@@ -543,36 +538,50 @@ int Hunspell::spell(const char * word, i
}
if (rv) return 1;
// recursive breaking at break points
if (wordbreak) {
char * s;
char r;
- int corr = 0;
+ int nbr = 0;
wl = strlen(cw);
int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
+
+ // calculate break points for recursion limit
+ for (int j = 0; j < numbreak; j++) {
+ s = cw;
+ do {
+ s = (char *) strstr(s, wordbreak[j]);
+ if (s) {
+ nbr++;
+ s++;
+ }
+ } while (s);
+ }
+ if (nbr >= 10) return 0;
+
// check boundary patterns (^begin and end$)
for (int j = 0; j < numbreak; j++) {
int plen = strlen(wordbreak[j]);
if (plen == 1 || plen > wl) continue;
if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
&& spell(cw + plen - 1)) return 1;
if (wordbreak[j][plen - 1] == '$' &&
strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
r = cw[wl - plen + 1];
cw[wl - plen + 1] = '\0';
if (spell(cw)) return 1;
cw[wl - plen + 1] = r;
}
}
+
// other patterns
for (int j = 0; j < numbreak; j++) {
- int result = 0;
int plen = strlen(wordbreak[j]);
s=(char *) strstr(cw, wordbreak[j]);
if (s && (s > cw) && (s < cw + wl - plen)) {
if (!spell(s + plen)) continue;
r = *s;
*s = '\0';
// examine 2 sides of the break point
if (spell(cw)) return 1;
@@ -664,17 +673,17 @@ struct hentry * Hunspell::checkword(cons
}
if (he) {
if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
if (info) *info += SPELL_FORBIDDEN;
return NULL;
}
if (root) {
- *root = mystrdup(&(he->word));
+ *root = mystrdup(he->word);
if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
}
// try check compound word
} else if (pAMgr->get_compound()) {
he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0);
// LANG_hu section: `moving rule' with last dash
@@ -683,17 +692,17 @@ struct hentry * Hunspell::checkword(cons
if (!dup) return NULL;
dup[len-1] = '\0';
he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0);
free(dup);
}
// end of LANG speficic region
if (he) {
if (root) {
- *root = mystrdup(&(he->word));
+ *root = mystrdup(he->word);
if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
}
if (info) *info += SPELL_COMPOUND;
}
}
@@ -861,17 +870,17 @@ int Hunspell::suggest(char*** slst, cons
*pos = ' ';
} else *pos = '-';
}
}
}
// END OF LANG_hu section
// try ngram approach since found nothing
- if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
+ if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
switch(captype) {
case NOCAP: {
ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
break;
}
case HUHINITCAP:
capwords = 1;
case HUHCAP: {
@@ -895,25 +904,26 @@ int Hunspell::suggest(char*** slst, cons
for (int j = oldns; j < ns; j++)
mkallcap((*slst)[j]);
break;
}
}
}
// try dash suggestion (Afo-American -> Afro-American)
- if (strchr(cw, '-')) {
- char * pos = strchr(cw, '-');
+ if (char * pos = strchr(cw, '-')) {
char * ppos = cw;
int nodashsug = 1;
char ** nlst = NULL;
int nn = 0;
int last = 0;
- for (int j = 0; j < ns && nodashsug == 1; j++) {
- if (strchr((*slst)[j], '-')) nodashsug = 0;
+ if (*slst) {
+ for (int j = 0; j < ns && nodashsug == 1; j++) {
+ if (strchr((*slst)[j], '-')) nodashsug = 0;
+ }
}
while (nodashsug && !last) {
if (*pos == '\0') last = 1; else *pos = '\0';
if (!spell(ppos)) {
nn = suggest(&nlst, ppos);
for (int j = nn - 1; j >= 0; j--) {
strncpy(wspace, cw, ppos - cw);
strcpy(wspace + (ppos - cw), nlst[j]);
@@ -1512,17 +1522,20 @@ int Hunspell::analyze(char*** slst, cons
char * dash = NULL;
int nresult = 0;
// LANG_hu section: set dash information for suggestions
if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
if ((langnum == LANG_hu) && dash) {
*dash='\0';
// examine 2 sides of the dash
if (dash[1] == '\0') { // base word ending with dash
- if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
+ if (spell(cw)) {
+ char * p = pSMgr->suggest_morph(cw); // result must be freed by its caller (cf. free(st) in the next branch)
+ if (p) { int ret = line_tok(p, slst, MSEP_REC); free(p); return ret; } // tokenize the string we already have instead of calling suggest_morph() again, then release it
+ }
} else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
if (spell(cw) && (spell("-e"))) {
st = pSMgr->suggest_morph(cw);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
}
mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
@@ -1655,17 +1668,22 @@ int Hunspell::get_xml_par(char * dest, c
char end = *par;
char * dmax = dest + max;
if (end == '>') end = '<';
else if (end != '\'' && end != '"') return 0; // bad XML
for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
*d = '\0';
mystrrep(dest, "&lt;", "<");
mystrrep(dest, "&amp;", "&");
- return d - dest;
+ return (int)(d - dest);
+}
+
+int Hunspell::get_langnum() const
+{
+ return langnum;
}
// return the beginning of the element (attr == NULL) or the attribute
const char * Hunspell::get_xml_pos(const char * s, const char * attr)
{
const char * end = strchr(s, '>');
const char * p = s;
if (attr == NULL) return end;
@@ -1688,36 +1706,39 @@ int Hunspell::get_xml_list(char ***slst,
char * p;
if (!list) return 0;
for (p = list; (p = strstr(p, tag)); p++) n++;
if (n == 0) return 0;
*slst = (char **) malloc(sizeof(char *) * n);
if (!*slst) return 0;
for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
int l = strlen(p);
- (*slst)[n] = (char *) malloc(l);
+ (*slst)[n] = (char *) malloc(l + 1);
if (!(*slst)[n]) return (n > 0 ? n - 1 : 0);
- get_xml_par((*slst)[n], p + strlen(tag) - 1, l);
+ if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
+ free((*slst)[n]);
+ break;
+ }
}
return n;
}
int Hunspell::spellml(char*** slst, const char * word)
{
char *q, *q2;
char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
q = (char *) strstr(word, "<query");
if (!q) return 0; // bad XML input
q2 = strchr(q, '>');
if (!q2) return 0; // bad XML input
q2 = strstr(q2, "<word");
if (!q2) return 0; // bad XML input
if (check_xml_par(q, "type=", "analyze")) {
int n = 0, s = 0;
- if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) n = analyze(slst, cw);
+ if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
if (n == 0) return 0;
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
if (!r) return 0;
strcpy(r, "<code>");
for (int i = 0; i < n; i++) {
int l = strlen(r);
@@ -1728,23 +1749,23 @@ int Hunspell::spellml(char*** slst, cons
mystrrep(r + l + 3, "&", "&amp;");
strcat(r, "</a>");
free((*slst)[i]);
}
strcat(r, "</code>");
(*slst)[0] = r;
return 1;
} else if (check_xml_par(q, "type=", "stem")) {
- if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) return stem(slst, cw);
+ if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
} else if (check_xml_par(q, "type=", "generate")) {
- int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN);
+ int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
if (n == 0) return 0;
char * q3 = strstr(q2 + 1, "<word");
if (q3) {
- if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN)) {
+ if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
return generate(slst, cw, cw2);
}
} else {
char ** slst2;
if ((q2 = strstr(q2 + 1, "<code")) &&
(n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
int n2 = generate(slst, cw, slst2, n);
freelist(&slst2, n);
@@ -1943,28 +1964,28 @@ int Hunspell_analyze(Hunhandle *pHunspel
return ((Hunspell*)pHunspell)->analyze(slst, word);
}
int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
{
return ((Hunspell*)pHunspell)->stem(slst, word);
}
-int Hunspell_stem(Hunhandle *pHunspell, char*** slst, char** desc, int n)
+int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
{
return ((Hunspell*)pHunspell)->stem(slst, desc, n);
}
int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
const char * word2)
{
return ((Hunspell*)pHunspell)->generate(slst, word, word2);
}
-int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n)
{
return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
}
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
@@ -1984,11 +2005,11 @@ int Hunspell_add_with_affix(Hunhandle *p
}
/* remove word from the run-time dictionary */
int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
return ((Hunspell*)pHunspell)->remove(word);
}
-void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n) {
+void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
freelist(slst, n);
}
--- a/extensions/spellcheck/hunspell/src/hunspell.h
+++ b/extensions/spellcheck/hunspell/src/hunspell.h
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -52,104 +53,100 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _MYSPELLMGR_H_
#define _MYSPELLMGR_H_
+#include "hunvisapi.h"
+
#ifdef __cplusplus
extern "C" {
#endif
typedef struct Hunhandle Hunhandle;
-#ifdef _MSC_VER
-#define DLL __declspec ( dllexport )
-#else
-#define DLL
-#endif
+LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
-DLL Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
-
-DLL Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
+LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
const char * key);
-DLL void Hunspell_destroy(Hunhandle *pHunspell);
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
*/
-DLL int Hunspell_spell(Hunhandle *pHunspell, const char *);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
-DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
+LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
* array of strings pointer (here *slst) may not be initialized
* output: number of suggestions in string array, and suggestions in
* a newly allocated array of strings (*slts will be NULL when number
* of suggestion equals 0.)
*/
-DLL int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
-DLL int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, word) - stemmer function */
-DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
* char ** result, result2;
* int n1 = Hunspell_analyze(result, "words");
* int n2 = Hunspell_stem2(result2, result, n1);
*/
-DLL int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
-DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
const char * word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
* example:
* char ** result;
* char * affix = "is:plural"; // description depends from dictionaries, too
* int n = Hunspell_generate2(result, "word", &affix, 1);
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
*/
-DLL int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
-DLL int Hunspell_add(Hunhandle *pHunspell, const char * word);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
*/
-DLL int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
/* remove word from the run-time dictionary */
-DLL int Hunspell_remove(Hunhandle *pHunspell, const char * word);
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
/* free suggestion lists */
-DLL void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
#ifdef __cplusplus
}
#endif
#endif
--- a/extensions/spellcheck/hunspell/src/hunspell.hxx
+++ b/extensions/spellcheck/hunspell/src/hunspell.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,51 +50,39 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#include "affixmgr.hxx"
+#include "hunvisapi.h"
+
#include "hashmgr.hxx"
+#include "affixmgr.hxx"
+#include "suggestmgr.hxx"
#include "langnum.hxx"
-#include "suggestmgr.hxx"
#define SPELL_COMPOUND (1 << 0)
#define SPELL_FORBIDDEN (1 << 1)
#define SPELL_ALLCAP (1 << 2)
#define SPELL_NOCAP (1 << 3)
#define SPELL_INITCAP (1 << 4)
#define SPELL_XML "<?xml?>"
#define MAXDIC 20
#define MAXSUGGESTION 15
#define MAXSHARPS 5
#ifndef _MYSPELLMGR_HXX_
#define _MYSPELLMGR_HXX_
-#ifdef HUNSPELL_STATIC
- #define DLLEXPORT
-#else
- #ifdef HUNSPELL_EXPORTS
- #define DLLEXPORT __declspec( dllexport )
- #else
- #define DLLEXPORT __declspec( dllimport )
- #endif
-#endif
-
-#ifdef W32
-class DLLEXPORT Hunspell
-#else
-class Hunspell
-#endif
+class LIBHUNSPELL_DLL_EXPORTED Hunspell
{
AffixMgr* pAMgr;
HashMgr* pHMgr[MAXDIC];
int maxdic;
SuggestMgr* pSMgr;
char * affixpath;
char * encoding;
struct cs_info * csconv;
@@ -195,28 +184,29 @@ public:
/* other */
/* get extra word characters definied in affix file for tokenization */
const char * get_wordchars();
unsigned short * get_wordchars_utf16(int * len);
struct cs_info * get_csconv();
const char * get_version();
+
+ int get_langnum() const;
/* experimental and deprecated functions */
#ifdef HUNSPELL_EXPERIMENTAL
/* suffix is an affix flag string, similarly in dictionary files */
int put_word_suffix(const char * word, const char * suffix);
char * morph_with_correction(const char * word);
/* spec. suggestions */
int suggest_auto(char*** slst, const char * word);
int suggest_pos_stems(char*** slst, const char * word);
- char * get_possible_root();
#endif
private:
int cleanword(char *, const char *, int * pcaptype, int * pabbrev);
int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
void mkinitcap(char *);
int mkinitcap2(char * p, w_char * u, int nc);
int mkinitsmall2(char * p, w_char * u, int nc);
new file mode 100644
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunvisapi.h
@@ -0,0 +1,51 @@
+/******* BEGIN LICENSE BLOCK *******
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Initial Developer of the Original Code is Caolan McNamara.
+ * Portions created by the Initial Developer are Copyright (C) 2010 the
+ * Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s): Caolan McNamara (caolanm@redhat.com)
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ ******* END LICENSE BLOCK *******/
+
+#ifndef _HUNSPELL_VISIBILITY_H_
+#define _HUNSPELL_VISIBILITY_H_
+/* LIBHUNSPELL_DLL_EXPORTED: export/visibility decoration put on the library's classes and functions. */
+#if defined(HUNSPELL_STATIC)            /* static build: no decoration at all */
+#  define LIBHUNSPELL_DLL_EXPORTED
+#elif defined(_MSC_VER)                 /* MSVC: export while building the library, import otherwise */
+#  if defined(BUILDING_LIBHUNSPELL)
+#    define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
+#  else
+#    define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
+#  endif
+#elif defined(BUILDING_LIBHUNSPELL)     /* other compilers, building the library: default visibility; tested with defined() like the MSVC branch so an undefined or empty macro cannot break the directive */
+#  define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
+#else
+#  define LIBHUNSPELL_DLL_EXPORTED
+#endif
+
+#endif /* _HUNSPELL_VISIBILITY_H_ */
--- a/extensions/spellcheck/hunspell/src/hunzip.cpp
+++ b/extensions/spellcheck/hunspell/src/hunzip.cpp
@@ -11,40 +11,35 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#endif
#include "hunzip.hxx"
#define CODELEN 65536
#define BASEBITREC 5000
#define UNCOMPRESSED '\002'
#define MAGIC "hz0"
@@ -57,28 +52,31 @@ int Hunzip::fail(const char * err, const
}
Hunzip::Hunzip(const char * file, const char * key) {
bufsiz = 0;
lastbit = 0;
inc = 0;
outc = 0;
dec = NULL;
+ fin = NULL;
filename = (char *) malloc(strlen(file) + 1);
if (filename) strcpy(filename, file);
if (getcode(key) == -1) bufsiz = -1;
else bufsiz = getbuf();
}
int Hunzip::getcode(const char * key) {
unsigned char c[2];
int i, j, n, p;
int allocatedbit = BASEBITREC;
const char * enc = key;
+ if (!filename) return -1;
+
fin = fopen(filename, "rb");
if (!fin) return -1;
// read magic number
if ((fread(in, 1, 3, fin) < MAGICLEN)
|| !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
return fail(MSG_FORMAT, filename);
--- a/extensions/spellcheck/hunspell/src/hunzip.hxx
+++ b/extensions/spellcheck/hunspell/src/hunzip.hxx
@@ -11,16 +11,17 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -29,30 +30,34 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _HUNZIP_HXX_
#define _HUNZIP_HXX_
+#include "hunvisapi.h"
+
+#include <stdio.h>
+
#define BUFSIZE 65536
#define HZIP_EXTENSION ".hz"
#define MSG_OPEN "error: %s: cannot open\n"
#define MSG_FORMAT "error: %s: not in hzip format\n"
#define MSG_MEMORY "error: %s: missing memory\n"
#define MSG_KEY "error: %s: missing or bad password\n"
struct bit {
unsigned char c[2];
int v[2];
};
-class Hunzip
+class LIBHUNSPELL_DLL_EXPORTED Hunzip
{
protected:
char * filename;
FILE * fin;
int bufsiz, lastbit, inc, inbits, outc;
struct bit * dec; // code table
char in[BUFSIZE]; // input buffer
--- a/extensions/spellcheck/hunspell/src/phonet.cpp
+++ b/extensions/spellcheck/hunspell/src/phonet.cpp
@@ -12,16 +12,17 @@
* License.
*
* The Initial Developer of the Original Code is Björn Jacke. Portions created
* by the Initial Developers are Copyright (C) 2000-2007 the Initial
* Developers. All Rights Reserved.
*
* Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -38,27 +39,20 @@
* 2007-07-26 Björn Jacke <bjoern.jacke AT gmx.de>
* Released under MPL/GPL/LGPL tri-license for Hunspell
*
* 2007-08-23 László Németh <nemeth at OOo>
* Porting from Aspell to Hunspell using C-like structs
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
#include "csutil.hxx"
#include "phonet.hxx"
void init_phonet_hash(phonetable & parms)
{
int i, k;
@@ -71,25 +65,25 @@ void init_phonet_hash(phonetable & parms
k = (unsigned char) parms.rules[i][0];
if (parms.hash[k] < 0) {
parms.hash[k] = i;
}
}
}
- // like strcpy but safe if the strings overlap
- // but only if dest < src
- static inline void strmove(char * dest, char * src) {
- while (*src)
- *dest++ = *src++;
- *dest = '\0';
- }
+// like strcpy, but copies forward one byte at a time, so it is safe when
+// the strings overlap -- but only if dest < src
+static inline void strmove(char * dest, char * src) {
+ while (*src)
+ *dest++ = *src++;
+ *dest = '\0'; // terminate the copy right after the last byte moved
+}
-int myisalpha(char ch) {
+static int myisalpha(char ch) { // file-local: bytes below 128 defer to isalpha(), every byte >= 128 counts as alphabetic
if ((unsigned char) ch < 128) return isalpha(ch);
return 1;
}
/* phonetic transcription algorithm */
/* see: http://aspell.net/man-html/Phonetic-Code.html */
/* convert string to uppercase before this call */
int phonet (const char * inword, char * target,
--- a/extensions/spellcheck/hunspell/src/phonet.hxx
+++ b/extensions/spellcheck/hunspell/src/phonet.hxx
@@ -12,16 +12,17 @@
* License.
*
* The Initial Developer of the Original Code is Björn Jacke. Portions created
* by the Initial Developers are Copyright (C) 2000-2007 the Initial
* Developers. All Rights Reserved.
*
* Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -45,22 +46,24 @@
#ifndef __PHONETHXX__
#define __PHONETHXX__
#define HASHSIZE 256
#define MAXPHONETLEN 256
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
+#include "hunvisapi.h"
+
struct phonetable {
char utf8;
cs_info * lang;
int num;
char * * rules;
int hash[HASHSIZE];
};
-void init_phonet_hash(phonetable & parms);
+LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
-int phonet (const char * inword, char * target,
+LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
int len, phonetable & phone);
#endif
--- a/extensions/spellcheck/hunspell/src/replist.cpp
+++ b/extensions/spellcheck/hunspell/src/replist.cpp
@@ -11,40 +11,35 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#endif
#include "replist.hxx"
#include "csutil.hxx"
RepList::RepList(int n) {
dat = (replentry **) malloc(sizeof(replentry *) * n);
if (dat == 0) size = 0; else size = n;
pos = 0;
@@ -104,17 +99,17 @@ int RepList::add(char * pat1, char * pat
}
return 0;
}
int RepList::conv(const char * word, char * dest) {
int stl = 0;
int change = 0;
// for (int i = 0; i < pos; i++) fprintf(stderr, "%d. %s\n", i, dat[i]->pattern);
- for (int i = 0; i < strlen(word); i++) {
+ for (size_t i = 0; i < strlen(word); i++) {
int n = near(word + i);
int l = match(word + i, n);
if (l) {
strcpy(dest + stl, dat[n]->pattern2);
stl += strlen(dat[n]->pattern2);
i += l - 1;
change = 1;
} else dest[stl++] = word[i];
--- a/extensions/spellcheck/hunspell/src/replist.hxx
+++ b/extensions/spellcheck/hunspell/src/replist.hxx
@@ -11,16 +11,17 @@
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
@@ -29,19 +30,22 @@
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
/* string replacement list class */
#ifndef _REPLIST_HXX_
#define _REPLIST_HXX_
+
+#include "hunvisapi.h"
+
#include "w_char.hxx"
-class RepList
+class LIBHUNSPELL_DLL_EXPORTED RepList
{
protected:
replentry ** dat;
int size;
int pos;
public:
RepList(int n);
--- a/extensions/spellcheck/hunspell/src/suggestmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.cpp
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -49,48 +50,37 @@
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
-#ifndef MOZILLA_CLIENT
-#include <cstdlib>
-#include <cstring>
-#include <cstdio>
-#include <cctype>
-#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
-#endif
-#include "csutil.hxx"
-#include "htypes.hxx"
#include "suggestmgr.hxx"
-
-#ifndef MOZILLA_CLIENT
-#ifndef W32
-using namespace std;
-#endif
-#endif
+#include "htypes.hxx"
+#include "csutil.hxx"
const w_char W_VLINE = { '\0', '|' };
SuggestMgr::SuggestMgr(const char * tryme, int maxn,
AffixMgr * aptr)
{
// register affix manager and check in string of chars to
// try when building candidate suggestions
pAMgr = aptr;
+ csconv = NULL;
+
ckeyl = 0;
ckey = NULL;
ckey_utf = NULL;
ctryl = 0;
ctry = NULL;
ctry_utf = NULL;
@@ -115,16 +105,17 @@ SuggestMgr::SuggestMgr(const char * trym
}
if (ckey) {
if (utf8) {
w_char t[MAXSWL];
ckeyl = u8_u16(t, MAXSWL, ckey);
ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char));
if (ckey_utf) memcpy(ckey_utf, t, ckeyl * sizeof(w_char));
+ else ckeyl = 0;
} else {
ckeyl = strlen(ckey);
}
}
if (tryme) {
ctry = mystrdup(tryme);
if (ctry) ctryl = strlen(ctry);
@@ -148,16 +139,19 @@ SuggestMgr::~SuggestMgr()
ckey_utf = NULL;
ckeyl = 0;
if (ctry) free(ctry);
ctry = NULL;
if (ctry_utf) free(ctry_utf);
ctry_utf = NULL;
ctryl = 0;
maxSug = 0;
+#ifdef MOZILLA_CLIENT
+ delete [] csconv;
+#endif
}
int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
int * timer, clock_t * timelimit) {
int cwrd = 1;
if (ns == maxSug) return maxSug;
for (int k=0; k < ns; k++) {
if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
@@ -374,123 +368,75 @@ int SuggestMgr::capchars(char** wlst, co
strcpy(candidate, word);
mkallcap(candidate, csconv);
return testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
}
// suggestions for when chose the wrong char out of a related set
int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
{
+ char candidate[MAXSWUTF8L]; // scratch buffer that map_related() fills with each candidate it tests
clock_t timelimit;
int timer;
-
+ candidate[0] = '\0';
+
int wl = strlen(word);
if (wl < 2 || ! pAMgr) return ns;
int nummap = pAMgr->get_nummap();
struct mapentry* maptable = pAMgr->get_maptable();
if (maptable==NULL) return ns;
timelimit = clock();
timer = MINTIMER;
- if (utf8) {
- w_char w[MAXSWL];
- int len = u8_u16(w, MAXSWL, word);
- ns = map_related_utf(w, len, 0, cpdsuggest, wlst, ns, maptable, nummap, &timer, &timelimit);
- } else ns = map_related(word, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit);
- return ns;
+ return map_related(word, candidate, 0, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit); // the array decays to char *; both offsets start at 0
}
-int SuggestMgr::map_related(const char * word, int i, char** wlst,
- int cpdsuggest, int ns,
+int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn, // wn: read offset in word; cn: write offset in candidate (the MAXSWUTF8L-byte buffer of mapchars)
+ char** wlst, int cpdsuggest, int ns, // returns the updated suggestion count, or -1 when mystrdup fails
const mapentry* maptable, int nummap, int * timer, clock_t * timelimit)
{
- char c = *(word + i);
- if (c == 0) {
+ if (*(word + wn) == '\0') { // whole word consumed: candidate is complete, test it
int cwrd = 1;
- int wl = strlen(word);
+ *(candidate + cn) = '\0';
+ int wl = strlen(candidate);
for (int m=0; m < ns; m++)
- if (strcmp(word,wlst[m]) == 0) cwrd = 0;
- if ((cwrd) && checkword(word, wl, cpdsuggest, timer, timelimit)) {
+ if (strcmp(candidate, wlst[m]) == 0) cwrd = 0;
+ if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
if (ns < maxSug) {
- wlst[ns] = mystrdup(word);
+ wlst[ns] = mystrdup(candidate);
if (wlst[ns] == NULL) return -1;
ns++;
}
}
return ns;
}
int in_map = 0;
for (int j = 0; j < nummap; j++) {
- if (strchr(maptable[j].set,c) != 0) {
- in_map = 1;
- char * newword = mystrdup(word);
- if (!newword) return -1;
- for (int k = 0; k < maptable[j].len; k++) {
- *(newword + i) = *(maptable[j].set + k);
- ns = map_related(newword, (i+1), wlst, cpdsuggest,
- ns, maptable, nummap, timer, timelimit);
- if (!(*timer)) return ns;
+ for (int k = 0; k < maptable[j].len; k++) {
+ int len = strlen(maptable[j].set[k]);
+ if (strncmp(maptable[j].set[k], word + wn, len) == 0) {
+ in_map = 1;
+ for (int l = 0; l < maptable[j].len; l++) {
+ if (cn + (int) strlen(maptable[j].set[l]) >= MAXSWUTF8L) continue; // this replacement plus its terminator would overrun candidate
+ strcpy(candidate + cn, maptable[j].set[l]);
+ ns = map_related(word, candidate, wn + len, strlen(candidate), wlst, cpdsuggest, ns, maptable, nummap, timer, timelimit);
+ if (ns == -1 || !(*timer)) return ns; // propagate allocation failure (ns must never be reused as -1) or stop once *timer is 0
+ }
}
- free(newword);
}
}
if (!in_map) {
- i++;
- ns = map_related(word, i, wlst, cpdsuggest,
- ns, maptable, nummap, timer, timelimit);
+ if (cn + 1 >= MAXSWUTF8L) return ns; // no room left for one more byte plus the terminator
+ *(candidate + cn) = *(word + wn);
+ ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest, ns, maptable, nummap, timer, timelimit);
}
return ns;
}
-int SuggestMgr::map_related_utf(w_char * word, int len, int i, int cpdsuggest,
- char** wlst, int ns, const mapentry* maptable, int nummap,
- int * timer, clock_t * timelimit)
-{
- if (i == len) {
- int cwrd = 1;
- int wl;
- char s[MAXSWUTF8L];
- u16_u8(s, MAXSWUTF8L, word, len);
- wl = strlen(s);
- for (int m=0; m < ns; m++)
- if (strcmp(s,wlst[m]) == 0) cwrd = 0;
- if ((cwrd) && checkword(s, wl, cpdsuggest, timer, timelimit)) {
- if (ns < maxSug) {
- wlst[ns] = mystrdup(s);
- if (wlst[ns] == NULL) return -1;
- ns++;
- }
- }
- return ns;
- }
- int in_map = 0;
- unsigned short c = *((unsigned short *) word + i);
- for (int j = 0; j < nummap; j++) {
- if (flag_bsearch((unsigned short *) maptable[j].set_utf16, c, maptable[j].len)) {
- in_map = 1;
- for (int k = 0; k < maptable[j].len; k++) {
- *(word + i) = *(maptable[j].set_utf16 + k);
- ns = map_related_utf(word, len, i + 1, cpdsuggest,
- wlst, ns, maptable, nummap, timer, timelimit);
- if (!(*timer)) return ns;
- }
- *((unsigned short *) word + i) = c;
- }
- }
- if (!in_map) {
- i++;
- ns = map_related_utf(word, len, i, cpdsuggest,
- wlst, ns, maptable, nummap, timer, timelimit);
- }
- return ns;
-}
-
-
-
// suggestions for a typical fault of spelling, that
// differs with more, than 1 letter from the right form.
int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest)
{
char candidate[MAXSWUTF8L];
const char * r;
int lenr, lenp;
int wl = strlen(word);
@@ -966,17 +912,17 @@ int SuggestMgr::longswapchar(char ** wls
char * p;
char * q;
char tmpc;
int wl=strlen(word);
// try swapping not adjacent chars one by one
strcpy(candidate, word);
for (p = candidate; *p != 0; p++) {
for (q = candidate; *q != 0; q++) {
- if (abs(p-q) > 1) {
+ if (abs((int)(p-q)) > 1) {
tmpc = *p;
*p = *q;
*q = tmpc;
ns = testsug(wlst, candidate, wl, ns, cpdsuggest, NULL, NULL);
if (ns == -1) return -1;
*q = *p;
*p = tmpc;
}
@@ -993,17 +939,17 @@ int SuggestMgr::longswapchar_utf(char **
char candidate[MAXSWUTF8L];
w_char * p;
w_char * q;
w_char tmpc;
// try swapping not adjacent chars
memcpy (candidate_utf, word, wl * sizeof(w_char));
for (p = candidate_utf; p < (candidate_utf + wl); p++) {
for (q = candidate_utf; q < (candidate_utf + wl); q++) {
- if (abs(p-q) > 1) {
+ if (abs((int)(p-q)) > 1) {
tmpc = *p;
*p = *q;
*q = tmpc;
u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl);
ns = testsug(wlst, candidate, strlen(candidate), ns, cpdsuggest, NULL, NULL);
if (ns == -1) return -1;
*q = *p;
*p = tmpc;
@@ -1190,17 +1136,17 @@ int SuggestMgr::ngsuggest(char** wlst, c
for (j=0; j < MAX_ROOTS; j++)
if (scoresphon[j] < lval) {
lpphon = j;
lval = scoresphon[j];
}
}
}}
- // find minimum threshhold for a passable suggestion
+ // find minimum threshold for a passable suggestion
// mangle original word three differnt ways
// and score them to generate a minimum acceptable score
int thresh = 0;
for (int sp = 1; sp < 4; sp++) {
if (utf8) {
for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
u16_u8(mw, MAXSWUTF8L, u8, n);
thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + NGRAM_LOWERING);
@@ -1790,33 +1736,33 @@ int SuggestMgr::ngram(int n, char * s1,
break;
}
}
}
nscore = nscore + ns;
if (ns < 2) break;
}
} else {
- char t[MAXSWUTF8L];
- l1 = strlen(s1);
l2 = strlen(s2);
if (l2 == 0) return 0;
- strcpy(t, s2);
+ l1 = strlen(s1);
+ char *t = mystrdup(s2);
if (opt & NGRAM_LOWERING) mkallsmall(t, csconv);
for (int j = 1; j <= n; j++) {
ns = 0;
for (int i = 0; i <= (l1-j); i++) {
char c = *(s1 + i + j);
*(s1 + i + j) = '\0';
if (strstr(t,(s1+i))) ns++;
*(s1 + i + j ) = c;
}
nscore = nscore + ns;
if (ns < 2) break;
}
+ free(t);
}
ns = 0;
if (opt & NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
if (opt & NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
ns = (nscore - ((ns > 0) ? ns : 0));
return ns;
}
@@ -1831,37 +1777,38 @@ int SuggestMgr::leftcommonsubstring(char
int l1 = u8_u16(su1, MAXSWL, s1);
int l2 = u8_u16(su2, MAXSWL, s2);
if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1;
} else {
int i;
u8_u16(su1, 1, s1);
u8_u16(su2, 1, s2);
unsigned short idx = (su2->h << 8) + su2->l;
- if (*((short *)su1) != *((short *)su2) &&
- (*((unsigned short *)su1) != unicodetolower(idx, langnum))) return 0;
+ unsigned short otheridx = (su1->h << 8) + su1->l;
+ if (otheridx != idx &&
+ (otheridx != unicodetolower(idx, langnum))) return 0;
int l1 = u8_u16(su1, MAXSWL, s1);
int l2 = u8_u16(su2, MAXSWL, s2);
for(i = 1; (i < l1) && (i < l2) &&
- (*((short *)(su1 + i)) == *((short *)(su2 + i))); i++);
+ (su1[i].l == su2[i].l) && (su1[i].h == su2[i].h); i++);
return i;
}
} else {
if (complexprefixes) {
int l1 = strlen(s1);
int l2 = strlen(s2);
if (*(s2+l1-1) == *(s2+l2-1)) return 1;
} else {
char * olds = s1;
// decapitalise dictionary word
if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) return 0;
do {
s1++; s2++;
} while ((*s1 == *s2) && (*s1 != '\0'));
- return s1 - olds;
+ return (int)(s1 - olds);
}
}
return 0;
}
int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_swap) {
int num = 0;
int diff = 0;
@@ -1972,18 +1919,18 @@ void SuggestMgr::lcs(const char * s, con
if (b) free(b);
*result = NULL;
return;
}
for (i = 1; i <= m; i++) c[i*(n+1)] = 0;
for (j = 0; j <= n; j++) c[j] = 0;
for (i = 1; i <= m; i++) {
for (j = 1; j <= n; j++) {
- if ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1))
- || (!utf8) && ((*(s+i-1)) == (*(s2+j-1)))) {
+ if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1)))
+ || ((!utf8) && ((*(s+i-1)) == (*(s2+j-1))))) {
c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1;
b[i*(n+1) + j] = LCS_UPLEFT;
} else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) {
c[i*(n+1) + j] = c[(i-1)*(n+1) + j];
b[i*(n+1) + j] = LCS_UP;
} else {
c[i*(n+1) + j] = c[i*(n+1) + j-1];
b[i*(n+1) + j] = LCS_LEFT;
--- a/extensions/spellcheck/hunspell/src/suggestmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
@@ -13,16 +13,17 @@
*
* The Initial Developers of the Original Code are Kevin Hendricks (MySpell)
* and László Németh (Hunspell). Portions created by the Initial Developers
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
+ * Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
* Noll Janos
* Biro Arpad
* Goldman Eleonora
* Sarlos Tamas
@@ -69,25 +70,27 @@
#define TIMELIMIT (CLOCKS_PER_SEC >> 2)
#define MINTIMER 100
#define MAXPLUSTIMER 100
#define NGRAM_LONGER_WORSE (1 << 0)
#define NGRAM_ANY_MISMATCH (1 << 1)
#define NGRAM_LOWERING (1 << 2)
+#include "hunvisapi.h"
+
+#include "atypes.hxx"
#include "affixmgr.hxx"
-#include "atypes.hxx"
#include "hashmgr.hxx"
#include "langnum.hxx"
#include <time.h>
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
-class SuggestMgr
+class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
{
char * ckey;
int ckeyl;
w_char * ckey_utf;
char * ctry;
int ctryl;
w_char * ctry_utf;
@@ -141,22 +144,22 @@ private:
int extrachar_utf(char**, const w_char *, int wl, int, int);
int badcharkey_utf(char **, const w_char *, int wl, int, int);
int badchar_utf(char **, const w_char *, int wl, int, int);
int swapchar_utf(char **, const w_char *, int wl, int, int);
int longswapchar_utf(char **, const w_char *, int, int, int);
int movechar_utf(char **, const w_char *, int, int, int);
int mapchars(char**, const char *, int, int);
- int map_related(const char *, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
- int map_related_utf(w_char *, int, int, int, char ** wlst, int, const mapentry*, int, int *, clock_t *);
+ int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
int ngram(int n, char * s1, const char * s2, int opt);
int mystrlen(const char * word);
int leftcommonsubstring(char * s1, const char * s2);
int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
int lcslen(const char * s, const char* s2);
char * suggest_hentry_gen(hentry * rv, char * pattern);
};
#endif
+