Bug 620626 - Upgrade to Hunspell 1.3.1; r=smaug,ehsan
author Ryan VanderMeulen <ryanvm@gmail.com>
Thu, 24 Mar 2011 20:14:47 -0400
changeset 63910 d981206620ffbb1724bedb2820e77a65afee66c2
parent 63909 067fdfe97f0d9b0400e665d38578c0b3a0f670db
child 63911 d6eac984c86f8940298e5f73c38027d181be99fc
push id unknown
push user unknown
push date unknown
reviewers smaug, ehsan
bugs 620626
milestone 2.2a1pre
Bug 620626 - Upgrade to Hunspell 1.3.1; r=smaug,ehsan
extensions/spellcheck/hunspell/src/README.hunspell
extensions/spellcheck/hunspell/src/affentry.cpp
extensions/spellcheck/hunspell/src/affixmgr.cpp
extensions/spellcheck/hunspell/src/affixmgr.hxx
extensions/spellcheck/hunspell/src/atypes.hxx
extensions/spellcheck/hunspell/src/csutil.cpp
extensions/spellcheck/hunspell/src/csutil.hxx
extensions/spellcheck/hunspell/src/hashmgr.cpp
extensions/spellcheck/hunspell/src/hunspell.cpp
extensions/spellcheck/hunspell/src/hunspell.hxx
extensions/spellcheck/hunspell/src/replist.cpp
extensions/spellcheck/hunspell/src/suggestmgr.cpp
extensions/spellcheck/hunspell/src/suggestmgr.hxx
extensions/spellcheck/hunspell/src/w_char.hxx
--- a/extensions/spellcheck/hunspell/src/README.hunspell
+++ b/extensions/spellcheck/hunspell/src/README.hunspell
@@ -30,18 +30,18 @@
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 ******* END LICENSE BLOCK *******
 
-Hunspell Version: 1.2.12
-Additional Patches: 583582, 586671, 586686, 603311, 617953, 626195, 631607
+Hunspell Version:   1.3.1
+Additional Patches: 
 
 Hunspell Author: László Németh
 MySpell Author: Kevin Hendricks & David Einstein
 
 Hunspell is a spell checker and morphological analyser library. Hunspell
 is based on OpenOffice.org's Myspell. Documentation, tests, and examples
 are available at http://hunspell.sourceforge.net.
 
--- a/extensions/spellcheck/hunspell/src/affentry.cpp
+++ b/extensions/spellcheck/hunspell/src/affentry.cpp
@@ -155,17 +155,17 @@ inline int PfxEntry::test_condition(cons
                 pos = st; break;
             }
         case '^': { p = nextchar(p); neg = true; break; }
         case ']': { 
                 if ((neg && ingroup) || (!neg && !ingroup)) return 0;
                 pos = NULL;
                 p = nextchar(p);
                 // skip the next character
-                if (!ingroup) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
+                if (!ingroup && *st) for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
                 if (*st == '\0' && p) return 0; // word <= condition
                 break;
             }
          case '.': if (!pos) { // dots are not metacharacters in groups: [.]
                 p = nextchar(p);
                 // skip the next character
                 for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
                 if (*st == '\0' && p) return 0; // word <= condition
--- a/extensions/spellcheck/hunspell/src/affixmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/affixmgr.cpp
@@ -78,17 +78,17 @@ AffixMgr::AffixMgr(const char * affpath,
   trystring = NULL;
   encoding=NULL;
   csconv=NULL;
   utf8 = 0;
   complexprefixes = 0;
   maptable = NULL;
   nummap = 0;
   breaktable = NULL;
-  numbreak = 0;
+  numbreak = -1;
   reptable = NULL;
   numrep = 0;
   iconvtable = NULL;
   oconvtable = NULL;
   checkcpdtable = NULL;
   // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
   simplifiedcpd = 0;
   numcheckcpd = 0;
@@ -104,16 +104,17 @@ AffixMgr::AffixMgr(const char * affpath,
   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
   checkcompounddup = 0; // forbid double words in compounds
   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
   checkcompoundtriple = 0; // forbid compounds with triple letters
   simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
   forbiddenword = FORBIDDENWORD; // forbidden word signing flag
   nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
+  nongramsuggest = FLAG_NULL;
   lang = NULL; // language
   langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
   needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
   cpdwordmax = -1; // default: unlimited wordcount in compound words
   cpdmin = -1;  // undefined
   cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
   cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
   cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
@@ -131,19 +132,25 @@ AffixMgr::AffixMgr(const char * affpath,
   version=NULL; // affix and dictionary file version string
   havecontclass=0; // flags of possible continuing classes (double affix)
   // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
   // in morhological description in dictionary file. It's often combined with PSEUDOROOT.
   lemma_present = FLAG_NULL; 
   circumfix = FLAG_NULL; 
   onlyincompound = FLAG_NULL; 
   maxngramsugs = -1; // undefined
+  maxdiff = -1; // undefined
+  onlymaxdiff = 0;
+  maxcpdsugs = -1; // undefined
   nosplitsugs = 0;
   sugswithdots = 0;
   keepcase = 0;
+  forceucase = 0;
+  warn = 0;
+  forbidwarn = 0;
   checksharps = 0;
   substandard = FLAG_NULL;
   fullstrip = 0;
 
   sfx = NULL;
   pfx = NULL;
 
   for (int i=0; i < SETSIZE; i++) {
@@ -269,16 +276,17 @@ AffixMgr::~AffixMgr()
   FREE_FLAG(compoundbegin);
   FREE_FLAG(compoundmiddle);
   FREE_FLAG(compoundend);
   FREE_FLAG(compoundpermitflag);
   FREE_FLAG(compoundforbidflag);
   FREE_FLAG(compoundroot);
   FREE_FLAG(forbiddenword);
   FREE_FLAG(nosuggest);
+  FREE_FLAG(nongramsuggest);
   FREE_FLAG(needaffix);
   FREE_FLAG(lemma_present);
   FREE_FLAG(circumfix);
   FREE_FLAG(onlyincompound);
   
   cpdwordmax = 0;
   pHMgr = NULL;
   cpdmin = 0;
@@ -472,16 +480,23 @@ int  AffixMgr::parse_file(const char * a
 
        if (strncmp(line,"NOSUGGEST",9) == 0) {
           if (parse_flag(line, &nosuggest, afflst)) {
              delete afflst;
              return 1;
           }
        }
 
+       if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
+          if (parse_flag(line, &nongramsuggest, afflst)) {
+             delete afflst;
+             return 1;
+          }
+       }
+
        /* parse in the flag used by forbidden words */
        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
           if (parse_flag(line, &forbiddenword, afflst)) {
              delete afflst;
              return 1;
           }
        }
 
@@ -651,16 +666,33 @@ int  AffixMgr::parse_file(const char * a
 
        if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
           if (parse_num(line, &maxngramsugs, afflst)) {
              delete afflst;
              return 1;
           }
        }
 
+       if (strncmp(line,"ONLYMAXDIFF", 11) == 0)
+                   onlymaxdiff = 1;
+
+       if (strncmp(line,"MAXDIFF",7) == 0) {
+          if (parse_num(line, &maxdiff, afflst)) {
+             delete afflst;
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"MAXCPDSUGS",10) == 0) {
+          if (parse_num(line, &maxcpdsugs, afflst)) {
+             delete afflst;
+             return 1;
+          }
+       }
+
        if (strncmp(line,"NOSPLITSUGS",11) == 0) {
                    nosplitsugs=1;
        }
 
        if (strncmp(line,"FULLSTRIP",9) == 0) {
                    fullstrip=1;
        }
 
@@ -671,16 +703,36 @@ int  AffixMgr::parse_file(const char * a
        /* parse in the flag used by forbidden words */
        if (strncmp(line,"KEEPCASE",8) == 0) {
           if (parse_flag(line, &keepcase, afflst)) {
              delete afflst;
              return 1;
           }
        }
 
+       /* parse in the flag used by `forceucase' */
+       if (strncmp(line,"FORCEUCASE",10) == 0) {
+          if (parse_flag(line, &forceucase, afflst)) {
+             delete afflst;
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by `warn' */
+       if (strncmp(line,"WARN",4) == 0) {
+          if (parse_flag(line, &warn, afflst)) {
+             delete afflst;
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"FORBIDWARN",10) == 0) {
+                   forbidwarn=1;
+       }
+
        /* parse in the flag used by the affix generator */
        if (strncmp(line,"SUBSTANDARD",11) == 0) {
           if (parse_flag(line, &substandard, afflst)) {
              delete afflst;
              return 1;
           }
        }
 
@@ -758,17 +810,17 @@ int  AffixMgr::parse_file(const char * a
                 *(expw + strlen(expw)) = (char) i;
         }
     }
 
     wordchars = mystrdup(expw);
     }
 
     // default BREAK definition
-    if (!breaktable) {
+    if (numbreak == -1) {
         breaktable = (char **) malloc(sizeof(char *) * 3);
         if (!breaktable) return 1;
         breaktable[0] = mystrdup("-");
         breaktable[1] = mystrdup("^-");
         breaktable[2] = mystrdup("-$");
         if (breaktable[0] && breaktable[1] && breaktable[2]) numbreak = 3;
     }
     return 0;
@@ -1316,27 +1368,33 @@ int AffixMgr::cpdrep_check(const char * 
           if (candidate_check(candidate,strlen(candidate))) return 1;
           r++; // search for the next letter
       }
    }
    return 0;
 }
 
 // forbid compoundings when there are special patterns at word bound
-int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2)
+int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
 {
   int len;
   for (int i = 0; i < numcheckcpd; i++) {
       if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
         (!r1 || !checkcpdtable[i].cond ||
           (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) &&
         (!r2 || !checkcpdtable[i].cond2 ||
           (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) &&
-        (len = strlen(checkcpdtable[i].pattern)) && (pos > len) &&
-        (strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)) return 1;
+        // zero length pattern => only TESTAFF; text compared below must fit in the pos bytes before the boundary
+        // zero pattern (0/flag) => unmodified stem (zero affixes allowed); r1 may be NULL (see the !r1 test above)
+        (!*(checkcpdtable[i].pattern) || (
+            (*(checkcpdtable[i].pattern)=='0' && r1 && r1->blen <= pos && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
+            (*(checkcpdtable[i].pattern)!='0' && (len = strlen(checkcpdtable[i].pattern)) && len <= pos &&
+                strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)))) {
+            return 1;
+        }
   }
   return 0;
 }
 
 // forbid compounding with neighbouring upper and lower case characters at word bounds
 int AffixMgr::cpdcase_check(const char * word, int pos)
 {
   if (utf8) {
@@ -1511,57 +1569,66 @@ void AffixMgr::setcminmax(int * cmin, in
           for ((*cmax)--; (word[*cmax] & 0xc0) == 0x80; (*cmax)--);
         }
     } else {
         *cmin = cpdmin;
         *cmax = len - cpdmin + 1;
     }
 }
 
+
 // check if compound word is correctly spelled
 // hu_mov_rule = spec. Hungarian rule (XXX)
 struct hentry * AffixMgr::compound_check(const char * word, int len, 
     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
-    char hu_mov_rule = 0, char is_sug = 0)
+    char hu_mov_rule = 0, char is_sug = 0, int * info = NULL)
 {
     int i; 
     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
     struct hentry * rv = NULL;
     struct hentry * rv_first;
     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
     char st [MAXWORDUTF8LEN + 4];
-    char ch;
+    char ch = '\0';
     int cmin;
     int cmax;
     int striple = 0;
     int scpd = 0;
     int soldi = 0;
     int oldcmin = 0;
     int oldcmax = 0;
     int oldlen = 0;
     int checkedstriple = 0;
+    int onlycpdrule;
+    int affixed = 0;
+    hentry ** oldwords = words;
 
     int checked_prefix;
 
     setcminmax(&cmin, &cmax, word, len);
 
     strcpy(st, word);
 
     for (i = cmin; i < cmax; i++) {
-
-        oldnumsyllable = numsyllable;
-        oldwordnum = wordnum;
-        checked_prefix = 0;
-
         // go to end of the UTF-8 character
         if (utf8) {
             for (; (st[i] & 0xc0) == 0x80; i++);
             if (i >= cmax) return NULL;
         }
 
+        words = oldwords;
+        onlycpdrule = (words) ? 1 : 0;
+
+        do { // onlycpdrule loop
+
+        oldnumsyllable = numsyllable;
+        oldwordnum = wordnum;
+        checked_prefix = 0;
+
+
         do { // simplified checkcompoundpattern loop
 
         if (scpd > 0) {
           for (; scpd <= numcheckcpd && (!checkcpdtable[scpd-1].pattern3 ||
             strncmp(word + i, checkcpdtable[scpd-1].pattern3, strlen(checkcpdtable[scpd-1].pattern3)) != 0); scpd++);
 
           if (scpd > numcheckcpd) break; // break simplified checkcompoundpattern loop
           strcpy(st + i, checkcpdtable[scpd-1].pattern);
@@ -1574,45 +1641,48 @@ struct hentry * AffixMgr::compound_check
           len += strlen(checkcpdtable[scpd-1].pattern) + strlen(checkcpdtable[scpd-1].pattern2) - strlen(checkcpdtable[scpd-1].pattern3);
           oldcmin = cmin;
           oldcmax = cmax;
           setcminmax(&cmin, &cmax, st, len);
 
           cmax = len - cpdmin + 1;
         }
 
-
         ch = st[i];
         st[i] = '\0';
 
         sfx = NULL;
         pfx = NULL;
 
         // FIRST WORD
 
+        affixed = 1;
         rv = lookup(st); // perhaps without prefix
 
         // search homonym with compound flag
         while ((rv) && !hu_mov_rule &&
             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
-                !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
-                  (compoundbegin && !wordnum &&
+                !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+                  (compoundbegin && !wordnum && !onlycpdrule && 
                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
-                  (compoundmiddle && wordnum && !words &&
+                  (compoundmiddle && wordnum && !words && !onlycpdrule &&
                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
-                  (numdefcpd &&
+                  (numdefcpd && onlycpdrule &&
                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))) ||
                   (scpd != 0 && checkcpdtable[scpd-1].cond != FLAG_NULL &&
                     !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)))
                   ) {
             rv = rv->next_homonym;
         }
 
+        if (rv) affixed = 0;
+
         if (!rv) {
+            if (onlycpdrule) break;
             if (compoundflag && 
              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
                     sfx->getCont() &&
                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
                             sfx->getContLen())) || (compoundend &&
                         TESTAFF(sfx->getCont(), compoundend, 
@@ -1626,21 +1696,23 @@ struct hentry * AffixMgr::compound_check
                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
               ((wordnum > 0) && compoundmiddle &&
                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
               ) checked_prefix = 1;
         // else check forbiddenwords and needaffix
         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
-            TESTAFF(rv->astr, needaffix, rv->alen) || 
+            TESTAFF(rv->astr, needaffix, rv->alen) ||
+            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
              )) {
                 st[i] = ch;
-                continue;
+                //continue;
+                break;
         }
 
             // check non_compound flag in suffix and prefix
             if ((rv) && !hu_mov_rule &&
                 ((pfx && pfx->getCont() &&
                     TESTAFF(pfx->getCont(), compoundforbidflag, 
                         pfx->getContLen())) ||
                 (sfx && sfx->getCont() &&
@@ -1668,16 +1740,17 @@ struct hentry * AffixMgr::compound_check
                 (sfx && sfx->getCont() &&
                     TESTAFF(sfx->getCont(), compoundmiddle, 
                         sfx->getContLen())))) {
                     rv = NULL;
             }
 
         // check forbiddenwords
         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
                 return NULL;
             }
 
         // increment word number, if the second root has a compoundroot flag
         if ((rv) && compoundroot && 
             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
                 wordnum++;
@@ -1724,23 +1797,21 @@ struct hentry * AffixMgr::compound_check
                )
              )
          ) { // first word is ok condition
 
 // LANG_hu section: spec. Hungarian rule
             if (langnum == LANG_hu) {
                 // calculate syllable number of the word
                 numsyllable += get_syllable(st, i);
-
                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
             }
 // END of LANG_hu section
 
-
             // NEXT WORD(S)
             rv_first = rv;
             st[i] = ch;
 
         do { // striple loop
 
             // check simplifiedtriple
             if (simplifiedtriple) { 
@@ -1758,35 +1829,41 @@ struct hentry * AffixMgr::compound_check
                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))) ||
                              (scpd != 0 && checkcpdtable[scpd-1].cond2 != FLAG_NULL &&
                                 !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
                            )) {
             rv = rv->next_homonym;
         }
 
+            // check FORCEUCASE
+            if (rv && forceucase && (rv) &&
+                (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
+
             if (rv && words && words[wnum + 1]) return rv_first;
 
             oldnumsyllable2 = numsyllable;
             oldwordnum2 = wordnum;
 
+
 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
                 numsyllable--;
             }
 // END of LANG_hu section
 
             // increment word number, if the second root has a compoundroot flag
             if ((rv) && (compoundroot) && 
                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
                     wordnum++;
             }
 
             // check forbiddenwords
             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
 
             // second word is acceptable, as a root?
             // hungarian conventions: compounding is acceptable,
             // when compound forms consist of 2 words, or if more,
             // then the syllable number of root words must be 6, or lesser.
 
             if ((rv) && (
@@ -1795,17 +1872,17 @@ struct hentry * AffixMgr::compound_check
                     )
                 && (
                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) || 
                       ((cpdmaxsyllable!=0) && 
                           (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
                     ) &&
                (
                  // test CHECKCOMPOUNDPATTERN
-                 !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv)
+                 !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv, 0)
                ) &&
                 (
                      (!checkcompounddup || (rv != rv_first))
                    )
             // test CHECKCOMPOUNDPATTERN conditions
                 && (scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
                       TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
                 )
@@ -1816,49 +1893,54 @@ struct hentry * AffixMgr::compound_check
             }
 
             numsyllable = oldnumsyllable2;
             wordnum = oldwordnum2;
 
             // perhaps second word has prefix or/and suffix
             sfx = NULL;
             sfxflag = FLAG_NULL;
-            rv = (compoundflag) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
-            if (!rv && compoundend) {
+            rv = (compoundflag && !onlycpdrule) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
+            if (!rv && compoundend && !onlycpdrule) {
                 sfx = NULL;
                 pfx = NULL;
                 rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
             }
 
             if (!rv && numdefcpd && words) {
                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
                 if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv_first;
                 rv = NULL;
             }
 
             // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
             if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL || 
                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
 
             // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
-            if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv)) rv = NULL;
+            if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) rv = NULL;
 
             // check non_compound flag in suffix and prefix
             if ((rv) && 
                 ((pfx && pfx->getCont() &&
                     TESTAFF(pfx->getCont(), compoundforbidflag, 
                         pfx->getContLen())) ||
                 (sfx && sfx->getCont() &&
                     TESTAFF(sfx->getCont(), compoundforbidflag, 
                         sfx->getContLen())))) {
                     rv = NULL;
             }
 
+            // check FORCEUCASE
+            if (rv && forceucase && (rv) &&
+                (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
+
             // check forbiddenwords
             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
 
             // pfxappnd = prefix of word+i, or NULL
             // calculate syllable number of prefix.
             // hungarian convention: when syllable number of prefix is more,
             // than 1, the prefix+word counts as two words.
 
             if (langnum == LANG_hu) {
@@ -1878,17 +1960,17 @@ struct hentry * AffixMgr::compound_check
 
                 // increment syllable num, if last word has a SYLLABLENUM flag
                 // and the suffix is beginning `s'
 
                 if (cpdsyllablenum) {
                     switch (sfxflag) {
                         case 'c': { numsyllable+=2; break; }
                         case 'J': { numsyllable += 1; break; }
-                        case 'I': { if (TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
+                        case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
                     }
                 }
             }
 
             // increment word number, if the second word has a compoundroot flag
             if ((rv) && (compoundroot) && 
                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
                     wordnum++;
@@ -1913,25 +1995,51 @@ struct hentry * AffixMgr::compound_check
             }
 
             numsyllable = oldnumsyllable2;
             wordnum = oldwordnum2;
 
             // perhaps second word is a compound word (recursive call)
             if (wordnum < maxwordnum) {
                 rv = compound_check((st+i),strlen(st+i), wordnum+1,
-                     numsyllable, maxwordnum, wnum + 1, words, 0, is_sug);
-                if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv)) ||
-                   (scpd != 0 && !cpdpat_check(word, i, rv_first, rv)))) rv = NULL;
+                     numsyllable, maxwordnum, wnum + 1, words, 0, is_sug, info);
+                
+                if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) ||
+                   (scpd != 0 && !cpdpat_check(word, i, rv_first, rv, affixed)))) rv = NULL;
             } else {
                 rv=NULL;
             }
             if (rv) {
                 // forbid compound word, if it is a non compound word with typical fault
-                if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
+                if (checkcompoundrep || forbiddenword) {
+                    struct hentry * rv2 = NULL;
+
+                    if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
+                    
+                    // check first part
+                    if (strncmp(rv->word, word + i, rv->blen) == 0) {
+                        char r = *(st + i + rv->blen);
+                        *(st + i + rv->blen) = '\0';
+                        
+                        if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) {
+                            *(st + i + rv->blen) = r;
+                            continue;
+                        }
+
+                        if (forbiddenword) {
+                            rv2 = lookup(word);
+                            if (!rv2) rv2 = affix_check(word, len);
+                            if (rv2 && rv2->astr && TESTAFF(rv2->astr, forbiddenword, rv2->alen) && 
+                                (strncmp(rv2->word, st, i + rv->blen) == 0)) {
+                                    return NULL;
+                            }
+                        }
+                        *(st + i + rv->blen) = r;
+                    }
+                }
                 return rv_first;
             }
           } while (striple && !checkedstriple); // end of striple loop
 
           if (checkedstriple) {
             i++;
             checkedstriple = 0;
             striple = 0;
@@ -1943,27 +2051,31 @@ struct hentry * AffixMgr::compound_check
           i = soldi;
           soldi = 0;
           len = oldlen;
           cmin = oldcmin;
           cmax = oldcmax;
         }
         scpd++;
 
-        } while (simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
+
+        } while (!onlycpdrule && simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
+
+        scpd = 0;
+        wordnum = oldwordnum;
+        numsyllable = oldnumsyllable;
 
         if (soldi != 0) {
           i = soldi;
           strcpy(st, word); // XXX add more optim.
           soldi = 0;
         } else st[i] = ch;
 
-        scpd = 0;
-        wordnum = oldwordnum;
-        numsyllable = oldnumsyllable;
+        } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
+
     }
 
     return NULL;
 }
 
 // check if compound word is correctly spelled
 // hu_mov_rule = spec. Hungarian rule (XXX)
 int AffixMgr::compound_check_morph(const char * word, int len, 
@@ -1981,69 +2093,89 @@ int AffixMgr::compound_check_morph(const
     char ch;
 
     int checked_prefix;
     char presult[MAXLNLEN];
 
     int cmin;
     int cmax;
 
+    int onlycpdrule;
+    int affixed = 0;
+    hentry ** oldwords = words;
+
     setcminmax(&cmin, &cmax, word, len);
 
     strcpy(st, word);
 
     for (i = cmin; i < cmax; i++) {
         oldnumsyllable = numsyllable;
         oldwordnum = wordnum;
         checked_prefix = 0;
 
         // go to end of the UTF-8 character
         if (utf8) {
             for (; (st[i] & 0xc0) == 0x80; i++);
             if (i >= cmax) return 0;
         }
 
+        words = oldwords;
+        onlycpdrule = (words) ? 1 : 0;
+
+        do { // onlycpdrule loop
+
+        oldnumsyllable = numsyllable;
+        oldwordnum = wordnum;
+        checked_prefix = 0;
+
         ch = st[i];
         st[i] = '\0';
         sfx = NULL;
 
         // FIRST WORD
+
+        affixed = 1;
+
         *presult = '\0';
         if (partresult) mystrcat(presult, partresult, MAXLNLEN);
 
         rv = lookup(st); // perhaps without prefix
 
         // search homonym with compound flag
         while ((rv) && !hu_mov_rule && 
             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
-                !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
-                (compoundbegin && !wordnum &&
+                !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+                (compoundbegin && !wordnum && !onlycpdrule &&
                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
-                (compoundmiddle && wordnum && !words &&
+                (compoundmiddle && wordnum && !words && !onlycpdrule &&
                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
-                  (numdefcpd &&
+                  (numdefcpd && onlycpdrule &&
                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
                   ))) {
             rv = rv->next_homonym;
         }
 
+        if (rv) affixed = 0;
+
         if (rv)  {
             sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
             if (!HENTRY_FIND(rv, MORPH_STEM)) {
                 sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
             }
             // store the pointer of the hash entry
 //            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
             if (HENTRY_DATA(rv)) {
                 sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
             }
         }        
+
         if (!rv) {
-            if (compoundflag && 
+            if (onlycpdrule) break;
+            if (compoundflag &&
              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
                     sfx->getCont() &&
                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
                             sfx->getContLen())) || (compoundend &&
                         TESTAFF(sfx->getCont(), compoundend, 
                             sfx->getContLen())))) {
@@ -2075,16 +2207,17 @@ int AffixMgr::compound_check_morph(const
                     sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
                         MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
                 }
                 if (p) free(p);
                 checked_prefix = 1;
             }
         // else check forbiddenwords
         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+            TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
             TESTAFF(rv->astr, needaffix, rv->alen))) {
                 st[i] = ch;
                 continue;
         }
 
             // check non_compound flag in suffix and prefix
             if ((rv) && !hu_mov_rule &&
                 ((pfx && pfx->getCont() &&
@@ -2114,17 +2247,18 @@ int AffixMgr::compound_check_morph(const
                         pfx->getContLen())) ||
                 (sfx && sfx->getCont() &&
                     TESTAFF(sfx->getCont(), compoundmiddle, 
                         sfx->getContLen())))) {
                     rv = NULL;
             }       
 
         // check forbiddenwords
-        if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) continue;
+        if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)
+            || TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) continue;
 
         // increment word number, if the second root has a compoundroot flag
         if ((rv) && (compoundroot) && 
             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
                 wordnum++;
         }
 
         // first word is acceptable in compound words?
@@ -2146,17 +2280,17 @@ int AffixMgr::compound_check_morph(const
           && ! (( checkcompoundtriple && !words && // test triple letters
                    (word[i-1]==word[i]) && (
                       ((i>1) && (word[i-1]==word[i-2])) || 
                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
                    )
                ) ||
                (
                    // test CHECKCOMPOUNDPATTERN
-                   numcheckcpd && !words && cpdpat_check(word, i, rv, NULL)
+                   numcheckcpd && !words && cpdpat_check(word, i, rv, NULL, affixed)
                ) ||
                ( 
                  checkcompoundcase && !words && cpdcase_check(word, i)
                ))
          )
 // LANG_hu section: spec. Hungarian rule
          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
               (sfx && sfx->getCont() && (
@@ -2222,17 +2356,18 @@ int AffixMgr::compound_check_morph(const
 // END of LANG_hu section
             // increment word number, if the second root has a compoundroot flag
             if ((rv) && (compoundroot) && 
                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
                     wordnum++;
             }
 
             // check forbiddenwords
-            if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) {
+            if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+                TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
                 st[i] = ch;
                 continue;
             }
 
             // second word is acceptable, as a root?
             // hungarian conventions: compounding is acceptable,
             // when compound forms consist of 2 words, or if more,
             // then the syllable number of root words must be 6, or lesser.
@@ -2276,19 +2411,19 @@ int AffixMgr::compound_check_morph(const
 
             numsyllable = oldnumsyllable2 ;
             wordnum = oldwordnum2;
 
             // perhaps second word has prefix or/and suffix
             sfx = NULL;
             sfxflag = FLAG_NULL;
 
-            if (compoundflag) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
-
-            if (!rv && compoundend) {
+            if (compoundflag && !onlycpdrule) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
+
+            if (!rv && compoundend && !onlycpdrule) {
                 sfx = NULL;
                 pfx = NULL;
                 rv = affix_check((word+i),strlen(word+i), compoundend);
             }
 
             if (!rv && numdefcpd && words) {
                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
                 if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
@@ -2316,17 +2451,18 @@ int AffixMgr::compound_check_morph(const
                         pfx->getContLen())) ||
                 (sfx && sfx->getCont() &&
                     TESTAFF(sfx->getCont(), compoundforbidflag, 
                         sfx->getContLen())))) {
                     rv = NULL;
             }
 
             // check forbiddenwords
-            if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
+            if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen) ||
+                    TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))
                     && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
                         st[i] = ch;
                         continue;
                     }
 
             if (langnum == LANG_hu) {
                 // calculate syllable number of the word
                 numsyllable += get_syllable(word + i, strlen(word + i));
@@ -2397,16 +2533,19 @@ int AffixMgr::compound_check_morph(const
                              numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
             } else {
                 rv=NULL;
             }
         }
         st[i] = ch;
         wordnum = oldwordnum;
         numsyllable = oldnumsyllable;
+
+        } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
+
     }
     return 0;
 }    
 
  // return 1 if s1 (reversed) is a leading subset of end of s2
 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
  {
     while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
@@ -2452,39 +2591,38 @@ struct hentry * AffixMgr::suffix_check (
                    circumfix, ep->getContLen())) &&
                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
               // circumfix flag in prefix AND suffix
               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
                    circumfix, ep->getContLen())) &&
                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
             // fogemorpheme
               (in_compound || 
-                 !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+                 !(se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
             // needaffix on prefix or first suffix
               (cclass || 
                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
                    (ppfx && !((ep->getCont()) &&
                      TESTAFF(ep->getCont(), needaffix,
                        ep->getContLen())))
-              )
-            ) {
+              )) {
                 rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass, 
                     needflag, (in_compound ? 0 : onlyincompound));
                 if (rv) {
                     sfx=se; // BUG: sfx not stateless
                     return rv;
                 }
             }
         }
        se = se->getNext();
     }
 
     // now handle the general case
     if (len == 0) return NULL; // FULLSTRIP
-    unsigned char sp = *((const unsigned char *)(word + len - 1));
+    unsigned char sp= *((const unsigned char *)(word + len - 1));
     SfxEntry * sptr = sStart[sp];
 
     while (sptr) {
         if (isRevSubset(sptr->getKey(), word + len - 1, len)
         ) {
             // suffixes are not allowed in beginning of compounds
             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
              // except when signed with compoundpermitflag flag
@@ -2503,17 +2641,17 @@ struct hentry * AffixMgr::suffix_check (
                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
             // needaffix on prefix or first suffix
               (cclass || 
                   !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
                   (ppfx && !((ep->getCont()) &&
                      TESTAFF(ep->getCont(), needaffix,
                        ep->getContLen())))
               )
-            ) {
+            ) if (in_compound != IN_CPD_END || ppfx || !(sptr->getCont() && TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
                 rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
                     maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
                 if (rv) {
                     sfx=sptr; // BUG: sfx not stateless
                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
                     return rv;
                 }
@@ -2791,23 +2929,26 @@ struct hentry * AffixMgr::affix_check (c
     if (rv) return rv;
 
     // if still not found check all suffixes
     rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
 
     if (havecontclass) {
         sfx = NULL;
         pfx = NULL;
+
         if (rv) return rv;
         // if still not found check all two-level suffixes
         rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
+
         if (rv) return rv;
         // if still not found check all two-level suffixes
         rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
     }
+
     return rv;
 }
 
 // check if word with affixes is correctly spelled
 char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
 {
     char result[MAXLNLEN];
     char * st = NULL;
@@ -2890,17 +3031,18 @@ char * AffixMgr::morphgen(char * ts, int
 
                 int cmp = morphcmp(stemmorph, targetmorph);
 
                 if (cmp == 0) {
                     char * newword = sptr->add(ts, wl);
                     if (newword) {
                         hentry * check = pHMgr->lookup(newword); // XXX extra dic
                         if (!check || !check->astr || 
-                            !TESTAFF(check->astr, forbiddenword, check->alen)) {
+                            !(TESTAFF(check->astr, forbiddenword, check->alen) || 
+                              TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
                                 return newword;
                         }
                         free(newword);
                     }
                 }
                 
                 // recursive call for secondary suffixes
                 if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
@@ -3144,16 +3286,31 @@ int AffixMgr::get_fullstrip() const
   return fullstrip;
 }
 
 FLAG AffixMgr::get_keepcase() const
 {
   return keepcase;
 }
 
+FLAG AffixMgr::get_forceucase() const
+{
+  return forceucase;
+}
+
+FLAG AffixMgr::get_warn() const
+{
+  return warn;
+}
+
+int AffixMgr::get_forbidwarn() const
+{
+  return forbidwarn;
+}
+
 int AffixMgr::get_checksharps() const
 {
   return checksharps;
 }
 
 char * AffixMgr::encode_flag(unsigned short aflag) const
 {
   return pHMgr->encode_flag(aflag);
@@ -3219,16 +3376,22 @@ FLAG AffixMgr::get_forbiddenword() const
 }
 
 // return the forbidden words control flag
 FLAG AffixMgr::get_nosuggest() const
 {
   return nosuggest;
 }
 
+// return the nongramsuggest control flag
+FLAG AffixMgr::get_nongramsuggest() const
+{
+  return nongramsuggest;
+}
+
 // return the forbidden words flag modify flag
 FLAG AffixMgr::get_needaffix() const
 {
   return needaffix;
 }
 
 // return the onlyincompound flag
 FLAG AffixMgr::get_onlyincompound() const
@@ -3297,22 +3460,36 @@ int AffixMgr::have_contclass() const
 }
 
 // return utf8
 int AffixMgr::get_utf8() const
 {
   return utf8;
 }
 
-// return nosplitsugs
 int AffixMgr::get_maxngramsugs(void) const
 {
   return maxngramsugs;
 }
 
+int AffixMgr::get_maxcpdsugs(void) const
+{
+  return maxcpdsugs;
+}
+
+int AffixMgr::get_maxdiff(void) const
+{
+  return maxdiff;
+}
+
+int AffixMgr::get_onlymaxdiff(void) const
+{
+  return onlymaxdiff;
+}
+
 // return nosplitsugs
 int AffixMgr::get_nosplitsugs(void) const
 {
   return nosplitsugs;
 }
 
 // return sugswithdots
 int AffixMgr::get_sugswithdots(void) const
@@ -3444,17 +3621,26 @@ int  AffixMgr::parse_reptable(char * lin
                   case 0: {
                              if (strncmp(piece,"REP",3) != 0) {
                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
                                  numrep = 0;
                                  return 1;
                              }
                              break;
                           }
-                  case 1: { reptable[j].pattern = mystrrep(mystrdup(piece),"_"," "); break; }
+                  case 1: {
+                            if (*piece == '^') reptable[j].start = true; else reptable[j].start = false;
+                            reptable[j].pattern = mystrrep(mystrdup(piece + int(reptable[j].start)),"_"," ");
+                            int lr = strlen(reptable[j].pattern) - 1;
+                            if (reptable[j].pattern[lr] == '$') {
+                                reptable[j].end = true;
+                                reptable[j].pattern[lr] = '\0';
+                            } else reptable[j].end = false;
+                            break;
+                          }
                   case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
                   default: break;
                }
                i++;
            }
            piece = mystrsep(&tp, 0);
         }
         if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
@@ -3533,25 +3719,25 @@ int  AffixMgr::parse_convtable(char * li
                     break; 
                   }
                   default: break;
                }
                i++;
            }
            piece = mystrsep(&tp, 0);
         }
-      if (!pattern || !pattern2) {
-         if (pattern)
-            free(pattern);
-         if (pattern2)
-            free(pattern2);
-         HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
-         return 1;
-      }
-      (*rl)->add(pattern, pattern2);
+        if (!pattern || !pattern2) {
+            if (pattern)
+                free(pattern);
+            if (pattern2)
+                free(pattern2);
+            HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
+            return 1;
+        }
+        (*rl)->add(pattern, pattern2);
    }
    return 0;
 }
 
 
 /* parse in the typical fault correcting table */
 int  AffixMgr::parse_phonetable(char * line, FileMgr * af)
 {
@@ -3935,35 +4121,36 @@ int  AffixMgr::parse_maptable(char * lin
         }
    }
    return 0;
 }
 
 /* parse in the word breakpoint table */
 int  AffixMgr::parse_breaktable(char * line, FileMgr * af)
 {
-   if (numbreak != 0) {
+   if (numbreak > -1) {
       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
       return 1;
    }
    char * tp = line;
    char * piece;
    int i = 0;
    int np = 0;
    piece = mystrsep(&tp, 0);
    while (piece) {
        if (*piece != '\0') {
           switch(i) {
              case 0: { np++; break; }
              case 1: { 
                        numbreak = atoi(piece);
-                       if (numbreak < 1) {
+                       if (numbreak < 0) {
                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
                           return 1;
                        }
+                       if (numbreak == 0) return 0;
                        breaktable = (char **) malloc(numbreak * sizeof(char *));
                        if (!breaktable) return 1;
                        np++;
                        break;
                      }
              default: break;
           }
           i++;
--- a/extensions/spellcheck/hunspell/src/affixmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx
@@ -100,16 +100,17 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
   FLAG                compoundpermitflag;
   int                 checkcompounddup;
   int                 checkcompoundrep;
   int                 checkcompoundcase;
   int                 checkcompoundtriple;
   int                 simplifiedtriple;
   FLAG                forbiddenword;
   FLAG                nosuggest;
+  FLAG                nongramsuggest;
   FLAG                needaffix;
   int                 cpdmin;
   int                 numrep;
   replentry *         reptable;
   RepList *           iconvtable;
   RepList *           oconvtable;
   int                 nummap;
   mapentry *          maptable;
@@ -117,16 +118,19 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
   char **             breaktable;
   int                 numcheckcpd;
   patentry *          checkcpdtable;
   int                 simplifiedcpd;
   int                 numdefcpd;
   flagentry *         defcpdtable;
   phonetable *        phone;
   int                 maxngramsugs;
+  int                 maxcpdsugs;
+  int                 maxdiff;
+  int                 onlymaxdiff;
   int                 nosplitsugs;
   int                 sugswithdots;
   int                 cpdwordmax;
   int                 cpdmaxsyllable;
   char *              cpdvowels;
   w_char *            cpdvowels_utf16;
   int                 cpdvowels_utf16_len;
   char *              cpdsyllablenum;
@@ -145,16 +149,19 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
   int                 ignorechars_utf16_len;
   char *              version;
   char *              lang;
   int                 langnum;
   FLAG                lemma_present;
   FLAG                circumfix;
   FLAG                onlyincompound;
   FLAG                keepcase;
+  FLAG                forceucase;
+  FLAG                warn;
+  int                 forbidwarn;
   FLAG                substandard;
   int                 checksharps;
   int                 fullstrip;
 
   int                 havecontclass; // boolean variable
   char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
 
 public:
@@ -195,25 +202,26 @@ public:
             unsigned short al, char * morph, char * targetmorph, int level);
 
   int    expand_rootword(struct guessword * wlst, int maxn, const char * ts,
             int wl, const unsigned short * ap, unsigned short al, char * bad,
             int, char *);
 
   short       get_syllable (const char * word, int wlen);
   int         cpdrep_check(const char * word, int len);
-  int         cpdpat_check(const char * word, int len, hentry * r1, hentry * r2);
+  int         cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
+                    const char affixed);
   int         defcpd_check(hentry *** words, short wnum, hentry * rv,
                     hentry ** rwords, char all);
   int         cpdcase_check(const char * word, int len);
   inline int  candidate_check(const char * word, int len);
   void        setcminmax(int * cmin, int * cmax, const char * word, int len);
   struct hentry * compound_check(const char * word, int len, short wordnum,
             short numsyllable, short maxwordnum, short wnum, hentry ** words,
-            char hu_mov_rule, char is_sug);
+            char hu_mov_rule, char is_sug, int * info);
 
   int compound_check_morph(const char * word, int len, short wordnum,
             short numsyllable, short maxwordnum, short wnum, hentry ** words,
             char hu_mov_rule, char ** result, char * partresult);
 
   struct hentry * lookup(const char * word);
   int                 get_numrep() const;
   struct replentry *  get_reptable() const;
@@ -232,33 +240,40 @@ public:
   unsigned short *    get_wordchars_utf16(int * len) const;
   char *              get_ignore() const;
   unsigned short *    get_ignore_utf16(int * len) const;
   int                 get_compound() const;
   FLAG                get_compoundflag() const;
   FLAG                get_compoundbegin() const;
   FLAG                get_forbiddenword() const;
   FLAG                get_nosuggest() const;
+  FLAG                get_nongramsuggest() const;
   FLAG                get_needaffix() const;
   FLAG                get_onlyincompound() const;
   FLAG                get_compoundroot() const;
   FLAG                get_lemma_present() const;
   int                 get_checknum() const;
   const char *        get_prefix() const;
   const char *        get_suffix() const;
   const char *        get_derived() const;
   const char *        get_version() const;
   int                 have_contclass() const;
   int                 get_utf8() const;
   int                 get_complexprefixes() const;
   char *              get_suffixed(char ) const;
   int                 get_maxngramsugs() const;
+  int                 get_maxcpdsugs() const;
+  int                 get_maxdiff() const;
+  int                 get_onlymaxdiff() const;
   int                 get_nosplitsugs() const;
   int                 get_sugswithdots(void) const;
   FLAG                get_keepcase(void) const;
+  FLAG                get_forceucase(void) const;
+  FLAG                get_warn(void) const;
+  int                 get_forbidwarn(void) const;
   int                 get_checksharps(void) const;
   char *              encode_flag(unsigned short aflag) const;
   int                 get_fullstrip() const;
 
 private:
   int  parse_file(const char * affpath, const char * key);
   int  parse_flag(char * line, unsigned short * out, FileMgr * af);
   int  parse_num(char * line, int * out, FileMgr * af);
--- a/extensions/spellcheck/hunspell/src/atypes.hxx
+++ b/extensions/spellcheck/hunspell/src/atypes.hxx
@@ -87,16 +87,25 @@ static inline void HUNSPELL_WARNING(FILE
 #define aeLONGCOND      (1 << 4)
 
 // compound options
 #define IN_CPD_NOT   0
 #define IN_CPD_BEGIN 1
 #define IN_CPD_END   2
 #define IN_CPD_OTHER 3
 
+// info options
+#define  SPELL_COMPOUND  (1 << 0)
+#define  SPELL_FORBIDDEN (1 << 1)
+#define  SPELL_ALLCAP    (1 << 2)
+#define  SPELL_NOCAP     (1 << 3)
+#define  SPELL_INITCAP   (1 << 4)
+#define  SPELL_ORIGCAP   (1 << 5)
+#define  SPELL_WARN      (1 << 6)
+
 #define MAXLNLEN        8192
 
 #define MINCPDLEN       3
 #define MAXCOMPOUND     10
 #define MAXCONDLEN      20
 #define MAXCONDLEN_1    (MAXCONDLEN - sizeof(char *))
 
 #define MAXACC          1000
--- a/extensions/spellcheck/hunspell/src/csutil.cpp
+++ b/extensions/spellcheck/hunspell/src/csutil.cpp
@@ -60,38 +60,49 @@
 #include <string.h>
 #include <stdio.h> 
 #include <ctype.h>
 
 #include "csutil.hxx"
 #include "atypes.hxx"
 #include "langnum.hxx"
 
+// Unicode character encoding information
+struct unicode_info {
+  unsigned short c;
+  unsigned short cupper;
+  unsigned short clower;
+};
+
 #ifdef OPENOFFICEORG
 #  include <unicode/uchar.h>
 #else
 #  ifndef MOZILLA_CLIENT
 #    include "utf_info.cxx"
 #    define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
 #  endif
 #endif
 
 #ifdef MOZILLA_CLIENT
 #include "nsCOMPtr.h"
 #include "nsServiceManagerUtils.h"
 #include "nsIUnicodeEncoder.h"
 #include "nsIUnicodeDecoder.h"
+#include "nsUnicharUtils.h"
 #include "nsICharsetConverterManager.h"
-#include "nsUnicharUtilCIID.h"
-#include "nsUnicharUtils.h"
 
 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
-static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
 #endif
 
+struct unicode_info2 {
+  char cletter;
+  unsigned short cupper;
+  unsigned short clower;
+};
+
 static struct unicode_info2 * utf_tbl = NULL;
 static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
 
 /* only UTF-16 (BMP) implementation */
 char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
     signed char * u8 = (signed char *)dest;
     signed char * u8_max = (signed char *)(u8 + size);
     const w_char * u2 = src;
@@ -347,19 +358,17 @@ int line_tok(const char * text, char ***
     char * p = strchr(dup, breakchar);
     while (p) {
         linenum++;
         *p = '\0';
         p++;
         p = strchr(p, breakchar);
     }
     linenum++;
-//    fprintf(stderr, "LINEN:%d %p %p\n", linenum, lines, *lines);
     *lines = (char **) malloc(linenum * sizeof(char *));
-//    fprintf(stderr, "hello\n");
     if (!(*lines)) {
         free(dup);
         return 0;
     }
 
     p = dup;
     int l = 0;
     for (int i = 0; i < linenum; i++) {
@@ -590,31 +599,34 @@ char * copy_field(char * dest, const cha
        return dest;
   }
   return NULL;
 }
 
 char * mystrrep(char * word, const char * pat, const char * rep) {
     char * pos = strstr(word, pat);
     if (pos) {
-        int replen = strlen(rep);
-        int patlen = strlen(pat);
+      int replen = strlen(rep);
+      int patlen = strlen(pat);
+      while (pos) {
         if (replen < patlen) {
             char * end = word + strlen(word);
             char * next = pos + replen;
             char * prev = pos + strlen(pat);
             for (; prev < end; *next = *prev, prev++, next++);
             *next = '\0';
         } else if (replen > patlen) {
             char * end = pos + patlen;
             char * next = word + strlen(word) + replen - patlen;
             char * prev = next - replen + patlen;
             for (; prev >= end; *next = *prev, prev--, next--);
         }
         strncpy(pos, rep, replen);
+        pos = strstr(word, pat);
+      }
     }
     return word;
 }
 
  // reverse word 
  int reverseword(char * word) {
    char r;
    for (char * dest = word + strlen(word) - 1; word < dest; word++, dest--) {
@@ -5165,45 +5177,350 @@ static struct cs_info iscii_devanagari_t
 { 0x00, 0xfa, 0xfa },
 { 0x00, 0xfb, 0xfb },
 { 0x00, 0xfc, 0xfc },
 { 0x00, 0xfd, 0xfd },
 { 0x00, 0xfe, 0xfe },
 { 0x00, 0xff, 0xff }
 };
 
-static struct enc_entry encds[] = {
-{"ISO8859-1",iso1_tbl},
-{"ISO8859-2",iso2_tbl},
-{"ISO8859-3",iso3_tbl},
-{"ISO8859-4",iso4_tbl},
-{"ISO8859-5",iso5_tbl},
-{"ISO8859-6",iso6_tbl},
-{"ISO8859-7",iso7_tbl},
-{"ISO8859-8",iso8_tbl},
-{"ISO8859-9",iso9_tbl},
-{"ISO8859-10",iso10_tbl},
-{"KOI8-R",koi8r_tbl},
-{"KOI8-U",koi8u_tbl},
-{"microsoft-cp1251",cp1251_tbl},
-{"ISO8859-13", iso13_tbl},
-{"ISO8859-14", iso14_tbl},
-{"ISO8859-15", iso15_tbl},
-{"ISCII-DEVANAGARI", iscii_devanagari_tbl}
+static struct cs_info tis620_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0a, 0x0a },
+{ 0x00, 0x0b, 0x0b },
+{ 0x00, 0x0c, 0x0c },
+{ 0x00, 0x0d, 0x0d },
+{ 0x00, 0x0e, 0x0e },
+{ 0x00, 0x0f, 0x0f },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1a, 0x1a },
+{ 0x00, 0x1b, 0x1b },
+{ 0x00, 0x1c, 0x1c },
+{ 0x00, 0x1d, 0x1d },
+{ 0x00, 0x1e, 0x1e },
+{ 0x00, 0x1f, 0x1f },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2a, 0x2a },
+{ 0x00, 0x2b, 0x2b },
+{ 0x00, 0x2c, 0x2c },
+{ 0x00, 0x2d, 0x2d },
+{ 0x00, 0x2e, 0x2e },
+{ 0x00, 0x2f, 0x2f },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3a, 0x3a },
+{ 0x00, 0x3b, 0x3b },
+{ 0x00, 0x3c, 0x3c },
+{ 0x00, 0x3d, 0x3d },
+{ 0x00, 0x3e, 0x3e },
+{ 0x00, 0x3f, 0x3f },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6a, 0x4a },
+{ 0x01, 0x6b, 0x4b },
+{ 0x01, 0x6c, 0x4c },
+{ 0x01, 0x6d, 0x4d },
+{ 0x01, 0x6e, 0x4e },
+{ 0x01, 0x6f, 0x4f },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7a, 0x5a },
+{ 0x00, 0x5b, 0x5b },
+{ 0x00, 0x5c, 0x5c },
+{ 0x00, 0x5d, 0x5d },
+{ 0x00, 0x5e, 0x5e },
+{ 0x00, 0x5f, 0x5f },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6a, 0x4a },
+{ 0x00, 0x6b, 0x4b },
+{ 0x00, 0x6c, 0x4c },
+{ 0x00, 0x6d, 0x4d },
+{ 0x00, 0x6e, 0x4e },
+{ 0x00, 0x6f, 0x4f },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7a, 0x5a },
+{ 0x00, 0x7b, 0x7b },
+{ 0x00, 0x7c, 0x7c },
+{ 0x00, 0x7d, 0x7d },
+{ 0x00, 0x7e, 0x7e },
+{ 0x00, 0x7f, 0x7f },
+{ 0x00, 0x80, 0x80 },
+{ 0x00, 0x81, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x83 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x00, 0x8a, 0x8a },
+{ 0x00, 0x8b, 0x8b },
+{ 0x00, 0x8c, 0x8c },
+{ 0x00, 0x8d, 0x8d },
+{ 0x00, 0x8e, 0x8e },
+{ 0x00, 0x8f, 0x8f },
+{ 0x00, 0x90, 0x90 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9a, 0x9a },
+{ 0x00, 0x9b, 0x9b },
+{ 0x00, 0x9c, 0x9c },
+{ 0x00, 0x9d, 0x9d },
+{ 0x00, 0x9e, 0x9e },
+{ 0x00, 0x9f, 0x9f },
+{ 0x00, 0xa0, 0xa0 },
+{ 0x00, 0xa1, 0xa1 },
+{ 0x00, 0xa2, 0xa2 },
+{ 0x00, 0xa3, 0xa3 },
+{ 0x00, 0xa4, 0xa4 },
+{ 0x00, 0xa5, 0xa5 },
+{ 0x00, 0xa6, 0xa6 },
+{ 0x00, 0xa7, 0xa7 },
+{ 0x00, 0xa8, 0xa8 },
+{ 0x00, 0xa9, 0xa9 },
+{ 0x00, 0xaa, 0xaa },
+{ 0x00, 0xab, 0xab },
+{ 0x00, 0xac, 0xac },
+{ 0x00, 0xad, 0xad },
+{ 0x00, 0xae, 0xae },
+{ 0x00, 0xaf, 0xaf },
+{ 0x00, 0xb0, 0xb0 },
+{ 0x00, 0xb1, 0xb1 },
+{ 0x00, 0xb2, 0xb2 },
+{ 0x00, 0xb3, 0xb3 },
+{ 0x00, 0xb4, 0xb4 },
+{ 0x00, 0xb5, 0xb5 },
+{ 0x00, 0xb6, 0xb6 },
+{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb8, 0xb8 },
+{ 0x00, 0xb9, 0xb9 },
+{ 0x00, 0xba, 0xba },
+{ 0x00, 0xbb, 0xbb },
+{ 0x00, 0xbc, 0xbc },
+{ 0x00, 0xbd, 0xbd },
+{ 0x00, 0xbe, 0xbe },
+{ 0x00, 0xbf, 0xbf },
+{ 0x00, 0xc0, 0xc0 },
+{ 0x00, 0xc1, 0xc1 },
+{ 0x00, 0xc2, 0xc2 },
+{ 0x00, 0xc3, 0xc3 },
+{ 0x00, 0xc4, 0xc4 },
+{ 0x00, 0xc5, 0xc5 },
+{ 0x00, 0xc6, 0xc6 },
+{ 0x00, 0xc7, 0xc7 },
+{ 0x00, 0xc8, 0xc8 },
+{ 0x00, 0xc9, 0xc9 },
+{ 0x00, 0xca, 0xca },
+{ 0x00, 0xcb, 0xcb },
+{ 0x00, 0xcc, 0xcc },
+{ 0x00, 0xcd, 0xcd },
+{ 0x00, 0xce, 0xce },
+{ 0x00, 0xcf, 0xcf },
+{ 0x00, 0xd0, 0xd0 },
+{ 0x00, 0xd1, 0xd1 },
+{ 0x00, 0xd2, 0xd2 },
+{ 0x00, 0xd3, 0xd3 },
+{ 0x00, 0xd4, 0xd4 },
+{ 0x00, 0xd5, 0xd5 },
+{ 0x00, 0xd6, 0xd6 },
+{ 0x00, 0xd7, 0xd7 },
+{ 0x00, 0xd8, 0xd8 },
+{ 0x00, 0xd9, 0xd9 },
+{ 0x00, 0xda, 0xda },
+{ 0x00, 0xdb, 0xdb },
+{ 0x00, 0xdc, 0xdc },
+{ 0x00, 0xdd, 0xdd },
+{ 0x00, 0xde, 0xde },
+{ 0x00, 0xdf, 0xdf },
+{ 0x00, 0xe0, 0xe0 },
+{ 0x00, 0xe1, 0xe1 },
+{ 0x00, 0xe2, 0xe2 },
+{ 0x00, 0xe3, 0xe3 },
+{ 0x00, 0xe4, 0xe4 },
+{ 0x00, 0xe5, 0xe5 },
+{ 0x00, 0xe6, 0xe6 },
+{ 0x00, 0xe7, 0xe7 },
+{ 0x00, 0xe8, 0xe8 },
+{ 0x00, 0xe9, 0xe9 },
+{ 0x00, 0xea, 0xea },
+{ 0x00, 0xeb, 0xeb },
+{ 0x00, 0xec, 0xec },
+{ 0x00, 0xed, 0xed },
+{ 0x00, 0xee, 0xee },
+{ 0x00, 0xef, 0xef },
+{ 0x00, 0xf0, 0xf0 },
+{ 0x00, 0xf1, 0xf1 },
+{ 0x00, 0xf2, 0xf2 },
+{ 0x00, 0xf3, 0xf3 },
+{ 0x00, 0xf4, 0xf4 },
+{ 0x00, 0xf5, 0xf5 },
+{ 0x00, 0xf6, 0xf6 },
+{ 0x00, 0xf7, 0xf7 },
+{ 0x00, 0xf8, 0xf8 },
+{ 0x00, 0xf9, 0xf9 },
+{ 0x00, 0xfa, 0xfa },
+{ 0x00, 0xfb, 0xfb },
+{ 0x00, 0xfc, 0xfc },
+{ 0x00, 0xfd, 0xfd },
+{ 0x00, 0xfe, 0xfe },
+{ 0x00, 0xff, 0xff }
 };
 
+struct enc_entry {
+  const char * enc_name;
+  struct cs_info * cs_table;
+};
+
+static struct enc_entry encds[] = {
+  {"iso88591",iso1_tbl},                     //ISO-8859-1
+  {"iso88592",iso2_tbl},                     //ISO-8859-2
+  {"iso88593",iso3_tbl},                     //ISO-8859-3
+  {"iso88594",iso4_tbl},                     //ISO-8859-4
+  {"iso88595",iso5_tbl},                     //ISO-8859-5
+  {"iso88596",iso6_tbl},                     //ISO-8859-6
+  {"iso88597",iso7_tbl},                     //ISO-8859-7
+  {"iso88598",iso8_tbl},                     //ISO-8859-8
+  {"iso88599",iso9_tbl},                     //ISO-8859-9
+  {"iso885910",iso10_tbl},                   //ISO-8859-10
+  {"tis620",tis620_tbl},                     //TIS-620/ISO-8859-11
+  {"tis6202533",tis620_tbl},                 //TIS-620/ISO-8859-11
+  {"iso885911",tis620_tbl},                  //TIS-620/ISO-8859-11
+  {"iso885913", iso13_tbl},                  //ISO-8859-13
+  {"iso885914", iso14_tbl},                  //ISO-8859-14
+  {"iso885915", iso15_tbl},                  //ISO-8859-15
+  {"koi8r",koi8r_tbl},                       //KOI8-R
+  {"koi8u",koi8u_tbl},                       //KOI8-U
+  {"cp1251",cp1251_tbl},                     //CP-1251
+  {"microsoftcp1251",cp1251_tbl},            //microsoft-cp1251
+  {"xisciias", iscii_devanagari_tbl},        //x-iscii-as
+  {"isciidevanagari", iscii_devanagari_tbl}  //ISCII-DEVANAGARI
+};
+
+/* map to lower case and remove non alphanumeric chars */
+static void toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
+{
+    while ( *pName )
+    {
+        /* A-Z */
+        if ( (*pName >= 0x41) && (*pName <= 0x5A) )
+        {
+            *pBuf = (*pName)+0x20;  /* toAsciiLower */
+            pBuf++;
+        }
+        /* a-z, 0-9 */
+        else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
+                  ((*pName >= 0x30) && (*pName <= 0x39)) )
+        {
+            *pBuf = *pName;
+            pBuf++;
+        }
+
+        pName++;
+    }
+
+    *pBuf = '\0';
+}
+
 struct cs_info * get_current_cs(const char * es) {
-  struct cs_info * ccs = encds[0].cs_table;
+  char *normalized_encoding = new char[strlen(es)+1];
+  toAsciiLowerAndRemoveNonAlphanumeric(es, normalized_encoding);
+
+  struct cs_info * ccs = NULL;
   int n = sizeof(encds) / sizeof(encds[0]);
   for (int i = 0; i < n; i++) {
-    if (strcmp(es,encds[i].enc_name) == 0) {
+    if (strcmp(normalized_encoding,encds[i].enc_name) == 0) {
       ccs = encds[i].cs_table;
       break;
     }
   }
+
+  delete[] normalized_encoding;
+
+  if (!ccs) {
+    HUNSPELL_WARNING(stderr, "error: unknown encoding %s: using %s as fallback\n", es, encds[0].enc_name);
+    ccs = encds[0].cs_table;
+  }
+
   return ccs;
 }
 #else
 // XXX This function was rewritten for mozilla. Instead of storing the
 // conversion tables static in this file, create them when needed
 // with help the mozilla backend.
 struct cs_info * get_current_cs(const char * es) {
   struct cs_info *ccs;
@@ -5220,16 +5537,19 @@ struct cs_info * get_current_cs(const ch
   if (NS_FAILED(rv))
     return nsnull;
   encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
   rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
   if (NS_FAILED(rv))
     return nsnull;
   decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
 
+  if (NS_FAILED(rv))
+    return nsnull;
+
   ccs = new cs_info[256];
 
   for (unsigned int i = 0; i <= 0xff; ++i) {
     PRBool success = PR_FALSE;
     // We want to find the upper/lowercase equivalents of each byte
     // in this 1-byte character encoding.  Call our encoding/decoding
     // APIs separately for each byte since they may reject some of the
     // bytes, and we want to handle errors separately for each byte.
@@ -5294,61 +5614,59 @@ char * get_casechars(const char * enc) {
     }
     *p = '\0';
 #ifdef MOZILLA_CLIENT
     delete [] csconv;
 #endif
     return mystrdup(expw);
 }
 
+// language code to language number (LANG_*) map
 
+struct lang_map {  // one row of the lang2enc[] table
+  const char * lang;  // language code used as the lookup key, e.g. "hu" or "hu_HU"
+  int num;  // LANG_* identifier that get_lang_num() returns for this code
+};
 
 static struct lang_map lang2enc[] = {
-{"ar", "UTF-8", LANG_ar},
-{"az", "UTF-8", LANG_az},
-{"bg", "microsoft-cp1251", LANG_bg},
-{"ca", "ISO8859-1", LANG_ca},
-{"cs", "ISO8859-2", LANG_cs},
-{"da", "ISO8859-1", LANG_da},
-{"de", "ISO8859-1", LANG_de},
-{"el", "ISO8859-7", LANG_el},
-{"en", "ISO8859-1", LANG_en},
-{"es", "ISO8859-1", LANG_es},
-{"eu", "ISO8859-1", LANG_eu},
-{"gl", "ISO8859-1", LANG_gl},
-{"fr", "ISO8859-15", LANG_fr},
-{"hr", "ISO8859-2", LANG_hr},
-{"hu", "ISO8859-2", LANG_hu},
-{"it", "ISO8859-1", LANG_it},
-{"la", "ISO8859-1", LANG_la},
-{"lv", "ISO8859-13", LANG_lv},
-{"nl", "ISO8859-1", LANG_nl},
-{"pl", "ISO8859-2", LANG_pl},
-{"pt", "ISO8859-1", LANG_pt},
-{"sv", "ISO8859-1", LANG_sv},
-{"tr", "UTF-8", LANG_tr},
-{"ru", "KOI8-R", LANG_ru},
-{"uk", "KOI8-U", LANG_uk}
+{"ar", LANG_ar},
+{"az", LANG_az},
+{"az_AZ", LANG_az}, // for back-compatibility: get_lang_num() now compares the whole string, so the full locale name needs its own row
+{"bg", LANG_bg},
+{"ca", LANG_ca},
+{"cs", LANG_cs},
+{"da", LANG_da},
+{"de", LANG_de},
+{"el", LANG_el},
+{"en", LANG_en},
+{"es", LANG_es},
+{"eu", LANG_eu},
+{"gl", LANG_gl},
+{"fr", LANG_fr},
+{"hr", LANG_hr},
+{"hu", LANG_hu},
+{"hu_HU", LANG_hu}, // for back-compatibility (whole-string lookup, same reason as az_AZ)
+{"it", LANG_it},
+{"la", LANG_la},
+{"lv", LANG_lv},
+{"nl", LANG_nl},
+{"pl", LANG_pl},
+{"pt", LANG_pt},
+{"sv", LANG_sv},
+{"tr", LANG_tr},
+{"tr_TR", LANG_tr}, // for back-compatibility (whole-string lookup, same reason as az_AZ)
+{"ru", LANG_ru},
+{"uk", LANG_uk}
 };
 
 
-const char * get_default_enc(const char * lang) {
-  int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
-  for (int i = 0; i < n; i++) {
-    if (strcmp(lang,lang2enc[i].lang) == 0) {
-      return lang2enc[i].def_enc;
-    }
-  }
-  return NULL;
-}
-
 int get_lang_num(const char * lang) {
   int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
   for (int i = 0; i < n; i++) {
-    if (strncmp(lang,lang2enc[i].lang,2) == 0) {
+    if (strcmp(lang, lang2enc[i].lang) == 0) {  // whole-string match; the old 2-character prefix test is gone, so codes absent from lang2enc[] yield LANG_xx
       return lang2enc[i].num;
     }
   }
   return LANG_xx;
 }
 
 #ifndef OPENOFFICEORG
 #ifndef MOZILLA_CLIENT
--- a/extensions/spellcheck/hunspell/src/csutil.hxx
+++ b/extensions/spellcheck/hunspell/src/csutil.hxx
@@ -168,52 +168,24 @@ LIBHUNSPELL_DLL_EXPORTED void freelist(c
 
 // character encoding information
 struct cs_info {
   unsigned char ccase;
   unsigned char clower;
   unsigned char cupper;
 };
 
-// Unicode character encoding information
-struct unicode_info {
-  unsigned short c;
-  unsigned short cupper;
-  unsigned short clower;
-};
-
-struct unicode_info2 {
-  char cletter;
-  unsigned short cupper;
-  unsigned short clower;
-};
-
 LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
 LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
 LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
 LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
 
-struct enc_entry {
-  const char * enc_name;
-  struct cs_info * cs_table;
-};
-
-// language to encoding default map
-
-struct lang_map {
-  const char * lang;
-  const char * def_enc;
-  int num;
-};
-
 LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
 
-LIBHUNSPELL_DLL_EXPORTED const char * get_default_enc(const char * lang);
-
 // get language identifiers of language codes
 LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
 
 // get characters of the given 8bit encoding with lower- and uppercase forms
 LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
 
 // convert null terminated string to all caps using encoding
 LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
@@ -271,32 +243,32 @@ LIBHUNSPELL_DLL_EXPORTED char * get_stor
 
 // hash entry macros
 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
 {
     char *ret;
     if (!h->var)
         ret = NULL;
     else if (h->var & H_OPT_ALIASM)
-        ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
+        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);  // aliased case: the bytes blen + 1 past the word are read through get_stored_pointer()
     else 
-        ret = &(h->word[0]) + h->blen + 1;
+        ret = HENTRY_WORD(h) + h->blen + 1;  // otherwise the data is used in place, blen + 1 bytes past the start of the word
     return ret;
 }
 
 // NULL-free version for warning-free OOo build
 LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
 {
     const char *ret;
     if (!h->var)
         ret = "";
     else if (h->var & H_OPT_ALIASM)
-        ret = get_stored_pointer(&(h->word[0]) + h->blen + 1);
+        ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);  // aliased case: the bytes blen + 1 past the word are read through get_stored_pointer()
     else
-        ret = &(h->word[0]) + h->blen + 1;
+        ret = HENTRY_WORD(h) + h->blen + 1;  // otherwise the data is used in place, blen + 1 bytes past the start of the word
     return ret;
 }
 
 LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
 {
     return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
 }
 
--- a/extensions/spellcheck/hunspell/src/hashmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/hashmgr.cpp
@@ -541,17 +541,16 @@ int HashMgr::hash(const char * word) con
       hv ^= (*word++);
     }
     return (unsigned long) hv % tablesize;
 }
 
 int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
     int len;
     if (*flags == '\0') {
-        HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
         *result = NULL;
         return 0;
     }
     switch (flag_mode) {
       case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
         len = strlen(flags);
         if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
         len /= 2;
--- a/extensions/spellcheck/hunspell/src/hunspell.cpp
+++ b/extensions/spellcheck/hunspell/src/hunspell.cpp
@@ -82,19 +82,20 @@ Hunspell::Hunspell(const char * affpath,
     /* next set up the affix manager */
     /* it needs access to the hash manager lookup methods */
     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
 
     /* get the preferred try string and the dictionary */
     /* encoding from the Affix Manager for that dictionary */
     char * try_string = pAMgr->get_try_string();
     encoding = pAMgr->get_encoding();
-    csconv = get_current_cs(encoding);
     langnum = pAMgr->get_langnum();
     utf8 = pAMgr->get_utf8();
+    if (!utf8)
+        csconv = get_current_cs(encoding);
     complexprefixes = pAMgr->get_complexprefixes();
     wordbreak = pAMgr->get_breaktable();
 
     /* and finally set up the suggestion manager */
     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
     if (try_string) free(try_string);
 }
 
@@ -417,27 +418,29 @@ int Hunspell::spell(const char * word, i
     } else break;
   }
   if ((i == wl) && (nstate == NNUM)) return 1;
   if (!info) info = &info2; else *info = 0;
 
   switch(captype) {
      case HUHCAP:
      case HUHINITCAP:
+            *info += SPELL_ORIGCAP;
      case NOCAP: {
             rv = checkword(cw, info, root);
             if ((abbv) && !(rv)) {
                 memcpy(wspace,cw,wl);
                 *(wspace+wl) = '.';
                 *(wspace+wl+1) = '\0';
                 rv = checkword(wspace, info, root);
             }
             break;
          }
      case ALLCAP: {
+            *info += SPELL_ORIGCAP;
             rv = checkword(cw, info, root);
             if (rv) break;
             if (abbv) {
                 memcpy(wspace,cw,wl);
                 *(wspace+wl) = '.';
                 *(wspace+wl+1) = '\0';
                 rv = checkword(wspace, info, root);
                 if (rv) break;
@@ -488,22 +491,23 @@ int Hunspell::spell(const char * word, i
                         *(wspace+wl2+1) = '\0';
                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
                     }
                 }
                 if (rv) break;
             }
         }
      case INITCAP: {
+             *info += SPELL_ORIGCAP;
              wl = mkallsmall2(cw, unicw, nc);
              memcpy(wspace,cw,(wl+1));
              wl2 = mkinitcap2(cw, unicw, nc);
-    	     if (captype == INITCAP) *info += SPELL_INITCAP;
+             if (captype == INITCAP) *info += SPELL_INITCAP;
              rv = checkword(cw, info, root);
-    	     if (captype == INITCAP) *info -= SPELL_INITCAP;
+             if (captype == INITCAP) *info -= SPELL_INITCAP;
              // forbid bad capitalization
              // (for example, ijs -> Ijs instead of IJs in Dutch)
              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
              if (*info & SPELL_FORBIDDEN) {
                 rv = NULL;
                 break;
              }
              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
@@ -532,17 +536,25 @@ int Hunspell::spell(const char * word, i
                    // in INITCAP form, too.
                    !(pAMgr->get_checksharps() &&
                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||
                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
              break;
            }
   }
 
-  if (rv) return 1;
+  if (rv) {
+      if (pAMgr && pAMgr->get_warn() && rv->astr &&
+          TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
+              *info += SPELL_WARN;
+	      if (pAMgr->get_forbidwarn()) return 0;
+              return HUNSPELL_OK_WARN;
+      }
+      return HUNSPELL_OK;
+  }
 
   // recursive breaking at break points
   if (wordbreak) {
     char * s;
     char r;
     int nbr = 0;
     wl = strlen(cw);
     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
@@ -618,16 +630,21 @@ struct hentry * Hunspell::checkword(cons
         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
      } else {
         remove_ignored_chars(w2,ignoredchars);
      }
      word = w2;
   } else word = w;
 
+  len = strlen(word);
+
+  if (!len)
+      return NULL;
+
   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     if (word != w2) {
       strcpy(w2, word);
       word = w2;
     }
     if (utf8) reverseword_utf(w2); else reverseword(w2);
   }
@@ -655,17 +672,16 @@ struct hentry * Hunspell::checkword(cons
        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
     )) he = he->next_homonym;
   }
 
   // check with affixes
   if (!he && pAMgr) {
      // try stripping off affixes */
-     len = strlen(word);
      he = pAMgr->affix_check(word, len, 0);
 
      // check compound restriction and onlyupcase
      if (he && he->astr && (
         (pAMgr->get_onlyincompound() &&
     	    TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
         (info && (*info & SPELL_INITCAP) &&
     	    TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
@@ -680,23 +696,23 @@ struct hentry * Hunspell::checkword(cons
         if (root) {
             *root = mystrdup(he->word);
             if (*root && complexprefixes) {
                 if (utf8) reverseword_utf(*root); else reverseword(*root);
             }
         }
      // try check compound word
      } else if (pAMgr->get_compound()) {
-          he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0);
+          he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
           // LANG_hu section: `moving rule' with last dash
           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
              char * dup = mystrdup(word);
              if (!dup) return NULL;
              dup[len-1] = '\0';
-             he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0);
+             he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
              free(dup);
           }
           // end of LANG speficic region
           if (he) {
                 if (root) {
                     *root = mystrdup(he->word);
                     if (*root && complexprefixes) {
                         if (utf8) reverseword_utf(*root); else reverseword(*root);
@@ -737,16 +753,37 @@ int Hunspell::suggest(char*** slst, cons
   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 
   if (wl == 0) return 0;
   int ns = 0;
   int capwords = 0;
 
+  // check capitalized form for FORCEUCASE
+  if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
+    int info = SPELL_ORIGCAP;
+    char ** wlst;
+    if (checkword(cw, &info, NULL)) {
+        if (*slst) {
+            wlst = *slst;
+        } else {
+            wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
+            if (wlst == NULL) return -1;
+            *slst = wlst;
+            for (int i = 0; i < MAXSUGGESTION; i++) {
+                wlst[i] = NULL;
+            }
+        }
+        wlst[0] = mystrdup(cw);
+        mkinitcap(wlst[0]);
+        return 1;
+    }
+  }
+ 
   switch(captype) {
      case NOCAP:   {
                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
                      break;
                    }
 
      case INITCAP: {
                      capwords = 1;
@@ -850,17 +887,17 @@ int Hunspell::suggest(char*** slst, cons
                                 }
                             }
                         }
                      }
                      break;
                    }
   }
 
-  // LANG_hu section: replace '-' with ' ' in Hungarian
+ // LANG_hu section: replace '-' with ' ' in Hungarian
   if (langnum == LANG_hu) {
       for (int j=0; j < ns; j++) {
           char * pos = strchr((*slst)[j],'-');
           if (pos) {
               int info;
               char w[MAXWORDUTF8LEN];
               *pos = '\0';
               strcpy(w, (*slst)[j]);
@@ -869,18 +906,18 @@ int Hunspell::suggest(char*** slst, cons
               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
                   *pos = ' ';
               } else *pos = '-';
           }
       }
   }
   // END OF LANG_hu section
 
-  // try ngram approach since found nothing
-  if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
+  // try ngram approach since found nothing or only compound words
+  if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
       switch(captype) {
           case NOCAP: {
               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
               break;
           }
 	  case HUHINITCAP:
               capwords = 1;
           case HUHCAP: {
@@ -1512,17 +1549,16 @@ int Hunspell::analyze(char*** slst, cons
   }
 
   if (*result) {
     // word reversing wrapper for complex prefixes
     if (complexprefixes) {
       if (utf8) reverseword_utf(result); else reverseword(result);
     }
     return line_tok(result, slst, MSEP_REC);
-
   }
 
   // compound word with dash (HU) I18n
   char * dash = NULL;
   int nresult = 0;
   // LANG_hu section: set dash information for suggestions
   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
   if ((langnum == LANG_hu) && dash) {
--- a/extensions/spellcheck/hunspell/src/hunspell.hxx
+++ b/extensions/spellcheck/hunspell/src/hunspell.hxx
@@ -57,28 +57,25 @@
 
 #include "hunvisapi.h"
 
 #include "hashmgr.hxx"
 #include "affixmgr.hxx"
 #include "suggestmgr.hxx"
 #include "langnum.hxx"
 
-#define  SPELL_COMPOUND  (1 << 0)
-#define  SPELL_FORBIDDEN (1 << 1)
-#define  SPELL_ALLCAP    (1 << 2)
-#define  SPELL_NOCAP     (1 << 3)
-#define  SPELL_INITCAP   (1 << 4)
-
 #define  SPELL_XML "<?xml?>"
 
 #define MAXDIC 20
 #define MAXSUGGESTION 15
 #define MAXSHARPS 5
 
+#define HUNSPELL_OK       (1 << 0)
+#define HUNSPELL_OK_WARN  (1 << 1)
+
 #ifndef _MYSPELLMGR_HXX_
 #define _MYSPELLMGR_HXX_
 
 class LIBHUNSPELL_DLL_EXPORTED Hunspell
 {
   AffixMgr*       pAMgr;
   HashMgr*        pHMgr[MAXDIC];
   int             maxdic;
--- a/extensions/spellcheck/hunspell/src/replist.cpp
+++ b/extensions/spellcheck/hunspell/src/replist.cpp
@@ -63,58 +63,56 @@ replentry * RepList::item(int n) {
     return dat[n];
 }
 
 int RepList::near(const char * word) {
     int p1 = 0;
     int p2 = pos;
     while ((p2 - p1) > 1) {
       int m = (p1 + p2) / 2;
-//      fprintf(stderr, "m: %d p1: %d p2: %d dat: %s\n", m, p1, p2, dat[m]->pattern);
       int c = strcmp(word, dat[m]->pattern);
       if (c <= 0) {
         if (c < 0) p2 = m; else p1 = p2 = m;
       } else p1 = m;
     }
-//    fprintf(stderr, "NEAR: %s (word: %s)\n", dat[p1]->pattern, word);
     return p1;
 }
 
 int RepList::match(const char * word, int n) {
     if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern);
     return 0;
 }
 
 int RepList::add(char * pat1, char * pat2) {
     if (pos >= size || pat1 == NULL || pat2 == NULL) return 1;
     replentry * r = (replentry *) malloc(sizeof(replentry));
     if (r == NULL) return 1;
     r->pattern = mystrrep(pat1, "_", " ");
     r->pattern2 = mystrrep(pat2, "_", " ");
+    r->start = false;  // malloc() leaves the new fields unset; false = not restricted to the start of the word
+    r->end = false;  // likewise: false = not restricted to the end of the word
     dat[pos++] = r;
     for (int i = pos - 1; i > 0; i--) {
       r = dat[i];
       if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
           dat[i] = dat[i - 1];
           dat[i - 1] = r;
       } else break;
     }
     return 0;
 }
 
 int RepList::conv(const char * word, char * dest) {
     int stl = 0;
     int change = 0;
-//    for (int i = 0; i < pos; i++) fprintf(stderr, "%d. %s\n", i, dat[i]->pattern);
     for (size_t i = 0; i < strlen(word); i++) {
         int n = near(word + i);
         int l = match(word + i, n);
         if (l) {
           strcpy(dest + stl, dat[n]->pattern2);
           stl += strlen(dat[n]->pattern2);
           i += l - 1;
           change = 1;
         } else dest[stl++] = word[i];
     }
     dest[stl] = '\0';
-//    fprintf(stderr, "i: %s o: %s change: %d\n", word, dest, change);
     return change;
 }
--- a/extensions/spellcheck/hunspell/src/suggestmgr.cpp
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.cpp
@@ -86,26 +86,33 @@ SuggestMgr::SuggestMgr(const char * trym
 
   utf8 = 0;
   langnum = 0;
   complexprefixes = 0;  
   
   maxSug = maxn;
   nosplitsugs = 0;
   maxngramsugs = MAXNGRAMSUGS;
+  maxcpdsugs = MAXCOMPOUNDSUGS;
 
   if (pAMgr) {
-        char * enc = pAMgr->get_encoding();
-        csconv = get_current_cs(enc);
-        free(enc);
         langnum = pAMgr->get_langnum();
         ckey = pAMgr->get_key_string();
         nosplitsugs = pAMgr->get_nosplitsugs();
-        if (pAMgr->get_maxngramsugs() >= 0) maxngramsugs = pAMgr->get_maxngramsugs();
+        if (pAMgr->get_maxngramsugs() >= 0)
+            maxngramsugs = pAMgr->get_maxngramsugs();
         utf8 = pAMgr->get_utf8();
+	if (pAMgr->get_maxcpdsugs() >= 0)
+	    maxcpdsugs = pAMgr->get_maxcpdsugs();
+        if (!utf8)
+        {
+            char * enc = pAMgr->get_encoding();
+            csconv = get_current_cs(enc);
+            free(enc);
+        }
         complexprefixes = pAMgr->get_complexprefixes();
   }
 
   if (ckey) {  
     if (utf8) {
         w_char t[MAXSWL];    
         ckeyl = u8_u16(t, MAXSWL, ckey);
         ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char));
@@ -173,19 +180,20 @@ int SuggestMgr::testsug(char** wlst, con
 
 int SuggestMgr::suggest(char*** slst, const char * w, int nsug,
     int * onlycompoundsug)
 {
   int nocompoundtwowords = 0;
   char ** wlst;    
   w_char word_utf[MAXSWL];
   int wl = 0;
-
+  int nsugorig = nsug;
   char w2[MAXWORDUTF8LEN];
   const char * word = w;
+  int oldSug = 0;
 
   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     strcpy(w2, w);
     if (utf8) reverseword_utf(w2); else reverseword(w2);
     word = w2;
   }
     
@@ -196,115 +204,124 @@ int SuggestMgr::suggest(char*** slst, co
         if (wlst == NULL) return -1;
         for (int i = 0; i < maxSug; i++) {
             wlst[i] = NULL;
         }
     }
     
     if (utf8) {
         wl = u8_u16(word_utf, MAXSWL, word);
+	if (wl == -1) {
+    		*slst = wlst;
+		 return nsug;
+	}
     }
 
     for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
 
+    // limit compound suggestion
+    if (cpdsuggest > 0) oldSug = nsug;
+
     // suggestions for an uppercase word (html -> HTML)
     if ((nsug < maxSug) && (nsug > -1)) {
         nsug = (utf8) ? capchars_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     capchars(wlst, word, nsug, cpdsuggest);
     }
 
     // perhaps we made a typical fault of spelling
-    if ((nsug < maxSug) && (nsug > -1))
-    nsug = replchars(wlst, word, nsug, cpdsuggest);
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+      nsug = replchars(wlst, word, nsug, cpdsuggest);
+    }
 
     // perhaps we made chose the wrong char from a related set
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
       nsug = mapchars(wlst, word, nsug, cpdsuggest);
     }
 
+    // only suggest compound words when no other suggestion
+    if ((cpdsuggest == 0) && (nsug > nsugorig)) nocompoundtwowords=1;
+
     // did we swap the order of chars by mistake
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     swapchar(wlst, word, nsug, cpdsuggest);
     }
 
     // did we swap the order of non adjacent chars by mistake
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? longswapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     longswapchar(wlst, word, nsug, cpdsuggest);
     }
 
     // did we just hit the wrong key in place of a good char (case and keyboard)
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? badcharkey_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     badcharkey(wlst, word, nsug, cpdsuggest);
     }
 
-    // only suggest compound words when no other suggestion
-    if ((cpdsuggest == 0) && (nsug > 0)) nocompoundtwowords=1;
-
     // did we add a char that should not be there
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     extrachar(wlst, word, nsug, cpdsuggest);
     }
 
 
     // did we forgot a char
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     forgotchar(wlst, word, nsug, cpdsuggest);
     }
 
     // did we move a char
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? movechar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     movechar(wlst, word, nsug, cpdsuggest);
     }
 
     // did we just hit the wrong key in place of a good char
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     badchar(wlst, word, nsug, cpdsuggest);
     }
 
     // did we double two characters
-    if ((nsug < maxSug) && (nsug > -1)) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
         nsug = (utf8) ? doubletwochars_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
                     doubletwochars(wlst, word, nsug, cpdsuggest);
     }
 
     // perhaps we forgot to hit space and two words ran together
-    if ((!nosplitsugs) && (nsug < maxSug) && (nsug > -1)) {
-                nsug = twowords(wlst, word, nsug, cpdsuggest);
-        }
+    if (!nosplitsugs && (nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs))) {
+        nsug = twowords(wlst, word, nsug, cpdsuggest);
+    }
 
     } // repeating ``for'' statement compounding support
 
     if (nsug < 0) {
      // we ran out of memory - we should free up as much as possible
        for (int i = 0; i < maxSug; i++)
          if (wlst[i] != NULL) free(wlst[i]);
        free(wlst);
        wlst = NULL;
     }
-    
+
     if (!nocompoundtwowords && (nsug > 0) && onlycompoundsug) *onlycompoundsug = 1;
 
     *slst = wlst;
     return nsug;
 }
 
 // generate suggestions for a word with typical mistake
 //    pass in address of array of char * pointers
 #ifdef HUNSPELL_EXPERIMENTAL
 int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug)
 {
     int nocompoundtwowords = 0;
     char ** wlst;
+    int oldSug;
 
   char w2[MAXWORDUTF8LEN];
   const char * word = w;
 
   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     strcpy(w2, w);
     if (utf8) reverseword_utf(w2); else reverseword(w2);
@@ -315,29 +332,32 @@ int SuggestMgr::suggest_auto(char*** sls
         wlst = *slst;
     } else {
         wlst = (char **) malloc(maxSug * sizeof(char *));
         if (wlst == NULL) return -1;
     }
 
     for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
 
+    // limit compound suggestion
+    if (cpdsuggest > 0) oldSug = nsug;
+
     // perhaps we made a typical fault of spelling
     if ((nsug < maxSug) && (nsug > -1))
     nsug = replchars(wlst, word, nsug, cpdsuggest);
 
     // perhaps we made chose the wrong char from a related set
-    if ((nsug < maxSug) && (nsug > -1))
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs)))
       nsug = mapchars(wlst, word, nsug, cpdsuggest);
 
     if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
 
     // perhaps we forgot to hit space and two words ran together
 
-    if ((nsug < maxSug) && (nsug > -1) && check_forbidden(word, strlen(word))) {
+    if ((nsug < maxSug) && (nsug > -1) && (!cpdsuggest || (nsug < oldSug + maxcpdsugs)) && check_forbidden(word, strlen(word))) {
                 nsug = twowords(wlst, word, nsug, cpdsuggest);
         }
     
     } // repeating ``for'' statement compounding support
 
     if (nsug < 0) {
        for (int i=0;i<maxSug; i++)
          if (wlst[i] != NULL) free(wlst[i]);
@@ -444,39 +464,45 @@ int SuggestMgr::replchars(char** wlst, c
   int numrep = pAMgr->get_numrep();
   struct replentry* reptable = pAMgr->get_reptable();
   if (reptable==NULL) return ns;
   for (int i=0; i < numrep; i++ ) {
       r = word;
       lenr = strlen(reptable[i].pattern2);
       lenp = strlen(reptable[i].pattern);
       // search every occurence of the pattern in the word
-      while ((r=strstr(r, reptable[i].pattern)) != NULL) {
+      while ((r=strstr(r, reptable[i].pattern)) != NULL && (!reptable[i].end || strlen(r) == strlen(reptable[i].pattern)) &&
+        (!reptable[i].start || r == word)) {
           strcpy(candidate, word);
           if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break;
           strcpy(candidate+(r-word),reptable[i].pattern2);
           strcpy(candidate+(r-word)+lenr, r+lenp);
           ns = testsug(wlst, candidate, wl-lenp+lenr, ns, cpdsuggest, NULL, NULL);
           if (ns == -1) return -1;
           // check REP suggestions with space
           char * sp = strchr(candidate, ' ');
           if (sp) {
-            *sp = '\0';
-            if (checkword(candidate, strlen(candidate), 0, NULL, NULL)) {
-              int oldns = ns;
+            char * prev = candidate;
+            while (sp) {
+              *sp = '\0';
+              if (checkword(prev, strlen(prev), 0, NULL, NULL)) {
+                int oldns = ns;
+                *sp = ' ';
+                ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL);
+                if (ns == -1) return -1;
+                if (oldns < ns) {
+                  free(wlst[ns - 1]);
+                  wlst[ns - 1] = mystrdup(candidate);
+                  if (!wlst[ns - 1]) return -1;
+                }
+              }
               *sp = ' ';
-              ns = testsug(wlst, sp + 1, strlen(sp + 1), ns, cpdsuggest, NULL, NULL);
-              if (ns == -1) return -1;
-              if (oldns < ns) {
-                free(wlst[ns - 1]);
-                wlst[ns - 1] = mystrdup(candidate);
-                if (!wlst[ns - 1]) return -1;
-              }
-            }            
-            *sp = ' ';
+              prev = sp + 1;
+              sp = strchr(prev, ' ');
+            }
           }
           r++; // search for the next letter
       }
    }
    return ns;
 }
 
 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
@@ -1051,17 +1077,18 @@ int SuggestMgr::ngsuggest(char** wlst, c
   for (i = 0; i < MAX_ROOTS; i++) {
     roots[i] = NULL;
     scores[i] = -100 * i;
     rootsphon[i] = NULL;
     scoresphon[i] = -100 * i;
   }
   lp = MAX_ROOTS - 1;
   lpphon = MAX_ROOTS - 1;
-  scphon = scoresphon[MAX_ROOTS-1];
+  scphon = -20000;
+  int low = NGRAM_LOWERING;
   
   char w2[MAXWORDUTF8LEN];
   char f[MAXSWUTF8L];
   char * word = w;
 
   // word reversing wrapper for complex prefixes
   if (complexprefixes) {
     strcpy(w2, w);
@@ -1071,69 +1098,92 @@ int SuggestMgr::ngsuggest(char** wlst, c
 
   char mw[MAXSWUTF8L];
   w_char u8[MAXSWL];
   int nc = strlen(word);
   int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;
   
   // set character based ngram suggestion for words with non-BMP Unicode characters
   if (n == -1) {
-    utf8 = 0;
+    utf8 = 0; // XXX not state-free
     n = nc;
     nonbmp = 1;
+    low = 0;
   }
 
   struct hentry* hp = NULL;
   int col = -1;
   phonetable * ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
   char target[MAXSWUTF8L];
   char candidate[MAXSWUTF8L];
   if (ph) {
-    strcpy(candidate, word);
-    mkallcap(candidate, csconv);
-    phonet(candidate, target, n, *ph);
+    if (utf8) {
+      w_char _w[MAXSWL];
+      int _wl = u8_u16(_w, MAXSWL, word);
+      mkallcap_utf(_w, _wl, langnum);
+      u16_u8(candidate, MAXSWUTF8L, _w, _wl);
+    } else {
+      strcpy(candidate, word);
+      if (!nonbmp) mkallcap(candidate, csconv);
+    }
+    phonet(candidate, target, nc, *ph); // XXX phonet() is 8-bit (nc, not n)
   }
 
+  FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL;
+  FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
+  FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
+  FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
+
   for (i = 0; i < md; i++) {  
   while (0 != (hp = (pHMgr[i])->walk_hashtable(col, hp))) {
     if ((hp->astr) && (pAMgr) && 
-       (TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) ||
+       (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
           TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
-          TESTAFF(hp->astr, pAMgr->get_nosuggest(), hp->alen) ||
-          TESTAFF(hp->astr, pAMgr->get_onlyincompound(), hp->alen))) continue;
+          TESTAFF(hp->astr, nosuggest, hp->alen) ||
+          TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
+          TESTAFF(hp->astr, onlyincompound, hp->alen))) continue;
 
-    sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
+    sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
 	leftcommonsubstring(word, HENTRY_WORD(hp));
 
     // check special pronounciation
     if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
-	int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
-	leftcommonsubstring(word, f);
+	int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
+		+ leftcommonsubstring(word, f);
 	if (sc2 > sc) sc = sc2;
     }
     
+    scphon = -20000;
     if (ph && (sc > 2) && (abs(n - (int) hp->clen) <= 3)) {
-	char target2[MAXSWUTF8L];
+      char target2[MAXSWUTF8L];
+      if (utf8) {
+        w_char _w[MAXSWL];
+        int _wl = u8_u16(_w, MAXSWL, HENTRY_WORD(hp));
+        mkallcap_utf(_w, _wl, langnum);
+        u16_u8(candidate, MAXSWUTF8L, _w, _wl);
+      } else {
         strcpy(candidate, HENTRY_WORD(hp));
         mkallcap(candidate, csconv);
-        phonet(candidate, target2, -1, *ph);
-        scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
+      }
+      phonet(candidate, target2, -1, *ph);
+      scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
     }
 
     if (sc > scores[lp]) {
       scores[lp] = sc;  
       roots[lp] = hp;
       lval = sc;
       for (j=0; j < MAX_ROOTS; j++)
         if (scores[j] < lval) {
           lp = j;
           lval = scores[j];
         }
     }
 
+
     if (scphon > scoresphon[lpphon]) {
       scoresphon[lpphon] = scphon;
       rootsphon[lpphon] = HENTRY_WORD(hp);
       lval = scphon;
       for (j=0; j < MAX_ROOTS; j++)
         if (scoresphon[j] < lval) {
           lpphon = j;
           lval = scoresphon[j];
@@ -1144,27 +1194,27 @@ int SuggestMgr::ngsuggest(char** wlst, c
   // find minimum threshold for a passable suggestion
   // mangle original word three differnt ways
   // and score them to generate a minimum acceptable score
   int thresh = 0;
   for (int sp = 1; sp < 4; sp++) {
      if (utf8) {
        for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
        u16_u8(mw, MAXSWUTF8L, u8, n);
-       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + NGRAM_LOWERING);
+       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
      } else {
        strcpy(mw, word);
        for (int k=sp; k < n; k+=4) *(mw + k) = '*';
-       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + NGRAM_LOWERING);
+       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
      }
   }
   thresh = thresh / 3;
   thresh--;
 
-  // now expand affixes on each of these root words and
+ // now expand affixes on each of these root words and
   // and use length adjusted ngram scores to select
   // possible suggestions
   char * guess[MAX_GUESS];
   char * guessorig[MAX_GUESS];
   int gscore[MAX_GUESS];
   for(i=0;i<MAX_GUESS;i++) {
      guess[i] = NULL;
      guessorig[i] = NULL;
@@ -1183,20 +1233,20 @@ int SuggestMgr::ngsuggest(char** wlst, c
   for (i = 0; i < MAX_ROOTS; i++) {
       if (roots[i]) {
         struct hentry * rp = roots[i];
         int nw = pAMgr->expand_rootword(glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen,
             	    rp->astr, rp->alen, word, nc, 
                     ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MORPH_PHON) : NULL));
 
         for (int k = 0; k < nw ; k++) {
-           sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + NGRAM_LOWERING) +
+           sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + low) +
                leftcommonsubstring(word, glst[k].word);
 
-           if ((sc > thresh)) {
+           if (sc > thresh) {
               if (sc > gscore[lp]) {
                  if (guess[lp]) {
                     free (guess[lp]);
                     if (guessorig[lp]) {
                 	free(guessorig[lp]);
                 	guessorig[lp] = NULL;
             	    }
                  }
@@ -1209,17 +1259,17 @@ int SuggestMgr::ngsuggest(char** wlst, c
                        lp = j;
                        lval = gscore[j];
                     }
               } else { 
                 free(glst[k].word);
                 if (glst[k].orig) free(glst[k].orig);
               }
            } else {
-        	free(glst[k].word);
+                free(glst[k].word);
                 if (glst[k].orig) free(glst[k].orig);
            }
         }
       }
   }
   free(glst);
 
   // now we are done generating guesses
@@ -1227,51 +1277,64 @@ int SuggestMgr::ngsuggest(char** wlst, c
   
   
   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
   if (ph) bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
 
   // weight suggestions with a similarity index, based on
   // the longest common subsequent algorithm and resort
 
-  int is_swap;
+  int is_swap = 0;
+  int re = 0;
+  double fact = 1.0;
+  if (pAMgr) {
+	int maxd = pAMgr->get_maxdiff();
+	if (maxd >= 0) fact = (10.0 - maxd)/5.0;
+  }
+
   for (i=0; i < MAX_GUESS; i++) {
       if (guess[i]) {
         // lowering guess[i]
         char gl[MAXSWUTF8L];
         int len;
         if (utf8) {
           w_char _w[MAXSWL];
           len = u8_u16(_w, MAXSWL, guess[i]);
           mkallsmall_utf(_w, len, langnum);
           u16_u8(gl, MAXSWUTF8L, _w, len);
         } else {
           strcpy(gl, guess[i]);
-          mkallsmall(gl, csconv);
+          if (!nonbmp) mkallsmall(gl, csconv);
           len = strlen(guess[i]);
         }
 
         int _lcs = lcslen(word, gl);
 
         // same characters with different casing
         if ((n == len) && (n == _lcs)) {
             gscore[i] += 2000;
             break;
         }
-        
-        // heuristic weigthing of ngram scores
-        gscore[i] +=
+        // using 2-gram instead of 3, and other weightening
+        gscore[i] =
           // length of longest common subsequent minus length difference
           2 * _lcs - abs((int) (n - len)) +
           // weight length of the left common substring
           leftcommonsubstring(word, gl) +
           // weight equal character positions
-          ((_lcs == commoncharacterpositions(word, gl, &is_swap)) ? 1: 0) +
+          (!nonbmp && commoncharacterpositions(word, gl, &is_swap) ? 1: 0) +
           // swap character (not neighboring)
-          ((is_swap) ? 1000 : 0);
+          ((is_swap) ? 10 : 0) +
+          // ngram
+          ngram(4, word, gl, NGRAM_ANY_MISMATCH + low) +
+          // weighted ngrams
+          (re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED)) +
+          (re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED)) +
+          // different limit for dictionaries with PHONE rules
+          (ph ? (re < len * fact ? -1000 : 0) : (re < (n + len)*fact? -1000 : 0));
       }
   }
 
   bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
 
 // phonetic version
   if (ph) for (i=0; i < MAX_ROOTS; i++) {
       if (rootsphon[i]) {
@@ -1280,17 +1343,17 @@ int SuggestMgr::ngsuggest(char** wlst, c
         int len;
         if (utf8) {
           w_char _w[MAXSWL];
           len = u8_u16(_w, MAXSWL, rootsphon[i]);
           mkallsmall_utf(_w, len, langnum);
           u16_u8(gl, MAXSWUTF8L, _w, len);
         } else {
           strcpy(gl, rootsphon[i]);
-          mkallsmall(gl, csconv);
+          if (!nonbmp) mkallsmall(gl, csconv);
           len = strlen(rootsphon[i]);
         }
 
         // heuristic weigthing of ngram scores
         scoresphon[i] += 2 * lcslen(word, gl) - abs((int) (n - len)) +
           // weight length of the left common substring
           leftcommonsubstring(word, gl);
       }
@@ -1302,17 +1365,25 @@ int SuggestMgr::ngsuggest(char** wlst, c
   int oldns = ns;
 
   int same = 0;
   for (i=0; i < MAX_GUESS; i++) {
     if (guess[i]) {
       if ((ns < oldns + maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] > 1000))) {
         int unique = 1;
         // leave only excellent suggestions, if exists
-        if (gscore[i] > 1000) same = 1;
+        if (gscore[i] > 1000) same = 1; else if (gscore[i] < -100) {
+            same = 1;
+	    // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
+            if (ns > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
+    	        free(guess[i]);
+    	        if (guessorig[i]) free(guessorig[i]);
+                continue;
+            }
+        }
         for (j = 0; j < ns; j++) {
           // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
           if ((!guessorig[i] && strstr(guess[i], wlst[j])) ||
 	     (guessorig[i] && strstr(guessorig[i], wlst[j])) ||
             // check forbidden words
             !checkword(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0;
         }
         if (unique) {
@@ -1331,17 +1402,17 @@ int SuggestMgr::ngsuggest(char** wlst, c
       }
     }
   }
 
   oldns = ns;
   if (ph) for (i=0; i < MAX_ROOTS; i++) {
     if (rootsphon[i]) {
       if ((ns < oldns + MAXPHONSUGS) && (ns < maxSug)) {
-        int unique = 1;
+	int unique = 1;
         for (j = 0; j < ns; j++) {
           // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
           if (strstr(rootsphon[i], wlst[j]) || 
             // check forbidden words
             !checkword(rootsphon[i], strlen(rootsphon[i]), 0, NULL, NULL)) unique = 0;
         }
         if (unique) {
             wlst[ns++] = mystrdup(rootsphon[i]);
@@ -1360,32 +1431,35 @@ int SuggestMgr::ngsuggest(char** wlst, c
 // needs to check both root words and words with affixes
 
 // obsolete MySpell-HU modifications:
 // return values 2 and 3 mark compounding with hyphen (-)
 // `3' marks roots without suffix
 int SuggestMgr::checkword(const char * word, int len, int cpdsuggest, int * timer, clock_t * timelimit)
 {
   struct hentry * rv=NULL;
+  struct hentry * rv2=NULL;
   int nosuffix = 0;
 
   // check time limit
   if (timer) {
     (*timer)--;
     if (!(*timer) && timelimit) {
       if ((clock() - *timelimit) > TIMELIMIT) return 0;
       *timer = MAXPLUSTIMER;
     }
   }
   
   if (pAMgr) { 
     if (cpdsuggest==1) {
       if (pAMgr->get_compound()) {
-        rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1); //EXT
-        if (rv) return 3; // XXX obsolote categorisation
+        rv = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 1, 0); //EXT
+        if (rv && (!(rv2 = pAMgr->lookup(word)) || !rv2->astr || 
+            !(TESTAFF(rv2->astr,pAMgr->get_forbiddenword(),rv2->alen) ||
+            TESTAFF(rv2->astr,pAMgr->get_nosuggest(),rv2->alen)))) return 3; // XXX obsolote categorisation + only ICONV needs affix flag check?
         }
         return 0;
     }
 
     rv = pAMgr->lookup(word);
 
     if (rv) {
         if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
@@ -1707,76 +1781,89 @@ char * SuggestMgr::suggest_gen(char ** d
 
 // generate an n-gram score comparing s1 and s2
 int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt)
 {
   int nscore = 0;
   int ns;
   int l1;
   int l2;
+  int test = 0;
 
   if (utf8) {
     w_char su1[MAXSWL];
     w_char su2[MAXSWL];
     l1 = u8_u16(su1, MAXSWL, s1);
     l2 = u8_u16(su2, MAXSWL, s2);
     if ((l2 <= 0) || (l1 == -1)) return 0;
     // lowering dictionary word
     if (opt & NGRAM_LOWERING) mkallsmall_utf(su2, l2, langnum);
     for (int j = 1; j <= n; j++) {
       ns = 0;
       for (int i = 0; i <= (l1-j); i++) {
+	int k = 0;
         for (int l = 0; l <= (l2-j); l++) {
-            int k;
-            for (k = 0; (k < j); k++) {
+            for (k = 0; k < j; k++) {
               w_char * c1 = su1 + i + k;
               w_char * c2 = su2 + l + k;
               if ((c1->l != c2->l) || (c1->h != c2->h)) break;
             }
             if (k == j) {
-                ns++;
+		ns++;
                 break;
-            }
-        }
+            } 
+	}
+	if (k != j && opt & NGRAM_WEIGHTED) {
+	  ns--;
+	  test++;
+	  if (i == 0 || i == l1-j) ns--; // side weight
+	}
       }
       nscore = nscore + ns;
-      if (ns < 2) break;
+      if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break;
     }
   } else {  
     l2 = strlen(s2);
     if (l2 == 0) return 0;
     l1 = strlen(s1);
     char *t = mystrdup(s2);
     if (opt & NGRAM_LOWERING) mkallsmall(t, csconv);
     for (int j = 1; j <= n; j++) {
       ns = 0;
       for (int i = 0; i <= (l1-j); i++) {
         char c = *(s1 + i + j);
         *(s1 + i + j) = '\0';
-        if (strstr(t,(s1+i))) ns++;
+        if (strstr(t,(s1+i))) {
+	  ns++;
+	} else if (opt & NGRAM_WEIGHTED) {
+	  ns--;
+test++;
+	  if (i == 0 || i == l1-j) ns--; // side weight
+	}
         *(s1 + i + j ) = c;
       }
       nscore = nscore + ns;
-      if (ns < 2) break;
+      if (ns < 2 && !(opt & NGRAM_WEIGHTED)) break;
     }
     free(t);
   }
   
   ns = 0;
   if (opt & NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
   if (opt & NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
   ns = (nscore - ((ns > 0) ? ns : 0));
   return ns;
 }
 
 // length of the left common substring of s1 and (decapitalised) s2
 int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) {
   if (utf8) {
     w_char su1[MAXSWL];
     w_char su2[MAXSWL];
+    su1[0].l = su2[0].l = su1[0].h = su2[0].h = 0;
     // decapitalize dictionary word
     if (complexprefixes) {
       int l1 = u8_u16(su1, MAXSWL, s1);
       int l2 = u8_u16(su2, MAXSWL, s2);
       if (*((short *)su1+l1-1) == *((short *)su2+l2-1)) return 1;
     } else {
       int i;
       u8_u16(su1, 1, s1);
--- a/extensions/spellcheck/hunspell/src/suggestmgr.hxx
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
@@ -60,25 +60,27 @@
 
 #define MAXSWL 100
 #define MAXSWUTF8L (MAXSWL * 4)
 #define MAX_ROOTS 100
 #define MAX_WORDS 100
 #define MAX_GUESS 200
 #define MAXNGRAMSUGS 4
 #define MAXPHONSUGS 2
+#define MAXCOMPOUNDSUGS 3
 
 // timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
 #define TIMELIMIT (CLOCKS_PER_SEC >> 2)
 #define MINTIMER 100
 #define MAXPLUSTIMER 100
 
 #define NGRAM_LONGER_WORSE  (1 << 0)
 #define NGRAM_ANY_MISMATCH  (1 << 1)
 #define NGRAM_LOWERING      (1 << 2)
+#define NGRAM_WEIGHTED      (1 << 3)
 
 #include "hunvisapi.h"
 
 #include "atypes.hxx"
 #include "affixmgr.hxx"
 #include "hashmgr.hxx"
 #include "langnum.hxx"
 #include <time.h>
@@ -97,16 +99,17 @@ class LIBHUNSPELL_DLL_EXPORTED SuggestMg
 
   AffixMgr*       pAMgr;
   int             maxSug;
   struct cs_info * csconv;
   int             utf8;
   int             langnum;
   int             nosplitsugs;
   int             maxngramsugs;
+  int             maxcpdsugs;
   int             complexprefixes;
 
 
 public:
   SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
   ~SuggestMgr();
 
   int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
--- a/extensions/spellcheck/hunspell/src/w_char.hxx
+++ b/extensions/spellcheck/hunspell/src/w_char.hxx
@@ -42,11 +42,13 @@ typedef struct __attribute__ ((packed)) 
     unsigned char l;
     unsigned char h;
 } w_char;
 
 // two character arrays
 struct replentry {
   char * pattern;
   char * pattern2;
+  bool start;
+  bool end;
 };
 
 #endif