Bug 711769 - Update Hyphen library to version 2.8.3. r=jfkthame
author Ryan VanderMeulen <ryanvm@gmail.com>
Sun, 18 Dec 2011 20:18:59 +0100
changeset 82921 047d1d0ea0a79448c31f26d2e96fe48b4da5cbc2
parent 82918 bf93caeef44e1cc121cd35f99e0c2e1469506649
child 82922 e48f73ccaf1c58f11feb2d16d31a56f08a1884a7
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers jfkthame
bugs 711769
milestone 11.0a1
Bug 711769 - Update Hyphen library to version 2.8.3. r=jfkthame
intl/hyphenation/src/README
intl/hyphenation/src/README.compound
intl/hyphenation/src/hyphen.c
intl/locales/en-US/hyphenation/hyph_en_US.dic
--- a/intl/hyphenation/src/README
+++ b/intl/hyphenation/src/README
@@ -1,13 +1,13 @@
 Hyphen - hyphenation library to use converted TeX hyphenation patterns
  
 (C) 1998 Raph Levien
 (C) 2001 ALTLinux, Moscow
-(C) 2006, 2007, 2008, 2010 László Németh
+(C) 2006, 2007, 2008, 2010, 2011 László Németh
  
 This was part of libHnj library by Raph Levien.
  
 Peter Novodvorsky from ALTLinux cut hyphenation part from libHnj
 to use it in OpenOffice.org.
  
 Compound word and non-standard hyphenation support by László Németh.
   
@@ -119,9 +119,9 @@ perl substrings.pl hyphen.gb hyph_en_GB.
 OTHERS
 
 Java hyphenation: Peter B. West (Folio project) implements a hyphenator with
 non standard hyphenation facilities based on extended Libhnj. The HyFo module
 is released in binary form as jar files and in source form as zip files.
 See http://sourceforge.net/project/showfiles.php?group_id=119136
 
 László Németh
-<nemeth (at) openoffice (dot) org>
+<nemeth (at) numbertext (dot) org>
--- a/intl/hyphenation/src/README.compound
+++ b/intl/hyphenation/src/README.compound
@@ -13,16 +13,26 @@ 1'1
 NEXTLEVEL
 
 Description:
 
 1-1 and 1'1 declare hyphen and apostrophe as word boundary characters
 and NOHYPHEN with the comma separated character (or character sequence)
 list forbid the (extra) hyphens at the hyphen and apostrophe characters.
 
+Implicite NOHYPHEN declaration
+
+Without explicite NEXTLEVEL declaration, Hyphen 2.8 uses the
+previous settings, plus in UTF-8 encoding, endash (U+2013) and
+typographical apostrophe (U+2019) are NOHYPHEN characters, too.
+
+It's possible to enlarge the hyphenation distance from these
+NOHYPHEN characters by using COMPOUNDLEFTHYPHENMIN and
+COMPOUNDRIGHTHYPHENMIN attributes.
+
 Compound word hyphenation
 
 Hyphen library supports better compound word hyphenation and special
 rules of compound word hyphenation of German languages and other
 languages with arbitrary number of compound words. The new options,
 COMPOUNDLEFTHYPHENMIN and COMPOUNDRIGHTHYPHENMIN help to set the right
 style for the hyphenation of compound words.
 
--- a/intl/hyphenation/src/hyphen.c
+++ b/intl/hyphenation/src/hyphen.c
@@ -221,128 +221,71 @@ hnj_add_trans (HyphenDict *dict, int sta
 						sizeof(HyphenTrans));
     }
   dict->states[state1].trans[num_trans].ch = ch;
   dict->states[state1].trans[num_trans].new_state = state2;
   dict->states[state1].num_trans++;
 }
 
 #ifdef VERBOSE
-HashTab *global;
+HashTab *global[1];
 
 static char *
-get_state_str (int state)
+get_state_str (int state, int level)
 {
   int i;
   HashEntry *e;
 
   for (i = 0; i < HASH_SIZE; i++)
-    for (e = global->entries[i]; e; e = e->next)
+    for (e = global[level]->entries[i]; e; e = e->next)
       if (e->val == state)
 	return e->key;
   return NULL;
 }
 #endif
 
-HyphenDict *
-hnj_hyphen_load (const char *fn)
-{
-  HyphenDict *dict[2];
-  HashTab *hashtab;
-  FILE *f;
-  char buf[MAX_CHARS];
+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
+  int i, j;
   char word[MAX_CHARS];
   char pattern[MAX_CHARS];
   char * repl;
   signed char replindex;
   signed char replcut;
-  int state_num = 0, last_state;
-  int i, j, k;
+  int state_num = 0;
+  int last_state;
   char ch;
   int found;
-  HashEntry *e;
-  int nextlevel = 0;
 
-  f = fopen (fn, "r");
-  if (f == NULL)
-    return NULL;
-
-// loading one or two dictionaries (separated by NEXTLEVEL keyword)
-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
-  hashtab = hnj_hash_new ();
-#ifdef VERBOSE
-  global = hashtab;
-#endif
-  hnj_hash_insert (hashtab, "", 0);
-  dict[k] = hnj_malloc (sizeof(HyphenDict));
-  dict[k]->num_states = 1;
-  dict[k]->states = hnj_malloc (sizeof(HyphenState));
-  dict[k]->states[0].match = NULL;
-  dict[k]->states[0].repl = NULL;
-  dict[k]->states[0].fallback_state = -1;
-  dict[k]->states[0].num_trans = 0;
-  dict[k]->states[0].trans = NULL;
-  dict[k]->nextlevel = NULL;
-  dict[k]->lhmin = 0;
-  dict[k]->rhmin = 0;
-  dict[k]->clhmin = 0;
-  dict[k]->crhmin = 0;
-  dict[k]->nohyphen = NULL;
-  dict[k]->nohyphenl = 0;
-
-  /* read in character set info */
-  if (k == 0) {
-    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
-      for (i=0;i<MAX_NAME;i++)
-        if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
-          dict[k]->cset[i] = 0;
-    } else {
-      dict[k]->cset[0] = 0;
-    }
-    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
-  } else {
-    strcpy(dict[k]->cset, dict[0]->cset);
-    dict[k]->utf8 = dict[0]->utf8;
-  }
-
-  while (fgets (buf, sizeof(buf), f) != NULL)
-    {
-      if (buf[0] != '%')
-	{
-	  if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
-	    nextlevel = 1;
-	    break;
-	  } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
-	    dict[k]->lhmin = atoi(buf + 13);
-	    continue;
+	  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
+	    dict->lhmin = atoi(buf + 13);
+	    return;
 	  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
-	    dict[k]->rhmin = atoi(buf + 14);
-	    continue;
+	    dict->rhmin = atoi(buf + 14);
+	    return;
 	  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
-	    dict[k]->clhmin = atoi(buf + 21);
-	    continue;
+	    dict->clhmin = atoi(buf + 21);
+	    return;
 	  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
-	    dict[k]->crhmin = atoi(buf + 22);
-	    continue;
+	    dict->crhmin = atoi(buf + 22);
+	    return;
 	  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
 	    char * space = buf + 8;
 	    while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
-	    if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
-	    if (dict[k]->nohyphen) {
-	        char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
+	    if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
+	    if (dict->nohyphen) {
+	        char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
 	        *nhe = 0;
-	        for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
+	        for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
 	                if (*nhe == ',') {
-	                    dict[k]->nohyphenl++;
+	                    dict->nohyphenl++;
 	                    *nhe = 0;
 	                }
 	        }
 	    }
-	    continue;
+	    return;
 	  } 
 	  j = 0;
 	  pattern[j] = '0';
           repl = strchr(buf, '/');
           replindex = 0;
           replcut = 0;
           if (repl) {
             char * index = strchr(repl + 1, ',');
@@ -377,17 +320,17 @@ for (k = 0; k == 0 || (k == 1 && nextlev
 
           i = 0;
 	  if (!repl) {
 	    /* Optimize away leading zeroes */
             for (; pattern[i] == '0'; i++);
           } else {
             if (*word == '.') i++;
             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
-            if (dict[k]->utf8) {
+            if (dict->utf8) {
                 int pu = -1;        /* unicode character position */
                 int ps = -1;        /* unicode start position (original replindex) */
                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
                 for (; pc < (strlen(word) + 1); pc++) {
                 /* beginning of an UTF-8 character (not '10' start bits) */
                     if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
                     if ((ps < 0) && (replindex == pu)) {
                         ps = replindex;
@@ -401,38 +344,112 @@ for (k = 0; k == 0 || (k == 1 && nextlev
                 if (*word == '.') replindex--;
             }
           }
 
 #ifdef VERBOSE
 	  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
 #endif
 	  found = hnj_hash_lookup (hashtab, word);
-	  state_num = hnj_get_state (dict[k], hashtab, word);
-	  dict[k]->states[state_num].match = hnj_strdup (pattern + i);
-	  dict[k]->states[state_num].repl = repl;
-	  dict[k]->states[state_num].replindex = replindex;
+	  state_num = hnj_get_state (dict, hashtab, word);
+	  dict->states[state_num].match = hnj_strdup (pattern + i);
+	  dict->states[state_num].repl = repl;
+	  dict->states[state_num].replindex = replindex;
           if (!replcut) {
-            dict[k]->states[state_num].replcut = (signed char) strlen(word);
+            dict->states[state_num].replcut = (signed char) strlen(word);
           } else {
-            dict[k]->states[state_num].replcut = replcut;
+            dict->states[state_num].replcut = replcut;
           }
 
 	  /* now, put in the prefix transitions */
           for (; found < 0 ;j--)
 	    {
 	      last_state = state_num;
 	      ch = word[j - 1];
 	      word[j - 1] = '\0';
 	      found = hnj_hash_lookup (hashtab, word);
-	      state_num = hnj_get_state (dict[k], hashtab, word);
-	      hnj_add_trans (dict[k], state_num, last_state, ch);
+	      state_num = hnj_get_state (dict, hashtab, word);
+	      hnj_add_trans (dict, state_num, last_state, ch);
 	    }
-	}
+}
+
+HyphenDict *
+hnj_hyphen_load (const char *fn)
+{
+  HyphenDict *dict[2];
+  HashTab *hashtab;
+  FILE *f;
+  char buf[MAX_CHARS];
+  int nextlevel = 0;
+  int i, j, k;
+  HashEntry *e;
+  int state_num = 0;
+
+  f = fopen (fn, "r");
+  if (f == NULL)
+    return NULL;
+
+// loading one or two dictionaries (separated by NEXTLEVEL keyword)
+for (k = 0; k < 2; k++) { 
+  hashtab = hnj_hash_new ();
+#ifdef VERBOSE
+  global[k] = hashtab;
+#endif
+  hnj_hash_insert (hashtab, "", 0);
+  dict[k] = hnj_malloc (sizeof(HyphenDict));
+  dict[k]->num_states = 1;
+  dict[k]->states = hnj_malloc (sizeof(HyphenState));
+  dict[k]->states[0].match = NULL;
+  dict[k]->states[0].repl = NULL;
+  dict[k]->states[0].fallback_state = -1;
+  dict[k]->states[0].num_trans = 0;
+  dict[k]->states[0].trans = NULL;
+  dict[k]->nextlevel = NULL;
+  dict[k]->lhmin = 0;
+  dict[k]->rhmin = 0;
+  dict[k]->clhmin = 0;
+  dict[k]->crhmin = 0;
+  dict[k]->nohyphen = NULL;
+  dict[k]->nohyphenl = 0;
+
+  /* read in character set info */
+  if (k == 0) {
+    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
+      for (i=0;i<MAX_NAME;i++)
+        if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+          dict[k]->cset[i] = 0;
+    } else {
+      dict[k]->cset[0] = 0;
     }
+    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+  } else {
+    strcpy(dict[k]->cset, dict[0]->cset);
+    dict[k]->utf8 = dict[0]->utf8;
+  }
+
+  if (k == 0 || nextlevel) {
+    while (fgets (buf, sizeof(buf), f) != NULL) {
+      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
+	nextlevel = 1;
+	break;
+      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
+    }
+  } else if (k == 1) {
+    /* default first level: hyphen and ASCII apostrophe */
+    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
+    else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
+    strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
+    hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
+    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
+    if (dict[0]->utf8) {
+      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
+      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
+    }
+  }
 
   /* Could do unioning of matches here (instead of the preprocessor script).
      If we did, the pseudocode would look something like this:
 
      foreach state in the hash table
         foreach i = [1..length(state) - 1]
            state to check is substr (state, i)
            look it up
@@ -474,17 +491,30 @@ for (k = 0; k == 0 || (k == 1 && nextlev
 #endif
 
 #ifndef VERBOSE
   hnj_hash_free (hashtab);
 #endif
   state_num = 0;
 }
   fclose(f);
-  if (k == 2) dict[0]->nextlevel = dict[1];
+  if (nextlevel) dict[0]->nextlevel = dict[1];
+  else {
+    dict[1] -> nextlevel = dict[0];
+    dict[1]->lhmin = dict[0]->lhmin;
+    dict[1]->rhmin = dict[0]->rhmin;
+    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
+    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
+#ifdef VERBOSE
+    HashTab *r = global[0];
+    global[0] = global[1];
+    global[1] = r;
+#endif
+    return dict[1];
+  }
   return dict[0];
 }
 
 void hnj_hyphen_free (HyphenDict *dict)
 {
   int state_num;
   HyphenState *hstate;
 
@@ -525,18 +555,23 @@ int hnj_hyphen_hyphenate (HyphenDict *di
   if (word_size + 3 < MAX_WORD)
     prep_word = prep_word_buf;
   else
     prep_word = hnj_malloc (word_size + 3);
 
   j = 0;
   prep_word[j++] = '.';
 
-  for (i = 0; i < word_size; i++)
+  for (i = 0; i < word_size; i++) {
+    if (word[i] <= '9' && word[i] >= '0') {
+      prep_word[j++] = '.';
+    } else {
       prep_word[j++] = word[i];
+    }
+  }
 
   prep_word[j++] = '.';
   prep_word[j] = '\0';
 
   for (i = 0; i < word_size + 5; i++)
     hyphens[i] = '0';
 
 #ifdef VERBOSE
@@ -555,17 +590,17 @@ int hnj_hyphen_hyphenate (HyphenDict *di
             /* return 1; */
 	    /*  KBH: FIXME shouldn't this be as follows? */
             state = 0;
             goto try_next_letter;
           }          
 
 #ifdef VERBOSE
 	  char *state_str;
-	  state_str = get_state_str (state);
+	  state_str = get_state_str (state, 0);
 
 	  for (k = 0; k < i - strlen (state_str); k++)
 	    putchar (' ');
 	  printf ("%s", state_str);
 #endif
 
 	  hstate = &dict->states[state];
 	  for (k = 0; k < hstate->num_trans; k++)
@@ -668,16 +703,19 @@ int hnj_hyphen_lhmin(int utf8, const cha
 {
     int i = 1, j;
 
     // Unicode ligature support
     if (utf8 && ((unsigned char) word[0] == 0xEF) && ((unsigned char) word[1] == 0xAC))  {
       i += hnj_ligature(word[2]);
     }
 
+    // ignore numbers
+    for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
+
     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
         char * rh = strchr((*rep)[j], '=');
         if (rh && (hnj_hyphen_strnlen(word, j - (*pos)[j] + 1, utf8) +
           hnj_hyphen_strnlen((*rep)[j], rh - (*rep)[j], utf8)) < lhmin) {
             free((*rep)[j]);
             (*rep)[j] = NULL;
@@ -694,19 +732,23 @@ int hnj_hyphen_lhmin(int utf8, const cha
        }
     } while (utf8 && (word[j] & 0xc0) == 0x80);
     return 0;
 }
 
 int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
 	char *** rep, int ** pos, int ** cut, int rhmin)
 {
-    int i;
-    int j = word_size - 2;    
-    for (i = 1; i < rhmin && j > 0; j--) {
+    int i = 1;
+    int j;
+
+    // ignore numbers
+    for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
+
+    for (j = word_size - 2; i < rhmin && j > 0; j--) {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
         char * rh = strchr((*rep)[j], '=');
         if (rh && (hnj_hyphen_strnlen(word + j - (*pos)[j] + (*cut)[j] + 1, 100, utf8) +
           hnj_hyphen_strnlen(rh + 1, strlen(rh + 1), utf8)) < rhmin) {
             free((*rep)[j]);
             (*rep)[j] = NULL;
             hyphens[j] = '0';
@@ -754,18 +796,25 @@ int hnj_hyphen_hyph_(HyphenDict *dict, c
     matchlen = hnj_malloc ((word_size + 3) * sizeof(int));
     matchindex = hnj_malloc ((word_size + 3) * sizeof(int));
     matchrepl = hnj_malloc ((word_size + 3) * sizeof(char *));
   }
 
   j = 0;
   prep_word[j++] = '.';
   
-  for (i = 0; i < word_size; i++)
+  for (i = 0; i < word_size; i++) {
+    if (word[i] <= '9' && word[i] >= '0') {
+      prep_word[j++] = '.';
+    } else {
       prep_word[j++] = word[i];
+    }
+  }
+
+
 
   prep_word[j++] = '.';
   prep_word[j] = '\0';
 
   for (i = 0; i < j; i++)
     hyphens[i] = '0';    
 
 #ifdef VERBOSE
@@ -784,17 +833,17 @@ int hnj_hyphen_hyph_(HyphenDict *dict, c
             /* return 1; */
 	    /*  KBH: FIXME shouldn't this be as follows? */
             state = 0;
             goto try_next_letter;
           }          
 
 #ifdef VERBOSE
 	  char *state_str;
-	  state_str = get_state_str (state);
+	  state_str = get_state_str (state, 1);
 
 	  for (k = 0; k < i - strlen (state_str); k++)
 	    putchar (' ');
 	  printf ("%s", state_str);
 #endif
 
 	  hstate = &dict->states[state];
 	  for (k = 0; k < hstate->num_trans; k++)
@@ -1031,16 +1080,19 @@ int hnj_hyphen_norm(const char *word, in
         if (j < i) {
             (*rep)[i] = NULL;
             (*pos)[i] = 0;
             (*cut)[i] = 0;
         }
     }
   }
   hyphens[j + 1] = '\0';
+#ifdef VERBOSE
+  printf ("nums: %s\n", hyphens);
+#endif
   return 0;
 }
 
 /* get the word with all possible hyphenations (output: hyphword) */
 void hnj_hyphen_hyphword(const char * word, int l, const char * hyphens, 
     char * hyphword, char *** rep, int ** pos, int ** cut)
 {
   int i, j;
@@ -1072,37 +1124,42 @@ int hnj_hyphen_hyphenate2 (HyphenDict *d
 
   /* nohyphen */
   if (dict->nohyphen) {
     char * nh = dict->nohyphen;
     int nhi;
     for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
         char * nhy = (char *) strstr(word, nh);
         while (nhy) {
-            hyphens[nhy - word + strlen(nh) - 1] = 0;
-            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
+            hyphens[nhy - word + strlen(nh) - 1] = '0';
+            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
             nhy = (char *) strstr(nhy + 1, nh);
         }
         nh = nh + strlen(nh) + 1;
     }
   }
 
   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
   if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
+#ifdef VERBOSE
+  printf ("nums: %s\n", hyphens);
+#endif
   return 0;
 }
 
 /* previous main api function with hyphenmin parameters */
 int hnj_hyphen_hyphenate3 (HyphenDict *dict,
 	const char *word, int word_size, char * hyphens,
 	char *hyphword, char *** rep, int ** pos, int ** cut,
 	int lhmin, int rhmin, int clhmin, int crhmin)
 {
-  lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
-  rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
+  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
+  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
+  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
+  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
     clhmin, crhmin, 1, 1);
   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
     rep, pos, cut, (lhmin > 0 ? lhmin : 2));
   hnj_hyphen_rhmin(dict->utf8, word, word_size, hyphens,
     rep, pos, cut, (rhmin > 0 ? rhmin : 2));
   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
 
--- a/intl/locales/en-US/hyphenation/hyph_en_US.dic
+++ b/intl/locales/en-US/hyphenation/hyph_en_US.dic
@@ -1,18 +1,11 @@
 UTF-8
 LEFTHYPHENMIN 2
 RIGHTHYPHENMIN 3
-COMPOUNDLEFTHYPHENMIN 2
-COMPOUNDRIGHTHYPHENMIN 3
-NOHYPHEN -,',’
-1-1
-1'1
-1’1
-NEXTLEVEL
 .a2ch4
 .ad4der
 .a2d
 .ad1d4
 .a2f1t
 .a2f
 .a4l3t
 .am5at