office-gobmx/hyphen/hyphen-2.7.1-2.8.3.patch

--- misc/build/hyphen-2.7.1/hyphen.c.old	2011-10-10 15:58:33.317260138 +0200
+++ misc/build/hyphen-2.7.1/hyphen.c	2011-10-10 15:58:55.221260136 +0200
@@ -226,115 +226,61 @@
 }
 
 #ifdef VERBOSE
-HashTab *global;
+HashTab *global[2]; /* one hash table per dictionary level (indices 0 and 1) */
 
 static char *
-get_state_str (int state)
+get_state_str (int state, int level)
 {
   int i;
   HashEntry *e;
 
   for (i = 0; i < HASH_SIZE; i++)
-    for (e = global->entries[i]; e; e = e->next)
+    for (e = global[level]->entries[i]; e; e = e->next)
       if (e->val == state)
 	return e->key;
   return NULL;
 }
 #endif
 
-HyphenDict *
-hnj_hyphen_load (const char *fn)
-{
-  HyphenDict *dict[2];
-  HashTab *hashtab;
-  FILE *f;
-  char buf[MAX_CHARS];
+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
+  int i, j;
   char word[MAX_CHARS];
   char pattern[MAX_CHARS];
   char * repl;
   signed char replindex;
   signed char replcut;
-  int state_num = 0, last_state;
-  int i, j, k;
+  int state_num = 0;
+  int last_state;
   char ch;
   int found;
-  HashEntry *e;
-  int nextlevel = 0;
-
-  f = fopen (fn, "r");
-  if (f == NULL)
-    return NULL;
-
-// loading one or two dictionaries (separated by NEXTLEVEL keyword)
-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { 
-  hashtab = hnj_hash_new ();
-#ifdef VERBOSE
-  global = hashtab;
-#endif
-  hnj_hash_insert (hashtab, "", 0);
-  dict[k] = hnj_malloc (sizeof(HyphenDict));
-  dict[k]->num_states = 1;
-  dict[k]->states = hnj_malloc (sizeof(HyphenState));
-  dict[k]->states[0].match = NULL;
-  dict[k]->states[0].repl = NULL;
-  dict[k]->states[0].fallback_state = -1;
-  dict[k]->states[0].num_trans = 0;
-  dict[k]->states[0].trans = NULL;
-  dict[k]->nextlevel = NULL;
-  dict[k]->lhmin = 0;
-  dict[k]->rhmin = 0;
-  dict[k]->clhmin = 0;
-  dict[k]->crhmin = 0;
-  dict[k]->nohyphen = NULL;
-  dict[k]->nohyphenl = 0;
-
-  /* read in character set info */
-  if (k == 0) {
-    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
-    for (i=0;i<MAX_NAME;i++)
-      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
-        dict[k]->cset[i] = 0;
-    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
-  } else {
-    strcpy(dict[k]->cset, dict[0]->cset);
-    dict[k]->utf8 = dict[0]->utf8;
-  }
 
-  while (fgets (buf, sizeof(buf), f) != NULL)
-    {
-      if (buf[0] != '%')
-	{
-	  if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
-	    nextlevel = 1;
-	    break;
-	  } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
-	    dict[k]->lhmin = atoi(buf + 13);
-	    continue;
+	  if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
+	    dict->lhmin = atoi(buf + 13);
+	    return;
 	  } else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
-	    dict[k]->rhmin = atoi(buf + 14);
-	    continue;
+	    dict->rhmin = atoi(buf + 14);
+	    return;
 	  } else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
-	    dict[k]->clhmin = atoi(buf + 21);
-	    continue;
+	    dict->clhmin = atoi(buf + 21);
+	    return;
 	  } else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
-	    dict[k]->crhmin = atoi(buf + 22);
-	    continue;
+	    dict->crhmin = atoi(buf + 22);
+	    return;
 	  } else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
 	    char * space = buf + 8;
 	    while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
-	    if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
-	    if (dict[k]->nohyphen) {
-	        char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
+	    if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
+	    if (dict->nohyphen) {
+	        char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
 	        *nhe = 0;
-	        for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
+	        for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
 	                if (*nhe == ',') {
-	                    dict[k]->nohyphenl++;
+	                    dict->nohyphenl++;
 	                    *nhe = 0;
 	                }
 	        }
 	    }
-	    continue;
+	    return;
 	  } 
 	  j = 0;
 	  pattern[j] = '0';
@@ -379,7 +325,7 @@
           } else {
             if (*word == '.') i++;
             /* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
-            if (dict[k]->utf8) {
+            if (dict->utf8) {
                 int pu = -1;        /* unicode character position */
                 int ps = -1;        /* unicode start position (original replindex) */
                 int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
@@ -403,14 +349,14 @@
 	  printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, j, repl);
 #endif
 	  found = hnj_hash_lookup (hashtab, word);
-	  state_num = hnj_get_state (dict[k], hashtab, word);
-	  dict[k]->states[state_num].match = hnj_strdup (pattern + i);
-	  dict[k]->states[state_num].repl = repl;
-	  dict[k]->states[state_num].replindex = replindex;
+	  state_num = hnj_get_state (dict, hashtab, word);
+	  dict->states[state_num].match = hnj_strdup (pattern + i);
+	  dict->states[state_num].repl = repl;
+	  dict->states[state_num].replindex = replindex;
           if (!replcut) {
-            dict[k]->states[state_num].replcut = (signed char) strlen(word);
+            dict->states[state_num].replcut = (signed char) strlen(word);
           } else {
-            dict[k]->states[state_num].replcut = replcut;
+            dict->states[state_num].replcut = replcut;
           }
 
 	  /* now, put in the prefix transitions */
@@ -420,11 +366,82 @@
 	      ch = word[j - 1];
 	      word[j - 1] = '\0';
 	      found = hnj_hash_lookup (hashtab, word);
-	      state_num = hnj_get_state (dict[k], hashtab, word);
-	      hnj_add_trans (dict[k], state_num, last_state, ch);
+	      state_num = hnj_get_state (dict, hashtab, word);
+	      hnj_add_trans (dict, state_num, last_state, ch);
 	    }
-	}
+}
+
+HyphenDict *
+hnj_hyphen_load (const char *fn)
+{
+  HyphenDict *dict[2];
+  HashTab *hashtab;
+  FILE *f;
+  char buf[MAX_CHARS];
+  int nextlevel = 0;
+  int i, j, k;
+  HashEntry *e;
+  int state_num = 0;
+
+  f = fopen (fn, "r");
+  if (f == NULL)
+    return NULL;
+
+// loading one or two dictionaries (separated by NEXTLEVEL keyword)
+for (k = 0; k < 2; k++) { 
+  hashtab = hnj_hash_new ();
+#ifdef VERBOSE
+  global[k] = hashtab;
+#endif
+  hnj_hash_insert (hashtab, "", 0);
+  dict[k] = hnj_malloc (sizeof(HyphenDict));
+  dict[k]->num_states = 1;
+  dict[k]->states = hnj_malloc (sizeof(HyphenState));
+  dict[k]->states[0].match = NULL;
+  dict[k]->states[0].repl = NULL;
+  dict[k]->states[0].fallback_state = -1;
+  dict[k]->states[0].num_trans = 0;
+  dict[k]->states[0].trans = NULL;
+  dict[k]->nextlevel = NULL;
+  dict[k]->lhmin = 0;
+  dict[k]->rhmin = 0;
+  dict[k]->clhmin = 0;
+  dict[k]->crhmin = 0;
+  dict[k]->nohyphen = NULL;
+  dict[k]->nohyphenl = 0;
+
+  /* read in character set info */
+  if (k == 0) {
+    for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+    fgets(dict[k]->cset,  sizeof(dict[k]->cset),f);
+    for (i=0;i<MAX_NAME;i++)
+      if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+        dict[k]->cset[i] = 0;
+    dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+  } else {
+    strcpy(dict[k]->cset, dict[0]->cset);
+    dict[k]->utf8 = dict[0]->utf8;
+  }
+
+  if (k == 0 || nextlevel) {
+    while (fgets (buf, sizeof(buf), f) != NULL) {
+      if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
+	nextlevel = 1;
+	break;
+      } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
+    }
+  } else if (k == 1) {
+    /* default first level: hyphen and ASCII apostrophe */
+    if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
+    else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
+    strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here
+    hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
+    hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
+    if (dict[0]->utf8) {
+      hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
+      hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
     }
+  }
 
   /* Could do unioning of matches here (instead of the preprocessor script).
      If we did, the pseudocode would look something like this:
@@ -476,7 +493,20 @@
   state_num = 0;
 }
   fclose(f);
-  if (k == 2) dict[0]->nextlevel = dict[1];
+  if (nextlevel) dict[0]->nextlevel = dict[1];
+  else {
+    dict[1] -> nextlevel = dict[0];
+    dict[1]->lhmin = dict[0]->lhmin;
+    dict[1]->rhmin = dict[0]->rhmin;
+    dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
+    dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
+#ifdef VERBOSE
+    HashTab *r = global[0];
+    global[0] = global[1];
+    global[1] = r;
+#endif
+    return dict[1];
+  }
   return dict[0];
 }
 
@@ -527,8 +557,13 @@
   j = 0;
   prep_word[j++] = '.';
 
-  for (i = 0; i < word_size; i++)
+  for (i = 0; i < word_size; i++) {
+    if (word[i] <= '9' && word[i] >= '0') {
+      prep_word[j++] = '.';
+    } else {
       prep_word[j++] = word[i];
+    }
+  }
 
   prep_word[j++] = '.';
   prep_word[j] = '\0';
@@ -557,7 +592,7 @@
 
 #ifdef VERBOSE
 	  char *state_str;
-	  state_str = get_state_str (state);
+	  state_str = get_state_str (state, 0);
 
 	  for (k = 0; k < i - strlen (state_str); k++)
 	    putchar (' ');
@@ -670,6 +705,9 @@
       i += hnj_ligature(word[2]);
     }
 
+    // ignore numbers
+    for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
+
     for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
@@ -696,9 +734,13 @@
 int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
 	char *** rep, int ** pos, int ** cut, int rhmin)
 {
-    int i;
-    int j = word_size - 2;    
-    for (i = 1; i < rhmin && j > 0; j--) {
+    int i = 1;
+    int j;
+
+    // ignore numbers
+    for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
+
+    for (j = word_size - 2; i < rhmin && j > 0; j--) {
       // check length of the non-standard part
       if (*rep && *pos && *cut && (*rep)[j]) {
         char * rh = strchr((*rep)[j], '=');
@@ -756,8 +798,15 @@
   j = 0;
   prep_word[j++] = '.';
   
-  for (i = 0; i < word_size; i++)
+  for (i = 0; i < word_size; i++) {
+    if (word[i] <= '9' && word[i] >= '0') {
+      prep_word[j++] = '.';
+    } else {
       prep_word[j++] = word[i];
+    }
+  }
+
+
 
   prep_word[j++] = '.';
   prep_word[j] = '\0';
@@ -786,7 +835,7 @@
 
 #ifdef VERBOSE
 	  char *state_str;
-	  state_str = get_state_str (state);
+	  state_str = get_state_str (state, 1);
 
 	  for (k = 0; k < i - strlen (state_str); k++)
 	    putchar (' ');
@@ -1033,6 +1082,9 @@
     }
   }
   hyphens[j + 1] = '\0';
+#ifdef VERBOSE
+  printf ("nums: %s\n", hyphens);
+#endif
   return 0;
 }
 
@@ -1074,8 +1126,8 @@
     for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
         char * nhy = (char *) strstr(word, nh);
         while (nhy) {
-            hyphens[nhy - word + strlen(nh) - 1] = 0;
-            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = 0;
+            hyphens[nhy - word + strlen(nh) - 1] = '0';
+            if (nhy - word  - 1 >= 0) hyphens[nhy - word - 1] = '0';
             nhy = (char *) strstr(nhy + 1, nh);
         }
         nh = nh + strlen(nh) + 1;
@@ -1084,6 +1136,9 @@
 
   if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
   if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
+#ifdef VERBOSE
+  printf ("nums: %s\n", hyphens);
+#endif
   return 0;
 }
 
@@ -1093,8 +1148,10 @@
 	char *hyphword, char *** rep, int ** pos, int ** cut,
 	int lhmin, int rhmin, int clhmin, int crhmin)
 {
-  lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
-  rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
+  lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
+  rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
+  clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
+  crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
   hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
     clhmin, crhmin, 1, 1);
   hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
Add fixes of Hyphen 2.8.3 2011-10-10 09:22:13 -05:00			`--- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-10 15:58:33.317260138 +0200`
			`+++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-10 15:58:55.221260136 +0200`
			`@@ -226,115 +226,61 @@`
			`}`

			`#ifdef VERBOSE`
			`-HashTab *global;`
			`+HashTab *global[2]; /* one hash table per dictionary level (indices 0 and 1) */`

			`static char *`
			`-get_state_str (int state)`
			`+get_state_str (int state, int level)`
			`{`
			`int i;`
			`HashEntry *e;`

			`for (i = 0; i < HASH_SIZE; i++)`
			`- for (e = global->entries[i]; e; e = e->next)`
			`+ for (e = global[level]->entries[i]; e; e = e->next)`
			`if (e->val == state)`
			`return e->key;`
			`return NULL;`
			`}`
			`#endif`

			`-HyphenDict *`
			`-hnj_hyphen_load (const char *fn)`
			`-{`
			`- HyphenDict *dict[2];`
			`- HashTab *hashtab;`
			`- FILE *f;`
			`- char buf[MAX_CHARS];`
			`+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {`
			`+ int i, j;`
			`char word[MAX_CHARS];`
			`char pattern[MAX_CHARS];`
			`char * repl;`
			`signed char replindex;`
			`signed char replcut;`
			`- int state_num = 0, last_state;`
			`- int i, j, k;`
			`+ int state_num = 0;`
			`+ int last_state;`
			`char ch;`
			`int found;`
			`- HashEntry *e;`
			`- int nextlevel = 0;`
			`-`
			`- f = fopen (fn, "r");`
			`- if (f == NULL)`
			`- return NULL;`
			`-`
			`-// loading one or two dictionaries (separated by NEXTLEVEL keyword)`
			`-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {`
			`- hashtab = hnj_hash_new ();`
			`-#ifdef VERBOSE`
			`- global = hashtab;`
			`-#endif`
			`- hnj_hash_insert (hashtab, "", 0);`
			`- dict[k] = hnj_malloc (sizeof(HyphenDict));`
			`- dict[k]->num_states = 1;`
			`- dict[k]->states = hnj_malloc (sizeof(HyphenState));`
			`- dict[k]->states[0].match = NULL;`
			`- dict[k]->states[0].repl = NULL;`
			`- dict[k]->states[0].fallback_state = -1;`
			`- dict[k]->states[0].num_trans = 0;`
			`- dict[k]->states[0].trans = NULL;`
			`- dict[k]->nextlevel = NULL;`
			`- dict[k]->lhmin = 0;`
			`- dict[k]->rhmin = 0;`
			`- dict[k]->clhmin = 0;`
			`- dict[k]->crhmin = 0;`
			`- dict[k]->nohyphen = NULL;`
			`- dict[k]->nohyphenl = 0;`
			`-`
			`- /* read in character set info */`
			`- if (k == 0) {`
			`- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;`
			`- fgets(dict[k]->cset, sizeof(dict[k]->cset),f);`
			`- for (i=0;i<MAX_NAME;i++)`
			`- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))`
			`- dict[k]->cset[i] = 0;`
			`- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);`
			`- } else {`
			`- strcpy(dict[k]->cset, dict[0]->cset);`
			`- dict[k]->utf8 = dict[0]->utf8;`
			`- }`

			`- while (fgets (buf, sizeof(buf), f) != NULL)`
			`- {`
			`- if (buf[0] != '%')`
			`- {`
			`- if (strncmp(buf, "NEXTLEVEL", 9) == 0) {`
			`- nextlevel = 1;`
			`- break;`
			`- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {`
			`- dict[k]->lhmin = atoi(buf + 13);`
			`- continue;`
			`+ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {`
			`+ dict->lhmin = atoi(buf + 13);`
			`+ return;`
			`} else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {`
			`- dict[k]->rhmin = atoi(buf + 14);`
			`- continue;`
			`+ dict->rhmin = atoi(buf + 14);`
			`+ return;`
			`} else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {`
			`- dict[k]->clhmin = atoi(buf + 21);`
			`- continue;`
			`+ dict->clhmin = atoi(buf + 21);`
			`+ return;`
			`} else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {`
			`- dict[k]->crhmin = atoi(buf + 22);`
			`- continue;`
			`+ dict->crhmin = atoi(buf + 22);`
			`+ return;`
			`} else if (strncmp(buf, "NOHYPHEN", 8) == 0) {`
			`char * space = buf + 8;`
			`while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;`
			`- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);`
			`- if (dict[k]->nohyphen) {`
			`- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;`
			`+ if (*buf != '\0') dict->nohyphen = hnj_strdup(space);`
			`+ if (dict->nohyphen) {`
			`+ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;`
			`*nhe = 0;`
			`- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {`
			`+ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {`
			`if (*nhe == ',') {`
			`- dict[k]->nohyphenl++;`
			`+ dict->nohyphenl++;`
			`*nhe = 0;`
			`}`
			`}`
			`}`
			`- continue;`
			`+ return;`
			`}`
			`j = 0;`
			`pattern[j] = '0';`
			`@@ -379,7 +325,7 @@`
			`} else {`
			`if (*word == '.') i++;`
			`/* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */`
			`- if (dict[k]->utf8) {`
			`+ if (dict->utf8) {`
			`int pu = -1; /* unicode character position */`
			`int ps = -1; /* unicode start position (original replindex) */`
			`int pc = (*word == '.') ? 1: 0; /* 8-bit character position */`
			`@@ -403,14 +349,14 @@`
			`printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);`
			`#endif`
			`found = hnj_hash_lookup (hashtab, word);`
			`- state_num = hnj_get_state (dict[k], hashtab, word);`
			`- dict[k]->states[state_num].match = hnj_strdup (pattern + i);`
			`- dict[k]->states[state_num].repl = repl;`
			`- dict[k]->states[state_num].replindex = replindex;`
			`+ state_num = hnj_get_state (dict, hashtab, word);`
			`+ dict->states[state_num].match = hnj_strdup (pattern + i);`
			`+ dict->states[state_num].repl = repl;`
			`+ dict->states[state_num].replindex = replindex;`
			`if (!replcut) {`
			`- dict[k]->states[state_num].replcut = (signed char) strlen(word);`
			`+ dict->states[state_num].replcut = (signed char) strlen(word);`
			`} else {`
			`- dict[k]->states[state_num].replcut = replcut;`
			`+ dict->states[state_num].replcut = replcut;`
			`}`

			`/* now, put in the prefix transitions */`
			`@@ -420,11 +366,82 @@`
			`ch = word[j - 1];`
			`word[j - 1] = '\0';`
			`found = hnj_hash_lookup (hashtab, word);`
			`- state_num = hnj_get_state (dict[k], hashtab, word);`
			`- hnj_add_trans (dict[k], state_num, last_state, ch);`
			`+ state_num = hnj_get_state (dict, hashtab, word);`
			`+ hnj_add_trans (dict, state_num, last_state, ch);`
			`}`
			`- }`
			`+}`
			`+`
			`+HyphenDict *`
			`+hnj_hyphen_load (const char *fn)`
			`+{`
			`+ HyphenDict *dict[2];`
			`+ HashTab *hashtab;`
			`+ FILE *f;`
			`+ char buf[MAX_CHARS];`
			`+ int nextlevel = 0;`
			`+ int i, j, k;`
			`+ HashEntry *e;`
			`+ int state_num = 0;`
			`+`
			`+ f = fopen (fn, "r");`
			`+ if (f == NULL)`
			`+ return NULL;`
			`+`
			`+// loading one or two dictionaries (separated by NEXTLEVEL keyword)`
			`+for (k = 0; k < 2; k++) {`
			`+ hashtab = hnj_hash_new ();`
			`+#ifdef VERBOSE`
			`+ global[k] = hashtab;`
			`+#endif`
			`+ hnj_hash_insert (hashtab, "", 0);`
			`+ dict[k] = hnj_malloc (sizeof(HyphenDict));`
			`+ dict[k]->num_states = 1;`
			`+ dict[k]->states = hnj_malloc (sizeof(HyphenState));`
			`+ dict[k]->states[0].match = NULL;`
			`+ dict[k]->states[0].repl = NULL;`
			`+ dict[k]->states[0].fallback_state = -1;`
			`+ dict[k]->states[0].num_trans = 0;`
			`+ dict[k]->states[0].trans = NULL;`
			`+ dict[k]->nextlevel = NULL;`
			`+ dict[k]->lhmin = 0;`
			`+ dict[k]->rhmin = 0;`
			`+ dict[k]->clhmin = 0;`
			`+ dict[k]->crhmin = 0;`
			`+ dict[k]->nohyphen = NULL;`
			`+ dict[k]->nohyphenl = 0;`
			`+`
			`+ /* read in character set info */`
			`+ if (k == 0) {`
			`+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;`
			`+ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);`
			`+ for (i=0;i<MAX_NAME;i++)`
			`+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))`
			`+ dict[k]->cset[i] = 0;`
			`+ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);`
			`+ } else {`
			`+ strcpy(dict[k]->cset, dict[0]->cset);`
			`+ dict[k]->utf8 = dict[0]->utf8;`
			`+ }`
			`+`
			`+ if (k == 0 || nextlevel) {`
			`+ while (fgets (buf, sizeof(buf), f) != NULL) {`
			`+ if (strncmp(buf, "NEXTLEVEL", 9) == 0) {`
			`+ nextlevel = 1;`
			`+ break;`
			`+ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);`
			`+ }`
			`+ } else if (k == 1) {`
			`+ /* default first level: hyphen and ASCII apostrophe */`
			`+ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);`
			`+ else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);`
			`+ strcpy(buf, "1-1/=,1,1\n"); // buf rewritten by hnj_hyphen_load here`
			`+ hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */`
			`+ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */`
			`+ if (dict[0]->utf8) {`
			`+ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */`
			`+ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */`
			`}`
			`+ }`

			`/* Could do unioning of matches here (instead of the preprocessor script).`
			`If we did, the pseudocode would look something like this:`
			`@@ -476,7 +493,20 @@`
			`state_num = 0;`
			`}`
			`fclose(f);`
			`- if (k == 2) dict[0]->nextlevel = dict[1];`
			`+ if (nextlevel) dict[0]->nextlevel = dict[1];`
			`+ else {`
			`+ dict[1] -> nextlevel = dict[0];`
			`+ dict[1]->lhmin = dict[0]->lhmin;`
			`+ dict[1]->rhmin = dict[0]->rhmin;`
			`+ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);`
			`+ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);`
			`+#ifdef VERBOSE`
			`+ HashTab *r = global[0];`
			`+ global[0] = global[1];`
			`+ global[1] = r;`
			`+#endif`
			`+ return dict[1];`
			`+ }`
			`return dict[0];`
			`}`

			`@@ -527,8 +557,13 @@`
			`j = 0;`
			`prep_word[j++] = '.';`

			`- for (i = 0; i < word_size; i++)`
			`+ for (i = 0; i < word_size; i++) {`
			`+ if (word[i] <= '9' && word[i] >= '0') {`
			`+ prep_word[j++] = '.';`
			`+ } else {`
			`prep_word[j++] = word[i];`
			`+ }`
			`+ }`

			`prep_word[j++] = '.';`
			`prep_word[j] = '\0';`
			`@@ -557,7 +592,7 @@`

			`#ifdef VERBOSE`
			`char *state_str;`
			`- state_str = get_state_str (state);`
			`+ state_str = get_state_str (state, 0);`

			`for (k = 0; k < i - strlen (state_str); k++)`
			`putchar (' ');`
			`@@ -670,6 +705,9 @@`
			`i += hnj_ligature(word[2]);`
			`}`

			`+ // ignore numbers`
			`+ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;`
			`+`
			`for (j = 0; i < lhmin && word[j] != '\0'; i++) do {`
			`// check length of the non-standard part`
			`if (*rep && *pos && *cut && (*rep)[j]) {`
			`@@ -696,9 +734,13 @@`
			`int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,`
			`char *** rep, int ** pos, int ** cut, int rhmin)`
			`{`
			`- int i;`
			`- int j = word_size - 2;`
			`- for (i = 1; i < rhmin && j > 0; j--) {`
			`+ int i = 1;`
			`+ int j;`
			`+`
			`+ // ignore numbers`
			`+ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;`
			`+`
			`+ for (j = word_size - 2; i < rhmin && j > 0; j--) {`
			`// check length of the non-standard part`
			`if (*rep && *pos && *cut && (*rep)[j]) {`
			`char * rh = strchr((*rep)[j], '=');`
			`@@ -756,8 +798,15 @@`
			`j = 0;`
			`prep_word[j++] = '.';`

			`- for (i = 0; i < word_size; i++)`
			`+ for (i = 0; i < word_size; i++) {`
			`+ if (word[i] <= '9' && word[i] >= '0') {`
			`+ prep_word[j++] = '.';`
			`+ } else {`
			`prep_word[j++] = word[i];`
			`+ }`
			`+ }`
			`+`
			`+`

			`prep_word[j++] = '.';`
			`prep_word[j] = '\0';`
			`@@ -786,7 +835,7 @@`

			`#ifdef VERBOSE`
			`char *state_str;`
			`- state_str = get_state_str (state);`
			`+ state_str = get_state_str (state, 1);`

			`for (k = 0; k < i - strlen (state_str); k++)`
			`putchar (' ');`
			`@@ -1033,6 +1082,9 @@`
			`}`
			`}`
			`hyphens[j + 1] = '\0';`
			`+#ifdef VERBOSE`
			`+ printf ("nums: %s\n", hyphens);`
			`+#endif`
			`return 0;`
			`}`

			`@@ -1074,8 +1126,8 @@`
			`for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {`
			`char * nhy = (char *) strstr(word, nh);`
			`while (nhy) {`
			`- hyphens[nhy - word + strlen(nh) - 1] = 0;`
			`- if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0;`
			`+ hyphens[nhy - word + strlen(nh) - 1] = '0';`
			`+ if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0';`
			`nhy = (char *) strstr(nhy + 1, nh);`
			`}`
			`nh = nh + strlen(nh) + 1;`
			`@@ -1084,6 +1136,9 @@`

			`if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);`
			`if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);`
			`+#ifdef VERBOSE`
			`+ printf ("nums: %s\n", hyphens);`
			`+#endif`
			`return 0;`
			`}`

			`@@ -1093,8 +1148,10 @@`
			`char *hyphword, char *** rep, int ** pos, int ** cut,`
			`int lhmin, int rhmin, int clhmin, int crhmin)`
			`{`
			`- lhmin = (lhmin > 0 ? lhmin : dict->lhmin);`
			`- rhmin = (rhmin > 0 ? rhmin : dict->rhmin);`
			`+ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;`
			`+ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;`
			`+ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;`
			`+ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;`
			`hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,`
			`clhmin, crhmin, 1, 1);`
			`hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,`