tdf#95024 fix compound word handling for new Hungarian orthography

This commit contains the recent Hunspell fix for Hungarian compound
word handling (commit 42807f970ac2d65f0d13a7c57eb454b210e92240
in Hunspell git repository), changing spell checking only
in Hungarian documents.

Change-Id: I1c6c3736ecf8c1e2fffcf1c53959b25dc9d27966
This commit is contained in:
László Németh 2015-10-14 01:17:26 +02:00
parent dfe1e25b4a
commit 2511a21841
2 changed files with 109 additions and 0 deletions

View file

@ -21,6 +21,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell,\
external/hunspell/hunspell-morph-overflow.patch \
external/hunspell/ubsan.patch.0 \
external/hunspell/hunspell-1.3.3-rhbz1261421.patch \
external/hunspell/hunspell-tdf95024-compound.patch \
))
ifeq ($(COM),MSC)

View file

@ -0,0 +1,108 @@
From 42807f970ac2d65f0d13a7c57eb454b210e92240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
<laszlo.nemeth@collabora.com>
Date: Mon, 12 Oct 2015 08:43:12 +0200
Subject: [PATCH] fix compound handling for new Hungarian orthography
The frequent cases of this compound limitation are handled by
the extended dictionary, but not these ones with both derivative
and inflectional suffixes.
---
src/hunspell/affixmgr.cxx | 19 +++++++++++++++----
src/hunspell/affixmgr.hxx | 1 +
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
index 0992e6e..0950425 100644
--- misc/hunspell-1.3.3/src/hunspell/affixmgr.cxx
+++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.cxx
@@ -139,8 +139,9 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k
cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
cpdvowels_utf16_len=0; // vowels
- pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
- sfxappnd=NULL; // previous suffix for counting a special syllables BUG
+ pfxappnd=NULL; // previous prefix for counting syllables of the prefix BUG
+ sfxappnd=NULL; // previous suffix for counting syllables of the suffix BUG
+ sfxextra=0; // modifier for syllable count of sfxappnd BUG
cpdsyllablenum=NULL; // syllable count incrementing flag
checknum=0; // checking numbers, and word with numbers
wordchars=NULL; // letters + spec. word characters
@@ -1201,6 +1202,7 @@ struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compo
pfx = NULL;
pfxappnd = NULL;
sfxappnd = NULL;
+ sfxextra = 0;
// first handle the special case of 0 length prefixes
PfxEntry * pe = pStart[0];
@@ -1261,6 +1263,7 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
pfx = NULL;
sfxappnd = NULL;
+ sfxextra = 0;
// first handle the special case of 0 length prefixes
PfxEntry * pe = pStart[0];
@@ -1302,6 +1305,7 @@ char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound
pfx = NULL;
sfxappnd = NULL;
+ sfxextra = 0;
// first handle the special case of 0 length prefixes
PfxEntry * pe = pStart[0];
@@ -1353,6 +1357,7 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
pfx = NULL;
sfxappnd = NULL;
+ sfxextra = 0;
// first handle the special case of 0 length prefixes
PfxEntry * pe = pStart[0];
@@ -1993,7 +1998,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
// XXX only second suffix (inflections, not derivations)
if (sfxappnd) {
char * tmp = myrevstrdup(sfxappnd);
- numsyllable -= get_syllable(tmp, strlen(tmp));
+ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra;
free(tmp);
}
@@ -2512,7 +2517,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
// XXX only second suffix (inflections, not derivations)
if (sfxappnd) {
char * tmp = myrevstrdup(sfxappnd);
- numsyllable -= get_syllable(tmp, strlen(tmp));
+ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra;
free(tmp);
}
@@ -2696,6 +2701,12 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
sfx=sptr; // BUG: sfx not stateless
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
+// LANG_hu section: spec. Hungarian rule
+ else if (langnum == LANG_hu && sptr->getKeyLen() && sptr->getKey()[0] == 'i' &&
+ sptr->getKey()[1] != 'y' && sptr->getKey()[1] != 't') {
+ sfxextra = 1;
+ }
+// END of LANG_hu section
return rv;
}
}
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
index 2679b7f..8839814 100644
--- misc/hunspell-1.3.3/src/hunspell/affixmgr.hxx
+++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.hxx
@@ -155,6 +155,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
char * cpdsyllablenum;
const char * pfxappnd; // BUG: not stateless
const char * sfxappnd; // BUG: not stateless
+ int sfxextra; // BUG: not stateless
FLAG sfxflag; // BUG: not stateless
char * derived; // BUG: not stateless
SfxEntry * sfx; // BUG: not stateless
--
1.9.1