From 2511a21841dd9dec735a53add8174e47d24deb88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= Date: Wed, 14 Oct 2015 01:17:26 +0200 Subject: [PATCH] tdf#95024 fix compound word handling for new Hungarian orthography This commit contains the recent Hunspell fix for Hungarian compound word handling (commit 42807f970ac2d65f0d13a7c57eb454b210e92240 in Hunspell git repository), changing spell checking only in Hungarian documents. Change-Id: I1c6c3736ecf8c1e2fffcf1c53959b25dc9d27966 --- external/hunspell/UnpackedTarball_hunspell.mk | 1 + .../hunspell/hunspell-tdf95024-compound.patch | 108 ++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 external/hunspell/hunspell-tdf95024-compound.patch diff --git a/external/hunspell/UnpackedTarball_hunspell.mk b/external/hunspell/UnpackedTarball_hunspell.mk index 6ad31dd685d4..8f14062e47e1 100644 --- a/external/hunspell/UnpackedTarball_hunspell.mk +++ b/external/hunspell/UnpackedTarball_hunspell.mk @@ -21,6 +21,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell,\ external/hunspell/hunspell-morph-overflow.patch \ external/hunspell/ubsan.patch.0 \ external/hunspell/hunspell-1.3.3-rhbz1261421.patch \ + external/hunspell/hunspell-tdf95024-compound.patch \ )) ifeq ($(COM),MSC) diff --git a/external/hunspell/hunspell-tdf95024-compound.patch b/external/hunspell/hunspell-tdf95024-compound.patch new file mode 100644 index 000000000000..133cf4ace537 --- /dev/null +++ b/external/hunspell/hunspell-tdf95024-compound.patch @@ -0,0 +1,108 @@ +From 42807f970ac2d65f0d13a7c57eb454b210e92240 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= + +Date: Mon, 12 Oct 2015 08:43:12 +0200 +Subject: [PATCH] fix compound handling for new Hungarian orthography + +The frequent cases of this compound limitation are handled by +the extended dictionary, but not these ones with both derivative +and inflectional suffixes. +--- + src/hunspell/affixmgr.cxx | 19 +++++++++++++++---- + src/hunspell/affixmgr.hxx | 1 + + 2 files changed, 16 insertions(+), 4 deletions(-) + +diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx +index 0992e6e..0950425 100644 +--- misc/hunspell-1.3.3/src/hunspell/affixmgr.cxx ++++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.cxx +@@ -139,8 +139,9 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k + cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX) + cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search) + cpdvowels_utf16_len=0; // vowels +- pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG +- sfxappnd=NULL; // previous suffix for counting a special syllables BUG ++ pfxappnd=NULL; // previous prefix for counting syllables of the prefix BUG ++ sfxappnd=NULL; // previous suffix for counting syllables of the suffix BUG ++ sfxextra=0; // modifier for syllable count of sfxappnd BUG + cpdsyllablenum=NULL; // syllable count incrementing flag + checknum=0; // checking numbers, and word with numbers + wordchars=NULL; // letters + spec. word characters +@@ -1201,6 +1202,7 @@ struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compo + pfx = NULL; + pfxappnd = NULL; + sfxappnd = NULL; ++ sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry * pe = pStart[0]; +@@ -1261,6 +1263,7 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len, + + pfx = NULL; + sfxappnd = NULL; ++ sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry * pe = pStart[0]; +@@ -1302,6 +1305,7 @@ char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound + + pfx = NULL; + sfxappnd = NULL; ++ sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry * pe = pStart[0]; +@@ -1353,6 +1357,7 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len, + + pfx = NULL; + sfxappnd = NULL; ++ sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry * pe = pStart[0]; +@@ -1993,7 +1998,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len, + // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + char * tmp = myrevstrdup(sfxappnd); +- numsyllable -= get_syllable(tmp, strlen(tmp)); ++ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; + free(tmp); + } + +@@ -2512,7 +2517,7 @@ int AffixMgr::compound_check_morph(const char * word, int len, + // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + char * tmp = myrevstrdup(sfxappnd); +- numsyllable -= get_syllable(tmp, strlen(tmp)); ++ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra; + free(tmp); + } + +@@ -2696,6 +2701,12 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len, + sfx=sptr; // BUG: sfx not stateless + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless ++// LANG_hu section: spec. Hungarian rule ++ else if (langnum == LANG_hu && sptr->getKeyLen() && sptr->getKey()[0] == 'i' && ++ sptr->getKey()[1] != 'y' && sptr->getKey()[1] != 't') { ++ sfxextra = 1; ++ } ++// END of LANG_hu section + return rv; + } + } +diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx +index 2679b7f..8839814 100644 +--- misc/hunspell-1.3.3/src/hunspell/affixmgr.hxx ++++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.hxx +@@ -155,6 +155,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr + char * cpdsyllablenum; + const char * pfxappnd; // BUG: not stateless + const char * sfxappnd; // BUG: not stateless ++ int sfxextra; // BUG: not stateless + FLAG sfxflag; // BUG: not stateless + char * derived; // BUG: not stateless + SfxEntry * sfx; // BUG: not stateless +-- +1.9.1 +