tdf#95024 fix compound word handling for new Hungarian orthography
This commit contains the recent Hunspell fix for Hungarian compound word handling (commit 42807f970ac2d65f0d13a7c57eb454b210e92240 in Hunspell git repository), changing spell checking only in Hungarian documents. Change-Id: I1c6c3736ecf8c1e2fffcf1c53959b25dc9d27966
This commit is contained in:
parent
dfe1e25b4a
commit
2511a21841
2 changed files with 109 additions and 0 deletions
|
@ -21,6 +21,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell,\
|
|||
external/hunspell/hunspell-morph-overflow.patch \
|
||||
external/hunspell/ubsan.patch.0 \
|
||||
external/hunspell/hunspell-1.3.3-rhbz1261421.patch \
|
||||
external/hunspell/hunspell-tdf95024-compound.patch \
|
||||
))
|
||||
|
||||
ifeq ($(COM),MSC)
|
||||
|
|
108
external/hunspell/hunspell-tdf95024-compound.patch
vendored
Normal file
108
external/hunspell/hunspell-tdf95024-compound.patch
vendored
Normal file
|
@ -0,0 +1,108 @@
|
|||
From 42807f970ac2d65f0d13a7c57eb454b210e92240 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?=
|
||||
<laszlo.nemeth@collabora.com>
|
||||
Date: Mon, 12 Oct 2015 08:43:12 +0200
|
||||
Subject: [PATCH] fix compound handling for new Hungarian orthography
|
||||
|
||||
The frequent cases of this compound limitation are handled by
|
||||
the extended dictionary, but not these ones with both derivative
|
||||
and inflectional suffixes.
|
||||
---
|
||||
src/hunspell/affixmgr.cxx | 19 +++++++++++++++----
|
||||
src/hunspell/affixmgr.hxx | 1 +
|
||||
2 files changed, 16 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
|
||||
index 0992e6e..0950425 100644
|
||||
--- misc/hunspell-1.3.3/src/hunspell/affixmgr.cxx
|
||||
+++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.cxx
|
||||
@@ -139,8 +139,9 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * k
|
||||
cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
|
||||
cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
|
||||
cpdvowels_utf16_len=0; // vowels
|
||||
- pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
|
||||
- sfxappnd=NULL; // previous suffix for counting a special syllables BUG
|
||||
+ pfxappnd=NULL; // previous prefix for counting syllables of the prefix BUG
|
||||
+ sfxappnd=NULL; // previous suffix for counting syllables of the suffix BUG
|
||||
+ sfxextra=0; // modifier for syllable count of sfxappnd BUG
|
||||
cpdsyllablenum=NULL; // syllable count incrementing flag
|
||||
checknum=0; // checking numbers, and word with numbers
|
||||
wordchars=NULL; // letters + spec. word characters
|
||||
@@ -1201,6 +1202,7 @@ struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compo
|
||||
pfx = NULL;
|
||||
pfxappnd = NULL;
|
||||
sfxappnd = NULL;
|
||||
+ sfxextra = 0;
|
||||
|
||||
// first handle the special case of 0 length prefixes
|
||||
PfxEntry * pe = pStart[0];
|
||||
@@ -1261,6 +1263,7 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
|
||||
|
||||
pfx = NULL;
|
||||
sfxappnd = NULL;
|
||||
+ sfxextra = 0;
|
||||
|
||||
// first handle the special case of 0 length prefixes
|
||||
PfxEntry * pe = pStart[0];
|
||||
@@ -1302,6 +1305,7 @@ char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound
|
||||
|
||||
pfx = NULL;
|
||||
sfxappnd = NULL;
|
||||
+ sfxextra = 0;
|
||||
|
||||
// first handle the special case of 0 length prefixes
|
||||
PfxEntry * pe = pStart[0];
|
||||
@@ -1353,6 +1357,7 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
|
||||
|
||||
pfx = NULL;
|
||||
sfxappnd = NULL;
|
||||
+ sfxextra = 0;
|
||||
|
||||
// first handle the special case of 0 length prefixes
|
||||
PfxEntry * pe = pStart[0];
|
||||
@@ -1993,7 +1998,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
|
||||
// XXX only second suffix (inflections, not derivations)
|
||||
if (sfxappnd) {
|
||||
char * tmp = myrevstrdup(sfxappnd);
|
||||
- numsyllable -= get_syllable(tmp, strlen(tmp));
|
||||
+ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra;
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
@@ -2512,7 +2517,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
|
||||
// XXX only second suffix (inflections, not derivations)
|
||||
if (sfxappnd) {
|
||||
char * tmp = myrevstrdup(sfxappnd);
|
||||
- numsyllable -= get_syllable(tmp, strlen(tmp));
|
||||
+ numsyllable -= get_syllable(tmp, strlen(tmp)) + sfxextra;
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
@@ -2696,6 +2701,12 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
|
||||
sfx=sptr; // BUG: sfx not stateless
|
||||
sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
|
||||
if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
|
||||
+// LANG_hu section: spec. Hungarian rule
|
||||
+ else if (langnum == LANG_hu && sptr->getKeyLen() && sptr->getKey()[0] == 'i' &&
|
||||
+ sptr->getKey()[1] != 'y' && sptr->getKey()[1] != 't') {
|
||||
+ sfxextra = 1;
|
||||
+ }
|
||||
+// END of LANG_hu section
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
|
||||
index 2679b7f..8839814 100644
|
||||
--- misc/hunspell-1.3.3/src/hunspell/affixmgr.hxx
|
||||
+++ misc/build/hunspell-1.3.3/src/hunspell/affixmgr.hxx
|
||||
@@ -155,6 +155,7 @@ class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||
char * cpdsyllablenum;
|
||||
const char * pfxappnd; // BUG: not stateless
|
||||
const char * sfxappnd; // BUG: not stateless
|
||||
+ int sfxextra; // BUG: not stateless
|
||||
FLAG sfxflag; // BUG: not stateless
|
||||
char * derived; // BUG: not stateless
|
||||
SfxEntry * sfx; // BUG: not stateless
|
||||
--
|
||||
1.9.1
|
||||
|
Loading…
Reference in a new issue