diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 98a0bca96a77..552274864035 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -158,6 +158,26 @@ void TestBreakIterator::testLineBreaking()
             (void)m_xBreak->getLineBreak(aTest, 0, aLocale, 0, aHyphOptions, aUserOptions);
         }
     }
+
+    //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197
+    {
+        // Two Hangul words separated by a space at index 4; the second word
+        // occupies indices 5..7.
+        const sal_Unicode HANGUL[] = { 0xc560, 0xad6D, 0xac00, 0xc758, 0x0020, 0xac00,
+            0xc0ac, 0xb294};
+        OUString aTest(HANGUL, SAL_N_ELEMENTS(HANGUL));
+
+        aLocale.Language = "ko";
+        aLocale.Country = "KR";
+
+        {
+            // Requesting a break at index 6 (inside the second word) must
+            // yield the start of that word instead.
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
+                aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32>(5), aResult.breakIndex);
+        }
+    }
 }
 
 //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
diff --git a/i18npool/source/breakiterator/breakiterator_cjk.cxx b/i18npool/source/breakiterator/breakiterator_cjk.cxx
index 98115e6c1d32..8a4244631759 100644
--- a/i18npool/source/breakiterator/breakiterator_cjk.cxx
+++ b/i18npool/source/breakiterator/breakiterator_cjk.cxx
@@ -86,6 +86,10 @@ BreakIterator_CJK::getWordBoundary( const OUString& text, sal_Int32 anyPos,
     return BreakIterator_Unicode::getWordBoundary(text, anyPos, nLocale, wordType, bDirection);
 }
 
+// True for UTF-16 code units in the Hangul Syllables (U+AC00..U+D7AF) or Hangul
+// Jamo (U+1100..U+11FF) blocks. Macro: the argument is evaluated several times.
+#define isHangul(cCh) (((cCh)>=0xAC00&&(cCh)<=0xD7AF)||((cCh)>=0x1100&&(cCh)<=0x11FF))
+
 LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
         const OUString& Text, sal_Int32 nStartPos,
         const css::lang::Locale& /*rLocale*/, sal_Int32 /*nMinBreakPos*/,
@@ -94,17 +98,42 @@ LineBreakResults SAL_CALL BreakIterator_CJK::getLineBreak(
 {
     LineBreakResults lbr;
 
+    // Remember the requested position so later steps can tell whether it was
+    // already moved by the hanging-punctuation or forbidden-character rules.
+    const sal_Int32 nOldStartPos = nStartPos;
+
     if (bOptions.allowPunctuationOutsideMargin &&
             hangingCharacters.indexOf(Text[nStartPos]) != -1 &&
             (Text.iterateCodePoints( &nStartPos ), nStartPos == Text.getLength())) {
         ; // do nothing
     } else if (bOptions.applyForbiddenRules && 0 < nStartPos && nStartPos < Text.getLength()) {
+
         while (nStartPos > 0 &&
                 (bOptions.forbiddenBeginCharacters.indexOf(Text[nStartPos]) != -1 ||
                  bOptions.forbiddenEndCharacters.indexOf(Text[nStartPos-1]) != -1))
             Text.iterateCodePoints( &nStartPos, -1);
     }
 
+    // Prevent cutting Korean words in the middle. Only applies when no earlier
+    // rule moved the position; the length check keeps Text[nStartPos] in range
+    // when the caller asks for a break at the very end of the text.
+    if ( nOldStartPos == nStartPos && nStartPos < Text.getLength() && isHangul( Text[nStartPos] ) )
+    {
+        // Walk backwards over the Hangul run; stops on the first non-Hangul
+        // character, or at -1 when the run reaches the start of the text.
+        while ( nStartPos >= 0 && isHangul( Text[nStartPos] ) )
+            --nStartPos;
+
+        // beginning of the last Korean word.
+        if ( nStartPos < nOldStartPos )
+            ++nStartPos;
+
+        // The run starts at the beginning of the text, so there is no earlier
+        // break opportunity: fall back to the requested position.
+        if ( nStartPos == 0 )
+            nStartPos = nOldStartPos;
+    }
+
     lbr.breakIndex = nStartPos;
     lbr.breakType = BreakType::WORDBOUNDARY;
     return lbr;