fdo#72219: Fix for corruption of symbols in docx
Issue: OUString uses UTF-16, so a Unicode character outside the Basic Multilingual Plane is stored as a surrogate pair, i.e. as 2 code units, not just 1. As a result we were hitting an assertion failure in the "rtl_uString_iterateCodePoints" method. erAck: The underlying cause was that the dictionary breakiterator misused UTF-16 positions as Unicode code point positions. Change-Id: I923485f56c2d879b63687adaea2b489a3479991c Reviewed-on: https://gerrit.libreoffice.org/6955 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Eike Rathke <erack@redhat.com>
This commit is contained in:
parent
45b72633d1
commit
d8fd158759
2 changed files with 22 additions and 3 deletions
|
@ -45,7 +45,7 @@ public:
|
|||
#endif
|
||||
void testKhmer();
|
||||
void testJapanese();
|
||||
|
||||
void testChinese();
|
||||
CPPUNIT_TEST_SUITE(TestBreakIterator);
|
||||
CPPUNIT_TEST(testLineBreaking);
|
||||
CPPUNIT_TEST(testGraphemeIteration);
|
||||
|
@ -64,6 +64,7 @@ public:
|
|||
CPPUNIT_TEST(testLao);
|
||||
#endif
|
||||
CPPUNIT_TEST(testJapanese);
|
||||
CPPUNIT_TEST(testChinese);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
private:
|
||||
uno::Reference<i18n::XBreakIterator> m_xBreak;
|
||||
|
@ -938,6 +939,22 @@ void TestBreakIterator::testJapanese()
|
|||
}
|
||||
}
|
||||
|
||||
void TestBreakIterator::testChinese()
|
||||
{
|
||||
lang::Locale aLocale;
|
||||
aLocale.Language = "zh";
|
||||
aLocale.Country = "CN";
|
||||
i18n::Boundary aBounds;
|
||||
|
||||
{
|
||||
const sal_Unicode CHINESE[] = { 0x6A35, 0x6A30, 0x69FE, 0x8919, 0xD867, 0xDEDB };
|
||||
|
||||
OUString aTest(CHINESE, SAL_N_ELEMENTS(CHINESE));
|
||||
aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale,
|
||||
i18n::WordType::DICTIONARY_WORD, true);
|
||||
CPPUNIT_ASSERT(aBounds.startPos == 4 && aBounds.endPos == 6);
|
||||
}
|
||||
}
|
||||
void TestBreakIterator::setUp()
|
||||
{
|
||||
BootstrapFixtureBase::setUp();
|
||||
|
|
|
@ -387,9 +387,11 @@ Boundary xdictionary::getWordBoundary(const OUString& rText, sal_Int32 anyPos, s
|
|||
if (u_isWhitespace(ch))
|
||||
i--;
|
||||
}
|
||||
|
||||
boundary.endPos = boundary.startPos;
|
||||
rText.iterateCodePoints(&boundary.endPos, aCache.wordboundary[i]);
|
||||
rText.iterateCodePoints(&boundary.startPos, aCache.wordboundary[i-1]);
|
||||
boundary.endPos += aCache.wordboundary[i];
|
||||
boundary.startPos += aCache.wordboundary[i-1];
|
||||
|
||||
} else {
|
||||
boundary.startPos = anyPos;
|
||||
if (anyPos < len) rText.iterateCodePoints(&anyPos, 1);
|
||||
|
|
Loading…
Reference in a new issue