From 64743ee6bc9567015f164333ed9b508542017337 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Tue, 16 Apr 2024 10:13:31 -0600 Subject: [PATCH] tdf#49885 Removed custom Thai BreakIterator Previously, a custom BreakIterator was used for Thai grapheme clusters. This change deletes the custom BreakIterator, in favor of the ICU implementation. Change-Id: Icec94c73a5734c2059786dfbba085f487c488d7c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/166156 Tested-by: Jenkins Reviewed-by: Eike Rathke --- i18npool/Library_i18npool.mk | 1 - i18npool/inc/breakiterator_th.hxx | 53 ---- i18npool/qa/cppunit/test_breakiterator.cxx | 42 ++++ .../breakiterator/breakiteratorImpl.cxx | 4 - .../source/breakiterator/breakiterator_th.cxx | 232 ------------------ .../registerservices/registerservices.cxx | 2 - i18npool/util/i18npool.component | 4 - solenv/bin/native-code.py | 1 - solenv/clang-format/excludelist | 2 - 9 files changed, 42 insertions(+), 299 deletions(-) delete mode 100644 i18npool/inc/breakiterator_th.hxx delete mode 100644 i18npool/source/breakiterator/breakiterator_th.cxx diff --git a/i18npool/Library_i18npool.mk b/i18npool/Library_i18npool.mk index bd945d9329dc..3ee4518c8fd2 100644 --- a/i18npool/Library_i18npool.mk +++ b/i18npool/Library_i18npool.mk @@ -50,7 +50,6 @@ endif $(eval $(call gb_Library_add_exception_objects,i18npool,\ i18npool/source/breakiterator/breakiterator_cjk \ i18npool/source/breakiterator/breakiteratorImpl \ - i18npool/source/breakiterator/breakiterator_th \ i18npool/source/breakiterator/breakiterator_unicode \ i18npool/source/breakiterator/xdictionary \ i18npool/source/calendar/calendarImpl \ diff --git a/i18npool/inc/breakiterator_th.hxx b/i18npool/inc/breakiterator_th.hxx deleted file mode 100644 index cc6a717493ab..000000000000 --- a/i18npool/inc/breakiterator_th.hxx +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ -#pragma once - -#include "breakiterator_unicode.hxx" - -namespace i18npool { - - - -class BreakIterator_th final : public BreakIterator_Unicode -{ -public: - BreakIterator_th(); - virtual ~BreakIterator_th() override; - virtual sal_Int32 SAL_CALL previousCharacters(const OUString& text, sal_Int32 start, - const css::lang::Locale& nLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, - sal_Int32& nDone) override; - virtual sal_Int32 SAL_CALL nextCharacters(const OUString& text, sal_Int32 start, - const css::lang::Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 count, - sal_Int32& nDone) override; - virtual css::i18n::LineBreakResults SAL_CALL getLineBreak( const OUString& Text, sal_Int32 nStartPos, - const css::lang::Locale& nLocale, sal_Int32 nMinBreakPos, - const css::i18n::LineBreakHyphenationOptions& hOptions, - const css::i18n::LineBreakUserOptions& bOptions ) override; - -private: - OUString cachedText; // for cell index - std::vector m_aNextCellIndex; - std::vector m_aPreviousCellIndex; - - void makeIndex(const OUString& text, sal_Int32 pos); -}; - -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index b33466bee46d..1e5fd8f025af 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -1120,6 +1120,48 @@ void TestBreakIterator::testGraphemeIteration() i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); } + + // tdf#49885: Replace custom Thai implementation with ICU + { + aLocale.Language = "th"; + aLocale.Country = "TH"; + + static constexpr OUString aTest = u"กำ"_ustr; + + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 2 }, aTest.getLength()); + + sal_Int32 nDone = 0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, + nDone); + CPPUNIT_ASSERT_EQUAL(aTest.getLength(), nPos); + + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 0 }, nPos); + } + + // Korean may also use grapheme clusters for character composition + { + aLocale.Language = "ko"; + aLocale.Country = "KR"; + + static constexpr OUString aTest = u"각"_ustr; + + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 3 }, aTest.getLength()); + + sal_Int32 nDone = 0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, + nDone); + CPPUNIT_ASSERT_EQUAL(aTest.getLength(), nPos); + + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL(sal_Int32{ 0 }, nPos); + } } //A test to ensure that certain ranges and codepoints that are categorized as diff --git a/i18npool/source/breakiterator/breakiteratorImpl.cxx b/i18npool/source/breakiterator/breakiteratorImpl.cxx index 5a2304757603..51f191326e5f 100644 --- a/i18npool/source/breakiterator/breakiteratorImpl.cxx +++ b/i18npool/source/breakiterator/breakiteratorImpl.cxx @@ -592,10 +592,6 @@ bool BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocal if (aLocaleName == "ko") return false; #endif -#if !WITH_LOCALE_ALL && !WITH_LOCALE_th - if (aLocaleName == "th") - return false; -#endif Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext( "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext); diff --git a/i18npool/source/breakiterator/breakiterator_th.cxx b/i18npool/source/breakiterator/breakiterator_th.cxx deleted file mode 100644 index 8d9245a4bf62..000000000000 --- a/i18npool/source/breakiterator/breakiterator_th.cxx +++ /dev/null @@ -1,232 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/* - * This file is part of the LibreOffice project. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. - * - * This file incorporates work covered by the following license notice: - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed - * with this work for additional information regarding copyright - * ownership. The ASF licenses this file to you under the Apache - * License, Version 2.0 (the "License"); you may not use this file - * except in compliance with the License. You may obtain a copy of - * the License at http://www.apache.org/licenses/LICENSE-2.0 . - */ - - -#include -#include -#include -#include - -using namespace ::com::sun::star; -using namespace ::com::sun::star::i18n; -using namespace ::com::sun::star::lang; - -namespace i18npool { - -/** - * Constructor. - */ -BreakIterator_th::BreakIterator_th() -{ - cBreakIterator = u"com.sun.star.i18n.BreakIterator_th"_ustr; - // to improve performance, alloc big enough memory in construct. - m_aNextCellIndex.assign(512, 0); - m_aPreviousCellIndex.assign(512, 0); - lineRule=nullptr; -} - -/** - * Deconstructor. - */ -BreakIterator_th::~BreakIterator_th() -{ -} - -sal_Int32 SAL_CALL BreakIterator_th::previousCharacters( const OUString& Text, - sal_Int32 nStartPos, const lang::Locale& rLocale, - sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone ) -{ - if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { - nDone = 0; - if (nStartPos > 0) { // for others to skip cell. - makeIndex(Text, nStartPos); - - if (m_aNextCellIndex[nStartPos-1] == 0) // not a CTL character - return BreakIterator_Unicode::previousCharacters(Text, nStartPos, rLocale, - nCharacterIteratorMode, nCount, nDone); - else - { - while (nCount > 0 && m_aNextCellIndex[nStartPos - 1] > 0) - { - nCount--; nDone++; - nStartPos = m_aPreviousCellIndex[nStartPos - 1]; - } - } - } else - nStartPos = 0; - } else { // for BS to delete one char. - for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++) - Text.iterateCodePoints(&nStartPos, -1); - } - - return nStartPos; -} - -sal_Int32 SAL_CALL BreakIterator_th::nextCharacters(const OUString& Text, - sal_Int32 nStartPos, const lang::Locale& rLocale, - sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone) -{ - sal_Int32 len = Text.getLength(); - if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { - nDone = 0; - if (nStartPos < len) { - makeIndex(Text, nStartPos); - - if (m_aNextCellIndex[nStartPos] == 0) // not a CTL character - return BreakIterator_Unicode::nextCharacters(Text, nStartPos, rLocale, - nCharacterIteratorMode, nCount, nDone); - else - { - while (nCount > 0 && m_aNextCellIndex[nStartPos] > 0) - { - nCount--; nDone++; - nStartPos = m_aNextCellIndex[nStartPos]; - } - } - } else - nStartPos = len; - } else { - for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++) - Text.iterateCodePoints(&nStartPos); - } - - return nStartPos; -} - -// Make sure line is broken on cell boundary if we implement cell iterator. -LineBreakResults SAL_CALL BreakIterator_th::getLineBreak( - const OUString& Text, sal_Int32 nStartPos, - const lang::Locale& rLocale, sal_Int32 nMinBreakPos, - const LineBreakHyphenationOptions& hOptions, - const LineBreakUserOptions& bOptions ) -{ - LineBreakResults lbr = BreakIterator_Unicode::getLineBreak(Text, nStartPos, - rLocale, nMinBreakPos, hOptions, bOptions ); - if (lbr.breakIndex < Text.getLength()) { - makeIndex(Text, lbr.breakIndex); - lbr.breakIndex = m_aPreviousCellIndex[ lbr.breakIndex ]; - } - return lbr; -} - -#define SARA_AM 0x0E33 - -/* - * cell composition states - */ - -#define ST_COM 1 // Compose the following character with leading char and display in the same cell -#define ST_NXT 2 // display the following character in the next cell -#define ST_NDP 3 // non-display - -const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = { - // C N C L F F F B B B T A A A A A A - // T O O V V V V V V D O D D D V V V - // R N N 1 2 3 1 2 N 1 2 3 1 2 3 - // L S E - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // CTRL 0 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // NON 1 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM }, // CONS 2 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // LV 3 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV1 4 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV2 5 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV3 6 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV1 7 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV2 8 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BD 9 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // TONE 10 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD1 11 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD2 12 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD3 13 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV1 14 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV2 15 - { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT } // AV3 16 - -}; - -const sal_uInt32 is_ST_COM = (1<= Text.getLength() || m_aNextCellIndex[nStartPos] > 0 - || !is_Thai(Text[nStartPos])) - return; - - const sal_Unicode* str = cachedText.getStr(); - sal_Int32 const len = cachedText.getLength(); - - sal_Int32 startPos = nStartPos; - while (startPos > 0 && is_Thai(str[startPos-1])) startPos--; - sal_Int32 endPos = nStartPos; - while (endPos < len && is_Thai(str[endPos])) endPos++; - - sal_Int32 start, end, pos; - pos = start = end = startPos; - - assert(endPos >= 0 && o3tl::make_unsigned(endPos) <= m_aNextCellIndex.size()); - while (pos < endPos) { - end += getACell(str, start, endPos); - assert(end >= 0 && o3tl::make_unsigned(end) <= m_aNextCellIndex.size()); - while (pos < end) { - m_aNextCellIndex[pos] = end; - m_aPreviousCellIndex[pos] = start; - pos++; - } - start = end; - } -} - -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/registerservices/registerservices.cxx b/i18npool/source/registerservices/registerservices.cxx index d04a5205daa3..b3289c2d2a53 100644 --- a/i18npool/source/registerservices/registerservices.cxx +++ b/i18npool/source/registerservices/registerservices.cxx @@ -41,7 +41,6 @@ #include #include -#include #include #include @@ -219,7 +218,6 @@ IMPL_UNO_CONSTRUCTOR( NumToTextInformalUpper_ko ) #if WITH_LOCALE_ALL || WITH_LOCALE_th IMPL_CREATEINSTANCE( NumToChar_th ) -IMPL_UNO_CONSTRUCTOR( BreakIterator_th ) IMPL_UNO_CONSTRUCTOR( CharToNum_th ) IMPL_UNO_CONSTRUCTOR( InputSequenceChecker_th ) #endif diff --git a/i18npool/util/i18npool.component b/i18npool/util/i18npool.component index 2f5085f02384..fdbe5dcaa568 100644 --- a/i18npool/util/i18npool.component +++ b/i18npool/util/i18npool.component @@ -35,10 +35,6 @@ constructor="i18npool_BreakIterator_ko_get_implementation"> - - - diff --git a/solenv/bin/native-code.py b/solenv/bin/native-code.py index 58884af6b4d6..26de5c199ec1 100755 --- a/solenv/bin/native-code.py +++ b/solenv/bin/native-code.py @@ -239,7 +239,6 @@ core_constructor_list = [ "com_sun_star_text_DefaultNumberingProvider_get_implementation", ("i18npool_BreakIterator_ja_get_implementation", "#if WITH_LOCALE_ALL || WITH_LOCALE_ja"), ("i18npool_BreakIterator_ko_get_implementation", "#if WITH_LOCALE_ALL || WITH_LOCALE_ko"), - ("i18npool_BreakIterator_th_get_implementation", "#if WITH_LOCALE_ALL || WITH_LOCALE_th"), ("i18npool_BreakIterator_zh_get_implementation", "#if WITH_LOCALE_ALL || WITH_LOCALE_zh"), ("i18npool_BreakIterator_zh_TW_get_implementation", "#if WITH_LOCALE_ALL || WITH_LOCALE_zh"), "i18npool_CalendarImpl_get_implementation", diff --git a/solenv/clang-format/excludelist b/solenv/clang-format/excludelist index 90245483e6c5..654b613e5b64 100644 --- a/solenv/clang-format/excludelist +++ b/solenv/clang-format/excludelist @@ -4577,7 +4577,6 @@ i18nlangtag/source/languagetag/languagetag.cxx i18nlangtag/source/languagetag/languagetagicu.cxx i18npool/inc/breakiteratorImpl.hxx i18npool/inc/breakiterator_cjk.hxx -i18npool/inc/breakiterator_th.hxx i18npool/inc/breakiterator_unicode.hxx i18npool/inc/bullet.h i18npool/inc/calendarImpl.hxx @@ -4625,7 +4624,6 @@ i18npool/qa/cppunit/test_ordinalsuffix.cxx i18npool/qa/cppunit/test_textsearch.cxx i18npool/source/breakiterator/breakiteratorImpl.cxx i18npool/source/breakiterator/breakiterator_cjk.cxx -i18npool/source/breakiterator/breakiterator_th.cxx i18npool/source/breakiterator/breakiterator_unicode.cxx i18npool/source/breakiterator/gendict.cxx i18npool/source/breakiterator/xdictionary.cxx