tdf#139863, tdf#155514: Don’t split COMPLEX text portions

When a COMPLEX text portion contains more than one script, it gets split into multiple portions. This is broken in more than one way:
* ScriptTypeDetector::endOfCTLScriptType() does not handle surrogate pairs, so it splits the portion in the middle of a surrogate pair, causing tdf#139863.
* If a combining mark of a different COMPLEX script is used, it gets split into its own portion, causing tdf#155514.

This was supposedly done to fix https://bz.apache.org/ooo/show_bug.cgi?id=28203, but testing the documents from this bug report with this change does not show recurrence of any of the kashida issues, so this split is unnecessary.

Change-Id: I9bd60efe3023b96ee74f861a5dc15a2229c27d79
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152354
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
2023-05-28 16:28:52 +03:00 · 2023-05-28 16:28:52 +03:00 · 98f7ec383d
commit 98f7ec383d
parent 1a6d250f34
1 changed files with 0 additions and 24 deletions
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@ -33,7 +33,6 @@
 #include <unicode/uchar.h>
 #include <com/sun/star/i18n/ScriptType.hpp>
 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
 #include <com/sun/star/i18n/CTLScriptType.hpp>
 #include <com/sun/star/i18n/WordType.hpp>
 #include <com/sun/star/i18n/XBreakIterator.hpp>
 #include <paratr.hxx>
@ -1447,29 +1446,6 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
        if (nChg > TextFrameIndex(rText.getLength()) || nChg < TextFrameIndex(0))
            nChg = TextFrameIndex(rText.getLength());
        // #i28203#
        // for 'complex' portions, we make sure that a portion does not contain more
        // than one script:
        if( i18n::ScriptType::COMPLEX == nScript )
        {
            const short nScriptType = ScriptTypeDetector::getCTLScriptType(
                    rText, sal_Int32(nSearchStt) );
            TextFrameIndex nNextCTLScriptStart = nSearchStt;
            short nCurrentScriptType = nScriptType;
            while( css::i18n::CTLScriptType::CTL_UNKNOWN == nCurrentScriptType || nScriptType == nCurrentScriptType )
            {
                nNextCTLScriptStart = TextFrameIndex(
                        ScriptTypeDetector::endOfCTLScriptType(
                            rText, sal_Int32(nNextCTLScriptStart)));
                if (nNextCTLScriptStart >= TextFrameIndex(rText.getLength())
                    || nNextCTLScriptStart >= nChg)
                    break;
                nCurrentScriptType = ScriptTypeDetector::getCTLScriptType(
                                        rText, sal_Int32(nNextCTLScriptStart));
            }
            nChg = std::min( nChg, nNextCTLScriptStart );
        }
        // special case for dotted circle since it can be used with complex
        // before a mark, so we want it associated with the mark's script
        if (nChg < TextFrameIndex(rText.getLength()) && nChg > TextFrameIndex(0)