tdf#139863, tdf#155514: Don’t split COMPLEX text portions

When a COMPLEX text portion contains more than one script, it gets split
into multiple portions. This is broken in more than one way:
* ScriptTypeDetector::endOfCTLScriptType() does not handle surrogate
  pairs, so it splits the portion in the middle of a surrogate pair,
  causing tdf#139863.
* If a combining mark of a different COMPLEX script is used, it gets split
  into its own portion, causing tdf#155514

This was supposedly done to fix https://bz.apache.org/ooo/show_bug.cgi?id=28203,
but testing the documents from this bug report with this change does not
show recurrence of any of the kashida issues, so this split is unnecessary.

Change-Id: I9bd60efe3023b96ee74f861a5dc15a2229c27d79
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152354
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
This commit is contained in:
Khaled Hosny 2023-05-28 16:28:52 +03:00 committed by خالد حسني
parent 1a6d250f34
commit 98f7ec383d

View file

@ -33,7 +33,6 @@
#include <unicode/uchar.h> #include <unicode/uchar.h>
#include <com/sun/star/i18n/ScriptType.hpp> #include <com/sun/star/i18n/ScriptType.hpp>
#include <com/sun/star/i18n/CharacterIteratorMode.hpp> #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
#include <com/sun/star/i18n/CTLScriptType.hpp>
#include <com/sun/star/i18n/WordType.hpp> #include <com/sun/star/i18n/WordType.hpp>
#include <com/sun/star/i18n/XBreakIterator.hpp> #include <com/sun/star/i18n/XBreakIterator.hpp>
#include <paratr.hxx> #include <paratr.hxx>
@ -1447,29 +1446,6 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
if (nChg > TextFrameIndex(rText.getLength()) || nChg < TextFrameIndex(0)) if (nChg > TextFrameIndex(rText.getLength()) || nChg < TextFrameIndex(0))
nChg = TextFrameIndex(rText.getLength()); nChg = TextFrameIndex(rText.getLength());
// #i28203#
// for 'complex' portions, we make sure that a portion does not contain more
// than one script:
if( i18n::ScriptType::COMPLEX == nScript )
{
const short nScriptType = ScriptTypeDetector::getCTLScriptType(
rText, sal_Int32(nSearchStt) );
TextFrameIndex nNextCTLScriptStart = nSearchStt;
short nCurrentScriptType = nScriptType;
while( css::i18n::CTLScriptType::CTL_UNKNOWN == nCurrentScriptType || nScriptType == nCurrentScriptType )
{
nNextCTLScriptStart = TextFrameIndex(
ScriptTypeDetector::endOfCTLScriptType(
rText, sal_Int32(nNextCTLScriptStart)));
if (nNextCTLScriptStart >= TextFrameIndex(rText.getLength())
|| nNextCTLScriptStart >= nChg)
break;
nCurrentScriptType = ScriptTypeDetector::getCTLScriptType(
rText, sal_Int32(nNextCTLScriptStart));
}
nChg = std::min( nChg, nNextCTLScriptStart );
}
// special case for dotted circle since it can be used with complex // special case for dotted circle since it can be used with complex
// before a mark, so we want it associated with the mark's script // before a mark, so we want it associated with the mark's script
if (nChg < TextFrameIndex(rText.getLength()) && nChg > TextFrameIndex(0) if (nChg < TextFrameIndex(rText.getLength()) && nChg > TextFrameIndex(0)