crashtesting: fix font-dependent infinite loop in kashida justification

Commit c3c29d31d7 (tdf#140767 Implemented
Syriac justification) indirectly introduced a font-dependent infinite
loop in Writer layout by relaxing some restrictions on kashida candidate
positions. The infinite loop was found in tdf97806-2.odt.

This change fixes an underlying logic bug in Writer, which caused Writer
to attempt to apply kashida justification to non-Arabic/Syriac CTL
scripts. This change also reintroduces some of the previously-lifted
restrictions on kashida candidate positions.

Change-Id: I783bf327f4ef0f1f6a032f0d3dddbcfd60a026fa
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/174503
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
This commit is contained in:
Jonathan Clark 2024-10-04 11:50:25 -06:00
parent 75f3ee7e8a
commit 99c7cf2816
4 changed files with 37 additions and 22 deletions

View file

@ -86,7 +86,7 @@ void KashidaTest::testNoZwnjExpansion()
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"نویس\u200Cه"_ustr).value().nIndex);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"متن"_ustr).value().nIndex);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"مت\u200Cن"_ustr).value().nIndex);
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"مت\u200Cن"_ustr).has_value());
}
// tdf#163105: Do not insert kashida if the position is invalid

View file

@ -134,6 +134,13 @@ bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
return bRet;
}
// Returns true when cCh falls in one of the two code point ranges this file
// treats as Syriac: 0x700-0x74F (the Unicode Syriac block) or 0x860-0x86A
// (the assigned part of the Syriac Supplement block).
bool isSyriacChar(sal_Unicode cCh)
{
    const bool bInSyriacBlock = 0x700 <= cCh && cCh <= 0x74F;
    const bool bInSyriacSupplement = 0x860 <= cCh && cCh <= 0x86A;
    return bInSyriacBlock || bInSyriacSupplement;
}
// Returns true when cCh lies in the inclusive range 0x60C-0x6FE, the part of
// the Unicode Arabic block (U+0600-U+06FF) that this file treats as Arabic
// for kashida placement.
bool isArabicChar(sal_Unicode cCh)
{
    return 0x60C <= cCh && cCh <= 0x6FE;
}
std::optional<i18nutil::KashidaPosition>
GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pValidPositions)
{
@ -283,7 +290,7 @@ GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pVa
{
// Reh, Zain (right joining) final form may appear in the middle of word
// All others except Yeh - only at end of word
if (isRehChar(cCh) || (0x60C <= cCh && 0x6FE >= cCh && nIdx == nWordLen - 1))
if (isRehChar(cCh) || (isArabicChar(cCh) && nIdx == nWordLen - 1))
{
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
// check if character is connectable to previous character,
@ -295,7 +302,8 @@ GetWordKashidaPositionArabic(const OUString& rWord, const std::vector<bool>& pVa
}
// 8. Try any valid position
if (nPriorityLevel >= 7 && nIdx > 0)
if (nPriorityLevel >= 7 && nIdx > 0 && isArabicChar(cPrevCh) && isArabicChar(cCh)
&& !pValidPositions.empty())
{
fnTryInsertBefore(7);
}
@ -339,7 +347,7 @@ GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pVa
sal_Int32 nWordMidpoint = nWordLen / 2;
auto fnPositionValid = [&pValidPositions](sal_Int32 nIdx) {
auto fnPositionValid = [&pValidPositions, &rWord](sal_Int32 nIdx) {
// Exclusions:
// tdf#163105: Do not insert kashida if the position is invalid
@ -348,7 +356,8 @@ GetWordKashidaPositionSyriac(const OUString& rWord, const std::vector<bool>& pVa
return false;
}
return true;
sal_Unicode cCh = rWord[nIdx];
return isSyriacChar(cCh);
};
// End to midpoint
@ -385,7 +394,7 @@ i18nutil::GetWordKashidaPosition(const OUString& rWord, const std::vector<bool>&
{
auto cCh = rWord[nIdx];
if ((cCh >= 0x700 && cCh <= 0x74F) || (cCh >= 0x860 && cCh <= 0x86A))
if (isSyriacChar(cCh))
{
// This word contains Syriac characters.
return GetWordKashidaPositionSyriac(rWord, pValidPositions);

View file

@ -167,10 +167,11 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
const OUString& rWord = aScanner.GetWord();
// Fetch the set of valid positions from VCL, where possible
aValidPositions.clear();
if (SwScriptInfo::IsKashidaScriptText(rInf.GetText(), TextFrameIndex{ aScanner.GetBegin() },
TextFrameIndex{ aScanner.GetLen() }))
{
aValidPositions.clear();
rItr.SeekAndChgAttrIter(TextFrameIndex{ aScanner.GetBegin() }, rInf.GetRefDev());
vcl::text::ComplexTextLayoutFlags nOldLayout = rInf.GetRefDev()->GetLayoutMode();
@ -179,13 +180,13 @@ static bool lcl_CheckKashidaPositions(SwScriptInfo& rSI, SwTextSizeInfo& rInf, S
rInf.GetRefDev()->GetWordKashidaPositions(rWord, &aValidPositions);
rInf.GetRefDev()->SetLayoutMode(nOldLayout);
}
auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord, aValidPositions);
if (stKashidaPos.has_value())
{
TextFrameIndex nNewKashidaPos{ aScanner.GetBegin() + stKashidaPos->nIndex };
aNewKashidaPositions.push_back(nNewKashidaPos);
auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord, aValidPositions);
if (stKashidaPos.has_value())
{
TextFrameIndex nNewKashidaPos{ aScanner.GetBegin() + stKashidaPos->nIndex };
aNewKashidaPositions.push_back(nNewKashidaPos);
}
}
}

View file

@ -1525,16 +1525,21 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
// the search has to be performed on a per word base
while ( aScanner.NextWord() )
{
const OUString& rWord = aScanner.GetWord();
auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord);
if (stKashidaPos.has_value())
if (SwScriptInfo::IsKashidaScriptText(rText, TextFrameIndex{ aScanner.GetBegin() },
TextFrameIndex{ aScanner.GetLen() }))
{
// Only populate kashida positions for the invalidated tail
TextFrameIndex nNewKashidaPos{aScanner.GetBegin() + stKashidaPos->nIndex};
if(nNewKashidaPos >= nLastKashida) {
m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos);
nCntKash++;
const OUString& rWord = aScanner.GetWord();
auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord);
if (stKashidaPos.has_value())
{
// Only populate kashida positions for the invalidated tail
TextFrameIndex nNewKashidaPos{ aScanner.GetBegin() + stKashidaPos->nIndex };
if (nNewKashidaPos >= nLastKashida)
{
m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos);
nCntKash++;
}
}
}
} // end of kashida search