tdf#163105 Consolidated duplicated kashida justification code
The kashida candidate position selection logic was copied-and-pasted from Writer into Edit Engine. This change consolidates the shared code into a library. This change also adds some minimal characteristic tests, which previously did not exist. Change-Id: I2bfbfa79858347803474b754566436f3e74d1a54 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/173883 Reviewed-by: Jonathan Clark <jonathan@libreoffice.org> Tested-by: Jenkins
This commit is contained in:
parent
2fc1034de4
commit
fe4687ed17
8 changed files with 409 additions and 496 deletions
|
@ -68,6 +68,7 @@
|
|||
#include <com/sun/star/i18n/InputSequenceChecker.hpp>
|
||||
#include <vcl/pdfextoutdevdata.hxx>
|
||||
#include <i18nlangtag/mslangid.hxx>
|
||||
#include <i18nutil/kashida.hxx>
|
||||
|
||||
#include <comphelper/processfactory.hxx>
|
||||
#include <comphelper/lok.hxx>
|
||||
|
@ -232,93 +233,6 @@ static void lcl_DrawRedLines( OutputDevice& rOutDev,
|
|||
}
|
||||
}
|
||||
|
||||
// For Kashidas from sw/source/core/text/porlay.cxx
|
||||
|
||||
// Character classification by Unicode joining group.
// IS_JOINING_GROUP(c, g) is true when the UCHAR_JOINING_GROUP property of c,
// as reported by u_getIntPropertyValue(), equals U_JG_<g>.
// Each isXxxChar(c) below tests c against one joining group, or against two
// related groups (Feh, Qaf, Seen/Sad).
// These are macros: the argument c can be evaluated more than once, so do not
// pass an expression with side effects.
#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g )
|
||||
#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
|
||||
#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
|
||||
#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
|
||||
#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
|
||||
#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
|
||||
#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
|
||||
#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
|
||||
#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
|
||||
#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
|
||||
#define isRehChar(c) IS_JOINING_GROUP((c), REH)
|
||||
#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
|
||||
#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
|
||||
#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
|
||||
#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
|
||||
|
||||
// Beh and characters that behave like Beh in medial form.
|
||||
static bool isBehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_BEH:
|
||||
case U_JG_NOON:
|
||||
case U_JG_AFRICAN_NOON:
|
||||
case U_JG_NYA:
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Yeh and characters that behave like Yeh in final form.
|
||||
static bool isYehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_YEH_BARREE:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
case U_JG_YEH_WITH_TAIL:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Returns true when the Unicode joining type of cCh is "transparent".
static bool isTransparentChar(sal_Unicode cCh)
{
    const int32_t nJoiningType = u_getIntPropertyValue(cCh, UCHAR_JOINING_TYPE);
    return U_JT_TRANSPARENT == nJoiningType;
}
|
||||
|
||||
// Returns true when cCh followed by cNextCh forms a ligature.
// The only pair recognised is Lam followed by Alef.
static bool lcl_IsLigature(sal_Unicode cCh, sal_Unicode cNextCh)
{
    if (!isLamChar(cCh))
        return false;

    return isAlefChar(cNextCh);
}
|
||||
|
||||
// Returns true when cCh can be connected to the preceding character cPrevCh:
// cPrevCh must be neither right-joining nor non-joining, and the pair
// cPrevCh + cCh must not form a ligature.
static bool lcl_ConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
{
    const int32_t nPrevJoiningType = u_getIntPropertyValue(cPrevCh, UCHAR_JOINING_TYPE);

    // A previous character of these types offers no connection on this side.
    if (nPrevJoiningType == U_JT_RIGHT_JOINING || nPrevJoiningType == U_JT_NON_JOINING)
        return false;

    // A ligature cPrevCh + cCh must not be split.
    return !lcl_IsLigature(cPrevCh, cCh);
}
|
||||
|
||||
|
||||
|
||||
void ImpEditEngine::UpdateViews( EditView* pCurView )
|
||||
{
|
||||
if ( !IsUpdateLayout() || IsFormatting() || maInvalidRect.IsEmpty() )
|
||||
|
@ -2317,9 +2231,6 @@ void ImpEditEngine::ImpAdjustBlocks(ParaPortion& rParaPortion, EditLine& rLine,
|
|||
{
|
||||
EditPaM aPaM( pNode, nChar+1 );
|
||||
sal_uInt16 nScript = GetI18NScriptType(aPaM);
|
||||
// Arabic script is handled above, but if no Kashida positions are found, use blanks.
|
||||
if (nKashidas)
|
||||
continue;
|
||||
|
||||
if ( pNode->GetChar(nChar) == ' ' )
|
||||
{
|
||||
|
@ -2460,154 +2371,12 @@ void ImpEditEngine::ImpFindKashidas(ContentNode* pNode, sal_Int32 nStart, sal_In
|
|||
// restore selection for proper iteration at the end of the function
|
||||
aWordSel.Max().SetIndex( nSavPos );
|
||||
|
||||
sal_Int32 nIdx = 0, nPrevIdx = 0;
|
||||
sal_Int32 nKashidaPos = -1;
|
||||
sal_Unicode cCh, cPrevCh = 0;
|
||||
auto stKashidaPos = i18nutil::GetWordKashidaPosition(aWord);
|
||||
|
||||
int nPriorityLevel = 7; // 0..6 = level found
|
||||
// 7 not found
|
||||
|
||||
sal_Int32 nWordLen = aWord.getLength();
|
||||
|
||||
// ignore trailing vowel chars
|
||||
while( nWordLen && isTransparentChar( aWord[ nWordLen - 1 ] ))
|
||||
--nWordLen;
|
||||
|
||||
while ( nIdx < nWordLen )
|
||||
if (stKashidaPos.has_value())
|
||||
{
|
||||
cCh = aWord[ nIdx ];
|
||||
sal_Int32 nKashidaPos = aWordSel.Min().GetIndex() + stKashidaPos->nIndex;
|
||||
|
||||
// 1. Priority:
|
||||
// after user inserted kashida
|
||||
if ( 0x640 == cCh )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nIdx;
|
||||
nPriorityLevel = 0;
|
||||
}
|
||||
|
||||
// 2. Priority:
|
||||
// after a Seen or Sad
|
||||
if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
|
||||
{
|
||||
if( isSeenOrSadChar( cCh )
|
||||
&& (aWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nIdx;
|
||||
nPriorityLevel = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Priority:
|
||||
// before final form of Teh Marbuta, Heh, Dal
|
||||
if ( nPriorityLevel >= 2 && nIdx > 0 )
|
||||
{
|
||||
if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining)
|
||||
isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word
|
||||
( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word
|
||||
{
|
||||
|
||||
SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
|
||||
nPriorityLevel = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Priority:
|
||||
// before final form of Alef, Tah, Lam, Kaf or Gaf
|
||||
if ( nPriorityLevel >= 3 && nIdx > 0 )
|
||||
{
|
||||
if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word
|
||||
(( isLamChar ( cCh ) || // Lam,
|
||||
isTahChar ( cCh ) || // Tah,
|
||||
isKafChar ( cCh ) || // Kaf (all dual joining)
|
||||
isGafChar ( cCh ) )
|
||||
&& nIdx == nWordLen - 1)) // only at end of word
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
|
||||
nPriorityLevel = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Priority:
|
||||
// before medial Beh-like
|
||||
if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 )
|
||||
{
|
||||
if ( isBehChar ( cCh ) )
|
||||
{
|
||||
// check if next character is Reh or Yeh-like
|
||||
sal_Unicode cNextCh = aWord[ nIdx + 1 ];
|
||||
if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh ))
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
|
||||
nPriorityLevel = 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Priority:
|
||||
// before the final form of Waw, Ain, Qaf and Feh
|
||||
if ( nPriorityLevel >= 5 && nIdx > 0 )
|
||||
{
|
||||
if ( isWawChar ( cCh ) || // Wav (right joining)
|
||||
// final form may appear in the middle of word
|
||||
(( isAinChar ( cCh ) || // Ain (dual joining)
|
||||
isQafChar ( cCh ) || // Qaf (dual joining)
|
||||
isFehChar ( cCh ) ) // Feh (dual joining)
|
||||
&& nIdx == nWordLen - 1)) // only at end of word
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
|
||||
nPriorityLevel = 5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// other connecting possibilities
|
||||
if ( nPriorityLevel >= 6 && nIdx > 0 )
|
||||
{
|
||||
// Reh, Zain
|
||||
if ( isRehChar ( cCh ) )
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "editeng", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aWordSel.Min().GetIndex() + nPrevIdx;
|
||||
nPriorityLevel = 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not consider vowel marks when checking if a character
|
||||
// can be connected to previous character.
|
||||
if ( !isTransparentChar ( cCh) )
|
||||
{
|
||||
cPrevCh = cCh;
|
||||
nPrevIdx = nIdx;
|
||||
}
|
||||
|
||||
++nIdx;
|
||||
} // end of current word
|
||||
|
||||
if (nKashidaPos >= 0)
|
||||
{
|
||||
SeekCursor(pNode, nKashidaPos + 1, aTmpFont);
|
||||
aTmpFont.SetPhysFont(*GetRefDevice());
|
||||
|
||||
|
|
24
i18nutil/CppunitTest_i18nutil_kashida.mk
Normal file
24
i18nutil/CppunitTest_i18nutil_kashida.mk
Normal file
|
@ -0,0 +1,24 @@
|
|||
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t; fill-column: 100 -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

# CppUnit test target for the kashida position code in the i18nutil library.
$(eval $(call gb_CppunitTest_CppunitTest,i18nutil_kashida))

# Test sources.
$(eval $(call gb_CppunitTest_add_exception_objects,i18nutil_kashida,\
	i18nutil/qa/cppunit/test_kashida \
))

# Libraries the test links against.
$(eval $(call gb_CppunitTest_use_libraries,i18nutil_kashida,\
	i18nutil \
	sal \
	test \
))

# vim: set noet sw=4 ts=4:
|
|
@ -44,6 +44,7 @@ $(eval $(call gb_Library_use_libraries,i18nutil,\
|
|||
|
||||
$(eval $(call gb_Library_add_exception_objects,i18nutil,\
|
||||
i18nutil/source/utility/casefolding \
|
||||
i18nutil/source/utility/kashida \
|
||||
i18nutil/source/utility/oneToOneMapping \
|
||||
i18nutil/source/utility/paper \
|
||||
i18nutil/source/utility/scripttypedetector \
|
||||
|
|
|
@ -12,4 +12,8 @@ $(eval $(call gb_Module_add_targets,i18nutil,\
|
|||
Library_i18nutil \
|
||||
))
|
||||
|
||||
$(eval $(call gb_Module_add_check_targets,i18nutil,\
|
||||
CppunitTest_i18nutil_kashida \
|
||||
))
|
||||
|
||||
# vim: set noet sw=4:
|
||||
|
|
58
i18nutil/qa/cppunit/test_kashida.cxx
Normal file
58
i18nutil/qa/cppunit/test_kashida.cxx
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#include <sal/types.h>
|
||||
#include <cppunit/TestAssert.h>
|
||||
#include <cppunit/TestFixture.h>
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/plugin/TestPlugIn.h>
|
||||
#include <i18nutil/kashida.hxx>
|
||||
|
||||
using namespace i18nutil;
|
||||
|
||||
namespace
|
||||
{
|
||||
class KashidaTest : public CppUnit::TestFixture
|
||||
{
|
||||
public:
|
||||
void testCharacteristic();
|
||||
|
||||
CPPUNIT_TEST_SUITE(KashidaTest);
|
||||
CPPUNIT_TEST(testCharacteristic);
|
||||
CPPUNIT_TEST_SUITE_END();
|
||||
};
|
||||
|
||||
void KashidaTest::testCharacteristic()
|
||||
{
|
||||
// Characteristic tests for kashida candidate selection.
|
||||
// Uses words from sample documents.
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"متن"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"فارسی"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"با"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"نویسه"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"کشیده"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), GetWordKashidaPosition(u"برای"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"چینش"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"بهتر"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"ببببب"_ustr).has_value());
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپپپپ"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تطویل"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT(!GetWordKashidaPosition(u"بپ"_ustr).has_value());
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تطوی"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تحویل"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), GetWordKashidaPosition(u"تشویل"_ustr).value().nIndex);
|
||||
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), GetWordKashidaPosition(u"تمثیل"_ustr).value().nIndex);
|
||||
}
|
||||
|
||||
CPPUNIT_TEST_SUITE_REGISTRATION(KashidaTest);
|
||||
}
|
||||
|
||||
CPPUNIT_PLUGIN_IMPLEMENT();
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
|
286
i18nutil/source/utility/kashida.cxx
Normal file
286
i18nutil/source/utility/kashida.cxx
Normal file
|
@ -0,0 +1,286 @@
|
|||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#include <i18nutil/kashida.hxx>
|
||||
#include <i18nutil/unicode.hxx>
|
||||
#include <sal/log.hxx>
|
||||
|
||||
namespace
|
||||
{
|
||||
/*
|
||||
https://www.khtt.net/en/page/1821/the-big-kashida-secret
|
||||
|
||||
the rules of priorities that govern the addition of kashidas in Arabic text
|
||||
made ... for ... Explorer 5.5 browser.
|
||||
|
||||
The kashida justification is based on a connection priority scheme that
|
||||
decides where kashidas are put automatically.
|
||||
|
||||
This is how the software decides on kashida-inserting priorities:
|
||||
1. First it looks for characters with the highest priority in each word,
|
||||
which means kashida-extensions will only be used in one position in each
|
||||
word. Not more.
|
||||
2. The kashida will be connected to the character with the highest priority.
|
||||
3. If kashida connection opportunities are found with an equal level of
|
||||
priority in one word, the kashida will be placed towards the end of the
|
||||
word.
|
||||
|
||||
The priority list of characters and the positioning is as follows:
|
||||
1. after a kashida that is manually placed in the text by the user,
|
||||
2. after a Seen or Sad (initial and medial form),
|
||||
3. before the final form of Taa Marbutah, Haa, Dal,
|
||||
4. before the final form of Alef, Tah, Lam, Kaf and Gaf,
|
||||
5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah,
|
||||
6. before the final form of Waw, Ain, Qaf and Fa,
|
||||
7. before the final form of other characters that can be connected.
|
||||
*/
|
||||
|
||||
// Character classification by Unicode joining group.
// IS_JOINING_GROUP(c, g) is true when the UCHAR_JOINING_GROUP property of c,
// as reported by u_getIntPropertyValue(), equals U_JG_<g>.
// Each isXxxChar(c) below tests c against one joining group, or against two
// related groups (Feh, Qaf, Seen/Sad).
// These are macros: the argument c can be evaluated more than once, so do not
// pass an expression with side effects.
#define IS_JOINING_GROUP(c, g) (u_getIntPropertyValue((c), UCHAR_JOINING_GROUP) == U_JG_##g)
|
||||
#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
|
||||
#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
|
||||
#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
|
||||
#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
|
||||
#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
|
||||
#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
|
||||
#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
|
||||
#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
|
||||
#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
|
||||
#define isRehChar(c) IS_JOINING_GROUP((c), REH)
|
||||
#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
|
||||
#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
|
||||
#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
|
||||
#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
|
||||
|
||||
// Beh and characters that behave like Beh in medial form.
|
||||
bool isBehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_BEH:
|
||||
case U_JG_NOON:
|
||||
case U_JG_AFRICAN_NOON:
|
||||
case U_JG_NYA:
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Yeh and characters that behave like Yeh in final form.
|
||||
bool isYehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_YEH_BARREE:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
case U_JG_YEH_WITH_TAIL:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Returns true when the Unicode joining type of cCh is "transparent".
bool isTransparentChar(sal_Unicode cCh)
{
    const int32_t nJoiningType = u_getIntPropertyValue(cCh, UCHAR_JOINING_TYPE);
    return U_JT_TRANSPARENT == nJoiningType;
}
|
||||
|
||||
// Checks if cCh + cNectCh builds a ligature (used for Kashidas)
|
||||
bool isLigature(sal_Unicode cCh, sal_Unicode cNextCh)
|
||||
{
|
||||
// Lam + Alef
|
||||
return (isLamChar(cCh) && isAlefChar(cNextCh));
|
||||
}
|
||||
|
||||
// Checks if cCh is connectable to cPrevCh (used for Kashidas)
|
||||
bool CanConnectToPrev(sal_Unicode cCh, sal_Unicode cPrevCh)
|
||||
{
|
||||
const int32_t nJoiningType = u_getIntPropertyValue(cPrevCh, UCHAR_JOINING_TYPE);
|
||||
bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
|
||||
|
||||
// check for ligatures cPrevChar + cChar
|
||||
if (bRet)
|
||||
bRet = !isLigature(cPrevCh, cCh);
|
||||
|
||||
return bRet;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<i18nutil::KashidaPosition> i18nutil::GetWordKashidaPosition(const OUString& rWord)
|
||||
{
|
||||
sal_Int32 nIdx = 0;
|
||||
sal_Int32 nPrevIdx = 0;
|
||||
sal_Int32 nKashidaPos = -1;
|
||||
sal_Unicode cCh = 0;
|
||||
sal_Unicode cPrevCh = 0;
|
||||
|
||||
int nPriorityLevel = 7; // 0..6 = level found, 7 not found
|
||||
|
||||
sal_Int32 nWordLen = rWord.getLength();
|
||||
|
||||
// ignore trailing vowel chars
|
||||
while (nWordLen && isTransparentChar(rWord[nWordLen - 1]))
|
||||
{
|
||||
--nWordLen;
|
||||
}
|
||||
|
||||
while (nIdx < nWordLen)
|
||||
{
|
||||
cCh = rWord[nIdx];
|
||||
|
||||
// 1. Priority:
|
||||
// after user inserted kashida
|
||||
if (0x640 == cCh)
|
||||
{
|
||||
nKashidaPos = nIdx;
|
||||
nPriorityLevel = 0;
|
||||
}
|
||||
|
||||
// 2. Priority:
|
||||
// after a Seen or Sad
|
||||
if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
|
||||
{
|
||||
if (isSeenOrSadChar(cCh)
|
||||
&& (rWord[nIdx + 1] != 0x200C)) // #i98410#: prevent ZWNJ expansion
|
||||
{
|
||||
nKashidaPos = nIdx;
|
||||
nPriorityLevel = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Priority:
|
||||
// before final form of Teh Marbuta, Heh, Dal
|
||||
if (nPriorityLevel >= 2 && nIdx > 0)
|
||||
{
|
||||
// Teh Marbuta (right joining)
|
||||
// Dal (right joining) final form may appear in the middle of word
|
||||
// Heh (dual joining) only at end of word
|
||||
if (isTehMarbutaChar(cCh) || isDalChar(cCh) || (isHehChar(cCh) && nIdx == nWordLen - 1))
|
||||
{
|
||||
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
|
||||
// check if character is connectable to previous character,
|
||||
if (CanConnectToPrev(cCh, cPrevCh))
|
||||
{
|
||||
nKashidaPos = nPrevIdx;
|
||||
nPriorityLevel = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Priority:
|
||||
// before final form of Alef, Tah, Lam, Kaf or Gaf
|
||||
if (nPriorityLevel >= 3 && nIdx > 0)
|
||||
{
|
||||
// Alef (right joining) final form may appear in the middle of word
|
||||
// Lam, Tah, Kaf (all dual joining) only at end of word
|
||||
if (isAlefChar(cCh)
|
||||
|| ((isLamChar(cCh) || isTahChar(cCh) || isKafChar(cCh) || isGafChar(cCh))
|
||||
&& nIdx == nWordLen - 1))
|
||||
{
|
||||
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
|
||||
// check if character is connectable to previous character,
|
||||
if (CanConnectToPrev(cCh, cPrevCh))
|
||||
{
|
||||
nKashidaPos = nPrevIdx;
|
||||
nPriorityLevel = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Priority:
|
||||
// before medial Beh-like
|
||||
if (nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1)
|
||||
{
|
||||
if (isBehChar(cCh))
|
||||
{
|
||||
// check if next character is Reh or Yeh-like
|
||||
sal_Unicode cNextCh = rWord[nIdx + 1];
|
||||
if (isRehChar(cNextCh) || isYehChar(cNextCh))
|
||||
{
|
||||
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
|
||||
// check if character is connectable to previous character,
|
||||
if (CanConnectToPrev(cCh, cPrevCh))
|
||||
{
|
||||
nKashidaPos = nPrevIdx;
|
||||
nPriorityLevel = 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Priority:
|
||||
// before the final form of Waw, Ain, Qaf and Feh
|
||||
if (nPriorityLevel >= 5 && nIdx > 0)
|
||||
{
|
||||
// Wav (right joining) final form may appear in the middle of word
|
||||
// Ain, Qaf, Feh (all dual joining) only at end of word
|
||||
if (isWawChar(cCh)
|
||||
|| ((isAinChar(cCh) || isQafChar(cCh) || isFehChar(cCh)) && nIdx == nWordLen - 1))
|
||||
{
|
||||
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
|
||||
// check if character is connectable to previous character,
|
||||
if (CanConnectToPrev(cCh, cPrevCh))
|
||||
{
|
||||
nKashidaPos = nPrevIdx;
|
||||
nPriorityLevel = 5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// other connecting possibilities
|
||||
if (nPriorityLevel >= 6 && nIdx > 0)
|
||||
{
|
||||
// Reh, Zain
|
||||
if (isRehChar(cCh))
|
||||
{
|
||||
SAL_WARN_IF(0 == cPrevCh, "i18n", "No previous character");
|
||||
// check if character is connectable to previous character,
|
||||
if (CanConnectToPrev(cCh, cPrevCh))
|
||||
{
|
||||
nKashidaPos = nPrevIdx;
|
||||
nPriorityLevel = 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not consider vowel marks when checking if a character
|
||||
// can be connected to previous character.
|
||||
if (!isTransparentChar(cCh))
|
||||
{
|
||||
cPrevCh = cCh;
|
||||
nPrevIdx = nIdx;
|
||||
}
|
||||
|
||||
++nIdx;
|
||||
} // end of current word
|
||||
|
||||
if (-1 != nKashidaPos)
|
||||
{
|
||||
return KashidaPosition{ nKashidaPos };
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
|
24
include/i18nutil/kashida.hxx
Normal file
24
include/i18nutil/kashida.hxx
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
|
||||
/*
|
||||
* This file is part of the LibreOffice project.
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
|
||||
#pragma once

#include <i18nutil/i18nutildllapi.h>
#include <rtl/ustring.hxx>
#include <optional>

namespace i18nutil
{
/** Result of the kashida candidate search for one word. */
struct KashidaPosition
{
    /// Index of the selected candidate within the examined word.
    sal_Int32 nIndex;
};

/** Selects the kashida insertion candidate for a single word.

    @param rWord  the word to examine
    @return the selected position, or an empty optional when the word has no
            kashida candidate
 */
I18NUTIL_DLLPUBLIC std::optional<KashidaPosition> GetWordKashidaPosition(const OUString& rWord);
}
|
||||
|
||||
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
|
|
@ -79,124 +79,12 @@
|
|||
#include <unicode/ubidi.h>
|
||||
#include <i18nutil/scripttypedetector.hxx>
|
||||
#include <i18nutil/unicode.hxx>
|
||||
#include <i18nutil/kashida.hxx>
|
||||
#include <unotxdoc.hxx>
|
||||
|
||||
using namespace ::com::sun::star;
|
||||
using namespace i18n::ScriptType;
|
||||
|
||||
/*
|
||||
https://www.khtt.net/en/page/1821/the-big-kashida-secret
|
||||
|
||||
the rules of priorities that govern the addition of kashidas in Arabic text
|
||||
made ... for ... Explorer 5.5 browser.
|
||||
|
||||
The kashida justification is based on a connection priority scheme that
|
||||
decides where kashidas are put automatically.
|
||||
|
||||
This is how the software decides on kashida-inserting priorities:
|
||||
1. First it looks for characters with the highest priority in each word,
|
||||
which means kashida-extensions will only be used in one position in each
|
||||
word. Not more.
|
||||
2. The kashida will be connected to the character with the highest priority.
|
||||
3. If kashida connection opportunities are found with an equal level of
|
||||
priority in one word, the kashida will be placed towards the end of the
|
||||
word.
|
||||
|
||||
The priority list of characters and the positioning is as follows:
|
||||
1. after a kashida that is manually placed in the text by the user,
|
||||
2. after a Seen or Sad (initial and medial form),
|
||||
3. before the final form of Taa Marbutah, Haa, Dal,
|
||||
4. before the final form of Alef, Tah, Lam, Kaf and Gaf,
|
||||
5. before the preceding medial Baa of Ra, Ya and Alef Maqsurah,
|
||||
6. before the final form of Waw, Ain, Qaf and Fa,
|
||||
7. before the final form of other characters that can be connected.
|
||||
*/
|
||||
|
||||
// Character classification by Unicode joining group.
// IS_JOINING_GROUP(c, g) is true when the UCHAR_JOINING_GROUP property of c,
// as reported by u_getIntPropertyValue(), equals U_JG_<g>.
// Each isXxxChar(c) below tests c against one joining group, or against two
// related groups (Feh, Qaf, Seen/Sad).
// These are macros: the argument c can be evaluated more than once, so do not
// pass an expression with side effects.
#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( (c), UCHAR_JOINING_GROUP ) == U_JG_##g )
|
||||
#define isAinChar(c) IS_JOINING_GROUP((c), AIN)
|
||||
#define isAlefChar(c) IS_JOINING_GROUP((c), ALEF)
|
||||
#define isDalChar(c) IS_JOINING_GROUP((c), DAL)
|
||||
#define isFehChar(c) (IS_JOINING_GROUP((c), FEH) || IS_JOINING_GROUP((c), AFRICAN_FEH))
|
||||
#define isGafChar(c) IS_JOINING_GROUP((c), GAF)
|
||||
#define isHehChar(c) IS_JOINING_GROUP((c), HEH)
|
||||
#define isKafChar(c) IS_JOINING_GROUP((c), KAF)
|
||||
#define isLamChar(c) IS_JOINING_GROUP((c), LAM)
|
||||
#define isQafChar(c) (IS_JOINING_GROUP((c), QAF) || IS_JOINING_GROUP((c), AFRICAN_QAF))
|
||||
#define isRehChar(c) IS_JOINING_GROUP((c), REH)
|
||||
#define isTahChar(c) IS_JOINING_GROUP((c), TAH)
|
||||
#define isTehMarbutaChar(c) IS_JOINING_GROUP((c), TEH_MARBUTA)
|
||||
#define isWawChar(c) IS_JOINING_GROUP((c), WAW)
|
||||
#define isSeenOrSadChar(c) (IS_JOINING_GROUP((c), SAD) || IS_JOINING_GROUP((c), SEEN))
|
||||
|
||||
// Beh and characters that behave like Beh in medial form.
|
||||
static bool isBehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_BEH:
|
||||
case U_JG_NOON:
|
||||
case U_JG_AFRICAN_NOON:
|
||||
case U_JG_NYA:
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Yeh and characters that behave like Yeh in final form.
|
||||
static bool isYehChar(sal_Unicode cCh)
|
||||
{
|
||||
bool bRet = false;
|
||||
switch (u_getIntPropertyValue(cCh, UCHAR_JOINING_GROUP))
|
||||
{
|
||||
case U_JG_YEH:
|
||||
case U_JG_FARSI_YEH:
|
||||
case U_JG_YEH_BARREE:
|
||||
case U_JG_BURUSHASKI_YEH_BARREE:
|
||||
case U_JG_YEH_WITH_TAIL:
|
||||
bRet = true;
|
||||
break;
|
||||
default:
|
||||
bRet = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
// Returns true when the Unicode joining type of cCh is "transparent".
static bool isTransparentChar(sal_Unicode cCh)
{
    const int32_t nJoiningType = u_getIntPropertyValue(cCh, UCHAR_JOINING_TYPE);
    return U_JT_TRANSPARENT == nJoiningType;
}
|
||||
|
||||
// Checks if cCh + cNectCh builds a ligature (used for Kashidas)
|
||||
static bool lcl_IsLigature( sal_Unicode cCh, sal_Unicode cNextCh )
|
||||
{
|
||||
// Lam + Alef
|
||||
return ( isLamChar ( cCh ) && isAlefChar ( cNextCh ));
|
||||
}
|
||||
|
||||
// Checks if cCh is connectable to cPrevCh (used for Kashidas)
|
||||
static bool lcl_ConnectToPrev( sal_Unicode cCh, sal_Unicode cPrevCh )
|
||||
{
|
||||
const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
|
||||
bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
|
||||
|
||||
// check for ligatures cPrevChar + cChar
|
||||
if( bRet )
|
||||
bRet = !lcl_IsLigature( cPrevCh, cCh );
|
||||
|
||||
return bRet;
|
||||
}
|
||||
|
||||
static bool lcl_HasStrongLTR ( std::u16string_view rText, sal_Int32 nStart, sal_Int32 nEnd )
|
||||
{
|
||||
for( sal_Int32 nCharIdx = nStart; nCharIdx < nEnd; ++nCharIdx )
|
||||
|
@ -1618,157 +1506,16 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
|
|||
while ( aScanner.NextWord() )
|
||||
{
|
||||
const OUString& rWord = aScanner.GetWord();
|
||||
auto stKashidaPos = i18nutil::GetWordKashidaPosition(rWord);
|
||||
|
||||
sal_Int32 nIdx = 0, nPrevIdx = 0;
|
||||
sal_Int32 nKashidaPos = -1;
|
||||
sal_Unicode cCh, cPrevCh = 0;
|
||||
|
||||
int nPriorityLevel = 7; // 0..6 = level found
|
||||
// 7 not found
|
||||
|
||||
sal_Int32 nWordLen = rWord.getLength();
|
||||
|
||||
// ignore trailing vowel chars
|
||||
while( nWordLen && isTransparentChar( rWord[ nWordLen - 1 ] ))
|
||||
--nWordLen;
|
||||
|
||||
while (nIdx < nWordLen)
|
||||
if (stKashidaPos.has_value())
|
||||
{
|
||||
cCh = rWord[ nIdx ];
|
||||
|
||||
// 1. Priority:
|
||||
// after user inserted kashida
|
||||
if ( 0x640 == cCh )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nIdx;
|
||||
nPriorityLevel = 0;
|
||||
// Only populate kashida positions for the invalidated tail
|
||||
TextFrameIndex nNewKashidaPos{aScanner.GetBegin() + stKashidaPos->nIndex};
|
||||
if(nNewKashidaPos >= nLastKashida) {
|
||||
m_Kashida.insert(m_Kashida.begin() + nCntKash, nNewKashidaPos);
|
||||
nCntKash++;
|
||||
}
|
||||
|
||||
// 2. Priority:
|
||||
// after a Seen or Sad
|
||||
if (nPriorityLevel >= 1 && nIdx < nWordLen - 1)
|
||||
{
|
||||
if( isSeenOrSadChar( cCh )
|
||||
&& (rWord[ nIdx+1 ] != 0x200C) ) // #i98410#: prevent ZWNJ expansion
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nIdx;
|
||||
nPriorityLevel = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Priority:
|
||||
// before final form of Teh Marbuta, Heh, Dal
|
||||
if ( nPriorityLevel >= 2 && nIdx > 0 )
|
||||
{
|
||||
if ( isTehMarbutaChar ( cCh ) || // Teh Marbuta (right joining)
|
||||
isDalChar ( cCh ) || // Dal (right joining) final form may appear in the middle of word
|
||||
( isHehChar ( cCh ) && nIdx == nWordLen - 1)) // Heh (dual joining) only at end of word
|
||||
{
|
||||
|
||||
SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nPrevIdx;
|
||||
nPriorityLevel = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Priority:
|
||||
// before final form of Alef, Tah, Lam, Kaf or Gaf
|
||||
if ( nPriorityLevel >= 3 && nIdx > 0 )
|
||||
{
|
||||
if ( isAlefChar ( cCh ) || // Alef (right joining) final form may appear in the middle of word
|
||||
(( isLamChar ( cCh ) || // Lam,
|
||||
isTahChar ( cCh ) || // Tah,
|
||||
isKafChar ( cCh ) || // Kaf (all dual joining)
|
||||
isGafChar ( cCh ) )
|
||||
&& nIdx == nWordLen - 1)) // only at end of word
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nPrevIdx;
|
||||
nPriorityLevel = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Priority:
|
||||
// before medial Beh-like
|
||||
if ( nPriorityLevel >= 4 && nIdx > 0 && nIdx < nWordLen - 1 )
|
||||
{
|
||||
if ( isBehChar ( cCh ) )
|
||||
{
|
||||
// check if next character is Reh or Yeh-like
|
||||
sal_Unicode cNextCh = rWord[ nIdx + 1 ];
|
||||
if ( isRehChar ( cNextCh ) || isYehChar ( cNextCh ))
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nPrevIdx;
|
||||
nPriorityLevel = 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Priority:
|
||||
// before the final form of Waw, Ain, Qaf and Feh
|
||||
if ( nPriorityLevel >= 5 && nIdx > 0 )
|
||||
{
|
||||
if ( isWawChar ( cCh ) || // Wav (right joining)
|
||||
// final form may appear in the middle of word
|
||||
(( isAinChar ( cCh ) || // Ain (dual joining)
|
||||
isQafChar ( cCh ) || // Qaf (dual joining)
|
||||
isFehChar ( cCh ) ) // Feh (dual joining)
|
||||
&& nIdx == nWordLen - 1)) // only at end of word
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nPrevIdx;
|
||||
nPriorityLevel = 5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// other connecting possibilities
|
||||
if ( nPriorityLevel >= 6 && nIdx > 0 )
|
||||
{
|
||||
// Reh, Zain
|
||||
if ( isRehChar ( cCh ) )
|
||||
{
|
||||
SAL_WARN_IF( 0 == cPrevCh, "sw.core", "No previous character" );
|
||||
// check if character is connectable to previous character,
|
||||
if ( lcl_ConnectToPrev( cCh, cPrevCh ) )
|
||||
{
|
||||
nKashidaPos = aScanner.GetBegin() + nPrevIdx;
|
||||
nPriorityLevel = 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not consider vowel marks when checking if a character
|
||||
// can be connected to previous character.
|
||||
if ( !isTransparentChar ( cCh) )
|
||||
{
|
||||
cPrevCh = cCh;
|
||||
nPrevIdx = nIdx;
|
||||
}
|
||||
|
||||
++nIdx;
|
||||
} // end of current word
|
||||
|
||||
if ( -1 != nKashidaPos )
|
||||
{
|
||||
m_Kashida.insert(m_Kashida.begin() + nCntKash, TextFrameIndex(nKashidaPos));
|
||||
nCntKash++;
|
||||
}
|
||||
} // end of kashida search
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue