tdf#164106 Fix reordered glyph positioning with split grapheme clusters

Due to formatting, grapheme clusters may be split across
multiple layouts. Layouts containing split grapheme clusters are created
by laying out the complete string and extracting only the necessary
glyphs based on source codepoint index.

This approach is good enough for most diacritic cases, but it cannot
handle certain substitution cases where glyphs with advances would be
interleaved with other layouts. Sub-layouts must be contiguous.

This change introduces code to disable grapheme cluster splitting in
the cases that cannot be handled correctly.

Change-Id: I122abbf9c3f8a5efa4c72ad47991d0ad9ff8a8c0
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/177927
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
This commit is contained in:
Jonathan Clark 2024-12-05 20:49:03 -07:00
parent 00df9cec57
commit 9b2eaa37ba
3 changed files with 262 additions and 4 deletions

View file

@ -0,0 +1,133 @@
<?xml version='1.0' encoding='UTF-8'?>
<office:document xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" 
xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" office:version="1.4" office:mimetype="application/vnd.oasis.opendocument.text">
<office:meta><meta:creation-date>2024-11-30T17:13:03</meta:creation-date><meta:initial-creator>your servant</meta:initial-creator><dc:language>en-US</dc:language><dc:date>2024-12-05T06:45:52.650400638</dc:date><meta:editing-cycles>6</meta:editing-cycles><meta:editing-duration>PT4M37S</meta:editing-duration><meta:generator>LibreOfficeDev/25.8.0.0.alpha0$Linux_X86_64 LibreOffice_project/5f4d5a012865d717040012eb0f698a725b82d4cc</meta:generator><meta:document-statistic meta:table-count="0" meta:image-count="0" meta:object-count="0" meta:page-count="1" meta:paragraph-count="2" meta:word-count="2" meta:character-count="16" meta:non-whitespace-character-count="16"/><meta:user-defined meta:name="AppVersion">15.0000</meta:user-defined><meta:template xlink:type="simple" xlink:actuate="onRequest" xlink:title="Normal" xlink:href=""/></office:meta>
<office:font-face-decls>
<style:font-face style:name="Arial" svg:font-family="Arial" style:font-family-generic="swiss" style:font-pitch="variable"/>
<style:font-face style:name="NSimSun" svg:font-family="NSimSun" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Noto Sans" svg:font-family="'Noto Sans'" style:font-family-generic="roman" style:font-pitch="variable"/>
<style:font-face style:name="Tahoma1" svg:font-family="Tahoma" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Times New Roman" svg:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable"/>
<style:font-face style:name="Times New Roman1" svg:font-family="'Times New Roman'" style:font-family-generic="system" style:font-pitch="variable"/>
</office:font-face-decls>
<office:styles>
<style:default-style style:family="graphic">
<style:graphic-properties svg:stroke-color="#3465a4" draw:fill-color="#729fcf" fo:wrap-option="no-wrap" draw:shadow-offset-x="0.1181in" draw:shadow-offset-y="0.1181in" draw:start-line-spacing-horizontal="0.1114in" draw:start-line-spacing-vertical="0.1114in" draw:end-line-spacing-horizontal="0.1114in" draw:end-line-spacing-vertical="0.1114in" style:writing-mode="lr-tb" style:flow-with-text="false"/>
<style:paragraph-properties style:text-autospace="ideograph-alpha" style:line-break="strict" loext:tab-stop-distance="0in" style:font-independent-line-spacing="false">
<style:tab-stops/>
</style:paragraph-properties>
<style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN"/>
</style:default-style>
<style:default-style style:family="paragraph">
<style:paragraph-properties fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:text-autospace="ideograph-alpha" style:punctuation-wrap="hanging" style:line-break="strict" style:tab-stop-distance="0.4925in" style:writing-mode="lr-tb"/>
<style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Arial" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="NSimSun" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Tahoma1" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/>
</style:default-style>
<style:default-style style:family="table">
<style:table-properties table:border-model="collapsing"/>
</style:default-style>
<style:default-style style:family="table-row">
<style:table-row-properties fo:keep-together="auto"/>
</style:default-style>
<style:style style:name="LO-normal" style:family="paragraph">
<style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0in" style:contextual-spacing="false" fo:text-align="start" style:justify-single-word="false" fo:orphans="2" fo:widows="2" fo:hyphenation-ladder-count="no-limit" fo:hyphenation-keep="auto" loext:hyphenation-keep-type="column" style:writing-mode="lr-tb"/>
<style:text-properties style:use-window-font-color="true" loext:opacity="0%" style:font-name="Times New Roman" fo:font-family="'Times New Roman'" style:font-family-generic="roman" style:font-pitch="variable" fo:font-size="12pt" fo:language="fr" fo:country="CA" style:letter-kerning="false" style:font-name-asian="Times New Roman1" style:font-family-asian="'Times New Roman'" style:font-family-generic-asian="system" style:font-pitch-asian="variable" style:font-size-asian="12pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Times New Roman1" style:font-family-complex="'Times New Roman'" style:font-family-generic-complex="system" style:font-pitch-complex="variable" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2" loext:hyphenation-no-caps="false" loext:hyphenation-no-last-word="false" loext:hyphenation-word-char-count="5" loext:hyphenation-zone="no-limit"/>
</style:style>
<text:outline-style style:name="Outline">
<text:outline-level-style text:level="1" loext:num-list-format="%1%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="2" loext:num-list-format="%2%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="3" loext:num-list-format="%3%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="4" loext:num-list-format="%4%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="5" loext:num-list-format="%5%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="6" loext:num-list-format="%6%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="7" loext:num-list-format="%7%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="8" loext:num-list-format="%8%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="9" loext:num-list-format="%9%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
<text:outline-level-style text:level="10" loext:num-list-format="%10%" style:num-format="">
<style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
<style:list-level-label-alignment text:label-followed-by="listtab"/>
</style:list-level-properties>
</text:outline-level-style>
</text:outline-style>
<text:notes-configuration text:note-class="footnote" style:num-format="1" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="document"/>
<text:notes-configuration text:note-class="endnote" style:num-format="i" text:start-value="0"/>
<text:linenumbering-configuration text:number-lines="false" text:offset="0.1965in" style:num-format="1" text:number-position="left" text:increment="5"/>
<style:default-page-layout>
<style:page-layout-properties style:writing-mode="lr-tb" style:layout-grid-standard-mode="true"/>
</style:default-page-layout>
</office:styles>
<office:automatic-styles>
<style:style style:name="P1" style:family="paragraph" style:parent-style-name="LO-normal">
<style:text-properties style:font-name-complex="Noto Sans"/>
</style:style>
<style:style style:name="P2" style:family="paragraph" style:parent-style-name="LO-normal">
<style:text-properties fo:color="#0000ff" loext:opacity="100%" style:font-name-complex="Noto Sans"/>
</style:style>
<style:style style:name="T1" style:family="text">
<style:text-properties fo:color="#00ff00" loext:opacity="100%"/>
</style:style>
<style:style style:name="T2" style:family="text">
<style:text-properties fo:color="#0000ff" loext:opacity="100%"/>
</style:style>
<style:page-layout style:name="pm1">
<style:page-layout-properties fo:page-width="8.5in" fo:page-height="11in" style:num-format="1" style:print-orientation="portrait" fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in" style:writing-mode="lr-tb" style:layout-grid-color="#c0c0c0" style:layout-grid-lines="136" style:layout-grid-base-height="0.0693in" style:layout-grid-ruby-height="0in" style:layout-grid-mode="none" style:layout-grid-ruby-below="false" style:layout-grid-print="false" style:layout-grid-display="false" style:layout-grid-base-width="0.1665in" style:layout-grid-snap-to="true" style:footnote-max-height="0in" loext:margin-gutter="0in">
<style:footnote-sep style:width="0.0071in" style:distance-before-sep="0.0398in" style:distance-after-sep="0.0398in" style:line-style="solid" style:adjustment="left" style:rel-width="25%" style:color="#000000"/>
</style:page-layout-properties>
<style:header-style/>
<style:footer-style/>
</style:page-layout>
<style:style style:name="dp1" style:family="drawing-page">
<style:drawing-page-properties draw:background-size="full"/>
</style:style>
</office:automatic-styles>
<office:master-styles>
<style:master-page style:name="Standard" style:page-layout-name="pm1" draw:style-name="dp1"/>
</office:master-styles>
<office:body>
<office:text>
<text:sequence-decls>
<text:sequence-decl text:display-outline-level="0" text:name="Illustration"/>
<text:sequence-decl text:display-outline-level="0" text:name="Table"/>
<text:sequence-decl text:display-outline-level="0" text:name="Text"/>
<text:sequence-decl text:display-outline-level="0" text:name="Drawing"/>
<text:sequence-decl text:display-outline-level="0" text:name="Figure"/>
</text:sequence-decls>
<!-- Both paragraphs below carry the same string. The first divides it between two differently colored spans (T1 and T2), with the span boundary falling between "र्" and "भ"; the second holds the whole string in a single run. -->
<text:p text:style-name="P1"><text:span text:style-name="T1">वीथीर्</text:span><text:span text:style-name="T2">भजनमार्गान्</text:span></text:p>
<text:p text:style-name="P2">वीथीर्भजनमार्गान्</text:p>
</office:text>
</office:body>
</office:document>

View file

@ -6077,6 +6077,55 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf162750SmallCapsLigature)
CPPUNIT_ASSERT_EQUAL(u"FI"_ustr, aText.at(2).trim());
}
// tdf#164106: exports tdf164106.fodt to PDF and checks that the first seven text
// objects on the page match the last seven, both in extracted text and in
// horizontal extent (left and right edges within 0.2 units).
CPPUNIT_TEST_FIXTURE(PdfExportTest2, testTdf164106SplitReorderedClusters)
{
    saveAsPDF(u"tdf164106.fodt");

    auto pPdfDocument = parsePDFExport();
    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());

    auto pPdfPage = pPdfDocument->openPage(/*nIndex*/ 0);
    CPPUNIT_ASSERT(pPdfPage);
    auto pTextPage = pPdfPage->getTextPage();
    CPPUNIT_ASSERT(pTextPage);

    const int nObjectsOnPage = pPdfPage->getObjectCount();
    CPPUNIT_ASSERT_EQUAL(14, nObjectsOnPage);

    // Gather the text and bounding box of every text object, in page order.
    std::vector<OUString> aRunText;
    std::vector<basegfx::B2DRectangle> aRunBounds;
    for (int nObject = 0; nObject < nObjectsOnPage; ++nObject)
    {
        auto pPageObject = pPdfPage->getObject(nObject);
        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
        if (pPageObject->getType() != vcl::pdf::PDFPageObjectType::Text)
            continue;

        aRunText.push_back(pPageObject->getText(pTextPage));
        aRunBounds.push_back(pPageObject->getBounds());
    }

    CPPUNIT_ASSERT_EQUAL(size_t(14), aRunText.size());

    // Objects 0..6 are compared against objects 7..13: the split rendering of the
    // text must produce the same runs at the same horizontal positions as the
    // combined rendering.
    constexpr size_t nRunsPerVariant = 7;
    for (size_t nSplit = 0; nSplit < nRunsPerVariant; ++nSplit)
    {
        const size_t nCombined = nSplit + nRunsPerVariant;

        CPPUNIT_ASSERT_EQUAL(aRunText.at(nSplit).trim(), aRunText.at(nCombined).trim());
        CPPUNIT_ASSERT_DOUBLES_EQUAL(aRunBounds.at(nSplit).getMinX(),
                                     aRunBounds.at(nCombined).getMinX(),
                                     /*delta*/ 0.2);
        CPPUNIT_ASSERT_DOUBLES_EQUAL(aRunBounds.at(nSplit).getMaxX(),
                                     aRunBounds.at(nCombined).getMaxX(),
                                     /*delta*/ 0.2);
    }
}
} // end anonymous namespace
CPPUNIT_PLUGIN_IMPLEMENT();

View file

@ -151,6 +151,14 @@ public:
return nClusterId;
}
/// Clears the m_bUsed flag on every element of m_aGlyphs.
void Reset()
{
    for (auto it = m_aGlyphs.begin(); it != m_aGlyphs.end(); ++it)
        it->second.m_bUsed = false;
}
void ShapeSubRun(const sal_Unicode* pStr, const int nLength, const SubRun& aSubRun,
hb_font_t* pHbFont, const std::vector<hb_feature_t>& maFeatures,
hb_language_t oHbLanguage)
@ -598,6 +606,73 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay
hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, nullptr);
hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, nullptr);
// tdf#164106: Grapheme clusters can be split across multiple layouts. To do this,
// the complete string is laid out, and only the necessary glyphs are extracted.
// These sub-layouts are positioned side-by-side to form the complete text.
// This approach is good enough for most diacritic cases, but it cannot handle cases
// where a glyph with an advance is reordered into a different sub-layout.
//
// The pre-pass below walks every shaped glyph once and raises a flag for each
// boundary of the drawn range (rArgs.mnDrawMinCharPos / rArgs.mnDrawEndCharPos)
// at which such a reordering is observed. The flags are consumed further down,
// where glyphs are filtered into m_GlyphItems.
//
// Set when the advance accumulated over glyphs originating before
// rArgs.mnDrawMinCharPos diverges from the advance accumulated over all glyphs
// visited so far.
bool bStartClusterOutOfOrder = false;
// Same check, for glyphs originating before rArgs.mnDrawEndCharPos.
bool bEndClusterOutOfOrder = false;
{
// Sum of the advances of every glyph visited so far.
double nNormalAdvance = 0.0;
// Sum of the advances of visited glyphs whose original char pos is < mnDrawMinCharPos.
double nStartAdvance = 0.0;
// Sum of the advances of visited glyphs whose original char pos is < mnDrawEndCharPos.
double nEndAdvance = 0.0;
// Accumulates the advance of glyph i into the running sums and updates the two
// out-of-order flags. i indexes pHbGlyphInfos / pHbPositions.
auto fnHandleGlyph = [&](int i)
{
int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint;
int32_t nCluster = pHbGlyphInfos[i].cluster;
// NOTE(review): RemapGlyph presumably marks mapper entries as used, which is why
// stClusterMapper.Reset() is called after this pre-pass - confirm.
auto nOrigCharPos = stClusterMapper.RemapGlyph(nCluster, nGlyphIndex);
double nAdvance = 0.0;
// Top-to-bottom runs use the negated vertical advance; all others the horizontal one.
if (aSubRun.maDirection == HB_DIRECTION_TTB)
{
nAdvance = -pHbPositions[i].y_advance;
}
else
{
nAdvance = pHbPositions[i].x_advance;
}
nNormalAdvance += nAdvance;
// The partial sums below receive exactly the same nAdvance values, in the same
// order, as nNormalAdvance for as long as no visited glyph has been left out of
// them. The exact != comparisons therefore only fire once a glyph with a net
// non-zero advance outside the respective range has already been visited.
if (nOrigCharPos < rArgs.mnDrawMinCharPos)
{
nStartAdvance += nAdvance;
if (nStartAdvance != nNormalAdvance)
{
bStartClusterOutOfOrder = true;
}
}
if (nOrigCharPos < rArgs.mnDrawEndCharPos)
{
nEndAdvance += nAdvance;
if (nEndAdvance != nNormalAdvance)
{
bEndClusterOutOfOrder = true;
}
}
};
// Right-to-left runs are walked from the last glyph to the first, presumably so
// that glyphs are visited in logical (text) order in both cases - confirm.
if (bRightToLeft)
{
for (int i = nRunGlyphCount - 1; i >= 0; --i)
{
fnHandleGlyph(i);
}
}
else
{
for (int i = 0; i < nRunGlyphCount; ++i)
{
fnHandleGlyph(i);
}
}
// Clear the mapper's per-glyph m_bUsed flags before the glyph loop that follows.
stClusterMapper.Reset();
}
for (int i = 0; i < nRunGlyphCount; ++i) {
int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint;
int32_t nCharPos = pHbGlyphInfos[i].cluster;
@ -740,14 +815,15 @@ bool GenericSalLayout::LayoutText(vcl::text::ImplLayoutArgs& rArgs, const SalLay
const GlyphItem aGI(nCharPos, nCharCount, nGlyphIndex, aNewPos, nGlyphFlags,
nAdvance, nXOffset, nYOffset, nOrigCharPos);
if (aGI.origCharPos() >= rArgs.mnDrawMinCharPos
&& aGI.origCharPos() < rArgs.mnDrawEndCharPos)
auto nLowerBound = (bStartClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos());
auto nUpperBound = (bEndClusterOutOfOrder ? aGI.charPos() : aGI.origCharPos());
if (nLowerBound >= rArgs.mnDrawMinCharPos && nUpperBound < rArgs.mnDrawEndCharPos)
{
m_GlyphItems.push_back(aGI);
}
if (aGI.origCharPos() >= rArgs.mnDrawOriginCluster
&& aGI.origCharPos() < rArgs.mnDrawEndCharPos)
if (nLowerBound >= rArgs.mnDrawOriginCluster
&& nUpperBound < rArgs.mnDrawEndCharPos)
{
nCurrX += nAdvance;
}