tdf#163873 sw inline heading: use HTML text, not lo-res bitmap export

Text frames formatted with the Inline Heading frame style and anchored
as characters were converted to unacceptably low-resolution images in
the HTML export. Now the HTML export contains normal h1–h6 elements
with the CSS setting display:inline; to get text-based/searchable inline
headings, which also fixes the rendering quality.

Follow-up to commit 7a35f3dc74
"tdf#48459 sw inline heading: apply it on the selected words",
commit d87cf67f8f
"tdf#131728 sw inline heading: fix missing/broken DOCX export",
commit a1dcbd1d1c
"tdf48459 sw inline heading: add Inline Heading frame style"
and commit 984f0e49d3
"tdf#163874 sw inline heading: fix XHTML export".

Change-Id: I02f7af8a39314e78d670b4db9a76897e931b3a47
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/176512
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
This commit is contained in:
László Németh 2024-11-12 23:07:16 +01:00
parent b51386b77c
commit bcee366d82
6 changed files with 60 additions and 6 deletions

Binary file not shown.

View file

@ -3374,6 +3374,24 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testHTML_162426)
assertXPath(pDoc, "/html/body/p/img", "border", u"0");
}
// Regression test for tdf#163873: inline headings must be exported to HTML as
// text-based heading elements styled with "display:inline;".
CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testHTML_163873)
{
// Given a document with inline headings (presumably text frames using the
// "Inline Heading" frame style, anchored as characters -- verify against the
// contents of tdf131728.docx):
createSwDoc("tdf131728.docx");
// When exporting it to HTML and parsing the result:
ExportToHTML();
xmlDocUniquePtr pDoc = parseXml(maTempFile);
CPPUNIT_ASSERT(pDoc);
// Then the checked paragraphs contain an <h2> inside a <span>, and that <h2>
// carries style="display:inline;". Before the fix, such frames were exported
// as low-resolution images instead of heading elements.
assertXPath(pDoc, "/html/body/p[5]/span/h2", "style", u"display:inline;");
assertXPath(pDoc, "/html/body/p[6]/span/h2", "style", u"display:inline;");
assertXPath(pDoc, "/html/body/p[7]/span/h2", "style", u"display:inline;");
assertXPath(pDoc, "/html/body/p[11]/span/h2", "style", u"display:inline;");
assertXPath(pDoc, "/html/body/p[14]/span/h2", "style", u"display:inline;");
}
} // end of anonymous namespace
CPPUNIT_PLUGIN_IMPLEMENT();

View file

@ -436,6 +436,7 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
rInfo.bInNumberBulletList = false; // Are we in a list?
bool bNumbered = false; // The current paragraph is numbered
bool bPara = false; // the current token is <P>
bool bHeading = false; // the current token is <H1> .. <H6>
rInfo.bParaPossible = false; // a <P> may be additionally output
bool bNoEndTag = false; // don't output an end tag
@ -554,6 +555,15 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
rWrt.m_bNoAlign = true;
bForceDL = true;
}
else if (rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head1 ||
rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head2 ||
rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head3 ||
rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head4 ||
rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head5 ||
rInfo.aToken == OOO_STRING_SVTOOLS_HTML_head6)
{
bHeading = true;
}
}
else
{
@ -925,6 +935,12 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
HTMLOutFuncs::Out_String( rWrt.Strm(), aClass );
sOut += "\"";
}
// set inline heading (heading in a text frame anchored as character and
// formatted with frame style "Inline Heading")
if( bHeading && rWrt.IsInlineHeading() )
sOut += " " OOO_STRING_SVTOOLS_HTML_O_style "=\"display:inline;\"";
rWrt.Strm().WriteOString( sOut );
sOut = ""_ostr;

View file

@ -60,7 +60,8 @@ enum class HtmlOut {
Marquee,
GraphicFrame,
OleGraphic,
Span
Span,
InlineHeading
};
enum class HtmlPosition {

View file

@ -43,6 +43,7 @@
#include <osl/diagnose.h>
#include <svx/svdograf.hxx>
#include <comphelper/xmlencode.hxx>
#include <poolfmt.hxx>
#include <fmtanchr.hxx>
#include <fmtornt.hxx>
@ -289,11 +290,13 @@ SwHTMLFrameType SwHTMLWriter::GuessFrameType( const SwFrameFormat& rFrameFormat,
void SwHTMLWriter::CollectFlyFrames()
{
SwPosFlyFrames aFlyPos(
m_pDoc->GetAllFlyFormats(m_bWriteAll ? nullptr : m_pCurrentPam.get(), true));
m_pDoc->GetAllFlyFormats(m_bWriteAll ? nullptr : m_pCurrentPam.get(),
/*bDrawAlso=*/true, /*bAsCharAlso=*/true));
for(const SwPosFlyFrame& rItem : aFlyPos)
{
const SwFrameFormat& rFrameFormat = rItem.GetFormat();
const SwFormat* pParent = rFrameFormat.DerivedFrom();
const SdrObject *pSdrObj = nullptr;
const SwNode *pAnchorNode;
const SwContentNode *pACNd;
@ -308,7 +311,6 @@ void SwHTMLWriter::CollectFlyFrames()
case RndStdIds::FLY_AT_FLY:
nMode = getHTMLOutFramePageFlyTable(eType, m_nExportMode);
break;
case RndStdIds::FLY_AT_PARA:
// frames that are anchored to a paragraph are only placed
// before the paragraph, if the paragraph has a
@ -329,12 +331,18 @@ void SwHTMLWriter::CollectFlyFrames()
}
nMode = getHTMLOutFrameParaPrtAreaTable(eType, m_nExportMode);
break;
case RndStdIds::FLY_AS_CHAR:
// keep only Inline Heading frames from the frames anchored as characters
if ( !(pParent && pParent->GetPoolFormatId() == RES_POOLFRM_INLINE_HEADING) )
continue;
[[fallthrough]];
case RndStdIds::FLY_AT_CHAR:
if( text::RelOrientation::FRAME == eHoriRel || text::RelOrientation::PRINT_AREA == eHoriRel )
nMode = getHTMLOutFrameParaPrtAreaTable(eType, m_nExportMode);
else
nMode = getHTMLOutFrameParaOtherTable(eType, m_nExportMode);
if ( rAnchor.GetAnchorId() == RndStdIds::FLY_AS_CHAR )
nMode.nOut = HtmlOut::InlineHeading;
break;
default:
@ -389,6 +397,7 @@ bool SwHTMLWriter::OutFlyFrame( SwNodeOffset nNdIdx, sal_Int32 nContentIdx, Html
{
case HtmlOut::Div:
case HtmlOut::Span:
case HtmlOut::InlineHeading:
case HtmlOut::MultiCol:
case HtmlOut::TableNode:
bRestart = true; // It could become recursive here
@ -464,8 +473,13 @@ void SwHTMLWriter::OutFrameFormat( AllHtmlFlags nMode, const SwFrameFormat& rFra
break;
case HtmlOut::Div:
case HtmlOut::Span:
case HtmlOut::InlineHeading:
if( nOutMode == HtmlOut::InlineHeading )
m_bInlineHeading = true;
OSL_ENSURE( aContainerStr.isEmpty(), "Div: Container is not supposed to be here" );
OutHTML_FrameFormatAsDivOrSpan( *this, rFrameFormat, HtmlOut::Span==nOutMode );
OutHTML_FrameFormatAsDivOrSpan( *this, rFrameFormat, HtmlOut::Div!=nOutMode );
if (nOutMode == HtmlOut::InlineHeading)
m_bInlineHeading = false;
break;
case HtmlOut::MultiCol: // OK
OutHTML_FrameFormatAsMulticol( *this, rFrameFormat, !aContainerStr.isEmpty() );
@ -488,7 +502,10 @@ void SwHTMLWriter::OutFrameFormat( AllHtmlFlags nMode, const SwFrameFormat& rFra
static_cast<const SwDrawFrameFormat &>(rFrameFormat), *pSdrObject );
break;
case HtmlOut::GraphicFrame:
OutHTML_FrameFormatAsImage( *this, rFrameFormat, /*bPNGFallback=*/true );
// skip already exported inline headings
const SwFormat* pParent = rFrameFormat.DerivedFrom();
if ( !(pParent && pParent->GetPoolFormatId() == RES_POOLFRM_INLINE_HEADING) )
OutHTML_FrameFormatAsImage( *this, rFrameFormat, /*bPNGFallback=*/true );
break;
}

View file

@ -282,6 +282,7 @@ class SwHTMLWriter : public Writer
bool m_bLFPossible = false; // a line break can be inserted
bool m_bSpacePreserve = false; // Using xml::space="preserve", or "white-space: pre-wrap" style
bool m_bPreserveSpacesOnWrite = false; // If export should use m_bSpacePreserve
bool m_bInlineHeading = false; // If export should use display:inline for inline heading
// If "Save URLs relative to *" is ignored for self-generated images / objects
bool m_bRelativeURLsForOwnObjects = false;
@ -633,6 +634,7 @@ public:
bool IsSpacePreserve() const { return m_bSpacePreserve; }
void SetSpacePreserve(bool val) { m_bSpacePreserve = val; }
bool IsPreserveSpacesOnWritePrefSet() const { return m_bPreserveSpacesOnWrite; }
bool IsInlineHeading() const { return m_bInlineHeading; }
OUString normalizeURL(const OUString& url, bool own) const;
};