From 543e52481e764b8e0eea6cf0123a77cf492bdf8e Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Wed, 7 Feb 2024 08:12:02 +0100 Subject: [PATCH] tdf#159483 sc HTML paste: handle data-sheets-value here, too HTML import into Calc could already create text cells, but HTML paste with the same content remained auto-converted to numbers unconditionally. Turns out HTML paste goes via ScHTMLLayoutParser instead of the HTML import's ScHTMLQueryParser, so the data-sheets-value was ignored for paste. Fix the problem by extracting the old data-sheets-value handler from ScHTMLQueryParser to a separate ParseDataSheetsValue(), and use it also in ScHTMLLayoutParser. For the actual handling, still only text is handled, no other formats yet. Change-Id: I0b2bf4665af331d07624ed42e30a24e31bfca331 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163068 Reviewed-by: Miklos Vajna Tested-by: Jenkins --- sc/CppunitTest_sc_filter_html.mk | 1 + sc/qa/filter/html/html.cxx | 32 ++++++++++++++++-- sc/source/filter/html/htmlpars.cxx | 52 ++++++++++++++++++++---------- sc/source/filter/inc/htmlpars.hxx | 3 ++ 4 files changed, 69 insertions(+), 19 deletions(-) diff --git a/sc/CppunitTest_sc_filter_html.mk b/sc/CppunitTest_sc_filter_html.mk index b78349d64703..f3dec22c0866 100644 --- a/sc/CppunitTest_sc_filter_html.mk +++ b/sc/CppunitTest_sc_filter_html.mk @@ -58,6 +58,7 @@ $(eval $(call gb_CppunitTest_use_libraries,sc_filter_html, \ $(eval $(call gb_CppunitTest_set_include,sc_filter_html,\ -I$(SRCDIR)/sc/source/ui/inc \ -I$(SRCDIR)/sc/inc \ + -I$(SRCDIR)/sc/qa/unit \ $$(INCLUDE) \ )) diff --git a/sc/qa/filter/html/html.cxx b/sc/qa/filter/html/html.cxx index 76413c6455b4..ba50361e927e 100644 --- a/sc/qa/filter/html/html.cxx +++ b/sc/qa/filter/html/html.cxx @@ -16,16 +16,19 @@ #include +#include +#include + using namespace com::sun::star; namespace { /// Covers sc/source/filter/html/ fixes. -class Test : public UnoApiXmlTest, public HtmlTestTools +class Test : public ScModelTestBase, public HtmlTestTools { public: Test() - : UnoApiXmlTest("/sc/qa/filter/html/data/") + : ScModelTestBase("/sc/qa/filter/html/data/") { } }; @@ -55,6 +58,31 @@ CPPUNIT_TEST_FIXTURE(Test, testTdAsText) // i.e. data-sheets-value was ignored on import. CPPUNIT_ASSERT_EQUAL(table::CellContentType_TEXT, eType); } + +CPPUNIT_TEST_FIXTURE(Test, testPasteTdAsText) +{ + // Given an empty document: + createScDoc(); + + // When pasting HTML with an A2 cell that contains "01" as text: + ScDocument* pDoc = getScDoc(); + ScAddress aCellPos(/*nColP=*/0, /*nRowP=*/0, /*nTabP=*/0); + ScImportExport aImporter(*pDoc, aCellPos); + SvFileStream aFile(createFileURL(u"text.html"), StreamMode::READ); + SvMemoryStream aMemory; + aMemory.WriteStream(aFile); + aMemory.Seek(0); + CPPUNIT_ASSERT(aImporter.ImportStream(aMemory, OUString(), SotClipboardFormatId::HTML)); + + // Then make sure "01" is not auto-converted to 1, as a number: + aCellPos = ScAddress(/*nColP=*/0, /*nRowP=*/1, /*nTabP=*/0); + CellType eCellType = pDoc->GetCellType(aCellPos); + // Without the accompanying fix in place, this test would have failed with: + // - Expected: 2 (CELLTYPE_STRING) + // - Actual : 1 (CELLTYPE_VALUE) + // i.e. data-sheets-value was ignored on paste. + CPPUNIT_ASSERT_EQUAL(CELLTYPE_STRING, eCellType); +} } CPPUNIT_PLUGIN_IMPLEMENT(); diff --git a/sc/source/filter/html/htmlpars.cxx b/sc/source/filter/html/htmlpars.cxx index 5d46d12dabe3..1a7eff2d4ff8 100644 --- a/sc/source/filter/html/htmlpars.cxx +++ b/sc/source/filter/html/htmlpars.cxx @@ -75,6 +75,31 @@ using ::editeng::SvxBorderLine; using namespace ::com::sun::star; +namespace +{ +/// data-sheets-value from google sheets, value is a JSON. +void ParseDataSheetsValue(const OUString& rDataSheetsValue, sal_uInt32& rNumberFormat) +{ + // data-sheets-value from google sheets, value is a JSON. + OString aEncodedOption = rDataSheetsValue.toUtf8(); + const char* pEncodedOption = aEncodedOption.getStr(); + std::stringstream aStream(pEncodedOption); + boost::property_tree::ptree aTree; + boost::property_tree::read_json(aStream, aTree); + // The "1" key describes the original data type. + auto it = aTree.find("1"); + if (it != aTree.not_found()) + { + int nValueType = std::stoi(it->second.get_value()); + // 2 is text. + if (nValueType == 2) + { + rNumberFormat = NF_STANDARD_FORMAT_TEXT; + } + } +} +} + ScHTMLStyles::ScHTMLStyles() : maEmpty() {} void ScHTMLStyles::add(const char* pElemName, size_t nElemName, const char* pClassName, size_t nClassName, @@ -914,6 +939,7 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo ) bInCell = true; bool bHorJustifyCenterTH = (pInfo->nToken == HtmlTokenId::TABLEHEADER_ON); const HTMLOptions& rOptions = static_cast(pInfo->pParser)->GetOptions(); + sal_uInt32 nNumberFormat = NUMBERFORMAT_ENTRY_NOT_FOUND; for (const auto & rOption : rOptions) { switch( rOption.GetToken() ) @@ -982,10 +1008,18 @@ void ScHTMLLayoutParser::TableDataOn( HtmlImportInfo* pInfo ) mxActEntry->pNumStr = rOption.GetString(); } break; + case HtmlOptionId::DSVAL: + { + ParseDataSheetsValue(rOption.GetString(), nNumberFormat); + } + break; default: break; } } + if (nNumberFormat != NUMBERFORMAT_ENTRY_NOT_FOUND) + mxActEntry->aItemSet.Put(SfxUInt32Item(ATTR_VALUE_FORMAT, nNumberFormat)); + mxActEntry->nCol = nColCnt; mxActEntry->nRow = nRowCnt; mxActEntry->nTab = nTable; @@ -2129,23 +2163,7 @@ void ScHTMLTable::DataOn( const HtmlImportInfo& rInfo ) break; case HtmlOptionId::DSVAL: { - // data-sheets-value from google sheets, value is a JSON. - OString aEncodedOption = rOption.GetString().toUtf8(); - const char* pEncodedOption = aEncodedOption.getStr(); - std::stringstream aStream(pEncodedOption); - boost::property_tree::ptree aTree; - boost::property_tree::read_json(aStream, aTree); - // The "1" key describes the original data type. - auto it = aTree.find("1"); - if (it != aTree.not_found()) - { - int nValueType = std::stoi(it->second.get_value()); - // 2 is text. - if (nValueType == 2) - { - nNumberFormat = NF_STANDARD_FORMAT_TEXT; - } - } + ParseDataSheetsValue(rOption.GetString(), nNumberFormat); } break; default: break; diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx index fcdf6b4443fa..5b2d441098f3 100644 --- a/sc/source/filter/inc/htmlpars.hxx +++ b/sc/source/filter/inc/htmlpars.hxx @@ -149,6 +149,7 @@ class HTMLOption; typedef ::std::map InnerMap; typedef ::std::map OuterMap; +/// HTML parser used during paste into Calc. class ScHTMLLayoutParser : public ScHTMLParser { private: @@ -575,6 +576,8 @@ public: Builds the table structure correctly, ignores extended formatting like pictures or column widths. + + Used during file load / import into Calc. */ class ScHTMLQueryParser : public ScHTMLParser {