From 6f16beca51c86e0decfde71236174b5e45a9e904 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jonathan@libreoffice.org>
Date: Fri, 1 Nov 2024 14:10:59 -0600
Subject: [PATCH] tdf#36709 Refactor Converter to separate parsing from unit
 conversion

Previously, Converter::convertMeasure parsed measure strings and
performed unit conversion in a single call. This was appropriate, as all
such measure strings could be converted to a common unit ahead of time.

tdf#36709, however, will introduce measures that are relative to the
current font and font size. In order to perform layout, the original
measure unit must be preserved and passed along with the original value.

This change introduces Converter::convertMeasureUnit, which parses a
measure string and returns both the value and the unit. Existing unit
conversion code has been refactored to facilitate this change.

Change-Id: Ic8dfda432e1ca117ad80c05c1939ebaf43e79a9f
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/175937
Tested-by: Jenkins
Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
---
 include/sax/tools/converter.hxx          |  12 ++
 offapi/com/sun/star/util/MeasureUnit.idl |   6 +
 sax/qa/cppunit/test_converter.cxx        |  86 +++++++++
 sax/source/tools/converter.cxx           | 220 ++++++++++++++++++-----
 4 files changed, 280 insertions(+), 44 deletions(-)

diff --git a/include/sax/tools/converter.hxx b/include/sax/tools/converter.hxx
index a8107d592a17..6b18cac11bf4 100644
--- a/include/sax/tools/converter.hxx
+++ b/include/sax/tools/converter.hxx
@@ -78,6 +78,18 @@ public:
                                 sal_Int16 SourceUnit,
                                 sal_Int16 nTargetUnit  );
 
+    /** convert string to measure and unit pair */
+    static bool convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::u16string_view rString);
+
+    /** convert string to measure and unit pair */
+    static bool convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::string_view rString);
+
+    /** convert measure and unit pair to string */
+    static void convertMeasureUnit(OUStringBuffer& rBuffer, double dValue,
+                                   std::optional<sal_Int16> nValueUnit);
+
     /** convert string to boolean */
     static bool convertBool( bool& rBool,
                              std::u16string_view rString );
diff --git a/offapi/com/sun/star/util/MeasureUnit.idl b/offapi/com/sun/star/util/MeasureUnit.idl
index fcbe7d0cb474..2b9a93a6e12a 100644
--- a/offapi/com/sun/star/util/MeasureUnit.idl
+++ b/offapi/com/sun/star/util/MeasureUnit.idl
@@ -84,6 +84,12 @@ published constants MeasureUnit
     /** all measures for this component are in SYSFONT */
     const short SYSFONT = 18;
 
+    /** all measures for this component are in em relative to the font */
+    const short FONT_EM = 19;
+
+    /** all measures for this component are in ideographic advances relative to the font */
+    const short FONT_IC = 20;
+
 };
 
 
diff --git a/sax/qa/cppunit/test_converter.cxx b/sax/qa/cppunit/test_converter.cxx
index 525e110c1a46..47bd5b9d412d 100644
--- a/sax/qa/cppunit/test_converter.cxx
+++ b/sax/qa/cppunit/test_converter.cxx
@@ -56,6 +56,7 @@ public:
     void testPercent();
     void testColor();
     void testNumber();
+    void testConvertMeasureUnit();
 
     CPPUNIT_TEST_SUITE(ConverterTest);
     CPPUNIT_TEST(testDuration);
@@ -67,6 +68,7 @@ public:
     CPPUNIT_TEST(testPercent);
     CPPUNIT_TEST(testColor);
     CPPUNIT_TEST(testNumber);
+    CPPUNIT_TEST(testConvertMeasureUnit);
     CPPUNIT_TEST_SUITE_END();
 
 private:
@@ -614,6 +616,90 @@ void ConverterTest::testNumber()
     doTestStringToNumber(0, "666", -0, 0);
 }
 
+void ConverterTest::testConvertMeasureUnit()
+{
+    auto fnFromStr = [](std::string_view aStr, double dExpValue, std::optional<sal_Int16> nExpUnit,
+                        bool bExpResult)
+    {
+        double dValue = 0.0;
+        std::optional<sal_Int16> nUnit;
+
+        bool bResult = Converter::convertMeasureUnit(dValue, nUnit, aStr);
+
+        CPPUNIT_ASSERT_EQUAL(bExpResult, bResult);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(dExpValue, dValue, 0.00001);
+        CPPUNIT_ASSERT_EQUAL(nExpUnit.has_value(), nUnit.has_value());
+
+        if (nExpUnit.has_value())
+        {
+            CPPUNIT_ASSERT_EQUAL(nExpUnit.value(), nUnit.value());
+        }
+    };
+
+    auto fnToStr = [](double dValue, std::optional<sal_Int16> nValueUnit) -> OUString
+    {
+        OUStringBuffer stBuf;
+        Converter::convertMeasureUnit(stBuf, dValue, nValueUnit);
+        return stBuf.makeStringAndClear();
+    };
+
+    // Characteristic cases without unit parsing
+    fnFromStr("5000", 5000.0, std::nullopt, true);
+    fnFromStr("-123", -123.0, std::nullopt, true);
+    CPPUNIT_ASSERT_EQUAL(u"5000"_ustr, fnToStr(5000.0, std::nullopt));
+    CPPUNIT_ASSERT_EQUAL(u"-123"_ustr, fnToStr(-123.0, std::nullopt));
+
+    // Characteristic case with invalid unit
+    fnFromStr("5000xy", 0.0, std::nullopt, false);
+
+    // Characteristic cases for unit printing
+    CPPUNIT_ASSERT_EQUAL(u"5000em"_ustr, fnToStr(5000.0, MeasureUnit::FONT_EM));
+    CPPUNIT_ASSERT_EQUAL(u"-123%"_ustr, fnToStr(-123.0, MeasureUnit::PERCENT));
+
+    // Branch coverage for unit parsing
+    fnFromStr("5000%", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000em", 5000.0, MeasureUnit::FONT_EM, true);
+    fnFromStr("5000ic", 5000.0, MeasureUnit::FONT_IC, true);
+    fnFromStr("5000in", 5000.0, MeasureUnit::INCH, true);
+    fnFromStr("5000mm", 5000.0, MeasureUnit::MM, true);
+    fnFromStr("5000pt", 5000.0, MeasureUnit::POINT, true);
+    fnFromStr("5000pc", 5000.0, MeasureUnit::PICA, true);
+    fnFromStr("5000px", 5000.0, MeasureUnit::PIXEL, true);
+
+    // All units should be case-insensitive
+    fnFromStr("5000cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000Cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000cM", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000CM", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000px", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000Px", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000pX", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000PX", 5000.0, MeasureUnit::PIXEL, true);
+
+    // Characteristic cases for whitespace between numbers and units
+    fnFromStr("5000 cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000\t\tcm", 5000.0, MeasureUnit::CM, true);
+
+    // tdf#36709: Measure conversion was refactored to isolate parsing and unit conversion.
+    // Some of the unit parsing code looks suspicious. The current behavior is correct, but could
+    // be prone to well-meaning breakage (e.g. refactoring, argument reordering). The following
+    // cases exercise relevant edge cases.
+
+    // The tail after percent is always ignored
+    fnFromStr("5000 %%", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000 % ", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000 %cmcmcmcm cm", 5000.0, MeasureUnit::PERCENT, true);
+
+    // The tail after other units, however, is not ignored
+    fnFromStr("5000 cmc", 0.0, std::nullopt, false);
+    fnFromStr("5000 ccm", 0.0, std::nullopt, false);
+
+    // Whitespace is allowed after units, but not inside units
+    fnFromStr("5000 c m", 0.0, std::nullopt, false);
+    fnFromStr("5000 cm ", 5000.0, MeasureUnit::CM, true);
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(ConverterTest);
 
 }
diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx
index a5ca362a794f..f5218a47d46e 100644
--- a/sax/source/tools/converter.cxx
+++ b/sax/source/tools/converter.cxx
@@ -38,6 +38,7 @@
 #include <tools/time.hxx>
 
 #include <algorithm>
+#include <map>
 #include <string_view>
 
 using namespace com::sun::star;
@@ -52,6 +53,10 @@ const std::string_view gpsCM = "cm";
 const std::string_view gpsPT = "pt";
 const std::string_view gpsINCH = "in";
 const std::string_view gpsPC = "pc";
+const std::string_view gpsPX = "px";
+const std::string_view gpsPERCENT = "%";
+const std::string_view gpsFONT_EM = "em";
+const std::string_view gpsFONT_IC = "ic";
 
 const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14;
 
@@ -66,6 +71,14 @@ static sal_Int64 toInt64_WithLength(const char * str, sal_Int16 radix, sal_Int32
 
 namespace
 {
+const std::map<sal_Int16, std::string_view> stConvertMeasureUnitStrMap{
+    { MeasureUnit::MM, gpsMM },          { MeasureUnit::CM, gpsCM },
+    { MeasureUnit::INCH, gpsINCH },      { MeasureUnit::POINT, gpsPT },
+    { MeasureUnit::PICA, gpsPC },        { MeasureUnit::PERCENT, gpsPERCENT },
+    { MeasureUnit::PIXEL, gpsPX },       { MeasureUnit::FONT_EM, gpsFONT_EM },
+    { MeasureUnit::FONT_IC, gpsFONT_IC }
+};
+
 o3tl::Length Measure2O3tlUnit(sal_Int16 nUnit)
 {
     switch (nUnit)
@@ -121,14 +134,62 @@ template <typename V> bool wordEndsWith(V string, std::string_view expected)
 
 }
 
-/** convert string to measure using optional min and max values*/
-template<typename V>
-static bool lcl_convertMeasure( sal_Int32& rValue,
-                                V rString,
-                                sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
-                                sal_Int32 nMin /* = SAL_MIN_INT32 */,
-                                sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+/** parse unit substring into measure unit*/
+template <class V> static std::optional<sal_Int16> lcl_parseMeasureUnit(const V& rString)
 {
+    if (rString.empty())
+    {
+        return std::nullopt;
+    }
+
+    switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[0]))
+    {
+        case u'%':
+            return MeasureUnit::PERCENT;
+
+        case u'c':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::CM;
+            break;
+
+        case u'e':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::FONT_EM;
+            break;
+
+        case u'i':
+            if (wordEndsWith(rString.substr(1), "c"))
+                return MeasureUnit::FONT_IC;
+            if (wordEndsWith(rString.substr(1), "n"))
+                return MeasureUnit::INCH;
+            break;
+
+        case u'm':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::MM;
+            break;
+
+        case u'p':
+            if (wordEndsWith(rString.substr(1), "c"))
+                return MeasureUnit::PICA;
+            if (wordEndsWith(rString.substr(1), "t"))
+                return MeasureUnit::POINT;
+            if (wordEndsWith(rString.substr(1), "x"))
+                return MeasureUnit::PIXEL;
+            break;
+    }
+
+    return std::nullopt;
+}
+
+/** parse measure string into double and measure unit*/
+template <class V>
+static bool lcl_parseMeasure(double& rValue, std::optional<sal_Int16>& rSourceUnit, bool& rNeg, const V& rString)
+{
+    rValue = 0.0;
+    rSourceUnit.reset();
+    rNeg = false;
+
     bool bNeg = false;
     double nVal = 0;
 
@@ -175,21 +236,50 @@ static bool lcl_convertMeasure( sal_Int32& rValue,
     while( (nPos < nLen) && (rString[nPos] <= ' ') )
         nPos++;
 
-    if( nPos < nLen )
+    if (nPos < nLen)
     {
+        // Parse unit from the tail
+        auto nUnit = lcl_parseMeasureUnit(rString.substr(nPos));
+        if (!nUnit.has_value())
+        {
+            return false;
+        }
 
+        rSourceUnit = nUnit.value();
+    }
+
+    rValue = nVal;
+    rNeg = bNeg;
+
+    return true;
+}
+
+/** convert string to measure using optional min and max values*/
+template <class V>
+static bool lcl_convertMeasure(sal_Int32& rValue, const V& rString,
+        sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+        sal_Int32 nMin /* = SAL_MIN_INT32 */,
+        sal_Int32 nMax /* = SAL_MAX_INT32 */)
+{
+    double nVal = 0.0;
+    std::optional<sal_Int16> nSourceUnit;
+    bool bNeg = false;
+
+    if (!lcl_parseMeasure(nVal, nSourceUnit, bNeg, rString))
+    {
+        return false;
+    }
+
+    if (nSourceUnit.has_value())
+    {
         if( MeasureUnit::PERCENT == nTargetUnit )
         {
-            if( '%' != rString[nPos] )
+            if (MeasureUnit::PERCENT != nSourceUnit)
                 return false;
         }
         else if( MeasureUnit::PIXEL == nTargetUnit )
         {
-            if( nPos + 1 >= nLen ||
-                ('p' != rString[nPos] &&
-                 'P' != rString[nPos])||
-                ('x' != rString[nPos+1] &&
-                 'X' != rString[nPos+1]) )
+            if (MeasureUnit::PIXEL != nSourceUnit)
                 return false;
         }
         else
@@ -202,57 +292,52 @@ static bool lcl_convertMeasure( sal_Int32& rValue,
 
             if( MeasureUnit::TWIP == nTargetUnit )
             {
-                switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+                switch (nSourceUnit.value())
                 {
-                case u'c':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                    case MeasureUnit::CM:
                         eFrom = o3tl::Length::cm;
-                    break;
-                case u'i':
-                    if (wordEndsWith(rString.substr(nPos + 1), "n"))
+                        break;
+                    case MeasureUnit::INCH:
                         eFrom = o3tl::Length::in;
-                    break;
-                case u'm':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                        break;
+                    case MeasureUnit::MM:
                         eFrom = o3tl::Length::mm;
-                    break;
-                case u'p':
-                    if (wordEndsWith(rString.substr(nPos + 1), "t"))
+                        break;
+                    case MeasureUnit::POINT:
                         eFrom = o3tl::Length::pt;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+                        break;
+                    case MeasureUnit::PICA:
                         eFrom = o3tl::Length::pc;
-                    break;
+                        break;
                 }
             }
             else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit )
             {
-                switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+                switch (nSourceUnit.value())
                 {
-                case u'c':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                    case MeasureUnit::CM:
                         eFrom = o3tl::Length::cm;
-                    break;
-                case u'i':
-                    if (wordEndsWith(rString.substr(nPos + 1), "n"))
+                        break;
+                    case MeasureUnit::INCH:
                         eFrom = o3tl::Length::in;
-                    break;
-                case u'm':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                        break;
+                    case MeasureUnit::MM:
                         eFrom = o3tl::Length::mm;
-                    break;
-                case u'p':
-                    if (wordEndsWith(rString.substr(nPos + 1), "t"))
+                        break;
+                    case MeasureUnit::POINT:
                         eFrom = o3tl::Length::pt;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+                        break;
+                    case MeasureUnit::PICA:
                         eFrom = o3tl::Length::pc;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "x"))
+                        break;
+                    case MeasureUnit::PIXEL:
                         eFrom = o3tl::Length::px;
-                    break;
+                        break;
                 }
             }
             else if( MeasureUnit::POINT == nTargetUnit )
             {
-                if (wordEndsWith(rString.substr(nPos), "pt"))
+                if (MeasureUnit::POINT == nSourceUnit)
                     eFrom = o3tl::Length::pt;
             }
 
@@ -435,6 +520,53 @@ void Converter::convertMeasure( OUStringBuffer& rBuffer,
         rBuffer.appendAscii(psUnit.data(), psUnit.length());
 }
 
+/** convert string to measure with unit*/
+bool Converter::convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::u16string_view rString)
+{
+    bool bNeg = false;
+    bool bResult = lcl_parseMeasure(rValue, rValueUnit, bNeg, rString);
+
+    if (bNeg)
+    {
+        rValue = -rValue;
+    }
+
+    return bResult;
+}
+
+/** convert string to measure with unit*/
+bool Converter::convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::string_view rString)
+{
+    bool bNeg = false;
+    bool bResult = lcl_parseMeasure(rValue, rValueUnit, bNeg, rString);
+
+    if (bNeg)
+    {
+        rValue = -rValue;
+    }
+
+    return bResult;
+}
+
+/** convert measure with given unit to string with given unit*/
+void Converter::convertMeasureUnit(OUStringBuffer& rBuffer, double dValue,
+                                   std::optional<sal_Int16> nValueUnit)
+{
+    ::rtl::math::doubleToUStringBuffer(rBuffer, dValue, rtl_math_StringFormat_Automatic,
+                                       rtl_math_DecimalPlaces_Max, '.', true);
+
+    if (nValueUnit.has_value())
+    {
+        if (auto it = stConvertMeasureUnitStrMap.find(*nValueUnit);
+            it != stConvertMeasureUnitStrMap.end())
+        {
+            rBuffer.appendAscii(it->second.data(), it->second.length());
+        }
+    }
+}
+
 /** convert string to boolean */
 bool Converter::convertBool( bool& rBool, std::u16string_view rString )
 {