tdf#36709 Refactor Converter to separate parsing from unit conversion

Previously, Converter::convertMeasure parsed measure strings and performed unit conversion in a single call. This was appropriate, as all such measure strings could be converted to a common unit ahead of time. tdf#36709, however, will introduce measures that are relative to the current font and font size. In order to perform layout, the original measure unit must be preserved and passed along with the original value. This change introduces Converter::convertMeasureUnit, which parses a measure string and returns both the value and the unit. Existing unit conversion code has been refactored to facilitate this change. Change-Id: Ic8dfda432e1ca117ad80c05c1939ebaf43e79a9f Reviewed-on: https://gerrit.libreoffice.org/c/core/+/175937 Tested-by: Jenkins Reviewed-by: Jonathan Clark <jonathan@libreoffice.org>
2024-11-01 14:10:59 -06:00 · 2024-11-01 14:10:59 -06:00 · 6f16beca51
commit 6f16beca51
parent e90e601d5b
4 changed files with 280 additions and 44 deletions
--- a/include/sax/tools/converter.hxx
+++ b/include/sax/tools/converter.hxx
@ -78,6 +78,18 @@ public:
                                sal_Int16 SourceUnit,
                                sal_Int16 nTargetUnit  );

+    /** convert string to measure and unit pair */
+    static bool convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::u16string_view rString);
+
+    /** convert string to measure and unit pair */
+    static bool convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::string_view rString);
+
+    /** convert measure and unit pair to string */
+    static void convertMeasureUnit(OUStringBuffer& rBuffer, double dValue,
+                                   std::optional<sal_Int16> nValueUnit);
+
    /** convert string to boolean */
    static bool convertBool( bool& rBool,
                             std::u16string_view rString );
--- a/offapi/com/sun/star/util/MeasureUnit.idl
+++ b/offapi/com/sun/star/util/MeasureUnit.idl
@ -84,6 +84,12 @@ published constants MeasureUnit
    /** all measures for this component are in SYSFONT */
    const short SYSFONT = 18;

+    /** all measures for this component are in em relative to the font */
+    const short FONT_EM = 19;
+
+    /** all measures for this component are in ideographic advances relative to the font */
+    const short FONT_IC = 20;
+
 };


--- a/sax/qa/cppunit/test_converter.cxx
+++ b/sax/qa/cppunit/test_converter.cxx
@ -56,6 +56,7 @@ public:
    void testPercent();
    void testColor();
    void testNumber();
+    void testConvertMeasureUnit();

    CPPUNIT_TEST_SUITE(ConverterTest);
    CPPUNIT_TEST(testDuration);
@ -67,6 +68,7 @@ public:
    CPPUNIT_TEST(testPercent);
    CPPUNIT_TEST(testColor);
    CPPUNIT_TEST(testNumber);
+    CPPUNIT_TEST(testConvertMeasureUnit);
    CPPUNIT_TEST_SUITE_END();

 private:
@ -614,6 +616,90 @@ void ConverterTest::testNumber()
    doTestStringToNumber(0, "666", -0, 0);
 }

+void ConverterTest::testConvertMeasureUnit()
+{
+    auto fnFromStr = [](std::string_view aStr, double dExpValue, std::optional<sal_Int16> nExpUnit,
+                        bool bExpResult)
+    {
+        double dValue = 0.0;
+        std::optional<sal_Int16> nUnit;
+
+        bool bResult = Converter::convertMeasureUnit(dValue, nUnit, aStr);
+
+        CPPUNIT_ASSERT_EQUAL(bExpResult, bResult);
+        CPPUNIT_ASSERT_DOUBLES_EQUAL(dExpValue, dValue, 0.00001);
+        CPPUNIT_ASSERT_EQUAL(nExpUnit.has_value(), nUnit.has_value());
+
+        if (nExpUnit.has_value())
+        {
+            CPPUNIT_ASSERT_EQUAL(nExpUnit.value(), nUnit.value());
+        }
+    };
+
+    auto fnToStr = [](double dValue, std::optional<sal_Int16> nValueUnit) -> OUString
+    {
+        OUStringBuffer stBuf;
+        Converter::convertMeasureUnit(stBuf, dValue, nValueUnit);
+        return stBuf.makeStringAndClear();
+    };
+
+    // Characteristic cases without unit parsing
+    fnFromStr("5000", 5000.0, std::nullopt, true);
+    fnFromStr("-123", -123.0, std::nullopt, true);
+    CPPUNIT_ASSERT_EQUAL(u"5000"_ustr, fnToStr(5000.0, std::nullopt));
+    CPPUNIT_ASSERT_EQUAL(u"-123"_ustr, fnToStr(-123.0, std::nullopt));
+
+    // Characteristic case with invalid unit
+    fnFromStr("5000xy", 0.0, std::nullopt, false);
+
+    // Characteristic cases for unit printing
+    CPPUNIT_ASSERT_EQUAL(u"5000em"_ustr, fnToStr(5000.0, MeasureUnit::FONT_EM));
+    CPPUNIT_ASSERT_EQUAL(u"-123%"_ustr, fnToStr(-123.0, MeasureUnit::PERCENT));
+
+    // Branch coverage for unit parsing
+    fnFromStr("5000%", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000em", 5000.0, MeasureUnit::FONT_EM, true);
+    fnFromStr("5000ic", 5000.0, MeasureUnit::FONT_IC, true);
+    fnFromStr("5000in", 5000.0, MeasureUnit::INCH, true);
+    fnFromStr("5000mm", 5000.0, MeasureUnit::MM, true);
+    fnFromStr("5000pt", 5000.0, MeasureUnit::POINT, true);
+    fnFromStr("5000pc", 5000.0, MeasureUnit::PICA, true);
+    fnFromStr("5000px", 5000.0, MeasureUnit::PIXEL, true);
+
+    // All units should be case-insensitive
+    fnFromStr("5000cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000Cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000cM", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000CM", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000px", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000Px", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000pX", 5000.0, MeasureUnit::PIXEL, true);
+    fnFromStr("5000PX", 5000.0, MeasureUnit::PIXEL, true);
+
+    // Characteristic cases for whitespace between numbers and units
+    fnFromStr("5000 cm", 5000.0, MeasureUnit::CM, true);
+    fnFromStr("5000\t\tcm", 5000.0, MeasureUnit::CM, true);
+
+    // tdf#36709: Measure conversion was refactored to isolate parsing and unit conversion.
+    // Some of the unit parsing code looks suspicious. The current behavior is correct, but could
+    // be prone to well-meaning breakage (e.g. refactoring, argument reordering). The following
+    // cases exercise relevant edge cases.
+
+    // The tail after percent is always ignored
+    fnFromStr("5000 %%", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000 % ", 5000.0, MeasureUnit::PERCENT, true);
+    fnFromStr("5000 %cmcmcmcm cm", 5000.0, MeasureUnit::PERCENT, true);
+
+    // The tail after other units, however, is not ignored
+    fnFromStr("5000 cmc", 0.0, std::nullopt, false);
+    fnFromStr("5000 ccm", 0.0, std::nullopt, false);
+
+    // Whitespace is allowed after units, but not inside units
+    fnFromStr("5000 c m", 0.0, std::nullopt, false);
+    fnFromStr("5000 cm ", 5000.0, MeasureUnit::CM, true);
+}
+
 CPPUNIT_TEST_SUITE_REGISTRATION(ConverterTest);

 }
--- a/sax/source/tools/converter.cxx
+++ b/sax/source/tools/converter.cxx
@ -38,6 +38,7 @@
 #include <tools/time.hxx>

 #include <algorithm>
+#include <map>
 #include <string_view>

 using namespace com::sun::star;
@ -52,6 +53,10 @@ const std::string_view gpsCM = "cm";
 const std::string_view gpsPT = "pt";
 const std::string_view gpsINCH = "in";
 const std::string_view gpsPC = "pc";
+const std::string_view gpsPX = "px";
+const std::string_view gpsPERCENT = "%";
+const std::string_view gpsFONT_EM = "em";
+const std::string_view gpsFONT_IC = "ic";

 const sal_Int8 XML_MAXDIGITSCOUNT_TIME = 14;

@ -66,6 +71,14 @@ static sal_Int64 toInt64_WithLength(const char * str, sal_Int16 radix, sal_Int32

 namespace
 {
+const std::map<sal_Int16, std::string_view> stConvertMeasureUnitStrMap{
+    { MeasureUnit::MM, gpsMM },          { MeasureUnit::CM, gpsCM },
+    { MeasureUnit::INCH, gpsINCH },      { MeasureUnit::POINT, gpsPT },
+    { MeasureUnit::PICA, gpsPC },        { MeasureUnit::PERCENT, gpsPERCENT },
+    { MeasureUnit::PIXEL, gpsPX },       { MeasureUnit::FONT_EM, gpsFONT_EM },
+    { MeasureUnit::FONT_IC, gpsFONT_IC }
+};
+
 o3tl::Length Measure2O3tlUnit(sal_Int16 nUnit)
 {
    switch (nUnit)
@ -121,14 +134,62 @@ template <typename V> bool wordEndsWith(V string, std::string_view expected)

 }

-/** convert string to measure using optional min and max values*/
-template<typename V>
-static bool lcl_convertMeasure( sal_Int32& rValue,
-                                V rString,
-                                sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
-                                sal_Int32 nMin /* = SAL_MIN_INT32 */,
-                                sal_Int32 nMax /* = SAL_MAX_INT32 */ )
+/** parse unit substring into measure unit*/
+template <class V> static std::optional<sal_Int16> lcl_parseMeasureUnit(const V& rString)
 {
+    if (rString.empty())
+    {
+        return std::nullopt;
+    }
+
+    switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[0]))
+    {
+        case u'%':
+            return MeasureUnit::PERCENT;
+
+        case u'c':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::CM;
+            break;
+
+        case u'e':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::FONT_EM;
+            break;
+
+        case u'i':
+            if (wordEndsWith(rString.substr(1), "c"))
+                return MeasureUnit::FONT_IC;
+            if (wordEndsWith(rString.substr(1), "n"))
+                return MeasureUnit::INCH;
+            break;
+
+        case u'm':
+            if (wordEndsWith(rString.substr(1), "m"))
+                return MeasureUnit::MM;
+            break;
+
+        case u'p':
+            if (wordEndsWith(rString.substr(1), "c"))
+                return MeasureUnit::PICA;
+            if (wordEndsWith(rString.substr(1), "t"))
+                return MeasureUnit::POINT;
+            if (wordEndsWith(rString.substr(1), "x"))
+                return MeasureUnit::PIXEL;
+            break;
+    }
+
+    return std::nullopt;
+}
+
+/** parse measure string into double and measure unit*/
+template <class V>
+static bool lcl_parseMeasure(double& rValue, std::optional<sal_Int16>& rSourceUnit, bool& rNeg, const V& rString)
+{
+    rValue = 0.0;
+    rSourceUnit.reset();
+    rNeg = false;
+
    bool bNeg = false;
    double nVal = 0;

@ -175,21 +236,50 @@ static bool lcl_convertMeasure( sal_Int32& rValue,
    while( (nPos < nLen) && (rString[nPos] <= ' ') )
        nPos++;

-    if( nPos < nLen )
+    if (nPos < nLen)
    {
+        // Parse unit from the tail
+        auto nUnit = lcl_parseMeasureUnit(rString.substr(nPos));
+        if (!nUnit.has_value())
+        {
+            return false;
+        }

+        rSourceUnit = nUnit.value();
+    }
+
+    rValue = nVal;
+    rNeg = bNeg;
+
+    return true;
+}
+
+/** convert string to measure using optional min and max values*/
+template <class V>
+static bool lcl_convertMeasure(sal_Int32& rValue, const V& rString,
+        sal_Int16 nTargetUnit /* = MeasureUnit::MM_100TH */,
+        sal_Int32 nMin /* = SAL_MIN_INT32 */,
+        sal_Int32 nMax /* = SAL_MAX_INT32 */)
+{
+    double nVal = 0.0;
+    std::optional<sal_Int16> nSourceUnit;
+    bool bNeg = false;
+
+    if (!lcl_parseMeasure(nVal, nSourceUnit, bNeg, rString))
+    {
+        return false;
+    }
+
+    if (nSourceUnit.has_value())
+    {
        if( MeasureUnit::PERCENT == nTargetUnit )
        {
-            if( '%' != rString[nPos] )
+            if (MeasureUnit::PERCENT != nSourceUnit)
                return false;
        }
        else if( MeasureUnit::PIXEL == nTargetUnit )
        {
-            if( nPos + 1 >= nLen ||
-                ('p' != rString[nPos] &&
-                 'P' != rString[nPos])||
-                ('x' != rString[nPos+1] &&
-                 'X' != rString[nPos+1]) )
+            if (MeasureUnit::PIXEL != nSourceUnit)
                return false;
        }
        else
@ -202,57 +292,52 @@ static bool lcl_convertMeasure( sal_Int32& rValue,

            if( MeasureUnit::TWIP == nTargetUnit )
            {
-                switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+                switch (nSourceUnit.value())
                {
-                case u'c':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                    case MeasureUnit::CM:
                        eFrom = o3tl::Length::cm;
-                    break;
-                case u'i':
-                    if (wordEndsWith(rString.substr(nPos + 1), "n"))
+                        break;
+                    case MeasureUnit::INCH:
                        eFrom = o3tl::Length::in;
-                    break;
-                case u'm':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                        break;
+                    case MeasureUnit::MM:
                        eFrom = o3tl::Length::mm;
-                    break;
-                case u'p':
-                    if (wordEndsWith(rString.substr(nPos + 1), "t"))
+                        break;
+                    case MeasureUnit::POINT:
                        eFrom = o3tl::Length::pt;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+                        break;
+                    case MeasureUnit::PICA:
                        eFrom = o3tl::Length::pc;
-                    break;
+                        break;
                }
            }
            else if( MeasureUnit::MM_100TH == nTargetUnit || MeasureUnit::MM_10TH == nTargetUnit )
            {
-                switch (rtl::toAsciiLowerCase<sal_uInt32>(rString[nPos]))
+                switch (nSourceUnit.value())
                {
-                case u'c':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                    case MeasureUnit::CM:
                        eFrom = o3tl::Length::cm;
-                    break;
-                case u'i':
-                    if (wordEndsWith(rString.substr(nPos + 1), "n"))
+                        break;
+                    case MeasureUnit::INCH:
                        eFrom = o3tl::Length::in;
-                    break;
-                case u'm':
-                    if (wordEndsWith(rString.substr(nPos + 1), "m"))
+                        break;
+                    case MeasureUnit::MM:
                        eFrom = o3tl::Length::mm;
-                    break;
-                case u'p':
-                    if (wordEndsWith(rString.substr(nPos + 1), "t"))
+                        break;
+                    case MeasureUnit::POINT:
                        eFrom = o3tl::Length::pt;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "c"))
+                        break;
+                    case MeasureUnit::PICA:
                        eFrom = o3tl::Length::pc;
-                    else if (wordEndsWith(rString.substr(nPos + 1), "x"))
+                        break;
+                    case MeasureUnit::PIXEL:
                        eFrom = o3tl::Length::px;
-                    break;
+                        break;
                }
            }
            else if( MeasureUnit::POINT == nTargetUnit )
            {
-                if (wordEndsWith(rString.substr(nPos), "pt"))
+                if (MeasureUnit::POINT == nSourceUnit)
                    eFrom = o3tl::Length::pt;
            }

@ -435,6 +520,53 @@ void Converter::convertMeasure( OUStringBuffer& rBuffer,
        rBuffer.appendAscii(psUnit.data(), psUnit.length());
 }

+/** convert string to measure with unit*/
+bool Converter::convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::u16string_view rString)
+{
+    bool bNeg = false;
+    bool bResult = lcl_parseMeasure(rValue, rValueUnit, bNeg, rString);
+
+    if (bNeg)
+    {
+        rValue = -rValue;
+    }
+
+    return bResult;
+}
+
+/** convert string to measure with unit*/
+bool Converter::convertMeasureUnit(double& rValue, std::optional<sal_Int16>& rValueUnit,
+                                   std::string_view rString)
+{
+    bool bNeg = false;
+    bool bResult = lcl_parseMeasure(rValue, rValueUnit, bNeg, rString);
+
+    if (bNeg)
+    {
+        rValue = -rValue;
+    }
+
+    return bResult;
+}
+
+/** convert measure with given unit to string with given unit*/
+void Converter::convertMeasureUnit(OUStringBuffer& rBuffer, double dValue,
+                                   std::optional<sal_Int16> nValueUnit)
+{
+    ::rtl::math::doubleToUStringBuffer(rBuffer, dValue, rtl_math_StringFormat_Automatic,
+                                       rtl_math_DecimalPlaces_Max, '.', true);
+
+    if (nValueUnit.has_value())
+    {
+        if (auto it = stConvertMeasureUnitStrMap.find(*nValueUnit);
+            it != stConvertMeasureUnitStrMap.end())
+        {
+            rBuffer.appendAscii(it->second.data(), it->second.length());
+        }
+    }
+}
+
 /** convert string to boolean */
 bool Converter::convertBool( bool& rBool, std::u16string_view rString )
 {