comphelper: fix natural string compare function + reorganize tests

Natural string compare function doesn't take into account that
the string can start with a number and in this case it treats it
like a conventional string compare. This change takes this case
into account.

This change also refactores the tests for NaturalStringSorter
class. The previous tet used a mock XBreakIterator and XCollator
implementations to test the functionallity. This is not needed as
we can just use a real one instead, which makes the test more
real as it actually uses a real implementation instead of a mock
implementation, which could differ. This change removes the mock
XCollator and XBreakIterator implementations and moves the test
into a new file - NaturalStringSortTest.cxx

The test is also extended with the new use case where the string
starts with a number.

Change-Id: I32ea055f914c2947e4d979093b32f56170a61102
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134540
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
This commit is contained in:
Tomaž Vajngerl 2022-05-19 00:11:17 +09:00 committed by Tomaž Vajngerl
parent 3d37b6a351
commit 09822cf77c
4 changed files with 129 additions and 198 deletions

View file

@ -11,6 +11,7 @@ $(eval $(call gb_CppunitTest_CppunitTest,comphelper_test))
$(eval $(call gb_CppunitTest_add_exception_objects,comphelper_test, \
comphelper/qa/string/test_string \
comphelper/qa/string/NaturalStringSortTest \
comphelper/qa/container/testifcontainer \
comphelper/qa/container/testifcontainer3 \
comphelper/qa/unit/test_hash \
@ -21,6 +22,7 @@ $(eval $(call gb_CppunitTest_add_exception_objects,comphelper_test, \
comphelper/qa/unit/test_traceevent \
))
$(eval $(call gb_CppunitTest_use_ure,comphelper_test))
$(eval $(call gb_CppunitTest_use_sdk_api,comphelper_test))
$(eval $(call gb_CppunitTest_use_libraries,comphelper_test, \
@ -28,6 +30,7 @@ $(eval $(call gb_CppunitTest_use_libraries,comphelper_test, \
cppuhelper \
cppu \
sal \
unotest \
))
ifeq ($(TLS),NSS)
@ -37,4 +40,8 @@ $(eval $(call gb_CppunitTest_use_externals,comphelper_test,\
))
endif
$(eval $(call gb_CppunitTest_use_components,comphelper_test,\
i18npool/util/i18npool \
))
# vim: set noet sw=4 ts=4:

View file

@ -0,0 +1,95 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <comphelper/string.hxx>
#include <comphelper/processfactory.hxx>
#include <cppuhelper/implbase.hxx>
#include <com/sun/star/i18n/CharType.hpp>
#include <com/sun/star/i18n/XBreakIterator.hpp>
#include <com/sun/star/i18n/XCollator.hpp>
#include <unotest/bootstrapfixturebase.hxx>
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/plugin/TestPlugIn.h>
#include <rtl/ustring.hxx>
using namespace css;
namespace
{
class TestStringNaturalCompare : public test::BootstrapFixtureBase
{
public:
void testNatural()
{
lang::Locale aLocale;
aLocale.Language = "en";
aLocale.Country = "US";
comphelper::string::NaturalStringSorter aSorter(comphelper::getProcessComponentContext(),
aLocale);
CPPUNIT_ASSERT_EQUAL(sal_Int32(+0), aSorter.compare("ABC", "ABC"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("ABC", "abc"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("abc", "ABC"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("alongstring", "alongerstring"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("alongerstring", "alongstring"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("Heading 9", "Heading 10"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("Heading 10", "Heading 9"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("July, the 4th", "July, the 10th"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("July, the 10th", "July, the 4th"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("abc08", "abc010"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("abc010", "abc08"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+0), aSorter.compare("apple10apple", "apple10apple"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("KA1", "KA0"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+0), aSorter.compare("KA1", "KA1"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("KA1", "KA2"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("KA50", "KA5"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("KA50", "KA100"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("1", "0"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+0), aSorter.compare("1", "1"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("1", "2"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("11", "1"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("50", "100"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("0", "100000"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("0", "A"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("A", "0"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("A", "99"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("00ABC2", "00ABC1"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("00ABC1", "00ABC2"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(+1), aSorter.compare("00ABC11", "00ABC2"));
CPPUNIT_ASSERT_EQUAL(sal_Int32(-1), aSorter.compare("00ABC2", "00ABC11"));
}
CPPUNIT_TEST_SUITE(TestStringNaturalCompare);
CPPUNIT_TEST(testNatural);
CPPUNIT_TEST_SUITE_END();
};
CPPUNIT_TEST_SUITE_REGISTRATION(TestStringNaturalCompare);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View file

@ -35,7 +35,6 @@ namespace {
class TestString: public CppUnit::TestFixture
{
public:
void testNatural();
void testStripStart();
void testStripEnd();
void testStrip();
@ -48,7 +47,6 @@ public:
void testRemoveAny();
CPPUNIT_TEST_SUITE(TestString);
CPPUNIT_TEST(testNatural);
CPPUNIT_TEST(testStripStart);
CPPUNIT_TEST(testStripEnd);
CPPUNIT_TEST(testStrip);
@ -82,192 +80,6 @@ void TestString::testIsdigitAsciiString()
CPPUNIT_ASSERT_EQUAL(true, comphelper::string::isdigitAsciiString(""));
}
using namespace ::com::sun::star;
class testCollator : public cppu::WeakImplHelper< i18n::XCollator >
{
public:
virtual sal_Int32 SAL_CALL compareSubstring(
const OUString& str1, sal_Int32 off1, sal_Int32 len1,
const OUString& str2, sal_Int32 off2, sal_Int32 len2) override
{
return str1.copy(off1, len1).compareTo(str2.subView(off2, len2));
}
virtual sal_Int32 SAL_CALL compareString(
const OUString& str1,
const OUString& str2) override
{
return str1.compareTo(str2);
}
virtual sal_Int32 SAL_CALL loadDefaultCollator(const lang::Locale&, sal_Int32) override {return 0;}
virtual sal_Int32 SAL_CALL loadCollatorAlgorithm(const OUString&,
const lang::Locale&, sal_Int32) override {return 0;}
virtual void SAL_CALL loadCollatorAlgorithmWithEndUserOption(const OUString&,
const lang::Locale&, const uno::Sequence< sal_Int32 >&) override {}
virtual uno::Sequence< OUString > SAL_CALL listCollatorAlgorithms(const lang::Locale&) override
{
return uno::Sequence< OUString >();
}
virtual uno::Sequence< sal_Int32 > SAL_CALL listCollatorOptions(const OUString&) override
{
return uno::Sequence< sal_Int32 >();
}
};
#define IS_DIGIT(CHAR) (((CHAR) >= 48) && ((CHAR <= 57)))
class testBreakIterator : public cppu::WeakImplHelper< i18n::XBreakIterator >
{
public:
virtual sal_Int32 SAL_CALL nextCharacters( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16, sal_Int32, sal_Int32& ) override {return -1;}
virtual sal_Int32 SAL_CALL previousCharacters( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16, sal_Int32, sal_Int32& ) override {return -1;}
virtual i18n::Boundary SAL_CALL previousWord( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16) override
{ return i18n::Boundary(); }
virtual i18n::Boundary SAL_CALL nextWord( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16) override
{ return i18n::Boundary(); }
virtual i18n::Boundary SAL_CALL getWordBoundary( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16, sal_Bool ) override
{ return i18n::Boundary(); }
virtual sal_Bool SAL_CALL isBeginWord( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16 ) override
{ return false; }
virtual sal_Bool SAL_CALL isEndWord( const OUString&, sal_Int32,
const lang::Locale& , sal_Int16 ) override
{ return false; }
virtual sal_Int16 SAL_CALL getWordType( const OUString&, sal_Int32,
const lang::Locale& ) override
{ return 0; }
virtual sal_Int32 SAL_CALL beginOfSentence( const OUString&, sal_Int32,
const lang::Locale& ) override
{ return 0; }
virtual sal_Int32 SAL_CALL endOfSentence( const OUString& rText, sal_Int32,
const lang::Locale& ) override
{ return rText.getLength(); }
virtual i18n::LineBreakResults SAL_CALL getLineBreak( const OUString&, sal_Int32,
const lang::Locale&, sal_Int32,
const i18n::LineBreakHyphenationOptions&,
const i18n::LineBreakUserOptions&) override
{
return i18n::LineBreakResults();
}
virtual sal_Int16 SAL_CALL getScriptType( const OUString&, sal_Int32 ) override { return -1; }
virtual sal_Int32 SAL_CALL beginOfScript( const OUString&, sal_Int32,
sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL endOfScript( const OUString&, sal_Int32,
sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL previousScript( const OUString&, sal_Int32,
sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL nextScript( const OUString&, sal_Int32,
sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL beginOfCharBlock( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL endOfCharBlock( const OUString& rText, sal_Int32 nStartPos,
const lang::Locale&, sal_Int16 CharType ) override
{
const sal_Unicode *pStr = rText.getStr()+nStartPos;
for (sal_Int32 nI = nStartPos; nI < rText.getLength(); ++nI)
{
if (CharType == i18n::CharType::DECIMAL_DIGIT_NUMBER && !IS_DIGIT(*pStr))
return nI;
else if (CharType != i18n::CharType::DECIMAL_DIGIT_NUMBER && IS_DIGIT(*pStr))
return nI;
++pStr;
}
return -1;
}
virtual sal_Int32 SAL_CALL previousCharBlock( const OUString&, sal_Int32,
const lang::Locale&, sal_Int16 ) override { return -1; }
virtual sal_Int32 SAL_CALL nextCharBlock( const OUString& rText, sal_Int32 nStartPos,
const lang::Locale&, sal_Int16 CharType ) override
{
const sal_Unicode *pStr = rText.getStr()+nStartPos;
for (sal_Int32 nI = nStartPos; nI < rText.getLength(); ++nI)
{
if (CharType == i18n::CharType::DECIMAL_DIGIT_NUMBER && IS_DIGIT(*pStr))
return nI;
else if (CharType != i18n::CharType::DECIMAL_DIGIT_NUMBER && !IS_DIGIT(*pStr))
return nI;
++pStr;
}
return -1;
}
};
void TestString::testNatural()
{
using namespace comphelper::string;
uno::Reference< i18n::XCollator > xCollator(new testCollator);
uno::Reference< i18n::XBreakIterator > xBI(new testBreakIterator);
// --- Some generic tests to ensure we do not alter original behavior
// outside what we want
CPPUNIT_ASSERT_EQUAL(
static_cast<sal_Int32>(0), compareNatural("ABC", "ABC", xCollator, xBI, lang::Locale())
);
// Case sensitivity
CPPUNIT_ASSERT(
compareNatural("ABC", "abc", xCollator, xBI, lang::Locale()) < 0
);
// Reverse
CPPUNIT_ASSERT(
compareNatural("abc", "ABC", xCollator, xBI, lang::Locale()) > 0
);
// First shorter
CPPUNIT_ASSERT(
compareNatural("alongstring", "alongerstring", xCollator, xBI, lang::Locale()) > 0
);
// Second shorter
CPPUNIT_ASSERT(
compareNatural("alongerstring", "alongstring", xCollator, xBI, lang::Locale()) < 0
);
// -- Here we go on natural order, each one is followed by classic compare and the reverse comparison
// That's why we originally made the patch
CPPUNIT_ASSERT(
compareNatural("Heading 9", "Heading 10", xCollator, xBI, lang::Locale()) < 0
);
// Original behavior
CPPUNIT_ASSERT(
OUString("Heading 9").compareTo(u"Heading 10") > 0
);
CPPUNIT_ASSERT(
compareNatural("Heading 10", "Heading 9", xCollator, xBI, lang::Locale()) > 0
);
// Harder
CPPUNIT_ASSERT(
compareNatural("July, the 4th", "July, the 10th", xCollator, xBI, lang::Locale()) < 0
);
CPPUNIT_ASSERT(
OUString("July, the 4th").compareTo(u"July, the 10th") > 0
);
CPPUNIT_ASSERT(
compareNatural("July, the 10th", "July, the 4th", xCollator, xBI, lang::Locale()) > 0
);
// Hardest
CPPUNIT_ASSERT(
compareNatural("abc08", "abc010", xCollator, xBI, lang::Locale()) < 0
);
CPPUNIT_ASSERT(
OUString("abc08").compareTo(u"abc010") > 0
);
CPPUNIT_ASSERT(
compareNatural("abc010", "abc08", xCollator, xBI, lang::Locale()) > 0
);
CPPUNIT_ASSERT_EQUAL(
static_cast<sal_Int32>(0), compareNatural("apple10apple", "apple10apple", xCollator, xBI, lang::Locale())
);
}
void TestString::testStripStart()
{
OString aIn("abc");

View file

@ -425,33 +425,50 @@ sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
sal_Int32 nLHSFirstDigitPos = 0;
sal_Int32 nRHSFirstDigitPos = 0;
// Check if the string starts with a digit
sal_Int32 nStartsDigitLHS = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
sal_Int32 nStartsDigitRHS = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
if (nStartsDigitLHS > 0 && nStartsDigitRHS > 0)
{
sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS, 0, nStartsDigitLHS);
sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS, 0, nStartsDigitRHS);
if (nLHS != nRHS)
return nLHS < nRHS ? -1 : 1;
nLHSLastNonDigitPos = nStartsDigitLHS;
nRHSLastNonDigitPos = nStartsDigitRHS;
}
else if (nStartsDigitLHS > 0)
return -1;
else if (nStartsDigitRHS > 0)
return 1;
while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength())
{
sal_Int32 nLHSChunkLen;
sal_Int32 nRHSChunkLen;
//Compare non digit block as normal strings
nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos,
rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos,
rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
if (nLHSFirstDigitPos == -1)
nLHSFirstDigitPos = rLHS.getLength();
if (nRHSFirstDigitPos == -1)
nRHSFirstDigitPos = rRHS.getLength();
nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos;
nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos;
nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos,
nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos, nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
if (nRet != 0)
break;
//Compare digit block as one number vs another
nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos,
rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos,
rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos, rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
if (nLHSLastNonDigitPos == -1)
nLHSLastNonDigitPos = rLHS.getLength();
if (nRHSLastNonDigitPos == -1)