office-gobmx/sc/inc/arraysumfunctor.hxx
Eike Rathke 361c4f008e Follow-up: tdf#156985 Use SC_USE_SSE2 to determine which KahanSum::add() to use
Also, the CPU identifier for MSVC WIN32 is not X86 but INTEL, so
actually use SSE2 there as well, which was the cause of things
failing on that platform.

For other platforms than Intel x86/x86_64 SSE2 is not defined, so
exclude the new unit test based on that and live on with the old
slightly off value. Experiments did not yield any solution that
works, even using plain sumNeumaierNormal() (similar to SSE2) in
the executeUnrolled() case instead of KahanSum with its m_fMem did
not help, nor trying to add the internal values in different
orders or with long double, au contraire the error was slightly
larger.

Change-Id: Ica0b2963f76c01f248799e9a809ef06eb099e722
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/156899
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
2023-10-15 18:46:01 +02:00

105 lines
2.7 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#pragma once
#include <cmath>
#include "kahan.hxx"
#include <formula/errorcodes.hxx>
namespace sc::op
{
/**
 * Fallback summation used when no SIMD implementation is available:
 * a Kahan sum unrolled over four independent accumulators.
 *
 * Only whole groups of four elements are consumed here. Up to three
 * trailing elements are left untouched so the caller can add them,
 * continuing at the updated index i.
 *
 * @param i        In/out. Index of the first element to sum; on return it
 *                 has been advanced past every element consumed here.
 * @param nSize    Total number of elements in the array (exclusive end index).
 * @param pCurrent Read sequentially, one element per index step. Assumed to
 *                 point at the element with index i (the visible caller
 *                 passes the array start together with i == 0).
 * @return The sum of the consumed elements, or 0.0 if fewer than four
 *         elements were available.
 */
static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pCurrent)
{
    // Avoid unsigned wrap-around (and the resulting out-of-bounds reads)
    // should the start index ever lie beyond the end of the array.
    const size_t nRemaining = (i < nSize) ? nSize - i : 0;
    const size_t nUnrolledSize = nRemaining - (nRemaining % 4);
    if (nUnrolledSize == 0)
        return 0.0;

    // nUnrolledSize is a count relative to the starting index, so convert it
    // to an absolute end index before comparing it against i. For i == 0
    // this visits exactly the same elements as comparing against the count.
    const size_t nEnd = i + nUnrolledSize;

    KahanSum sum0 = 0.0;
    KahanSum sum1 = 0.0;
    KahanSum sum2 = 0.0;
    KahanSum sum3 = 0.0;

    for (; i < nEnd; i += 4)
    {
        sum0 += *pCurrent++;
        sum1 += *pCurrent++;
        sum2 += *pCurrent++;
        sum3 += *pCurrent++;
    }

    // We are using pairwise summation alongside Kahan
    return (sum0 + sum1) + (sum2 + sum3);
}
/**
 * Sums the leading part of an array using the fastest implementation
 * selected at compile time: executeSSE2() when SC_USE_SSE2 is set,
 * executeUnrolled() otherwise.
 *
 * @param i        In/out index, forwarded by reference to the selected
 *                 implementation. sumArray() resumes at this index
 *                 afterwards to add any elements not yet summed.
 * @param nSize    Number of elements in the array.
 * @param pCurrent Pointer to the data, forwarded unchanged.
 * @return The partial sum produced by the selected implementation.
 */
static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
{
#if !SC_USE_SSE2
    // SSE2 not enabled for this build: use the portable unrolled variant.
    return executeUnrolled(i, nSize, pCurrent);
#else
    return executeSSE2(i, nSize, pCurrent);
#endif
}
/**
 * Computes the sum of all elements of an array.
 *
 * The bulk of the work is handed to executeFast(); elements it did not
 * consume are then added one by one. If the result is not finite and
 * decodes to FormulaError::NoValue, the sum is recomputed from scratch
 * while leaving out every non-finite term that itself decodes to
 * FormulaError::NoValue.
 *
 * Note that 16-byte alignment of the array will speed up the process.
 *
 * @param pArray Pointer to the first element.
 * @param nSize  Number of elements in pArray.
 * @return The accumulated sum.
 */
inline KahanSum sumArray(const double* pArray, size_t nSize)
{
    size_t nPos = 0;
    KahanSum aTotal = executeFast(nPos, nSize, pArray);

    // Add whatever the fast path left over.
    while (nPos < nSize)
    {
        aTotal += pArray[nPos];
        ++nPos;
    }

    const double fTotal = aTotal.get();

    // Ordinary result: nothing more to do.
    if (std::isfinite(fTotal))
        return aTotal;

    // A non-finite result that is not a NoValue marker is returned unchanged.
    if (GetDoubleErrorValue(fTotal) != FormulaError::NoValue)
        return aTotal;

    // If the sum is a NaN, some of the terms were empty cells, probably.
    // Start over and carefully leave those terms out.
    aTotal = 0;
    for (size_t nIdx = 0; nIdx < nSize; ++nIdx)
    {
        const bool bNoValueTerm = !std::isfinite(pArray[nIdx])
                                  && GetDoubleErrorValue(pArray[nIdx]) == FormulaError::NoValue;
        if (bNoValueTerm)
            continue;

        // Finite values, and any other non-finite values (errors encoded
        // as NaNs are let through to propagate ???), are summed.
        aTotal += pArray[nIdx];
    }
    return aTotal;
}
} // end namespace sc::op
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */