361c4f008e
Also, the CPU identifier for MSVC WIN32 is not X86 but INTEL, so actually use SSE2 there as well, which was the cause of things failing on that platform. For other platforms than Intel x86/x86_64 SSE2 is not defined, so exclude the new unit test based on that and live on with the old slightly off value. Experiments did not yield any solution that works, even using plain sumNeumaierNormal() (similar to SSE2) in the executeUnrolled() case instead of KahanSum with its m_fMem did not help, nor trying to add the internal values in different orders or with long double, au contraire the error was slightly larger. Change-Id: Ica0b2963f76c01f248799e9a809ef06eb099e722 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/156899 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
105 lines
2.7 KiB
C++
105 lines
2.7 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <cmath>
|
|
#include "kahan.hxx"
|
|
#include <formula/errorcodes.hxx>
|
|
|
|
namespace sc::op
|
|
{
|
|
/**
|
|
* If no boosts available, Unrolled KahanSum.
|
|
*/
|
|
static inline KahanSum executeUnrolled(size_t& i, size_t nSize, const double* pCurrent)
|
|
{
|
|
size_t nRealSize = nSize - i;
|
|
size_t nUnrolledSize = nRealSize - (nRealSize % 4);
|
|
|
|
if (nUnrolledSize > 0)
|
|
{
|
|
KahanSum sum0 = 0.0;
|
|
KahanSum sum1 = 0.0;
|
|
KahanSum sum2 = 0.0;
|
|
KahanSum sum3 = 0.0;
|
|
|
|
for (; i + 3 < nUnrolledSize; i += 4)
|
|
{
|
|
sum0 += *pCurrent++;
|
|
sum1 += *pCurrent++;
|
|
sum2 += *pCurrent++;
|
|
sum3 += *pCurrent++;
|
|
}
|
|
// We are using pairwise summation alongside Kahan
|
|
return (sum0 + sum1) + (sum2 + sum3);
|
|
}
|
|
return 0.0;
|
|
}
|
|
|
|
/**
|
|
* This function task is to choose the fastest method available to perform the sum.
|
|
* @param i
|
|
* @param nSize
|
|
* @param pCurrent
|
|
*/
|
|
static inline KahanSum executeFast(size_t& i, size_t nSize, const double* pCurrent)
|
|
{
|
|
#if SC_USE_SSE2
|
|
return executeSSE2(i, nSize, pCurrent);
|
|
#else
|
|
return executeUnrolled(i, nSize, pCurrent);
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* Performs the sum of an array.
|
|
* Note that align 16 will speed up the process.
|
|
* @param pArray
|
|
* @param nSize
|
|
*/
|
|
inline KahanSum sumArray(const double* pArray, size_t nSize)
|
|
{
|
|
size_t i = 0;
|
|
const double* pCurrent = pArray;
|
|
KahanSum fSum = executeFast(i, nSize, pCurrent);
|
|
|
|
// sum rest of the array
|
|
for (; i < nSize; ++i)
|
|
fSum += pArray[i];
|
|
|
|
// If the sum is a NaN, some of the terms were empty cells, probably.
|
|
// Re-calculate, carefully
|
|
double fVal = fSum.get();
|
|
if (!std::isfinite(fVal))
|
|
{
|
|
FormulaError nErr = GetDoubleErrorValue(fVal);
|
|
if (nErr == FormulaError::NoValue)
|
|
{
|
|
fSum = 0;
|
|
for (i = 0; i < nSize; i++)
|
|
{
|
|
if (!std::isfinite(pArray[i]))
|
|
{
|
|
nErr = GetDoubleErrorValue(pArray[i]);
|
|
if (nErr != FormulaError::NoValue)
|
|
fSum += pArray[i]; // Let errors encoded as NaNs propagate ???
|
|
}
|
|
else
|
|
fSum += pArray[i];
|
|
}
|
|
}
|
|
}
|
|
return fSum;
|
|
}
|
|
|
|
} // end namespace sc::op
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|