tdf#133589 Numbertext: Use Win32API to avoid std::codecvt_utf8 bug

Change-Id: I45c85db44c3dfd92e0929f66c8c95cb309c91e05
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/96609
Reviewed-by: László Németh <nemeth@numbertext.org>
Tested-by: Jenkins
This commit is contained in:
Mike Kaganski 2020-06-18 16:03:38 +03:00
parent c4a4c76648
commit 21a59b59d9
3 changed files with 81 additions and 16 deletions

View file

@ -0,0 +1,69 @@
MSVC's std::codecvt_utf8<wchar_t> mishandles non-BMP code points such as U+10CFA.
On Windows, convert with MultiByteToWideChar/WideCharToMultiByte instead.
diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx
index 5f05b48579af..eb83e59f366f 100755
--- a/src/Numbertext.cxx
+++ b/src/Numbertext.cxx
@@ -7,6 +7,10 @@
#include <sstream>
#include <fstream>
+#ifdef _WIN32
+#include <Windows.h>
+#endif
+
#include "Numbertext.hxx"
#ifdef NUMBERTEXT_BOOST
@@ -22,6 +26,14 @@
bool readfile(const std::string& filename, std::wstring& result)
{
+#ifdef _WIN32
+ std::ifstream ifs(filename);
+ if (ifs.fail())
+ return false;
+ std::stringstream ss;
+ ss << ifs.rdbuf();
+ result = Numbertext::string2wstring(ss.str());
+#else
std::wifstream wif(filename);
if (wif.fail())
return false;
@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result)
std::wstringstream wss;
wss << wif.rdbuf();
result = wss.str();
+#endif
return true;
}
@@ -99,7 +112,12 @@
std::wstring Numbertext::string2wstring(const std::string& s)
{
-#ifndef NUMBERTEXT_BOOST
+#ifdef _WIN32
+ int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
+ std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]);
+ MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize);
+ return wstr.get();
+#elif !defined NUMBERTEXT_BOOST
typedef std::codecvt_utf8<wchar_t> convert_type;
std::wstring_convert<convert_type, wchar_t> converter;
return converter.from_bytes( s );
@@ -110,7 +128,12 @@
std::string Numbertext::wstring2string(const std::wstring& s)
{
-#ifndef NUMBERTEXT_BOOST
+#ifdef _WIN32
+ int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr);
+ std::unique_ptr<char[]> str(new char[nSize]);
+ WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr);
+ return str.get();
+#elif !defined NUMBERTEXT_BOOST
typedef std::codecvt_utf8<wchar_t> convert_type;
std::wstring_convert<convert_type, wchar_t> converter;
return converter.to_bytes( s );

View file

@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext))
$(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1))
$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \
external/libnumbertext/MSVCNonBMPBug.patch1 \
))
# vim: set noet sw=4 ts=4:

View file

@ -21,7 +21,7 @@
#include <osl/file.hxx>
#include <tools/debug.hxx>
#include <rtl/ustrbuf.hxx>
#include <o3tl/char16_t2wchar_t.hxx>
#include <sal/config.h>
#include <cppuhelper/factory.hxx>
@ -132,26 +132,18 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo
if (!aCountry.isEmpty())
aCode += "-" + aCountry;
OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US));
#if defined(_WIN32)
std::wstring sResult(o3tl::toW(rText.getStr()));
#else
OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
std::wstring sResult = Numbertext::string2wstring(aInput.getStr());
#endif
bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr());
DBG_ASSERT(result, "numbertext: false");
OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
#if defined(_WIN32)
// workaround to fix non-BMP Unicode characters resulted by wstring limitation
if (!aScript.isEmpty() && aScript == "Hung")
{
OUStringBuffer aFix;
for (int i = 0; i < aResult.getLength(); ++i)
{
sal_Unicode c = aResult[i];
if (0x0C80 <= c && c <= 0x0CFF)
aFix.append(sal_Unicode(0xD803)).append(sal_Unicode(c + 0xD000));
else
aFix.append(c);
}
aResult = aFix.makeStringAndClear();
}
OUString aResult(o3tl::toU(sResult.c_str()));
#else
OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
#endif
return aResult;
}