tdf#133589 Numbertext: Use Win32API to avoid std::codecvt_utf8 bug
Change-Id: I45c85db44c3dfd92e0929f66c8c95cb309c91e05
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/96609
Reviewed-by: László Németh <nemeth@numbertext.org>
Tested-by: Jenkins
This commit is contained in:
parent
c4a4c76648
commit
21a59b59d9
3 changed files with 81 additions and 16 deletions
69
external/libnumbertext/MSVCNonBMPBug.patch1
vendored
Normal file
69
external/libnumbertext/MSVCNonBMPBug.patch1
vendored
Normal file
|
@ -0,0 +1,69 @@
|
|||
MSVC's std::codecvt_utf8 has a bug converting non-BMP codepoints like U+10CFA.
|
||||
Use MultiByteToWideChar/WideCharToMultiByte instead on Windows.
|
||||
|
||||
diff --git a/src/Numbertext.cxx b/src/Numbertext.cxx
|
||||
index 5f05b48579af..eb83e59f366f 100755
|
||||
--- a/src/Numbertext.cxx
|
||||
+++ b/src/Numbertext.cxx
|
||||
@@ -7,6 +7,10 @@
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
+#ifdef _WIN32
|
||||
+#include <Windows.h>
|
||||
+#endif
|
||||
+
|
||||
#include "Numbertext.hxx"
|
||||
|
||||
#ifdef NUMBERTEXT_BOOST
|
||||
@@ -22,6 +26,14 @@
|
||||
|
||||
bool readfile(const std::string& filename, std::wstring& result)
|
||||
{
|
||||
+#ifdef _WIN32
|
||||
+ std::ifstream ifs(filename);
|
||||
+ if (ifs.fail())
|
||||
+ return false;
|
||||
+ std::stringstream ss;
|
||||
+ ss << ifs.rdbuf();
|
||||
+ result = Numbertext::string2wstring(ss.str());
|
||||
+#else
|
||||
std::wifstream wif(filename);
|
||||
if (wif.fail())
|
||||
return false;
|
||||
@@ -29,6 +44,7 @@ bool readfile(const std::string& filename, std::wstring& result)
|
||||
std::wstringstream wss;
|
||||
wss << wif.rdbuf();
|
||||
result = wss.str();
|
||||
+#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -99,7 +112,12 @@
|
||||
|
||||
std::wstring Numbertext::string2wstring(const std::string& s)
|
||||
{
|
||||
-#ifndef NUMBERTEXT_BOOST
|
||||
+#ifdef _WIN32
|
||||
+ int nSize = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, nullptr, 0);
|
||||
+ std::unique_ptr<wchar_t[]> wstr(new wchar_t[nSize]);
|
||||
+ MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, wstr.get(), nSize);
|
||||
+ return wstr.get();
|
||||
+#elif !defined NUMBERTEXT_BOOST
|
||||
typedef std::codecvt_utf8<wchar_t> convert_type;
|
||||
std::wstring_convert<convert_type, wchar_t> converter;
|
||||
return converter.from_bytes( s );
|
||||
@@ -110,7 +128,12 @@
|
||||
|
||||
std::string Numbertext::wstring2string(const std::wstring& s)
|
||||
{
|
||||
-#ifndef NUMBERTEXT_BOOST
|
||||
+#ifdef _WIN32
|
||||
+ int nSize = WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, nullptr, 0, nullptr, nullptr);
|
||||
+ std::unique_ptr<char[]> str(new char[nSize]);
|
||||
+ WideCharToMultiByte(CP_UTF8, 0, s.c_str(), -1, str.get(), nSize, nullptr, nullptr);
|
||||
+ return str.get();
|
||||
+#elif !defined NUMBERTEXT_BOOST
|
||||
typedef std::codecvt_utf8<wchar_t> convert_type;
|
||||
std::wstring_convert<convert_type, wchar_t> converter;
|
||||
return converter.to_bytes( s );
|
|
@ -15,4 +15,8 @@ $(eval $(call gb_UnpackedTarball_update_autoconf_configs,libnumbertext))
|
|||
|
||||
$(eval $(call gb_UnpackedTarball_set_patchlevel,libnumbertext,1))
|
||||
|
||||
$(eval $(call gb_UnpackedTarball_add_patches,libnumbertext, \
|
||||
external/libnumbertext/MSVCNonBMPBug.patch1 \
|
||||
))
|
||||
|
||||
# vim: set noet sw=4 ts=4:
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
#include <osl/file.hxx>
|
||||
#include <tools/debug.hxx>
|
||||
#include <rtl/ustrbuf.hxx>
|
||||
#include <o3tl/char16_t2wchar_t.hxx>
|
||||
|
||||
#include <sal/config.h>
|
||||
#include <cppuhelper/factory.hxx>
|
||||
|
@ -132,26 +132,18 @@ OUString SAL_CALL NumberText_Impl::getNumberText(const OUString& rText, const Lo
|
|||
if (!aCountry.isEmpty())
|
||||
aCode += "-" + aCountry;
|
||||
OString aLangCode(OUStringToOString(aCode, RTL_TEXTENCODING_ASCII_US));
|
||||
#if defined(_WIN32)
|
||||
std::wstring sResult(o3tl::toW(rText.getStr()));
|
||||
#else
|
||||
OString aInput(OUStringToOString(rText, RTL_TEXTENCODING_UTF8));
|
||||
std::wstring sResult = Numbertext::string2wstring(aInput.getStr());
|
||||
#endif
|
||||
bool result = m_aNumberText.numbertext(sResult, aLangCode.getStr());
|
||||
DBG_ASSERT(result, "numbertext: false");
|
||||
OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
|
||||
#if defined(_WIN32)
|
||||
// workaround to fix non-BMP Unicode characters resulted by wstring limitation
|
||||
if (!aScript.isEmpty() && aScript == "Hung")
|
||||
{
|
||||
OUStringBuffer aFix;
|
||||
for (int i = 0; i < aResult.getLength(); ++i)
|
||||
{
|
||||
sal_Unicode c = aResult[i];
|
||||
if (0x0C80 <= c && c <= 0x0CFF)
|
||||
aFix.append(sal_Unicode(0xD803)).append(sal_Unicode(c + 0xD000));
|
||||
else
|
||||
aFix.append(c);
|
||||
}
|
||||
aResult = aFix.makeStringAndClear();
|
||||
}
|
||||
OUString aResult(o3tl::toU(sResult.c_str()));
|
||||
#else
|
||||
OUString aResult = OUString::fromUtf8(Numbertext::wstring2string(sResult).c_str());
|
||||
#endif
|
||||
return aResult;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue