tdf#150247: patch orcus to use UTF-16 paths on Windows
This adds a UTF-8 -> UTF-16 conversion of the paths passed to the ctor
of file_content on Windows, since both boost::filesystem::file_size
and boost::interprocess::file_mapping take UTF-16 strings on this
platform. The assumption is therefore that 8-bit path strings passed
to orcus are UTF-8-encoded.
This partially reverts commit 75252e58d9b5d020bf7bd6ca66b3a9d780463051
(it keeps use of osl_getThreadTextEncoding for platforms other than
Windows).
Change-Id: Ie467f71a65945f4f07ff432136ea06b811c3f794
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137759
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk
index 6e78c0c..30d3f0d 100644
--- a/external/liborcus/UnpackedTarball_liborcus.mk
+++ b/external/liborcus/UnpackedTarball_liborcus.mk
@@ -47,6 +47,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
ifeq ($(OS),WNT)
$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
external/liborcus/windows-constants-hack.patch \
external/liborcus/win_path_utf16.patch \
))
endif
diff --git a/external/liborcus/win_path_utf16.patch b/external/liborcus/win_path_utf16.patch
new file mode 100644
index 0000000..0a6781e
--- /dev/null
+++ b/external/liborcus/win_path_utf16.patch
@@ -0,0 +1,33 @@
diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp
index 00395f59ff25..8f385fb8965a 100644
--- a/src/parser/stream.cpp
+++ b/src/parser/stream.cpp
@@ -147,6 +147,14 @@ std::tuple<std::string_view, size_t, size_t> find_line_with_offset(std::string_v
return std::make_tuple(line, line_num, offset_on_line);
}
+#ifdef _WIN32
+std::wstring to_wstring(std::string_view s) // Windows only: converts the bytes of s, treated as UTF-8, to a UTF-16 std::wstring
+{
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conversion; // UTF-8 <-> UTF-16 converter
+ return conversion.from_bytes(s.data(), s.data() + s.size()); // explicit [begin, end) range, so s need not be NUL-terminated
+}
+#endif
+
} // anonymous namespace
struct file_content::impl
@@ -162,8 +170,13 @@ struct file_content::impl
impl() : content_size(0), content(nullptr) {}
impl(std::string_view filepath) :
+#ifdef _WIN32
+ content_size(fs::file_size(to_wstring(filepath))),
+ mapped_file(to_wstring(filepath).c_str(), bip::read_only),
+#else
content_size(fs::file_size(std::string{filepath}.c_str())),
mapped_file(std::string{filepath}.c_str(), bip::read_only),
+#endif
mapped_region(mapped_file, bip::read_only, 0, content_size),
content(nullptr)
{
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index db2d570..e7fdb44 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -11,8 +11,6 @@
#include <orcusinterface.hxx>
#include <tokenarray.hxx>
#include <memory>
#include <osl/thread.hxx>
#include <sfx2/docfile.hxx>
#include <sfx2/frame.hxx>
@@ -30,13 +28,6 @@
#include <orcus/stream.hpp>
#include <com/sun/star/task/XStatusIndicator.hpp>
#if defined _WIN32
#include <boost/filesystem/operations.hpp> // for boost::filesystem::filesystem_error
#include <o3tl/char16_t2wchar_t.hxx>
#include <prewin.h>
#include <postwin.h>
#endif
using namespace com::sun::star;
namespace
@@ -129,33 +120,17 @@ bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const
bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
{
OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
try
{
#if defined _WIN32
std::unique_ptr<orcus::file_content> content;
try
{
content = std::make_unique<orcus::file_content>(aPath8.getStr());
}
catch (const boost::filesystem::filesystem_error&)
{
// Maybe the path contains characters not representable in ACP. It's not
// yet possible to pass Unicode path to orcus::file_content ctor - see
// https://gitlab.com/orcus/orcus/-/issues/30; try short path.
wchar_t buf[32767];
if (GetShortPathNameW(o3tl::toW(aPath.getStr()), buf, std::size(buf)) == 0)
throw;
aPath8 = OUStringToOString(o3tl::toU(buf), osl_getThreadTextEncoding());
content = std::make_unique<orcus::file_content>(aPath8);
}
OString aPath8 = OUStringToOString(aPath, RTL_TEXTENCODING_UTF8);
#else
auto content = std::make_unique<orcus::file_content>(aPath8);
OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
#endif
orcus::file_content content(aPath8);
ScOrcusFactory aFactory(rDoc);
ScOrcusStyles aStyles(aFactory);
orcus::import_ods::read_styles(content->str(), &aStyles);
orcus::import_ods::read_styles(content.str(), &aStyles);
}
catch (const std::exception& e)
{