tdf#150247: patch orcus to use UTF-16 paths on Windows

This adds UTF-8 -> UTF-16 conversion of the paths passed to the ctor
of file_content on Windows, since both boost::filesystem::file_size
and boost::interprocess::file_mapping take UTF-16 strings on this
platform. The assumption is therefore that 8-bit path strings passed
to orcus are UTF-8-encoded.

This partially reverts commit 75252e58d9b5d020bf7bd6ca66b3a9d780463051
(it keeps use of osl_getThreadTextEncoding for platforms other than
Windows).

Change-Id: Ie467f71a65945f4f07ff432136ea06b811c3f794
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137759
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk
index 6e78c0c..30d3f0d 100644
--- a/external/liborcus/UnpackedTarball_liborcus.mk
+++ b/external/liborcus/UnpackedTarball_liborcus.mk
@@ -47,6 +47,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
ifeq ($(OS),WNT)
$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
	external/liborcus/windows-constants-hack.patch \
	external/liborcus/win_path_utf16.patch \
))
endif

diff --git a/external/liborcus/win_path_utf16.patch b/external/liborcus/win_path_utf16.patch
new file mode 100644
index 0000000..0a6781e
--- /dev/null
+++ b/external/liborcus/win_path_utf16.patch
@@ -0,0 +1,33 @@
diff --git a/src/parser/stream.cpp b/src/parser/stream.cpp
index 00395f59ff25..8f385fb8965a 100644
--- a/src/parser/stream.cpp
+++ b/src/parser/stream.cpp
@@ -147,6 +147,14 @@ std::tuple<std::string_view, size_t, size_t> find_line_with_offset(std::string_v
     return std::make_tuple(line, line_num, offset_on_line);
 }
 
+#ifdef _WIN32
+std::wstring to_wstring(std::string_view s)
+{
+    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conversion;
+    return conversion.from_bytes(s.data(), s.data() + s.size());
+}
+#endif
+
 } // anonymous namespace
 
 struct file_content::impl
@@ -162,8 +170,13 @@ struct file_content::impl
     impl() : content_size(0), content(nullptr) {}
 
     impl(std::string_view filepath) :
+#ifdef _WIN32
+        content_size(fs::file_size(to_wstring(filepath))),
+        mapped_file(to_wstring(filepath).c_str(), bip::read_only),
+#else
         content_size(fs::file_size(std::string{filepath}.c_str())),
         mapped_file(std::string{filepath}.c_str(), bip::read_only),
+#endif
         mapped_region(mapped_file, bip::read_only, 0, content_size),
         content(nullptr)
     {
diff --git a/sc/source/filter/orcus/orcusfiltersimpl.cxx b/sc/source/filter/orcus/orcusfiltersimpl.cxx
index db2d570..e7fdb44 100644
--- a/sc/source/filter/orcus/orcusfiltersimpl.cxx
+++ b/sc/source/filter/orcus/orcusfiltersimpl.cxx
@@ -11,8 +11,6 @@
#include <orcusinterface.hxx>
#include <tokenarray.hxx>

#include <memory>

#include <osl/thread.hxx>
#include <sfx2/docfile.hxx>
#include <sfx2/frame.hxx>
@@ -30,13 +28,6 @@
#include <orcus/stream.hpp>
#include <com/sun/star/task/XStatusIndicator.hpp>

#if defined _WIN32
#include <boost/filesystem/operations.hpp> // for boost::filesystem::filesystem_error
#include <o3tl/char16_t2wchar_t.hxx>
#include <prewin.h>
#include <postwin.h>
#endif

using namespace com::sun::star;

namespace
@@ -129,33 +120,17 @@ bool ScOrcusFiltersImpl::importODS(ScDocument& rDoc, SfxMedium& rMedium) const

bool ScOrcusFiltersImpl::importODS_Styles(ScDocument& rDoc, OUString& aPath) const
{
    OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());

    try
    {
#if defined _WIN32
        std::unique_ptr<orcus::file_content> content;
        try
        {
            content = std::make_unique<orcus::file_content>(aPath8.getStr());
        }
        catch (const boost::filesystem::filesystem_error&)
        {
            // Maybe the path contains characters not representable in ACP. It's not
            // yet possible to pass Unicode path to orcus::file_content ctor - see
            // https://gitlab.com/orcus/orcus/-/issues/30; try short path.
            wchar_t buf[32767];
            if (GetShortPathNameW(o3tl::toW(aPath.getStr()), buf, std::size(buf)) == 0)
                throw;
            aPath8 = OUStringToOString(o3tl::toU(buf), osl_getThreadTextEncoding());
            content = std::make_unique<orcus::file_content>(aPath8);
        }
        OString aPath8 = OUStringToOString(aPath, RTL_TEXTENCODING_UTF8);
#else
        auto content = std::make_unique<orcus::file_content>(aPath8);
        OString aPath8 = OUStringToOString(aPath, osl_getThreadTextEncoding());
#endif
        orcus::file_content content(aPath8);
        ScOrcusFactory aFactory(rDoc);
        ScOrcusStyles aStyles(aFactory);
        orcus::import_ods::read_styles(content->str(), &aStyles);
        orcus::import_ods::read_styles(content.str(), &aStyles);
    }
    catch (const std::exception& e)
    {