tdf#106899 - Import concordance file using appropriate charset

At the beginning of the import process for the various index entries,
try to determine the correct character set for the tox concordance file.

Change-Id: I3f48325a80ed08c2c06c295a24b2fc29ce1adf99
diff --git a/sw/inc/iodetect.hxx b/sw/inc/iodetect.hxx
index 78c72ba..e9fa620 100644
--- a/sw/inc/iodetect.hxx
+++ b/sw/inc/iodetect.hxx
@@ -28,6 +28,8 @@
#include <tools/solar.h>
#include "swdllapi.h"

// Size in bytes of the buffer that SwIoSystem::GetTextEncoding fills from the
// current position of a stream and passes to IsDetectableText.
#define DETECT_ENCODING_BUFFER_SIZE 4096

inline constexpr OUStringLiteral FILTER_RTF = u"RTF";       ///< RTF filter
inline constexpr OUStringLiteral sRtfWH = u"WH_RTF";
inline constexpr OUStringLiteral FILTER_TEXT = u"TEXT"; ///< text filter with default codeset
@@ -105,8 +107,10 @@ public:
    static bool IsValidStgFilter( SotStorage& , const SfxFilter& );
    static bool IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter);

    // tdf#106899 - wrapper around IsDetectableText to retrieve the text encoding for a given stream.
    // Reads at most DETECT_ENCODING_BUFFER_SIZE bytes from the stream's current position and
    // returns the encoding reported by IsDetectableText; if detection fails or yields
    // RTL_TEXTENCODING_DONTKNOW, the thread's text encoding is returned instead.
    // Afterwards the stream is seeked backwards: by the length IsDetectableText reports on
    // success, or by the full number of bytes read on fallback.
    static rtl_TextEncoding GetTextEncoding(SvStream&);
    static bool IsDetectableText( const char* pBuf, sal_uLong &rLen,
            rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom);
            rtl_TextEncoding *pCharSet, bool *pSwap = nullptr, LineEnd *pLineEnd = nullptr, bool *pBom = nullptr);

    static OUString GetSubStorageName( const SfxFilter& rFltr );
};
diff --git a/sw/source/core/edit/edtox.cxx b/sw/source/core/edit/edtox.cxx
index c10f366..429008d 100644
--- a/sw/source/core/edit/edtox.cxx
+++ b/sw/source/core/edit/edtox.cxx
@@ -42,6 +42,7 @@
#include <docary.hxx>
#include <mdiexp.hxx>
#include <strings.hrc>
#include <iodetect.hxx>

using namespace ::com::sun::star;
using namespace ::com::sun::star::i18n;
@@ -295,7 +296,8 @@ void SwEditShell::ApplyAutoMark()
        SfxMedium aMedium( sAutoMarkURL, StreamMode::STD_READ );
        SvStream& rStrm = *aMedium.GetInStream();
        Push();
        rtl_TextEncoding eChrSet = ::osl_getThreadTextEncoding();
        // tdf#106899 - import tox concordance file using the appropriate character set
        const rtl_TextEncoding eChrSet = SwIoSystem::GetTextEncoding(rStrm);

        // SearchOptions to be used in loop below
        sal_Int32 const nLEV_Other    = 2;    //  -> changedChars;
diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx
index 5a0cf29..58d3ac9 100644
--- a/sw/source/filter/basflt/iodetect.cxx
+++ b/sw/source/filter/basflt/iodetect.cxx
@@ -20,6 +20,7 @@
#include <iodetect.hxx>
#include <memory>
#include <osl/endian.h>
#include <osl/thread.h>
#include <sot/storage.hxx>
#include <tools/urlobj.hxx>
#include <unotools/moduleoptions.hxx>
@@ -238,6 +239,25 @@ std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFile
    return SwIoSystem::GetFilterOfFormat(FILTER_TEXT);
}

/**
 * Determine the text encoding of the data at the current position of a stream.
 *
 * Reads at most DETECT_ENCODING_BUFFER_SIZE bytes from rStrm and hands them to
 * IsDetectableText.
 *
 * @param rStrm  stream to inspect; it is seeked backwards again before returning
 * @return the detected encoding, or the thread's text encoding if detection
 *         failed or yielded RTL_TEXTENCODING_DONTKNOW
 */
rtl_TextEncoding SwIoSystem::GetTextEncoding(SvStream& rStrm)
{
    char aBuf[DETECT_ENCODING_BUFFER_SIZE];

    // IsDetectableText takes the length as sal_uLong& and may update it, so
    // keep the number of bytes actually read separately for the fallback seek.
    const sal_uLong nOrig = rStrm.ReadBytes(aBuf, DETECT_ENCODING_BUFFER_SIZE);
    sal_uLong nLen = nOrig;

    // Start from a defined value so the check below never reads an
    // indeterminate encoding should the callee leave it untouched.
    rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
    const bool bRet = SwIoSystem::IsDetectableText(aBuf, nLen, &eCharSet);
    if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
    {
        // Seek back by the length reported by IsDetectableText.
        rStrm.SeekRel(-(tools::Long(nLen)));
        return eCharSet;
    }

    // Detection failed: undo the whole read and use the thread's encoding.
    rStrm.SeekRel(-(tools::Long(nOrig)));
    return osl_getThreadTextEncoding();
}

bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
    rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool *pBom)
{