tdf#130795 use concurrent hashmap in SharedStringPool

we are loading a spreadsheet in parallel here, but the parallel threads
achievei very little actual concurrency because of heavy contention
in the SharedStringPool mutex.

So switch to a concurrent hash map. I looked at a couple of different
ones (including the Folly one), and this was the one with the simplest
resulting code.

This takes my load time from 12.5s to 8s

Change-Id: I04d6d8e11d613b510eb3bc981f3338819b7ac813
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/121717
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
diff --git a/Makefile.fetch b/Makefile.fetch
index 3c5b7ed..85e2136 100644
--- a/Makefile.fetch
+++ b/Makefile.fetch
@@ -117,6 +117,7 @@
		$(call fetch_Optional,COINMP,COINMP_TARBALL) \
		$(call fetch_Optional,CPPUNIT,CPPUNIT_TARBALL) \
		$(call fetch_Optional,CT2N,CT2N_TARBALL) \
		$(call fetch_Optional,CUCKOO,CUCKOO_TARBALL) \
		$(call fetch_Optional,CURL,CURL_TARBALL) \
		$(call fetch_Optional,EBOOK,EBOOK_TARBALL) \
		$(call fetch_Optional,EPM,EPM_TARBALL) \
diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index 15cfbfd..3f645e0 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -4249,4 +4249,37 @@

endif # SYSTEM_ZXING

# vim: set noet sw=4 ts=4:


ifneq ($(SYSTEM_CUCKOO),)

gb_ExternalProject__use_cuckoo_headers :=

define gb_LinkTarget__use_cuckoo_headers
$(call gb_LinkTarget_set_include,$(1),\
	-I$(call gb_UnpackedTarball_get_dir,cuckoo) \
	$$(INCLUDE) \
)

endef

else # !SYSTEM_CUCKOO

define gb_ExternalProject__use_cuckoo_headers
$(call gb_ExternalProject_use_unpacked,$(1),cuckoo)

endef

define gb_LinkTarget__use_cuckoo_headers
$(call gb_LinkTarget_use_unpacked,$(1),cuckoo)
$(call gb_LinkTarget_set_include,$(1),\
	-I$(call gb_UnpackedTarball_get_dir,cuckoo) \
	$$(INCLUDE) \
)

endef

endif # SYSTEM_CUCKOO


# vim: set noet sw=4 ts=4:
diff --git a/config_host.mk.in b/config_host.mk.in
index abda7d6..87d5fed 100644
--- a/config_host.mk.in
+++ b/config_host.mk.in
@@ -92,6 +92,7 @@
export CPPUNIT_LIBS=$(gb_SPACE)@CPPUNIT_LIBS@
export CPUNAME=@CPUNAME@
export CROSS_COMPILING=@CROSS_COMPILING@
export CUCKOO_CFLAGS=$(gb_SPACE)@CUCKOO_CFLAGS@
export CURL=@CURL@
export CURL_CFLAGS=$(gb_SPACE)@CURL_CFLAGS@
export CURL_LIBS=$(gb_SPACE)@CURL_LIBS@
@@ -571,6 +572,7 @@
export SYSTEM_CAIRO=@SYSTEM_CAIRO@
export SYSTEM_CLUCENE=@SYSTEM_CLUCENE@
export SYSTEM_CPPUNIT=@SYSTEM_CPPUNIT@
export SYSTEM_CUCKOO=@SYSTEM_CUCKOO@
export SYSTEM_CURL=@SYSTEM_CURL@
export SYSTEM_DICTS=@SYSTEM_DICTS@
export SYSTEM_EXPAT=@SYSTEM_EXPAT@
diff --git a/configure.ac b/configure.ac
index 3779924..5f34231 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2334,6 +2334,11 @@
        [Use boost already on system.]),,
    [with_system_boost="$with_system_headers"])

AC_ARG_WITH(system-cuckoo,
    AS_HELP_STRING([--with-system-cuckoo],
        [Use libcuckoo already on system.]),,
    [with_system_cuckoo="$with_system_headers"])

AC_ARG_WITH(system-glm,
    AS_HELP_STRING([--with-system-glm],
        [Use glm already on system.]),,
@@ -10350,6 +10355,26 @@
libo_CHECK_SYSTEM_MODULE([mdds], [MDDS], [mdds-1.5 >= 1.5.0], ["-I${WORKDIR}/UnpackedTarball/mdds/include"])

dnl ===================================================================
dnl Check for system cuckoo
dnl ===================================================================
AC_MSG_CHECKING([which cuckoo to use])
if test "$with_system_cuckoo" = "yes" -o "$with_system_headers" = "yes"; then
    AC_MSG_RESULT([external])
    SYSTEM_CUCKOO=TRUE
    AC_LANG_PUSH([C++])
    AC_CHECK_HEADER([libcuckoo/cuckoohash_map.hh], [],
       [AC_MSG_ERROR([libcuckoo/cuckoohash_map.hh not found. install cuckoo])], [])
    AC_LANG_POP([C++])
else
    AC_MSG_RESULT([internal])
    BUILD_TYPE="$BUILD_TYPE CUCKOO"
    SYSTEM_CUCKOO=
    CUCKOO_CFLAGS="${ISYSTEM}${WORKDIR}/UnpackedTarball/cuckoo"
fi
AC_SUBST([CUCKOO_CFLAGS])
AC_SUBST([SYSTEM_CUCKOO])

dnl ===================================================================
dnl Check for system glm
dnl ===================================================================
AC_MSG_CHECKING([which glm to use])
diff --git a/download.lst b/download.lst
index 10db15a..9ae9db6 100644
--- a/download.lst
+++ b/download.lst
@@ -269,3 +269,5 @@
export ZXING_TARBALL := zxing-cpp-1.1.1.tar.gz
NUMBERTEXT_EXTENSION_SHA256SUM := 1568ed1d2feb8210bb5de61d69574a165cded536cfa17c6953c9064076469de2
OPENSYMBOL_SHA256SUM := f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140
export CUCKOO_SHA256SUM := 471dd83a813ed2816c2246c373004470ad0f6612c7ce72038929dc5161cdd58e
export CUCKOO_TARBALL := libcuckoo-93217f8d391718380c508a722ab9acd5e9081233.tar.gz
diff --git a/external/Module_external.mk b/external/Module_external.mk
index 4566d82..3de6f9c 100644
--- a/external/Module_external.mk
+++ b/external/Module_external.mk
@@ -104,6 +104,7 @@
	$(call gb_Helper_optional,XSLTML,xsltml) \
	$(call gb_Helper_optional,ZLIB,zlib) \
	$(call gb_Helper_optional,ZMF,libzmf) \
	$(call gb_Helper_optional,CUCKOO,cuckoo) \
))

# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/Makefile b/external/cuckoo/Makefile
new file mode 100644
index 0000000..e4968cf8
--- /dev/null
+++ b/external/cuckoo/Makefile
@@ -0,0 +1,7 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-

module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))

include $(module_directory)/../../solenv/gbuild/partial_build.mk

# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/Module_cuckoo.mk b/external/cuckoo/Module_cuckoo.mk
new file mode 100644
index 0000000..d2fda7b
--- /dev/null
+++ b/external/cuckoo/Module_cuckoo.mk
@@ -0,0 +1,16 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

$(eval $(call gb_Module_Module,cuckoo))

$(eval $(call gb_Module_add_targets,cuckoo,\
	UnpackedTarball_cuckoo \
))

# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/README b/external/cuckoo/README
new file mode 100644
index 0000000..6b8c983
--- /dev/null
+++ b/external/cuckoo/README
@@ -0,0 +1,3 @@
A high-performance, concurrent hash table

[https://github.com/efficient/libcuckoo]
diff --git a/external/cuckoo/UnpackedTarball_cuckoo.mk b/external/cuckoo/UnpackedTarball_cuckoo.mk
new file mode 100644
index 0000000..1cfbcc6
--- /dev/null
+++ b/external/cuckoo/UnpackedTarball_cuckoo.mk
@@ -0,0 +1,18 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

$(eval $(call gb_UnpackedTarball_UnpackedTarball,cuckoo))

$(eval $(call gb_UnpackedTarball_set_tarball,cuckoo,$(CUCKOO_TARBALL)))

$(eval $(call gb_UnpackedTarball_set_patchlevel,cuckoo,0))

$(eval $(call gb_UnpackedTarball_update_autoconf_configs,cuckoo))

# vim: set noet sw=4 ts=4:
diff --git a/readlicense_oo/license/license.xml b/readlicense_oo/license/license.xml
index 2a27cab..25d3724 100644
--- a/readlicense_oo/license/license.xml
+++ b/readlicense_oo/license/license.xml
@@ -676,6 +676,12 @@
        <p><a href="#a__LGPL_version_2">Jump to LGPL Version 2</a></p>
        <p><a href="#a__MPL_version_1_1">Jump to MPL Version 1.1</a></p>
    </div>
    <div class="LIBCUCKOO" >
        <h2>libcuckoo</h2>
        <p>The following software may be included in this product: libcuckoo. Use of any of this software is governed by
        the terms of the license below:</p>
        <p><a href="#a__Apache_License_version_2_0">Jump to Apache License Version 2.0</a></p>
    </div>
    <div class="CURL" >
        <h2>libcurl</h2>
        <p>The following software may be included in this product: libcurl. Use of any of this software is governed by
diff --git a/solenv/flatpak-manifest.in b/solenv/flatpak-manifest.in
index 2312670..70eaa31 100644
--- a/solenv/flatpak-manifest.in
+++ b/solenv/flatpak-manifest.in
@@ -682,6 +682,13 @@
                    "type": "file",
                    "dest": "external/tarballs",
                    "dest-filename": "f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140-opens___.ttf"
                },
                {
                    "url": "https://dev-www.libreoffice.org/src/libcuckoo-93217f8d391718380c508a722ab9acd5e9081233.tar.gz",
                    "sha256": "471dd83a813ed2816c2246c373004470ad0f6612c7ce72038929dc5161cdd58e",
                    "type": "file",
                    "dest": "external/tarballs",
                    "dest-filename": "libcuckoo-93217f8d391718380c508a722ab9acd5e9081233.tar.gz"
                }
            ],
            "buildsystem": "simple",
diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk
index 17d64fe..b816600 100644
--- a/svl/Library_svl.mk
+++ b/svl/Library_svl.mk
@@ -21,6 +21,7 @@

$(eval $(call gb_Library_use_externals,svl,\
    boost_headers \
    cuckoo_headers \
    $(if $(filter LINUX MACOSX ANDROID iOS %BSD SOLARIS HAIKU,$(OS)), \
        curl) \
    dtoa \
diff --git a/svl/source/misc/sharedstringpool.cxx b/svl/source/misc/sharedstringpool.cxx
index 2fe8fd8..bf1f555 100644
--- a/svl/source/misc/sharedstringpool.cxx
+++ b/svl/source/misc/sharedstringpool.cxx
@@ -15,50 +15,57 @@
#include <unordered_map>
#include <unordered_set>

/** create a key class that caches the hashcode */
namespace
{
struct StringWithHash
{
    OUString str;
    sal_Int32 hashCode;
    StringWithHash(OUString s)
        : str(s)
        , hashCode(s.hashCode())
    {
    }

    bool operator==(StringWithHash const& rhs) const
    {
        if (hashCode != rhs.hashCode)
            return false;
        return str == rhs.str;
    }
};
}

namespace std
{
template <> struct hash<StringWithHash>
{
    std::size_t operator()(const StringWithHash& k) const { return k.hashCode; }
};
}
#if defined __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
#endif
#include <libcuckoo/cuckoohash_map.hh>
#if defined _MSC_VER
#pragma warning(pop)
#endif
#if defined __clang__
#pragma clang diagnostic pop
#endif

namespace svl
{
namespace
{
sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }

// we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use
typedef std::pair<OUString, OUString> Mapped;

struct HashFunction
{
    size_t operator()(rtl_uString* const key) const
    {
        return rtl_ustr_hashCode_WithLength(key->buffer, key->length);
    }
};

struct EqualsFunction
{
    bool operator()(rtl_uString* const lhs, rtl_uString* const rhs) const
    {
        return OUString::unacquired(&lhs) == OUString::unacquired(&rhs);
    }
};
}

struct SharedStringPool::Impl
{
    mutable std::mutex maMutex;
    // We use this map for two purposes - to store lower->upper case mappings
    // and to retrieve a shared uppercase object, so the management logic
    // is quite complex.
    std::unordered_map<StringWithHash, OUString> maStrMap;
    // and to store an upper->upper mapping.
    // The second mapping is used so that we can
    // share the same rtl_uString object between different keys which map to the same uppercase string to save memory.
    //
    // Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html
    libcuckoo::cuckoohash_map<rtl_uString*, Mapped, HashFunction, EqualsFunction> maStrMap;
    const CharClass& mrCharClass;

    explicit Impl(const CharClass& rCharClass)
@@ -76,43 +83,50 @@

SharedString SharedStringPool::intern(const OUString& rStr)
{
    StringWithHash aStrWithHash(rStr);
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
    auto& rMap = mpImpl->maStrMap;

    auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr);
    if (!bInserted)
    rtl_uString *pResultLower, *pResultUpper;
    if (rMap.find_fn(rStr.pData, [&](const Mapped& rMapped) {
            pResultLower = rMapped.first.pData;
            pResultUpper = rMapped.second.pData;
        }))
        // there is already a mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);
        return SharedString(pResultLower, pResultUpper);

    // This is a new string insertion. Establish mapping to upper-case variant.
    OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
    if (aUpper == rStr)
        // no need to do anything more, because we inserted an upper->upper mapping
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);

    // We need to insert a lower->upper mapping, so also insert
    // an upper->upper mapping, which we can use both for when an upper string
    // is interned, and to look up a shared upper string.
    StringWithHash aUpperWithHash(aUpper);
    auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash);
    if (mapIt2 != mpImpl->maStrMap.end())
    // either insert a new upper->upper mapping, or write the existing mapping into aUpper
    mpImpl->maStrMap.uprase_fn(aUpper.pData,
                               [&](Mapped& mapped) -> bool {
                                   aUpper = mapped.second;
                                   return false;
                               },
                               aUpper, aUpper);

    if (aUpper == rStr)
        // no need to do anything more, because the key is already uppercase
        return SharedString(aUpper.pData, aUpper.pData);

    // either insert a new lower->upper mapping, or write the existing mapping into aLower
    if (mpImpl->maStrMap.uprase_fn(rStr.pData,
                                   [&](Mapped& mapped) -> bool {
                                       pResultLower = mapped.first.pData;
                                       pResultUpper = mapped.second.pData;
                                       return false;
                                   },
                                   rStr, aUpper))
    {
        // there is an already existing upper string
        mapIt->second = mapIt2->first.str;
        return SharedString(mapIt->first.str.pData, mapIt->second.pData);
        pResultLower = rStr.pData;
        pResultUpper = aUpper.pData;
    }

    // There is no already existing upper string.
    // First, update using the iterator, can't do this later because
    // the iterator will be invalid.
    mapIt->second = aUpper;
    mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper);
    return SharedString(rStr.pData, aUpper.pData);
    return SharedString(pResultLower, pResultUpper);
}

void SharedStringPool::purge()
{
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
    auto locked_table = mpImpl->maStrMap.lock_table();

    // Because we can have an uppercase entry mapped to itself,
    // and then a bunch of lowercase entries mapped to that same
@@ -120,12 +134,12 @@
    // time to remove lowercase entries, and then only can we
    // check for unused uppercase entries.

    auto it = mpImpl->maStrMap.begin();
    auto itEnd = mpImpl->maStrMap.end();
    auto it = locked_table.begin();
    auto itEnd = locked_table.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.str.pData;
        rtl_uString* p2 = it->second.pData;
        rtl_uString* p1 = it->second.first.pData;
        rtl_uString* p2 = it->second.second.pData;
        if (p1 != p2)
        {
            // normal case - lowercase mapped to uppercase, which
@@ -133,19 +147,19 @@
            // entry as the key in the map
            if (getRefCount(p1) == 1)
            {
                it = mpImpl->maStrMap.erase(it);
                it = locked_table.erase(it);
                continue;
            }
        }
        ++it;
    }

    it = mpImpl->maStrMap.begin();
    itEnd = mpImpl->maStrMap.end();
    it = locked_table.begin();
    itEnd = locked_table.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.str.pData;
        rtl_uString* p2 = it->second.pData;
        rtl_uString* p1 = it->second.first.pData;
        rtl_uString* p2 = it->second.second.pData;
        if (p1 == p2)
        {
            // uppercase which is mapped to itself, which means
@@ -153,7 +167,7 @@
            // one ref-counted entry in the value in the map
            if (getRefCount(p1) == 2)
            {
                it = mpImpl->maStrMap.erase(it);
                it = locked_table.erase(it);
                continue;
            }
        }
@@ -161,19 +175,15 @@
    }
}

size_t SharedStringPool::getCount() const
{
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
    return mpImpl->maStrMap.size();
}
size_t SharedStringPool::getCount() const { return mpImpl->maStrMap.size(); }

size_t SharedStringPool::getCountIgnoreCase() const
{
    std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
    // this is only called from unit tests, so no need to be efficient
    std::unordered_set<OUString> aUpperSet;
    for (auto const& pair : mpImpl->maStrMap)
        aUpperSet.insert(pair.second);
    auto locked_table = mpImpl->maStrMap.lock_table();
    for (auto const& pair : locked_table)
        aUpperSet.insert(pair.second.second);
    return aUpperSet.size();
}
}