tdf#150452: Revert "tdf#130795 use concurrent hashmap in SharedStringPool"
This commit reverts 3749d9af3745c0eaff7239e379578e4e2af89e9d;
the revert removes the dependency on the external library cuckoo.
Without using cuckoo, the same file from tdf#130795 takes
real 0m4,892s
user 0m5,298s
sys 0m0,449s
With cuckoo, it takes
real 0m4,914s
user 0m5,276s
sys 0m0,444s
which is pretty much the same time.
Change-Id: I4cc9000ac5bf26de22bb9835283ae8d5b3230196
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138435
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
(cherry picked from commit dbc53fbbd13ee53da0160eaf9dbe84455653590f)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138418
diff --git a/Makefile.fetch b/Makefile.fetch
index ccb3860..f83fada 100644
--- a/Makefile.fetch
+++ b/Makefile.fetch
@@ -115,7 +115,6 @@ $(WORKDIR)/download: $(BUILDDIR)/config_$(gb_Side).mk $(SRCDIR)/download.lst $(S
$(call fetch_Optional,LIBCMIS,LIBCMIS_TARBALL) \
$(call fetch_Optional,COINMP,COINMP_TARBALL) \
$(call fetch_Optional,CPPUNIT,CPPUNIT_TARBALL) \
$(call fetch_Optional,CUCKOO,CUCKOO_TARBALL) \
$(call fetch_Optional,CURL,CURL_TARBALL) \
$(call fetch_Optional,EBOOK,EBOOK_TARBALL) \
$(call fetch_Optional,EPM,EPM_TARBALL) \
diff --git a/RepositoryExternal.mk b/RepositoryExternal.mk
index 2b20634..950ed72 100644
--- a/RepositoryExternal.mk
+++ b/RepositoryExternal.mk
@@ -4298,36 +4298,4 @@ endif # ENABLE_ZXING
endif # SYSTEM_ZXING
ifneq ($(SYSTEM_CUCKOO),)
gb_ExternalProject__use_cuckoo_headers :=
define gb_LinkTarget__use_cuckoo_headers
$(call gb_LinkTarget_set_include,$(1),\
$$(INCLUDE) \
)
endef
else # !SYSTEM_CUCKOO
define gb_ExternalProject__use_cuckoo_headers
$(call gb_ExternalProject_use_unpacked,$(1),cuckoo)
endef
define gb_LinkTarget__use_cuckoo_headers
$(call gb_LinkTarget_use_unpacked,$(1),cuckoo)
$(call gb_LinkTarget_set_include,$(1),\
-I$(call gb_UnpackedTarball_get_dir,cuckoo) \
$$(INCLUDE) \
)
endef
endif # SYSTEM_CUCKOO
# vim: set noet sw=4 ts=4:
diff --git a/bin/oss-fuzz-setup.sh b/bin/oss-fuzz-setup.sh
index f69c6b1..0455802 100755
--- a/bin/oss-fuzz-setup.sh
+++ b/bin/oss-fuzz-setup.sh
@@ -24,7 +24,6 @@ curl --no-progress-meter -S \
-C - -O https://dev-www.libreoffice.org/src/$BOX2D_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$DTOA_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$EXPAT_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$CUCKOO_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$LIBJPEG_TURBO_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$LCMS2_TARBALL \
-C - -O https://dev-www.libreoffice.org/src/$LIBEXTTEXTCAT_TARBALL \
diff --git a/config_host.mk.in b/config_host.mk.in
index 386e8323..45946d0 100644
--- a/config_host.mk.in
+++ b/config_host.mk.in
@@ -611,7 +611,6 @@ export SYSTEM_BZIP2=@SYSTEM_BZIP2@
export SYSTEM_CAIRO=@SYSTEM_CAIRO@
export SYSTEM_CLUCENE=@SYSTEM_CLUCENE@
export SYSTEM_CPPUNIT=@SYSTEM_CPPUNIT@
export SYSTEM_CUCKOO=@SYSTEM_CUCKOO@
export SYSTEM_CURL=@SYSTEM_CURL@
export SYSTEM_DICTS=@SYSTEM_DICTS@
export SYSTEM_EXPAT=@SYSTEM_EXPAT@
diff --git a/configure.ac b/configure.ac
index a1bbc7b..846a5bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2416,11 +2416,6 @@ AC_ARG_WITH(system-boost,
[Use boost already on system.]),,
[with_system_boost="$with_system_headers"])
AC_ARG_WITH(system-cuckoo,
AS_HELP_STRING([--with-system-cuckoo],
[Use libcuckoo already on system.]),,
[with_system_cuckoo="$with_system_headers"])
AC_ARG_WITH(system-dragonbox,
AS_HELP_STRING([--with-system-dragonbox],
[Use dragonbox already on system.]),,
@@ -5745,7 +5740,6 @@ if test "$cross_compiling" = "yes"; then
BOOST
CAIRO
CLUCENE
CUCKOO
CURL
DBCONNECTIVITY
DESKTOP
@@ -10548,24 +10542,6 @@ MDDS_CFLAGS_internal="-I${WORKDIR}/UnpackedTarball/mdds/include"
libo_CHECK_SYSTEM_MODULE([mdds],[MDDS],[mdds-2.0 >= 2.0.0])
dnl ===================================================================
dnl Check for system cuckoo
dnl ===================================================================
AC_MSG_CHECKING([which cuckoo to use])
if test "$with_system_cuckoo" = "yes"; then
AC_MSG_RESULT([external])
SYSTEM_CUCKOO=TRUE
AC_LANG_PUSH([C++])
AC_CHECK_HEADER([libcuckoo/cuckoohash_map.hh], [],
[AC_MSG_ERROR([libcuckoo/cuckoohash_map.hh not found. install cuckoo])], [])
AC_LANG_POP([C++])
else
AC_MSG_RESULT([internal])
BUILD_TYPE="$BUILD_TYPE CUCKOO"
SYSTEM_CUCKOO=
fi
AC_SUBST([SYSTEM_CUCKOO])
dnl ===================================================================
dnl Check for system dragonbox
dnl ===================================================================
AC_MSG_CHECKING([which dragonbox to use])
diff --git a/distro-configs/LibreOfficeCoverity.conf b/distro-configs/LibreOfficeCoverity.conf
index 04876f0..0f8ae9c 100644
--- a/distro-configs/LibreOfficeCoverity.conf
+++ b/distro-configs/LibreOfficeCoverity.conf
@@ -11,7 +11,6 @@
--with-idlc-cpp=cpp
--with-system-libs
--with-system-headers
--without-system-cuckoo
--without-system-cppunit
--without-system-libfixmath
--enable-dbus
diff --git a/distro-configs/LibreOfficeFlatpak.conf b/distro-configs/LibreOfficeFlatpak.conf
index 2988d98..ae525e6 100644
--- a/distro-configs/LibreOfficeFlatpak.conf
+++ b/distro-configs/LibreOfficeFlatpak.conf
@@ -19,7 +19,6 @@
--without-system-clucene
--without-system-coinmp
--without-system-cppunit
--without-system-cuckoo
--without-system-dragonbox
--without-system-firebird
--without-system-glm
diff --git a/download.lst b/download.lst
index 5d487b0..e79b39d 100644
--- a/download.lst
+++ b/download.lst
@@ -264,5 +264,3 @@ export ZXING_TARBALL := zxing-cpp-1.2.0.tar.gz
NUMBERTEXT_EXTENSION_SHA256SUM := 1568ed1d2feb8210bb5de61d69574a165cded536cfa17c6953c9064076469de2
export OPENSYMBOL_SHA256SUM := f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140
export OPENSYMBOL_TTF := f543e6e2d7275557a839a164941c0a86e5f2c3f2a0042bfc434c88c6dde9e140-opens___.ttf
export CUCKOO_SHA256SUM := 471dd83a813ed2816c2246c373004470ad0f6612c7ce72038929dc5161cdd58e
export CUCKOO_TARBALL := libcuckoo-93217f8d391718380c508a722ab9acd5e9081233.tar.gz
diff --git a/external/Module_external.mk b/external/Module_external.mk
index c8ad9ae..366fff2 100644
--- a/external/Module_external.mk
+++ b/external/Module_external.mk
@@ -102,7 +102,6 @@ $(eval $(call gb_Module_add_moduledirs,external,\
$(call gb_Helper_optional,XSLTML,xsltml) \
$(call gb_Helper_optional,ZLIB,zlib) \
$(call gb_Helper_optional,ZMF,libzmf) \
$(call gb_Helper_optional,CUCKOO,cuckoo) \
))
# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/Makefile b/external/cuckoo/Makefile
deleted file mode 100644
index e4968cf8..0000000
--- a/external/cuckoo/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
include $(module_directory)/../../solenv/gbuild/partial_build.mk
# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/Module_cuckoo.mk b/external/cuckoo/Module_cuckoo.mk
deleted file mode 100644
index d2fda7b..0000000
--- a/external/cuckoo/Module_cuckoo.mk
+++ /dev/null
@@ -1,16 +0,0 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
$(eval $(call gb_Module_Module,cuckoo))
$(eval $(call gb_Module_add_targets,cuckoo,\
UnpackedTarball_cuckoo \
))
# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/README b/external/cuckoo/README
deleted file mode 100644
index 6b8c983..0000000
--- a/external/cuckoo/README
+++ /dev/null
@@ -1,3 +0,0 @@
A high-performance, concurrent hash table
[https://github.com/efficient/libcuckoo]
\ No newline at end of file
diff --git a/external/cuckoo/UnpackedTarball_cuckoo.mk b/external/cuckoo/UnpackedTarball_cuckoo.mk
deleted file mode 100644
index e042618..0000000
--- a/external/cuckoo/UnpackedTarball_cuckoo.mk
+++ /dev/null
@@ -1,22 +0,0 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
$(eval $(call gb_UnpackedTarball_UnpackedTarball,cuckoo))
$(eval $(call gb_UnpackedTarball_set_tarball,cuckoo,$(CUCKOO_TARBALL)))
$(eval $(call gb_UnpackedTarball_set_patchlevel,cuckoo,0))
$(eval $(call gb_UnpackedTarball_update_autoconf_configs,cuckoo))
$(eval $(call gb_UnpackedTarball_add_patches,cuckoo,\
external/cuckoo/cuckoo-coverity.patch \
))
# vim: set noet sw=4 ts=4:
diff --git a/external/cuckoo/cuckoo-coverity.patch b/external/cuckoo/cuckoo-coverity.patch
deleted file mode 100644
index 273d3bf..0000000
--- a/external/cuckoo/cuckoo-coverity.patch
+++ /dev/null
@@ -1,22 +0,0 @@
--- libcuckoo/cuckoohash_map.hh 2021-09-11 11:29:05.656553870 +0100
+++ libcuckoo/cuckoohash_map.hh 2021-09-11 11:31:48.265203248 +0100
@@ -866,7 +866,8 @@
class TwoBuckets {
public:
- TwoBuckets() {}
+ TwoBuckets()
+ : i1(0), i2(0) {}
TwoBuckets(size_type i1_, size_type i2_, locked_table_mode)
: i1(i1_), i2(i2_) {}
TwoBuckets(locks_t &locks, size_type i1_, size_type i2_, normal_mode)
@@ -1552,7 +1553,8 @@
" MAX_BFS_PATH_LEN - 1");
static_assert(-1 >= std::numeric_limits<decltype(depth)>::min(),
"The depth type must be able to hold a value of -1");
- b_slot() {}
+ b_slot()
+ : bucket(0), pathcode(0), depth(0) {}
b_slot(const size_type b, const uint16_t p, const decltype(depth) d)
: bucket(b), pathcode(p), depth(d) {
assert(d < MAX_BFS_PATH_LEN);
diff --git a/readlicense_oo/license/license.xml b/readlicense_oo/license/license.xml
index 3b9d109..d1dec6a 100644
--- a/readlicense_oo/license/license.xml
+++ b/readlicense_oo/license/license.xml
@@ -664,12 +664,6 @@
<p><a href="#a__LGPL_version_2">Jump to LGPL Version 2</a></p>
<p><a href="#a__MPL_version_1_1">Jump to MPL Version 1.1</a></p>
</div>
<div class="LIBCUCKOO" >
<h2>libcuckoo</h2>
<p>The following software may be included in this product: libcuckoo. Use of any of this software is governed by
the terms of the license below:</p>
<p><a href="#a__Apache_License_version_2_0">Jump to Apache License Version 2.0</a></p>
</div>
<div class="CURL" >
<h2>libcurl</h2>
<p>The following software may be included in this product: libcurl. Use of any of this software is governed by
diff --git a/solenv/flatpak-manifest.in b/solenv/flatpak-manifest.in
index 55358b0..3099285 100644
--- a/solenv/flatpak-manifest.in
+++ b/solenv/flatpak-manifest.in
@@ -682,13 +682,6 @@
"type": "file",
"dest": "external/tarballs",
"dest-filename": "@OPENSYMBOL_TTF@"
},
{
"url": "https://dev-www.libreoffice.org/src/@CUCKOO_TARBALL@",
"sha256": "@CUCKOO_SHA256SUM@",
"type": "file",
"dest": "external/tarballs",
"dest-filename": "@CUCKOO_TARBALL@"
}
],
"buildsystem": "simple",
diff --git a/svl/Library_svl.mk b/svl/Library_svl.mk
index 116f9ba..a67184b 100644
--- a/svl/Library_svl.mk
+++ b/svl/Library_svl.mk
@@ -21,7 +21,6 @@ $(eval $(call gb_Library_Library,svl))
$(eval $(call gb_Library_use_externals,svl,\
boost_headers \
cuckoo_headers \
$(if $(filter LINUX MACOSX ANDROID iOS %BSD SOLARIS HAIKU,$(OS)), \
curl) \
dtoa \
diff --git a/svl/source/misc/sharedstringpool.cxx b/svl/source/misc/sharedstringpool.cxx
index 4f891d3..2fe8fd8 100644
--- a/svl/source/misc/sharedstringpool.cxx
+++ b/svl/source/misc/sharedstringpool.cxx
@@ -11,66 +11,54 @@
#include <svl/sharedstring.hxx>
#include <unotools/charclass.hxx>
#include <mutex>
#include <unordered_map>
#include <unordered_set>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
#if defined __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif
#if defined _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4324) // structure was padded due to alignment specifier
#endif
#include <libcuckoo/cuckoohash_map.hh>
#if defined _MSC_VER
#pragma warning(pop)
#endif
#if defined __clang__
#pragma clang diagnostic pop
#endif
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
/** Hash-map key class: an OUString bundled with its hash code, which is computed once in the constructor */
namespace
{
struct StringWithHash
{
OUString str;
sal_Int32 hashCode; // cached result of hashCode() for the string held in str
StringWithHash(OUString s)
: str(s)
, hashCode(s.hashCode())
{
}
bool operator==(StringWithHash const& rhs) const
{
if (hashCode != rhs.hashCode) // cheap reject before the full string comparison
return false;
return str == rhs.str;
}
};
}
namespace std
{
template <> struct hash<StringWithHash> // lets StringWithHash be used as an unordered_map key
{
std::size_t operator()(const StringWithHash& k) const { return k.hashCode; } // returns the cached hash code
};
}
namespace svl
{
namespace
{
sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
// we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use
typedef std::pair<OUString, OUString> Mapped;
struct HashFunction
{
size_t operator()(rtl_uString* const key) const
{
return rtl_ustr_hashCode_WithLength(key->buffer, key->length);
}
};
struct EqualsFunction
{
bool operator()(rtl_uString* const lhs, rtl_uString* const rhs) const
{
return OUString::unacquired(&lhs) == OUString::unacquired(&rhs);
}
};
}
struct SharedStringPool::Impl
{
mutable std::mutex maMutex;
// We use this map for two purposes - to store lower->upper case mappings
// and to store an upper->upper mapping.
// The second mapping is used so that we can
// share the same rtl_uString object between different keys which map to the same uppercase string to save memory.
//
// Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html
libcuckoo::cuckoohash_map<rtl_uString*, Mapped, HashFunction, EqualsFunction> maStrMap;
// and to retrieve a shared uppercase object, so the management logic
// is quite complex.
std::unordered_map<StringWithHash, OUString> maStrMap;
const CharClass& mrCharClass;
explicit Impl(const CharClass& rCharClass)
@@ -88,50 +76,43 @@ SharedStringPool::~SharedStringPool() {}
SharedString SharedStringPool::intern(const OUString& rStr)
{
auto& rMap = mpImpl->maStrMap;
StringWithHash aStrWithHash(rStr);
std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
rtl_uString *pResultLower = {}, *pResultUpper = {}; // bogus GCC 12 -Werror=maybe-uninitialized
if (rMap.find_fn(rStr.pData, [&](const Mapped& rMapped) {
pResultLower = rMapped.first.pData;
pResultUpper = rMapped.second.pData;
}))
auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr);
if (!bInserted)
// there is already a mapping
return SharedString(pResultLower, pResultUpper);
return SharedString(mapIt->first.str.pData, mapIt->second.pData);
// This is a new string insertion. Establish mapping to upper-case variant.
OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
// either insert a new upper->upper mapping, or write the existing mapping into aUpper
mpImpl->maStrMap.uprase_fn(aUpper.pData,
[&](Mapped& mapped) -> bool {
aUpper = mapped.second;
return false;
},
aUpper, aUpper);
if (aUpper == rStr)
// no need to do anything more, because the key is already uppercase
return SharedString(aUpper.pData, aUpper.pData);
// no need to do anything more, because we inserted an upper->upper mapping
return SharedString(mapIt->first.str.pData, mapIt->second.pData);
// either insert a new lower->upper mapping, or write the existing mapping into aLower
if (mpImpl->maStrMap.uprase_fn(rStr.pData,
[&](Mapped& mapped) -> bool {
pResultLower = mapped.first.pData;
pResultUpper = mapped.second.pData;
return false;
},
rStr, aUpper))
// We need to insert a lower->upper mapping, so also insert
// an upper->upper mapping, which we can use both for when an upper string
// is interned, and to look up a shared upper string.
StringWithHash aUpperWithHash(aUpper);
auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash);
if (mapIt2 != mpImpl->maStrMap.end())
{
pResultLower = rStr.pData;
pResultUpper = aUpper.pData;
// there is an already existing upper string
mapIt->second = mapIt2->first.str;
return SharedString(mapIt->first.str.pData, mapIt->second.pData);
}
return SharedString(pResultLower, pResultUpper);
// There is no already existing upper string.
// First, update using the iterator, can't do this later because
// the iterator will be invalid.
mapIt->second = aUpper;
mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper);
return SharedString(rStr.pData, aUpper.pData);
}
void SharedStringPool::purge()
{
auto locked_table = mpImpl->maStrMap.lock_table();
std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
// Because we can have an uppercase entry mapped to itself,
// and then a bunch of lowercase entries mapped to that same
@@ -139,12 +120,12 @@ void SharedStringPool::purge()
// time to remove lowercase entries, and then only can we
// check for unused uppercase entries.
auto it = locked_table.begin();
auto itEnd = locked_table.end();
auto it = mpImpl->maStrMap.begin();
auto itEnd = mpImpl->maStrMap.end();
while (it != itEnd)
{
rtl_uString* p1 = it->second.first.pData;
rtl_uString* p2 = it->second.second.pData;
rtl_uString* p1 = it->first.str.pData;
rtl_uString* p2 = it->second.pData;
if (p1 != p2)
{
// normal case - lowercase mapped to uppercase, which
@@ -152,19 +133,19 @@ void SharedStringPool::purge()
// entry as the key in the map
if (getRefCount(p1) == 1)
{
it = locked_table.erase(it);
it = mpImpl->maStrMap.erase(it);
continue;
}
}
++it;
}
it = locked_table.begin();
itEnd = locked_table.end();
it = mpImpl->maStrMap.begin();
itEnd = mpImpl->maStrMap.end();
while (it != itEnd)
{
rtl_uString* p1 = it->second.first.pData;
rtl_uString* p2 = it->second.second.pData;
rtl_uString* p1 = it->first.str.pData;
rtl_uString* p2 = it->second.pData;
if (p1 == p2)
{
// uppercase which is mapped to itself, which means
@@ -172,7 +153,7 @@ void SharedStringPool::purge()
// one ref-counted entry in the value in the map
if (getRefCount(p1) == 2)
{
it = locked_table.erase(it);
it = mpImpl->maStrMap.erase(it);
continue;
}
}
@@ -180,15 +161,19 @@ void SharedStringPool::purge()
}
}
size_t SharedStringPool::getCount() const { return mpImpl->maStrMap.size(); }
size_t SharedStringPool::getCount() const
{
std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex); // hold maMutex while reading the map
return mpImpl->maStrMap.size(); // number of entries currently stored in maStrMap
}
size_t SharedStringPool::getCountIgnoreCase() const
{
std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
// this is only called from unit tests, so no need to be efficient
std::unordered_set<OUString> aUpperSet;
auto locked_table = mpImpl->maStrMap.lock_table();
for (auto const& pair : locked_table)
aUpperSet.insert(pair.second.second);
for (auto const& pair : mpImpl->maStrMap)
aUpperSet.insert(pair.second);
return aUpperSet.size();
}
}