tdf#98955 hardware_concurrency not ideal for thread pools

A new static member getPreferredConcurrency added to
comphelper::ThreadPool to return a configurable max
number of threads.

By default the new function returns the hardware_concurrency
value provided by std::thread. When the MAX_CONCURRENCY environment
variable is defined, the return value is capped at the value set there
(it never exceeds hardware_concurrency and is never less than 1).

Three call-sites that used std::thread::hardware_concurrency
have been replaced with getPreferredConcurrency.

Unit tests added to cover the functionality of the new member.

Unit tests are capped to 4 threads.

Reviewed-on: https://gerrit.libreoffice.org/26254
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Ashod Nakashian <ashnakash@gmail.com>
(cherry picked from commit 60e75fb276778459f6055360646d879b8c615d83)

Change-Id: I3332e393a88a5ed436316fa712ed920a4b37f4af
Reviewed-on: https://gerrit.libreoffice.org/26395
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Michael Meeks <michael.meeks@collabora.com>
diff --git a/comphelper/CppunitTest_comphelper_threadpool_test.mk b/comphelper/CppunitTest_comphelper_threadpool_test.mk
new file mode 100644
index 0000000..aa4e0f0
--- /dev/null
+++ b/comphelper/CppunitTest_comphelper_threadpool_test.mk
@@ -0,0 +1,30 @@
# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#

# Declare the comphelper_threadpool_test CppUnit test target.
$(eval $(call gb_CppunitTest_CppunitTest,comphelper_threadpool_test))

# Test source for this target (comphelper/qa/unit/threadpooltest.cxx).
$(eval $(call gb_CppunitTest_add_exception_objects,comphelper_threadpool_test, \
    comphelper/qa/unit/threadpooltest \
))

# External dependencies used by the test.
$(eval $(call gb_CppunitTest_use_externals,comphelper_threadpool_test,\
	boost_headers \
))

$(eval $(call gb_CppunitTest_use_sdk_api,comphelper_threadpool_test))

# Libraries the test uses; comphelper provides the ThreadPool under test.
$(eval $(call gb_CppunitTest_use_libraries,comphelper_threadpool_test, \
    comphelper \
    cppuhelper \
    cppu \
    sal \
	$(gb_UWINAPI) \
))

# vim: set noet sw=4 ts=4:
diff --git a/comphelper/Module_comphelper.mk b/comphelper/Module_comphelper.mk
index 2eba43d..7a1925f 100644
--- a/comphelper/Module_comphelper.mk
+++ b/comphelper/Module_comphelper.mk
@@ -28,6 +28,7 @@ $(eval $(call gb_Module_add_subsequentcheck_targets,comphelper,\
))

$(eval $(call gb_Module_add_check_targets,comphelper,\
    CppunitTest_comphelper_threadpool_test \
    CppunitTest_comphelper_syntaxhighlight_test \
    CppunitTest_comphelper_variadictemplates_test \
	CppunitTest_comphelper_ifcontainer \
diff --git a/comphelper/qa/unit/threadpooltest.cxx b/comphelper/qa/unit/threadpooltest.cxx
new file mode 100644
index 0000000..d71a111
--- /dev/null
+++ b/comphelper/qa/unit/threadpooltest.cxx
@@ -0,0 +1,55 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <comphelper/threadpool.hxx>
#include "cppunit/TestAssert.h"
#include "cppunit/TestFixture.h"
#include "cppunit/extensions/HelperMacros.h"
#include "cppunit/plugin/TestPlugIn.h"

#include <stdlib.h>
#include <thread>

/// CppUnit fixture exercising comphelper::ThreadPool::getPreferredConcurrency().
class ThreadPoolTest : public CppUnit::TestFixture
{
public:
    /// Checks the thread-count cap and that the computed value is cached.
    void testPreferredConcurrency();

    // Suite declaration: lists the test methods that belong to this fixture.
    CPPUNIT_TEST_SUITE(ThreadPoolTest);
    CPPUNIT_TEST(testPreferredConcurrency);
    CPPUNIT_TEST_SUITE_END();
};

void ThreadPoolTest::testPreferredConcurrency() {

    // Check default.
    auto nThreads = comphelper::ThreadPool::getPreferredConcurrency();
    sal_Int32 nExpected = 4; // UTs are capped to 4.
    CPPUNIT_ASSERT_MESSAGE("Expected no more than 4 threads", nExpected >= nThreads);

#ifndef _WIN32_WINNT
    // The result should be cached, so this should change anything.
    nThreads = std::thread::hardware_concurrency() * 2;
    setenv("MAX_CONCURRENCY", std::to_string(nThreads).c_str(), true);
    nThreads = comphelper::ThreadPool::getPreferredConcurrency();
    CPPUNIT_ASSERT_MESSAGE("Expected no more than hardware threads",
                           nThreads <= (sal_Int32)std::thread::hardware_concurrency());

    // Revert and check. Again, nothing should change.
    unsetenv("MAX_CONCURRENCY");
    nThreads = comphelper::ThreadPool::getPreferredConcurrency();
    CPPUNIT_ASSERT_MESSAGE("Expected no more than 4 threads", nExpected >= nThreads);
#endif
}

// Register the fixture's suite with CppUnit so the test runner can find it.
CPPUNIT_TEST_SUITE_REGISTRATION(ThreadPoolTest);

// Provide the CppUnit test plug-in boilerplate for this test library.
CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/comphelper/source/misc/threadpool.cxx b/comphelper/source/misc/threadpool.cxx
index 5861364..8680e00 100644
--- a/comphelper/source/misc/threadpool.cxx
+++ b/comphelper/source/misc/threadpool.cxx
@@ -10,6 +10,7 @@
#include <comphelper/threadpool.hxx>

#include <rtl/instance.hxx>
#include <rtl/string.hxx>
#include <algorithm>
#include <memory>
#include <thread>
@@ -115,7 +116,7 @@ struct ThreadPoolStatic : public rtl::StaticWithInit< std::shared_ptr< ThreadPoo
                                                      ThreadPoolStatic >
{
    std::shared_ptr< ThreadPool > operator () () {
        sal_Int32 nThreads = std::max( std::thread::hardware_concurrency(), 1U );
        const sal_Int32 nThreads = ThreadPool::getPreferredConcurrency();
        return std::make_shared< ThreadPool >( nThreads );
    };
};
@@ -125,6 +126,27 @@ ThreadPool& ThreadPool::getSharedOptimalPool()
    return *ThreadPoolStatic::get().get();
}

/// Returns the number of threads callers should use for thread pools.
///
/// The value is std::thread::hardware_concurrency() (treated as 1 when that
/// reports 0), optionally lowered by the MAX_CONCURRENCY environment
/// variable. A MAX_CONCURRENCY value above the hardware count has no
/// effect, and the result is never less than 1.
///
/// The value is computed once, on the first call, and cached; later changes
/// to MAX_CONCURRENCY are not picked up.
sal_Int32 ThreadPool::getPreferredConcurrency()
{
    // Compute the value in the initializer of a function-local static:
    // C++11 guarantees that such initialization runs exactly once even when
    // several threads make the first call concurrently. A plain
    // "static x = 0; if (x == 0) x = ...;" check would be a data race.
    static const sal_Int32 ThreadCount = []() -> sal_Int32
    {
        const sal_Int32 nHardThreads = std::max(std::thread::hardware_concurrency(), 1U);
        sal_Int32 nThreads = nHardThreads;
        const char *pEnv = getenv("MAX_CONCURRENCY");
        if (pEnv != nullptr)
        {
            // Override with user/admin preference.
            nThreads = rtl_str_toInt32(pEnv, 10);
        }

        // Never exceed the hardware count, and always allow at least one
        // thread (covers a zero or negative override).
        nThreads = std::min(nHardThreads, nThreads);
        return std::max<sal_Int32>(nThreads, 1);
    }();

    return ThreadCount;
}

void ThreadPool::waitAndCleanupWorkers()
{
    waitUntilEmpty();
diff --git a/include/comphelper/threadpool.hxx b/include/comphelper/threadpool.hxx
index cfa471b..2f726f0 100644
--- a/include/comphelper/threadpool.hxx
+++ b/include/comphelper/threadpool.hxx
@@ -36,6 +36,12 @@ public:
    /// count for the CPU
    static      ThreadPool& getSharedOptimalPool();

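    /// returns the preferred number of threads: the
    /// hardware concurrency (at least 1), optionally
    /// lowered by the MAX_CONCURRENCY environment
    /// variable to avoid spawning an unnecessarily
    /// large number of threads on high-core boxes.
    /// The value is computed once and then cached.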
    static      sal_Int32 getPreferredConcurrency();

                ThreadPool( sal_Int32 nWorkers );
    virtual    ~ThreadPool();

diff --git a/package/source/zippackage/ZipPackageStream.cxx b/package/source/zippackage/ZipPackageStream.cxx
index 3f3a47c..43a9b85 100644
--- a/package/source/zippackage/ZipPackageStream.cxx
+++ b/package/source/zippackage/ZipPackageStream.cxx
@@ -832,7 +832,7 @@ bool ZipPackageStream::saveChild(
                    // cores and allow 4-times the amount for having the queue well filled. The
                    // 2nd parameter is the time to wait between cleanups in 10th of a second.
                    // Both values may be added to the configuration settings if needed.
                    static sal_Int32 nAllowedThreads(std::max(std::thread::hardware_concurrency(), 1U) * 4);
                    static sal_Int32 nAllowedThreads(comphelper::ThreadPool::getPreferredConcurrency() * 4);
                    rZipOut.reduceScheduledThreadsToGivenNumberOrLess(nAllowedThreads, 1);

                    // Start a new thread deflating this zip entry
diff --git a/sc/source/filter/excel/xetable.cxx b/sc/source/filter/excel/xetable.cxx
index bc6e48b..f539b92 100644
--- a/sc/source/filter/excel/xetable.cxx
+++ b/sc/source/filter/excel/xetable.cxx
@@ -2133,7 +2133,7 @@ void XclExpRowBuffer::Finalize( XclExpDefaultRowData& rDefRowData, const ScfUInt
    // This is staggeringly slow, and each element operates only
    // on its own data.
    const size_t nRows = maRowMap.size();
    const size_t nThreads = nRows < 128 ? 1 : std::max(std::thread::hardware_concurrency(), 1U);
    const size_t nThreads = nRows < 128 ? 1 : comphelper::ThreadPool::getPreferredConcurrency();
#else
    const size_t nThreads = 1; // globally disable multi-threading for now.
#endif
diff --git a/sc/source/filter/oox/workbookfragment.cxx b/sc/source/filter/oox/workbookfragment.cxx
index 52b7140..6e6e101 100644
--- a/sc/source/filter/oox/workbookfragment.cxx
+++ b/sc/source/filter/oox/workbookfragment.cxx
@@ -309,7 +309,7 @@ public:

void importSheetFragments( WorkbookFragment& rWorkbookHandler, SheetFragmentVector& rSheets )
{
    sal_Int32 nThreads = std::min( rSheets.size(), (size_t) std::max(std::thread::hardware_concurrency(), 1U) );
    sal_Int32 nThreads = std::min( rSheets.size(), (size_t) comphelper::ThreadPool::getPreferredConcurrency() );

    Reference< XComponentContext > xContext = comphelper::getProcessComponentContext();

diff --git a/solenv/gbuild/CppunitTest.mk b/solenv/gbuild/CppunitTest.mk
index 5ab32ca..5532192 100644
--- a/solenv/gbuild/CppunitTest.mk
+++ b/solenv/gbuild/CppunitTest.mk
@@ -19,6 +19,9 @@

# CppunitTest class

# Cap the number of threads unittests use.
export MAX_CONCURRENCY=4

gb_CppunitTest_UNITTESTFAILED ?= $(GBUILDDIR)/platform/unittest-failed-default.sh
gb_CppunitTest_PYTHONDEPS ?= $(call gb_Library_get_target,pyuno_wrapper) $(if $(SYSTEM_PYTHON),,$(call gb_Package_get_target,python3))