faster bulk insert into o3tl::sorted_vector (tdf#117366)

Repeated insert() into o3tl::sorted_vector can turn out to be
pathological, repeatedly moving most items aside for each
element inserted. Make it possible to insert a normal vector
that's been pre-sorted and made unique. 31s->9s for loading
tdf#117366.

Change-Id: If3a0366dd240ad46c23f5f3dc04e781b8c4b2aa2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/131085
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
diff --git a/include/o3tl/sorted_vector.hxx b/include/o3tl/sorted_vector.hxx
index 2bd8b72..13c0b25f 100644
--- a/include/o3tl/sorted_vector.hxx
+++ b/include/o3tl/sorted_vector.hxx
@@ -12,7 +12,9 @@

#include <vector>
#include <algorithm>
#include <cassert>
#include <functional>
#include <iterator>
#include <memory>
#include <type_traits>

@@ -229,19 +231,32 @@ public:

    void insert(sorted_vector<Value,Compare,Find> const& rOther)
    {
       // optimization for the rather common case that we are overwriting this with the contents
       // of another sorted vector
       if ( empty() )
       {
           m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end());
       }
       else
       {
           for (const_iterator it = rOther.m_vector.begin(); it != rOther.m_vector.end(); ++it)
           {
               insert(*it);
           }
       }
        // optimization for the rather common case that we are overwriting this with the contents
        // of another sorted vector
        if ( empty() )
            m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end());
        else
            insert_internal( rOther.m_vector );
    }

    void insert_sorted_unique_vector(const std::vector<Value>& rOther)
    {
        assert( std::is_sorted(rOther.begin(), rOther.end(), Compare()));
        assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == rOther.end());
        if ( empty() )
            m_vector.insert(m_vector.begin(), rOther.m_vector.begin(), rOther.m_vector.end());
        else
            insert_internal( rOther );
    }

    void insert_sorted_unique_vector(std::vector<Value>&& rOther)
    {
        assert( std::is_sorted(rOther.begin(), rOther.end(), Compare()));
        assert( std::unique(rOther.begin(), rOther.end(), compare_equal) == rOther.end());
        if ( empty() )
            m_vector.swap( rOther );
        else
            insert_internal( rOther );
    }

    /* Clear() elements in the vector, and free them one by one. */
@@ -266,6 +281,22 @@ public:
    }

private:
    static bool compare_equal( const Value& v1, const Value& v2 )
    {   // Synthetize == check from < check for std::unique asserts above.
        return !Compare()( v1, v2 ) && !Compare()( v2, v1 );
    }

    void insert_internal( const std::vector<Value>& rOther )
    {
        // Do a union in one pass rather than repeated insert() that could repeatedly
        // move large amounts of data.
        vector_t tmp;
        tmp.reserve( m_vector.size() + rOther.size());
        std::set_union( m_vector.begin(), m_vector.end(),
                        rOther.begin(), rOther.end(),
                        std::back_inserter( tmp ), Compare());
        m_vector.swap( tmp );
    }

    vector_t m_vector;
};
diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx
index a673f91..7f4930b 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -201,8 +201,17 @@ private:
            return lhs.mnEndRow<rhs.mnStartRow;
        }
    };
    struct StyleRowRangeCompEqual
    {
        bool operator() (const RowRangeStyle& lhs, const RowRangeStyle& rhs) const
        {
            return lhs.mnEndRow==rhs.mnStartRow;
        }
    };
    typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > RowStyles;
    typedef ::std::map< sal_Int32, RowStyles > ColStyles;
    typedef ::std::vector< RowRangeStyle > TmpRowStyles;
    typedef ::std::map< sal_Int32, TmpRowStyles > TmpColStyles;
    /** Stores information about a merged cell range. */
    struct MergedRange
    {
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx
index d4d0ad8..b4d6158 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -353,6 +353,9 @@ void SheetDataBuffer::addColXfStyles()
        addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList );
    }
    // gather all ranges that have the same style and apply them in bulk
    // Collect data in unsorted vectors and sort them just once at the end
    // instead of possibly slow repeated inserts.
    TmpColStyles tmpStylesPerColumn;
    for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
    {
        for (const ScRange & rAddress : rRanges)
@@ -363,9 +366,16 @@ void SheetDataBuffer::addColXfStyles()
            aStyleRows.mnStartRow = rAddress.aStart.Row();
            aStyleRows.mnEndRow = rAddress.aEnd.Row();
            for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol )
               maStylesPerColumn[ nCol ].insert( aStyleRows );
               tmpStylesPerColumn[ nCol ].push_back( aStyleRows );
        }
    }
    for( auto& rowStyles : tmpStylesPerColumn )
    {
        TmpRowStyles& s = rowStyles.second;
        std::sort( s.begin(), s.end(), StyleRowRangeComp());
        s.erase( std::unique( s.begin(), s.end(), StyleRowRangeCompEqual()), s.end());
        maStylesPerColumn[ rowStyles.first ].insert_sorted_unique_vector( std::move( s ));
    }
}

void SheetDataBuffer::addColXfStyleProcessRowRanges()