tdf#79049 speed up OOXML workbook load (2)
Memory allocation and pointer chasing were
slowing things down in the styles conversion.
So switch to a more cache-dense data structure
(o3tl::sorted_vector instead of std::set),
and re-arrange the loops to be more cache-friendly.
This takes the load time from 1m9s to 40s for me.
Change-Id: I876580adc7823313b0cdb067f2f6b3e61c39ecf8
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114941
Tested-by: Jenkins
Reviewed-by: Noel Grandin <noel.grandin@collabora.co.uk>
diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx
index c49e332..17add16e 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -22,6 +22,7 @@
#include <vector>
#include <map>
#include <set>
#include <o3tl/sorted_vector.hxx>
#include "richstring.hxx"
#include "worksheethelper.hxx"
@@ -165,7 +166,8 @@ private:
/** Writes all cell formatting attributes to the passed cell range list. (deprecates writeXfIdRangeProperties) */
void applyCellMerging( const ScRange& rRange );
void addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange = false );
void addColXfStyles();
void addColXfStyleProcessRowRanges();
private:
/** Stores cell range address and formula token array of an array formula. */
typedef std::pair< ScRange, ApiTokenSequence > ArrayFormula;
@@ -200,7 +202,7 @@ private:
return lhs.mnEndRow<rhs.mnStartRow;
}
};
typedef ::std::set< RowRangeStyle, StyleRowRangeComp > RowStyles;
typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > RowStyles;
typedef ::std::map< sal_Int32, RowStyles > ColStyles;
/** Stores information about a merged cell range. */
struct MergedRange
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx
index de1d2c7..c9c688c 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -346,57 +346,99 @@ static void addIfNotInMyMap( const StylesBuffer& rStyles, std::map< FormatKeyPai
rMap[ FormatKeyPair( nXfId, nFormatId ) ] = rRangeList;
}
void SheetDataBuffer::addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange )
void SheetDataBuffer::addColXfStyles()
{
RowRangeStyle aStyleRows;
aStyleRows.mnNumFmt.first = nXfId;
aStyleRows.mnNumFmt.second = nFormatId;
aStyleRows.mnStartRow = rAddress.aStart.Row();
aStyleRows.mnEndRow = rAddress.aEnd.Row();
for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol )
std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
{
if ( !bProcessRowRange )
maStylesPerColumn[ nCol ].insert( aStyleRows );
else
addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList );
}
// gather all ranges that have the same style and apply them in bulk
for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
{
for (const ScRange & rAddress : rRanges)
{
RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
// Reset row range for each column
RowRangeStyle aStyleRows;
aStyleRows.mnNumFmt.first = rFormatKeyPair.first;
aStyleRows.mnNumFmt.second = rFormatKeyPair.second;
aStyleRows.mnStartRow = rAddress.aStart.Row();
aStyleRows.mnEndRow = rAddress.aEnd.Row();
for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol )
maStylesPerColumn[ nCol ].insert( aStyleRows );
}
}
}
// If aStyleRows includes rows already allocated to a style
// in rRowStyles, then we need to split it into parts.
// ( to occupy only rows that have no style definition)
// Start iterating at the first element that is not completely before aStyleRows
RowStyles::iterator rows_it = rRowStyles.lower_bound(aStyleRows);
RowStyles::iterator rows_end = rRowStyles.end();
bool bAddRange = true;
for ( ; rows_it != rows_end; ++rows_it )
void SheetDataBuffer::addColXfStyleProcessRowRanges()
{
// count the number of row-range-styles we have
AddressConverter& rAddrConv = getAddressConverter();
int cnt = 0;
for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
{
if ( nXfId == -1 ) // it's a dud skip it
continue;
cnt += rRowRangeList.size();
}
// pre-allocate space in the sorted_vector
for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); ++nCol )
{
RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
rRowStyles.reserve(rRowStyles.size() + cnt);
}
const auto nMaxCol = rAddrConv.getMaxApiAddress().Col();
for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol )
{
RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
{
if ( nXfId == -1 ) // it's a dud skip it
continue;
// get all row ranges for id
for ( const auto& rRange : rRowRangeList )
{
const RowRangeStyle& r = *rows_it;
RowRangeStyle aStyleRows;
aStyleRows.mnNumFmt.first = nXfId;
aStyleRows.mnNumFmt.second = -1;
aStyleRows.mnStartRow = rRange.mnFirst;
aStyleRows.mnEndRow = rRange.mnLast;
// Add the part of aStyleRows that does not overlap with r
if ( aStyleRows.mnStartRow < r.mnStartRow )
// Reset row range for each column
aStyleRows.mnStartRow = rRange.mnFirst;
aStyleRows.mnEndRow = rRange.mnLast;
// If aStyleRows includes rows already allocated to a style
// in rRowStyles, then we need to split it into parts.
// ( to occupy only rows that have no style definition)
// Start iterating at the first element that is not completely before aStyleRows
RowStyles::const_iterator rows_it = rRowStyles.lower_bound(aStyleRows);
bool bAddRange = true;
for ( ; rows_it != rRowStyles.end(); ++rows_it )
{
RowRangeStyle aSplit = aStyleRows;
aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1);
// Insert with hint that aSplit comes directly before the current position
rRowStyles.insert( rows_it, aSplit );
}
const RowRangeStyle& r = *rows_it;
// Done if no part of aStyleRows extends beyond r
if ( aStyleRows.mnEndRow <= r.mnEndRow )
{
bAddRange = false;
break;
}
// Add the part of aStyleRows that does not overlap with r
if ( aStyleRows.mnStartRow < r.mnStartRow )
{
RowRangeStyle aSplit = aStyleRows;
aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1);
rows_it = rRowStyles.insert( aSplit ).first;
}
// Cut off the part aStyleRows that was handled above
aStyleRows.mnStartRow = r.mnEndRow + 1;
// Done if no part of aStyleRows extends beyond r
if ( aStyleRows.mnEndRow <= r.mnEndRow )
{
bAddRange = false;
break;
}
// Cut off the part aStyleRows that was handled above
aStyleRows.mnStartRow = r.mnEndRow + 1;
}
if ( bAddRange )
rRowStyles.insert( aStyleRows );
}
if ( bAddRange )
rRowStyles.insert( aStyleRows );
}
}
}
@@ -414,32 +456,9 @@ void SheetDataBuffer::finalizeImport()
// write default formatting of remaining row range
maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange );
std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
{
addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList );
}
// gather all ranges that have the same style and apply them in bulk
for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
{
for (size_t i = 0, nSize = rRanges.size(); i < nSize; ++i)
addColXfStyle( rFormatKeyPair.first, rFormatKeyPair.second, rRanges[i]);
}
addColXfStyles();
for ( const auto& [rXfId, rRowRangeList] : maXfIdRowRangeList )
{
if ( rXfId == -1 ) // it's a dud skip it
continue;
AddressConverter& rAddrConv = getAddressConverter();
// get all row ranges for id
for ( const auto& rRange : rRowRangeList )
{
ScRange aRange( 0, rRange.mnFirst, getSheetIndex(),
rAddrConv.getMaxApiAddress().Col(), rRange.mnLast, getSheetIndex() );
addColXfStyle( rXfId, -1, aRange, true );
}
}
addColXfStyleProcessRowRanges();
ScDocumentImport& rDocImport = getDocImport();
ScDocument& rDoc = rDocImport.getDoc();