tdf#148253: fix matching algorithm
Using 'flag' as a "continue the loop" marker allowed processing to
continue past the last character of the wildcard. This did not crash,
because the wildcard was a subview of a larger string with separators,
but it made the match fail.
Change-Id: I308058b68c59d5719f3c8b5f5656998a95a9ba09
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132336
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/tools/CppunitTest_tools_test.mk b/tools/CppunitTest_tools_test.mk
index 096b3dd..be47c09 100644
--- a/tools/CppunitTest_tools_test.mk
+++ b/tools/CppunitTest_tools_test.mk
@@ -37,6 +37,7 @@ $(eval $(call gb_CppunitTest_add_exception_objects,tools_test, \
tools/qa/cppunit/test_cpu_runtime_detection_AVX2 \
tools/qa/cppunit/test_cpu_runtime_detection_SSE2 \
tools/qa/cppunit/test_cpu_runtime_detection_SSSE3 \
tools/qa/cppunit/test_Wildcard \
))
$(eval $(call gb_CppunitTest_add_exception_objects,tools_test,\
diff --git a/tools/qa/cppunit/test_Wildcard.cxx b/tools/qa/cppunit/test_Wildcard.cxx
new file mode 100644
index 0000000..1760ca6
--- /dev/null
+++ b/tools/qa/cppunit/test_Wildcard.cxx
@@ -0,0 +1,46 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <sal/config.h>
#include <sal/types.h>
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <tools/wldcrd.hxx>
namespace
{
// CppUnit fixture holding the WildCard matching test declared below.
class Test : public CppUnit::TestFixture
{
public:
// Checks WildCard::Matches against a multi-pattern wildcard (see tdf#148253).
void test_Wildcard();
// Suite declaration: lists test_Wildcard as the only test of this fixture.
CPPUNIT_TEST_SUITE(Test);
CPPUNIT_TEST(test_Wildcard);
CPPUNIT_TEST_SUITE_END();
};
// Regression test for tdf#148253. Builds one WildCard from four patterns joined with ';'
// (the second constructor argument is presumably the pattern separator - confirm against
// tools/wldcrd.hxx) and asserts which input strings Matches() accepts or rejects.
void Test::test_Wildcard()
{
WildCard wildcard(u"*.html;*??a;*\\*abc;*\\?xyz", ';'); // tdf#148253
// Inputs ending in ".html", including ones where an earlier partial or full occurrence of
// the suffix precedes the final one.
CPPUNIT_ASSERT(wildcard.Matches(u"foo.html"));
CPPUNIT_ASSERT(wildcard.Matches(u"foo.ht.html")); // test stepping back after partial match
CPPUNIT_ASSERT(wildcard.Matches(u"foo.html.html")); // test stepping back after full match
CPPUNIT_ASSERT(wildcard.Matches(u"??aa")); // test stepping back with question marks
// A backslash-escaped '*' or '?' in the pattern must match only that literal character,
// so the '-' variants below are expected to be rejected.
CPPUNIT_ASSERT(wildcard.Matches(u"111*abc")); // test escaped asterisk
CPPUNIT_ASSERT(!wildcard.Matches(u"111-abc")); // test escaped asterisk
CPPUNIT_ASSERT(wildcard.Matches(u"111?xyz")); // test escaped question mark
CPPUNIT_ASSERT(!wildcard.Matches(u"111-xyz")); // test escaped question mark
}
CPPUNIT_TEST_SUITE_REGISTRATION(Test);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/tools/source/fsys/wldcrd.cxx b/tools/source/fsys/wldcrd.cxx
index 8d31400..e819997 100644
--- a/tools/source/fsys/wldcrd.cxx
+++ b/tools/source/fsys/wldcrd.cxx
@@ -27,67 +27,74 @@
*/
// Matches aStr against the single wildcard pattern aWild by walking both views with raw
// pointers. '?' requires a character to be present in aStr, '*' starts a wildcard run, and a
// backslash directly before '?' or '*' makes that character a literal.
// Returns true when the pattern accepts the string, false otherwise.
// NOTE(review): this span reads like a diff hunk whose +/- markers were lost. Statements of the
// previous implementation (based on 'pos' and 'flag') and of the new one (based on
// pPosAfterAsterisk) appear interleaved - e.g. the two consecutive 'while' headers, the two
// 'default:' labels and the two return statements at the end. Confirm against the actual file
// before relying on the control flow exactly as written here.
bool WildCard::ImpMatch( std::u16string_view aWild, std::u16string_view aStr )
{
int pos=0;
int flag=0;
// Pattern position to resume from when a literal comparison fails; assigned once a '*' run has
// been consumed, and nullptr until then.
const sal_Unicode* pPosAfterAsterisk = nullptr;
const sal_Unicode* pWild = aWild.data();
const sal_Unicode* pWildEnd = aWild.data() + aWild.size();
const sal_Unicode* pStr = aStr.data();
const sal_Unicode* pStrEnd = aStr.data() + aStr.size();
while ( pWild != pWildEnd || flag )
while (pWild != pWildEnd)
{
switch (*pWild)
{
case '?':
if ( pStr == pStrEnd )
return false;
break;
default:
if ( (*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild+1)=='?') || (*(pWild+1) == '*')) )
break; // Match -> proceed to the next character
case '\\': // Escaping '?' and '*'; don't we need to escape '\\'?
if ((pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
pWild++;
if ( *pWild != *pStr )
if ( !pos )
return false;
else
pWild += pos;
else
break;
// WARNING/TODO: may cause execution of next case in some
// circumstances!
[[fallthrough]];
default: // No wildcard, literal match
if (pStr == pStrEnd)
return false;
if (*pWild == *pStr)
break; // Match -> proceed to the next character
if (!pPosAfterAsterisk)
return false;
pWild = pPosAfterAsterisk;
[[fallthrough]];
case '*':
// Collapse a run of consecutive '*'; a pattern that ends in '*' accepts the rest of aStr.
while ( pWild != pWildEnd && *pWild == '*' )
pWild++;
if ( pWild == pWildEnd )
return true;
flag = 1;
pos = 0;
// Consider strange things like "**?*?*"
while (*pWild == '?')
{
if (pStr == pStrEnd)
return false;
pWild++;
pStr++;
while (pWild != pWildEnd && *pWild == '*')
pWild++;
if (pWild == pWildEnd)
return true;
}
// At this point, we are past wildcards, and a literal match must follow
if ( pStr == pStrEnd )
return false;
while ( pStr != pStrEnd && *pStr != *pWild )
pPosAfterAsterisk = pWild;
if ((*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
pWild++;
// Advance through aStr until the current pattern character is found.
while (*pStr != *pWild)
{
if ( *pWild == '?' ) {
pWild++;
while ( pWild != pWildEnd && *pWild == '*' )
pWild++;
}
pStr++;
if ( pStr == pStrEnd )
return pWild == pWildEnd;
return false;
}
break;
break; // Match -> proceed to the next character
}
if ( pWild != pWildEnd )
pWild++;
if ( pStr != pStrEnd )
pStr++;
else
flag = 0;
if ( flag )
pos--;
// We arrive here when the current characters in pWild and pStr match
assert(pWild != pWildEnd);
pWild++;
assert(pStr != pStrEnd);
pStr++;
if (pWild == pWildEnd && pPosAfterAsterisk && pStr != pStrEnd)
pWild = pPosAfterAsterisk; // Try again on the rest of pStr
}
return ( pStr == pStrEnd ) && ( pWild == pWildEnd );
assert(pWild == pWildEnd);
return pStr == pStrEnd;
}
bool WildCard::Matches( std::u16string_view rString ) const