tdf#148253: fix matching algorithm

Using 'flag' as a "continue the loop" marker allowed the loop to keep
processing the wildcard past its last character. This did not crash,
because the wildcard was a subview of a larger string with separators,
but it made the match fail.

Change-Id: I308058b68c59d5719f3c8b5f5656998a95a9ba09
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/132336
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/tools/CppunitTest_tools_test.mk b/tools/CppunitTest_tools_test.mk
index 096b3dd..be47c09 100644
--- a/tools/CppunitTest_tools_test.mk
+++ b/tools/CppunitTest_tools_test.mk
@@ -37,6 +37,7 @@ $(eval $(call gb_CppunitTest_add_exception_objects,tools_test, \
    tools/qa/cppunit/test_cpu_runtime_detection_AVX2 \
    tools/qa/cppunit/test_cpu_runtime_detection_SSE2 \
    tools/qa/cppunit/test_cpu_runtime_detection_SSSE3 \
    tools/qa/cppunit/test_Wildcard \
))

$(eval $(call gb_CppunitTest_add_exception_objects,tools_test,\
diff --git a/tools/qa/cppunit/test_Wildcard.cxx b/tools/qa/cppunit/test_Wildcard.cxx
new file mode 100644
index 0000000..1760ca6
--- /dev/null
+++ b/tools/qa/cppunit/test_Wildcard.cxx
@@ -0,0 +1,46 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <sal/config.h>

#include <sal/types.h>
#include <cppunit/TestAssert.h>
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <tools/wldcrd.hxx>

namespace
{
// CppUnit fixture holding the WildCard regression test(s).
class Test : public CppUnit::TestFixture
{
public:
    // Regression test for tdf#148253 (see definition below).
    void test_Wildcard();

    // Declare the suite and list every test method that belongs to it.
    CPPUNIT_TEST_SUITE(Test);
    CPPUNIT_TEST(test_Wildcard);
    CPPUNIT_TEST_SUITE_END();
};

// Builds one WildCard from four patterns joined by ';' and checks
// WildCard::Matches() against strings that must / must not match.
// NOTE(review): the second constructor argument (';') is presumably the
// separator between the individual patterns - confirm against wldcrd.hxx.
void Test::test_Wildcard()
{
    // Patterns: "*.html", "*??a", "*\*abc" (escaped '*'), "*\?xyz" (escaped '?')
    WildCard wildcard(u"*.html;*??a;*\\*abc;*\\?xyz", ';'); // tdf#148253
    CPPUNIT_ASSERT(wildcard.Matches(u"foo.html"));
    CPPUNIT_ASSERT(wildcard.Matches(u"foo.ht.html")); // test stepping back after partial match
    CPPUNIT_ASSERT(wildcard.Matches(u"foo.html.html")); // test stepping back after full match
    CPPUNIT_ASSERT(wildcard.Matches(u"??aa")); // test stepping back with question marks
    CPPUNIT_ASSERT(wildcard.Matches(u"111*abc")); // escaped asterisk matches a literal '*'
    CPPUNIT_ASSERT(!wildcard.Matches(u"111-abc")); // escaped asterisk must not act as a wildcard
    CPPUNIT_ASSERT(wildcard.Matches(u"111?xyz")); // escaped question mark matches a literal '?'
    CPPUNIT_ASSERT(!wildcard.Matches(u"111-xyz")); // escaped question mark must not act as a wildcard
}

// Register the fixture so that the CppUnit test runner picks it up.
CPPUNIT_TEST_SUITE_REGISTRATION(Test);
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/tools/source/fsys/wldcrd.cxx b/tools/source/fsys/wldcrd.cxx
index 8d31400..e819997 100644
--- a/tools/source/fsys/wldcrd.cxx
+++ b/tools/source/fsys/wldcrd.cxx
@@ -27,67 +27,74 @@
 */
bool WildCard::ImpMatch( std::u16string_view aWild, std::u16string_view aStr )
{
    int    pos=0;
    int    flag=0;
    const sal_Unicode* pPosAfterAsterisk = nullptr;
    const sal_Unicode* pWild = aWild.data();
    const sal_Unicode* pWildEnd = aWild.data() + aWild.size();
    const sal_Unicode* pStr = aStr.data();
    const sal_Unicode* pStrEnd = aStr.data() + aStr.size();

    while ( pWild != pWildEnd || flag )
    while (pWild != pWildEnd)
    {
        switch (*pWild)
        {
            case '?':
                if ( pStr == pStrEnd )
                    return false;
                break;

            default:
                if ( (*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild+1)=='?') || (*(pWild+1) == '*')) )
                break; // Match -> proceed to the next character
            case '\\': // Escaping '?' and '*'; don't we need to escape '\\'?
                if ((pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
                    pWild++;
                if ( *pWild != *pStr )
                    if ( !pos )
                        return false;
                    else
                        pWild += pos;
                else
                    break;
                // WARNING/TODO: may cause execution of next case in some
                // circumstances!
                [[fallthrough]];
            default: // No wildcard, literal match
                if (pStr == pStrEnd)
                    return false;
                if (*pWild == *pStr)
                    break; // Match -> proceed to the next character
                if (!pPosAfterAsterisk)
                    return false;
                pWild = pPosAfterAsterisk;
                [[fallthrough]];
            case '*':
                while ( pWild != pWildEnd && *pWild == '*' )
                    pWild++;
                if ( pWild == pWildEnd )
                    return true;
                flag = 1;
                pos  = 0;
                // Consider strange things like "**?*?*"
                while (*pWild == '?')
                {
                    if (pStr == pStrEnd)
                        return false;
                    pWild++;
                    pStr++;
                    while (pWild != pWildEnd && *pWild == '*')
                        pWild++;
                    if (pWild == pWildEnd)
                        return true;
                }
                // At this point, we are past wildcards, and a literal match must follow
                if ( pStr == pStrEnd )
                    return false;
                while ( pStr != pStrEnd && *pStr != *pWild )
                pPosAfterAsterisk = pWild;
                if ((*pWild == '\\') && (pWild + 1 != pWildEnd) && ((*(pWild + 1) == '?') || (*(pWild + 1) == '*')))
                    pWild++;
                while (*pStr != *pWild)
                {
                    if ( *pWild == '?' ) {
                        pWild++;
                        while ( pWild != pWildEnd && *pWild == '*' )
                            pWild++;
                    }
                    pStr++;
                    if ( pStr == pStrEnd )
                        return pWild == pWildEnd;
                        return false;
                }
                break;
                break; // Match -> proceed to the next character
        }
        if ( pWild != pWildEnd )
            pWild++;
        if ( pStr != pStrEnd )
            pStr++;
        else
            flag = 0;
        if ( flag )
            pos--;
        // We arrive here when the current characters in pWild and pStr match
        assert(pWild != pWildEnd);
        pWild++;
        assert(pStr != pStrEnd);
        pStr++;
        if (pWild == pWildEnd && pPosAfterAsterisk && pStr != pStrEnd)
            pWild = pPosAfterAsterisk; // Try again on the rest of pStr
    }
    return ( pStr == pStrEnd ) && ( pWild == pWildEnd );
    assert(pWild == pWildEnd);
    return pStr == pStrEnd;
}

bool WildCard::Matches( std::u16string_view rString ) const