Resolves: tdf#113977 implement REGEX() spreadsheet function

REGEX( Text ; Expression [ ; Replacement ] )

Using ICU regular expressions
http://userguide.icu-project.org/strings/regexp

Change-Id: I4cb9b8ba77cfb5b8faab93037aa0d947609383d7
Reviewed-on: https://gerrit.libreoffice.org/62332
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
diff --git a/formula/inc/core_resource.hrc b/formula/inc/core_resource.hrc
index 467a64b..4cc928f 100644
--- a/formula/inc/core_resource.hrc
+++ b/formula/inc/core_resource.hrc
@@ -465,6 +465,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_ODFF[] =
    { "COM.MICROSOFT.ENCODEURL" , SC_OPCODE_ENCODEURL },
    { "ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
    { "ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG },
    { "ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX },
    { nullptr,  -1 }
};

@@ -907,6 +908,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_OOXML[] =
    { "_xlfn.ENCODEURL" , SC_OPCODE_ENCODEURL },
    { "_xlfn.ORG.LIBREOFFICE.RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
    { "_xlfn.ORG.LIBREOFFICE.ROUNDSIG" , SC_OPCODE_ROUNDSIG },
    { "_xlfn.ORG.LIBREOFFICE.REGEX" , SC_OPCODE_REGEX },
    { nullptr,  -1 }
};

@@ -1354,6 +1356,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_PODF[] =
    { "ENCODEURL" , SC_OPCODE_ENCODEURL },
    { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
    { "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
    { "REGEX" , SC_OPCODE_REGEX },
    { nullptr, -1 }
};

@@ -1800,6 +1803,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH_API[] =
    { "ENCODEURL" , SC_OPCODE_ENCODEURL },
    { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
    { "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
    { "REGEX" , SC_OPCODE_REGEX },
    { nullptr, -1 }
};

@@ -2245,6 +2249,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES_ENGLISH[] =
    { "ENCODEURL" , SC_OPCODE_ENCODEURL },
    { "RAWSUBTRACT" , SC_OPCODE_RAWSUBTRACT },
    { "ROUNDSIG" , SC_OPCODE_ROUNDSIG },
    { "REGEX" , SC_OPCODE_REGEX },
    { nullptr, -1 }
};

@@ -2674,6 +2679,7 @@ const std::pair<const char*, int> RID_STRLIST_FUNCTION_NAMES[] =
    { NC_("RID_STRLIST_FUNCTION_NAMES", "ROUNDSIG") , SC_OPCODE_ROUNDSIG },
    { NC_("RID_STRLIST_FUNCTION_NAMES", "FINDB") , SC_OPCODE_FINDB },
    { NC_("RID_STRLIST_FUNCTION_NAMES", "SEARCHB") , SC_OPCODE_SEARCHB },
    { NC_("RID_STRLIST_FUNCTION_NAMES", "REGEX") , SC_OPCODE_REGEX },

    { nullptr, -1 }
};
diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx
index 0d5b1cc..09a507f 100644
--- a/include/formula/compiler.hxx
+++ b/include/formula/compiler.hxx
@@ -505,7 +505,8 @@
#define SC_OPCODE_REPLACEB          494
#define SC_OPCODE_FINDB             495
#define SC_OPCODE_SEARCHB           496
#define SC_OPCODE_STOP_2_PAR        497     /* last function with two or more parameters' OpCode + 1 */
#define SC_OPCODE_REGEX             497
#define SC_OPCODE_STOP_2_PAR        498     /* last function with two or more parameters' OpCode + 1 */

#define SC_OPCODE_STOP_FUNCTION     SC_OPCODE_STOP_2_PAR            /* last function's OpCode + 1 */
#define SC_OPCODE_LAST_OPCODE_ID    (SC_OPCODE_STOP_FUNCTION - 1)   /* last OpCode */
diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx
index 437403d..d2c6548 100644
--- a/include/formula/opcode.hxx
+++ b/include/formula/opcode.hxx
@@ -343,6 +343,7 @@ enum OpCode : sal_uInt16
        ocFindB             = SC_OPCODE_FINDB,
        ocSearchB           = SC_OPCODE_SEARCHB,
        ocNumberValue       = SC_OPCODE_NUMBERVALUE,
        ocRegex             = SC_OPCODE_REGEX,
    // Matrix functions
        ocMatValue          = SC_OPCODE_MAT_VALUE,
        ocMatDet            = SC_OPCODE_MAT_DET,
@@ -808,6 +809,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
    case ocText: return "Text";
    case ocSubstitute: return "Substitute";
    case ocRept: return "Rept";
    case ocRegex: return "Regex";
    case ocConcat: return "Concat";
    case ocConcat_MS: return "Concat_MS";
    case ocTextJoin_MS: return "TextJoin_MS";
diff --git a/sc/inc/helpids.h b/sc/inc/helpids.h
index b0770e6..283057e 100644
--- a/sc/inc/helpids.h
+++ b/sc/inc/helpids.h
@@ -577,6 +577,7 @@
#define HID_FUNC_REPLACEB                                       "SC_HID_FUNC_REPLACEB"
#define HID_FUNC_FINDB                                          "SC_HID_FUNC_FINDB"
#define HID_FUNC_SEARCHB                                        "SC_HID_FUNC_SEARCHB"
#define HID_FUNC_REGEX                                          "SC_HID_FUNC_REGEX"

#endif

diff --git a/sc/inc/scfuncs.hrc b/sc/inc/scfuncs.hrc
index c469214..5ed0949 100644
--- a/sc/inc/scfuncs.hrc
+++ b/sc/inc/scfuncs.hrc
@@ -3816,6 +3816,18 @@ const char* SC_OPCODE_SUBSTITUTE_ARY[] =
    NC_("SC_OPCODE_SUBSTITUTE", "Which occurrence of the old text is to be replaced.")
};

// -=*# Resource for function REGEX #*=-
// UI strings describing the REGEX function. Layout, as visible from the
// entries below: the first string is the overall function description, then
// one (parameter name, parameter description) pair follows for each of the
// three parameters Text, Expression and Replacement.
// NOTE(review): NC_() presumably marks each string for translation under the
// "SC_OPCODE_REGEX" context -- confirm against the macro definition.
const char* SC_OPCODE_REGEX_ARY[] =
{
    // Function description.
    NC_("SC_OPCODE_REGEX", "Matches and optionally replaces text using regular expressions."),
    // Parameter 1: name and description.
    NC_("SC_OPCODE_REGEX", "Text"),
    NC_("SC_OPCODE_REGEX", "The text to be operated on."),
    // Parameter 2: name and description.
    NC_("SC_OPCODE_REGEX", "Expression"),
    NC_("SC_OPCODE_REGEX", "The regular expression to be matched."),
    // Parameter 3: name and description.
    NC_("SC_OPCODE_REGEX", "Replacement"),
    NC_("SC_OPCODE_REGEX", "The replacement text and expression.")
};

// -=*# Resource for function BASE #*=-
const char* SC_OPCODE_BASE_ARY[] =
{
diff --git a/sc/qa/unit/ucalc.cxx b/sc/qa/unit/ucalc.cxx
index c9d092c..2f1e739 100644
--- a/sc/qa/unit/ucalc.cxx
+++ b/sc/qa/unit/ucalc.cxx
@@ -2654,6 +2654,7 @@ void Test::testFunctionLists()
        "MIDB",
        "NUMBERVALUE",
        "PROPER",
        "REGEX",
        "REPLACE",
        "REPLACEB",
        "REPT",
diff --git a/sc/source/core/data/funcdesc.cxx b/sc/source/core/data/funcdesc.cxx
index 6f31ab7..5c530b3 100644
--- a/sc/source/core/data/funcdesc.cxx
+++ b/sc/source/core/data/funcdesc.cxx
@@ -807,7 +807,8 @@ ScFunctionList::ScFunctionList()
        { SC_OPCODE_ROUNDSIG, ENTRY(SC_OPCODE_ROUNDSIG_ARY), 0, ID_FUNCTION_GRP_MATH, HID_FUNC_ROUNDSIG, 2, { 0, 0 } },
        { SC_OPCODE_REPLACEB, ENTRY(SC_OPCODE_REPLACEB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REPLACEB, 4, { 0, 0, 0, 0 } },
        { SC_OPCODE_FINDB, ENTRY(SC_OPCODE_FINDB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_FINDB, 3, { 0, 0, 1 } },
        { SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } }
        { SC_OPCODE_SEARCHB, ENTRY(SC_OPCODE_SEARCHB_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_SEARCHB, 3, { 0, 0, 1 } },
        { SC_OPCODE_REGEX, ENTRY(SC_OPCODE_REGEX_ARY), 0, ID_FUNCTION_GRP_TEXT, HID_FUNC_REGEX, 3, { 0, 0, 1 } }
    };

    ScFuncDesc* pDesc = nullptr;
diff --git a/sc/source/core/inc/interpre.hxx b/sc/source/core/inc/interpre.hxx
index 1a93baa..c09f914 100644
--- a/sc/source/core/inc/interpre.hxx
+++ b/sc/source/core/inc/interpre.hxx
@@ -665,6 +665,7 @@ private:
    void ScText();
    void ScSubstitute();
    void ScRept();
    void ScRegex();
    void ScConcat();
    void ScConcat_MS();
    void ScTextJoin_MS();
diff --git a/sc/source/core/tool/interpr1.cxx b/sc/source/core/tool/interpr1.cxx
index dc23b55..6708bdc 100644
--- a/sc/source/core/tool/interpr1.cxx
+++ b/sc/source/core/tool/interpr1.cxx
@@ -9222,6 +9222,61 @@ void ScInterpreter::ScSearch()
    }
}

void ScInterpreter::ScRegex()
{
    sal_uInt8 nParamCount = GetByte();
    if (MustHaveParamCount( nParamCount, 2, 3))
    {
        bool bReplacement = false;
        OUString aReplacement;
        if (nParamCount == 3)
        {
            // A missing argument is not an empty string to replace the match.
            if (IsMissing())
                Pop();
            else
            {
                aReplacement = GetString().getString();
                bReplacement = true;
            }
        }

        OUString aExpression = GetString().getString();
        OUString aText = GetString().getString();

        if (nGlobalError != FormulaError::NONE)
        {
            PushError( nGlobalError);
            return;
        }

        sal_Int32 nPos = 0;
        sal_Int32 nEndPos = aText.getLength();
        utl::SearchParam aParam( aExpression, utl::SearchParam::SearchType::Regexp);
        css::util::SearchResult aResult;
        utl::TextSearch aSearch( aParam, *ScGlobal::pCharClass);
        const bool bMatch = aSearch.SearchForward( aText, &nPos, &nEndPos, &aResult);
        if (!bMatch)
            PushNoValue();
        else
        {
            assert(aResult.subRegExpressions >= 1);
            if (!bReplacement)
                PushString( aText.copy( aResult.startOffset[0], aResult.endOffset[0] - aResult.startOffset[0]));
            else
            {
                /* TODO: global replacement of multiple occurrences, introduce
                 * extra parameter with flag 'g'? Loop over positions after
                 * nEndPos until none left? How to keep the offsets in sync
                 * after replacement? That should be done by
                 * ReplaceBackReferences(). */
                aSearch.ReplaceBackReferences( aReplacement, aText, aResult);
                PushString( aReplacement);
            }
        }
    }
}

void ScInterpreter::ScMid()
{
    if ( MustHaveParamCount( GetByte(), 3 ) )
diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx
index f5ca631..d044295 100644
--- a/sc/source/core/tool/interpr4.cxx
+++ b/sc/source/core/tool/interpr4.cxx
@@ -4215,6 +4215,7 @@ StackVar ScInterpreter::Interpret()
                case ocMid              : ScMid();                      break;
                case ocText             : ScText();                     break;
                case ocSubstitute       : ScSubstitute();               break;
                case ocRegex            : ScRegex();                    break;
                case ocRept             : ScRept();                     break;
                case ocConcat           : ScConcat();                   break;
                case ocConcat_MS        : ScConcat_MS();                break;
diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx
index 644a806..2af5523 100644
--- a/sc/source/filter/excel/xlformula.cxx
+++ b/sc/source/filter/excel/xlformula.cxx
@@ -639,7 +639,8 @@ static const XclFunctionInfo saFuncTable_OOoLO[] =
    EXC_FUNCENTRY_OOO( ocForecast_ETS_MUL, 3,  6,  0,  "ORG.LIBREOFFICE.FORECAST.ETS.MULT" ),
    EXC_FUNCENTRY_OOO( ocForecast_ETS_PIM, 3,  7,  0,  "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT" ),
    EXC_FUNCENTRY_OOO( ocForecast_ETS_STM, 3,  6,  0,  "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT" ),
    EXC_FUNCENTRY_OOO( ocRoundSig,      2,  2,  0,  "ORG.LIBREOFFICE.ROUNDSIG" )
    EXC_FUNCENTRY_OOO( ocRoundSig,      2,  2,  0,  "ORG.LIBREOFFICE.ROUNDSIG" ),
    EXC_FUNCENTRY_OOO( ocRegex,         2,  3,  0,  "ORG.LIBREOFFICE.REGEX" )
};

#undef EXC_FUNCENTRY_OOO_IBR
diff --git a/sc/source/filter/oox/formulabase.cxx b/sc/source/filter/oox/formulabase.cxx
index 57bfb35..be09c75 100644
--- a/sc/source/filter/oox/formulabase.cxx
+++ b/sc/source/filter/oox/formulabase.cxx
@@ -910,7 +910,8 @@ static const FunctionData saFuncTableOOoLO[] =
    { "ORG.LIBREOFFICE.FORECAST.ETS.MULT",      "ORG.LIBREOFFICE.FORECAST.ETS.MULT",      NOID,   NOID,   3,  6,  V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
    { "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT",   "ORG.LIBREOFFICE.FORECAST.ETS.PI.MULT",   NOID,   NOID,   4,  7,  V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
    { "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", "ORG.LIBREOFFICE.FORECAST.ETS.STAT.MULT", NOID,   NOID,   3,  6,  V, { VR, VA, VR }, FuncFlags::MACROCALL_NEW },
    { "ORG.LIBREOFFICE.ROUNDSIG",  "ORG.LIBREOFFICE.ROUNDSIG",  NOID,   NOID,   2,  2,  V, { RX }, FuncFlags::MACROCALL_NEW }
    { "ORG.LIBREOFFICE.ROUNDSIG",   "ORG.LIBREOFFICE.ROUNDSIG", NOID, NOID,  2,  2,  V, { RX }, FuncFlags::MACROCALL_NEW },
    { "ORG.LIBREOFFICE.REGEX",      "ORG.LIBREOFFICE.REGEX", NOID, NOID,  2,  3,  V, { RX }, FuncFlags::MACROCALL_NEW }

};