move ConvertStringToValue() implementation from ScInterpreter to ScGlobal

In preparation for matrix calculations using the string conversion
configuration, and for UI markers on cells containing strings that could be
numeric values.

Change-Id: Ifa9e45853dded249fa741c050ae1f106365f99ea
diff --git a/sc/inc/global.hxx b/sc/inc/global.hxx
index d738b60..dbfeb63 100644
--- a/sc/inc/global.hxx
+++ b/sc/inc/global.hxx
@@ -34,6 +34,7 @@ class ImageList;
class Bitmap;
class SfxItemSet;
class Color;
struct ScCalcConfig;
enum class SvtScriptType;

#define SC_COLLATOR_IGNORES ( \
@@ -703,6 +704,117 @@ SC_DLLPUBLIC    static const sal_Unicode* FindUnquoted( const sal_Unicode* pStri
    SC_DLLPUBLIC static OUString    ReplaceOrAppend( const OUString& rString,
                                                     const OUString& rPlaceholder,
                                                     const OUString& rReplacement );


    /** Convert string content to numeric value.

        In any case, if rError is set, 0.0 is returned.

        If nStringNoValueError is errCellNoValue, that is unconditionally
        assigned to rError and 0.0 is returned. The caller is expected to
        handle this situation. Used by the interpreter.

        Usually errNoValue is passed as nStringNoValueError.

        Otherwise, depending on the string conversion configuration, different
        approaches are taken:


        For ScCalcConfig::StringConversion::ILLEGAL
        The error value passed in nStringNoValueError is assigned to rError
        (and 0.0 returned).


        For ScCalcConfig::StringConversion::ZERO
        A zero value is returned and no error assigned.


        For ScCalcConfig::StringConversion::LOCALE

        If the string is empty or consists only of spaces: if "treat empty
        string as zero" is set, 0.0 is returned, else nStringNoValueError is
        assigned to rError (and 0.0 returned).

        Else a non-empty string is passed to the number formatter's scanner to
        be parsed locale dependent. If that does not detect a numeric value,
        nStringNoValueError is assigned to rError (and 0.0 returned).

        If no number formatter was passed, the conversion falls back to
        UNAMBIGUOUS.


        For ScCalcConfig::StringConversion::UNAMBIGUOUS

        If the string is empty or consists only of spaces: if "treat empty
        string as zero" is set, 0.0 is returned, else nStringNoValueError is
        assigned to rError (and 0.0 returned).

        If the string is not empty, the following conversion rules are applied:

        Converted are only integer numbers including exponent, and ISO 8601 dates
        and times in their extended formats with separators. Anything else,
        especially fractional numeric values with decimal separators or dates other
        than ISO 8601, would be locale dependent and is a no-no. Leading and
        trailing blanks are ignored.

        The following ISO 8601 formats are converted:

        CCYY-MM-DD
        CCYY-MM-DDThh:mm
        CCYY-MM-DDThh:mm:ss
        CCYY-MM-DDThh:mm:ss,s
        CCYY-MM-DDThh:mm:ss.s
        hh:mm
        hh:mm:ss
        hh:mm:ss,s
        hh:mm:ss.s

        The century CC may not be omitted and the two-digit year setting is not
        taken into account. Instead of the T date and time separator exactly one
        blank may be used.

        If a date is given, it must be a valid Gregorian calendar date. In this
        case the optional time must be in the range 00:00 to 23:59:59.99999...
        If only time is given, it may have any value for hours, taking elapsed time
        into account; minutes and seconds are limited to the value 59 as well.

        If the string cannot be converted to a numeric value, the error value
        passed in nStringNoValueError is assigned to rError.


        @param rStr
            The string to be converted.

        @param rConfig
            The calculation configuration. Its string conversion mode and its
            "treat empty string as zero" flag select the approach described
            above.

        @param rError
            Contains the error on return, if any. If an error was set before
            and the conversion did not result in an error, still 0.0 is
            returned.

        @param nStringNoValueError
            The error value to be assigned to rError if the string could not
            be converted to a number.

        @param pFormatter
            The number formatter to use in case of
            ScCalcConfig::StringConversion::LOCALE. Can, but should not, be
            nullptr, in which case conversion falls back to
            ScCalcConfig::StringConversion::UNAMBIGUOUS and, if a date is
            detected, the null date is assumed to be the standard 1899-12-30
            instead of the configured null date.

        @param rCurFmtType
            Can be assigned a format type in case a date or time or date+time
            string was converted, e.g. css::util::NumberFormat::DATE or
            css::util::NumberFormat::TIME or a combination thereof.

        @return
            The converted numeric value; 0.0 whenever nStringNoValueError was
            assigned to rError.

     */
    static double ConvertStringToValue( const OUString& rStr, const ScCalcConfig& rConfig,
            sal_uInt16 & rError, sal_uInt16 nStringNoValueError,
            SvNumberFormatter* pFormatter, short & rCurFmtType );

};

// maybe move to dbdata.hxx (?):
diff --git a/sc/source/core/data/global2.cxx b/sc/source/core/data/global2.cxx
index 71867c4..9a0f51f 100644
--- a/sc/source/core/data/global2.cxx
+++ b/sc/source/core/data/global2.cxx
@@ -26,12 +26,15 @@
#include <stdlib.h>
#include <ctype.h>
#include <unotools/syslocale.hxx>
#include <svl/zforlist.hxx>
#include <formula/errorcodes.hxx>

#include "global.hxx"
#include "rangeutl.hxx"
#include "rechead.hxx"
#include "compiler.hxx"
#include "paramisc.hxx"
#include "calcconfig.hxx"

#include "sc.hrc"
#include "globstr.hrc"
@@ -356,4 +359,267 @@ OUString ScGlobal::GetDocTabName( const OUString& rFileName,
    return aDocTab;
}

namespace
{
bool isEmptyString( const OUString& rStr )
{
    if (rStr.isEmpty())
        return true;
    else if (rStr[0] == ' ')
    {
        const sal_Unicode* p = rStr.getStr() + 1;
        const sal_Unicode* const pStop = p - 1 + rStr.getLength();
        while (p < pStop && *p == ' ')
            ++p;
        if (p == pStop)
            return true;
    }
    return false;
}
}

double ScGlobal::ConvertStringToValue( const OUString& rStr, const ScCalcConfig& rConfig,
        sal_uInt16 & rError, sal_uInt16 nStringNoValueError,
        SvNumberFormatter* pFormatter, short & rCurFmtType )
{
    // We keep ScCalcConfig::StringConversion::LOCALE default until
    // we provide a friendly way to convert string numbers into numbers in the UI.

    double fValue = 0.0;
    if (nStringNoValueError == errCellNoValue)
    {
        // Requested that all strings result in 0, error handled by caller.
        rError = nStringNoValueError;
        return fValue;
    }

    switch (rConfig.meStringConversion)
    {
        case ScCalcConfig::StringConversion::ILLEGAL:
            rError = nStringNoValueError;
            return fValue;
        case ScCalcConfig::StringConversion::ZERO:
            return fValue;
        case ScCalcConfig::StringConversion::LOCALE:
            {
                if (rConfig.mbEmptyStringAsZero)
                {
                    // The number scanner does not accept empty strings or strings
                    // containing only spaces, be on par in these cases with what was
                    // accepted in OOo and is in AOO (see also the
                    // StringConversion::UNAMBIGUOUS branch) and convert to 0 to prevent
                    // interoperability nightmares.

                    if (isEmptyString( rStr))
                        return fValue;
                }

                if (!pFormatter)
                    goto Label_fallback_to_unambiguous;

                sal_uInt32 nFIndex = 0;
                if (!pFormatter->IsNumberFormat(rStr, nFIndex, fValue))
                {
                    rError = nStringNoValueError;
                    fValue = 0.0;
                }
                return fValue;
            }
            break;
        case ScCalcConfig::StringConversion::UNAMBIGUOUS:
Label_fallback_to_unambiguous:
            {
                if (!rConfig.mbEmptyStringAsZero)
                {
                    if (isEmptyString( rStr))
                    {
                        rError = nStringNoValueError;
                        return fValue;
                    }
                }
            }
            // continue below, pulled from switch case for better readability
            break;
    }

    OUString aStr( rStr);
    rtl_math_ConversionStatus eStatus;
    sal_Int32 nParseEnd;
    // Decimal and group separator 0 => only integer and possibly exponent,
    // stops at first non-digit non-sign.
    fValue = ::rtl::math::stringToDouble( aStr, 0, 0, &eStatus, &nParseEnd);
    sal_Int32 nLen;
    if (eStatus == rtl_math_ConversionStatus_Ok && nParseEnd < (nLen = aStr.getLength()))
    {
        // Not at string end, check for trailing blanks or switch to date or
        // time parsing or bail out.
        const sal_Unicode* const pStart = aStr.getStr();
        const sal_Unicode* p = pStart + nParseEnd;
        const sal_Unicode* const pStop = pStart + nLen;
        switch (*p++)
        {
            case ' ':
                while (p < pStop && *p == ' ')
                    ++p;
                if (p < pStop)
                    rError = nStringNoValueError;
                break;
            case '-':
            case ':':
                {
                    bool bDate = (*(p-1) == '-');
                    enum State { year = 0, month, day, hour, minute, second, fraction, done, blank, stop };
                    sal_Int32 nUnit[done] = {0,0,0,0,0,0,0};
                    const sal_Int32 nLimit[done] = {0,12,31,0,59,59,0};
                    State eState = (bDate ? month : minute);
                    rCurFmtType = (bDate ? css::util::NumberFormat::DATE : css::util::NumberFormat::TIME);
                    nUnit[eState-1] = aStr.copy( 0, nParseEnd).toInt32();
                    const sal_Unicode* pLastStart = p;
                    // Ensure there's no preceding sign. Negative dates
                    // currently aren't handled correctly. Also discard
                    // +CCYY-MM-DD
                    p = pStart;
                    while (p < pStop && *p == ' ')
                        ++p;
                    if (p < pStop && !rtl::isAsciiDigit(*p))
                        rError = nStringNoValueError;
                    p = pLastStart;
                    while (p < pStop && !rError && eState < blank)
                    {
                        if (eState == minute)
                            rCurFmtType |= css::util::NumberFormat::TIME;
                        if (rtl::isAsciiDigit(*p))
                        {
                            // Maximum 2 digits per unit, except fractions.
                            if (p - pLastStart >= 2 && eState != fraction)
                                rError = nStringNoValueError;
                        }
                        else if (p > pLastStart)
                        {
                            // We had at least one digit.
                            if (eState < done)
                            {
                                nUnit[eState] = aStr.copy( pLastStart - pStart, p - pLastStart).toInt32();
                                if (nLimit[eState] && nLimit[eState] < nUnit[eState])
                                    rError = nStringNoValueError;
                            }
                            pLastStart = p + 1;     // hypothetical next start
                            // Delimiters must match, a trailing delimiter
                            // yields an invalid date/time.
                            switch (eState)
                            {
                                case month:
                                    // Month must be followed by separator and
                                    // day, no trailing blanks.
                                    if (*p != '-' || (p+1 == pStop))
                                        rError = nStringNoValueError;
                                    break;
                                case day:
                                    if ((*p != 'T' || (p+1 == pStop)) && *p != ' ')
                                        rError = nStringNoValueError;
                                    // Take one blank as a valid delimiter
                                    // between date and time.
                                    break;
                                case hour:
                                    // Hour must be followed by separator and
                                    // minute, no trailing blanks.
                                    if (*p != ':' || (p+1 == pStop))
                                        rError = nStringNoValueError;
                                    break;
                                case minute:
                                    if ((*p != ':' || (p+1 == pStop)) && *p != ' ')
                                        rError = nStringNoValueError;
                                    if (*p == ' ')
                                        eState = done;
                                    break;
                                case second:
                                    if (((*p != ',' && *p != '.') || (p+1 == pStop)) && *p != ' ')
                                        rError = nStringNoValueError;
                                    if (*p == ' ')
                                        eState = done;
                                    break;
                                case fraction:
                                    eState = done;
                                    break;
                                case year:
                                case done:
                                case blank:
                                case stop:
                                    rError = nStringNoValueError;
                                    break;
                            }
                            eState = static_cast<State>(eState + 1);
                        }
                        else
                            rError = nStringNoValueError;
                        ++p;
                    }
                    if (eState == blank)
                    {
                        while (p < pStop && *p == ' ')
                            ++p;
                        if (p < pStop)
                            rError = nStringNoValueError;
                        eState = stop;
                    }

                    // Month without day, or hour without minute.
                    if (eState == month || (eState == day && p <= pLastStart) ||
                            eState == hour || (eState == minute && p <= pLastStart))
                        rError = nStringNoValueError;

                    if (!rError)
                    {
                        // Catch the very last unit at end of string.
                        if (p > pLastStart && eState < done)
                        {
                            nUnit[eState] = aStr.copy( pLastStart - pStart, p - pLastStart).toInt32();
                            if (nLimit[eState] && nLimit[eState] < nUnit[eState])
                                rError = nStringNoValueError;
                        }
                        if (bDate && nUnit[hour] > 23)
                            rError = nStringNoValueError;
                        if (!rError)
                        {
                            if (bDate && nUnit[day] == 0)
                                nUnit[day] = 1;
                            double fFraction = (nUnit[fraction] <= 0 ? 0.0 :
                                    ::rtl::math::pow10Exp( nUnit[fraction],
                                        static_cast<int>( -ceil( log10( static_cast<double>( nUnit[fraction]))))));
                            if (!bDate)
                                fValue = 0.0;
                            else
                            {
                                Date aDate(
                                        sal::static_int_cast<sal_Int16>(nUnit[day]),
                                        sal::static_int_cast<sal_Int16>(nUnit[month]),
                                        sal::static_int_cast<sal_Int16>(nUnit[year]));
                                if (!aDate.IsValidDate())
                                    rError = nStringNoValueError;
                                else
                                {
                                    if (pFormatter)
                                        fValue = aDate - *(pFormatter->GetNullDate());
                                    else
                                    {
                                        SAL_WARN("sc.core","ScGlobal::ConvertStringToValue - fixed null date");
                                        static Date aDefaultNullDate( 30, 12, 1899);
                                        fValue = aDate - aDefaultNullDate;
                                    }
                                }
                            }
                            fValue += ((nUnit[hour] * 3600) + (nUnit[minute] * 60) + nUnit[second] + fFraction) / 86400.0;
                        }
                    }
                }
                break;
            default:
                rError = nStringNoValueError;
        }
        if (rError)
            fValue = 0.0;
    }
    return fValue;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/interpr4.cxx b/sc/source/core/tool/interpr4.cxx
index 37ef230..3f95e6d 100644
--- a/sc/source/core/tool/interpr4.cxx
+++ b/sc/source/core/tool/interpr4.cxx
@@ -184,277 +184,12 @@ sal_uInt16 ScInterpreter::GetCellErrCode( const ScRefCellValue& rCell )
    return rCell.meType == CELLTYPE_FORMULA ? rCell.mpFormula->GetErrCode() : 0;
}

namespace
{
bool isEmptyString( const OUString& rStr )
{
    if (rStr.isEmpty())
        return true;
    else if (rStr[0] == ' ')
    {
        const sal_Unicode* p = rStr.getStr() + 1;
        const sal_Unicode* const pStop = p - 1 + rStr.getLength();
        while (p < pStop && *p == ' ')
            ++p;
        if (p == pStop)
            return true;
    }
    return false;
}
}

/** Convert string content to numeric value.

    Depending on the string conversion configuration different approaches are
    taken. For ScCalcConfig::StringConversion::UNAMBIGUOUS if the string is not
    empty the following conversion rules are applied:

    Converted are only integer numbers including exponent, and ISO 8601 dates
    and times in their extended formats with separators. Anything else,
    especially fractional numeric values with decimal separators or dates other
    than ISO 8601 would be locale dependent and is a no-no. Leading and
    trailing blanks are ignored.

    The following ISO 8601 formats are converted:

    CCYY-MM-DD
    CCYY-MM-DDThh:mm
    CCYY-MM-DDThh:mm:ss
    CCYY-MM-DDThh:mm:ss,s
    CCYY-MM-DDThh:mm:ss.s
    hh:mm
    hh:mm:ss
    hh:mm:ss,s
    hh:mm:ss.s

    The century CC may not be omitted and the two-digit year setting is not
    taken into account. Instead of the T date and time separator exactly one
    blank may be used.

    If a date is given, it must be a valid Gregorian calendar date. In this
    case the optional time must be in the range 00:00 to 23:59:59.99999...
    If only time is given, it may have any value for hours, taking elapsed time
    into account; minutes and seconds are limited to the value 59 as well.
 */

double ScInterpreter::ConvertStringToValue( const OUString& rStr )
{
    // We keep ScCalcConfig::StringConversion::LOCALE default until
    // we provide a friendly way to convert string numbers into numbers in the UI.

    double fValue = 0.0;
    if (mnStringNoValueError == errCellNoValue)
    {
        // Requested that all strings result in 0, error handled by caller.
        SetError( mnStringNoValueError);
        return fValue;
    }

    switch (maCalcConfig.meStringConversion)
    {
        case ScCalcConfig::StringConversion::ILLEGAL:
            SetError( mnStringNoValueError);
            return fValue;
        case ScCalcConfig::StringConversion::ZERO:
            return fValue;
        case ScCalcConfig::StringConversion::LOCALE:
            {
                if (maCalcConfig.mbEmptyStringAsZero)
                {
                    // The number scanner does not accept empty strings or strings
                    // containing only spaces, be on par in these cases with what was
                    // accepted in OOo and is in AOO (see also the
                    // StringConversion::UNAMBIGUOUS branch) and convert to 0 to prevent
                    // interoperability nightmares.

                    if (isEmptyString( rStr))
                        return fValue;
                }

                sal_uInt32 nFIndex = 0;
                if (!pFormatter->IsNumberFormat(rStr, nFIndex, fValue))
                {
                    SetError( mnStringNoValueError);
                    fValue = 0.0;
                }
                return fValue;
            }
            break;
        case ScCalcConfig::StringConversion::UNAMBIGUOUS:
            {
                if (!maCalcConfig.mbEmptyStringAsZero)
                {
                    if (isEmptyString( rStr))
                    {
                        SetError( mnStringNoValueError);
                        return fValue;
                    }
                }
            }
            // continue below, pulled from switch case for better readability
            break;
    }

    OUString aStr( rStr);
    rtl_math_ConversionStatus eStatus;
    sal_Int32 nParseEnd;
    // Decimal and group separator 0 => only integer and possibly exponent,
    // stops at first non-digit non-sign.
    fValue = ::rtl::math::stringToDouble( aStr, 0, 0, &eStatus, &nParseEnd);
    sal_Int32 nLen;
    if (eStatus == rtl_math_ConversionStatus_Ok && nParseEnd < (nLen = aStr.getLength()))
    {
        // Not at string end, check for trailing blanks or switch to date or
        // time parsing or bail out.
        const sal_Unicode* const pStart = aStr.getStr();
        const sal_Unicode* p = pStart + nParseEnd;
        const sal_Unicode* const pStop = pStart + nLen;
        switch (*p++)
        {
            case ' ':
                while (p < pStop && *p == ' ')
                    ++p;
                if (p < pStop)
                    SetError( mnStringNoValueError);
                break;
            case '-':
            case ':':
                {
                    bool bDate = (*(p-1) == '-');
                    enum State { year = 0, month, day, hour, minute, second, fraction, done, blank, stop };
                    sal_Int32 nUnit[done] = {0,0,0,0,0,0,0};
                    const sal_Int32 nLimit[done] = {0,12,31,0,59,59,0};
                    State eState = (bDate ? month : minute);
                    nCurFmtType = (bDate ? css::util::NumberFormat::DATE : css::util::NumberFormat::TIME);
                    nUnit[eState-1] = aStr.copy( 0, nParseEnd).toInt32();
                    const sal_Unicode* pLastStart = p;
                    // Ensure there's no preceding sign. Negative dates
                    // currently aren't handled correctly. Also discard
                    // +CCYY-MM-DD
                    p = pStart;
                    while (p < pStop && *p == ' ')
                        ++p;
                    if (p < pStop && !rtl::isAsciiDigit(*p))
                        SetError( mnStringNoValueError);
                    p = pLastStart;
                    while (p < pStop && !nGlobalError && eState < blank)
                    {
                        if (eState == minute)
                            nCurFmtType |= css::util::NumberFormat::TIME;
                        if (rtl::isAsciiDigit(*p))
                        {
                            // Maximum 2 digits per unit, except fractions.
                            if (p - pLastStart >= 2 && eState != fraction)
                                SetError( mnStringNoValueError);
                        }
                        else if (p > pLastStart)
                        {
                            // We had at least one digit.
                            if (eState < done)
                            {
                                nUnit[eState] = aStr.copy( pLastStart - pStart, p - pLastStart).toInt32();
                                if (nLimit[eState] && nLimit[eState] < nUnit[eState])
                                    SetError( mnStringNoValueError);
                            }
                            pLastStart = p + 1;     // hypothetical next start
                            // Delimiters must match, a trailing delimiter
                            // yields an invalid date/time.
                            switch (eState)
                            {
                                case month:
                                    // Month must be followed by separator and
                                    // day, no trailing blanks.
                                    if (*p != '-' || (p+1 == pStop))
                                        SetError( mnStringNoValueError);
                                    break;
                                case day:
                                    if ((*p != 'T' || (p+1 == pStop)) && *p != ' ')
                                        SetError( mnStringNoValueError);
                                    // Take one blank as a valid delimiter
                                    // between date and time.
                                    break;
                                case hour:
                                    // Hour must be followed by separator and
                                    // minute, no trailing blanks.
                                    if (*p != ':' || (p+1 == pStop))
                                        SetError( mnStringNoValueError);
                                    break;
                                case minute:
                                    if ((*p != ':' || (p+1 == pStop)) && *p != ' ')
                                        SetError( mnStringNoValueError);
                                    if (*p == ' ')
                                        eState = done;
                                    break;
                                case second:
                                    if (((*p != ',' && *p != '.') || (p+1 == pStop)) && *p != ' ')
                                        SetError( mnStringNoValueError);
                                    if (*p == ' ')
                                        eState = done;
                                    break;
                                case fraction:
                                    eState = done;
                                    break;
                                case year:
                                case done:
                                case blank:
                                case stop:
                                    SetError( mnStringNoValueError);
                                    break;
                            }
                            eState = static_cast<State>(eState + 1);
                        }
                        else
                            SetError( mnStringNoValueError);
                        ++p;
                    }
                    if (eState == blank)
                    {
                        while (p < pStop && *p == ' ')
                            ++p;
                        if (p < pStop)
                            SetError( mnStringNoValueError);
                        eState = stop;
                    }

                    // Month without day, or hour without minute.
                    if (eState == month || (eState == day && p <= pLastStart) ||
                            eState == hour || (eState == minute && p <= pLastStart))
                        SetError( mnStringNoValueError);

                    if (!nGlobalError)
                    {
                        // Catch the very last unit at end of string.
                        if (p > pLastStart && eState < done)
                        {
                            nUnit[eState] = aStr.copy( pLastStart - pStart, p - pLastStart).toInt32();
                            if (nLimit[eState] && nLimit[eState] < nUnit[eState])
                                SetError( mnStringNoValueError);
                        }
                        if (bDate && nUnit[hour] > 23)
                            SetError( mnStringNoValueError);
                        if (!nGlobalError)
                        {
                            if (bDate && nUnit[day] == 0)
                                nUnit[day] = 1;
                            double fFraction = (nUnit[fraction] <= 0 ? 0.0 :
                                    ::rtl::math::pow10Exp( nUnit[fraction],
                                        static_cast<int>( -ceil( log10( static_cast<double>( nUnit[fraction]))))));
                            fValue = (bDate ? GetDateSerial(
                                        sal::static_int_cast<sal_Int16>(nUnit[year]),
                                        sal::static_int_cast<sal_Int16>(nUnit[month]),
                                        sal::static_int_cast<sal_Int16>(nUnit[day]),
                                        true, false) : 0.0);
                            fValue += ((nUnit[hour] * 3600) + (nUnit[minute] * 60) + nUnit[second] + fFraction) / 86400.0;
                        }
                    }
                }
                break;
            default:
                SetError( mnStringNoValueError);
        }
        if (nGlobalError)
            fValue = 0.0;
    }
    double fValue = ScGlobal::ConvertStringToValue( rStr, maCalcConfig, nGlobalError, mnStringNoValueError,
            pFormatter, nCurFmtType);
    if (nGlobalError)
        SetError(nGlobalError);
    return fValue;
}