Clean up new rtl/surrogates.h

Change-Id: Iec781bdbbf216cb14c9ba5be5955123273d7699c
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index f5c9490..52151e8 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -211,6 +211,90 @@ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
        - static_cast<sal_Int32>(toAsciiLowerCase(code2));
}

/// @cond INTERNAL
namespace detail {

// Inclusive bounds of the two UTF-16 surrogate code point ranges; used by the
// surrogate helper functions defined below in this header.

// First code point of the high surrogate range (0xD800--0xDBFF).
sal_uInt32 const surrogatesHighFirst = 0xD800;
// Last code point of the high surrogate range (0xD800--0xDBFF).
sal_uInt32 const surrogatesHighLast = 0xDBFF;
// First code point of the low surrogate range (0xDC00--0xDFFF).
sal_uInt32 const surrogatesLowFirst = 0xDC00;
// Last code point of the low surrogate range (0xDC00--0xDFFF).
sal_uInt32 const surrogatesLowLast = 0xDFFF;

}
/// @endcond

/** Test whether a code point lies in the high surrogate range.

    @param code  A Unicode code point; asserted to be at most 0x10FFFF.

    @return  True if code is a high surrogate code point (0xD800--0xDBFF),
    false otherwise.

    @since LibreOffice 5.0
*/
inline bool isHighSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    // Anything below the start of the range cannot be a high surrogate.
    if (code < detail::surrogatesHighFirst) {
        return false;
    }
    return code <= detail::surrogatesHighLast;
}

/** Test whether a code point lies in the low surrogate range.

    @param code  A Unicode code point; asserted to be at most 0x10FFFF.

    @return  True if code is a low surrogate code point (0xDC00--0xDFFF),
    false otherwise.

    @since LibreOffice 5.0
*/
inline bool isLowSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    bool const notBelowRange = code >= detail::surrogatesLowFirst;
    bool const notAboveRange = code <= detail::surrogatesLowLast;
    return notBelowRange && notAboveRange;
}

/** Get high surrogate half of a non-BMP Unicode code point.

    @param code  A non-BMP Unicode code point; asserted to lie in the range
    0x10000--0x10FFFF.

    @return  The UTF-16 high surrogate half for the given code point.

    @since LibreOffice 5.0
 */
inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    assert(code >= 0x10000);
    // The offset from 0x10000 is at most 0xFFFFF (20 bits); its upper 10 bits
    // are OR-ed onto the start of the high surrogate range, so the result is
    // at most 0xDBFF.  The narrowing from sal_uInt32 is therefore made
    // explicit instead of relying on an implicit conversion.
    return static_cast<sal_Unicode>(
        ((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
}

/** Get low surrogate half of a non-BMP Unicode code point.

    @param code  A non-BMP Unicode code point; asserted to lie in the range
    0x10000--0x10FFFF.

    @return  The UTF-16 low surrogate half for the given code point.

    @since LibreOffice 5.0
 */
inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    assert(code >= 0x10000);
    // The lower 10 bits of the offset from 0x10000 are OR-ed onto the start
    // of the low surrogate range, so the result is at most 0xDFFF.  The
    // narrowing from sal_uInt32 is therefore made explicit instead of relying
    // on an implicit conversion.
    return static_cast<sal_Unicode>(
        ((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
}

/** Build the code point encoded by a UTF-16 surrogate pair.

    @param high  A high surrogate code point; asserted via isHighSurrogate.

    @param low  A low surrogate code point; asserted via isLowSurrogate.

    @return  The code point represented by the surrogate pair.

    @since LibreOffice 5.0
*/
inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
    assert(isHighSurrogate(high));
    assert(isLowSurrogate(low));
    // The high surrogate contributes the upper 10 bits of the offset from
    // 0x10000, the low surrogate the lower 10 bits.
    sal_uInt32 const upperPart = (high - detail::surrogatesHighFirst) << 10;
    sal_uInt32 const lowerPart = low - detail::surrogatesLowFirst;
    return upperPart + lowerPart + 0x10000;
}

}

#endif
diff --git a/include/rtl/surrogates.h b/include/rtl/surrogates.h
deleted file mode 100644
index ab98cd6..0000000
--- a/include/rtl/surrogates.h
+++ /dev/null
@@ -1,57 +0,0 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#ifndef INCLUDED_RTL_SURROGATES_H
#define INCLUDED_RTL_SURROGATES_H

#include <sal/config.h>

#include <sal/types.h>

/* Inclusive bounds of the high surrogate range, as tested by
   isHighSurrogate in this header. */
#define SAL_RTL_FIRST_HIGH_SURROGATE 0xD800
#define SAL_RTL_LAST_HIGH_SURROGATE 0xDBFF
/* Inclusive bounds of the low surrogate range, as tested by
   isLowSurrogate in this header. */
#define SAL_RTL_FIRST_LOW_SURROGATE 0xDC00
#define SAL_RTL_LAST_LOW_SURROGATE 0xDFFF

#ifdef __cplusplus
extern "C" {
#endif

/* Returns true when utf16 lies within the inclusive range
   SAL_RTL_FIRST_HIGH_SURROGATE..SAL_RTL_LAST_HIGH_SURROGATE. */
inline bool isHighSurrogate(sal_uInt32 utf16) {
    if (utf16 < SAL_RTL_FIRST_HIGH_SURROGATE) {
        return false;
    }
    return utf16 <= SAL_RTL_LAST_HIGH_SURROGATE;
}

/* Returns true when utf16 lies within the inclusive range
   SAL_RTL_FIRST_LOW_SURROGATE..SAL_RTL_LAST_LOW_SURROGATE. */
inline bool isLowSurrogate(sal_uInt32 utf16) {
    if (utf16 < SAL_RTL_FIRST_LOW_SURROGATE) {
        return false;
    }
    return utf16 <= SAL_RTL_LAST_LOW_SURROGATE;
}

/* Combines a high and a low surrogate into one value: the offset of high
   from the start of the high range supplies the bits above the lowest ten,
   the offset of low from the start of the low range supplies the lowest ten,
   and 0x10000 is added on top.  The arguments are not validated here. */
inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
    sal_uInt32 const upperPart = (high - SAL_RTL_FIRST_HIGH_SURROGATE) << 10;
    sal_uInt32 const lowerPart = low - SAL_RTL_FIRST_LOW_SURROGATE;
    return upperPart + lowerPart + 0x10000;
}

#ifdef __cplusplus
}
#endif

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index 68a85f0..3647908 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -32,7 +32,7 @@
#include <rtl/tencinfo.h>

#include "strimp.hxx"
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <rtl/string.h>

#include "rtl/math.h"
@@ -154,7 +154,7 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
            n += 2;
        else
        {
            if ( !isHighSurrogate(c) )
            if ( !rtl::isHighSurrogate(c) )
                n += 3;
            else
            {
@@ -163,9 +163,9 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
                if ( pStr+1 < pEndStr )
                {
                    c = *(pStr+1);
                    if ( isLowSurrogate(c) )
                    if ( rtl::isLowSurrogate(c) )
                    {
                        nUCS4Char = combineSurrogates(nUCS4Char, c);
                        nUCS4Char = rtl::combineSurrogates(nUCS4Char, c);
                        pStr++;
                    }
                }
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index a90b40ba..ea895e5 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -20,7 +20,6 @@
#include "osl/diagnose.h"
#include "rtl/character.hxx"
#include "rtl/strbuf.hxx"
#include "rtl/surrogates.h"
#include "rtl/textenc.h"
#include "rtl/textcvt.h"
#include "rtl/uri.h"
@@ -133,8 +132,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                    p += 3;
                    nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
                }
                if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
                    && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
                if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
                    && !rtl::isHighSurrogate(nEncoded)
                    && !rtl::isLowSurrogate(nEncoded))
                {
                    *pBegin = p;
                    *pType = EscapeChar;
@@ -171,10 +171,10 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
                    *pBegin = p;
                    *pType = EscapeChar;
                    assert( nDstSize == 1
                        || (nDstSize == 2 && isHighSurrogate(aDst[0])
                            && isLowSurrogate(aDst[1])));
                        || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
                            && rtl::isLowSurrogate(aDst[1])));
                    return nDstSize == 1
                        ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
                        ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
                }
                else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
                         && pEnd - p >= 3 && p[0] == cEscapePrefix
@@ -205,9 +205,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
    else
    {
        *pType = EscapeNo;
        return isHighSurrogate(nChar) && *pBegin < pEnd
               && isLowSurrogate(**pBegin) ?
                   combineSurrogates(nChar, *(*pBegin)++) : nChar;
        return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
               && rtl::isLowSurrogate(**pBegin) ?
                   rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
    }
}

diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index a418c6a..3c9c8b7 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -39,7 +39,7 @@

#include "hash.hxx"
#include "strimp.hxx"
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <rtl/ustring.h>

#include "rtl/math.h"
@@ -588,9 +588,8 @@ void SAL_CALL rtl_uString_newFromCodePoints(
        if (c < 0x10000) {
            *p++ = (sal_Unicode) c;
        } else {
            c -= 0x10000;
            *p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
            *p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
            *p++ = rtl::getHighSurrogate(c);
            *p++ = rtl::getLowSurrogate(c);
        }
    }
    RTL_LOG_STRING_NEW( *newString );
@@ -1049,8 +1048,8 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
    while (incrementCodePoints < 0) {
        assert(n > 0);
        cu = string->buffer[--n];
        if (isLowSurrogate(cu) && n != 0 &&
            isHighSurrogate(string->buffer[n - 1]))
        if (rtl::isLowSurrogate(cu) && n != 0 &&
            rtl::isHighSurrogate(string->buffer[n - 1]))
        {
            --n;
        }
@@ -1058,18 +1057,18 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
    }
    assert(n >= 0 && n < string->length);
    cu = string->buffer[n];
    if (isHighSurrogate(cu) && string->length - n >= 2 &&
        isLowSurrogate(string->buffer[n + 1]))
    if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
        rtl::isLowSurrogate(string->buffer[n + 1]))
    {
        cp = combineSurrogates(cu, string->buffer[n + 1]);
        cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
    } else {
        cp = cu;
    }
    while (incrementCodePoints > 0) {
        assert(n < string->length);
        cu = string->buffer[n++];
        if (isHighSurrogate(cu) && n != string->length &&
            isLowSurrogate(string->buffer[n]))
        if (rtl::isHighSurrogate(cu) && n != string->length &&
            rtl::isLowSurrogate(string->buffer[n]))
        {
            ++n;
        }
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 7d7cc2c..06936b8 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -36,8 +36,8 @@
#include <com/sun/star/uri/XUriReferenceFactory.hpp>
#include <comphelper/processfactory.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <rtl/instance.hxx>
#include <rtl/surrogates.h>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.h>
#include <rtl/ustring.hxx>
@@ -281,9 +281,9 @@ namespace {

// Returns the index following the character that starts at nPos in rStr:
// nPos + 2 when rStr[nPos] is a high surrogate, at least two code units
// remain from nPos, and rStr[nPos + 1] is a low surrogate; nPos + 1 otherwise.
inline sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
{
    return isHighSurrogate(rStr[nPos])
    return rtl::isHighSurrogate(rStr[nPos])
           && rStr.getLength() - nPos >= 2
           && isLowSurrogate(rStr[nPos + 1]) ?
           && rtl::isLowSurrogate(rStr[nPos + 1]) ?
        nPos + 2 : nPos + 1;
}

diff --git a/sw/source/filter/ww8/ww8par3.cxx b/sw/source/filter/ww8/ww8par3.cxx
index 5bb17c9..104052a 100644
--- a/sw/source/filter/ww8/ww8par3.cxx
+++ b/sw/source/filter/ww8/ww8par3.cxx
@@ -79,7 +79,7 @@

#include <IMark.hxx>
#include <unotools/fltrcfg.hxx>
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <xmloff/odffields.hxx>

#include <stdio.h>
@@ -500,16 +500,17 @@ OUString sanitizeString(const OUString& rString)
    while (i < rString.getLength())
    {
        sal_Unicode c = rString[i];
        if (isHighSurrogate(c))
        if (rtl::isHighSurrogate(c))
        {
            if (i+1 == rString.getLength() || !isLowSurrogate(rString[i+1]))
            if (i+1 == rString.getLength()
                || !rtl::isLowSurrogate(rString[i+1]))
            {
                SAL_WARN("sw.ww8", "Surrogate error: high without low");
                return rString.copy(0, i);
            }
            ++i;    //skip correct low
        }
        if (isLowSurrogate(c)) //bare low without preceeding high
        if (rtl::isLowSurrogate(c)) //bare low without preceeding high
        {
            SAL_WARN("sw.ww8", "Surrogate error: low without high");
            return rString.copy(0, i);
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 4dfe588..08c0c6d 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -31,7 +31,6 @@
#include <osl/file.hxx>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/surrogates.h>
#include <rtl/textenc.h>
#include <rtl/ustring.hxx>
#include <sal/types.h>
@@ -4778,9 +4777,9 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
                                    nShift -= 6;
                                }
                                if (bUTF8 && nEncoded >= nMin
                                    && !isHighSurrogate(nEncoded)
                                    && !isLowSurrogate(nEncoded)
                                    && nEncoded <= 0x10FFFF)
                                    && nEncoded <= 0x10FFFF
                                    && !rtl::isHighSurrogate(nEncoded)
                                    && !rtl::isLowSurrogate(nEncoded))
                                {
                                    rBegin = p;
                                    nUTF32 = nEncoded;