Clean up new rtl/surrogates.h
Change-Id: Iec781bdbbf216cb14c9ba5be5955123273d7699c
diff --git a/include/rtl/character.hxx b/include/rtl/character.hxx
index f5c9490..52151e8 100644
--- a/include/rtl/character.hxx
+++ b/include/rtl/character.hxx
@@ -211,6 +211,90 @@ inline sal_Int32 compareIgnoreAsciiCase(sal_uInt32 code1, sal_uInt32 code2)
- static_cast<sal_Int32>(toAsciiLowerCase(code2));
}
/// @cond INTERNAL
namespace detail {

// Inclusive bounds of the UTF-16 high (leading) surrogate range.
const sal_uInt32 surrogatesHighFirst = 0xD800;
const sal_uInt32 surrogatesHighLast = 0xDBFF;

// Inclusive bounds of the UTF-16 low (trailing) surrogate range.
const sal_uInt32 surrogatesLowFirst = 0xDC00;
const sal_uInt32 surrogatesLowLast = 0xDFFF;

}
/// @endcond
/** Test whether a code point is a high surrogate.

    @param code  A Unicode code point; asserted not to exceed 0x10FFFF.

    @return  True if code lies in the high surrogate range
    (0xD800--0xDBFF), false otherwise.

    @since LibreOffice 5.0
*/
inline bool isHighSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    // In range exactly when it is neither below the first nor above the last
    // high surrogate.
    return !(code < detail::surrogatesHighFirst
             || code > detail::surrogatesHighLast);
}
/** Test whether a code point is a low surrogate.

    @param code  A Unicode code point; asserted not to exceed 0x10FFFF.

    @return  True if code lies in the low surrogate range
    (0xDC00--0xDFFF), false otherwise.

    @since LibreOffice 5.0
*/
inline bool isLowSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    // In range exactly when it is neither below the first nor above the last
    // low surrogate.
    return !(code < detail::surrogatesLowFirst
             || code > detail::surrogatesLowLast);
}
/** Get high surrogate half of a non-BMP Unicode code point.

    @param code  A non-BMP Unicode code point (0x10000--0x10FFFF); both
    bounds are asserted.

    @return  The UTF-16 high surrogate half for the given code point.

    @since LibreOffice 5.0
*/
inline sal_Unicode getHighSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    assert(code >= 0x10000);
    // The upper ten bits of the 20-bit offset from 0x10000 select the high
    // surrogate.  For input in the asserted range the result lies within
    // 0xD800--0xDBFF, so the narrowing from sal_uInt32 to sal_Unicode is
    // written out explicitly instead of being left as an implicit conversion.
    return static_cast<sal_Unicode>(
        ((code - 0x10000) >> 10) | detail::surrogatesHighFirst);
}
/** Get low surrogate half of a non-BMP Unicode code point.

    @param code  A non-BMP Unicode code point (0x10000--0x10FFFF); both
    bounds are asserted.

    @return  The UTF-16 low surrogate half for the given code point.

    @since LibreOffice 5.0
*/
inline sal_Unicode getLowSurrogate(sal_uInt32 code) {
    assert(code <= 0x10FFFF);
    assert(code >= 0x10000);
    // The lower ten bits of the offset from 0x10000 select the low
    // surrogate.  The result lies within 0xDC00--0xDFFF, so the narrowing
    // from sal_uInt32 to sal_Unicode is written out explicitly instead of
    // being left as an implicit conversion.
    return static_cast<sal_Unicode>(
        ((code - 0x10000) & 0x3FF) | detail::surrogatesLowFirst);
}
/** Combine a surrogate pair into the code point it represents.

    @param high  A high surrogate code point (asserted via isHighSurrogate).

    @param low  A low surrogate code point (asserted via isLowSurrogate).

    @return  The code point represented by the surrogate pair.

    @since LibreOffice 5.0
*/
inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
    assert(isHighSurrogate(high));
    assert(isLowSurrogate(low));
    // The high surrogate supplies the upper ten bits of the offset from
    // 0x10000, the low surrogate the lower ten bits.
    sal_uInt32 const upperBits = (high - detail::surrogatesHighFirst) << 10;
    sal_uInt32 const lowerBits = low - detail::surrogatesLowFirst;
    return upperBits + lowerBits + 0x10000;
}
}
#endif
diff --git a/include/rtl/surrogates.h b/include/rtl/surrogates.h
deleted file mode 100644
index ab98cd6..0000000
--- a/include/rtl/surrogates.h
+++ /dev/null
@@ -1,57 +0,0 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef INCLUDED_RTL_SURROGATES_H
#define INCLUDED_RTL_SURROGATES_H
#include <sal/config.h>
#include <sal/types.h>
#define SAL_RTL_FIRST_HIGH_SURROGATE 0xD800
#define SAL_RTL_LAST_HIGH_SURROGATE 0xDBFF
#define SAL_RTL_FIRST_LOW_SURROGATE 0xDC00
#define SAL_RTL_LAST_LOW_SURROGATE 0xDFFF
#ifdef __cplusplus
extern "C" {
#endif
inline bool isHighSurrogate(sal_uInt32 utf16) {
return utf16 >= SAL_RTL_FIRST_HIGH_SURROGATE
&& utf16 <= SAL_RTL_LAST_HIGH_SURROGATE;
}
inline bool isLowSurrogate(sal_uInt32 utf16) {
return utf16 >= SAL_RTL_FIRST_LOW_SURROGATE
&& utf16 <= SAL_RTL_LAST_LOW_SURROGATE;
}
inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) {
return ((high - SAL_RTL_FIRST_HIGH_SURROGATE) << 10)
+ (low - SAL_RTL_FIRST_LOW_SURROGATE) + 0x10000;
}
#ifdef __cplusplus
}
#endif
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sal/rtl/string.cxx b/sal/rtl/string.cxx
index 68a85f0..3647908 100644
--- a/sal/rtl/string.cxx
+++ b/sal/rtl/string.cxx
@@ -32,7 +32,7 @@
#include <rtl/tencinfo.h>
#include "strimp.hxx"
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include "rtl/math.h"
@@ -154,7 +154,7 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
n += 2;
else
{
if ( !isHighSurrogate(c) )
if ( !rtl::isHighSurrogate(c) )
n += 3;
else
{
@@ -163,9 +163,9 @@ static int rtl_ImplGetFastUTF8ByteLen( const sal_Unicode* pStr, sal_Int32 nLen )
if ( pStr+1 < pEndStr )
{
c = *(pStr+1);
if ( isLowSurrogate(c) )
if ( rtl::isLowSurrogate(c) )
{
nUCS4Char = combineSurrogates(nUCS4Char, c);
nUCS4Char = rtl::combineSurrogates(nUCS4Char, c);
pStr++;
}
}
diff --git a/sal/rtl/uri.cxx b/sal/rtl/uri.cxx
index a90b40ba..ea895e5 100644
--- a/sal/rtl/uri.cxx
+++ b/sal/rtl/uri.cxx
@@ -20,7 +20,6 @@
#include "osl/diagnose.h"
#include "rtl/character.hxx"
#include "rtl/strbuf.hxx"
#include "rtl/surrogates.h"
#include "rtl/textenc.h"
#include "rtl/textcvt.h"
#include "rtl/uri.h"
@@ -133,8 +132,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
p += 3;
nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
}
if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
&& !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
&& !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
{
*pBegin = p;
*pType = EscapeChar;
@@ -171,10 +171,10 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
*pBegin = p;
*pType = EscapeChar;
assert( nDstSize == 1
|| (nDstSize == 2 && isHighSurrogate(aDst[0])
&& isLowSurrogate(aDst[1])));
|| (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
&& rtl::isLowSurrogate(aDst[1])));
return nDstSize == 1
? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
}
else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
&& pEnd - p >= 3 && p[0] == cEscapePrefix
@@ -205,9 +205,9 @@ sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
else
{
*pType = EscapeNo;
return isHighSurrogate(nChar) && *pBegin < pEnd
&& isLowSurrogate(**pBegin) ?
combineSurrogates(nChar, *(*pBegin)++) : nChar;
return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
&& rtl::isLowSurrogate(**pBegin) ?
rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
}
}
diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx
index a418c6a..3c9c8b7 100644
--- a/sal/rtl/ustring.cxx
+++ b/sal/rtl/ustring.cxx
@@ -39,7 +39,7 @@
#include "hash.hxx"
#include "strimp.hxx"
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <rtl/ustring.h>
#include "rtl/math.h"
@@ -588,9 +588,8 @@ void SAL_CALL rtl_uString_newFromCodePoints(
if (c < 0x10000) {
*p++ = (sal_Unicode) c;
} else {
c -= 0x10000;
*p++ = (sal_Unicode) ((c >> 10) | SAL_RTL_FIRST_HIGH_SURROGATE);
*p++ = (sal_Unicode) ((c & 0x3FF) | SAL_RTL_FIRST_LOW_SURROGATE);
*p++ = rtl::getHighSurrogate(c);
*p++ = rtl::getLowSurrogate(c);
}
}
RTL_LOG_STRING_NEW( *newString );
@@ -1049,8 +1048,8 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
while (incrementCodePoints < 0) {
assert(n > 0);
cu = string->buffer[--n];
if (isLowSurrogate(cu) && n != 0 &&
isHighSurrogate(string->buffer[n - 1]))
if (rtl::isLowSurrogate(cu) && n != 0 &&
rtl::isHighSurrogate(string->buffer[n - 1]))
{
--n;
}
@@ -1058,18 +1057,18 @@ sal_uInt32 SAL_CALL rtl_uString_iterateCodePoints(
}
assert(n >= 0 && n < string->length);
cu = string->buffer[n];
if (isHighSurrogate(cu) && string->length - n >= 2 &&
isLowSurrogate(string->buffer[n + 1]))
if (rtl::isHighSurrogate(cu) && string->length - n >= 2 &&
rtl::isLowSurrogate(string->buffer[n + 1]))
{
cp = combineSurrogates(cu, string->buffer[n + 1]);
cp = rtl::combineSurrogates(cu, string->buffer[n + 1]);
} else {
cp = cu;
}
while (incrementCodePoints > 0) {
assert(n < string->length);
cu = string->buffer[n++];
if (isHighSurrogate(cu) && n != string->length &&
isLowSurrogate(string->buffer[n]))
if (rtl::isHighSurrogate(cu) && n != string->length &&
rtl::isLowSurrogate(string->buffer[n]))
{
++n;
}
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 7d7cc2c..06936b8 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -36,8 +36,8 @@
#include <com/sun/star/uri/XUriReferenceFactory.hpp>
#include <comphelper/processfactory.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <rtl/instance.hxx>
#include <rtl/surrogates.h>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.h>
#include <rtl/ustring.hxx>
@@ -281,9 +281,9 @@ namespace {
inline sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
{
return isHighSurrogate(rStr[nPos])
return rtl::isHighSurrogate(rStr[nPos])
&& rStr.getLength() - nPos >= 2
&& isLowSurrogate(rStr[nPos + 1]) ?
&& rtl::isLowSurrogate(rStr[nPos + 1]) ?
nPos + 2 : nPos + 1;
}
diff --git a/sw/source/filter/ww8/ww8par3.cxx b/sw/source/filter/ww8/ww8par3.cxx
index 5bb17c9..104052a 100644
--- a/sw/source/filter/ww8/ww8par3.cxx
+++ b/sw/source/filter/ww8/ww8par3.cxx
@@ -79,7 +79,7 @@
#include <IMark.hxx>
#include <unotools/fltrcfg.hxx>
#include <rtl/surrogates.h>
#include <rtl/character.hxx>
#include <xmloff/odffields.hxx>
#include <stdio.h>
@@ -500,16 +500,17 @@ OUString sanitizeString(const OUString& rString)
while (i < rString.getLength())
{
sal_Unicode c = rString[i];
if (isHighSurrogate(c))
if (rtl::isHighSurrogate(c))
{
if (i+1 == rString.getLength() || !isLowSurrogate(rString[i+1]))
if (i+1 == rString.getLength()
|| !rtl::isLowSurrogate(rString[i+1]))
{
SAL_WARN("sw.ww8", "Surrogate error: high without low");
return rString.copy(0, i);
}
++i; //skip correct low
}
if (isLowSurrogate(c)) //bare low without preceeding high
if (rtl::isLowSurrogate(c)) //bare low without preceeding high
{
SAL_WARN("sw.ww8", "Surrogate error: low without high");
return rString.copy(0, i);
diff --git a/tools/source/fsys/urlobj.cxx b/tools/source/fsys/urlobj.cxx
index 4dfe588..08c0c6d 100644
--- a/tools/source/fsys/urlobj.cxx
+++ b/tools/source/fsys/urlobj.cxx
@@ -31,7 +31,6 @@
#include <osl/file.hxx>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/surrogates.h>
#include <rtl/textenc.h>
#include <rtl/ustring.hxx>
#include <sal/types.h>
@@ -4778,9 +4777,9 @@ sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
nShift -= 6;
}
if (bUTF8 && nEncoded >= nMin
&& !isHighSurrogate(nEncoded)
&& !isLowSurrogate(nEncoded)
&& nEncoded <= 0x10FFFF)
&& nEncoded <= 0x10FFFF
&& !rtl::isHighSurrogate(nEncoded)
&& !rtl::isLowSurrogate(nEncoded))
{
rBegin = p;
nUTF32 = nEncoded;