tdf#121069, tdf#121469 migrate special characters

dbahsql: Decode non-ASCII characters that are stored as \uXXXX escape
sequences (Unicode code point values) in the schema file.

Change-Id: I90db2345a6de9bee7aae8ae6a7c046a03eebc0a7
Reviewed-on: https://gerrit.libreoffice.org/67197
Tested-by: Jenkins
Reviewed-by: Tamás Bunth <btomi96@gmail.com>
diff --git a/dbaccess/source/filter/hsqldb/createparser.cxx b/dbaccess/source/filter/hsqldb/createparser.cxx
index 77b1fbe..e81de3e 100644
--- a/dbaccess/source/filter/hsqldb/createparser.cxx
+++ b/dbaccess/source/filter/hsqldb/createparser.cxx
@@ -28,42 +28,6 @@

namespace
{
// Find the next ASCII-escaped Unicode sequence, i.e. a backslash followed by
// 'u' and exactly four hexadecimal digits (for example "\u00e9").
//
// rSource: the byte string to search.
// nFrom:   index at which the search starts (0 <= nFrom <= length).
//
// Returns the index of the backslash of the first complete escape found at
// or after nFrom, or -1 if there is none.
sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
{
    const OString sHexDigits = "0123456789abcdefABCDEF";
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
    while (nIndex != -1)
    {
        // Not enough characters left for a complete escape; no later
        // candidate can be complete either, so stop before indexing past
        // the end of the string.
        if (nIndex + nEscapeLength > rSource.getLength())
        {
            return -1;
        }

        bool bIsUnicode = true;
        for (sal_Int32 nDist = 2; nDist < nEscapeLength; ++nDist)
        {
            if (sHexDigits.indexOf(rSource[nIndex + nDist]) == -1)
            {
                bIsUnicode = false;
                break;
            }
        }
        if (bIsUnicode)
        {
            return nIndex;
        }

        // A "\u" that is not followed by four hex digits is plain text;
        // keep looking so that later, valid escapes are still found.
        nIndex = rSource.indexOf("\\u", nIndex + 2);
    }
    return -1;
}

// Decode ASCII-escaped Unicode ("\uXXXX", four hex digits) embedded in a
// UTF-8 encoded byte string.
//
// original: UTF-8 encoded text that may contain "\uXXXX" escapes.
//
// Returns the text as an OUString in which every escape has been replaced
// by the UTF-16 code unit it denotes. Text between escapes is converted
// from UTF-8 unchanged. Because the code units are appended directly to the
// UTF-16 result, two consecutive escapes that form a surrogate pair end up
// as one valid character instead of being converted (and lost) one by one.
OUString lcl_ConvertToUTF8(const OString& original)
{
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    OUString sResult;
    // Index of the first byte of `original` not yet copied into sResult.
    sal_Int32 nCopiedUpTo = 0;
    sal_Int32 nIndex = lcl_IndexOfUnicode(original);
    while (nIndex != -1 && nIndex + nEscapeLength <= original.getLength())
    {
        // Literal text in front of the escape. Splitting at a backslash is
        // safe: it is an ASCII byte and never part of a multi-byte UTF-8
        // sequence.
        sResult += OStringToOUString(original.copy(nCopiedUpTo, nIndex - nCopiedUpTo),
                                     RTL_TEXTENCODING_UTF8);

        const OString sHex = original.copy(nIndex + 2, 4);
        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
        sResult += OUString(&cDec, 1);

        // Continue behind the escape so that already decoded text is never
        // scanned or replaced a second time.
        nCopiedUpTo = nIndex + nEscapeLength;
        nIndex = lcl_IndexOfUnicode(original, nCopiedUpTo);
    }
    // Remaining text after the last escape (or the whole string if there
    // was none).
    sResult += OStringToOUString(original.copy(nCopiedUpTo), RTL_TEXTENCODING_UTF8);
    return sResult;
}

/// Returns substring of sSql from the first occurrence of '(' until the
/// last occurrence of ')' (excluding the parenthesis)
OUString lcl_getColumnPart(const OUString& sSql)
@@ -281,7 +245,6 @@
        ColumnTypeParts typeParts = lcl_getColumnTypeParts(sFullTypeName);

        bool bCaseInsensitive = typeParts.typeName.indexOf("IGNORECASE") >= 0;
        rColumnName = lcl_ConvertToUTF8(OUStringToOString(rColumnName, RTL_TEXTENCODING_UTF8));
        bool isPrimaryKey = lcl_isPrimaryKey(sColumn);

        if (isPrimaryKey)
diff --git a/dbaccess/source/filter/hsqldb/parseschema.cxx b/dbaccess/source/filter/hsqldb/parseschema.cxx
index beca3c2..e04998c 100644
--- a/dbaccess/source/filter/hsqldb/parseschema.cxx
+++ b/dbaccess/source/filter/hsqldb/parseschema.cxx
@@ -20,6 +20,7 @@
#include "parseschema.hxx"
#include "fbcreateparser.hxx"
#include "fbalterparser.hxx"
#include "utils.hxx"

#include <com/sun/star/io/TextInputStream.hpp>
#include <com/sun/star/embed/XStorage.hpp>
@@ -123,7 +124,8 @@
    while (!xTextInput->isEOF())
    {
        // every line contains exactly one DDL statement
        OUString sSql = xTextInput->readLine();
        OUString sSql = utils::convertToUTF8(
            OUStringToOString(xTextInput->readLine(), RTL_TEXTENCODING_UTF8));

        IndexStmtParser indexParser{ sSql };
        if (indexParser.isIndexStatement())
diff --git a/dbaccess/source/filter/hsqldb/utils.cxx b/dbaccess/source/filter/hsqldb/utils.cxx
index 8d6c49e..dc869f5 100644
--- a/dbaccess/source/filter/hsqldb/utils.cxx
+++ b/dbaccess/source/filter/hsqldb/utils.cxx
@@ -28,6 +28,46 @@

using namespace dbahsql;

namespace
{
// Find the next ASCII-escaped Unicode sequence, i.e. a backslash followed by
// 'u' and exactly four hexadecimal digits (for example "\u00e9").
//
// rSource: the byte string to search.
// nFrom:   index at which the search starts (0 <= nFrom <= length).
//
// Returns the index of the backslash of the first complete escape found at
// or after nFrom, or -1 if there is none.
sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
{
    const OString sHexDigits = "0123456789abcdefABCDEF";
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
    while (nIndex != -1)
    {
        // Not enough characters left for a complete escape; no later
        // candidate can be complete either, so stop before indexing past
        // the end of the string.
        if (nIndex + nEscapeLength > rSource.getLength())
        {
            return -1;
        }

        bool bIsUnicode = true;
        for (sal_Int32 nDist = 2; nDist < nEscapeLength; ++nDist)
        {
            if (sHexDigits.indexOf(rSource[nIndex + nDist]) == -1)
            {
                bIsUnicode = false;
                break;
            }
        }
        if (bIsUnicode)
        {
            return nIndex;
        }

        // A "\u" that is not followed by four hex digits is plain text;
        // keep looking so that later, valid escapes are still found.
        nIndex = rSource.indexOf("\\u", nIndex + 2);
    }
    return -1;
}

} // unnamed namespace

// Decode ASCII-escaped Unicode ("\uXXXX", four hex digits) embedded in a
// UTF-8 encoded byte string.
//
// original: UTF-8 encoded text that may contain "\uXXXX" escapes.
//
// Returns the text as an OUString in which every escape has been replaced
// by the UTF-16 code unit it denotes. Text between escapes is converted
// from UTF-8 unchanged. Because the code units are appended directly to the
// UTF-16 result, two consecutive escapes that form a surrogate pair end up
// as one valid character instead of being converted (and lost) one by one.
OUString utils::convertToUTF8(const OString& original)
{
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    OUString sResult;
    // Index of the first byte of `original` not yet copied into sResult.
    sal_Int32 nCopiedUpTo = 0;
    sal_Int32 nIndex = lcl_IndexOfUnicode(original);
    while (nIndex != -1 && nIndex + nEscapeLength <= original.getLength())
    {
        // Literal text in front of the escape. Splitting at a backslash is
        // safe: it is an ASCII byte and never part of a multi-byte UTF-8
        // sequence.
        sResult += OStringToOUString(original.copy(nCopiedUpTo, nIndex - nCopiedUpTo),
                                     RTL_TEXTENCODING_UTF8);

        const OString sHex = original.copy(nIndex + 2, 4);
        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
        sResult += OUString(&cDec, 1);

        // Continue behind the escape so that already decoded text is never
        // scanned or replaced a second time.
        nCopiedUpTo = nIndex + nEscapeLength;
        nIndex = lcl_IndexOfUnicode(original, nCopiedUpTo);
    }
    // Remaining text after the last escape (or the whole string if there
    // was none).
    sResult += OStringToOUString(original.copy(nCopiedUpTo), RTL_TEXTENCODING_UTF8);
    return sResult;
}

OUString utils::getTableNameFromStmt(const OUString& sSql)
{
    auto stmtComponents = comphelper::string::split(sSql, sal_Unicode(u' '));
diff --git a/dbaccess/source/filter/hsqldb/utils.hxx b/dbaccess/source/filter/hsqldb/utils.hxx
index 02ccc3d..b2d54fb 100644
--- a/dbaccess/source/filter/hsqldb/utils.hxx
+++ b/dbaccess/source/filter/hsqldb/utils.hxx
@@ -16,6 +16,8 @@
{
namespace utils
{
OUString convertToUTF8(const OString& original);

OUString getTableNameFromStmt(const OUString& sSql);

void ensureFirebirdTableLength(const OUString& sName);