tdf#121069, tdf#121469 migrate special characters
dbahsql: Decode UTF8 characters stored as unicode code point values in
schema file.
Change-Id: I90db2345a6de9bee7aae8ae6a7c046a03eebc0a7
Reviewed-on: https://gerrit.libreoffice.org/67197
Tested-by: Jenkins
Reviewed-by: Tamás Bunth <btomi96@gmail.com>
diff --git a/dbaccess/source/filter/hsqldb/createparser.cxx b/dbaccess/source/filter/hsqldb/createparser.cxx
index 77b1fbe..e81de3e 100644
--- a/dbaccess/source/filter/hsqldb/createparser.cxx
+++ b/dbaccess/source/filter/hsqldb/createparser.cxx
@@ -28,42 +28,6 @@
namespace
{
/// Find the first ASCII escaped unicode sequence in rSource.
///
/// An escape is a backslash, a lowercase 'u' and exactly four hexadecimal
/// digits (for example "\u00e1").
///
/// @param rSource string to search in.
/// @param nFrom index to start the search from.
/// @return index of the backslash of the first complete escape at or after
///         nFrom, or -1 if there is none.
sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
{
    const OString sHexDigits = "0123456789abcdefABCDEF";
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
    // A "\u" whose four digits would run past the end of the string cannot be
    // a complete escape, and neither can any later one. The length check also
    // keeps the operator[] accesses below inside the string.
    while (nIndex != -1 && nIndex + nEscapeLength <= rSource.getLength())
    {
        bool bIsUnicode = true;
        for (sal_Int32 nDist = 2; nDist < nEscapeLength && bIsUnicode; ++nDist)
        {
            bIsUnicode = sHexDigits.indexOf(rSource[nIndex + nDist]) != -1;
        }
        if (bIsUnicode)
        {
            return nIndex;
        }
        // Not an escape (e.g. "\user"): keep looking instead of giving up, so
        // that later escapes in the same string are still found.
        nIndex = rSource.indexOf("\\u", nIndex + 2);
    }
    return -1;
}
/// Decode ASCII escaped unicode ("\uXXXX") in a UTF-8 encoded string.
///
/// The string is processed in a single left-to-right pass: the text between
/// escapes is converted from UTF-8, and every escape is appended as one UTF-16
/// code unit. Appending code units (instead of converting each escape to UTF-8
/// on its own) keeps surrogate pairs written as two consecutive escapes
/// intact, and a single pass guarantees that already decoded text is never
/// decoded a second time.
///
/// @param original UTF-8 encoded text that may contain "\uXXXX" escapes.
/// @return the decoded text.
OUString lcl_ConvertToUTF8(const OString& original)
{
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    OUString sResult;
    // index of the first byte of original that is not yet part of sResult
    sal_Int32 nCopyFrom = 0;
    sal_Int32 nIndex = lcl_IndexOfUnicode(original);
    // The length check makes sure the four digits can be copied safely.
    while (nIndex != -1 && nIndex + nEscapeLength <= original.getLength())
    {
        // plain text in front of the escape
        sResult += OStringToOUString(original.copy(nCopyFrom, nIndex - nCopyFrom),
                                     RTL_TEXTENCODING_UTF8);

        const OString sHex = original.copy(nIndex + 2, 4);
        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
        sResult += OUString(&cDec, 1);

        nCopyFrom = nIndex + nEscapeLength;
        nIndex = lcl_IndexOfUnicode(original, nCopyFrom);
    }
    // remaining text after the last escape (the whole string if there was none)
    sResult += OStringToOUString(original.copy(nCopyFrom), RTL_TEXTENCODING_UTF8);
    return sResult;
}
/// Returns substring of sSql from the first occurrence of '(' until the
/// last occurrence of ')' (excluding the parenthesis)
OUString lcl_getColumnPart(const OUString& sSql)
@@ -281,7 +245,6 @@
ColumnTypeParts typeParts = lcl_getColumnTypeParts(sFullTypeName);
bool bCaseInsensitive = typeParts.typeName.indexOf("IGNORECASE") >= 0;
rColumnName = lcl_ConvertToUTF8(OUStringToOString(rColumnName, RTL_TEXTENCODING_UTF8));
bool isPrimaryKey = lcl_isPrimaryKey(sColumn);
if (isPrimaryKey)
diff --git a/dbaccess/source/filter/hsqldb/parseschema.cxx b/dbaccess/source/filter/hsqldb/parseschema.cxx
index beca3c2..e04998c 100644
--- a/dbaccess/source/filter/hsqldb/parseschema.cxx
+++ b/dbaccess/source/filter/hsqldb/parseschema.cxx
@@ -20,6 +20,7 @@
#include "parseschema.hxx"
#include "fbcreateparser.hxx"
#include "fbalterparser.hxx"
#include "utils.hxx"
#include <com/sun/star/io/TextInputStream.hpp>
#include <com/sun/star/embed/XStorage.hpp>
@@ -123,7 +124,8 @@
while (!xTextInput->isEOF())
{
// every line contains exactly one DDL statement
OUString sSql = xTextInput->readLine();
OUString sSql = utils::convertToUTF8(
OUStringToOString(xTextInput->readLine(), RTL_TEXTENCODING_UTF8));
IndexStmtParser indexParser{ sSql };
if (indexParser.isIndexStatement())
diff --git a/dbaccess/source/filter/hsqldb/utils.cxx b/dbaccess/source/filter/hsqldb/utils.cxx
index 8d6c49e..dc869f5 100644
--- a/dbaccess/source/filter/hsqldb/utils.cxx
+++ b/dbaccess/source/filter/hsqldb/utils.cxx
@@ -28,6 +28,46 @@
using namespace dbahsql;
namespace
{
/// Find the first ASCII escaped unicode sequence in rSource.
///
/// An escape is a backslash, a lowercase 'u' and exactly four hexadecimal
/// digits (for example "\u00e1").
///
/// @param rSource string to search in.
/// @param nFrom index to start the search from.
/// @return index of the backslash of the first complete escape at or after
///         nFrom, or -1 if there is none.
sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
{
    const OString sHexDigits = "0123456789abcdefABCDEF";
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
    // A "\u" whose four digits would run past the end of the string cannot be
    // a complete escape, and neither can any later one. The length check also
    // keeps the operator[] accesses below inside the string.
    while (nIndex != -1 && nIndex + nEscapeLength <= rSource.getLength())
    {
        bool bIsUnicode = true;
        for (sal_Int32 nDist = 2; nDist < nEscapeLength && bIsUnicode; ++nDist)
        {
            bIsUnicode = sHexDigits.indexOf(rSource[nIndex + nDist]) != -1;
        }
        if (bIsUnicode)
        {
            return nIndex;
        }
        // Not an escape (e.g. "\user"): keep looking instead of giving up, so
        // that later escapes in the same string are still found.
        nIndex = rSource.indexOf("\\u", nIndex + 2);
    }
    return -1;
}
} // unnamed namespace
/// Decode ASCII escaped unicode ("\uXXXX") in a UTF-8 encoded string.
///
/// The string is processed in a single left-to-right pass: the text between
/// escapes is converted from UTF-8, and every escape is appended as one UTF-16
/// code unit. Appending code units (instead of converting each escape to UTF-8
/// on its own) keeps surrogate pairs written as two consecutive escapes
/// intact, and a single pass guarantees that already decoded text is never
/// decoded a second time.
///
/// @param original UTF-8 encoded text that may contain "\uXXXX" escapes.
/// @return the decoded text.
OUString utils::convertToUTF8(const OString& original)
{
    // "\u" plus four hex digits
    const sal_Int32 nEscapeLength = 6;

    OUString sResult;
    // index of the first byte of original that is not yet part of sResult
    sal_Int32 nCopyFrom = 0;
    sal_Int32 nIndex = lcl_IndexOfUnicode(original);
    // The length check makes sure the four digits can be copied safely.
    while (nIndex != -1 && nIndex + nEscapeLength <= original.getLength())
    {
        // plain text in front of the escape
        sResult += OStringToOUString(original.copy(nCopyFrom, nIndex - nCopyFrom),
                                     RTL_TEXTENCODING_UTF8);

        const OString sHex = original.copy(nIndex + 2, 4);
        const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16));
        sResult += OUString(&cDec, 1);

        nCopyFrom = nIndex + nEscapeLength;
        nIndex = lcl_IndexOfUnicode(original, nCopyFrom);
    }
    // remaining text after the last escape (the whole string if there was none)
    sResult += OStringToOUString(original.copy(nCopyFrom), RTL_TEXTENCODING_UTF8);
    return sResult;
}
OUString utils::getTableNameFromStmt(const OUString& sSql)
{
auto stmtComponents = comphelper::string::split(sSql, sal_Unicode(u' '));
diff --git a/dbaccess/source/filter/hsqldb/utils.hxx b/dbaccess/source/filter/hsqldb/utils.hxx
index 02ccc3d..b2d54fb 100644
--- a/dbaccess/source/filter/hsqldb/utils.hxx
+++ b/dbaccess/source/filter/hsqldb/utils.hxx
@@ -16,6 +16,8 @@
{
namespace utils
{
/// Decodes ASCII escaped unicode ("\uXXXX", four hex digits) found in
/// `original` and returns the result as an OUString; the bytes of `original`
/// are interpreted as UTF-8.
OUString convertToUTF8(const OString& original);
OUString getTableNameFromStmt(const OUString& sSql);
void ensureFirebirdTableLength(const OUString& sName);