Do proper script itemization with HarfBuzz

This implements http://www.unicode.org/reports/tr24/ by using ICU’s
implementation of it, but since the code in question is private API, I
simply copied the two self-contained files.

This commit is best viewed with --ignore-space-change.

Change-Id: I38c385d4fb6f8a2edc804d48f0aa14df9f0a8b3b
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index cbbae6a..204a0d6 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -464,6 +464,7 @@
    vcl/generic/glyphs/gcach_layout \
    vcl/generic/glyphs/gcach_rbmp \
    vcl/generic/glyphs/glyphcache \
    vcl/generic/glyphs/scrptrun \
    vcl/generic/fontmanager/fontcache \
    vcl/generic/fontmanager/fontconfig \
    vcl/generic/fontmanager/fontmanager \
diff --git a/vcl/generic/glyphs/gcach_layout.cxx b/vcl/generic/glyphs/gcach_layout.cxx
index 7a8bfc9..f450ee1 100644
--- a/vcl/generic/glyphs/gcach_layout.cxx
+++ b/vcl/generic/glyphs/gcach_layout.cxx
@@ -20,6 +20,7 @@
#include <gcach_ftyp.hxx>
#include <sallayout.hxx>
#include <salgdi.hxx>
#include <scrptrun.h>

#include <boost/static_assert.hpp>

@@ -354,6 +355,19 @@
    hb_face_destroy(mpHbFace);
}

// A slice of a bidi run that is shaped with a single script.
// The slice covers the half-open index range [mnMin, mnEnd) of the UTF-16
// text handed to the layout engine.
struct HbScriptRun
{
    int32_t mnMin;          // index of the first code unit of the slice
    int32_t mnEnd;          // index one past the last code unit of the slice
    UScriptCode maScript;   // ICU script code used when shaping the slice

    HbScriptRun(int32_t nMin, int32_t nEnd, UScriptCode aScript)
        : mnMin(nMin)
        , mnEnd(nEnd)
        , maScript(aScript)
    {
    }
};

// Script slices of one bidi run, in the order they are to be shaped.
typedef std::vector<HbScriptRun> HbScriptRuns;

bool HbLayoutEngine::layout(ServerFontLayout& rLayout, ImplLayoutArgs& rArgs)
{
    ServerFont& rFont = rLayout.GetServerFont();
@@ -376,137 +390,151 @@

    rLayout.Reserve(nGlyphCapacity);

    ScriptRun aScriptRun(reinterpret_cast<const UChar *>(rArgs.mpStr), rArgs.mnLength);

    Point aCurrPos(0, 0);
    while (true)
    {
        int nMinRunPos, nEndRunPos;
        int nBidiMinRunPos, nBidiEndRunPos;
        bool bRightToLeft;
        if (!rArgs.GetNextRun(&nMinRunPos, &nEndRunPos, &bRightToLeft))
        if (!rArgs.GetNextRun(&nBidiMinRunPos, &nBidiEndRunPos, &bRightToLeft))
            break;

        int nRunLen = nEndRunPos - nMinRunPos;

        // find matching script
        // TODO: use ICU's UScriptRun API to properly resolves "common" and
        // "inherited" script codes, probably use it in GetNextRun() and return
        // the script there
        UScriptCode eScriptCode = USCRIPT_INVALID_CODE;
        for (int i = nMinRunPos; i < nEndRunPos; ++i)
        // Find script subruns.
        int nCurrentPos = nBidiMinRunPos;
        HbScriptRuns aScriptSubRuns;
        while (aScriptRun.next())
        {
            UErrorCode rcI18n = U_ZERO_ERROR;
            UScriptCode eNextScriptCode = uscript_getScript(rArgs.mpStr[i], &rcI18n);
            if ((eNextScriptCode > USCRIPT_INHERITED))
            {
                eScriptCode = eNextScriptCode;
                if (eNextScriptCode != USCRIPT_LATIN)
                    break;
            }
        }
        if (eScriptCode < 0)   // TODO: handle errors better
            eScriptCode = USCRIPT_LATIN;

        meScriptCode = eScriptCode;

        OString sLanguage = OUStringToOString(rArgs.maLanguageTag.getLanguage(), RTL_TEXTENCODING_UTF8);

        if (pHbUnicodeFuncs == NULL)
            pHbUnicodeFuncs = getUnicodeFuncs();

        hb_buffer_t *pHbBuffer = hb_buffer_create();
        hb_buffer_set_unicode_funcs(pHbBuffer, pHbUnicodeFuncs);
        hb_buffer_set_direction(pHbBuffer, bRightToLeft ? HB_DIRECTION_RTL: HB_DIRECTION_LTR);
        hb_buffer_set_script(pHbBuffer, hb_icu_script_to_script(eScriptCode));
        hb_buffer_set_language(pHbBuffer, hb_language_from_string(sLanguage.getStr(), -1));
        hb_buffer_add_utf16(pHbBuffer, rArgs.mpStr, rArgs.mnLength, nMinRunPos, nRunLen);
        hb_shape(pHbFont, pHbBuffer, NULL, 0);

        int nRunGlyphCount = hb_buffer_get_length(pHbBuffer);
        hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, NULL);
        hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, NULL);

        for (int i = 0; i < nRunGlyphCount; ++i) {
            int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint;
            int32_t nCharPos = pHbGlyphInfos[i].cluster;

            // if needed request glyph fallback by updating LayoutArgs
            if (!nGlyphIndex)
            {
                rLayout.setNeedFallback(rArgs, nCharPos, bRightToLeft);
                if (SAL_LAYOUT_FOR_FALLBACK & rArgs.mnFlags)
                    continue;
            }

            // apply vertical flags and glyph substitution
            // XXX: Use HB_DIRECTION_TTB above and apply whatever flags magic
            // FixupGlyphIndex() is doing, minus the GSUB part.
            if (nCharPos >= 0)
            {
                sal_UCS4 aChar = rArgs.mpStr[nCharPos];
                nGlyphIndex = rFont.FixupGlyphIndex(nGlyphIndex, aChar);
            }

            bool bInCluster = false;
            if (i > 0 && pHbGlyphInfos[i].cluster == pHbGlyphInfos[i - 1].cluster)
                bInCluster = true;

            long nGlyphFlags = 0;
            if (bRightToLeft)
                nGlyphFlags |= GlyphItem::IS_RTL_GLYPH;

            if (bInCluster)
                nGlyphFlags |= GlyphItem::IS_IN_CLUSTER;

            // The whole IS_DIACRITIC concept is a stupid hack that was
            // introduced ages ago to work around the utter brokenness of the
            // way justification adjustments are applied (the DXArray fiasco).
            // Since it is such a stupid hack, there is no sane way to directly
            // map to concepts of the "outside" world, so we do some rather
            // ugly hacks:
            // * If the font has a GDEF table, we check for glyphs with mark
            //   glyph class which is sensible, except that some fonts
            //   (fdo#70968) assign mark class to spacing marks (which is wrong
            //   but usually harmless), so we try to sniff what HarfBuzz thinks
            //   about this glyph by checking if it gives it a zero advance
            //   width.
            // * If the font has no GDEF table, we just check if the glyph has
            //   zero advance width, but this is stupid and can be wrong. A
            //   better way would to check the character's Unicode combining
            //   class, but unfortunately glyph gives combining marks the
            //   cluster value of its base character, so nCharPos will be
            //   pointing to the wrong character (but HarfBuzz might change
            //   this in the future).
            bool bDiacritic = false;
            if (hb_ot_layout_has_glyph_classes(mpHbFace))
            {
                // the font has GDEF table
                bool bMark = hb_ot_layout_get_glyph_class(mpHbFace, nGlyphIndex) == HB_OT_LAYOUT_GLYPH_CLASS_MARK;
                if (bMark && pHbPositions[i].x_advance == 0)
                    bDiacritic = true;
            }
            else
            {
                // the font lacks GDEF table
                if (pHbPositions[i].x_advance == 0)
                    bDiacritic = true;
            }

            if (bDiacritic)
                nGlyphFlags |= GlyphItem::IS_DIACRITIC;

            int32_t nXOffset =  pHbPositions[i].x_offset >> 6;
            int32_t nYOffset =  pHbPositions[i].y_offset >> 6;
            int32_t nXAdvance = pHbPositions[i].x_advance >> 6;
            int32_t nYAdvance = pHbPositions[i].y_advance >> 6;

            Point aNewPos = Point(aCurrPos.X() + nXOffset, -(aCurrPos.Y() + nYOffset));
            const GlyphItem aGI(nCharPos, nGlyphIndex, aNewPos, nGlyphFlags, nXAdvance, nXOffset);
            rLayout.AppendGlyph(aGI);

            aCurrPos.X() += nXAdvance;
            aCurrPos.Y() += nYAdvance;
            if (aScriptRun.getScriptStart() <= nCurrentPos && aScriptRun.getScriptEnd() > nCurrentPos)
                break;
        }

        hb_buffer_destroy(pHbBuffer);
        while (nCurrentPos < nBidiEndRunPos)
        {
            int32_t nMinRunPos = nCurrentPos;
            int32_t nEndRunPos = std::min(aScriptRun.getScriptEnd(), nBidiEndRunPos);
            HbScriptRun aRun(nMinRunPos, nEndRunPos, aScriptRun.getScriptCode());
            aScriptSubRuns.push_back(aRun);

            nCurrentPos = nEndRunPos;
            aScriptRun.next();
        }

        // RTL subruns should be reversed to ensure that final glyph order is
        // correct.
        if (bRightToLeft)
            std::reverse(aScriptSubRuns.begin(), aScriptSubRuns.end());

        aScriptRun.reset();

        for (HbScriptRuns::iterator it = aScriptSubRuns.begin(); it != aScriptSubRuns.end(); ++it)
        {
            int nMinRunPos = it->mnMin;
            int nEndRunPos = it->mnEnd;
            int nRunLen = nEndRunPos - nMinRunPos;
            meScriptCode = it->maScript;

            OString sLanguage = OUStringToOString(rArgs.maLanguageTag.getLanguage(), RTL_TEXTENCODING_UTF8);

            if (pHbUnicodeFuncs == NULL)
                pHbUnicodeFuncs = getUnicodeFuncs();

            hb_buffer_t *pHbBuffer = hb_buffer_create();
            hb_buffer_set_unicode_funcs(pHbBuffer, pHbUnicodeFuncs);
            hb_buffer_set_direction(pHbBuffer, bRightToLeft ? HB_DIRECTION_RTL: HB_DIRECTION_LTR);
            hb_buffer_set_script(pHbBuffer, hb_icu_script_to_script(meScriptCode));
            hb_buffer_set_language(pHbBuffer, hb_language_from_string(sLanguage.getStr(), -1));
            hb_buffer_add_utf16(pHbBuffer, rArgs.mpStr, rArgs.mnLength, nMinRunPos, nRunLen);
            hb_shape(pHbFont, pHbBuffer, NULL, 0);

            int nRunGlyphCount = hb_buffer_get_length(pHbBuffer);
            hb_glyph_info_t *pHbGlyphInfos = hb_buffer_get_glyph_infos(pHbBuffer, NULL);
            hb_glyph_position_t *pHbPositions = hb_buffer_get_glyph_positions(pHbBuffer, NULL);

            for (int i = 0; i < nRunGlyphCount; ++i) {
                int32_t nGlyphIndex = pHbGlyphInfos[i].codepoint;
                int32_t nCharPos = pHbGlyphInfos[i].cluster;

                // if needed request glyph fallback by updating LayoutArgs
                if (!nGlyphIndex)
                {
                    rLayout.setNeedFallback(rArgs, nCharPos, bRightToLeft);
                    if (SAL_LAYOUT_FOR_FALLBACK & rArgs.mnFlags)
                        continue;
                }

                // apply vertical flags and glyph substitution
                // XXX: Use HB_DIRECTION_TTB above and apply whatever flags magic
                // FixupGlyphIndex() is doing, minus the GSUB part.
                if (nCharPos >= 0)
                {
                    sal_UCS4 aChar = rArgs.mpStr[nCharPos];
                    nGlyphIndex = rFont.FixupGlyphIndex(nGlyphIndex, aChar);
                }

                bool bInCluster = false;
                if (i > 0 && pHbGlyphInfos[i].cluster == pHbGlyphInfos[i - 1].cluster)
                    bInCluster = true;

                long nGlyphFlags = 0;
                if (bRightToLeft)
                    nGlyphFlags |= GlyphItem::IS_RTL_GLYPH;

                if (bInCluster)
                    nGlyphFlags |= GlyphItem::IS_IN_CLUSTER;

                // The whole IS_DIACRITIC concept is a stupid hack that was
                // introduced ages ago to work around the utter brokenness of the
                // way justification adjustments are applied (the DXArray fiasco).
                // Since it is such a stupid hack, there is no sane way to directly
                // map to concepts of the "outside" world, so we do some rather
                // ugly hacks:
                // * If the font has a GDEF table, we check for glyphs with mark
                //   glyph class which is sensible, except that some fonts
                //   (fdo#70968) assign mark class to spacing marks (which is wrong
                //   but usually harmless), so we try to sniff what HarfBuzz thinks
                //   about this glyph by checking if it gives it a zero advance
                //   width.
                // * If the font has no GDEF table, we just check if the glyph has
                //   zero advance width, but this is stupid and can be wrong. A
                //   better way would be to check the character's Unicode combining
                //   class, but unfortunately HarfBuzz gives combining marks the
                //   cluster value of their base character, so nCharPos will be
                //   pointing to the wrong character (but HarfBuzz might change
                //   this in the future).
                bool bDiacritic = false;
                if (hb_ot_layout_has_glyph_classes(mpHbFace))
                {
                    // the font has GDEF table
                    bool bMark = hb_ot_layout_get_glyph_class(mpHbFace, nGlyphIndex) == HB_OT_LAYOUT_GLYPH_CLASS_MARK;
                    if (bMark && pHbPositions[i].x_advance == 0)
                        bDiacritic = true;
                }
                else
                {
                    // the font lacks GDEF table
                    if (pHbPositions[i].x_advance == 0)
                        bDiacritic = true;
                }

                if (bDiacritic)
                    nGlyphFlags |= GlyphItem::IS_DIACRITIC;

                int32_t nXOffset =  pHbPositions[i].x_offset >> 6;
                int32_t nYOffset =  pHbPositions[i].y_offset >> 6;
                int32_t nXAdvance = pHbPositions[i].x_advance >> 6;
                int32_t nYAdvance = pHbPositions[i].y_advance >> 6;

                Point aNewPos = Point(aCurrPos.X() + nXOffset, -(aCurrPos.Y() + nYOffset));
                const GlyphItem aGI(nCharPos, nGlyphIndex, aNewPos, nGlyphFlags, nXAdvance, nXOffset);
                rLayout.AppendGlyph(aGI);

                aCurrPos.X() += nXAdvance;
                aCurrPos.Y() += nYAdvance;
            }

            hb_buffer_destroy(pHbBuffer);
        }
    }

    hb_font_destroy(pHbFont);
diff --git a/vcl/generic/glyphs/scrptrun.cxx b/vcl/generic/glyphs/scrptrun.cxx
new file mode 100644
index 0000000..ea7790d
--- /dev/null
+++ b/vcl/generic/glyphs/scrptrun.cxx
@@ -0,0 +1,225 @@
/*
 *******************************************************************************
 *
 *   Copyright (c) 1995-2013 International Business Machines Corporation and others
 *
 *   All rights reserved.
 *
 *   Permission is hereby granted, free of charge, to any person obtaining a copy of
 *   this software and associated documentation files (the "Software"), to deal in
 *   the Software without restriction, including without limitation the rights to
 *   use, copy, modify, merge, publish, distribute, and/or sell copies of the
 *   Software, and to permit persons to whom the Software is furnished to do so,
 *   provided that the above copyright notice(s) and this permission notice appear
 *   in all copies of the Software and that both the above copyright notice(s) and
 *   this permission notice appear in supporting documentation.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
 *   NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
 *   LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
 *   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 *   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *   Except as contained in this notice, the name of a copyright holder shall not be
 *   used in advertising or otherwise to promote the sale, use or other dealings in
 *   this Software without prior written authorization of the copyright holder.
 *
 *******************************************************************************
 *   file name:  scrptrun.cpp
 *
 *   created on: 10/17/2001
 *   created by: Eric R. Mader
 */

#include "unicode/utypes.h"
#include "unicode/uscript.h"

#include "scrptrun.h"

#define ARRAY_SIZE(array) (sizeof array  / sizeof array[0])

const char ScriptRun::fgClassID=0;

UChar32 ScriptRun::pairedChars[] = {
    0x0028, 0x0029, // ascii paired punctuation
    0x003c, 0x003e,
    0x005b, 0x005d,
    0x007b, 0x007d,
    0x00ab, 0x00bb, // guillemets
    0x2018, 0x2019, // general punctuation
    0x201c, 0x201d,
    0x2039, 0x203a,
    0x3008, 0x3009, // chinese paired punctuation
    0x300a, 0x300b,
    0x300c, 0x300d,
    0x300e, 0x300f,
    0x3010, 0x3011,
    0x3014, 0x3015,
    0x3016, 0x3017,
    0x3018, 0x3019,
    0x301a, 0x301b
};

const int32_t ScriptRun::pairedCharCount = ARRAY_SIZE(pairedChars);
const int32_t ScriptRun::pairedCharPower = 1 << highBit(pairedCharCount);
const int32_t ScriptRun::pairedCharExtra = pairedCharCount - pairedCharPower;

/**
 * Returns the zero-based position of the most significant set bit of
 * `value` (i.e. floor(log2(value))), or -32 when `value` is not positive.
 */
int8_t ScriptRun::highBit(int32_t value)
{
    if (value <= 0) {
        return -32;
    }

    int8_t position = 0;

    // Halve the candidate width each round: 16, 8, 4, 2, 1. Whenever the
    // value still has a bit at or above the current width, drop the low
    // bits and remember how far we shifted.
    for (int width = 16; width >= 1; width >>= 1) {
        if (value >= (1 << width)) {
            value >>= width;
            position = static_cast<int8_t>(position + width);
        }
    }

    return position;
}

/**
 * Looks `ch` up in the sorted pairedChars table.
 *
 * The search is a branch-light binary search: the table size is split into
 * the largest power of two that fits (pairedCharPower) plus a remainder
 * (pairedCharExtra), so every probe stays inside the table.
 *
 * @return the index of `ch` in pairedChars, or -1 if it is not in the table.
 */
int32_t ScriptRun::getPairIndex(UChar32 ch)
{
    // Start in the upper window when ch is at or beyond its first entry.
    int32_t slot = (ch >= pairedChars[pairedCharExtra]) ? pairedCharExtra : 0;

    for (int32_t step = pairedCharPower >> 1; step > 0; step >>= 1) {
        if (ch >= pairedChars[slot + step]) {
            slot += step;
        }
    }

    return (pairedChars[slot] == ch) ? slot : -1;
}

/**
 * Tells whether two script codes may share a run: they do when they are
 * equal, or when either one is at or below USCRIPT_INHERITED (which
 * includes the "common" and "inherited" codes).
 */
UBool ScriptRun::sameScript(int32_t scriptOne, int32_t scriptTwo)
{
    if (scriptOne == scriptTwo) {
        return true;
    }

    return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED;
}

/**
 * Advances to the next script run of the text.
 *
 * Starting at the end of the previous run, consumes characters for as long
 * as their script is compatible (see sameScript()) with the script of the
 * run. Surrogate pairs are combined into one code point. Paired
 * punctuation is tracked on parenStack so that a closing character gets the
 * script of its matching opening character.
 *
 * On success scriptStart, scriptEnd and scriptCode describe the run
 * [scriptStart, scriptEnd).
 *
 * @return false if the previous run already reached charLimit (nothing is
 *         changed in that case), true otherwise.
 */
UBool ScriptRun::next()
{
    // Capacity of the fixed-size paired-punctuation stack.
    const int32_t parenStackDepth =
        static_cast<int32_t>(sizeof parenStack / sizeof parenStack[0]);

    int32_t startSP  = parenSP;  // used to find the first new open character
    UErrorCode error = U_ZERO_ERROR;  // status of uscript_getScript(); not inspected

    // if we've fallen off the end of the text, we're done
    if (scriptEnd >= charLimit) {
        return false;
    }

    scriptCode = USCRIPT_COMMON;

    for (scriptStart = scriptEnd; scriptEnd < charLimit; scriptEnd += 1) {
        UChar   high = charArray[scriptEnd];
        UChar32 ch   = high;

        // if the character is a high surrogate and it's not the last one
        // in the text, see if it's followed by a low surrogate
        if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1)
        {
            UChar low = charArray[scriptEnd + 1];

            // if it is followed by a low surrogate,
            // consume it and form the full character
            if (low >= 0xDC00 && low <= 0xDFFF) {
                ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
                scriptEnd += 1;
            }
        }

        UScriptCode sc = uscript_getScript(ch, &error);
        int32_t pairIndex = getPairIndex(ch);

        // Paired character handling:
        //
        // if it's an open character, push it onto the stack.
        // if it's a close character, find the matching open on the
        // stack, and use that script code. Any non-matching open
        // characters above it on the stack will be popped.
        if (pairIndex >= 0) {
            if ((pairIndex & 1) == 0) {
                // Never write past the end of parenStack: once the stack is
                // full, further open characters are simply not tracked.
                if (parenSP + 1 < parenStackDepth) {
                    parenStack[++parenSP].pairIndex = pairIndex;
                    parenStack[parenSP].scriptCode  = scriptCode;
                }
            } else if (parenSP >= 0) {
                int32_t pi = pairIndex & ~1;

                while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) {
                    parenSP -= 1;
                }

                if (parenSP < startSP) {
                    startSP = parenSP;
                }

                if (parenSP >= 0) {
                    sc = parenStack[parenSP].scriptCode;
                }
            }
        }

        if (sameScript(scriptCode, sc)) {
            if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
                scriptCode = sc;

                // now that we have a final script code, fix any open
                // characters we pushed before we knew the script code.
                while (startSP < parenSP) {
                    parenStack[++startSP].scriptCode = scriptCode;
                }
            }

            // if this character is a close paired character,
            // pop it from the stack
            if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
                parenSP -= 1;

                // Only lower startSP while it is a valid stack index.
                // Letting it drop below -1 would make the fix-up loop
                // above write to parenStack[-1] on a later character.
                if (startSP >= 0) {
                    startSP -= 1;
                }
            }
        } else {
            // if the run broke on a surrogate pair,
            // end it before the high surrogate
            if (ch >= 0x10000) {
                scriptEnd -= 1;
            }

            break;
        }
    }

    return true;
}

diff --git a/vcl/generic/glyphs/scrptrun.h b/vcl/generic/glyphs/scrptrun.h
new file mode 100644
index 0000000..bdea661
--- /dev/null
+++ b/vcl/generic/glyphs/scrptrun.h
@@ -0,0 +1,177 @@
/*
 *******************************************************************************
 *
 *   Copyright (c) 1995-2013 International Business Machines Corporation and others
 *
 *   All rights reserved.
 *
 *   Permission is hereby granted, free of charge, to any person obtaining a copy of
 *   this software and associated documentation files (the "Software"), to deal in
 *   the Software without restriction, including without limitation the rights to
 *   use, copy, modify, merge, publish, distribute, and/or sell copies of the
 *   Software, and to permit persons to whom the Software is furnished to do so,
 *   provided that the above copyright notice(s) and this permission notice appear
 *   in all copies of the Software and that both the above copyright notice(s) and
 *   this permission notice appear in supporting documentation.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN
 *   NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
 *   LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
 *   DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 *   CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *   Except as contained in this notice, the name of a copyright holder shall not be
 *   used in advertising or otherwise to promote the sale, use or other dealings in
 *   this Software without prior written authorization of the copyright holder.
 *
 *******************************************************************************
 *   file name:  scrptrun.h
 *
 *   created on: 10/17/2001
 *   created by: Eric R. Mader
 */

#ifndef __SCRPTRUN_H
#define __SCRPTRUN_H

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/uscript.h"

// Plain record holding two code points and a script code.
// NOTE(review): presumably maps the code point range startChar..endChar to
// scriptCode; nothing in the ScriptRun code visible here uses it — confirm
// before relying on (or removing) it.
struct ScriptRecord
{
    UChar32 startChar;
    UChar32 endChar;
    UScriptCode scriptCode;
};

// One entry of ScriptRun's paired-punctuation stack: an opening character
// that has been seen and not yet closed.
struct ParenStackEntry
{
    // Index of the opening character in ScriptRun::pairedChars.
    int32_t pairIndex;
    // Script of the run the opening character belongs to; a matching
    // closing character is given this script.
    UScriptCode scriptCode;
};

/**
 * Iterates over a range of UTF-16 text and splits it into runs of
 * characters that share a script.
 *
 * Call next() repeatedly; after each call that returns true,
 * getScriptStart(), getScriptEnd() and getScriptCode() describe the run
 * that was found. The text itself is not copied, only the pointer passed
 * in is kept.
 */
class ScriptRun : public UObject {
public:
    // Creates an iterator over no text (NULL pointer, empty range).
    ScriptRun();

    // Creates an iterator over chars[0 .. length).
    ScriptRun(const UChar chars[], int32_t length);

    // Creates an iterator over chars[start .. start + length).
    ScriptRun(const UChar chars[], int32_t start, int32_t length);

    // Rewinds to the start of the current range and empties the
    // paired-punctuation stack; the script code becomes USCRIPT_INVALID_CODE.
    void reset();

    // Selects the range [start, start + count) of the current text and
    // rewinds to its start.
    void reset(int32_t start, int32_t count);

    // Switches to new text and selects the range [start, start + length).
    void reset(const UChar chars[], int32_t start, int32_t length);

    // Index of the first code unit of the current run.
    int32_t getScriptStart();

    // Index one past the last code unit of the current run.
    int32_t getScriptEnd();

    // Script of the current run.
    UScriptCode getScriptCode();

    // Advances to the next run. Returns false once the end of the range
    // has been reached, true otherwise.
    UBool next();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @stable ICU 2.2
     */
    virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); }

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @stable ICU 2.2
     */
    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }

private:

    // True when the two script codes are equal or either one is at or
    // below USCRIPT_INHERITED.
    static UBool sameScript(int32_t scriptOne, int32_t scriptTwo);

    // Range of the text being iterated: [charStart, charLimit).
    int32_t charStart;
    int32_t charLimit;
    // The text itself; not owned by this object.
    const UChar *charArray;

    // The current run: [scriptStart, scriptEnd) with script scriptCode.
    int32_t scriptStart;
    int32_t scriptEnd;
    UScriptCode scriptCode;

    // Stack of currently open paired characters; parenSP is the index of
    // the top entry, -1 when the stack is empty.
    ParenStackEntry parenStack[128];
    int32_t parenSP;

    // Position of the highest set bit of value, -32 if value <= 0.
    static int8_t highBit(int32_t value);
    // Index of ch in pairedChars, or -1 if ch is not a paired character.
    static int32_t getPairIndex(UChar32 ch);

    // Sorted table of paired characters: even indices hold opening
    // characters, the following odd index the matching closing character.
    static UChar32 pairedChars[];
    // Number of entries in pairedChars.
    static const int32_t pairedCharCount;
    // Largest power of two not greater than pairedCharCount.
    static const int32_t pairedCharPower;
    // pairedCharCount - pairedCharPower.
    static const int32_t pairedCharExtra;

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char fgClassID;
};

// Builds an iterator with no text attached: NULL pointer, empty range.
inline ScriptRun::ScriptRun()
{
    charArray = NULL;
    reset(0, 0);
}

// Builds an iterator over the first `length` code units of `chars`.
inline ScriptRun::ScriptRun(const UChar chars[], int32_t length)
{
    charArray = chars;
    reset(0, length);
}

// Builds an iterator over `length` code units of `chars`, beginning at
// index `start`.
inline ScriptRun::ScriptRun(const UChar chars[], int32_t start, int32_t length)
{
    charArray = chars;
    reset(start, length);
}

// Index at which the current run begins.
inline int32_t ScriptRun::getScriptStart() { return scriptStart; }

// Index just past the end of the current run.
inline int32_t ScriptRun::getScriptEnd() { return scriptEnd; }

// Script code of the current run.
inline UScriptCode ScriptRun::getScriptCode() { return scriptCode; }

// Rewinds the iterator: the current run becomes the empty run at the start
// of the range, with no script, and the paired-punctuation stack is
// emptied.
inline void ScriptRun::reset()
{
    parenSP     = -1;
    scriptCode  = USCRIPT_INVALID_CODE;
    scriptEnd   = charStart;
    scriptStart = charStart;
}

// Selects the range [start, start + length) of the current text, then
// rewinds.
inline void ScriptRun::reset(int32_t start, int32_t length)
{
    charLimit = start + length;
    charStart = start;

    reset();
}

// Attaches new text, selects the range [start, start + length) of it and
// rewinds.
inline void ScriptRun::reset(const UChar chars[], int32_t start, int32_t length)
{
    reset(start, length);

    charArray = chars;
}


#endif