Add Basic code syntax highlighting to the LibreOffice help

Change-Id: Id47172d0386e7aa28d82178f04b5f626f0c441fe
diff --git a/helpcompiler/Library_helplinker.mk b/helpcompiler/Library_helplinker.mk
index 6820c3b..0d9be0b 100644
--- a/helpcompiler/Library_helplinker.mk
+++ b/helpcompiler/Library_helplinker.mk
@@ -43,8 +43,16 @@ endif

$(eval $(call gb_Library_use_libraries,helplinker,\
    sal \
    svt \
    tl \
))

$(eval $(call gb_Library_use_internal_api,helplinker,\
    udkapi \
    offapi \
))


$(eval $(call gb_Library_use_externals,helplinker,\
    boost_headers \
    expat_utf8 \
@@ -58,6 +66,7 @@ $(eval $(call gb_Library_add_exception_objects,helplinker,\
    helpcompiler/source/LuceneHelper \
    helpcompiler/source/HelpIndexer \
    helpcompiler/source/HelpSearch \
    helpcompiler/source/BasCodeTagger \
))

ifeq ($(strip $(OS)$(CPU)$(COM)),MACOSXPGCC)
diff --git a/helpcompiler/Package_inc.mk b/helpcompiler/Package_inc.mk
index b0717b2..e532d95 100644
--- a/helpcompiler/Package_inc.mk
+++ b/helpcompiler/Package_inc.mk
@@ -15,5 +15,6 @@ $(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpCompiler
$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpIndexer.hxx,inc/HelpIndexer.hxx))
$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpLinker.hxx,inc/HelpLinker.hxx))
$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/HelpSearch.hxx,inc/HelpSearch.hxx))
$(eval $(call gb_Package_add_file,helpcompiler_inc,inc/helpcompiler/BasCodeTagger.hxx,inc/BasCodeTagger.hxx))

# vim: set noet sw=4 ts=4:
diff --git a/helpcompiler/inc/BasCodeTagger.hxx b/helpcompiler/inc/BasCodeTagger.hxx
new file mode 100644
index 0000000..3cf9261
--- /dev/null
+++ b/helpcompiler/inc/BasCodeTagger.hxx
@@ -0,0 +1,57 @@
#ifndef BASCODETAGGER_HXX
#define BASCODETAGGER_HXX

#include <iostream>
#include <cstdlib>
#include <string>
#include <list>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <rtl/ustring.hxx>
#include <svtools/syntaxhighlight.hxx>
#include <helpcompiler/dllapi.h>

class BasicCodeTagger;
class LibXmlTreeWalker;

//! Tagger class.
/*!
 *  Wraps the Basic code found in the <bascode> elements of a parsed help document
 *  into <item type="..."> elements, using SyntaxHighlighter to split each paragraph
 *  into tokens. The document is modified in place; the destructor shown in this patch
 *  only deletes the tree walker, so the xmlDocPtr stays owned by the caller.
 */
class L10N_DLLPUBLIC BasicCodeTagger
{
  private:
    xmlDocPtr             m_pDocument;               //!< Document being tagged (not freed by this class).
    std::list<xmlNodePtr> m_BasicCodeContainerTags;  //!< <bascode> nodes still waiting to be tagged.
    LibXmlTreeWalker   *m_pXmlTreeWalker;            //!< Walker used to find the <bascode> nodes; deleted in the destructor.
    std::list<std::string>  m_BasicCodeStringList;   //!< NOTE(review): not referenced by the code visible in this patch.
    SyntaxHighlighter     m_Highlighter;             //!< Initialized with HIGHLIGHT_BASIC in the constructor.
    bool m_bTaggingCompleted;                        //!< Set once tagBasicCodes() has run, making later calls no-ops.
    //! Replaces the content of one child of a <bascode> node with highlighted <item> nodes.
    void tagParagraph( xmlNodePtr paragraph );
    //! Returns a newly allocated type name for a token type; the caller releases it with xmlFree().
    xmlChar* getTypeString( TokenTypes tokenType );
    //! Fills m_BasicCodeContainerTags with the <bascode> nodes of the document.
    void getBasicCodeContainerNodes();
    //! Tags every child of every collected <bascode> node, emptying the list on the way.
    void tagBasCodeParagraphs();

  public:
    //! Values thrown (by value) to report failures.
    enum TaggerException { FILE_WRITING, NULL_DOCUMENT, EMPTY_DOCUMENT };
    //! Throws NULL_DOCUMENT when rootDoc is NULL.
    BasicCodeTagger( xmlDocPtr rootDoc );
    ~BasicCodeTagger();
    //! Entry point: collects the <bascode> nodes and tags their paragraphs (only once per object).
    void tagBasicCodes();
    //! Writes the document to filePath with the given libxml2 encoding name; throws FILE_WRITING on failure.
    void saveTreeToFile( const std::string& filePath, const std::string& encoding );
};

//================LibXmlTreeWalker===========================================================

//! Walks the nodes of a libxml2 document sibling by sibling, level by level.
/*!
 *  The walker keeps a queue holding the first child of every visited node that has
 *  children. When the current sibling chain is exhausted, the next chain is taken from
 *  the front of that queue. The nodes themselves are not owned by the walker.
 */
class L10N_DLLPUBLIC LibXmlTreeWalker
{
  private:
    xmlNodePtr            m_pCurrentNode; //!< Node the walker currently points at.
    std::list<xmlNodePtr> m_Queue; //!< Queue for breadth-first search (first children of visited nodes).

  public:
    //! Throws BasicCodeTagger::NULL_DOCUMENT / EMPTY_DOCUMENT for a NULL document / a document without root element.
    LibXmlTreeWalker( xmlDocPtr doc );
    ~LibXmlTreeWalker() {}
    //! Advances to the next node; callers are expected to check end() first.
    void nextNode();
    //! Returns the node the walker currently points at.
    xmlNodePtr currentNode();
    //! True when the current node has no next sibling and no child list is queued.
    bool end();
    //! Removes the current node's children from the queue so that they are not visited.
    void ignoreCurrNodesChildren();
};

#endif
diff --git a/helpcompiler/inc/HelpCompiler.hxx b/helpcompiler/inc/HelpCompiler.hxx
index f0a4177..034a629 100644
--- a/helpcompiler/inc/HelpCompiler.hxx
+++ b/helpcompiler/inc/HelpCompiler.hxx
@@ -71,7 +71,6 @@ namespace fs
        {
            rtl::OUString sWorkingDir;
            osl_getProcessWorkingDir(&sWorkingDir.pData);

            rtl::OString tmp(in.c_str());
            rtl::OUString ustrSystemPath(rtl::OStringToOUString(tmp, getThreadTextEncoding()));
            osl::File::getFileURLFromSystemPath(ustrSystemPath, data);
@@ -230,6 +229,7 @@ public:
    HelpCompiler(StreamTable &streamTable,
                const fs::path &in_inputFile,
                const fs::path &in_src,
                const fs::path &in_zipdir,
                const fs::path &in_resEmbStylesheet,
                const std::string &in_module,
                const std::string &in_lang,
@@ -245,9 +245,10 @@ public:
                const std::string &entryName, const Hashtable &bytesToAdd);
private:
    xmlDocPtr getSourceDocument(const fs::path &filePath);
    void sourceDocumentPreWorks( xmlDocPtr doc , const fs::path &filePath);
    xmlNodePtr clone(xmlNodePtr node, const std::string& appl);
    StreamTable &streamTable;
    const fs::path inputFile, src;
    const fs::path inputFile, src, zipdir;
    const std::string module, lang;
    const fs::path resEmbStylesheet;
    bool bExtensionMode;
@@ -260,5 +261,4 @@ inline char tocharlower(char c)
}

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/helpcompiler/source/BasCodeTagger.cxx b/helpcompiler/source/BasCodeTagger.cxx
new file mode 100644
index 0000000..858db82
--- /dev/null
+++ b/helpcompiler/source/BasCodeTagger.cxx
@@ -0,0 +1,251 @@
#include <helpcompiler/BasCodeTagger.hxx>

//! Creates a walker and positions it on the first node after the root element.
/*!
 *  \param doc  parsed document to walk; must stay alive as long as the walker is used.
 *  \throws BasicCodeTagger::NULL_DOCUMENT   when doc is NULL.
 *  \throws BasicCodeTagger::EMPTY_DOCUMENT  when doc has no root element.
 *
 *  When the root element has neither children nor a following sibling there is nothing
 *  to advance to, so the walker stays on the root element and end() is true right away.
 */
LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
{
    if ( doc == NULL )
        throw BasicCodeTagger::NULL_DOCUMENT;
    m_pCurrentNode = xmlDocGetRootElement( doc );
    if ( m_pCurrentNode == NULL )
        throw BasicCodeTagger::EMPTY_DOCUMENT;
    if ( m_pCurrentNode->xmlChildrenNode != NULL )
        m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
    // nextNode() takes the front of the queue when there is no next sibling; calling it
    // with an empty queue would be undefined behaviour, so only advance when possible.
    if ( m_pCurrentNode->next != NULL || !m_Queue.empty() )
        nextNode();
}

//! Advances the walker to the next node.
/*!
 *  The next sibling is preferred. When the sibling chain is exhausted, the walk continues
 *  with the oldest queued child list. If neither exists the traversal is finished and
 *  the call leaves the walker untouched instead of reading from an empty queue.
 */
void LibXmlTreeWalker::nextNode()
{
    if ( m_pCurrentNode->next != NULL )
    {
        m_pCurrentNode = m_pCurrentNode->next;
    }
    else if ( !m_Queue.empty() )
    {
        m_pCurrentNode = m_Queue.front();
        m_Queue.pop_front();
    }
    else
    {
        // Nothing left to visit; returning here also avoids queueing the current
        // node's children a second time.
        return;
    }
    //queue children if they exist
    if ( m_pCurrentNode->xmlChildrenNode != NULL )
        m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
}

void LibXmlTreeWalker::ignoreCurrNodesChildren()
{
    if ( m_pCurrentNode->xmlChildrenNode != NULL )
          m_Queue.pop_back();
}

//! Tells whether the walk is finished: no next sibling and no queued child list.
bool LibXmlTreeWalker::end()
{
    if ( m_pCurrentNode->next != NULL )
        return false;
    return m_Queue.empty();
}

//! Gives access to the node the walker currently points at.
xmlNodePtr LibXmlTreeWalker::currentNode()
{
    xmlNodePtr pNode = m_pCurrentNode;
    return pNode;
}

//======================================================

//! Prepares a tagger for the given document and sets the highlighter up for Basic.
/*!
 *  \param rootDoc  document to tag; a NULL pointer is rejected with NULL_DOCUMENT.
 */
BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc )
    : m_pDocument( rootDoc )
    , m_pXmlTreeWalker( NULL )
    , m_bTaggingCompleted( false )
{
    if ( m_pDocument == NULL )
        throw NULL_DOCUMENT;
    m_Highlighter.initialize( HIGHLIGHT_BASIC );
}

//! Releases the tree walker; the document itself is left alone.
BasicCodeTagger::~BasicCodeTagger()
{
    // deleting a NULL pointer is a no-op, so no check is needed
    delete m_pXmlTreeWalker;
}
//!Gathers all the <bascode> tag nodes from xml tree.
/*!
 *    Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
 */
void BasicCodeTagger::getBasicCodeContainerNodes()
{
      xmlNodePtr currentNode;

    m_BasicCodeContainerTags.clear();

    if ( m_pXmlTreeWalker != NULL )
      delete m_pXmlTreeWalker;
    m_pXmlTreeWalker = new LibXmlTreeWalker( m_pDocument );

    currentNode = m_pXmlTreeWalker->currentNode();
    if ( !( xmlStrcmp( currentNode->name, (const xmlChar*) "bascode" ) ) )
    { //Found <bascode>
        m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list
    }
    while ( !m_pXmlTreeWalker->end() )
    {
          m_pXmlTreeWalker->nextNode();
        if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, (const xmlChar*) "bascode" ) ) )
        { //Found <bascode>
            m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list
            m_pXmlTreeWalker->ignoreCurrNodesChildren();
        }
    }
}

//! Extracts Basic Codes containted in <bascode> tags.
/*!
 *  For each <bascode> this method iterates trough it's <paragraph> tags and "inserts" <item> tags according
 *  to the Basic code syntax found in that paragraph.
 */
void BasicCodeTagger::tagBasCodeParagraphs()
{
    //helper variables
    xmlNodePtr currBascodeNode;
    xmlNodePtr currParagraph;
    while ( !m_BasicCodeContainerTags.empty() )
    {
        currBascodeNode = m_BasicCodeContainerTags.front();
        currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph>
        while ( currParagraph != NULL )
        {
            tagParagraph( currParagraph );
            currParagraph=currParagraph->next;
        }
        m_BasicCodeContainerTags.pop_front(); //next element
    }
}

//! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
/*!
 *  The text of the paragraph is read, all of its children are removed (attributes stay)
 *  and the text is put back token by token: whitespace as plain text, everything else
 *  wrapped in <item type="...">.
 *
 *  The highlighter reports token boundaries as indices into the UTF-16 line it was given.
 *  The tokens are therefore cut out of that decoded line and converted back to UTF-8,
 *  instead of applying those indices to the UTF-8 bytes returned by libxml2, which would
 *  split the text at the wrong places as soon as it contains non-ASCII characters.
 *
 *  \param paragraph  child node of a <bascode> element; nodes without text are left untouched.
 */
void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
{
    //1. get paragraph text
    xmlChar* codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
    if ( codeSnippet == NULL )
    {
        return; //no text, nothing more to do here
    }
    // Decode once and release the libxml2 buffer right away; only the decoded line is
    // needed from here on.
    const rtl::OUString sourceLine(
        reinterpret_cast<const sal_Char*>( codeSnippet ),
        strlen( reinterpret_cast<const char*>( codeSnippet ) ),
        RTL_TEXTENCODING_UTF8 );
    xmlFree( codeSnippet );

    //2. delete every child from paragraph (except attributes)
    xmlNodePtr curNode = paragraph->xmlChildrenNode;
    while ( curNode != NULL )
    {
        xmlNodePtr sibling = curNode->next; // fetch before the node is freed
        xmlUnlinkNode( curNode );
        xmlFreeNode( curNode );
        curNode = sibling;
    }

    //3. create new paragraph content
    String strLine( sourceLine );
    m_Highlighter.notifyChange ( 0, 0, &strLine, 1 );
    HighlightPortions portions;
    m_Highlighter.getHighlightPortions( 0, strLine, portions );

    const sal_Int32 nLineLength = sourceLine.getLength();
    for ( size_t i = 0; i < portions.size(); ++i )
    {
        HighlightPortion& r = portions[i];

        // Clamp the portion to the line so that copy() is never called out of range.
        const sal_Int32 nBegin = r.nBegin;
        sal_Int32 nLength = r.nEnd - r.nBegin;
        if ( nLength > nLineLength - nBegin )
            nLength = nLineLength - nBegin;
        rtl::OUString portionText;
        if ( nBegin < nLineLength && nLength > 0 )
            portionText = sourceLine.copy( nBegin, nLength );

        const rtl::OString utf8Portion(
            rtl::OUStringToOString( portionText, RTL_TEXTENCODING_UTF8 ) );
        // xmlNewText() copies the content, so the temporary string may go out of scope.
        xmlNodePtr text = xmlNewText( reinterpret_cast<const xmlChar*>( utf8Portion.getStr() ) );

        if ( r.tokenType != TT_WHITESPACE )
        {
            xmlChar* typeStr = getTypeString( r.tokenType );
            xmlNodePtr item = xmlNewTextChild(
                paragraph, NULL, reinterpret_cast<const xmlChar*>( "item" ), NULL );
            xmlNewProp( item, reinterpret_cast<const xmlChar*>( "type" ), typeStr );
            xmlAddChild( item, text );
            xmlFree( typeStr );
        }
        else
            xmlAddChild( paragraph, text );
    }
}

//! Manages tagging process.
/*!
 *    This is the "main" function of BasicCodeTagger.
 */
void BasicCodeTagger::tagBasicCodes()
{
      if ( m_bTaggingCompleted )
        return;
    //gather <bascode> nodes
    try
    {
        getBasicCodeContainerNodes();
    }
    catch (TaggerException ex)
    {
          std::cout << "Some kind of error occured." << std::endl;
    }

    //tag basic code paragraphs in <bascode> tag
    tagBasCodeParagraphs();
    m_bTaggingCompleted = true;
}

//! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
/*!
 *  \return a copy of the type name made with xmlCharStrdup(); the caller releases it
 *          with xmlFree(). TT_UNKNOWN and unrecognised values both yield "unknown".
 */
xmlChar* BasicCodeTagger::getTypeString( TokenTypes tokenType )
{
    const char* typeName = "unknown";
    switch ( tokenType )
    {
        case TT_IDENTIFIER : typeName = "identifier"; break;
        case TT_WHITESPACE : typeName = "whitespace"; break;
        case TT_NUMBER     : typeName = "number";     break;
        case TT_STRING     : typeName = "string";     break;
        case TT_EOL        : typeName = "eol";        break;
        case TT_COMMENT    : typeName = "comment";    break;
        case TT_ERROR      : typeName = "error";      break;
        case TT_OPERATOR   : typeName = "operator";   break;
        case TT_KEYWORDS   : typeName = "keywords";   break;
        case TT_PARAMETER  : typeName = "parameter";  break;
        default :
            // TT_UNKNOWN and anything else keep the initial "unknown"
            break;
    }
    return xmlCharStrdup( typeName );
}

//! Writes the current xml DOM, unformatted, to a file using the given libxml2 encoding string.
/*!
 *  \param filePath  file to write.
 *  \param encoding  encoding name handed to libxml2.
 *  \throws FILE_WRITING  when xmlSaveFormatFileEnc() reports failure (-1).
 */
void BasicCodeTagger::saveTreeToFile( const std::string& filePath, const std::string& encoding )
{
    const bool bFailed =
        xmlSaveFormatFileEnc( filePath.c_str(), m_pDocument, encoding.c_str(), 0 ) == -1;
    if ( bFailed )
        throw FILE_WRITING;
}
diff --git a/helpcompiler/source/HelpCompiler.cxx b/helpcompiler/source/HelpCompiler.cxx
index 74b29f5..8c6f66b 100644
--- a/helpcompiler/source/HelpCompiler.cxx
+++ b/helpcompiler/source/HelpCompiler.cxx
@@ -19,6 +19,7 @@


#include <helpcompiler/HelpCompiler.hxx>
#include <helpcompiler/BasCodeTagger.hxx>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
@@ -36,25 +37,55 @@ static void impl_sleep( sal_uInt32 nSec )

    osl::Thread::wait( aTime );
}

// Stores the compilation parameters and derives the 'gui' value from the GUI
// environment variable.
// NOTE(review): this text comes from a patch whose +/- markers were stripped, so the
// second parameter line and the 'src(...)' initializer line each appear twice below
// (old form first, new form with in_zipdir/zipdir second) — confirm against the applied file.
HelpCompiler::HelpCompiler(StreamTable &in_streamTable, const fs::path &in_inputFile,
    const fs::path &in_src, const fs::path &in_resEmbStylesheet,
    const fs::path &in_src, const fs::path &in_zipdir, const fs::path &in_resEmbStylesheet,
    const std::string &in_module, const std::string &in_lang, bool in_bExtensionMode)
    : streamTable(in_streamTable), inputFile(in_inputFile),
    src(in_src), module(in_module), lang(in_lang), resEmbStylesheet(in_resEmbStylesheet),
    src(in_src), zipdir(in_zipdir), module(in_module), lang(in_lang), resEmbStylesheet(in_resEmbStylesheet),
    bExtensionMode( in_bExtensionMode )
{
    // libxml2 global: 0 asks the parser to drop ignorable whitespace-only text nodes.
    xmlKeepBlanksDefaultValue = 0;
    char* guitmp = getenv("GUI");
    if (guitmp)
    {
        // Translate the environment value: each line overwrites 'gui' only when guitmp
        // equals the compared literal (strcmp() returns 0), otherwise 'gui' keeps its
        // value. So UNX -> "UNIX", MAC -> "MAC", WNT -> "WIN"; anything else leaves it unchanged.
        gui = (strcmp(guitmp, "UNX") ? gui : "UNIX");
        gui = (strcmp(guitmp, "MAC") ? gui : "MAC");
        gui = (strcmp(guitmp, "WNT") ? gui : "WIN");
    }
}

// Pre-processes a freshly parsed source document of the "sbasic" module: the Basic code
// in it is tagged for syntax highlighting and the result is saved below 'zipdir', under
// the part of the source path that follows the language directory.
// Other modules and NULL documents are left alone.
// Throws BasicCodeTagger::FILE_WRITING when the tagged document cannot be saved, and
// passes on every tagger exception except EMPTY_DOCUMENT.
void HelpCompiler::sourceDocumentPreWorks( xmlDocPtr doc, const fs::path &filePath )
{
    if ( !doc )
        return;
    if ( module != "sbasic" )
        return;

    try
    {
        BasicCodeTagger tagger( doc );
        tagger.tagBasicCodes();
    }
    catch ( BasicCodeTagger::TaggerException taggerError )
    {
        // an empty document is tolerated, everything else is passed on
        if ( taggerError != BasicCodeTagger::EMPTY_DOCUMENT )
            throw;
    }

    // Save the document in the ziptmp<module>_<lang>/text directory.
    // 1. Split the path after the language directory into directory and file name.
    const std::string& nativePath = filePath.native_file_string();
    const std::string relativePath =
        nativePath.substr( nativePath.rfind( lang+"/text/" ) ).substr( lang.length() );
    const std::string::size_type lastSlash = relativePath.rfind( '/' );
    const std::string xhpFileName = relativePath.substr( lastSlash + 1 );
    const std::string relativeDir = relativePath.substr( 0, lastSlash );

    // 2. Create the target directory and write the document into it.
    const std::string targetDir = zipdir.native_file_string() + relativeDir;
    fs::create_directory( fs::path( targetDir, fs::native ) );
    const std::string targetFile = targetDir + '/' + xhpFileName;
    if ( xmlSaveFormatFileEnc( targetFile.c_str(), doc, "utf-8", 0 ) == -1 )
        throw BasicCodeTagger::FILE_WRITING;
}

xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath)
{
    static const char *params[4 + 1];
@@ -68,6 +99,7 @@ xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath)
            impl_sleep( 3 );
            res = xmlParseFile(filePath.native_file_string().c_str());
        }
        sourceDocumentPreWorks( res, filePath );
    }
    else
    {
@@ -93,7 +125,7 @@ xmlDocPtr HelpCompiler::getSourceDocument(const fs::path &filePath)
            impl_sleep( 3 );
            doc = xmlParseFile(filePath.native_file_string().c_str());
        }

        sourceDocumentPreWorks( doc, filePath );
        //???res = xmlParseFile(filePath.native_file_string().c_str());

        res = xsltApplyStylesheet(cur, doc, params);
diff --git a/helpcompiler/source/HelpLinker.cxx b/helpcompiler/source/HelpLinker.cxx
index 62d3645..4bbe2b6 100644
--- a/helpcompiler/source/HelpLinker.cxx
+++ b/helpcompiler/source/HelpLinker.cxx
@@ -377,7 +377,7 @@ void HelpLinker::link() throw( HelpProcessingException )
            xhpFile = fs::path(xhpFileName, fs::native);
        }

        HelpCompiler hc( streamTable, xhpFile, langsourceRoot,
        HelpCompiler hc( streamTable, xhpFile, langsourceRoot, zipdir,
            embeddStylesheet, module, lang, bExtensionMode );

        HCDBG(std::cerr << "before compile of " << xhpFileName << std::endl);