summaryrefslogtreecommitdiff
path: root/comphelper
diff options
context:
space:
mode:
authorAndras Timar <atimar@suse.com>2013-02-15 13:02:10 +0100
committerAndras Timar <atimar@suse.com>2013-02-16 12:55:03 +0100
commitc16e9f4ed97f65357e9986f46ad88ee9f2237997 (patch)
tree9da5c0056d4aad772a72d57f7bbf07d24ec2478a /comphelper
parentc4aa2c4d7eb1cef0f3b172d1dbc4e51e9b379b80 (diff)
Move SyntaxHighlighter class from svtools to comphelper
We use this class in helpcompiler, and it is not desirable to compile svtools (thus half of LibreOffice) for a build tool in cross-compiling environment. Change-Id: I5e6bc3e576af41eb03c1420dd347c542306f69fa
Diffstat (limited to 'comphelper')
-rw-r--r--comphelper/Library_comphelper.mk7
-rw-r--r--comphelper/Package_inc.mk1
-rw-r--r--comphelper/inc/comphelper/syntaxhighlight.hxx165
-rw-r--r--comphelper/source/misc/syntaxhighlight.cxx739
4 files changed, 911 insertions, 1 deletions
diff --git a/comphelper/Library_comphelper.mk b/comphelper/Library_comphelper.mk
index 5c7fcd5a8c9a..93b0f3b48499 100644
--- a/comphelper/Library_comphelper.mk
+++ b/comphelper/Library_comphelper.mk
@@ -35,7 +35,11 @@ $(eval $(call gb_Library_add_defs,comphelper,\
-DCOMPHELPER_DLLIMPLEMENTATION \
))
-$(eval $(call gb_Library_use_external,comphelper,boost_headers))
+$(eval $(call gb_Library_use_externals,comphelper,\
+ boost_headers \
+ icuuc \
+ icu_headers \
+))
$(eval $(call gb_Library_use_libraries,comphelper,\
cppu \
@@ -108,6 +112,7 @@ $(eval $(call gb_Library_add_exception_objects,comphelper,\
comphelper/source/misc/storagehelper \
comphelper/source/misc/string \
comphelper/source/misc/synchronousdispatch \
+ comphelper/source/misc/syntaxhighlight \
comphelper/source/misc/types \
comphelper/source/misc/weak \
comphelper/source/misc/weakeventlistener \
diff --git a/comphelper/Package_inc.mk b/comphelper/Package_inc.mk
index 195a5c97c4a7..35552f6d58fc 100644
--- a/comphelper/Package_inc.mk
+++ b/comphelper/Package_inc.mk
@@ -124,5 +124,6 @@ $(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/sequenceashashma
$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configuration.hxx,comphelper/configuration.hxx))
$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/configurationhelper.hxx,comphelper/configurationhelper.hxx))
$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/newarray.hxx,comphelper/newarray.hxx))
+$(eval $(call gb_Package_add_file,comphelper_inc,inc/comphelper/syntaxhighlight.hxx,comphelper/syntaxhighlight.hxx))
# vim: set noet sw=4 ts=4:
diff --git a/comphelper/inc/comphelper/syntaxhighlight.hxx b/comphelper/inc/comphelper/syntaxhighlight.hxx
new file mode 100644
index 000000000000..11a57db6ff05
--- /dev/null
+++ b/comphelper/inc/comphelper/syntaxhighlight.hxx
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+#ifndef _COMPHELPER_SYNTAXHIGHLIGHT_HXX
+#define _COMPHELPER_SYNTAXHIGHLIGHT_HXX
+
+#include <vector>
+#include <rtl/ustring.hxx>
+
+#include <comphelper/comphelperdllapi.h>
+
+
+#if defined CDECL
+#undef CDECL
+#endif
+
+// for the bsearch
+#ifdef WNT
+#define CDECL _cdecl
+#endif
+#if defined(UNX)
+#define CDECL
+#endif
+#ifdef UNX
+#include <sys/resource.h>
+#endif
+
+
+// Token-Typen TT_...
+enum TokenTypes
+{
+ TT_UNKNOWN,
+ TT_IDENTIFIER,
+ TT_WHITESPACE,
+ TT_NUMBER,
+ TT_STRING,
+ TT_EOL,
+ TT_COMMENT,
+ TT_ERROR,
+ TT_OPERATOR,
+ TT_KEYWORDS,
+ TT_PARAMETER
+};
+
+struct HighlightPortion { sal_uInt16 nBegin; sal_uInt16 nEnd; TokenTypes tokenType; };
+
+
+typedef std::vector<HighlightPortion> HighlightPortions;
+
+/////////////////////////////////////////////////////////////////////////
+// Hilfsklasse zur Untersuchung von JavaScript-Modulen, zunaechst zum
+// Heraussuchen der Funktionen, spaeter auch zum Syntax-Highlighting verwenden
+
+// Flags fuer Zeichen-Eigenschaften
+#define CHAR_START_IDENTIFIER 0x0001
+#define CHAR_IN_IDENTIFIER 0x0002
+#define CHAR_START_NUMBER 0x0004
+#define CHAR_IN_NUMBER 0x0008
+#define CHAR_IN_HEX_NUMBER 0x0010
+#define CHAR_IN_OCT_NUMBER 0x0020
+#define CHAR_START_STRING 0x0040
+#define CHAR_OPERATOR 0x0080
+#define CHAR_SPACE 0x0100
+#define CHAR_EOL 0x0200
+
+#define CHAR_EOF 0x00
+
+
+// Sprachmodus des HighLighters (spaeter eventuell feiner
+// differenzieren mit Keyword-Liste, C-Kommentar-Flag)
+enum HighlighterLanguage
+{
+ HIGHLIGHT_BASIC,
+ HIGHLIGHT_SQL
+};
+
+class SimpleTokenizer_Impl
+{
+ HighlighterLanguage aLanguage;
+ // Zeichen-Info-Tabelle
+ sal_uInt16 aCharTypeTab[256];
+
+ const sal_Unicode* mpStringBegin;
+ const sal_Unicode* mpActualPos;
+
+ // Zeile und Spalte
+ sal_uInt32 nLine;
+ sal_uInt32 nCol;
+
+ sal_Unicode peekChar( void ) { return *mpActualPos; }
+ sal_Unicode getChar( void ) { nCol++; return *mpActualPos++; }
+
+ // Hilfsfunktion: Zeichen-Flag Testen
+ sal_Bool testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags );
+
+ // Neues Token holen, Leerstring == nix mehr da
+ sal_Bool getNextToken( /*out*/TokenTypes& reType,
+ /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos );
+
+ const char** ppListKeyWords;
+ sal_uInt16 nKeyWordCount;
+
+public:
+ SimpleTokenizer_Impl( HighlighterLanguage aLang = HIGHLIGHT_BASIC );
+ ~SimpleTokenizer_Impl( void );
+
+ sal_uInt16 parseLine( sal_uInt32 nLine, const OUString* aSource );
+ void getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine,
+ /*out*/HighlightPortions& portions );
+ void setKeyWords( const char** ppKeyWords, sal_uInt16 nCount );
+};
+
+
+//*** SyntaxHighlighter-Klasse ***
+// Konzept: Der Highlighter wird ueber alle Aenderungen im Source
+// informiert (notifyChange) und liefert dem Aufrufer jeweils die
+// Information zurueck, welcher Zeilen-Bereich des Source-Codes
+// aufgrund dieser Aenderung neu gehighlighted werden muss.
+// Dazu merkt sich Highlighter intern fuer jede Zeile, ob dort
+// C-Kommentare beginnen oder enden.
+class COMPHELPER_DLLPUBLIC SyntaxHighlighter
+{
+ HighlighterLanguage eLanguage;
+ SimpleTokenizer_Impl* m_pSimpleTokenizer;
+ char* m_pKeyWords;
+ sal_uInt16 m_nKeyWordCount;
+
+// void initializeKeyWords( HighlighterLanguage eLanguage );
+
+public:
+ SyntaxHighlighter( void );
+ ~SyntaxHighlighter( void );
+
+ // HighLighter (neu) initialisieren, die Zeilen-Tabelle wird
+ // dabei komplett geloescht, d.h. im Abschluss wird von einem
+ // leeren Source ausgegangen. In notifyChange() kann dann
+ // nur Zeile 0 angegeben werden.
+ void initialize( HighlighterLanguage eLanguage_ );
+
+ void notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
+ const OUString* pChangedLines, sal_uInt32 nArrayLength);
+
+ void getHighlightPortions( sal_uInt32 nLine, const OUString& rLine,
+ HighlightPortions& pPortions );
+
+ HighlighterLanguage GetLanguage() { return eLanguage;}
+};
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/comphelper/source/misc/syntaxhighlight.cxx b/comphelper/source/misc/syntaxhighlight.cxx
new file mode 100644
index 000000000000..2064c0c76a11
--- /dev/null
+++ b/comphelper/source/misc/syntaxhighlight.cxx
@@ -0,0 +1,739 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <unicode/uchar.h>
+#include <comphelper/syntaxhighlight.hxx>
+#include <comphelper/string.hxx>
+
+// ##########################################################################
+// ATTENTION: all these words need to be in lower case
+// ##########################################################################
+static const char* strListBasicKeyWords[] = {
+ "access",
+ "alias",
+ "and",
+ "any",
+ "append",
+ "as",
+ "attribute",
+ "base",
+ "binary",
+ "boolean",
+ "byref",
+ "byte",
+ "byval",
+ "call",
+ "case",
+ "cdecl",
+ "classmodule",
+ "close",
+ "compare",
+ "compatible",
+ "const",
+ "currency",
+ "date",
+ "declare",
+ "defbool",
+ "defcur",
+ "defdate",
+ "defdbl",
+ "deferr",
+ "defint",
+ "deflng",
+ "defobj",
+ "defsng",
+ "defstr",
+ "defvar",
+ "dim",
+ "do",
+ "double",
+ "each",
+ "else",
+ "elseif",
+ "end",
+ "end enum",
+ "end function",
+ "end if",
+ "end property",
+ "end select",
+ "end sub",
+ "end type",
+ "endif",
+ "enum",
+ "eqv",
+ "erase",
+ "error",
+ "exit",
+ "explicit",
+ "for",
+ "function",
+ "get",
+ "global",
+ "gosub",
+ "goto",
+ "if",
+ "imp",
+ "implements",
+ "in",
+ "input",
+ "integer",
+ "is",
+ "let",
+ "lib",
+ "like",
+ "line",
+ "line input",
+ "local",
+ "lock",
+ "long",
+ "loop",
+ "lprint",
+ "lset",
+ "mod",
+ "name",
+ "new",
+ "next",
+ "not",
+ "object",
+ "on",
+ "open",
+ "option",
+ "optional",
+ "or",
+ "output",
+ "paramarray",
+ "preserve",
+ "print",
+ "private",
+ "property",
+ "public",
+ "random",
+ "read",
+ "redim",
+ "rem",
+ "resume",
+ "return",
+ "rset",
+ "select",
+ "set",
+ "shared",
+ "single",
+ "static",
+ "step",
+ "stop",
+ "string",
+ "sub",
+ "system",
+ "text",
+ "then",
+ "to",
+ "type",
+ "typeof",
+ "until",
+ "variant",
+ "vbasupport",
+ "wend",
+ "while",
+ "with",
+ "withevents",
+ "write",
+ "xor"
+};
+
+
+static const char* strListSqlKeyWords[] = {
+ "all",
+ "and",
+ "any",
+ "as",
+ "asc",
+ "avg",
+ "between",
+ "by",
+ "cast",
+ "corresponding",
+ "count",
+ "create",
+ "cross",
+ "delete",
+ "desc",
+ "distinct",
+ "drop",
+ "escape",
+ "except",
+ "exists",
+ "false",
+ "from",
+ "full",
+ "global",
+ "group",
+ "having",
+ "in",
+ "inner",
+ "insert",
+ "intersect",
+ "into",
+ "is",
+ "join",
+ "left",
+ "like",
+ "local",
+ "match",
+ "max",
+ "min",
+ "natural",
+ "not",
+ "null",
+ "on",
+ "or",
+ "order",
+ "outer",
+ "right",
+ "select",
+ "set",
+ "some",
+ "sum",
+ "table",
+ "temporary",
+ "true",
+ "union",
+ "unique",
+ "unknown",
+ "update",
+ "using",
+ "values",
+ "where"
+};
+
+
+extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
+{
+ return strcmp( (char *)arg1, *(char **)arg2 );
+}
+
+
+namespace
+{
+ static bool isAlpha(sal_Unicode c)
+ {
+ if (comphelper::string::isalphaAscii(c))
+ return true;
+ return u_isalpha(c);
+ }
+}
+
+// Helper function: test character flag
+sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
+{
+ bool bRet = false;
+ if( c != 0 && c <= 255 )
+ {
+ bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
+ }
+ else if( c > 255 )
+ {
+ bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
+ ? isAlpha(c) : false;
+ }
+ return bRet;
+}
+
+void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
+{
+ ppListKeyWords = ppKeyWords;
+ nKeyWordCount = nCount;
+}
+
+sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
+ /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
+{
+ reType = TT_UNKNOWN;
+
+ rpStartPos = mpActualPos;
+
+ sal_Unicode c = peekChar();
+ if( c == CHAR_EOF )
+ return sal_False;
+
+ getChar();
+
+ //*** Go through all possibilities ***
+ // Space?
+ if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
+ {
+ while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
+ getChar();
+
+ reType = TT_WHITESPACE;
+ }
+
+ // Identifier?
+ else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
+ {
+ sal_Bool bIdentifierChar;
+ do
+ {
+ // Naechstes Zeichen holen
+ c = peekChar();
+ bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
+ if( bIdentifierChar )
+ getChar();
+ }
+ while( bIdentifierChar );
+
+ reType = TT_IDENTIFIER;
+
+ // Keyword table
+ if (ppListKeyWords != NULL)
+ {
+ int nCount = mpActualPos - rpStartPos;
+
+ // No keyword if string contains char > 255
+ bool bCanBeKeyword = true;
+ for( int i = 0 ; i < nCount ; i++ )
+ {
+ if( rpStartPos[i] > 255 )
+ {
+ bCanBeKeyword = false;
+ break;
+ }
+ }
+
+ if( bCanBeKeyword )
+ {
+ OUString aKWString(rpStartPos, nCount);
+ OString aByteStr = OUStringToOString(aKWString,
+ RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
+ if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
+ compare_strings ) )
+ {
+ reType = TT_KEYWORDS;
+
+ if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")))
+ {
+ // Remove all characters until end of line or EOF
+ sal_Unicode cPeek = peekChar();
+ while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
+ {
+ c = getChar();
+ cPeek = peekChar();
+ }
+
+ reType = TT_COMMENT;
+ }
+ }
+ }
+ }
+ }
+
+ // Operator?
+ // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
+ else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
+ {
+ // parameters for SQL view
+ if ( (c==':') || (c=='?'))
+ {
+ if (c!='?')
+ {
+ sal_Bool bIdentifierChar;
+ do
+ {
+ // Get next character
+ c = peekChar();
+ bIdentifierChar = isAlpha(c);
+ if( bIdentifierChar )
+ getChar();
+ }
+ while( bIdentifierChar );
+ }
+ reType = TT_PARAMETER;
+ }
+ else if (c=='-')
+ {
+ sal_Unicode cPeekNext = peekChar();
+ if (cPeekNext=='-')
+ {
+ // Remove all characters until end of line or EOF
+ while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
+ {
+ getChar();
+ cPeekNext = peekChar();
+ }
+ reType = TT_COMMENT;
+ }
+ }
+ else if (c=='/')
+ {
+ sal_Unicode cPeekNext = peekChar();
+ if (cPeekNext=='/')
+ {
+ // Remove all characters until end of line or EOF
+ while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
+ {
+ getChar();
+ cPeekNext = peekChar();
+ }
+ reType = TT_COMMENT;
+ }
+ }
+ else
+ {
+ // Comment?
+ if ( c == '\'' )
+ {
+ c = getChar();
+
+ // Remove all characters until end of line or EOF
+ sal_Unicode cPeek = c;
+ while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
+ {
+ getChar();
+ cPeek = peekChar();
+ }
+
+ reType = TT_COMMENT;
+ }
+
+ // Echter Operator, kann hier einfach behandelt werden,
+ // da nicht der wirkliche Operator, wie z.B. += interessiert,
+ // sondern nur die Tatsache, dass es sich um einen handelt.
+ if( reType != TT_COMMENT )
+ {
+ reType = TT_OPERATOR;
+ }
+
+ }
+ }
+
+ // Objekt-Trenner? Muss vor Number abgehandelt werden
+ else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
+ {
+ reType = TT_OPERATOR;
+ }
+
+ // Number?
+ else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
+ {
+ reType = TT_NUMBER;
+
+ // Number system, 10 = normal, it is changed for Oct/Hex
+ int nRadix = 10;
+
+ // Is it an Oct or a Hex number?
+ if( c == '&' )
+ {
+ // Octal?
+ if( peekChar() == 'o' || peekChar() == 'O' )
+ {
+ // remove o
+ getChar();
+ nRadix = 8; // Octal base
+
+ // Read all numbers
+ while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
+ c = getChar();
+ }
+ // Hexadecimal?
+ else if( peekChar() == 'h' || peekChar() == 'H' )
+ {
+ // remove x
+ getChar();
+ nRadix = 16; // Hexadecimal base
+
+ // Read all numbers
+ while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
+ c = getChar();
+ }
+ else
+ {
+ reType = TT_OPERATOR;
+ }
+ }
+
+ // When it is not Oct or Hex, then it is double
+ if( reType == TT_NUMBER && nRadix == 10 )
+ {
+ // Flag if the last character is an exponent
+ sal_Bool bAfterExpChar = sal_False;
+
+ // Read all numbers
+ while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
+ (bAfterExpChar && peekChar() == '+' ) ||
+ (bAfterExpChar && peekChar() == '-' ) )
+ // After exponent +/- are OK, too
+ {
+ c = getChar();
+ bAfterExpChar = ( c == 'e' || c == 'E' );
+ }
+ }
+ }
+
+ // String?
+ else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
+ {
+ // Remember which character has opened the string
+ sal_Unicode cEndString = c;
+ if( c == '[' )
+ cEndString = ']';
+
+ // Read all characters
+ while( peekChar() != cEndString )
+ {
+ // Detect EOF before getChar(), so we do not loose EOF
+ if( peekChar() == CHAR_EOF )
+ {
+ // ERROR: unterminated string literal
+ reType = TT_ERROR;
+ break;
+ }
+ c = getChar();
+ if( testCharFlags( c, CHAR_EOL ) == sal_True )
+ {
+ // ERROR: unterminated string literal
+ reType = TT_ERROR;
+ break;
+ }
+ }
+
+ if( reType != TT_ERROR )
+ {
+ getChar();
+ if( cEndString == ']' )
+ reType = TT_IDENTIFIER;
+ else
+ reType = TT_STRING;
+ }
+ }
+
+ // End of line?
+ else if( testCharFlags( c, CHAR_EOL ) == sal_True )
+ {
+ // If another EOL character comes, read it
+ sal_Unicode cNext = peekChar();
+ if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
+ getChar();
+
+ // Set position data at the line start
+ nCol = 0;
+ nLine++;
+
+ reType = TT_EOL;
+ }
+
+ // All other will remain TT_UNKNOWN
+
+ // Save end position
+ rpEndPos = mpActualPos;
+ return sal_True;
+}
+
+SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
+{
+ memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
+
+ // Fill character table
+ sal_uInt16 i;
+
+ // Allowed characters for identifiers
+ sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
+ for( i = 'a' ; i <= 'z' ; i++ )
+ aCharTypeTab[i] |= nHelpMask;
+ for( i = 'A' ; i <= 'Z' ; i++ )
+ aCharTypeTab[i] |= nHelpMask;
+ aCharTypeTab[(int)'_'] |= nHelpMask;
+ aCharTypeTab[(int)'$'] |= nHelpMask;
+
+ // Digit (can be identifier and number)
+ nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
+ CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
+ for( i = '0' ; i <= '9' ; i++ )
+ aCharTypeTab[i] |= nHelpMask;
+
+ // Add e, E, . and & here manually
+ aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
+ aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
+ aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
+ aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
+
+ // Hexadecimal digit
+ for( i = 'a' ; i <= 'f' ; i++ )
+ aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+ for( i = 'A' ; i <= 'F' ; i++ )
+ aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
+
+ // Octal digit
+ for( i = '0' ; i <= '7' ; i++ )
+ aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
+
+ // String literal start/end characters
+ aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
+ aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
+ aCharTypeTab[(int)'['] |= CHAR_START_STRING;
+ aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
+
+ // Operator characters
+ aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
+ // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
+ aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)','] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
+ // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
+ aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
+ aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
+
+ // Space
+ aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
+ aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
+
+ // End of line characters
+ aCharTypeTab[(int)'\r'] |= CHAR_EOL;
+ aCharTypeTab[(int)'\n'] |= CHAR_EOL;
+
+ ppListKeyWords = NULL;
+}
+
+SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
+{
+}
+
+SimpleTokenizer_Impl* getSimpleTokenizer( void )
+{
+ static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
+ if( !pSimpleTokenizer )
+ pSimpleTokenizer = new SimpleTokenizer_Impl();
+ return pSimpleTokenizer;
+}
+
+sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const OUString* aSource )
+{
+ // Set the position to the beginning of the source string
+ mpStringBegin = mpActualPos = aSource->getStr();
+
+ // Initialize row and column
+ nLine = nParseLine;
+ nCol = 0L;
+
+ // Variables for the out parameter
+ TokenTypes eType;
+ const sal_Unicode* pStartPos;
+ const sal_Unicode* pEndPos;
+
+ // Loop over all the tokens
+ sal_uInt16 nTokenCount = 0;
+ while( getNextToken( eType, pStartPos, pEndPos ) )
+ nTokenCount++;
+
+ return nTokenCount;
+}
+
+void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const OUString& rLine,
+ /*out*/HighlightPortions& portions )
+{
+ // Set the position to the beginning of the source string
+ mpStringBegin = mpActualPos = rLine.getStr();
+
+ // Initialize row and column
+ nLine = nParseLine;
+ nCol = 0L;
+
+ // Variables for the out parameter
+ TokenTypes eType;
+ const sal_Unicode* pStartPos;
+ const sal_Unicode* pEndPos;
+
+ // Loop over all the tokens
+ while( getNextToken( eType, pStartPos, pEndPos ) )
+ {
+ HighlightPortion portion;
+
+ portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
+ portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
+ portion.tokenType = eType;
+
+ portions.push_back(portion);
+ }
+}
+
+
+SyntaxHighlighter::SyntaxHighlighter()
+{
+ m_pSimpleTokenizer = 0;
+ m_pKeyWords = NULL;
+ m_nKeyWordCount = 0;
+}
+
+SyntaxHighlighter::~SyntaxHighlighter()
+{
+ delete m_pSimpleTokenizer;
+ delete m_pKeyWords;
+}
+
+void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
+{
+ eLanguage = eLanguage_;
+ delete m_pSimpleTokenizer;
+ m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
+
+ switch (eLanguage)
+ {
+ case HIGHLIGHT_BASIC:
+ m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
+ sizeof( strListBasicKeyWords ) / sizeof( char* ));
+ break;
+ case HIGHLIGHT_SQL:
+ m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
+ sizeof( strListSqlKeyWords ) / sizeof( char* ));
+ break;
+ default:
+ m_pSimpleTokenizer->setKeyWords( NULL, 0 );
+ }
+}
+
+void SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
+ const OUString* pChangedLines, sal_uInt32 nArrayLength)
+{
+ (void)nLineCountDifference;
+
+ for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
+ m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
+}
+
+void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const OUString& rLine,
+ /*out*/HighlightPortions& portions )
+{
+ m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */