summaryrefslogtreecommitdiff
path: root/svtools
diff options
context:
space:
mode:
authorAndras Timar <atimar@suse.com>2013-02-12 15:39:09 +0100
committerAndras Timar <atimar@suse.com>2013-02-13 10:19:17 +0100
commit6e4fd7ba9f4a02e130e817aadf0b977b8b8b6262 (patch)
tree96b75ef5c5c98414477c1adc30a3e2d5c0a9d5d5 /svtools
parent4ab3d5bb6f6f095375c2eaf200dd285be516feda (diff)
use u_isalpha() from ICU instead of home-grown solution
plus German comments were translated Change-Id: Id9ff5d4835e4ea224c9e6232a1762822aa833d37
Diffstat (limited to 'svtools')
-rw-r--r--svtools/source/edit/syntaxhighlight.cxx217
1 files changed, 53 insertions, 164 deletions
diff --git a/svtools/source/edit/syntaxhighlight.cxx b/svtools/source/edit/syntaxhighlight.cxx
index 7db7b722b660..ce3fcf153377 100644
--- a/svtools/source/edit/syntaxhighlight.cxx
+++ b/svtools/source/edit/syntaxhighlight.cxx
@@ -18,13 +18,12 @@
*/
+#include <unicode/uchar.h>
#include <svtools/syntaxhighlight.hxx>
-
-#include <unotools/charclass.hxx>
#include <comphelper/string.hxx>
// ##########################################################################
-// ATTENTION: all these words needs to be in small caps
+// ATTENTION: all these words need to be in lower case
// ##########################################################################
static const char* strListBasicKeyWords[] = {
"access",
@@ -232,111 +231,15 @@ extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
namespace
{
-
- class LetterTable
- {
- bool IsLetterTab[256];
-
- public:
- LetterTable( void );
-
- inline bool isLetter( sal_Unicode c )
- {
- bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
- return bRet;
- }
- bool isLetterUnicode( sal_Unicode c );
- };
-
static bool isAlpha(sal_Unicode c)
{
if (comphelper::string::isalphaAscii(c))
return true;
- static LetterTable aLetterTable;
- return aLetterTable.isLetter(c);
+ return u_isalpha(c);
}
}
-LetterTable::LetterTable( void )
-{
- for( int i = 0 ; i < 256 ; ++i )
- IsLetterTab[i] = false;
-
- IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT
- IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT
- IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE
- IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS
- IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE
- IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE
- IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA
- IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT
- IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT
- IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS
- IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT
- IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT
- IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS
- IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH
- IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE
- IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT
- IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT
- IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE
- IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS
- IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE
- IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT
- IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT
- IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS
- IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT
- IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN
- IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S
- IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT
- IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT
- IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE
- IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS
- IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE
- IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE
- IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA
- IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT
- IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT
- IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS
- IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT
- IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT
- IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS
- IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH
- IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE
- IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT
- IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT
- IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE
- IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS
- IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR
- IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT
- IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT
- IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
- IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS
- IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT
- IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN
- IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS
-}
-
-bool LetterTable::isLetterUnicode( sal_Unicode c )
-{
- static CharClass* pCharClass = NULL;
- if( pCharClass == NULL )
- pCharClass = new CharClass( Application::GetSettings().GetLanguageTag() );
- rtl::OUString aStr( c );
- bool bRet = pCharClass->isLetter( aStr, 0 );
- return bRet;
-}
-
-// Hilfsfunktion: Zeichen-Flag Testen
+// Helper function: test character flag
sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
{
bool bRet = false;
@@ -358,24 +261,20 @@ void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCou
nKeyWordCount = nCount;
}
-// Neues Token holen
sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
/*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
{
reType = TT_UNKNOWN;
- // Position merken
rpStartPos = mpActualPos;
- // Zeichen untersuchen
sal_Unicode c = peekChar();
if( c == CHAR_EOF )
return sal_False;
- // Zeichen lesen
getChar();
- //*** Alle Moeglichkeiten durchgehen ***
+ //*** Go through all possibilities ***
// Space?
if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
{
@@ -401,7 +300,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
reType = TT_IDENTIFIER;
- // Schluesselwort-Tabelle
+ // Keyword table
if (ppListKeyWords != NULL)
{
int nCount = mpActualPos - rpStartPos;
@@ -429,7 +328,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")))
{
- // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+ // Remove all characters until end of line or EOF
sal_Unicode cPeek = peekChar();
while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
{
@@ -456,7 +355,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
sal_Bool bIdentifierChar;
do
{
- // Naechstes Zeichen holen
+ // Get next character
c = peekChar();
bIdentifierChar = isAlpha(c);
if( bIdentifierChar )
@@ -471,7 +370,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
sal_Unicode cPeekNext = peekChar();
if (cPeekNext=='-')
{
- // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+ // Remove all characters until end of line or EOF
while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
{
getChar();
@@ -485,7 +384,7 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
sal_Unicode cPeekNext = peekChar();
if (cPeekNext=='/')
{
- // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+ // Remove all characters until end of line or EOF
while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
{
getChar();
@@ -496,12 +395,12 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
}
else
{
- // Kommentar ?
+ // Comment?
if ( c == '\'' )
{
- c = getChar(); // '/' entfernen
+ c = getChar();
- // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
+ // Remove all characters until end of line or EOF
sal_Unicode cPeek = c;
while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
{
@@ -529,36 +428,36 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
reType = TT_OPERATOR;
}
- // Zahl?
+ // Number?
else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
{
reType = TT_NUMBER;
- // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
+ // Radix of the number: 10 by default, changed below for octal/hex literals
int nRadix = 10;
- // Ist es eine Hex- oder Oct-Zahl?
+ // Is it an Oct or a Hex number?
if( c == '&' )
{
// Octal?
if( peekChar() == 'o' || peekChar() == 'O' )
{
- // o entfernen
+ // remove o
getChar();
- nRadix = 8; // Octal-Basis
+ nRadix = 8; // Octal base
- // Alle Ziffern einlesen
+ // Read all octal digits
while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
c = getChar();
}
- // Hex?
+ // Hexadecimal?
else if( peekChar() == 'h' || peekChar() == 'H' )
{
- // x entfernen
+ // remove x
getChar();
- nRadix = 16; // Hex-Basis
+ nRadix = 16; // Hexadecimal base
- // Alle Ziffern einlesen und puffern
+ // Read all hexadecimal digits
while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
c = getChar();
}
@@ -568,38 +467,36 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
}
}
- // Wenn nicht Oct oder Hex als double ansehen
+ // If it is neither octal nor hexadecimal, treat it as a double
if( reType == TT_NUMBER && nRadix == 10 )
{
- // Flag, ob das letzte Zeichen ein Exponent war
+ // Flag: true if the last character read was an exponent marker (e/E)
sal_Bool bAfterExpChar = sal_False;
- // Alle Ziffern einlesen
+ // Read all digits
while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
(bAfterExpChar && peekChar() == '+' ) ||
(bAfterExpChar && peekChar() == '-' ) )
- // Nach Exponent auch +/- OK
+ // After exponent +/- are OK, too
{
- c = getChar(); // Zeichen lesen
+ c = getChar();
bAfterExpChar = ( c == 'e' || c == 'E' );
}
}
-
- // reType = TT_NUMBER;
}
// String?
else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
{
- // Merken, welches Zeichen den String eroeffnet hat
+ // Remember which character has opened the string
sal_Unicode cEndString = c;
if( c == '[' )
cEndString = ']';
- // Alle Ziffern einlesen und puffern
+ // Read all characters
while( peekChar() != cEndString )
{
- // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
+ // Detect EOF before getChar(), so we do not lose EOF
if( peekChar() == CHAR_EOF )
{
// ERROR: unterminated string literal
@@ -615,7 +512,6 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
}
}
- // Zeichen lesen
if( reType != TT_ERROR )
{
getChar();
@@ -626,25 +522,24 @@ sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
}
}
- // Zeilenende?
+ // End of line?
else if( testCharFlags( c, CHAR_EOL ) == sal_True )
{
- // Falls ein weiteres anderes EOL-Char folgt, weg damit
+ // If a different EOL character follows, consume it as well
sal_Unicode cNext = peekChar();
if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
getChar();
- // Positions-Daten auf Zeilen-Beginn setzen
+ // Set position data at the line start
nCol = 0;
nLine++;
reType = TT_EOL;
}
- // Alles andere bleibt TT_UNKNOWN
+ // Everything else remains TT_UNKNOWN
-
- // End-Position eintragen
+ // Save end position
rpEndPos = mpActualPos;
return sal_True;
}
@@ -653,49 +548,47 @@ SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLangua
{
memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
- // Zeichen-Tabelle fuellen
+ // Fill character table
sal_uInt16 i;
- // Zulaessige Zeichen fuer Identifier
+ // Allowed characters for identifiers
sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
for( i = 'a' ; i <= 'z' ; i++ )
aCharTypeTab[i] |= nHelpMask;
for( i = 'A' ; i <= 'Z' ; i++ )
aCharTypeTab[i] |= nHelpMask;
- // '_' extra eintragen
aCharTypeTab[(int)'_'] |= nHelpMask;
- // AB 23.6.97: '$' ist auch erlaubt
aCharTypeTab[(int)'$'] |= nHelpMask;
- // Ziffern (Identifier und Number ist moeglich)
+ // Digit (can be identifier and number)
nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
for( i = '0' ; i <= '9' ; i++ )
aCharTypeTab[i] |= nHelpMask;
- // e und E sowie . von Hand ergaenzen
+ // Add e, E, . and & here manually
aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
- // Hex-Ziffern
+ // Hexadecimal digit
for( i = 'a' ; i <= 'f' ; i++ )
aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
for( i = 'A' ; i <= 'F' ; i++ )
aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
- // Oct-Ziffern
+ // Octal digit
for( i = '0' ; i <= '7' ; i++ )
aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
- // String-Beginn/End-Zeichen
+ // String literal start/end characters
aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
aCharTypeTab[(int)'['] |= CHAR_START_STRING;
aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
- // Operator-Zeichen
+ // Operator characters
aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
// aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
@@ -724,7 +617,7 @@ SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLangua
aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
- // Zeilen-Ende-Zeichen
+ // End of line characters
aCharTypeTab[(int)'\r'] |= CHAR_EOL;
aCharTypeTab[(int)'\n'] |= CHAR_EOL;
@@ -743,22 +636,21 @@ SimpleTokenizer_Impl* getSimpleTokenizer( void )
return pSimpleTokenizer;
}
-// Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
{
- // Position auf den Anfang des Source-Strings setzen
+ // Set the position to the beginning of the source string
mpStringBegin = mpActualPos = aSource->GetBuffer();
- // Zeile und Spalte initialisieren
+ // Initialize line and column
nLine = nParseLine;
nCol = 0L;
- // Variablen fuer die Out-Parameter
+ // Variables for the out parameter
TokenTypes eType;
const sal_Unicode* pStartPos;
const sal_Unicode* pEndPos;
- // Schleife ueber alle Tokens
+ // Loop over all the tokens
sal_uInt16 nTokenCount = 0;
while( getNextToken( eType, pStartPos, pEndPos ) )
nTokenCount++;
@@ -769,19 +661,19 @@ sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String*
void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
/*out*/HighlightPortions& portions )
{
- // Position auf den Anfang des Source-Strings setzen
+ // Set the position to the beginning of the source string
mpStringBegin = mpActualPos = rLine.GetBuffer();
- // Zeile und Spalte initialisieren
+ // Initialize line and column
nLine = nParseLine;
nCol = 0L;
- // Variablen fuer die Out-Parameter
+ // Variables for the out parameters
TokenTypes eType;
const sal_Unicode* pStartPos;
const sal_Unicode* pEndPos;
- // Schleife ueber alle Tokens
+ // Loop over all the tokens
while( getNextToken( eType, pStartPos, pEndPos ) )
{
HighlightPortion portion;
@@ -795,9 +687,6 @@ void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const St
}
-//////////////////////////////////////////////////////////////////////////
-// Implementierung des SyntaxHighlighter
-
SyntaxHighlighter::SyntaxHighlighter()
{
m_pSimpleTokenizer = 0;