diff options
Diffstat (limited to 'basic/source/comp/scanner.cxx')
-rw-r--r-- | basic/source/comp/scanner.cxx | 612 |
1 files changed, 0 insertions, 612 deletions
diff --git a/basic/source/comp/scanner.cxx b/basic/source/comp/scanner.cxx deleted file mode 100644 index fac945f69b..0000000000 --- a/basic/source/comp/scanner.cxx +++ /dev/null @@ -1,612 +0,0 @@ -/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): autogen include statement, do not remove -#include "precompiled_basic.hxx" - -#include "sbcomp.hxx" -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#if defined UNX -#include <stdlib.h> -#else -#include <math.h> // atof() -#endif -#include <rtl/math.hxx> -#include <vcl/svapp.hxx> -#include <unotools/charclass.hxx> - -#include <runtime.hxx> - -SbiScanner::SbiScanner( const ::rtl::OUString& rBuf, StarBASIC* p ) : aBuf( rBuf ) -{ - pBasic = p; - pLine = NULL; - nVal = 0; - eScanType = SbxVARIANT; - nErrors = 0; - nBufPos = 0; - nCurCol1 = 0; - nSavedCol1 = 0; - nColLock = 0; - nLine = 0; - nCol1 = 0; - nCol2 = 0; - nCol = 0; - bError = - bAbort = - bSpaces = - bNumber = - bSymbol = - bUsedForHilite = - bCompatible = - bVBASupportOn = - bPrevLineExtentsComment = sal_False; - bHash = - bErrors = sal_True; -} - -SbiScanner::~SbiScanner() -{} - -void SbiScanner::LockColumn() -{ - if( !nColLock++ ) - nSavedCol1 = nCol1; -} - -void SbiScanner::UnlockColumn() -{ - if( nColLock ) - nColLock--; -} - -void SbiScanner::GenError( SbError code ) -{ - if( GetSbData()->bBlockCompilerError ) - { - bAbort = sal_True; - return; - } - if( !bError && bErrors ) - { - sal_Bool bRes = sal_True; - // Nur einen Fehler pro Statement reporten - bError = sal_True; - if( pBasic ) - { - // Falls EXPECTED oder UNEXPECTED kommen sollte, bezieht es sich - // immer auf das letzte Token, also die Col1 uebernehmen - sal_uInt16 nc = nColLock ? nSavedCol1 : nCol1; - switch( code ) - { - case SbERR_EXPECTED: - case SbERR_UNEXPECTED: - case SbERR_SYMBOL_EXPECTED: - case SbERR_LABEL_EXPECTED: - nc = nCol1; - if( nc > nCol2 ) nCol2 = nc; - break; - } - bRes = pBasic->CError( code, aError, nLine, nc, nCol2 ); - } - bAbort |= !bRes | - ( code == SbERR_NO_MEMORY || code == SbERR_PROG_TOO_LARGE ); - } - if( bErrors ) - nErrors++; -} - -// Falls sofort ein Doppelpunkt folgt, wird sal_True zurueckgeliefert. -// Wird von SbiTokenizer::MayBeLabel() verwendet, um einen Label zu erkennen - -sal_Bool SbiScanner::DoesColonFollow() -{ - if( pLine && *pLine == ':' ) - { - pLine++; nCol++; return sal_True; - } - else return sal_False; -} - -// Testen auf ein legales Suffix - -static SbxDataType GetSuffixType( sal_Unicode c ) -{ - static String aSuffixesStr = String::CreateFromAscii( "%&!#@ $" ); - if( c ) - { - sal_uInt32 n = aSuffixesStr.Search( c ); - if( STRING_NOTFOUND != n && c != ' ' ) - return SbxDataType( (sal_uInt16) n + SbxINTEGER ); - } - return SbxVARIANT; -} - -// Einlesen des naechsten Symbols in die Variablen aSym, nVal und eType -// Returnwert ist sal_False bei EOF oder Fehlern -#define BUF_SIZE 80 - -namespace { - -/** Returns true, if the passed character is a white space character. */ -inline bool lclIsWhitespace( sal_Unicode cChar ) -{ - return (cChar == ' ') || (cChar == '\t') || (cChar == '\f'); -} - -} // namespace - -sal_Bool SbiScanner::NextSym() -{ - // Fuer den EOLN-Fall merken - sal_uInt16 nOldLine = nLine; - sal_uInt16 nOldCol1 = nCol1; - sal_uInt16 nOldCol2 = nCol2; - sal_Unicode buf[ BUF_SIZE ], *p = buf; - bHash = sal_False; - - eScanType = SbxVARIANT; - aSym.Erase(); - bSymbol = - bNumber = bSpaces = sal_False; - - // Zeile einlesen? - if( !pLine ) - { - sal_Int32 n = nBufPos; - sal_Int32 nLen = aBuf.getLength(); - if( nBufPos >= nLen ) - return sal_False; - const sal_Unicode* p2 = aBuf.getStr(); - p2 += n; - while( ( n < nLen ) && ( *p2 != '\n' ) && ( *p2 != '\r' ) ) - p2++, n++; - // #163944# ignore trailing whitespace - sal_Int32 nCopyEndPos = n; - while( (nBufPos < nCopyEndPos) && lclIsWhitespace( aBuf[ nCopyEndPos - 1 ] ) ) - --nCopyEndPos; - aLine = aBuf.copy( nBufPos, nCopyEndPos - nBufPos ); - if( n < nLen ) - { - if( *p2 == '\r' && *( p2+1 ) == '\n' ) - n += 2; - else - n++; - } - nBufPos = n; - pLine = aLine.getStr(); - nOldLine = ++nLine; - nCol = nCol1 = nCol2 = nOldCol1 = nOldCol2 = 0; - nColLock = 0; - } - - // Leerstellen weg: - while( lclIsWhitespace( *pLine ) ) - pLine++, nCol++, bSpaces = sal_True; - - nCol1 = nCol; - - // nur Leerzeile? - if( !*pLine ) - goto eoln; - - if( bPrevLineExtentsComment ) - goto PrevLineCommentLbl; - - if( *pLine == '#' ) - { - pLine++; - nCol++; - bHash = sal_True; - } - - // Symbol? Dann Zeichen kopieren. - if( BasicSimpleCharClass::isAlpha( *pLine, bCompatible ) || *pLine == '_' ) - { - // Wenn nach '_' nichts kommt, ist es ein Zeilenabschluss! - if( *pLine == '_' && !*(pLine+1) ) - { pLine++; - goto eoln; } - bSymbol = sal_True; - short n = nCol; - for ( ; (BasicSimpleCharClass::isAlphaNumeric( *pLine, bCompatible ) || ( *pLine == '_' ) ); pLine++ ) - nCol++; - aSym = aLine.copy( n, nCol - n ); - - // Special handling for "go to" - if( bCompatible && *pLine && aSym.EqualsIgnoreCaseAscii( "go" ) ) - { - const sal_Unicode* pTestLine = pLine; - short nTestCol = nCol; - while( lclIsWhitespace( *pTestLine ) ) - { - pTestLine++; - nTestCol++; - } - - if( *pTestLine && *(pTestLine + 1) ) - { - String aTestSym = aLine.copy( nTestCol, 2 ); - if( aTestSym.EqualsIgnoreCaseAscii( "to" ) ) - { - aSym = String::CreateFromAscii( "goto" ); - pLine = pTestLine + 2; - nCol = nTestCol + 2; - } - } - } - - // Abschliessendes '_' durch Space ersetzen, wenn Zeilenende folgt - // (sonst falsche Zeilenfortsetzung) - if( !bUsedForHilite && !*pLine && *(pLine-1) == '_' ) - { - aSym.GetBufferAccess(); // #109693 force copy if necessary - *((sal_Unicode*)(pLine-1)) = ' '; // cast wegen const - } - // Typkennung? - // Das Ausrufezeichen bitte nicht testen, wenn - // danach noch ein Symbol anschliesst - else if( *pLine != '!' || !BasicSimpleCharClass::isAlpha( pLine[ 1 ], bCompatible ) ) - { - SbxDataType t = GetSuffixType( *pLine ); - if( t != SbxVARIANT ) - { - eScanType = t; - pLine++; - nCol++; - } - } - } - - // Zahl? Dann einlesen und konvertieren. - else if( BasicSimpleCharClass::isDigit( *pLine & 0xFF ) - || ( *pLine == '.' && BasicSimpleCharClass::isDigit( *(pLine+1) & 0xFF ) ) ) - { - short exp = 0; - short comma = 0; - short ndig = 0; - short ncdig = 0; - eScanType = SbxDOUBLE; - sal_Bool bBufOverflow = sal_False; - while( strchr( "0123456789.DEde", *pLine ) && *pLine ) - { - // AB 4.1.1996: Buffer voll? -> leer weiter scannen - if( (p-buf) == (BUF_SIZE-1) ) - { - bBufOverflow = sal_True; - pLine++, nCol++; - continue; - } - // Komma oder Exponent? - if( *pLine == '.' ) - { - if( ++comma > 1 ) - { - pLine++; nCol++; continue; - } - else *p++ = *pLine++, nCol++; - } - else if( strchr( "DdEe", *pLine ) ) - { - if (++exp > 1) - { - pLine++; nCol++; continue; - } - *p++ = 'E'; pLine++; nCol++; - // Vorzeichen hinter Exponent? - if( *pLine == '+' ) - pLine++, nCol++; - else - if( *pLine == '-' ) - *p++ = *pLine++, nCol++; - } - else - { - *p++ = *pLine++, nCol++; - if( comma && !exp ) ncdig++; - } - if (!exp) ndig++; - } - *p = 0; - aSym = p; bNumber = sal_True; - // Komma, Exponent mehrfach vorhanden? - if( comma > 1 || exp > 1 ) - { aError = '.'; - GenError( SbERR_BAD_CHAR_IN_NUMBER ); } - - // #57844 Lokalisierte Funktion benutzen - nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', NULL, NULL ); - - ndig = ndig - comma; - if( !comma && !exp ) - { - if( nVal >= SbxMININT && nVal <= SbxMAXINT ) - eScanType = SbxINTEGER; - else - if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG ) - eScanType = SbxLONG; - } - if( bBufOverflow ) - GenError( SbERR_MATH_OVERFLOW ); - - // Typkennung? - SbxDataType t = GetSuffixType( *pLine ); - if( t != SbxVARIANT ) - { - eScanType = t; - pLine++; - nCol++; - } - } - - // Hex/Oktalzahl? Einlesen und konvertieren: - else if( *pLine == '&' ) - { - pLine++; nCol++; - sal_Unicode cmp1[] = { '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F', 0 }; - sal_Unicode cmp2[] = { '0', '1', '2', '3', '4', '5', '6', '7', 0 }; - sal_Unicode *cmp = cmp1; - sal_Unicode base = 16; - sal_Unicode ndig = 8; - sal_Unicode xch = *pLine++ & 0xFF; nCol++; - switch( toupper( xch ) ) - { - case 'O': - cmp = cmp2; base = 8; ndig = 11; break; - case 'H': - break; - default : - // Wird als Operator angesehen - pLine--; nCol--; nCol1 = nCol-1; aSym = '&'; return SYMBOL; - } - bNumber = sal_True; - long l = 0; - int i; - sal_Bool bBufOverflow = sal_False; - while( BasicSimpleCharClass::isAlphaNumeric( *pLine & 0xFF, bCompatible ) ) - { - sal_Unicode ch = sal::static_int_cast< sal_Unicode >( - toupper( *pLine & 0xFF ) ); - pLine++; nCol++; - // AB 4.1.1996: Buffer voll, leer weiter scannen - if( (p-buf) == (BUF_SIZE-1) ) - bBufOverflow = sal_True; - else if( String( cmp ).Search( ch ) != STRING_NOTFOUND ) - //else if( strchr( cmp, ch ) ) - *p++ = ch; - else - { - aError = ch; - GenError( SbERR_BAD_CHAR_IN_NUMBER ); - } - } - *p = 0; - for( p = buf; *p; p++ ) - { - i = (*p & 0xFF) - '0'; - if( i > 9 ) i -= 7; - l = ( l * base ) + i; - if( !ndig-- ) - { - GenError( SbERR_MATH_OVERFLOW ); break; - } - } - if( *pLine == '&' ) pLine++, nCol++; - nVal = (double) l; - eScanType = ( l >= SbxMININT && l <= SbxMAXINT ) ? SbxINTEGER : SbxLONG; - if( bBufOverflow ) - GenError( SbERR_MATH_OVERFLOW ); - } - - // Strings: - else if( *pLine == '"' || *pLine == '[' ) - { - sal_Unicode cSep = *pLine; - if( cSep == '[' ) - bSymbol = sal_True, cSep = ']'; - short n = nCol+1; - while( *pLine ) - { - do pLine++, nCol++; - while( *pLine && ( *pLine != cSep ) ); - if( *pLine == cSep ) - { - pLine++; nCol++; - if( *pLine != cSep || cSep == ']' ) break; - } else aError = cSep, GenError( SbERR_EXPECTED ); - } - // If VBA Interop then doen't eat the [] chars - if ( cSep == ']' && bVBASupportOn ) - aSym = aLine.copy( n - 1, nCol - n + 1); - else - aSym = aLine.copy( n, nCol - n - 1 ); - // Doppelte Stringbegrenzer raus - String s( cSep ); - s += cSep; - sal_uInt16 nIdx = 0; - do - { - nIdx = aSym.Search( s, nIdx ); - if( nIdx == STRING_NOTFOUND ) - break; - aSym.Erase( nIdx, 1 ); - nIdx++; - } - while( true ); - if( cSep != ']' ) - eScanType = ( cSep == '#' ) ? SbxDATE : SbxSTRING; - } - // ungueltige Zeichen: - else if( ( *pLine & 0xFF ) >= 0x7F ) - { - GenError( SbERR_SYNTAX ); pLine++; nCol++; - } - // andere Gruppen: - else - { - short n = 1; - switch( *pLine++ ) - { - case '<': if( *pLine == '>' || *pLine == '=' ) n = 2; break; - case '>': if( *pLine == '=' ) n = 2; break; - case ':': if( *pLine == '=' ) n = 2; break; - } - aSym = aLine.copy( nCol, n ); - pLine += n-1; nCol = nCol + n; - } - - nCol2 = nCol-1; - -PrevLineCommentLbl: - // Kommentar? - if( bPrevLineExtentsComment || (eScanType != SbxSTRING && - ( aSym.GetBuffer()[0] == '\'' || aSym.EqualsIgnoreCaseAscii( "REM" ) ) ) ) - { - bPrevLineExtentsComment = sal_False; - aSym = String::CreateFromAscii( "REM" ); - sal_uInt16 nLen = String( pLine ).Len(); - if( bCompatible && pLine[ nLen - 1 ] == '_' && pLine[ nLen - 2 ] == ' ' ) - bPrevLineExtentsComment = sal_True; - nCol2 = nCol2 + nLen; - pLine = NULL; - } - return sal_True; - - // Sonst Zeilen-Ende: aber bitte auf '_' testen, ob die - // Zeile nicht weitergeht! -eoln: - if( nCol && *--pLine == '_' ) - { - pLine = NULL; - bool bRes = NextSym(); - if( bVBASupportOn && aSym.GetBuffer()[0] == '.' ) - { - // object _ - // .Method - // ^^^ <- spaces is legal in MSO VBA - OSL_TRACE("*** resetting bSpaces***"); - bSpaces = sal_False; - } - return bRes; - } - else - { - pLine = NULL; - nLine = nOldLine; - nCol1 = nOldCol1; - nCol2 = nOldCol2; - aSym = '\n'; - nColLock = 0; - return sal_True; - } -} - -LetterTable BasicSimpleCharClass::aLetterTable; - -LetterTable::LetterTable( void ) -{ - for( int i = 0 ; i < 256 ; ++i ) - IsLetterTab[i] = false; - - IsLetterTab[0xC0] = true; // À , CAPITAL LETTER A WITH GRAVE ACCENT - IsLetterTab[0xC1] = true; // Á , CAPITAL LETTER A WITH ACUTE ACCENT - IsLetterTab[0xC2] = true; // Â , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT - IsLetterTab[0xC3] = true; // Ã , CAPITAL LETTER A WITH TILDE - IsLetterTab[0xC4] = true; // Ä , CAPITAL LETTER A WITH DIAERESIS - IsLetterTab[0xC5] = true; // Å , CAPITAL LETTER A WITH RING ABOVE - IsLetterTab[0xC6] = true; // Æ , CAPITAL LIGATURE AE - IsLetterTab[0xC7] = true; // Ç , CAPITAL LETTER C WITH CEDILLA - IsLetterTab[0xC8] = true; // È , CAPITAL LETTER E WITH GRAVE ACCENT - IsLetterTab[0xC9] = true; // É , CAPITAL LETTER E WITH ACUTE ACCENT - IsLetterTab[0xCA] = true; // Ê , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT - IsLetterTab[0xCB] = true; // Ë , CAPITAL LETTER E WITH DIAERESIS - IsLetterTab[0xCC] = true; // Ì , CAPITAL LETTER I WITH GRAVE ACCENT - IsLetterTab[0xCD] = true; // Í , CAPITAL LETTER I WITH ACUTE ACCENT - IsLetterTab[0xCE] = true; // Î , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT - IsLetterTab[0xCF] = true; // Ï , CAPITAL LETTER I WITH DIAERESIS - IsLetterTab[0xD0] = true; // Ð , CAPITAL LETTER ETH - IsLetterTab[0xD1] = true; // Ñ , CAPITAL LETTER N WITH TILDE - IsLetterTab[0xD2] = true; // Ò , CAPITAL LETTER O WITH GRAVE ACCENT - IsLetterTab[0xD3] = true; // Ó , CAPITAL LETTER O WITH ACUTE ACCENT - IsLetterTab[0xD4] = true; // Ô , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT - IsLetterTab[0xD5] = true; // Õ , CAPITAL LETTER O WITH TILDE - IsLetterTab[0xD6] = true; // Ö , CAPITAL LETTER O WITH DIAERESIS - IsLetterTab[0xD8] = true; // Ø , CAPITAL LETTER O WITH STROKE - IsLetterTab[0xD9] = true; // Ù , CAPITAL LETTER U WITH GRAVE ACCENT - IsLetterTab[0xDA] = true; // Ú , CAPITAL LETTER U WITH ACUTE ACCENT - IsLetterTab[0xDB] = true; // Û , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT - IsLetterTab[0xDC] = true; // Ü , CAPITAL LETTER U WITH DIAERESIS - IsLetterTab[0xDD] = true; // Ý , CAPITAL LETTER Y WITH ACUTE ACCENT - IsLetterTab[0xDE] = true; // Þ , CAPITAL LETTER THORN - IsLetterTab[0xDF] = true; // ß , SMALL LETTER SHARP S - IsLetterTab[0xE0] = true; // à , SMALL LETTER A WITH GRAVE ACCENT - IsLetterTab[0xE1] = true; // á , SMALL LETTER A WITH ACUTE ACCENT - IsLetterTab[0xE2] = true; // â , SMALL LETTER A WITH CIRCUMFLEX ACCENT - IsLetterTab[0xE3] = true; // ã , SMALL LETTER A WITH TILDE - IsLetterTab[0xE4] = true; // ä , SMALL LETTER A WITH DIAERESIS - IsLetterTab[0xE5] = true; // å , SMALL LETTER A WITH RING ABOVE - IsLetterTab[0xE6] = true; // æ , SMALL LIGATURE AE - IsLetterTab[0xE7] = true; // ç , SMALL LETTER C WITH CEDILLA - IsLetterTab[0xE8] = true; // è , SMALL LETTER E WITH GRAVE ACCENT - IsLetterTab[0xE9] = true; // é , SMALL LETTER E WITH ACUTE ACCENT - IsLetterTab[0xEA] = true; // ê , SMALL LETTER E WITH CIRCUMFLEX ACCENT - IsLetterTab[0xEB] = true; // ë , SMALL LETTER E WITH DIAERESIS - IsLetterTab[0xEC] = true; // ì , SMALL LETTER I WITH GRAVE ACCENT - IsLetterTab[0xED] = true; // í , SMALL LETTER I WITH ACUTE ACCENT - IsLetterTab[0xEE] = true; // î , SMALL LETTER I WITH CIRCUMFLEX ACCENT - IsLetterTab[0xEF] = true; // ï , SMALL LETTER I WITH DIAERESIS - IsLetterTab[0xF0] = true; // ð , SMALL LETTER ETH - IsLetterTab[0xF1] = true; // ñ , SMALL LETTER N WITH TILDE - IsLetterTab[0xF2] = true; // ò , SMALL LETTER O WITH GRAVE ACCENT - IsLetterTab[0xF3] = true; // ó , SMALL LETTER O WITH ACUTE ACCENT - IsLetterTab[0xF4] = true; // ô , SMALL LETTER O WITH CIRCUMFLEX ACCENT - IsLetterTab[0xF5] = true; // õ , SMALL LETTER O WITH TILDE - IsLetterTab[0xF6] = true; // ö , SMALL LETTER O WITH DIAERESIS - IsLetterTab[0xF8] = true; // ø , SMALL LETTER O WITH OBLIQUE BAR - IsLetterTab[0xF9] = true; // ù , SMALL LETTER U WITH GRAVE ACCENT - IsLetterTab[0xFA] = true; // ú , SMALL LETTER U WITH ACUTE ACCENT - IsLetterTab[0xFB] = true; // û , SMALL LETTER U WITH CIRCUMFLEX ACCENT - IsLetterTab[0xFC] = true; // ü , SMALL LETTER U WITH DIAERESIS - IsLetterTab[0xFD] = true; // ý , SMALL LETTER Y WITH ACUTE ACCENT - IsLetterTab[0xFE] = true; // þ , SMALL LETTER THORN - IsLetterTab[0xFF] = true; // ÿ , SMALL LETTER Y WITH DIAERESIS -} - -bool LetterTable::isLetterUnicode( sal_Unicode c ) -{ - static CharClass* pCharClass = NULL; - if( pCharClass == NULL ) - pCharClass = new CharClass( Application::GetSettings().GetLocale() ); - String aStr( c ); - bool bRet = pCharClass->isLetter( aStr, 0 ); - return bRet; -} - -/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |