summaryrefslogtreecommitdiff
path: root/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java')
-rw-r--r--xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java561
1 files changed, 561 insertions, 0 deletions
diff --git a/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java b/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java
new file mode 100644
index 000000000000..0ab40ec53fd2
--- /dev/null
+++ b/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java
@@ -0,0 +1,561 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
+
+
+import java.util.Vector;
+
+import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ * This is the Formula Parser based on an article written by Jack Crenshaw. It is a
+ * top down parser with some basic error handling. It handles
+ * +,-,*,/,>,<,>=,<=,=,<>, unary + and - as well as functions.
+ * The BNF notation for this parser is
+ * <pre>
+ * &lt;expression&gt; ::= &lt;unary op&gt; &lt;term&gt; [&lt;addop&gt;|&lt;logop&gt; &lt;term&gt;]
+ * &lt;term&gt; ::= &lt;factor&gt; [&lt;mulop&gt; &lt;factor&gt;]
+ * &lt;factor&gt; ::= &lt;number&gt;[%] | &lt;CellRef&gt; | &lt;QuoteString&gt; | &lt;expression&gt;
+ * </pre>
+ */
+public class FormulaParser {
+
+ private char look;
+ private String formulaStr;
+ private int index = 1;
+ private TokenFactory tokenFactory;
+ private Vector tokenVector;
+ private Workbook wb;
+
+ /**
+ * Default constructor
+ */
+ public FormulaParser() {
+
+ Debug.log(Debug.TRACE,"Creating a Formula Parser");
+ tokenFactory = new TokenFactory();
+ tokenVector = new Vector();
+ }
+
+ /**
+ *
+ */
+ public void setWorkbook(Workbook wb) {
+
+ this.wb = wb;
+ }
+
+ /**
+ * Parse method for parsing from a String to a byte[]
+ *
+ * @param formula A <code>String</code> representation of a formula
+ * starting with the '=' character
+ * @return A <code>Vector</code> containing the parsed <code>Token</code>s
+ */
+ public Vector parse(String formula) throws FormulaParsingException {
+
+ index = 1;
+ look = ' ';
+ tokenVector.clear();
+ if(formula.startsWith("=")) {
+ formulaStr = formula;
+ Debug.log(Debug.TRACE,"Creating a Formula Parser for " + formulaStr);
+ getChar();
+ expression();
+ } else {
+ throw new FormulaParsingException("No equals found!" + makeErrorString());
+ }
+ return tokenVector;
+ }
+
+ /**
+ * Identify + and - operators
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isAddOp(char c) {
+ return (c == '-') || (c == '+');
+ }
+
+ /**
+ * Determine if the current character is a multiop
+ *
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isMultiOp() {
+ return look=='*' || look =='/' || look == '^' || look == '&';
+ }
+
+ /**
+ * Identify <, >, <=, >=, =, <> using the index to find the current character(s)
+ *
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isLogicalOp() {
+ if (!isLogicalOpChar(look)) {
+ return false;
+ } else if ((index+1) >= formulaStr.length()) {//logical operators in their own right : if at end then return true
+ return true;
+ } else if (!isLogicalOpChar(formulaStr.charAt(index))) { // we have >, < or = on their own
+ return true;
+ } else if ((look == '<') && ((formulaStr.charAt(index) == '>') || formulaStr.charAt(index) == '=')) { // <>, or <=
+ return true;
+ } else if ((look == '>') && (formulaStr.charAt(index) == '=')) { // >=
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Identify <, >, <=, >=, =, <>
+ *
+ * @param The <code>String</code> which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isLogicalOp(String op) {
+ return ((op.compareTo(">") == 0) ||
+ (op.compareTo("<") == 0) ||
+ (op.compareTo(">=") == 0) ||
+ (op.compareTo("<=") == 0) ||
+ (op.compareTo("=") == 0) ||
+ (op.compareTo("<>") == 0));
+ }
+
+
+ /**
+ * Identify characters that MAY be logical operator characters
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isLogicalOpChar(char c) {
+ return (c == '>') || (c == '<') || (c == '=');
+ }
+
+ /**
+ * Identify special Cell Reference charaters
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isCellRefSpecialChar(char c) {
+ return (c == ':') || (c == '$') || (c == '.');
+ }
+
+ /**
+ * Identify letters
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isAlpha(char c) {
+ return(Character.isLetter(c));
+ }
+
+ /**
+ * Identify numbers
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isDigit(char c) {
+ return(Character.isDigit(c));
+ }
+
+ /**
+ * Identify numbers
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isPercent(char c) {
+ return (c == '%');
+ }
+
+ /**
+ * Identify letters or numbers
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isAlphaNum(char c) {
+ return(isAlpha(c) || isDigit(c));
+ }
+
+ /**
+ * Identify valid Characters for cell references
+ *
+ * @param c The character which is to be identified
+ * @return A boolean returning the result of the comparison
+ */
+ private boolean isCellRefChar(char c) {
+ return(isAlpha(c) || isDigit(c) || isCellRefSpecialChar(c));
+ }
+
+ /**
+ * Test if current character is a match and move to next character
+ *
+ * @param c The character which is to be matched
+ */
+ private void match(char c) throws FormulaParsingException {
+
+ if(look==c) {
+ Debug.log(Debug.TRACE,"Operator Found : " + look);
+ getChar();
+ skipWhite();
+ }
+ else
+ throw new FormulaParsingException("Unexpected character '" + c + "'" + makeErrorString());
+ }
+
+ /**
+ * Test if current character is a match and move to next character
+ *
+ * @param symbol The <code>String</code> to be matched.
+ */
+ private void match(String symbol) throws FormulaParsingException {
+
+ int numChars = symbol.length();
+ boolean bContinue = true;
+ for (int i=0;i<numChars && bContinue; i++) {
+ if (look == symbol.charAt(i)) {
+ bContinue = getChar();
+ skipWhite();
+ } else {
+ throw new FormulaParsingException("Unexpected character '" + symbol + "'" + makeErrorString());
+ }
+ }
+ }
+
+ /**
+ * Skip over whitespaces (ie. spaces and tabs)
+ */
+ private void skipWhite() throws FormulaParsingException {
+
+ boolean success = true;
+
+ while(Character.isWhitespace(look) && success) {
+ success = getChar();
+ }
+ }
+
+ /**
+ * This is a factor for multiplication and division operators
+ */
+ private void factor() throws FormulaParsingException {
+ if(isAddOp(look)) { // handle unary addop
+ Character ch = new Character(look);
+ match(look);
+ tokenVector.add(tokenFactory.getOperatorToken(ch.toString(), 1));
+ }
+ if(look=='(') {
+ match('(');
+ tokenVector.add(tokenFactory.getOperatorToken("(", 1));
+ expression();
+ match(')');
+ tokenVector.add(tokenFactory.getOperatorToken(")", 1));
+ } else if(isDigit(look)){
+ getNum();
+ } else {
+ ident();
+ }
+ }
+
+ /**
+ * Pulls the next character from the <code>String</code>
+ *
+ * @return boolean false if the end if the statement
+ * is reached otherwise true
+ */
+ private boolean getChar() throws FormulaParsingException {
+
+ boolean success = true;
+
+ if(index<formulaStr.length()) {
+ look = formulaStr.charAt(index);
+ index++;
+ if(look==',')
+ success = false;
+ } else {
+ success = false;
+ }
+ return success;
+ }
+
+ /**
+ * Parses the number of arguments in a function
+ *
+ * @return The number of arguments
+ */
+ private int arguments() throws FormulaParsingException {
+ int numArgs;
+
+ skipWhite();
+ if(look==')')
+ numArgs = 0;
+ else
+ numArgs = 1;
+
+ while(look!=')') {
+ expression();
+ if(look==',') {
+ numArgs++;
+ match(',');
+ tokenVector.add(tokenFactory.getOperatorToken(",", 1));
+ }
+ }
+ return numArgs;
+ }
+
+ /**
+ * Test to see if we have come across a cell reference or a Name
+ * Definition.
+ */
+ private boolean isCellRef(String s) {
+ char c;
+ boolean result = false;
+
+ for(int i = 0;i<s.length();i++) {
+ c = s.charAt(i);
+ if(isCellRefSpecialChar(c)) {
+ result = true;
+ break;
+ }
+ }
+
+ // if it is a simple cell reference then there will not be a cell
+ // reference 'special char' so we should also look for a digit
+ if(!result) {
+ if(isDigit(s.charAt(1)) || isDigit(s.charAt(2))) {
+ result = true;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Test to see if we have come across a cell reference or a function and
+ * add the resulting toek nto the tokenVector.
+ */
+ private void ident() throws FormulaParsingException {
+
+ String cell = getTokenString();
+ if(look=='(') {
+ Debug.log(Debug.TRACE,"Found Function : " + cell);
+
+ int index = tokenVector.size();
+ match('(');
+ tokenVector.add(tokenFactory.getOperatorToken("(", 1));
+ int numArgs = arguments();
+ match(')');
+ tokenVector.add(tokenFactory.getOperatorToken(")", 1));
+ tokenVector.insertElementAt(tokenFactory.getFunctionToken(cell, numArgs), index);
+ } else {
+
+ if(cell.indexOf('.')!=-1) {
+ String cellRef = cell.substring(cell.indexOf('.') + 1, cell.length());
+ if(cellRef.indexOf(':')!=-1) {
+ tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_AREA_REFERENCE"));
+ } else {
+ tokenVector.add(tokenFactory.getOperandToken(cell, "3D_CELL_REFERENCE"));
+ }
+ } else if(cell.indexOf(':')!=-1) {
+ tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_AREA_REFERENCE"));
+ } else if(isCellRef(cell)) {
+ tokenVector.add(tokenFactory.getOperandToken(cell, "CELL_REFERENCE"));
+ } else {
+ tokenVector.add(tokenFactory.getOperandToken(cell, "NAME"));
+ }
+ }
+ }
+
+ /**
+ * Will keep pulling valid logical operators from the formula and return
+ * the resultant <code>String</code>.
+ *
+ * @return a <code>String<code> representing a logical operator
+ */
+ private String getLogicalOperator() throws FormulaParsingException {
+ String op = new String();
+ boolean status;
+
+ do {
+ op += look;
+ status = getChar();
+ } while(isLogicalOpChar(look) && status);
+ skipWhite();
+ return op;
+ }
+
+ /**
+ * Keeps pulling characters from the statement until we get an
+ * operator and returns the resulting string.
+ *
+ * @return A <code>String</code>representing the next token
+ */
+ private String getTokenString() throws FormulaParsingException {
+
+ if(!isAlpha(look) && look!='$')
+ throw new FormulaParsingException("Expected Cell Reference" + makeErrorString());
+ else {
+ String cell = new String();
+ boolean status;
+ do {
+ cell += look;
+ status = getChar();
+ } while(isCellRefChar(look) && status);
+ skipWhite();
+ return cell;
+ }
+ }
+
+ /**
+ * Keeps pulling numbers from the statement and add the resulting integer
+ * token to the tokenVector.
+ */
+ private void getNum() throws FormulaParsingException {
+
+ Debug.log(Debug.TRACE,"getNum : ");
+ if(!isDigit(look))
+ throw new FormulaParsingException("Expected Integer" + makeErrorString());
+ else {
+ String num = new String();
+ boolean status;
+
+ do {
+ num += look;
+ status = getChar();
+ } while((isDigit(look) || ((look == '.') && isDigit(formulaStr.charAt(index)))) && status);
+ skipWhite();
+ tokenVector.add(tokenFactory.getOperandToken(num, "INTEGER"));
+ if(isPercent(look)) {
+ match(look);
+ tokenVector.add(tokenFactory.getOperatorToken("%", 1));
+ Debug.log(Debug.TRACE,"Added Percent token to Vector: ");
+ }
+ Debug.log(Debug.TRACE,"Number parsed : " + num);
+ }
+ }
+
+
+ /**
+ * Term will parse multiplication/division expressions
+ */
+ private void term() throws FormulaParsingException {
+ factor();
+ while(isMultiOp()) {
+ multiOp(Character.toString(look));
+ }
+ }
+
+ /**
+ * Expression is the entry point for the parser. It is the code
+ * that parses addition/subtraction expressions.
+ */
+ private void expression() throws FormulaParsingException {
+
+ if (look == '"') { //Extract a quoted string...
+ StringBuffer buff = new StringBuffer();
+ boolean success = true;
+ success = getChar();
+ while (look != '"' && success) {
+ buff.append(look);
+ success = getChar();
+ }
+
+ if (look != '"') { //We've reached the end of the string without getting a closing quote
+ throw new FormulaParsingException("Expected closing quote." + makeErrorString());
+ } else {
+ tokenVector.add(tokenFactory.getOperandToken(buff.toString(), "STRING"));
+ getChar(); //Move on to the next character
+ }
+ } else {
+ term();
+ }
+ while(isAddOp(look) || isLogicalOp()) {
+ if (isAddOp(look)) {
+ addOp(Character.toString(look));
+ } else if (isLogicalOp()) {
+ logicalOp();
+ }
+ }
+ }
+
+ /**
+ * Test to see if the next token (represented as a <code>String</code>) is
+ * the same as the String passed in. Move the index along to the end of
+ * that String and add that <code>Token</code> to the tokenVector. Then
+ * call <code>term</code> to parse the right hand side of the operator.
+ *
+ * @param op A <code>String</code> representing the operator
+ */
+ private void addOp(String op) throws FormulaParsingException {
+ match(op);
+ tokenVector.add(tokenFactory.getOperatorToken(op, 2));
+ term();
+ }
+
+ /**
+ * Test to see if the next token (represented as a <code>String</code>) is
+ * the same as the String passed in. Move the index along to the end of
+ * that String and add that <code>Token</code> to the tokenVector. Then
+ * call <code>factor</code> to parse the right hand side of the operator.
+ *
+ * @param op A <code>String</code> representing the operator
+ */
+ private void multiOp(String op) throws FormulaParsingException {
+ match(op);
+ tokenVector.add(tokenFactory.getOperatorToken(op, 2));
+ factor();
+ }
+
+ /**
+ * Pull a logical operator starting at the current index, add a token for
+ * that operator to the tokenVector and call <code>term<code> to parse the
+ * right hand side of the operator
+ */
+ private void logicalOp() throws FormulaParsingException {
+ String op = getLogicalOperator();
+ tokenVector.add(tokenFactory.getOperatorToken(op, 2));
+ term();
+ }
+
+ private String makeErrorString() {
+ StringBuffer buff = new StringBuffer();
+ for (int i=0; i<index-1; i++) {
+ buff.append(' ');
+ }
+
+ buff.append('^');
+ return "\n\t" + formulaStr + "\n\t" + buff.toString();
+ }
+ }
+