1 files changed, 561 insertions, 0 deletions
diff --git a/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java b/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java
new file mode 100644
index 000000000000..0ab40ec53fd2
--- /dev/null
+++ b/xmerge/source/pexcel/java/org/openoffice/xmerge/converter/xml/sxc/pexcel/records/formula/FormulaParser.java
@@ -0,0 +1,561 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org.  If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
+
+
+import java.util.Vector;
+
+import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ * This is the Formula Parser based on an article written by Jack Crenshaw. It is a
+ * top down parser with some basic error handling. It handles
+ * +,-,*,/,>,<,>=,<=,=,<>, unary + and - as well as functions.
+ * The BNF notation for this parser is
+ * <pre>
+ *  &lt;expression&gt; ::= &lt;unary op&gt; &lt;term&gt; [&lt;addop&gt;|&lt;logop&gt; &lt;term&gt;]
+ *  &lt;term&gt;       ::= &lt;factor&gt; [&lt;mulop&gt; &lt;factor&gt;]
+ *  &lt;factor&gt;     ::= &lt;number&gt;[%] | &lt;CellRef&gt; | &lt;QuoteString&gt; | &lt;expression&gt;
+ * </pre>
+ */
+public class FormulaParser {
+
+    private char look;                  // lookahead: the character currently being examined
+    private String formulaStr;          // the formula text being parsed, including the leading '='
+    private int index = 1;              // position of the next character to read; position 0 is the '='
+    private TokenFactory tokenFactory;  // creates the operator, operand and function tokens
+    private Vector tokenVector;         // tokens produced by the current parse
+    private Workbook wb;                // stored by setWorkbook(); not read anywhere in this class
+
+     /**
+      * Creates a parser with its own <code>TokenFactory</code> and an
+      * empty token <code>Vector</code>; no workbook is set.
+      */
+     public FormulaParser() {
+         Debug.log(Debug.TRACE,"Creating a Formula Parser");
+         this.tokenVector = new Vector();
+         this.tokenFactory = new TokenFactory();
+     }
+
+    /**
+     * Stores the <code>Workbook</code> associated with this parser.
+     * @param wb The <code>Workbook</code> to keep a reference to
+     */
+    public void setWorkbook(Workbook wb) {
+        this.wb = wb;
+    }
+
+    /**
+     * Parses a formula <code>String</code> into a sequence of tokens.
+     *
+     * @param formula A <code>String</code> representation of a formula
+     * starting with the '=' character
+     * @return The parser's own token <code>Vector</code>; the next call clears it
+     * @throws FormulaParsingException if formula is null, lacks '=' or is invalid
+     */
+    public Vector parse(String formula) throws FormulaParsingException {
+        index = 1;
+        look = ' ';
+        tokenVector.clear();
+        // Record the input first so error strings show this formula, not a stale one.
+        formulaStr = formula;
+        if(formula == null || !formula.startsWith("=")) {
+            throw new FormulaParsingException("No equals found!" + makeErrorString());
+        }
+        Debug.log(Debug.TRACE,"Creating a Formula Parser for " + formulaStr);
+        getChar();
+        expression();
+        return tokenVector;
+    }
+
+    /**
+     * Tests whether a character is an additive operator (+ or -)
+     *
+     * @param  c The character which is to be identified
+     * @return true if c is '+' or '-', otherwise false
+     */
+    private boolean isAddOp(char c) {
+        return c == '+' || c == '-';
+    }
+
+    /**
+     * Tests whether the current character is one of the operators *, /, ^ or &amp;
+     *
+     * @return true if <code>look</code> is one of those four characters
+     */
+    private boolean isMultiOp() {
+        return "*/^&".indexOf(look) != -1;
+    }
+
+    /**
+     * Tests whether a logical operator (<, >, <=, >=, = or <>) begins at the
+     * current character, peeking at the following character when there is one
+     *
+     * @return true if a logical operator starts at <code>look</code>
+     */
+    private boolean isLogicalOp() {
+        if (!isLogicalOpChar(look)) {
+            return false;
+        }
+        if ((index+1) >= formulaStr.length()) {
+            return true;    // at most one character is left: accept look on its own
+        }
+        char next = formulaStr.charAt(index);
+        if (!isLogicalOpChar(next)) {
+            return true;    // >, < or = on its own
+        }
+        // two operator characters in a row: only <>, <= and >= are valid pairs
+        return (look == '<' && next != '<') || (look == '>' && next == '=');
+    }
+
+    /**
+     * Tests whether a <code>String</code> is exactly one of <, >, <=, >=, = or <>
+     * @param  op The <code>String</code> which is to be identified
+     * @return true if op is one of the six logical operators, otherwise false
+     */
+    private boolean isLogicalOp(String op) {
+        String[] operators = { ">", "<", ">=", "<=", "=", "<>" };
+        for (int i = 0; i < operators.length; i++) {
+            if (op.equals(operators[i])) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+
+    /**
+     * Tests whether a character could be part of a logical operator
+     *
+     * @param  c The character which is to be identified
+     * @return true if c is '<', '>' or '=', otherwise false
+     */
+    private boolean isLogicalOpChar(char c) {
+        return "<>=".indexOf(c) != -1;
+    }
+
+     /**
+      * Tests for the punctuation that can appear in a cell reference
+      *
+      * @param  c The character which is to be identified
+      * @return true if c is ':', '$' or '.', otherwise false
+      */
+     private boolean isCellRefSpecialChar(char c) {
+         return c == '.' || c == '$' || c == ':';
+     }
+
+     /**
+      * Tests whether a character is a letter
+      *
+      * @param  c The character which is to be identified
+      * @return The result of <code>Character.isLetter</code> for c
+      */
+     private boolean isAlpha(char c) {
+         return Character.isLetter(c);
+     }
+
+     /**
+      * Tests whether a character is a digit
+      *
+      * @param  c The character which is to be identified
+      * @return The result of <code>Character.isDigit</code> for c
+      */
+     private boolean isDigit(char c) {
+         return Character.isDigit(c);
+     }
+
+     /**
+      * Tests whether a character is the percent sign
+      *
+      * @param  c The character which is to be identified
+      * @return true if c is '%', otherwise false
+      */
+     private boolean isPercent(char c) {
+         return '%' == c;
+     }
+
+     /**
+      * Tests whether a character is a letter or a digit
+      *
+      * @param  c The character which is to be identified
+      * @return The result of <code>Character.isLetterOrDigit</code> for c
+      */
+     private boolean isAlphaNum(char c) {
+         return Character.isLetterOrDigit(c);
+     }
+
+    /**
+     * Tests whether a character may appear inside a cell reference
+     *
+     * @param  c The character which is to be identified
+     * @return true if c is a letter, a digit, ':', '$' or '.'
+     */
+    private boolean isCellRefChar(char c) {
+        return isAlphaNum(c) || isCellRefSpecialChar(c);
+    }
+
+    /**
+     * Consumes the expected character and any whitespace that follows it.
+     *
+     * @param  c The character which must be the current character
+     * @throws FormulaParsingException if the current character is not c
+     */
+    private void match(char c) throws FormulaParsingException {
+        if(look!=c) {
+            // report both sides: c is what was required, look is what was there
+            throw new FormulaParsingException("Expected '" + c + "' but found '" + look + "'" + makeErrorString());
+        }
+        Debug.log(Debug.TRACE,"Operator Found : " + look);
+        getChar();
+        skipWhite();
+    }
+
+    /**
+     * Consumes each character of the expected symbol in turn, skipping any
+     * whitespace after each one; stops early if getChar() returns false.
+     *
+     * @param symbol The <code>String</code> to be matched.
+     * @throws FormulaParsingException if a character of symbol is not found
+     */
+    private void match(String symbol) throws FormulaParsingException {
+        boolean more = true;
+        for (int i = 0; more && i < symbol.length(); i++) {
+            if (look != symbol.charAt(i)) {
+                // name the symbol that was required and the character actually seen
+                throw new FormulaParsingException("Expected '" + symbol + "' but found '" + look + "'" + makeErrorString());
+            }
+            more = getChar();
+            skipWhite();
+        }
+    }
+
+     /**
+      * Advances past whitespace, as defined by <code>Character.isWhitespace</code>,
+      * stopping at the first other character or when getChar() returns false
+      */
+     private void skipWhite() throws FormulaParsingException {
+         while(Character.isWhitespace(look)) {
+             if(!getChar()) {
+                 break;
+             }
+         }
+     }
+
+     /**
+      * Parses an optional unary + or -, then a parenthesised expression, a number or an identifier
+      */
+     private void factor() throws FormulaParsingException {
+         if(isAddOp(look)) {
+             String unaryOp = String.valueOf(look);
+             match(look);
+             tokenVector.add(tokenFactory.getOperatorToken(unaryOp, 1));
+         }
+         if(isDigit(look)) {
+             getNum();
+         } else if(look!='(') {
+             ident();
+         } else {
+             match('(');
+             tokenVector.add(tokenFactory.getOperatorToken("(", 1));
+             expression();
+             match(')');
+             tokenVector.add(tokenFactory.getOperatorToken(")", 1));
+         }
+     }
+
+     /**
+      * Loads the next character of the formula into <code>look</code> and
+      * advances the index.
+      *
+      * @return boolean false if the end of the formula has been reached or
+      *                 the character just loaded is ',', otherwise true
+      */
+     private boolean getChar() throws FormulaParsingException {
+         // Nothing left to read: look and index keep their current values.
+         if(index>=formulaStr.length()) {
+             return false;
+         }
+
+         look = formulaStr.charAt(index);
+         index++;
+
+         // A comma is loaded into look like any other character, but it is
+         // reported as false so that callers' read loops stop at it.
+         return look!=',';
+     }
+
+    /**
+     * Parses the arguments of a function call up to, but not including, the
+     * closing ')', adding a "," operator token between arguments.
+     *
+     * @return The number of arguments
+     * @throws FormulaParsingException if the formula ends before the ')'
+     */
+    private int arguments() throws FormulaParsingException {
+        skipWhite();
+        int numArgs = (look==')') ? 0 : 1;
+        while(look!=')') {
+            int start = index;
+            expression();
+            if(look==',') {
+                numArgs++;
+                match(',');
+                tokenVector.add(tokenFactory.getOperatorToken(",", 1));
+            } else if(index==start && look!=')') {
+                // Nothing was read: input is exhausted and look is stale, so the loop would never end.
+                throw new FormulaParsingException("Expected ')'" + makeErrorString());
+            }
+        }
+        return numArgs;
+    }
+
+     /**
+      * Decides whether a token is a cell reference rather than a name
+      * definition.
+      * Tokens of fewer than three characters are handled without error.
+      *
+      * @param  s The token text to classify
+      * @return true if s contains ':', '$' or '.', or has a digit as its
+      *         second or third character; otherwise false
+      */
+     private boolean isCellRef(String s) {
+         for(int i = 0;i<s.length();i++) {
+             if(isCellRefSpecialChar(s.charAt(i))) {
+                 return true;
+             }
+         }
+
+         // A simple cell reference has no 'special char', so also look for a
+         // digit in the second or third position. Shorter tokens (for example
+         // a one letter name) simply have no digit there; the length checks
+         // keep charAt from throwing on them.
+         if(s.length() > 1 && isDigit(s.charAt(1))) {
+             return true;
+         }
+         return s.length() > 2 && isDigit(s.charAt(2));
+     }
+
+     /**
+      * Reads an identifier and adds the matching token(s) to the tokenVector:
+      * a function call when it is followed by '(', otherwise an operand typed
+      * as a 3D reference, a cell area, a cell reference or a name.
+      */
+     private void ident() throws FormulaParsingException {
+         String name = getTokenString();
+         if(look!='(') {
+             String type;
+             int dot = name.indexOf('.');
+             if(dot!=-1) {
+                 // only the text after the first '.' is searched for a ':'
+                 boolean isArea = name.indexOf(':', dot + 1)!=-1;
+                 type = isArea ? "3D_CELL_AREA_REFERENCE" : "3D_CELL_REFERENCE";
+             } else if(name.indexOf(':')!=-1) {
+                 type = "CELL_AREA_REFERENCE";
+             } else if(isCellRef(name)) {
+                 type = "CELL_REFERENCE";
+             } else {
+                 type = "NAME";
+             }
+             tokenVector.add(tokenFactory.getOperandToken(name, type));
+             return;
+         }
+         Debug.log(Debug.TRACE,"Found Function : " + name);
+         // the function token is inserted in front of its "(" once the
+         // argument count is known, so remember where that position is
+         int insertAt = tokenVector.size();
+         match('(');
+         tokenVector.add(tokenFactory.getOperatorToken("(", 1));
+         int numArgs = arguments();
+         match(')');
+         tokenVector.add(tokenFactory.getOperatorToken(")", 1));
+         tokenVector.insertElementAt(tokenFactory.getFunctionToken(name, numArgs), insertAt);
+     }
+
+     /**
+      * Collects the current character plus every logical operator character
+      * that directly follows it, then skips trailing whitespace.
+      *
+      * @return a <code>String</code> representing a logical operator
+      */
+     private String getLogicalOperator() throws FormulaParsingException {
+         StringBuffer op = new StringBuffer();
+         boolean more;
+
+         do {
+             op.append(look);
+             more = getChar();
+         } while(more && isLogicalOpChar(look));
+         skipWhite();
+         return op.toString();
+     }
+
+     /**
+      * Reads an identifier: starting at a letter or '$', collects characters
+      * for as long as they are valid cell reference characters, then skips
+      * trailing whitespace.
+      *
+      * @return A <code>String</code> representing the next token
+      * @throws FormulaParsingException if look is neither a letter nor '$'
+      */
+     private String getTokenString() throws FormulaParsingException {
+         if(look!='$' && !isAlpha(look)) {
+             throw new FormulaParsingException("Expected Cell Reference" + makeErrorString());
+         }
+         StringBuffer token = new StringBuffer();
+         boolean more;
+         do {
+             token.append(look);
+             more = getChar();
+         } while(more && isCellRefChar(look));
+         skipWhite();
+         return token.toString();
+     }
+
+     /**
+      * Reads a number (digits with an optional decimal part), adds it to the
+      * tokenVector as an "INTEGER" operand and, when a '%' follows, adds a
+      * "%" operator token as well.
+      */
+     private void getNum() throws FormulaParsingException {
+         Debug.log(Debug.TRACE,"getNum : ");
+         if(!isDigit(look)) {
+             throw new FormulaParsingException("Expected Integer" + makeErrorString());
+         }
+         StringBuffer num = new StringBuffer();
+         boolean more;
+         do {
+             num.append(look);
+             more = getChar();
+             // the length check keeps a trailing '.' from reading past the end of the formula
+         } while(more && (isDigit(look)
+                 || (look == '.' && index < formulaStr.length() && isDigit(formulaStr.charAt(index)))));
+         skipWhite();
+         tokenVector.add(tokenFactory.getOperandToken(num.toString(), "INTEGER"));
+         if(isPercent(look)) {
+             match(look);
+             tokenVector.add(tokenFactory.getOperatorToken("%", 1));
+             Debug.log(Debug.TRACE,"Added Percent token to Vector: ");
+         }
+         Debug.log(Debug.TRACE,"Number parsed : " + num);
+     }
+
+
+    /**
+     * Parses a factor followed by any number of *, /, ^ or &amp; operations
+     */
+    private void term() throws FormulaParsingException {
+        factor();
+        while(isMultiOp()) {
+            multiOp(String.valueOf(look));
+        }
+    }
+
+     /**
+      * Expression is the entry point for the parser. It parses either a
+      * quoted string or a term, followed by any number of additive or
+      * logical operations.
+      *
+      * @throws FormulaParsingException if a quoted string is never closed
+      */
+     private void expression() throws FormulaParsingException {
+         if (look == '"') {
+             // Scan the formula text directly instead of going through getChar():
+             // getChar() reports false on ',' and leaves look unchanged at the
+             // end of input, which rejected strings containing a comma and
+             // accepted a lone trailing quote as an empty string.
+             int close = formulaStr.indexOf('"', index);    // index is just past the opening quote
+             if (close == -1) {
+                 index = formulaStr.length();    // put the error caret at the end of the formula
+                 throw new FormulaParsingException("Expected closing quote." + makeErrorString());
+             }
+             tokenVector.add(tokenFactory.getOperandToken(formulaStr.substring(index, close), "STRING"));
+             index = close + 1;
+             getChar();      //Move on to the next character
+         } else {
+             term();
+         }
+         while(isAddOp(look) || isLogicalOp()) {
+             if (isAddOp(look)) {
+                 addOp(Character.toString(look));
+             } else {
+                 logicalOp();
+             }
+         }
+     }
+
+    /**
+     * Consumes an additive operator, adds a token for it to the tokenVector
+     * and then calls <code>term</code> to parse its right hand side.
+     *
+     * @param op A <code>String</code> representing the operator
+     * @throws FormulaParsingException if op does not match or the term is invalid
+     */
+    private void addOp(String op) throws FormulaParsingException {
+        match(op);
+        Object operatorToken = tokenFactory.getOperatorToken(op, 2);
+        tokenVector.add(operatorToken);
+        term();
+    }
+
+    /**
+     * Consumes a multiplicative operator, adds a token for it to the
+     * tokenVector and then calls <code>factor</code> to parse its right hand side.
+     *
+     * @param op A <code>String</code> representing the operator
+     * @throws FormulaParsingException if op does not match or the factor is invalid
+     */
+    private void multiOp(String op) throws FormulaParsingException {
+        match(op);
+        Object operatorToken = tokenFactory.getOperatorToken(op, 2);
+        tokenVector.add(operatorToken);
+        factor();
+    }
+
+    /**
+     * Reads the logical operator at the current position, adds a token for
+     * it to the tokenVector and calls <code>term</code> to parse the right
+     * hand side of the operator
+     */
+    private void logicalOp()  throws FormulaParsingException {
+        Object operatorToken = tokenFactory.getOperatorToken(getLogicalOperator(), 2);
+        tokenVector.add(operatorToken);
+        term();
+    }
+
+    /** Builds a two line suffix: the formula, then index-1 spaces followed by a '^'. */
+    private String makeErrorString() {
+        StringBuffer message = new StringBuffer("\n\t");
+        message.append(formulaStr).append("\n\t");
+        for (int pad = index - 1; pad > 0; pad--) {
+            message.append(' ');
+        }
+        return message.append('^').toString();
+    }
+ }
+