Resync to trunk, windows non-pro build (aoo/aw080)

author: Armin Le Grand <alg@apache.org> 2014-07-14 19:09:11 +0000
committer: Armin Le Grand <alg@apache.org> 2014-07-14 19:09:11 +0000
commit: 3c1d4742e649fe9c8aed8c2817fe3e1f3364f298 (patch)
tree: e0c6e02c89aa9227726c9469da1001b3e29c41df /ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
parent: c5c31e2aeaedbdf76e1f38d3c385e34f5ed875ca (diff)
1 files changed, 223 insertions, 0 deletions
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
new file mode 100644
index 000000000000..369f5c3daf42
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
@@ -0,0 +1,223 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import javax.xml.stream.Location;
+
+import org.apache.openoffice.ooxml.parser.action.ActionManager;
+import org.apache.openoffice.ooxml.parser.action.ActionTrigger;
+import org.apache.openoffice.ooxml.parser.action.IAction;
+
+/** This OOXML parser is based on the output of the schema parser.
+ *  It exists to debug the schema parser and as illustration and preparation of
+ *  the C++ parser (yet to come).
+ *  Because of this, the parser data (set of states and transitions) are
+ *  read at runtime while a real parser would do that at compile time.
+ */
+public class OOXMLParser
+{
+    /** Data that is shared between the actions that are registered by
+     *  AddSomeActions() and the summary that is printed after parsing.
+     *  The class is static because it does not need an enclosing parser object.
+     */
+    static class ActionContext
+    {
+        /** Maps type names to the number of times the counting action has been
+         *  run for them.  A TreeMap so that the summary is sorted by type name.
+         */
+        public final Map<String,Integer> TypeCounts = new TreeMap<>();
+    }
+
+    /** The parser is called with two or three arguments:
+     *  - A path to where the parser tables with the states and transitions can
+     *    be found.
+     *  - The XML input file or Zip stream to parse.
+     *    The syntax for a Zip stream contains a '#' that separates the filename
+     *    to its left from the entry name to its right.
+     *  - Optionally the name of a file to which log data is written.
+     *    Without it no log data is written.
+     *  @throws RuntimeException
+     *      When the number of arguments is neither two nor three.
+     */
+    public static void main (final String ... aArgumentList)
+    {
+        if (aArgumentList.length<2 ||aArgumentList.length>3)
+            throw new RuntimeException("usage: OOXMLParser <parser-tables-path> <XML-input-file> <log-file>?");
+
+        if (aArgumentList.length == 3)
+        {
+            final File aLogFile = new File(aArgumentList[2]);
+            Log.Dbg = new Log(aLogFile);
+            System.out.printf("writing log data to %s\n", aLogFile.toString());
+        }
+        else
+        {
+            Log.Dbg = null;
+            System.out.printf("writing no log data\n");
+        }
+
+        new OOXMLParser(aArgumentList[0], aArgumentList[1]);
+    }
+
+
+
+    /** Create the state machine, parse the input and print how many elements
+     *  have been parsed and how often the counting action ran per type name.
+     *  Exceptions that are thrown while parsing are not propagated; their
+     *  stack trace is printed instead.
+     *  @param sParseTableFilename
+     *      Is passed as File object to the StateMachine constructor.
+     *  @param sInputFilename
+     *      Name of the input as it is understood by GetInputStream().
+     */
+    private OOXMLParser (
+        final String sParseTableFilename,
+        final String sInputFilename)
+    {
+        // The reported initialization time covers the construction of the
+        // state machine and the opening of the input.
+        long nStartTime = System.currentTimeMillis();
+        final StateMachine aMachine = new StateMachine(new File(sParseTableFilename), null);
+        final InputStream aIn = GetInputStream(sInputFilename);
+        long nEndTime = System.currentTimeMillis();
+
+        if (aIn == null)
+        {
+            // GetInputStream() has already printed the cause.  Without input
+            // there is nothing to parse.
+            System.err.printf("can not open input %s\n", sInputFilename);
+            return;
+        }
+
+        final ActionContext aActionContext = new ActionContext();
+        AddSomeActions(aMachine.GetActionManager(), aActionContext);
+
+        System.out.printf("initialized parser in %fs\n", (nEndTime-nStartTime)/1000.0);
+
+        // The try-with-resources statement closes the input (and, for a Zip
+        // entry, its archive) regardless of whether parsing succeeds.
+        try (final InputStream aInput = aIn)
+        {
+            nStartTime = System.currentTimeMillis();
+            final Parser aParser = new Parser(aMachine, aInput);
+            aParser.Parse();
+            final int  nElementCount = aParser.GetElementCount();
+            nEndTime = System.currentTimeMillis();
+            System.out.printf("parsed %d elements in %fs\n",
+                nElementCount,
+                (nEndTime-nStartTime)/1000.0);
+
+            System.out.printf("%d different elements found:\n", aActionContext.TypeCounts.size());
+            for (final Entry<String, Integer> aEntry : aActionContext.TypeCounts.entrySet())
+            {
+                System.out.printf("%-32s : %6d\n", aEntry.getKey(), aEntry.getValue());
+            }
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+    }
+
+
+
+    /** Register some example actions at the given action manager:
+     *  - An element start action for "*" that counts, per type name, how often
+     *    it is run.  The counts are stored in the given action context.
+     *  - An element start action for ".*CT_Shd" that prints the trigger, the
+     *    element name, the start offset and the attributes of the element.
+     *  - A text action for ".*CT_Text" whose output is currently disabled.
+     */
+    private static void AddSomeActions (
+        final ActionManager aActionManager,
+        final ActionContext aActionContext)
+    {
+        aActionManager.AddElementStartAction(
+            "*",
+            new IAction()
+            {
+                @Override public void Run(
+                    final ActionTrigger eTrigger,
+                    final ElementContext aContext,
+                    final String sText,
+                    final Location aStartLocation,
+                    final Location aEndLocation)
+                {
+                    // A type name that is not yet in the map starts with count 1.
+                    final String sTypeName = aContext.GetTypeName();
+                    final Integer nOldCount = aActionContext.TypeCounts.get(sTypeName);
+                    aActionContext.TypeCounts.put(
+                        sTypeName,
+                        nOldCount==null ? 1 : nOldCount+1);
+                }
+            }
+        );
+        aActionManager.AddElementStartAction(
+            ".*CT_Shd",
+            new IAction()
+            {
+                @Override public void Run(
+                    final ActionTrigger eTrigger,
+                    final ElementContext aContext,
+                    final String sText,
+                    final Location aStartLocation,
+                    final Location aEndLocation)
+                {
+                    System.out.printf("processing %s of element %s at position %d\n",
+                        eTrigger,
+                        aContext.GetElementName(),
+                        aStartLocation.getCharacterOffset());
+
+                    if (aContext.GetAttributes().GetAttributeCount() == 0)
+                        System.out.printf("    no attributes\n");
+                    else
+                        for (final Entry<String,String> aAttribute : aContext.GetAttributes().GetAttributes())
+                            System.out.printf("    %s -> %s\n", aAttribute.getKey(), aAttribute.getValue());
+                }
+            }
+        );
+        aActionManager.AddTextAction(
+            ".*CT_Text",
+            new IAction()
+            {
+                @Override public void Run(
+                    final ActionTrigger eTrigger,
+                    final ElementContext aContext,
+                    final String sText,
+                    final Location aStartLocation,
+                    final Location aEndLocation)
+                {
+                    // Intentionally silent.  Enable the following line to print the text.
+//                    System.out.printf("%s text \"%s\"\n", aContext.GetTypeName(), sText.replace("\n", "\\n"));
+                }
+            }
+        );
+    }
+
+
+
+    /** Open the input that is described by the given name.
+     *  @param sInputName
+     *      Either the name of a plain XML file or, when it contains a '#', the
+     *      name of a Zip archive (left of the first '#') followed by the name
+     *      of one of its entries (right of the first '#').  Backslashes in the
+     *      entry name are replaced by slashes and one leading slash is removed.
+     *  @return
+     *      The opened stream, which the caller has to close, or null when the
+     *      input can not be opened.  In the latter case the stack trace of the
+     *      cause has been printed.
+     */
+    private static InputStream GetInputStream (final String sInputName)
+    {
+        try
+        {
+            final int nSeparator = sInputName.indexOf('#');
+            if (nSeparator < 0)
+            {
+                // The input name points to a plain XML file.
+                return new FileInputStream(sInputName);
+            }
+
+            // Split the input name into the file name of the archive and the
+            // name of a zip entry.
+            final String sArchiveName = sInputName.substring(0, nSeparator);
+            String sEntryName = sInputName.substring(nSeparator+1);
+
+            // Normalize and cleanup the entry name.
+            sEntryName = sEntryName.replace('\\',  '/');
+            if (sEntryName.startsWith("/"))
+                sEntryName = sEntryName.substring(1);
+
+            return OpenZipEntry(sArchiveName, sEntryName);
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+            return null;
+        }
+    }
+
+
+
+    /** Open one entry of a Zip archive for reading.
+     *  The archive has to remain open while the entry is read because closing
+     *  a ZipFile also closes the streams that have been obtained from it.
+     *  Therefore the returned stream takes over the ownership of the archive:
+     *  closing the stream closes the archive as well.
+     *  @param sArchiveName
+     *      File name of the Zip archive.
+     *  @param sEntryName
+     *      Name of the entry inside the archive.
+     *  @return
+     *      A stream for reading the content of the entry.
+     *  @throws java.io.IOException
+     *      When the archive can not be opened or the entry does not exist or
+     *      can not be opened.  The archive is closed before the exception is
+     *      passed on.
+     */
+    private static InputStream OpenZipEntry (
+        final String sArchiveName,
+        final String sEntryName)
+        throws java.io.IOException
+    {
+        final ZipFile aZipFile = new ZipFile(new File(sArchiveName));
+        boolean bIsEntryOpen = false;
+        try
+        {
+            final ZipEntry aZipEntry = aZipFile.getEntry(sEntryName);
+            if (aZipEntry == null)
+                throw new java.io.FileNotFoundException(
+                    "archive '" + sArchiveName + "' has no entry '" + sEntryName + "'");
+
+            final InputStream aEntryStream = aZipFile.getInputStream(aZipEntry);
+            bIsEntryOpen = true;
+            return new java.io.FilterInputStream(aEntryStream)
+            {
+                @Override public void close () throws java.io.IOException
+                {
+                    try
+                    {
+                        super.close();
+                    }
+                    finally
+                    {
+                        aZipFile.close();
+                    }
+                }
+            };
+        }
+        finally
+        {
+            // Do not leak the archive when no stream is handed to the caller.
+            if ( ! bIsEntryOpen)
+                aZipFile.close();
+        }
+    }
+}
author	Armin Le Grand <alg@apache.org>	2014-07-14 19:09:11 +0000
committer	Armin Le Grand <alg@apache.org>	2014-07-14 19:09:11 +0000
commit	3c1d4742e649fe9c8aed8c2817fe3e1f3364f298 (patch)
tree	e0c6e02c89aa9227726c9469da1001b3e29c41df /ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
parent	c5c31e2aeaedbdf76e1f38d3c385e34f5ed875ca (diff)