diff options
author | Armin Le Grand <alg@apache.org> | 2014-07-14 19:09:11 +0000 |
---|---|---|
committer | Armin Le Grand <alg@apache.org> | 2014-07-14 19:09:11 +0000 |
commit | 3c1d4742e649fe9c8aed8c2817fe3e1f3364f298 (patch) | |
tree | e0c6e02c89aa9227726c9469da1001b3e29c41df /ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java | |
parent | c5c31e2aeaedbdf76e1f38d3c385e34f5ed875ca (diff) |
Resync to trunk, windows non-pro buildaoo/aw080
Diffstat (limited to 'ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java')
-rw-r--r-- | ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java new file mode 100644 index 000000000000..369f5c3daf42 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java @@ -0,0 +1,223 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import javax.xml.stream.Location; + +import org.apache.openoffice.ooxml.parser.action.ActionManager; +import org.apache.openoffice.ooxml.parser.action.ActionTrigger; +import org.apache.openoffice.ooxml.parser.action.IAction; + +/** This OOXML parser is based on the output of the schema parser. + * It exists to debug the schema parser and as illustration and preparation of + * the C++ parse (yet to come.) + * Because of this, the parser data (set of states and transitions) are + * read at runtime while a real parser would do that at compile time. + */ +public class OOXMLParser +{ + class ActionContext + { + public Map<String,Integer> TypeCounts = new TreeMap<>(); + } + /** The parser is called with two arguments: + * - A path to where the parser tables with the states and transitions can + * be found. + * - The XML input file or Zip stream to parse. + * The syntax for a Zip stream contains a '#' that separates the filename + * to its left from the entry name to its right. + */ + public static void main (final String ... aArgumentList) + { + if (aArgumentList.length<2 ||aArgumentList.length>3) + throw new RuntimeException("usage: OOXMLParser <parser-tables-path> <XML-input-file> <log-file>?"); + + if (aArgumentList.length == 3) + { + final File aLogFile = new File(aArgumentList[2]); + Log.Dbg = new Log(aLogFile); + System.out.printf("writing log data to %s\n", aLogFile.toString()); + } + else + { + Log.Dbg = null; + System.out.printf("writing no log data\n"); + } + + new OOXMLParser(aArgumentList[0], aArgumentList[1]); + } + + + + private OOXMLParser ( + final String sParseTableFilename, + final String sInputFilename) + { + long nStartTime = System.currentTimeMillis(); + final StateMachine aMachine = new StateMachine(new File(sParseTableFilename), null); + final InputStream aIn = GetInputStream(sInputFilename); + long nEndTime = System.currentTimeMillis(); + + final ActionContext aActionContext = new ActionContext(); + AddSomeActions(aMachine.GetActionManager(), aActionContext); + + System.out.printf("initialzed parser in %fs\n", (nEndTime-nStartTime)/1000.0); + + try + { + nStartTime = System.currentTimeMillis(); + final Parser aParser = new Parser(aMachine, aIn); + aParser.Parse(); + final int nElementCount = aParser.GetElementCount(); + nEndTime = System.currentTimeMillis(); + System.out.printf("parsed %d elements in %fs\n", + nElementCount, + (nEndTime-nStartTime)/1000.0); + + System.out.printf("%d different elements found:\n", aActionContext.TypeCounts.size()); + for (final Entry<String, Integer> aEntry : aActionContext.TypeCounts.entrySet()) + { + System.out.printf("%-32s : %6d\n", aEntry.getKey(), aEntry.getValue()); + } + } + catch (final Exception aException) + { + aException.printStackTrace(); + } + } + + + + + private static void AddSomeActions ( + final ActionManager aActionManager, + final ActionContext aActionContext) + { + aActionManager.AddElementStartAction( + "*", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + Integer nValue = aActionContext.TypeCounts.get(aContext.GetTypeName()); + if (nValue == null) + nValue = 1; + else + ++nValue; + aActionContext.TypeCounts.put(aContext.GetTypeName(), nValue); + } + } + ); + aActionManager.AddElementStartAction( + ".*CT_Shd", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + System.out.printf("processing %s of element %s at position %d\n", + eTrigger, + aContext.GetElementName(), + aStartLocation.getCharacterOffset()); + + if (aContext.GetAttributes().GetAttributeCount() == 0) + System.out.printf(" no attributes\n"); + else + for (final Entry<String,String> aAttribute : aContext.GetAttributes().GetAttributes()) + System.out.printf(" %s -> %s\n", aAttribute.getKey(), aAttribute.getValue()); + } + } + ); + aActionManager.AddTextAction( + ".*CT_Text", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { +// System.out.printf("%s text \"%s\"\n", aContext.GetTypeName(), sText.replace("\n", "\\n")); + } + } + ); + } + + + + + private static InputStream GetInputStream (final String sInputName) + { + final InputStream aIn; + try + { + final int nSeparator = sInputName.indexOf('#'); + if (nSeparator >= 0) + { + // Split the input name into the file name of the archive and the + // name of a zip entry. + final String sArchiveName = sInputName.substring(0, nSeparator); + String sEntryName = sInputName.substring(nSeparator+1); + + // Normalize and cleanup the entry name. + sEntryName = sEntryName.replace('\\', '/'); + if (sEntryName.startsWith("/")) + sEntryName = sEntryName.substring(1); + + final ZipFile aZipFile = new ZipFile(new File(sArchiveName)); + final ZipEntry aZipEntry = aZipFile.getEntry(sEntryName); + aIn = aZipFile.getInputStream(aZipEntry); + } + else + { + // The input name points to a plain XML file. + aIn = new FileInputStream(sInputName); + } + } + catch (final Exception aException) + { + aException.printStackTrace(); + return null; + } + return aIn; + } +} |