diff options
Diffstat (limited to 'ooxml/source/framework/JavaOOXMLParser/src')
29 files changed, 3479 insertions, 0 deletions
/** Set of the ids of all accepting states.
 *
 *  Whether a state is accepting matters when a closing tag is seen: the
 *  closing tag ends the input of the state machine of the element that is
 *  currently processed, and it is an error when the current state is not
 *  accepting at that point.
 */
public class AcceptingStateTable
{
    /** Collect the state ids from the given table lines.
     *  @param aData
     *      Table lines, already split into fields.  Field 1 of each line is
     *      parsed as decimal state id; all other fields are ignored.
     *  @throws NumberFormatException
     *      When field 1 of a line is not a valid integer.
     */
    public AcceptingStateTable (final Iterable<String[]> aData)
    {
        final Set<Integer> aStateIds = new HashSet<>();
        for (final String[] aFields : aData)
            aStateIds.add(Integer.valueOf(aFields[1]));
        maAcceptingStates = aStateIds;
    }




    /** Return true when the given state id belongs to an accepting state.
     */
    public boolean Contains (final int nStateId)
    {
        return maAcceptingStates.contains(Integer.valueOf(nStateId));
    }




    /** Return the number of distinct accepting states.
     */
    public int GetAcceptingStateCount ()
    {
        return maAcceptingStates.size();
    }




    private final Set<Integer> maAcceptingStates;
}
+ */ +public class ElementContext +{ + ElementContext ( + final String sElementName, + final String sTypeName, + final boolean bIsSkipping, + final AttributeValues aValues, + final ElementContext aParentContext) + { + msElementName = sElementName; + msTypeName = sTypeName; + mbIsSkipping = bIsSkipping; + maAttributeValues = aValues; + maParentContext = aParentContext; + } + + + + + public String GetElementName () + { + return msElementName; + } + + + + + public String GetTypeName () + { + return msTypeName; + } + + + + + public AttributeValues GetAttributes () + { + return maAttributeValues; + } + + + + + /** Return the context of the parent element. + * Can be null when there is no parent element. + */ + public ElementContext GetParentContext () + { + return maParentContext; + } + + + + + private final String msElementName; + private final String msTypeName; + private final boolean mbIsSkipping; + private final AttributeValues maAttributeValues; + private final ElementContext maParentContext; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java new file mode 100644 index 000000000000..cefdd513f8b4 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java @@ -0,0 +1,130 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
/** Minimal logging helper that writes indented, printf-style formatted text
 *  to an output stream.
 *
 *  The indentation is a prefix that is written in front of every printf()
 *  call; it is changed with IncreaseIndentation() and DecreaseIndentation().
 */
public class Log
{
    public static Log Std = new Log(System.out);
    public static Log Err = new Log(System.err);
    /** Debug log.  May be set to null to switch off debug output; callers
     *  have to check for null before using it.
     */
    public static Log Dbg = Std;

    /** Text of one indentation level.  IncreaseIndentation() appends it and
     *  DecreaseIndentation() removes exactly its length, so both stay in sync.
     */
    private static final String INDENTATION_STEP = "    ";


    /** Create a log that writes to the given stream.
     *  @param aOut
     *      Destination of all output.  When it is null then printf() does
     *      nothing.
     */
    public Log (final OutputStream aOut)
    {
        maOut = aOut;
        msIndentation = "";
    }




    /** Create a log that writes to the named file.
     *  When the file can not be opened then a stack trace is printed and the
     *  new log silently discards all output.
     */
    public Log (final String sFilename)
    {
        this(CreateFileOutputStream(sFilename));
    }




    /** Create a log that writes to the given file.
     *  When the file can not be opened then a stack trace is printed and the
     *  new log silently discards all output.
     */
    public Log (final File aFile)
    {
        this(CreateFileOutputStream(aFile));
    }




    /** Open the given file for writing.
     *  @return
     *      The new stream or null when the file could not be opened.
     */
    private static OutputStream CreateFileOutputStream (final File aFile)
    {
        try
        {
            return new FileOutputStream(aFile);
        }
        catch (final Exception aException)
        {
            aException.printStackTrace();
            return null;
        }
    }




    /** Write the current indentation followed by the formatted text.
     *  Nothing is written when the log has no output stream, i.e. when
     *  opening the log file failed.  I/O errors are reported as stack trace
     *  and otherwise ignored.
     *  @param sFormat
     *      Format string as accepted by String.format().
     *  @param aArgumentList
     *      Arguments referenced by the format string.
     */
    public void printf (final String sFormat, final Object ... aArgumentList)
    {
        // Without this guard every call would raise a NullPointerException
        // after a failed attempt to open the log file.
        if (maOut == null)
            return;

        try
        {
            maOut.write(msIndentation.getBytes());
            maOut.write(String.format(sFormat, aArgumentList).getBytes());
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }




    /** Indent all following output by one more level.
     */
    public void IncreaseIndentation ()
    {
        msIndentation += INDENTATION_STEP;
    }




    /** Remove one level of indentation.
     *  A call without a matching IncreaseIndentation() leaves the indentation
     *  empty instead of raising an exception.
     */
    public void DecreaseIndentation ()
    {
        final int nStepLength = INDENTATION_STEP.length();
        if (msIndentation.length() >= nStepLength)
            msIndentation = msIndentation.substring(nStepLength);
        else
            msIndentation = "";
    }




    /** Open the named file for writing.
     *  @return
     *      The new stream or null when the file could not be opened.
     */
    private static OutputStream CreateFileOutputStream (final String sFilename)
    {
        try
        {
            return new FileOutputStream(sFilename);
        }
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
            return null;
        }
    }




    private final OutputStream maOut;
    private String msIndentation;
}
/** Mapping between names and their integer ids that can be queried in both
 *  directions.
 */
public class NameMap
{
    /** Fill both lookup directions from the given table lines.
     *  @param aData
     *      Table lines, already split into fields.  Field 1 of each line is
     *      parsed as decimal id, field 2 is the name that belongs to it.
     *  @throws NumberFormatException
     *      When field 1 of a line is not a valid integer.
     */
    NameMap (final Vector<String[]> aData)
    {
        maNameToIdMap = new HashMap<>();
        maIdToNameMap = new Vector<>();

        for (final String[] aFields : aData)
        {
            final int nId = Integer.parseInt(aFields[1]);
            final String sName = aFields[2];

            maNameToIdMap.put(sName, nId);

            // Grow the id->name table so that index nId exists.  Ids that
            // are skipped by the data are left as null entries.
            if (nId >= maIdToNameMap.size())
                maIdToNameMap.setSize(nId+1);
            maIdToNameMap.set(nId, sName);
        }
    }




    /** Return the id of the given name.
     *  @throws RuntimeException
     *      When the name is not known.
     */
    public int GetIdForName (
        final String sName)
    {
        final Integer aId = maNameToIdMap.get(sName);
        if (aId != null)
            return aId;

        throw new RuntimeException("token '"+sName+"' is not known");
    }




    /** Return the id of the given name or -1 when the name is not known.
     */
    public int GetIdForOptionalName (
        final String sName)
    {
        final Integer aId = maNameToIdMap.get(sName);
        return aId != null ? aId : -1;
    }




    /** Return the name for the given id.
     *  The id -1 yields the text "<none>".  An id inside the table that was
     *  not defined by the data yields null.
     */
    public String GetNameForId (final int nId)
    {
        if (nId != -1)
            return maIdToNameMap.get(nId);

        return "<none>";
    }




    /** Return the size of the id->name table, i.e. the largest id plus one.
     */
    public int GetNameCount ()
    {
        return maIdToNameMap.size();
    }




    /** Return the ids of all names that match the given regular expression
     *  completely (see String.matches()).
     */
    public Vector<Integer> GetMatchingStateIds (final String sPattern)
    {
        final Vector<Integer> aMatchingIds = new Vector<>();
        for (final Entry<String,Integer> aNameAndId : maNameToIdMap.entrySet())
        {
            final String sName = aNameAndId.getKey();
            if ( ! sName.matches(sPattern))
                continue;
            aMatchingIds.add(aNameAndId.getValue());
        }
        return aMatchingIds;
    }




    private final Map<String,Integer> maNameToIdMap;
    private final Vector<String> maIdToNameMap;
}
/** Lookup table for namespaces.  A namespace descriptor (prefix and id) can
 *  be found either by the namespace URI or by the namespace id.
 */
public class NamespaceMap
{
    /** Prefix and numerical id of one namespace.
     */
    public class NamespaceDescriptor
    {
        public final String Prefix;
        public final int Id;

        NamespaceDescriptor (final String sPrefix, final int nId)
        {
            this.Prefix = sPrefix;
            this.Id = nId;
        }
    }




    /** Fill the map from the given table lines.
     *  @param aData
     *      Table lines, already split into fields.  Field 1 is the prefix,
     *      field 2 is parsed as decimal id and field 3 is the namespace URI.
     *  @throws NumberFormatException
     *      When field 2 of a line is not a valid integer.
     */
    NamespaceMap (final Vector<String[]> aData)
    {
        maUriToDescriptorMap = new HashMap<>();
        maIdToDescriptorMap = new HashMap<>();

        for (final String[] aFields : aData)
        {
            final int nId = Integer.parseInt(aFields[2]);
            // The same descriptor object is registered under both keys.
            final NamespaceDescriptor aDescriptor = new NamespaceDescriptor(aFields[1], nId);
            maUriToDescriptorMap.put(aFields[3], aDescriptor);
            maIdToDescriptorMap.put(nId, aDescriptor);
        }
    }




    /** Return the descriptor of the namespace with the given URI.
     *  @throws RuntimeException
     *      When the URI is null or is not known.
     */
    public NamespaceDescriptor GetDescriptorForURI (final String sURI)
    {
        if (sURI == null)
            throw new RuntimeException("namespace is null");

        final NamespaceDescriptor aDescriptor = maUriToDescriptorMap.get(sURI);
        if (aDescriptor == null)
            throw new RuntimeException("namespace '"+sURI+"' is not known");

        return aDescriptor;
    }




    /** Return the descriptor of the namespace with the given id or null when
     *  there is no such namespace.
     */
    public NamespaceDescriptor GetDescriptorForId (final int nId)
    {
        return maIdToDescriptorMap.get(Integer.valueOf(nId));
    }




    /** Return the number of distinct namespace URIs.
     */
    public int GetNamespaceCount ()
    {
        return maUriToDescriptorMap.size();
    }




    private final Map<String,NamespaceDescriptor> maUriToDescriptorMap;
    private final Map<Integer,NamespaceDescriptor> maIdToDescriptorMap;
}
Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import javax.xml.stream.Location; + +import org.apache.openoffice.ooxml.parser.action.ActionManager; +import org.apache.openoffice.ooxml.parser.action.ActionTrigger; +import org.apache.openoffice.ooxml.parser.action.IAction; + +/** This OOXML parser is based on the output of the schema parser. + * It exists to debug the schema parser and as illustration and preparation of + * the C++ parse (yet to come.) + * Because of this, the parser data (set of states and transitions) are + * read at runtime while a real parser would do that at compile time. + */ +public class OOXMLParser +{ + class ActionContext + { + public Map<String,Integer> TypeCounts = new TreeMap<>(); + } + /** The parser is called with two arguments: + * - A path to where the parser tables with the states and transitions can + * be found. + * - The XML input file or Zip stream to parse. 
+ * The syntax for a Zip stream contains a '#' that separates the filename + * to its left from the entry name to its right. + */ + public static void main (final String ... aArgumentList) + { + if (aArgumentList.length<2 ||aArgumentList.length>3) + throw new RuntimeException("usage: OOXMLParser <parser-tables-path> <XML-input-file> <log-file>?"); + + if (aArgumentList.length == 3) + { + final File aLogFile = new File(aArgumentList[2]); + Log.Dbg = new Log(aLogFile); + System.out.printf("writing log data to %s\n", aLogFile.toString()); + } + else + { + Log.Dbg = null; + System.out.printf("writing no log data\n"); + } + + new OOXMLParser(aArgumentList[0], aArgumentList[1]); + } + + + + private OOXMLParser ( + final String sParseTableFilename, + final String sInputFilename) + { + long nStartTime = System.currentTimeMillis(); + final StateMachine aMachine = new StateMachine(new File(sParseTableFilename), null); + final InputStream aIn = GetInputStream(sInputFilename); + long nEndTime = System.currentTimeMillis(); + + final ActionContext aActionContext = new ActionContext(); + AddSomeActions(aMachine.GetActionManager(), aActionContext); + + System.out.printf("initialzed parser in %fs\n", (nEndTime-nStartTime)/1000.0); + + try + { + nStartTime = System.currentTimeMillis(); + final Parser aParser = new Parser(aMachine, aIn); + aParser.Parse(); + final int nElementCount = aParser.GetElementCount(); + nEndTime = System.currentTimeMillis(); + System.out.printf("parsed %d elements in %fs\n", + nElementCount, + (nEndTime-nStartTime)/1000.0); + + System.out.printf("%d different elements found:\n", aActionContext.TypeCounts.size()); + for (final Entry<String, Integer> aEntry : aActionContext.TypeCounts.entrySet()) + { + System.out.printf("%-32s : %6d\n", aEntry.getKey(), aEntry.getValue()); + } + } + catch (final Exception aException) + { + aException.printStackTrace(); + } + } + + + + + private static void AddSomeActions ( + final ActionManager aActionManager, + final 
ActionContext aActionContext) + { + aActionManager.AddElementStartAction( + "*", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + Integer nValue = aActionContext.TypeCounts.get(aContext.GetTypeName()); + if (nValue == null) + nValue = 1; + else + ++nValue; + aActionContext.TypeCounts.put(aContext.GetTypeName(), nValue); + } + } + ); + aActionManager.AddElementStartAction( + ".*CT_Shd", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + System.out.printf("processing %s of element %s at position %d\n", + eTrigger, + aContext.GetElementName(), + aStartLocation.getCharacterOffset()); + + if (aContext.GetAttributes().GetAttributeCount() == 0) + System.out.printf(" no attributes\n"); + else + for (final Entry<String,String> aAttribute : aContext.GetAttributes().GetAttributes()) + System.out.printf(" %s -> %s\n", aAttribute.getKey(), aAttribute.getValue()); + } + } + ); + aActionManager.AddTextAction( + ".*CT_Text", + new IAction() + { + @Override public void Run( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { +// System.out.printf("%s text \"%s\"\n", aContext.GetTypeName(), sText.replace("\n", "\\n")); + } + } + ); + } + + + + + private static InputStream GetInputStream (final String sInputName) + { + final InputStream aIn; + try + { + final int nSeparator = sInputName.indexOf('#'); + if (nSeparator >= 0) + { + // Split the input name into the file name of the archive and the + // name of a zip entry. 
+ final String sArchiveName = sInputName.substring(0, nSeparator); + String sEntryName = sInputName.substring(nSeparator+1); + + // Normalize and cleanup the entry name. + sEntryName = sEntryName.replace('\\', '/'); + if (sEntryName.startsWith("/")) + sEntryName = sEntryName.substring(1); + + final ZipFile aZipFile = new ZipFile(new File(sArchiveName)); + final ZipEntry aZipEntry = aZipFile.getEntry(sEntryName); + aIn = aZipFile.getInputStream(aZipEntry); + } + else + { + // The input name points to a plain XML file. + aIn = new FileInputStream(sInputName); + } + } + catch (final Exception aException) + { + aException.printStackTrace(); + return null; + } + return aIn; + } +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseException.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseException.java new file mode 100644 index 000000000000..ae07f9ca1fec --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseException.java @@ -0,0 +1,42 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
/** Unchecked exception that wraps an exception which was raised while
 *  parsing and that remembers a location in the XML input.
 */
@SuppressWarnings("serial")
public class ParseException
    extends RuntimeException
{
    /** The location that was given to the constructor.  It is not checked
     *  and therefore may be null.
     */
    public final Location Location;




    /** Wrap the given exception.
     *  @param aException
     *      Becomes the cause of the new exception.
     *  @param aLocation
     *      Location in the input that is stored in the Location member.
     */
    public ParseException (
        final Exception aException,
        final Location aLocation)
    {
        super(aException);
        this.Location = aLocation;
    }
}
/** A simple reader for the parse table data that allows simple filtering on the
 *  first word in each line.
 *
 *  Every line is split at whitespace into fields.  The first field is the
 *  name of the section to which the line is added.
 *  Lines that only contain comments (first non-whitespace character is '#')
 *  or whitespace are ignored.
 */
public class ParseTableReader
{
    /** Read the given file completely and sort its lines into sections.
     *  @param aFile
     *      The parse table file.
     *  @throws RuntimeException
     *      When the file can not be read or contains a malformed line.  The
     *      original exception is available as cause.
     */
    public ParseTableReader (final File aFile)
    {
        maSections = new HashMap<>();

        // try-with-resources closes the reader also when a line is rejected.
        try (final BufferedReader aReader = new BufferedReader(new FileReader(aFile)))
        {
            String sLine;
            while ((sLine = aReader.readLine()) != null)
            {
                final String sTrimmedLine = sLine.trim();
                if (sTrimmedLine.isEmpty() || sTrimmedLine.startsWith("#"))
                    continue;

                final String[] aLineParts = sLine.split("\\s+");
                for (int nIndex=0; nIndex<aLineParts.length; ++nIndex)
                    aLineParts[nIndex] = DecodePart(aLineParts[nIndex], sLine);
                GetSection(aLineParts[0]).add(aLineParts);
            }
        }
        catch (final Exception aException)
        {
            throw new RuntimeException(aException);
        }
    }




    /** Decode one field of a line.
     *  Fields that start with a double quote lose their first and last
     *  character, "%20" is replaced by a space and "&quot;" by a double
     *  quote.  All other fields are returned unmodified.
     *  @param sPart
     *      The field as returned by String.split().
     *  @param sLine
     *      The complete line.  Used only for error messages.
     *  @throws RuntimeException
     *      When the field is empty, which happens only when the line starts
     *      with whitespace, or consists of a single double quote.
     */
    private static String DecodePart (final String sPart, final String sLine)
    {
        if (sPart.isEmpty())
            throw new RuntimeException("line must not start with whitespace: '"+sLine+"'");
        if (sPart.charAt(0) != '"')
            return sPart;
        if (sPart.length() < 2)
            throw new RuntimeException("incomplete quoted field in line '"+sLine+"'");

        // Remove leading and trailing quotes, unquote spaces and quotes.
        return sPart.substring(1, sPart.length()-1).replace("%20", " ").replace("&quot;", "\"");
    }




    /** Return the lines of the given section.
     *  An unknown section is created on demand, so the result is never null.
     *  The returned vector is the internal one; changes to it are visible to
     *  later callers.
     */
    public Vector<String[]> GetSection (final String sSectionName)
    {
        Vector<String[]> aSection = maSections.get(sSectionName);
        if (aSection == null)
        {
            aSection = new Vector<>();
            maSections.put(sSectionName, aSection);
        }
        return aSection;
    }




    private final Map<String,Vector<String[]>> maSections;
}
+* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.io.InputStream; +import java.util.Vector; + +import javax.xml.stream.Location; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.apache.openoffice.ooxml.parser.action.ActionManager; +import org.apache.openoffice.ooxml.parser.attribute.AttributeProvider; + +/** This is the actual parser (where OOXMLParser is the front end that handles + * parameters given to the main method). + */ +public class Parser +{ + public Parser ( + final StateMachine aMachine, + final InputStream aIn) + { + maMachine = aMachine; + maReader = GetStreamReader(aIn, "input"); + mnElementCount = 0; + } + + + + + public void Parse () + { + Location aLocation = null; + try + { + final AttributeProvider aAttributeProvider = new AttributeProvider(maReader); + while (maReader.hasNext()) + { + aLocation = maReader.getLocation(); + final int nCode = maReader.next(); + switch(nCode) + { + case XMLStreamReader.START_ELEMENT: + ++mnElementCount; + if (maMachine.IsInSkipState()) + { + if (Log.Dbg != null) + Log.Dbg.printf("is skip state -> starting to skip\n"); + Skip(); + } + else if ( ! 
maMachine.ProcessStartElement( + maReader.getNamespaceURI(), + maReader.getLocalName(), + aLocation, + maReader.getLocation(), + aAttributeProvider)) + { + if (Log.Dbg != null) + Log.Dbg.printf("starting to skip to recover from error\n"); + Skip(); + } + break; + + case XMLStreamReader.END_ELEMENT: + maMachine.ProcessEndElement( + maReader.getNamespaceURI(), + maReader.getLocalName(), + aLocation, + maReader.getLocation()); + break; + + case XMLStreamReader.CHARACTERS: + maMachine.ProcessCharacters( + maReader.getText(), + aLocation, + maReader.getLocation()); + break; + + case XMLStreamReader.END_DOCUMENT: + break; + + default: + Log.Err.printf("can't handle XML event of type %d\n", nCode); + } + } + + maReader.close(); + } + catch (final XMLStreamException aException) + { + aException.printStackTrace(); + } + catch (final Exception aException) + { + throw new ParseException( + aException, + aLocation); + } + } + + + + + + + public int GetElementCount () + { + return mnElementCount; + } + + + + + private void Skip () + { + if (Log.Dbg != null) + { + Log.Dbg.printf("starting to skip on %s at L%dC%d\n", + maReader.getLocalName(), + maReader.getLocation().getLineNumber(), + maReader.getLocation().getColumnNumber()); + Log.Dbg.IncreaseIndentation(); + } + + // We are called when processing a start element. This means that we are + // already at relative depth 1. 
+ int nRelativeDepth = 1; + try + { + while (maReader.hasNext()) + { + final int nCode = maReader.next(); + switch (nCode) + { + case XMLStreamReader.START_ELEMENT: + ++nRelativeDepth; + ++mnElementCount; + if (Log.Dbg != null) + { + Log.Dbg.printf("skipping start element %s\n", maReader.getLocalName()); + Log.Dbg.IncreaseIndentation(); + } + break; + + case XMLStreamReader.END_ELEMENT: + --nRelativeDepth; + if (Log.Dbg != null) + Log.Dbg.DecreaseIndentation(); + if (nRelativeDepth <= 0) + { + if (Log.Dbg != null) + Log.Dbg.printf("leaving skip mode on %s\n", maReader.getLocalName()); + return; + } + break; + + case XMLStreamReader.END_DOCUMENT: + throw new RuntimeException("saw end of document while skipping elements\n"); + + case XMLStreamReader.CHARACTERS: + SkipText(maReader.getText()); + break; + + default: + if (Log.Dbg != null) + Log.Dbg.printf("%s\n", nCode); + break; + } + } + } + catch (final XMLStreamException aException) + { + aException.printStackTrace(); + } + } + + + + + private void SkipText (final String sText) + { + if (Log.Dbg != null) + Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n")); + } + + + + + private XMLStreamReader GetStreamReader ( + final InputStream aIn, + final String sDescription) + { + if (aIn == null) + return null; + + try + { + final XMLInputFactory aFactory = (XMLInputFactory)XMLInputFactory.newInstance(); + aFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); + aFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + aFactory.setProperty(XMLInputFactory.IS_COALESCING, false); + + return (XMLStreamReader)aFactory.createXMLStreamReader( + sDescription, + aIn); + } + catch (final Exception aException) + { + aException.printStackTrace(); + return null; + } + } + + + + + public ActionManager GetActionManager() + { + return maMachine.GetActionManager(); + } + + + + + private final XMLStreamReader maReader; + private final StateMachine maMachine; + private int 
mnElementCount; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java new file mode 100644 index 000000000000..0171b743a811 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java @@ -0,0 +1,70 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.util.HashSet; +import java.util.Set; + +/** Table of all skip states. + * + * A skip state corresponds to the 'any' element in the schemas. + * It means that the content of the element is specified by an extension of the + * schema which may or may not be known at parse time. + * At the moment the whole element is skipped, i.e. ignored. + * + */ +public class SkipStateTable +{ + public SkipStateTable (final Iterable<String[]> aData) + { + maSkipStates = new HashSet<>(); + + for (final String[] aLine : aData) + { + // Create new transition. 
+ final int nStateId = Integer.parseInt(aLine[1]); + + maSkipStates.add(nStateId); + } + } + + + + + public boolean Contains (final int nStateId) + { + return maSkipStates.contains(nStateId); + } + + + + + public int GetSkipStateCount () + { + return maSkipStates.size(); + } + + + + + private final Set<Integer> maSkipStates; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java new file mode 100644 index 000000000000..39ff4dfd8137 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java @@ -0,0 +1,330 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.io.File; +import java.util.Stack; +import java.util.Vector; + +import javax.xml.stream.Location; + +import org.apache.openoffice.ooxml.parser.action.ActionManager; +import org.apache.openoffice.ooxml.parser.action.ActionTrigger; +import org.apache.openoffice.ooxml.parser.action.IAction; +import org.apache.openoffice.ooxml.parser.attribute.AttributeManager; +import org.apache.openoffice.ooxml.parser.attribute.AttributeProvider; +import org.apache.openoffice.ooxml.parser.attribute.AttributeValues; +import org.apache.openoffice.ooxml.parser.type.SimpleTypeManager; + +/** The state machine is initialized at creation from the data tables + * created previously by a stack automaton. + */ +public class StateMachine +{ + public StateMachine ( + final File aParseTableFile, + final Vector<String> aErrorsAndWarnings) + { + if (Log.Dbg != null) + Log.Dbg.printf("reading parse tables from %s\n", aParseTableFile.toString()); + + final ParseTableReader aReader = new ParseTableReader(aParseTableFile); + maNamespaceMap = new NamespaceMap(aReader.GetSection("namespace")); + maNameMap = new NameMap(aReader.GetSection("name")); + maStateNameMap = new NameMap(aReader.GetSection("state-name")); + maTransitions = new TransitionTable(aReader.GetSection("transition")); + maSkipStates = new SkipStateTable(aReader.GetSection("skip")); + maAttributeValueMap = new NameMap(aReader.GetSection("attribute-value")); + maAcceptingStates = new AcceptingStateTable(aReader.GetSection("accepting-state")); + maSimpleTypeManager = new SimpleTypeManager( + aReader.GetSection("simple-type"), + maAttributeValueMap); + maAttributeManager = new AttributeManager( + aReader.GetSection("attribute"), + maNamespaceMap, + maNameMap, + maStateNameMap, + maSimpleTypeManager, + aErrorsAndWarnings); + mnStartStateId = Integer.parseInt(aReader.GetSection("start-state").firstElement()[1]); + 
mnEndStateId = Integer.parseInt(aReader.GetSection("end-state").firstElement()[1]); + + mnCurrentStateId = mnStartStateId; + maStateStack = new Stack<>(); + maElementContextStack = new Stack<>(); + maActionManager = new ActionManager(maStateNameMap); + maErrorsAndWarnings = aErrorsAndWarnings; + + if (Log.Dbg != null) + { + Log.Dbg.printf("read %d namespace, %d names, %d states (%d skip, %d accept), %d transitions and %d attributes\n", + maNamespaceMap.GetNamespaceCount(), + maNameMap.GetNameCount(), + maStateNameMap.GetNameCount(), + maSkipStates.GetSkipStateCount(), + maAcceptingStates.GetAcceptingStateCount(), + maTransitions.GetTransitionCount(), + maAttributeManager.GetAttributeCount()); + Log.Dbg.printf("starting in state _start_ (%d)\n", mnCurrentStateId); + } + } + + + + + public boolean ProcessStartElement ( + final String sNamespaceURI, + final String sElementName, + final Location aStartLocation, + final Location aEndLocation, + final AttributeProvider aAttributes) + { + boolean bResult = false; + + try + { + final NamespaceMap.NamespaceDescriptor aNamespaceDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespaceURI); + final int nElementNameId = maNameMap.GetIdForName(sElementName); + if (Log.Dbg != null) + Log.Dbg.printf("%s:%s(%d:%d) L%dC%d\n", + aNamespaceDescriptor.Prefix, + sElementName, + aNamespaceDescriptor.Id, + nElementNameId, + aStartLocation.getLineNumber(), + aStartLocation.getColumnNumber()); + + final Transition aTransition = maTransitions.GetTransition( + mnCurrentStateId, + aNamespaceDescriptor.Id, + nElementNameId); + if (aTransition == null) + { + final String sText = String.format( + "can not find transition for state %s(%d) and element %s:%s(%d:%d) at L%dC%d\n", + maStateNameMap.GetNameForId(mnCurrentStateId), + mnCurrentStateId, + aNamespaceDescriptor.Prefix, + maNameMap.GetNameForId(nElementNameId), + aNamespaceDescriptor.Id, + nElementNameId, + aStartLocation.getLineNumber(), + aStartLocation.getColumnNumber()); + 
Log.Err.printf(sText); + if (Log.Dbg != null) + Log.Dbg.printf(sText); + } + else + { + if (Log.Dbg != null) + { + Log.Dbg.printf(" %s(%d) -> %s(%d) via %s(%d)", + maStateNameMap.GetNameForId(mnCurrentStateId), + mnCurrentStateId, + maStateNameMap.GetNameForId(aTransition.GetEndStateId()), + aTransition.GetEndStateId(), + maStateNameMap.GetNameForId(aTransition.GetActionId()), + aTransition.GetActionId()); + Log.Dbg.printf("\n"); + } + + // Follow the transition to its end state but first process its + // content. We do that by + + if (Log.Dbg != null) + Log.Dbg.IncreaseIndentation(); + + // a) pushing the end state to the state stack so that on the + // end tag that corresponds to the current start tag it will become the current state. + maStateStack.push(aTransition.GetEndStateId()); + + // b) entering the state that corresponds to start tag that + // we are currently processing. + mnCurrentStateId = aTransition.GetActionId(); + + // c) Prepare the attributes and store them in the new element context. + final AttributeValues aAttributeValues = maAttributeManager.ParseAttributes( + mnCurrentStateId, + aAttributes); + + // d) creating a new ElementContext for the element that just starts. + maElementContextStack.push(maCurrentElementContext); + final ElementContext aPreviousElementContext = maCurrentElementContext; + maCurrentElementContext = new ElementContext( + sElementName, + maStateNameMap.GetNameForId(aTransition.GetActionId()), + false, + aAttributeValues, + aPreviousElementContext); + + // e) and run all actions that are bound to the the current start tag. 
+ ExecuteActions( + mnCurrentStateId, + maCurrentElementContext, + ActionTrigger.ElementStart, + null, + aStartLocation, + aEndLocation); + + bResult = true; + } + } + catch (RuntimeException aException) + { + Log.Err.printf("error at line %d and column %d\n", + aStartLocation.getLineNumber(), + aStartLocation.getColumnNumber()); + throw aException; + } + return bResult; + } + + + + + public void ProcessEndElement ( + final String sNamespaceURI, + final String sElementName, + final Location aStartLocation, + final Location aEndLocation) + { + if ( ! maAcceptingStates.Contains(mnCurrentStateId) + && mnCurrentStateId!=-1) + { + if (Log.Dbg != null) + Log.Dbg.printf("current state %s(%d) is not an accepting state\n", + maStateNameMap.GetNameForId(mnCurrentStateId), + mnCurrentStateId); + throw new RuntimeException("not expecting end element "+sElementName); + } + + final NamespaceMap.NamespaceDescriptor aDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespaceURI); + + // Leave the current element. 
+ + final int nPreviousStateId = mnCurrentStateId; + mnCurrentStateId = maStateStack.pop(); + if (mnCurrentStateId == mnEndStateId) + mnCurrentStateId = mnStartStateId; + + final ElementContext aPreviousElementContext = maCurrentElementContext; + maCurrentElementContext = maElementContextStack.pop(); + + ExecuteActions( + nPreviousStateId, + aPreviousElementContext, + ActionTrigger.ElementEnd, + null, + aStartLocation, + aEndLocation); + + if (Log.Dbg != null) + { + Log.Dbg.DecreaseIndentation(); + Log.Dbg.printf("/%s:%s L%d%d\n", + aDescriptor.Prefix, + sElementName, + aStartLocation.getLineNumber(), + aStartLocation.getColumnNumber()); + Log.Dbg.printf(" %s(%d) <- %s(%d)\n", + maStateNameMap.GetNameForId(nPreviousStateId), + nPreviousStateId, + maStateNameMap.GetNameForId(mnCurrentStateId), + mnCurrentStateId); + } + } + + + + + public void ProcessCharacters ( + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + if (Log.Dbg != null) + Log.Dbg.printf("text [%s]\n", sText.replace("\n", "\\n")); + + ExecuteActions( + mnCurrentStateId, + maCurrentElementContext, + ActionTrigger.Text, + sText, + aStartLocation, + aEndLocation); + + } + + + + + public boolean IsInSkipState () + { + return maSkipStates.Contains(mnCurrentStateId); + } + + + + + public ActionManager GetActionManager () + { + return maActionManager; + } + + + + + private void ExecuteActions ( + final int nStateId, + final ElementContext aElementContext, + final ActionTrigger eTrigger, + final String sText, + final Location aStartLocation, + final Location aEndLocation) + { + final Iterable<IAction> aActions = maActionManager.GetActions(nStateId, eTrigger); + if (aActions != null) + for (final IAction aAction : aActions) + aAction.Run(eTrigger, aElementContext, sText, aStartLocation, aEndLocation); + } + + + + + private final NamespaceMap maNamespaceMap; + private final NameMap maNameMap; + private final NameMap maStateNameMap; + private final TransitionTable 
maTransitions; + private final SimpleTypeManager maSimpleTypeManager; + private final AttributeManager maAttributeManager; + private final NameMap maAttributeValueMap; + private int mnCurrentStateId; + private Stack<Integer> maStateStack; + private ElementContext maCurrentElementContext; + private Stack<ElementContext> maElementContextStack; + private final int mnStartStateId; + private final int mnEndStateId; + private SkipStateTable maSkipStates; + private AcceptingStateTable maAcceptingStates; + private final ActionManager maActionManager; + private final Vector<String> maErrorsAndWarnings; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java new file mode 100644 index 000000000000..c5c956116b28 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java @@ -0,0 +1,77 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/

/** Immutable record of one transition: the state it starts in, the state
 *  it ends in, the id of the element that triggers it and the id of the
 *  state that is active while the content of that element is processed.
 */
class Transition
{
    private final int mnStartStateId;
    private final int mnEndStateId;
    private final int mnElementId;
    private final int mnActionStateId;




    /** Store the four ids unmodified.
     */
    Transition (
        final int nStartStateId,
        final int nEndStateId,
        final int nElementId,
        final int nActionStateId)
    {
        this.mnStartStateId = nStartStateId;
        this.mnEndStateId = nEndStateId;
        this.mnElementId = nElementId;
        this.mnActionStateId = nActionStateId;
    }




    /** Id of the state in which the transition starts.
     */
    public int GetStartStateId ()
    {
        return this.mnStartStateId;
    }




    /** Id of the state in which the transition ends.
     */
    public int GetEndStateId ()
    {
        return this.mnEndStateId;
    }




    /** Id of the element that triggers the transition.
     */
    public int GetElementId ()
    {
        return this.mnElementId;
    }




    /** Id of the action state that was given to the constructor.
     */
    public int GetActionId ()
    {
        return this.mnActionStateId;
    }
}
See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser; + +import java.util.HashMap; +import java.util.Map; +import java.util.Vector; + +public class TransitionTable +{ + public TransitionTable (final Vector<String[]> aData) + { + maTransitions = new HashMap<>(); + + for (final String[] aLine : aData) + { + // Create new transition. + final int nStartStateId = Integer.parseInt(aLine[1]); + final int nEndStateId = Integer.parseInt(aLine[2]); + final int nElementPrefixId = Integer.parseInt(aLine[3]); + final int nElementLocalId = Integer.parseInt(aLine[4]); + final int nElementStateId = Integer.parseInt(aLine[5]); + final Transition aTransition = new Transition( + nStartStateId, + nEndStateId, + (nElementPrefixId<<16) | nElementLocalId, + nElementStateId); + + Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId()); + if (aPerElementTransitions == null) + { + aPerElementTransitions = new HashMap<>(); + maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions); + } + aPerElementTransitions.put(aTransition.GetElementId(), aTransition); + } + } + + + + + public Transition GetTransition ( + final int nStateId, + final int nPrefixId, + final int nLocalId) + { + Map<Integer,Transition> aPerElementTransitions = maTransitions.get(nStateId); + if (aPerElementTransitions == null) + return null; + else + return aPerElementTransitions.get((nPrefixId<<16) | nLocalId); + } + + + + + public int GetTransitionCount () + { + return maTransitions.size(); + } + + + + + private final Map<Integer,Map<Integer,Transition>> maTransitions; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionDescriptor.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionDescriptor.java new file 
mode 100644 index 000000000000..27ce9d4b8f90 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionDescriptor.java @@ -0,0 +1,114 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.action; + +import java.util.Vector; + +/** Container of all actions that are associated with a single state. 
+ */ +public class ActionDescriptor +{ + public ActionDescriptor ( + final int nStateId, + final String sName) + { + msStateName = sName; + + maElementStartActions = null; + maElementEndActions = null; + maTextActions = null; + } + + + + + public void AddAction ( + final IAction aAction, + final ActionTrigger eTrigger) + { + GetActionsForTrigger(eTrigger, true).add(aAction); + } + + + + + public Iterable<IAction> GetActions ( + final ActionTrigger eTrigger) + { + return GetActionsForTrigger(eTrigger, false); + } + + + + + @Override + public String toString () + { + return "actions for state "+msStateName; + } + + + + + private Vector<IAction> GetActionsForTrigger ( + final ActionTrigger eTrigger, + final boolean bCreateWhenMissing) + { + Vector<IAction> aActions = null; + switch(eTrigger) + { + case ElementStart: + aActions = maElementStartActions; + if (bCreateWhenMissing && aActions==null) + { + aActions = new Vector<>(); + maElementStartActions = aActions; + } + break; + case ElementEnd: + aActions = maElementEndActions; + if (bCreateWhenMissing && aActions==null) + { + aActions = new Vector<>(); + maElementEndActions = aActions; + } + break; + case Text: + aActions = maTextActions; + if (bCreateWhenMissing && aActions==null) + { + aActions = new Vector<>(); + maTextActions = aActions; + } + break; + } + return aActions; + } + + + + + private final String msStateName; + private Vector<IAction> maElementStartActions; + private Vector<IAction> maElementEndActions; + private Vector<IAction> maTextActions; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionIterator.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionIterator.java new file mode 100644 index 000000000000..0ca176ad51bb --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionIterator.java @@ -0,0 +1,118 @@ 
+/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.action; + +import java.util.Iterator; + +/** Iterate over two sources of actions, both given as an Iterable<IAction> + * object that can be null. 
+*/ +public class ActionIterator implements Iterable<IAction> +{ + public ActionIterator ( + final Iterable<IAction> aOneStateActions, + final Iterable<IAction> aAllStateActions) + { + maOneStateActions = aOneStateActions; + maAllStateActions = aAllStateActions; + } + + + + + @Override public Iterator<IAction> iterator() + { + return new Iterator<IAction>() + { + Iterator<IAction> maIterator = null; + int mnPhase = 0; + + @Override + public boolean hasNext() + { + while(true) + { + if (mnPhase == 2) + return false; + else if (mnPhase == 0) + { + if (maIterator == null) + if (maOneStateActions == null) + { + mnPhase = 1; + continue; + } + else + maIterator = maOneStateActions.iterator(); + if (maIterator.hasNext()) + return true; + else + { + maIterator = null; + mnPhase = 1; + } + } + else if (mnPhase == 1) + { + if (maIterator == null) + if (maAllStateActions == null) + { + mnPhase = 2; + return false; + } + else + maIterator = maAllStateActions.iterator(); + if (maIterator.hasNext()) + return true; + else + { + mnPhase = 2; + } + } + } + } + + + + + @Override + public IAction next() + { + return maIterator.next(); + } + + + + + @Override + public void remove() + { + } + }; + } + + + + + private final Iterable<IAction> maOneStateActions; + private final Iterable<IAction> maAllStateActions; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionManager.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionManager.java new file mode 100644 index 000000000000..48d78a03977b --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionManager.java @@ -0,0 +1,165 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. 
See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.action; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.openoffice.ooxml.parser.NameMap; + +/** Manage actions that are bound to states and XML events. + */ +public class ActionManager +{ + public ActionManager ( + final NameMap aStateNameToIdMap) + { + maStateNameToIdMap = aStateNameToIdMap; + maAllStatesActions = new ActionDescriptor(0,"*"); + maStateToActionsMap = new HashMap<>(); + } + + + + + /** Add an action for an element start. + * @param sStateSelector + * The element is specified via a state name. This allows one element + * that leads to different complex types to have different actions, + * depending on the complex type. + * The selector value can be a full state name (including the namespace + * prefix and CT prefix, e.g. w06_CT_Table) or a regular expression + * (e.g. .*_CT_Table to match w06_CT_Table and w12_CT_Table). + * The action is bound to all matching states. + * @param aAction + * The action to call on entering any of the states that match the + * selector. 
+ */ + public void AddElementStartAction ( + final String sStateSelector, + final IAction aAction) + { + AddAction(sStateSelector, aAction, ActionTrigger.ElementStart); + } + + + + + /** Add an action for an element end. + * @see AddElementStartAction. + */ + public void AddElementEndAction ( + final String sStateSelector, + final IAction aAction) + { + AddAction(sStateSelector, aAction, ActionTrigger.ElementEnd); + } + + + + + /** Add an action for XML text events. + * @see AddElementStartAction. + */ + public void AddTextAction ( + final String sStateSelector, + final IAction aAction) + { + AddAction(sStateSelector, aAction, ActionTrigger.Text); + } + + + + + /** Return an iterable object that gives access to all actions + * bound to the given state and trigger. + * Return value can be null when there are no actions bound to the state + * and trigger. + */ + public Iterable<IAction> GetActions ( + final int nStateId, + final ActionTrigger eTrigger) + { + final ActionDescriptor aOneStateActionsDescriptor = maStateToActionsMap.get(nStateId); + final Iterable<IAction> aOneStateActions = aOneStateActionsDescriptor!=null + ? aOneStateActionsDescriptor.GetActions(eTrigger) + : null; + final Iterable<IAction> aAllStateActions = maAllStatesActions.GetActions(eTrigger); + + if (aOneStateActions == null) + return aAllStateActions; + else if (aAllStateActions == null) + return aOneStateActions; + else + return new ActionIterator(aOneStateActions, aAllStateActions); + } + + + + + private void AddAction ( + final String sStateSelector, + final IAction aAction, + final ActionTrigger eTrigger) + { + if (sStateSelector.equals("*")) + { + // Simple optimization when an action is defined for all states. + maAllStatesActions.AddAction(aAction, eTrigger); + } + else if (sStateSelector.contains("*") || sStateSelector.contains("?")) + { + // The state selector contains wildcards. We have to iterate over + // all state names to find the matching ones. 
+ for (final int nStateId : maStateNameToIdMap.GetMatchingStateIds(sStateSelector)) + { + GetActionDescriptor(nStateId).AddAction(aAction, eTrigger); + } + } + else + { + final int nStateId = maStateNameToIdMap.GetIdForName(sStateSelector); + GetActionDescriptor(nStateId).AddAction(aAction, eTrigger); + } + } + + + + + private ActionDescriptor GetActionDescriptor (final int nStateId) + { + ActionDescriptor aDescriptor = maStateToActionsMap.get(nStateId); + if (aDescriptor == null) + { + aDescriptor = new ActionDescriptor(nStateId, maStateNameToIdMap.GetNameForId(nStateId)); + maStateToActionsMap.put(nStateId, aDescriptor); + } + return aDescriptor; + } + + + + + private final NameMap maStateNameToIdMap; + private final ActionDescriptor maAllStatesActions; + private final Map<Integer,ActionDescriptor> maStateToActionsMap; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionTrigger.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionTrigger.java new file mode 100644 index 000000000000..33b781a0b1a6 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/ActionTrigger.java @@ -0,0 +1,31 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/

/** The XML events to which an action can be bound.
 */
public enum ActionTrigger
{
    /** The start tag of an element. */
    ElementStart,

    /** The end tag of an element. */
    ElementEnd,

    /** Character data. */
    Text
}
\ No newline at end of file diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/IAction.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/IAction.java new file mode 100644 index 000000000000..e784ed7c5537 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/action/IAction.java @@ -0,0 +1,53 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.action; + +import javax.xml.stream.Location; + +import org.apache.openoffice.ooxml.parser.ElementContext; + +/** Interface for actions that are bound to states and triggered by XML events. + */ +public interface IAction +{ + /** Callback for a single XML event. + * @param eTrigger + * Equivalent to the XML event type. + * @param aContext + * The context of the element that was just entered (element start), + * is about to be left (element end) or is currently active (all other + * events). + * @param sText + * Contains text for ActionTrigger.Text. Is null for all other + * triggers. 
+ * @param aStartLocation + * The location in the source file where the triggering element starts. + * @param aEndLocation + * The location in the source file where the triggering element ends. + */ + void Run ( + final ActionTrigger eTrigger, + final ElementContext aContext, + final String sText, + final Location aStartLocation, + final Location aEndLocation); +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeDescriptor.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeDescriptor.java new file mode 100644 index 000000000000..a51aa54f184f --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeDescriptor.java @@ -0,0 +1,130 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.attribute; + +/** Store information about a single attribute (per state) that was read + * from the parse table. 
/** Immutable description of a single attribute of a single state, as read
 *  from the parse table.
 *
 *  An attribute that is defined for several states is represented by one
 *  AttributeDescriptor object per state.
 *
 *  Instances use identity based equals()/hashCode().
 */
public class AttributeDescriptor
{
    private final int mnNamespaceId;
    private final int mnAttributeId;
    private final boolean mbCanBeUnqualified;
    private final boolean mbIsOptional;
    private final String msDefaultValue;
    private final String msAttributeName;
    private final int mnAttributeTypeId;


    /** Store the given values unmodified.
     *  @param nPrefixId
     *      Id of the namespace, returned by GetNamespaceId().
     *  @param nAttributeId
     *      Id of the attribute name, returned by GetNameId().
     *  @param bCanBeUnqualified
     *      Returned by CanBeUnqualified().
     *  @param bIsOptional
     *      Returned by IsOptional().
     *  @param sDefaultValue
     *      Returned by GetDefaultValue().
     *  @param sAttributeName
     *      Returned by GetName().
     *  @param nAttributeTypeId
     *      Id of the attribute type, returned by GetTypeId().
     */
    public AttributeDescriptor (
        final int nPrefixId,
        final int nAttributeId,
        final boolean bCanBeUnqualified,
        final boolean bIsOptional,
        final String sDefaultValue,
        final String sAttributeName,
        final int nAttributeTypeId)
    {
        this.mnAttributeTypeId = nAttributeTypeId;
        this.msAttributeName = sAttributeName;
        this.msDefaultValue = sDefaultValue;
        this.mbIsOptional = bIsOptional;
        this.mbCanBeUnqualified = bCanBeUnqualified;
        this.mnAttributeId = nAttributeId;
        this.mnNamespaceId = nPrefixId;
    }


    /** @return the id of the attribute type. */
    public int GetTypeId() { return mnAttributeTypeId; }

    /** @return the namespace (prefix) id that was given to the constructor. */
    public int GetNamespaceId () { return mnNamespaceId; }

    /** @return the id of the attribute name. */
    public int GetNameId () { return mnAttributeId; }

    /** @return the "can be unqualified" flag that was given to the constructor. */
    public boolean CanBeUnqualified () { return mbCanBeUnqualified; }

    /** @return the "is optional" flag that was given to the constructor. */
    public boolean IsOptional () { return mbIsOptional; }

    /** @return the default value exactly as it was given to the constructor. */
    public String GetDefaultValue () { return msDefaultValue; }

    /** @return the attribute name. */
    public String GetName () { return msAttributeName; }


    /** Short description (name, name id, type id) for log output. */
    @Override
    public String toString ()
    {
        final String sFormat = "attribute %s(%d) of type %d";
        return String.format(sFormat, msAttributeName, mnAttributeId, mnAttributeTypeId);
    }
}
b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeManager.java @@ -0,0 +1,273 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.attribute; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.Vector; + +import org.apache.openoffice.ooxml.parser.Log; +import org.apache.openoffice.ooxml.parser.NameMap; +import org.apache.openoffice.ooxml.parser.NamespaceMap; +import org.apache.openoffice.ooxml.parser.type.SimpleTypeManager; + + +/** Match a set of attributes from the document with the attribute + * specifications of a state. + * + */ +public class AttributeManager +{ + /** Create a new AttributeManager for the attribute specifications that + * are given in the parse table. 
+ */ + public AttributeManager ( + final Vector<String[]> aData, + final NamespaceMap aNamespaceMap, + final NameMap aNameMap, + final NameMap aStateNameMap, + final SimpleTypeManager aSimpleTypeManager, + final Vector<String> aErrorsAndWarnings) + { + maStateIdToAttributesMap = new HashMap<>(); + maNamespaceMap = aNamespaceMap; + maNameMap = aNameMap; + maStateNameMap = aStateNameMap; + maSimpleTypeManager = aSimpleTypeManager; + maErrorsAndWarnings = aErrorsAndWarnings; + ParseData(aData); + } + + + + + private void ParseData (final Vector<String[]> aData) + { + for (final String[] aLine : aData) + { + final int nStateId = Integer.parseInt(aLine[1]); + final int nPrefixId = Integer.parseInt(aLine[2]); + final boolean bCanBeUnqualified = aLine[3].startsWith("u"); + final int nAttributeId = Integer.parseInt(aLine[4]); + final int nAttributeTypeId = aLine[5].equals("null") ? -1 : Integer.parseInt(aLine[5]); + final boolean bIsOptional = aLine[6].startsWith("o"); + final String sDefault = aLine[7]; + // State name. + final String sAttributeName = aLine[9]; + // Attribute type name. + + Map<Integer,AttributeDescriptor> aAttributesPerState = maStateIdToAttributesMap.get(nStateId); + if (aAttributesPerState == null) + { + aAttributesPerState = new HashMap<>(); + maStateIdToAttributesMap.put(nStateId, aAttributesPerState); + } + + final AttributeDescriptor aAttributeDescriptor = new AttributeDescriptor( + nPrefixId, + nAttributeId, + bCanBeUnqualified, + bIsOptional, + sDefault, + sAttributeName, + nAttributeTypeId); + + aAttributesPerState.put( + (nPrefixId<<16)|nAttributeId, + aAttributeDescriptor); + if (bCanBeUnqualified) + aAttributesPerState.put( + nAttributeId, + aAttributeDescriptor); + } + } + + + + + /** For the state with id nStateId, match the attributes from the document + * with the attribute specifications of that state. 
+ */ + public AttributeValues ParseAttributes ( + final int nStateId, + final AttributeProvider aDocumentAttributes) + { + final AttributeValues aValues = new AttributeValues(); + + final Map<Integer,AttributeDescriptor> aAttributesPerState = maStateIdToAttributesMap.get(nStateId); + if (aAttributesPerState == null) + { + if (aDocumentAttributes.HasAttributes()) + { + Log.Std.printf("state has not attributes defined but document provides %d attributes\n", + aDocumentAttributes.GetAttributeCount()); + for (final String[] aEntry : aDocumentAttributes) + { + Log.Dbg.printf(" %s -> %s\n", aEntry[0], aEntry[1]); + } + throw new RuntimeException(); + } + } + else + { + final Set<AttributeDescriptor> aUsedAttributes = new HashSet<>(); + + // Process all attributes from the document. + for (final String[] aEntry : aDocumentAttributes) + { + final String sRawValue = aEntry[2]; + final AttributeDescriptor aAttributeDescriptor = ProcessAttribute( + aEntry[0], + aEntry[1], + sRawValue, + aAttributesPerState); + aUsedAttributes.add(aAttributeDescriptor); + final Object aProcessedValue = maSimpleTypeManager.PreprocessValue( + sRawValue, + aAttributeDescriptor); + if (aProcessedValue == null) + { + maSimpleTypeManager.PreprocessValue( + sRawValue, + aAttributeDescriptor); + throw new RuntimeException( + String.format("value '%s' of attribute '%s' is not recognized", + sRawValue, + aAttributeDescriptor.GetName())); + } + aValues.AddAttribute( + aAttributeDescriptor, + sRawValue, + aProcessedValue); + + if (Log.Dbg != null) + { + if (aAttributeDescriptor == null) + Log.Dbg.printf("attribute %s%s is not known\n", + aEntry[0]==null ? 
"" : ":"+aEntry[0], + aEntry[1]); + else + Log.Dbg.printf("attribute %s:%s(%d:%d) has type %s(%d) and value %s('%s')\n", + maNamespaceMap.GetDescriptorForId(aAttributeDescriptor.GetNamespaceId()).Prefix, + maNameMap.GetNameForId(aAttributeDescriptor.GetNameId()), + aAttributeDescriptor.GetNamespaceId(), + aAttributeDescriptor.GetNameId(), + maStateNameMap.GetNameForId(aAttributeDescriptor.GetTypeId()), + aAttributeDescriptor.GetTypeId(), + aProcessedValue, + sRawValue); + } + } + + // Check if all required attributes where given. + for (final AttributeDescriptor aAttribute : aAttributesPerState.values()) + { + if ( ! aUsedAttributes.contains(aAttribute)) + { + if ( ! aAttribute.IsOptional()) + { + final String sMessage = String.format("attribute '"+aAttribute.GetName()+"' is not present but also not optional"); + if (maErrorsAndWarnings != null) + maErrorsAndWarnings.add(sMessage); + else + throw new RuntimeException(sMessage); + } + else + { + // Add an entry that gives access to the default value. + aValues.AddAttribute(aAttribute, null, null); + } + } + } + } + + return aValues; + } + + + + + private AttributeDescriptor ProcessAttribute ( + final String sNamespace, + final String sAttributeName, + final String sAttributeValue, + final Map<Integer,AttributeDescriptor> aAttributesPerState) + { + final AttributeDescriptor aAttributeDescriptor; + if (sNamespace == null) + { + // Attribute name has no namespace. + final int nAttributeNameId = maNameMap.GetIdForName(sAttributeName); + aAttributeDescriptor = aAttributesPerState.get(nAttributeNameId); + } + else + { + // Attribute name has explicit namespace. + final NamespaceMap.NamespaceDescriptor aDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespace); + final int nAttributeNameId = maNameMap.GetIdForName(sAttributeName); + aAttributeDescriptor = aAttributesPerState.get((aDescriptor.Id<<16) | nAttributeNameId); + } + return aAttributeDescriptor; + } + + + + + /** Remove the quotes around the given string. 
+ * If it has the special value null (without quotes) then the null reference + * is returned. + */ + private String UnquoteString (final String sValue) + { + if (sValue.equals("null")) + return null; + else + { + assert(sValue.startsWith("\"")); + assert(sValue.endsWith("\"")); + return sValue.substring(1, sValue.length()-1); + } + } + + + + + public int GetAttributeCount () + { + int nCount = 0; + for (final Map<Integer,AttributeDescriptor> aMap : maStateIdToAttributesMap.values()) + nCount += aMap.size(); + return nCount; + } + + + + + private final Map<Integer,Map<Integer,AttributeDescriptor>> maStateIdToAttributesMap; + private final NamespaceMap maNamespaceMap; + private final NameMap maNameMap; + private final NameMap maStateNameMap; + private final SimpleTypeManager maSimpleTypeManager; + private final Vector<String> maErrorsAndWarnings; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeProvider.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeProvider.java new file mode 100644 index 000000000000..ab9e28550bc2 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeProvider.java @@ -0,0 +1,99 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
/** Give access to the attributes that are read from an OOXML stream.
 *
 *  All methods forward to the XMLStreamReader that was given to the
 *  constructor, so the reader has to be positioned on an element start
 *  while this object is used.
 */
public class AttributeProvider
    implements Iterable<String[]>
{
    /** @param aReader
     *      The reader whose current element provides the attributes.
     */
    public AttributeProvider (final XMLStreamReader aReader)
    {
        maReader = aReader;
    }




    /** @return true when the current element has at least one attribute. */
    public boolean HasAttributes ()
    {
        return maReader.getAttributeCount() > 0;
    }




    /** Look up an attribute value by local name without checking the namespace.
     *  @return the value as reported by the reader for a null namespace URI.
     */
    public String GetValue (final String sKey)
    {
        return maReader.getAttributeValue(null, sKey);
    }




    /** Iterate over all attributes of the current element.
     *  Each item is an array of three strings: namespace URI, local name,
     *  and value, in that order.  The number of attributes is determined
     *  once, when the iterator is created.
     */
    @Override
    public Iterator<String[]> iterator ()
    {
        return new Iterator<String[]> ()
        {
            private int nIndex = 0;
            private final int nCount = maReader.getAttributeCount();

            @Override public boolean hasNext()
            {
                return nIndex < nCount;
            }

            @Override public String[] next()
            {
                // Honor the Iterator contract instead of asking the reader
                // for an attribute index that does not exist.
                if (nIndex >= nCount)
                    throw new java.util.NoSuchElementException(
                        "no attribute at index "+nIndex);

                final String[] aResult = new String[]
                {
                    maReader.getAttributeNamespace(nIndex),
                    maReader.getAttributeLocalName(nIndex),
                    maReader.getAttributeValue(nIndex)
                };
                ++nIndex;
                return aResult;
            }

            @Override public void remove()
            {
                // Attributes of the document can not be removed.  Say so
                // instead of silently doing nothing.
                throw new UnsupportedOperationException(
                    "attributes can not be removed");
            }
        };
    }




    /** @return the number of attributes of the current element. */
    public Integer GetAttributeCount ()
    {
        return maReader.getAttributeCount();
    }




    private final XMLStreamReader maReader;
}
000000000000..a419abbec0ec --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/attribute/AttributeValues.java @@ -0,0 +1,91 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.attribute; + +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +/** Container of attribute values of an opening tag. 
+ */ +public class AttributeValues +{ + AttributeValues () + { + maRawAttributeValues = new TreeMap<>(); + maProcessedAttributeValues = new TreeMap<>(); + } + + + + + public void AddAttribute ( + final AttributeDescriptor aAttributeDescriptor, + final String sRawValue, + final Object aProcessedValue) + { + maRawAttributeValues.put( + aAttributeDescriptor.GetName(), + sRawValue); + maProcessedAttributeValues.put( + aAttributeDescriptor.GetName(), + aProcessedValue); + } + + + + + public Iterable<Entry<String,String>> GetAttributes () + { + return maRawAttributeValues.entrySet(); + } + + + + + public String GetRawAttributeValue (final String sName) + { + return maRawAttributeValues.get(sName); + } + + + + + public Object GetProcessedAttributeValue (final String sName) + { + return maProcessedAttributeValues.get(sName); + } + + + + + public int GetAttributeCount () + { + return maRawAttributeValues.size(); + } + + + + + private Map<String,String> maRawAttributeValues; + private Map<String,Object> maProcessedAttributeValues; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/BlobParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/BlobParser.java new file mode 100644 index 000000000000..7edafaeb0b24 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/BlobParser.java @@ -0,0 +1,102 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.type; + +import org.apache.openoffice.ooxml.parser.NameMap; + +public class BlobParser implements ISimpleTypeParser +{ + public BlobParser(final String[] aLine) + { + switch(aLine[5]) + { + case "B": + meType = Type.Base64Binary; + break; + case "H": + meType = Type.HexBinary; + break; + default: + throw new RuntimeException(); + } + switch(aLine[6]) + { + case "L": + mnLengthRestriction = Integer.parseInt(aLine[7]); + break; + case "N": + mnLengthRestriction = null; + break; + default: + throw new RuntimeException(); + } + } + + + + + @Override + public Object Parse ( + final String sRawValue, + final NameMap aAttributeValueMap) + { + if (mnLengthRestriction != null) + if (sRawValue.length()/2 != mnLengthRestriction) + return null; + /* + throw new RuntimeException( + String.format( + "length restriction (=%d) is violated, actual length is %d", + mnLengthRestriction, + sRawValue.length())); + */ + switch(meType) + { + case Base64Binary: + throw new RuntimeException("not yet implemented"); + + case HexBinary: + try + { + return Integer.parseInt(sRawValue, 16); + } + catch (NumberFormatException aException) + { + return null; + } + + default: + throw new RuntimeException(); + } + } + + + + + enum Type + { + Base64Binary, + HexBinary + }; + private final Type meType; + private final Integer mnLengthRestriction; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/DateTimeParser.java 
b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/DateTimeParser.java new file mode 100644 index 000000000000..979d4051b6e5 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/DateTimeParser.java @@ -0,0 +1,42 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.type; + +import org.apache.openoffice.ooxml.parser.NameMap; + +public class DateTimeParser implements ISimpleTypeParser +{ + + public DateTimeParser(String[] aLine) + { + // TODO Auto-generated constructor stub + } + + @Override + public Object Parse( + final String sRawValue, + final NameMap aAttributeValueMap) + { + throw new RuntimeException(); + } + +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ISimpleTypeParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ISimpleTypeParser.java new file mode 100644 index 000000000000..aaa234009767 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ISimpleTypeParser.java @@ -0,0 +1,31 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.type; + +import org.apache.openoffice.ooxml.parser.NameMap; + +public interface ISimpleTypeParser +{ + Object Parse ( + final String sRawValue, + final NameMap aAttributeValueMap); +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ListParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ListParser.java new file mode 100644 index 000000000000..94feeb04e3b2 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/ListParser.java @@ -0,0 +1,53 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.type; + +import org.apache.openoffice.ooxml.parser.NameMap; + +public class ListParser implements ISimpleTypeParser +{ + + public ListParser (final ISimpleTypeParser aItemParser) + { + maItemParser = aItemParser; + } + + + + + @Override + public Object Parse( + final String sRawValue, + final NameMap aAttributeValueMap) + { + final String[] aParts = sRawValue.split("\\s+"); + final Object[] aValues = new Object[aParts.length]; + for (int nIndex=0; nIndex<aParts.length; ++nIndex) + aValues[nIndex] = maItemParser.Parse(aParts[nIndex], aAttributeValueMap); + return aValues; + } + + + + + private final ISimpleTypeParser maItemParser; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/NumberParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/NumberParser.java new file mode 100644 index 000000000000..19fc833867cf --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/NumberParser.java @@ -0,0 +1,270 @@ +/************************************************************** +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. 
See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +package org.apache.openoffice.ooxml.parser.type; + +import java.util.HashSet; +import java.util.Set; + +import org.apache.openoffice.ooxml.parser.NameMap; + +public class NumberParser implements ISimpleTypeParser +{ + public NumberParser (final String[] aLine) + { + switch(aLine[5]) + { + case "u1": + meNumberType = NumberType.Boolean; + meJavaNumberType = JavaNumberType.Boolean; + break; + case "s8": + meNumberType = NumberType.Byte; + meJavaNumberType = JavaNumberType.Byte; + break; + case "u8": + meNumberType = NumberType.UnsignedByte; + meJavaNumberType = JavaNumberType.Short; + break; + case "s16": + meNumberType = NumberType.Short; + meJavaNumberType = JavaNumberType.Short; + break; + case "u16": + meNumberType = NumberType.UnsignedShort; + meJavaNumberType = JavaNumberType.Integer; + break; + case "s32": + meNumberType = NumberType.Int; + meJavaNumberType = JavaNumberType.Integer; + break; + case "u32": + meNumberType = NumberType.UnsignedInt; + meJavaNumberType = JavaNumberType.Long; + break; + case "s64": + meNumberType = NumberType.Long; + meJavaNumberType = JavaNumberType.Long; + break; + case "u64": + meNumberType = NumberType.UnsignedLong; + meJavaNumberType = JavaNumberType.Long; + break; + case "s*": + meNumberType = NumberType.Integer; + meJavaNumberType = JavaNumberType.Long; + break; + case "f": + meNumberType = NumberType.Float; + meJavaNumberType = JavaNumberType.Float; + break; + case "d": + meNumberType = NumberType.Double; + meJavaNumberType = JavaNumberType.Double; + break; + default: + throw new RuntimeException("unsupported numerical type "+aLine[5]); + } + + switch(aLine[6]) + { + case "E": + meRestrictionType = RestrictionType.Enumeration; + maEnumeration = new HashSet<>(); + for (int nIndex=7; nIndex<aLine.length; ++nIndex) + 
maEnumeration.add(ParseNumber(aLine[nIndex])); + break; + + case "S": + meRestrictionType = RestrictionType.Size; + for (int nIndex=7; nIndex<=9; nIndex+=2) + if (nIndex<aLine.length) + switch (aLine[nIndex]) + { + case "<=": + maMaximumValue = ParseNumber(aLine[nIndex+1]); + mbIsMaximumInclusive = true; + break; + case "<": + maMaximumValue = ParseNumber(aLine[nIndex+1]); + mbIsMaximumInclusive = false; + break; + case ">=": + maMinimumValue = ParseNumber(aLine[nIndex+1]); + mbIsMinimumInclusive = true; + break; + case ">": + maMinimumValue = ParseNumber(aLine[nIndex+1]); + mbIsMinimumInclusive = false; + break; + } + break; + + case "N": + meRestrictionType = RestrictionType.None; + break; + + default: + throw new RuntimeException("unsupported numerical restriction "+aLine[6]); + } + } + + + + + @Override + public Object Parse( + final String sRawValue, + final NameMap aAttributeValueMap) + { + final Object aNumber = ParseNumber(sRawValue); + switch(meRestrictionType) + { + case Enumeration: + if (maEnumeration.contains(aNumber)) + return aNumber; + else + return null; + + case Size: + if (maMinimumValue != null) + if (mbIsMinimumInclusive) + { + if (CompareTo(aNumber, maMinimumValue, meJavaNumberType) < 0) + return null; + } + else + { + if (CompareTo(aNumber, maMinimumValue, meJavaNumberType) <= 0) + return null; + } + if (maMaximumValue != null) + if (mbIsMaximumInclusive) + { + if (CompareTo(aNumber, maMaximumValue, meJavaNumberType) > 0) + return null; + } + else + { + if (CompareTo(aNumber, maMaximumValue, meJavaNumberType) >= 0) + return null; + } + return aNumber; + + case None: + return aNumber; + + default: + throw new RuntimeException(); + } + } + + + + + Object ParseNumber (final String sNumber) + { + switch(meJavaNumberType) + { + case Boolean: return Boolean.parseBoolean(sNumber); + case Byte: return Byte.parseByte(sNumber); + case Short: return Short.parseShort(sNumber); + case Integer: return Integer.parseInt(sNumber); + case Long: return 
Long.parseLong(sNumber); + case Float: return Float.parseFloat(sNumber); + case Double: return Double.parseDouble(sNumber); + default: + throw new RuntimeException(); + } + } + + + + + private static int CompareTo ( + final Object aLeft, + final Object aRight, + final JavaNumberType eType) + { + switch(eType) + { + case Boolean: + return ((Boolean)aLeft).compareTo((Boolean)aRight); + case Byte: + return ((Byte)aLeft).compareTo((Byte)aRight); + case Short: + return ((Short)aLeft).compareTo((Short)aRight); + case Integer: + return ((Integer)aLeft).compareTo((Integer)aRight); + case Long: + return ((Long)aLeft).compareTo((Long)aRight); + case Float: + return ((Float)aLeft).compareTo((Float)aRight); + case Double: + return ((Double)aLeft).compareTo((Double)aRight); + default: + throw new RuntimeException(); + } + } + + + + + enum NumberType + { + Boolean, + Byte, + UnsignedByte, + Short, + UnsignedShort, + Int, + UnsignedInt, + Long, + UnsignedLong, + Integer, + Float, + Double + } + enum JavaNumberType + { + Boolean, + Byte, + Short, + Integer, + Long, + Float, + Double + } + enum RestrictionType + { + Enumeration, + Size, + None + } + private final NumberType meNumberType; + private final JavaNumberType meJavaNumberType; + private final RestrictionType meRestrictionType; + private Set<Object> maEnumeration; + private Object maMinimumValue; + private boolean mbIsMinimumInclusive; + private Object maMaximumValue; + private boolean mbIsMaximumInclusive; +} diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/SimpleTypeManager.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/SimpleTypeManager.java new file mode 100644 index 000000000000..abeb529ddc52 --- /dev/null +++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/type/SimpleTypeManager.java @@ -0,0 +1,118 @@ +/************************************************************** +* +* Licensed to the Apache 
package org.apache.openoffice.ooxml.parser.type;

import java.util.HashMap;
import java.util.Map;
import java.util.Vector;

import org.apache.openoffice.ooxml.parser.NameMap;
import org.apache.openoffice.ooxml.parser.attribute.AttributeDescriptor;

/** Registry that maps simple type ids to the parsers of their variants.
 *
 *  A simple type can have more than one variant parser.  The variants are
 *  kept in the order in which they appear in the input table and are tried
 *  in that order when a raw attribute value is preprocessed.
 */
public class SimpleTypeManager
{
    /** Build the registry from the rows of the simple type table.
     *
     *  @param aData
     *      One row per variant.  Only the columns 1 (simple type id),
     *      3 (list flag) and 4 (parser kind) are read here; the complete row
     *      is handed to the constructor of the selected parser.
     *  @param aAttributeValueMap
     *      Passed unmodified to every parser when a value is processed.
     */
    public SimpleTypeManager(
        final Vector<String[]> aData,
        final NameMap aAttributeValueMap)
    {
        maAttributeValueMap = aAttributeValueMap;
        maSimpleTypeToParsersMap = new HashMap<>();
        ParseData(aData);
    }




    /** Create one parser per table row and append it to the variant list
     *  of its simple type.
     *
     *  @throws RuntimeException
     *      when column 4 is not one of "S", "N", "D" or "B".
     */
    private void ParseData (final Vector<String[]> aData)
    {
        for (final String[] aLine : aData)
        {
            final int nSimpleTypeId = Integer.parseInt(aLine[1]);
            // NOTE(review): column 2 looks like a variant index; it is not used.
            final boolean bIsList = aLine[3].equals("L");

            final ISimpleTypeParser aVariantParser;
            switch (aLine[4])
            {
                case "S":
                    aVariantParser = new StringParser(aLine);
                    break;
                case "N":
                    aVariantParser = new NumberParser(aLine);
                    break;
                case "D":
                    aVariantParser = new DateTimeParser(aLine);
                    break;
                case "B":
                    aVariantParser = new BlobParser(aLine);
                    break;
                default:
                    throw new RuntimeException("unexpected parser type: "+aLine[4]);
            }

            Vector<ISimpleTypeParser> aVariants = maSimpleTypeToParsersMap.get(nSimpleTypeId);
            if (aVariants == null)
            {
                aVariants = new Vector<>();
                maSimpleTypeToParsersMap.put(nSimpleTypeId, aVariants);
            }

            // A list type wraps the parser of its items.
            if (bIsList)
                aVariants.add(new ListParser(aVariantParser));
            else
                aVariants.add(aVariantParser);
        }
    }




    /** Convert a raw attribute value with the parsers that are registered
     *  for the type of the given attribute.
     *
     *  The variants are tried in registration order.  A variant that throws
     *  does not end the search: the remaining variants still get the chance
     *  to accept the value.
     *
     *  @param sRawValue
     *      The attribute value as it was read from the document.
     *  @param aAttributeDescriptor
     *      Its type id selects the list of variant parsers.
     *  @return
     *      The first non-null result of a variant parser.  When no variant
     *      accepts the value then the string "error" is returned if at least
     *      one variant threw an exception and <code>null</code> otherwise.
     *  @throws RuntimeException
     *      when no parser is registered for the type id of the attribute.
     */
    public Object PreprocessValue (
        final String sRawValue,
        final AttributeDescriptor aAttributeDescriptor)
    {
        final Vector<ISimpleTypeParser> aTypeParsers = maSimpleTypeToParsersMap.get(aAttributeDescriptor.GetTypeId());
        if (aTypeParsers == null)
            throw new RuntimeException("type "+aAttributeDescriptor.GetTypeId()+" is not supported");

        boolean bHasVariantFailed = false;
        for (final ISimpleTypeParser aParser : aTypeParsers)
        {
            try
            {
                final Object aProcessedValue = aParser.Parse(
                    sRawValue,
                    maAttributeValueMap);
                if (aProcessedValue != null)
                    return aProcessedValue;
            }
            catch(final Exception aException)
            {
                // Deliberately not rethrown: a value that one variant can
                // not digest may still be valid for a later variant.
                // Remember the failure so that the "error" marker is still
                // reported when no variant accepts the value.
                bHasVariantFailed = true;
            }
        }
        return bHasVariantFailed ? "error" : null;
    }




    private final NameMap maAttributeValueMap;
    private final Map<Integer,Vector<ISimpleTypeParser>> maSimpleTypeToParsersMap;
}
package org.apache.openoffice.ooxml.parser.type;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.openoffice.ooxml.parser.NameMap;

/** Parser for a string valued variant of a simple type.
 *
 *  The restriction that a value has to satisfy is selected by column 5 of
 *  the table row that is given to the constructor:
 *  "E" (enumeration), "P" (pattern), "L" (length) or "N" (none).
 */
public class StringParser
    implements ISimpleTypeParser
{

    /** Read the restriction of the string type from one table row.
     *
     *  @param aLine
     *      Column 5 holds the restriction kind.  The following columns
     *      depend on the kind: the ids of the enumeration values from
     *      column 6 on for "E", the regular expression in column 6 for "P",
     *      the minimum and maximum length in columns 6 and 7 for "L".
     *  @throws RuntimeException
     *      when the restriction kind is not one of "E", "P", "L" or "N".
     */
    public StringParser (final String[] aLine)
    {
        Set<Integer> aEnumeration = null;
        Pattern aPattern = null;
        int nMinimumLength = 0;
        int nMaximumLength = 0;

        switch(aLine[5])
        {
            case "E":
                meRestrictionType = RestrictionType.Enumeration;
                aEnumeration = new HashSet<>();
                for (int nIndex=6; nIndex<aLine.length; ++nIndex)
                    aEnumeration.add(Integer.parseInt(aLine[nIndex]));
                break;

            case "P":
                meRestrictionType = RestrictionType.Pattern;
                // XML Schema patterns name Unicode blocks as \p{IsXyz} while
                // java.util.regex expects \p{InXyz} for blocks.
                aPattern = Pattern.compile(aLine[6].replace("\\p{Is", "\\p{In"));
                break;

            case "L":
                meRestrictionType = RestrictionType.Length;
                nMinimumLength = Integer.parseInt(aLine[6]);
                nMaximumLength = Integer.parseInt(aLine[7]);
                break;

            case "N":
                meRestrictionType = RestrictionType.None;
                break;

            default:
                throw new RuntimeException("unexpected string restriction type: "+aLine[5]);
        }

        maEnumeration = aEnumeration;
        maPattern = aPattern;
        mnMinimumLength = nMinimumLength;
        mnMaximumLength = nMaximumLength;
    }




    /** Check the raw value against the restriction of this string type.
     *
     *  @param sRawValue
     *      The attribute value as read from the document.
     *  @param aAttributeValueMap
     *      Used only for enumerations, to look up the id of the raw value.
     *  @return
     *      The id of the value (as Integer) for enumerations, the unmodified
     *      raw value for all other restriction kinds, or <code>null</code>
     *      when the value violates the restriction.  A <code>null</code>
     *      result is not an error; it only says that this variant does not
     *      accept the value.
     */
    @Override
    public Object Parse (
        final String sRawValue,
        final NameMap aAttributeValueMap)
    {
        switch(meRestrictionType)
        {
            case Enumeration:
            {
                final int nId = aAttributeValueMap.GetIdForOptionalName(sRawValue);
                if (maEnumeration.contains(nId))
                    return nId;
                else
                    return null;
            }

            case Pattern:
                // The pattern has to match the whole value.
                if (maPattern.matcher(sRawValue).matches())
                    return sRawValue;
                else
                    return null;

            case Length:
            {
                // XML Schema measures the length of a string in characters.
                // Count code points so that a character outside the BMP,
                // which occupies two chars in a Java string, counts as one.
                final int nLength = sRawValue.codePointCount(0, sRawValue.length());
                if (nLength<mnMinimumLength || nLength>mnMaximumLength)
                    return null;
                else
                    return sRawValue;
            }

            case None:
                return sRawValue;

            default:
                throw new RuntimeException("unexpected string restriction type: "+meRestrictionType);
        }
    }




    enum RestrictionType
    {
        Enumeration,
        Pattern,
        Length,
        None
    }
    private final RestrictionType meRestrictionType;
    /** Ids of the allowed values; only set for RestrictionType.Enumeration. */
    private final Set<Integer> maEnumeration;
    /** Only set for RestrictionType.Pattern. */
    private final Pattern maPattern;
    /** Inclusive bounds; only meaningful for RestrictionType.Length. */
    private final int mnMinimumLength;
    private final int mnMaximumLength;
}