summaryrefslogtreecommitdiff
path: root/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java')
-rw-r--r--ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java510
1 files changed, 510 insertions, 0 deletions
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
new file mode 100644
index 000000000000..bf15b39d71e2
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
@@ -0,0 +1,510 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.Vector;
+
+import javax.xml.stream.XMLStreamException;
+
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
+import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
+import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
+import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
+import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
+import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
+import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
+import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;
+
+public class SchemaReader
+{
+ public static void main (final String ... aArgumentList)
+ {
+ if (aArgumentList.length != 1)
+ {
+ System.err.printf("usage: SchemaParser <driver-file>\n");
+ System.err.printf(" driver file can contain these lines:\n");
+ System.err.printf("# Comments\n");
+ System.err.printf(" are ignored\n");
+ System.err.printf("schema <mark> <file-name>\n");
+ System.err.printf(" specifies a top-level schema file to read\n");
+ System.err.printf("output-schema <file-name>\n");
+ System.err.printf(" write schema information to file\n");
+ System.err.printf("output-optimized-schema <file-name>\n");
+ System.err.printf(" write information about optimized schema to file\n");
+ System.exit(1);
+ }
+
+ final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
+ aReader.Run();
+ }
+
+
+
+
+    /** Create the (initially empty) containers and counters and then read
+     *  the driver file.
+     *  @param aDriverFile
+     *      The driver file; it is handed unchanged to ParseDriverFile().
+     */
+    private SchemaReader (final File aDriverFile)
+    {
+        // Statistics that are accumulated while schema files are parsed.
+        mnTotalByteCount = 0;
+        mnTotalLineCount = 0;
+
+        // Schema data.
+        maSchemaBase = new SchemaBase();
+        maTopLevelSchemas = new HashMap<String,Schema>();
+
+        // Book keeping of files and operations named by the driver file.
+        maMainSchemaFiles = new Vector<String[]>();
+        maOutputOperations = new Vector<Runnable>();
+        maSchemaFiles = new HashSet<String>();
+        maWorkList = new LinkedList<String>();
+
+        // All members have to be set up before the driver file is processed
+        // because processing fills some of the containers.
+        ParseDriverFile(aDriverFile);
+    }
+
+
+
+
+ /** Read and parse the driver file that specifies which schema files to read
+ * and where the output should go.
+ */
+ private void ParseDriverFile (final File aDriverFile)
+ {
+ if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
+ {
+ System.err.printf("can not read driver file\n");
+ System.exit(1);
+ }
+
+ try
+ {
+ final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile));
+ while(true)
+ {
+ String sLine = aIn.readLine();
+ if (sLine == null)
+ break;
+ // Lines starting with # are comment lines and are ignored.
+ if (sLine.matches("^\\s*#.*"))
+ continue;
+ // Lines containing only whitespace are also ignored.
+ else if (sLine.matches("^\\s*$"))
+ continue;
+
+ // Handle line continuation.
+ while (sLine.endsWith("\\"))
+ sLine = sLine.substring(0, sLine.length()-1) + aIn.readLine();
+
+ final Vector<String> aParts = SplitLine(sLine);
+ switch (aParts.get(0))
+ {
+ case "schema":
+ maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
+ break;
+
+ case "output-schema":
+ maOutputOperations.add(new Runnable()
+ {
+ final File maFile = CreateCheckedOutputFile(aParts.get(1));
+ @Override public void run()
+ {
+ WriteSchema(maFile);
+ }
+ });
+ break;
+
+ case "output-optimized-schema":
+ maOutputOperations.add(new Runnable()
+ {
+ final File maFile = CreateCheckedOutputFile(aParts.get(1));
+ @Override public void run()
+ {
+ WriteOptimizedSchema(maFile);
+ }
+ });
+ break;
+
+ case "output-nonvalidating-parse-tables":
+ maOutputOperations.add(new Runnable()
+ {
+ final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+ final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
+ final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
+ @Override public void run()
+ {
+ WriteNonValidatingParseTables(
+ aAutomatonLogFile,
+ aSimpleTypeLogFile,
+ aParseTableFile);
+ }
+ });
+ break;
+
+ case "output-validating-parse-tables":
+ maOutputOperations.add(new Runnable()
+ {
+ final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+ final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
+ final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
+ @Override public void run()
+ {
+ WriteValidatingParseTables(
+ aAutomatonLogFile,
+ aSimpleTypeLogFile,
+ aParseTableFile);
+ }
+ });
+ break;
+
+ case "output-html-page":
+ maOutputOperations.add(new Runnable()
+ {
+ final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1));
+ @Override public void run()
+ {
+ WriteHTMLPage(aHTMLPageFile);
+ }
+ });
+ break;
+
+ default:
+ System.err.printf("unknown command '%s' in driver file", aParts.get(0));
+ System.exit(1);
+ }
+ }
+ aIn.close();
+ }
+ catch (final Exception aException)
+ {
+ aException.printStackTrace();
+ }
+ }
+
+
+
+
+ private void Run ()
+ {
+ try
+ {
+ ParseSchemaFiles();
+ }
+ catch (final Exception aException)
+ {
+ aException.printStackTrace();
+ }
+
+ maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
+ for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
+ aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
+
+ System.out.printf(" optimization left %d complex types and %d simple types\n",
+ maOptimizedSchemaBase.ComplexTypes.GetCount(),
+ maOptimizedSchemaBase.SimpleTypes.GetCount());
+
+ for (final Runnable aOperation : maOutputOperations)
+ {
+ aOperation.run();
+ }
+ }
+
+
+
+
+    /** Parse the main schema files that are listed in the driver file and then
+     *  all schema files that are queued in maWorkList while parsing.
+     *  Each main schema file gets its own Schema object which is stored in
+     *  maTopLevelSchemas under the short name given in the driver file.
+     *  Statistics about the parsed files are written to stdout.
+     *
+     *  The process exits with status 1 when a main schema file does not exist
+     *  or can not be read.
+     *
+     *  @throws XMLStreamException
+     *      Passed on from ParseSchemaFile().
+     */
+    private void ParseSchemaFiles ()
+        throws XMLStreamException
+    {
+        System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
+
+        // The reported file, line and byte counts include the main schema files,
+        // so the reported time has to include them as well.
+        final long nStartTime = System.currentTimeMillis();
+
+        for (final String[] aEntry : maMainSchemaFiles)
+        {
+            final String sMainSchemaShortname = aEntry[0];
+            final String sMainSchemaFile = aEntry[1];
+            final File aMainSchemaFile = new File(sMainSchemaFile);
+            if ( ! aMainSchemaFile.exists())
+            {
+                System.err.printf("schema file '%s' does not exist\n", sMainSchemaFile);
+                System.exit(1);
+            }
+            if ( ! aMainSchemaFile.canRead())
+            {
+                System.err.printf("can not read schema file '%s'\n", sMainSchemaFile);
+                System.exit(1);
+            }
+
+            final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
+            ParseSchemaFile(sMainSchemaFile, aSchema);
+            maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
+        }
+
+        // Imported files are parsed without a Schema object of their own.
+        while ( ! maWorkList.isEmpty())
+        {
+            ParseSchemaFile(maWorkList.poll(), null);
+        }
+        final long nEndTime = System.currentTimeMillis();
+
+        System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
+            maSchemaFiles.size(),
+            mnTotalLineCount,
+            mnTotalByteCount,
+            (nEndTime-nStartTime)/1000.0);
+        System.out.printf(" found %d complex types and %d simple types\n",
+            maSchemaBase.ComplexTypes.GetCount(),
+            maSchemaBase.SimpleTypes.GetCount());
+
+        int nTopLevelElementCount = 0;
+        for (final Schema aSchema : maTopLevelSchemas.values())
+            nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
+        System.out.printf(" the %d top level schemas have %d elements\n",
+            maTopLevelSchemas.size(),
+            nTopLevelElementCount);
+    }
+
+
+
+
+    /** Parse a single schema file, update the line and byte statistics and
+     *  queue the schema files that it imports.
+     *  @param sSchemaFilename
+     *      Name of the schema file.  It is recorded in maSchemaFiles.
+     *  @param aSchema
+     *      Handed to the SchemaParser.  ParseSchemaFiles() passes null for
+     *      imported files.
+     *  @throws XMLStreamException
+     *      Declared for the parser calls made here.
+     */
+    private void ParseSchemaFile (
+        final String sSchemaFilename,
+        final Schema aSchema)
+        throws XMLStreamException
+    {
+        System.out.printf("parsing %s\n", sSchemaFilename);
+        // NOTE(review): main schema files are recorded under the name used in
+        // the driver file while imports are recorded by absolute path -- confirm
+        // that a main schema can not be queued a second time as an import.
+        maSchemaFiles.add(sSchemaFilename);
+
+        final File aSchemaFile = new File(sSchemaFilename);
+        final SchemaParser aSchemaParser = new SchemaParser(aSchemaFile, aSchema, maSchemaBase);
+        aSchemaParser.Parse();
+
+        // Accumulate the statistics that ParseSchemaFiles() prints at the end.
+        mnTotalLineCount += aSchemaParser.GetLineCount();
+        mnTotalByteCount += aSchemaParser.GetByteCount();
+
+        for (final File aImportedFile : aSchemaParser.GetImportedSchemaFilenames())
+        {
+            final String sImportedPath = aImportedFile.getAbsolutePath();
+            AddSchemaReference(sImportedPath);
+        }
+    }
+
+
+
+
+    /** Remember a schema file for later parsing.
+     *  A file name that is already contained in maSchemaFiles is ignored so
+     *  that every file is queued at most once.
+     *  @param sSchemaFilename
+     *      Name of the schema file.  Must not be null.
+     *  @throws RuntimeException
+     *      When sSchemaFilename is null.
+     */
+    private void AddSchemaReference (final String sSchemaFilename)
+    {
+        // Reject null before it can reach the set or the work list.
+        if (sSchemaFilename == null)
+            throw new RuntimeException("can not add schema reference: file name is null");
+
+        // Set.add() returns false for a name that is already known.  Only new
+        // names are appended to the work list.
+        if (maSchemaFiles.add(sSchemaFilename))
+            maWorkList.add(sSchemaFilename);
+    }
+
+
+
+
+    /** Split the given string at whitespace but not at whitespace inside double quotes.
+     *  The quotes themselves are not part of the result.  Text inside quotes
+     *  becomes one part, even when it is empty.  Text outside quotes is split
+     *  at whitespace and empty pieces are dropped.
+     *  When the last quote is not closed then the text after it is treated as
+     *  quoted, unless it is empty.
+     *  @param sLine
+     *      The line to split.
+     *  @return
+     *      The parts of the line in their original order.
+     */
+    private Vector<String> SplitLine (final String sLine)
+    {
+        final Vector<String> aParts = new Vector<>();
+
+        // The limit of -1 keeps trailing empty strings.  Without it an empty
+        // quoted argument at the end of the line ("") would silently vanish.
+        final String[] aSegments = sLine.split("\"", -1);
+        final int nLastIndex = aSegments.length - 1;
+
+        // Segments alternate between outside (even index) and inside (odd index) quotes.
+        for (int nIndex=0; nIndex<=nLastIndex; ++nIndex)
+        {
+            final String sSegment = aSegments[nIndex];
+            final boolean bIsInsideQuotes = (nIndex % 2) == 1;
+            if (bIsInsideQuotes)
+            {
+                // An odd last segment means that the closing quote is missing.
+                // Do not invent an empty argument for a stray quote at the end.
+                final boolean bIsUnterminated = nIndex == nLastIndex;
+                if ( ! (bIsUnterminated && sSegment.isEmpty()))
+                    aParts.add(sSegment);
+            }
+            else
+            {
+                for (final String sInnerPart : sSegment.split("\\s+"))
+                {
+                    if ( ! sInnerPart.isEmpty())
+                        aParts.add(sInnerPart);
+                }
+            }
+        }
+
+        return aParts;
+    }
+
+
+
+
+    /** Create a File object for a given file name.
+     *  Check that the file is writable, i.e. its directory exists and that if
+     *  the file already exists it can be replaced.
+     *  A file name without a directory part is checked against the current
+     *  working directory.
+     *  @param sFilename
+     *      Name of the output file.
+     *  @return
+     *      A File object for sFilename.
+     *  @throws RuntimeException
+     *      When a check fails.
+     */
+    private File CreateCheckedOutputFile (final String sFilename)
+    {
+        final File aFile = new File(sFilename);
+
+        // getParentFile() returns null for a name like "out.txt" that has no
+        // directory part.  Make the path absolute first so that the directory
+        // can be determined.
+        final File aDirectory = aFile.getAbsoluteFile().getParentFile();
+        if (aDirectory == null || ! aDirectory.exists())
+            throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
+        if (aFile.exists() && ! aFile.canWrite())
+            throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
+        return aFile;
+    }
+
+
+
+
+    /** Let the LogGenerator write the unoptimized schema base together with
+     *  the current top level schemas to the given file.
+     *  NOTE(review): Run() replaces the top level schemas by their optimized
+     *  versions before any output operation runs -- confirm that combining
+     *  them with the unoptimized base is intended.
+     *  @param aOutputFile
+     *      The file that is passed to LogGenerator.Write().
+     */
+    private void WriteSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(
+            aOutputFile,
+            maSchemaBase,
+            maTopLevelSchemas.values());
+    }
+
+
+
+
+    /** Let the LogGenerator write the optimized schema base together with the
+     *  current top level schemas to the given file.
+     *  @param aOutputFile
+     *      The file that is passed to LogGenerator.Write().
+     */
+    private void WriteOptimizedSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(
+            aOutputFile,
+            maOptimizedSchemaBase,
+            maTopLevelSchemas.values());
+    }
+
+
+
+
+ private void WriteNonValidatingParseTables (
+ final File aAutomatonLogFile,
+ final File aSimpleTypeLogFile,
+ final File aParseTableFile)
+ {
+ long nStartTime = System.currentTimeMillis();
+ final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
+ FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values());
+ long nEndTime = System.currentTimeMillis();
+ System.out.printf(
+ "created %d non-validating automatons with %d states and %d transitions in %fs\n",
+ aAutomatons.GetAutomatonCount(),
+ aAutomatons.GetStateCount(),
+ aAutomatons.GetTransitionCount(),
+ (nEndTime-nStartTime)/1000.0);
+
+ nStartTime = System.currentTimeMillis();
+ final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create(
+ maOptimizedSchemaBase,
+ aSimpleTypeLogFile);
+ nEndTime = System.currentTimeMillis();
+ System.out.printf(
+ "created %d simple type descriptions in %fs\n",
+ aSimpleTypes.GetSimpleTypeCount(),
+ (nEndTime-nStartTime)/1000.0);
+
+ new ParserTablesGenerator(
+ aAutomatons,
+ maOptimizedSchemaBase.Namespaces,
+ aSimpleTypes,
+ maOptimizedSchemaBase.AttributeValueToIdMap)
+ .Generate(aParseTableFile);
+ }
+
+
+
+
+    /** Create the validating automatons for the optimized schema, turn them
+     *  into DFAs, minimize these, create the simple type descriptions and let
+     *  the ParserTablesGenerator write the parse tables.  The duration of every
+     *  step except the last is printed to stdout.
+     *  @param aAutomatonLogFile
+     *      Passed to the ValidatingCreator.
+     *  @param aSimpleTypeLogFile
+     *      Passed to SimpleTypeContainer.Create().
+     *  @param aParseTableFile
+     *      Passed to ParserTablesGenerator.Generate().
+     */
+    private void WriteValidatingParseTables (
+        final File aAutomatonLogFile,
+        final File aSimpleTypeLogFile,
+        final File aParseTableFile)
+    {
+        // Step 1: stack automatons.
+        final long nCreateStart = System.currentTimeMillis();
+        final FiniteAutomatonContainer aStackAutomatons =
+            new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile).Create();
+        final long nCreateEnd = System.currentTimeMillis();
+        System.out.printf(
+            "created %d validating stack automatons with %d states and %d transitions in %fs\n",
+            aStackAutomatons.GetAutomatonCount(),
+            aStackAutomatons.GetStateCount(),
+            aStackAutomatons.GetTransitionCount(),
+            (nCreateEnd-nCreateStart)/1000.0);
+
+        // Step 2: deterministic automatons.
+        final long nDFAStart = System.currentTimeMillis();
+        final FiniteAutomatonContainer aDFAs = aStackAutomatons.CreateDFAs();
+        final long nDFAEnd = System.currentTimeMillis();
+        System.out.printf(
+            "created %d deterministic automatons with %d states and %d transitions in %fs\n",
+            aDFAs.GetAutomatonCount(),
+            aDFAs.GetStateCount(),
+            aDFAs.GetTransitionCount(),
+            (nDFAEnd-nDFAStart)/1000.0);
+
+        // Step 3: minimization.
+        final long nMinimizeStart = System.currentTimeMillis();
+        final FiniteAutomatonContainer aMinimizedDFAs = aDFAs.MinimizeDFAs();
+        final long nMinimizeEnd = System.currentTimeMillis();
+        System.out.printf(
+            "minimized automaton in %fs, there are now %d states and %d transitions\n",
+            (nMinimizeEnd-nMinimizeStart)/1000.0,
+            aMinimizedDFAs.GetStateCount(),
+            aMinimizedDFAs.GetTransitionCount());
+
+        // Step 4: simple types.
+        final long nSimpleTypeStart = System.currentTimeMillis();
+        final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create(
+            maOptimizedSchemaBase,
+            aSimpleTypeLogFile);
+        final long nSimpleTypeEnd = System.currentTimeMillis();
+        System.out.printf(
+            "created %d simple type descriptions in %fs\n",
+            aSimpleTypes.GetSimpleTypeCount(),
+            (nSimpleTypeEnd-nSimpleTypeStart)/1000.0);
+
+        // Step 5: write the tables for the minimized automatons.
+        final ParserTablesGenerator aGenerator = new ParserTablesGenerator(
+            aMinimizedDFAs,
+            maOptimizedSchemaBase.Namespaces,
+            aSimpleTypes,
+            maOptimizedSchemaBase.AttributeValueToIdMap);
+        aGenerator.Generate(aParseTableFile);
+    }
+
+
+
+
+ private void WriteHTMLPage (
+ final File aHTMLPageFile)
+ {
+ long nStartTime = System.currentTimeMillis();
+
+ new HtmlGenerator(maOptimizedSchemaBase, maTopLevelSchemas, aHTMLPageFile).Generate();
+
+ long nEndTime = System.currentTimeMillis();
+ System.out.printf(
+ "created HTML page in %fs\n",
+ (nEndTime-nStartTime)/1000.0);
+ }
+
+
+
+
+ private final SchemaBase maSchemaBase; // Created in the constructor; handed to every Schema and SchemaParser.
+ private SchemaBase maOptimizedSchemaBase; // Set by Run() from maSchemaBase.GetOptimizedSchema(); read by the output operations.
+ private final Map<String,Schema> maTopLevelSchemas; // Short name from a "schema" driver line -> its Schema; Run() replaces the values by optimized schemas.
+ private final Vector<String[]> maMainSchemaFiles; // One {short name, file name} pair per "schema" driver line.
+ private final Queue<String> maWorkList; // Names of schema files that were queued by AddSchemaReference() and still have to be parsed.
+ private final Vector<Runnable> maOutputOperations; // Operations registered by the "output-*" driver lines; executed by Run().
+ private final Set<String> maSchemaFiles; // Names of all schema files that have been parsed or queued.
+ private int mnTotalLineCount; // Sum of GetLineCount() over all parsed schema files.
+ private int mnTotalByteCount; // Sum of GetByteCount() over all parsed schema files.
+}