diff options
Diffstat (limited to 'ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java')
-rw-r--r-- | ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java | 510 |
1 files changed, 510 insertions, 0 deletions
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

package org.apache.openoffice.ooxml.schema;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.Vector;

import javax.xml.stream.XMLStreamException;

import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator;
import org.apache.openoffice.ooxml.schema.model.schema.Schema;
import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;

/** Command line tool that reads a driver file, parses the schema files listed
 *  in it (and the schema files they import), optimizes the resulting schema
 *  and then runs the output operations requested by the driver file.
 */
public class SchemaReader
{
    /** Entry point.  Expects exactly one argument, the name of the driver file.
     *  Prints a usage message and exits with status 1 otherwise.
     */
    public static void main (final String ... aArgumentList)
    {
        if (aArgumentList.length != 1)
        {
            System.err.printf("usage: SchemaParser <driver-file>\n");
            System.err.printf(" driver file can contain these lines:\n");
            System.err.printf("# Comments\n");
            System.err.printf(" are ignored\n");
            System.err.printf("schema <mark> <file-name>\n");
            System.err.printf(" specifies a top-level schema file to read\n");
            System.err.printf("output-schema <file-name>\n");
            System.err.printf(" write schema information to file\n");
            System.err.printf("output-optimized-schema <file-name>\n");
            System.err.printf(" write information about optimized schema to file\n");
            System.err.printf("output-nonvalidating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n");
            System.err.printf(" write parse tables created from non-validating automatons to file\n");
            System.err.printf("output-validating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n");
            System.err.printf(" write parse tables created from validating automatons to file\n");
            System.err.printf("output-html-page <file-name>\n");
            System.err.printf(" write HTML page for the optimized schema to file\n");
            System.exit(1);
        }

        final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
        aReader.Run();
    }




    /** Set up empty containers and read the driver file.  Schema files are
     *  not parsed here, that is done by Run().
     */
    private SchemaReader (final File aDriverFile)
    {
        maSchemaBase = new SchemaBase();
        maTopLevelSchemas = new HashMap<>();
        maMainSchemaFiles = new Vector<>();
        maSchemaFiles = new HashSet<>();
        maWorkList = new LinkedList<>();
        maOutputOperations = new Vector<>();
        mnTotalLineCount = 0;
        mnTotalByteCount = 0;

        ParseDriverFile(aDriverFile);
    }




    /** Read and parse the driver file that specifies which schema files to read
     *  and where the output should go.
     *  Any error (unreadable file, unknown command, missing argument, output
     *  file that can not be written) terminates the program with status 1 so
     *  that a partially read driver file is never acted on.
     */
    private void ParseDriverFile (final File aDriverFile)
    {
        if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
        {
            System.err.printf("can not read driver file\n");
            System.exit(1);
        }

        // try-with-resources closes the reader also when an exception is thrown.
        try (final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile)))
        {
            String sLine;
            while ((sLine = aIn.readLine()) != null)
            {
                // Lines starting with # are comment lines and are ignored.
                if (sLine.matches("^\\s*#.*"))
                    continue;
                // Lines containing only whitespace are also ignored.
                if (sLine.matches("^\\s*$"))
                    continue;

                final Vector<String> aParts = SplitLine(ReadContinuationLines(aIn, sLine));
                // A line like "" contains no parts at all.
                if (aParts.isEmpty())
                    continue;

                ProcessDriverCommand(aParts);
            }
        }
        catch (final IOException | RuntimeException aException)
        {
            // Do not continue with an incompletely processed driver file.
            aException.printStackTrace();
            System.exit(1);
        }
    }




    /** Handle line continuation: as long as the line ends with a backslash,
     *  remove the backslash and append the next line of the input.
     *  A backslash at the end of the last line of the file is just removed.
     */
    private static String ReadContinuationLines (
        final BufferedReader aIn,
        final String sFirstLine)
        throws IOException
    {
        String sLine = sFirstLine;
        while (sLine.endsWith("\\"))
        {
            sLine = sLine.substring(0, sLine.length()-1);
            final String sContinuation = aIn.readLine();
            if (sContinuation == null)
                break;
            sLine += sContinuation;
        }
        return sLine;
    }




    /** Process one command of the driver file.
     *  "schema" commands are remembered in maMainSchemaFiles, all "output-*"
     *  commands are stored as operations in maOutputOperations that are
     *  executed later by Run().  Output files are checked right away, not
     *  when the operation is executed.
     *  @param aParts
     *      The non-empty list of parts of the driver file line.  The first
     *      part is the command name.
     */
    private void ProcessDriverCommand (final Vector<String> aParts)
    {
        switch (aParts.get(0))
        {
            case "schema":
                RequireArguments(aParts, 2);
                maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
                break;

            case "output-schema":
            {
                RequireArguments(aParts, 1);
                final File aFile = CreateCheckedOutputFile(aParts.get(1));
                maOutputOperations.add(new Runnable()
                {
                    @Override public void run()
                    {
                        WriteSchema(aFile);
                    }
                });
                break;
            }

            case "output-optimized-schema":
            {
                RequireArguments(aParts, 1);
                final File aFile = CreateCheckedOutputFile(aParts.get(1));
                maOutputOperations.add(new Runnable()
                {
                    @Override public void run()
                    {
                        WriteOptimizedSchema(aFile);
                    }
                });
                break;
            }

            case "output-nonvalidating-parse-tables":
            {
                RequireArguments(aParts, 3);
                final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
                final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
                final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
                maOutputOperations.add(new Runnable()
                {
                    @Override public void run()
                    {
                        WriteNonValidatingParseTables(
                            aAutomatonLogFile,
                            aSimpleTypeLogFile,
                            aParseTableFile);
                    }
                });
                break;
            }

            case "output-validating-parse-tables":
            {
                RequireArguments(aParts, 3);
                final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
                final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
                final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
                maOutputOperations.add(new Runnable()
                {
                    @Override public void run()
                    {
                        WriteValidatingParseTables(
                            aAutomatonLogFile,
                            aSimpleTypeLogFile,
                            aParseTableFile);
                    }
                });
                break;
            }

            case "output-html-page":
            {
                RequireArguments(aParts, 1);
                final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1));
                maOutputOperations.add(new Runnable()
                {
                    @Override public void run()
                    {
                        WriteHTMLPage(aHTMLPageFile);
                    }
                });
                break;
            }

            default:
                System.err.printf("unknown command '%s' in driver file\n", aParts.get(0));
                System.exit(1);
        }
    }




    /** Make sure that the command in aParts is followed by at least the given
     *  number of arguments.  Additional arguments are tolerated.
     *  Prints an error message and exits with status 1 when arguments are missing.
     */
    private static void RequireArguments (
        final Vector<String> aParts,
        final int nArgumentCount)
    {
        if (aParts.size() <= nArgumentCount)
        {
            System.err.printf("command '%s' in driver file requires %d argument(s) but has %d\n",
                aParts.get(0),
                nArgumentCount,
                aParts.size()-1);
            System.exit(1);
        }
    }




    /** Parse all schema files, optimize the schema and then run the output
     *  operations in the order in which they appear in the driver file.
     *  When parsing fails the program exits with status 1 instead of
     *  creating output from an incomplete schema.
     */
    private void Run ()
    {
        try
        {
            ParseSchemaFiles();
        }
        catch (final Exception aException)
        {
            aException.printStackTrace();
            System.exit(1);
        }

        maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
        // Replace every top level schema by its optimized counterpart.
        for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
            aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));

        System.out.printf(" optimization left %d complex types and %d simple types\n",
            maOptimizedSchemaBase.ComplexTypes.GetCount(),
            maOptimizedSchemaBase.SimpleTypes.GetCount());

        for (final Runnable aOperation : maOutputOperations)
        {
            aOperation.run();
        }
    }




    /** Parse the main schema files named in the driver file and then all
     *  schema files that are (transitively) imported by them.  Prints some
     *  statistics when done.
     *  Exits with status 1 when a main schema file does not exist or can
     *  not be read.
     */
    private void ParseSchemaFiles ()
        throws XMLStreamException
    {
        System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());

        // The reported counts include the main schema files, so the
        // reported time has to include them as well.
        final long nStartTime = System.currentTimeMillis();

        for (final String[] aEntry : maMainSchemaFiles)
        {
            final String sMainSchemaShortname = aEntry[0];
            final String sMainSchemaFile = aEntry[1];
            final File aMainSchemaFile = new File(sMainSchemaFile);
            if ( ! aMainSchemaFile.exists())
            {
                System.err.printf(" schema file does not exist\n");
                System.exit(1);
            }
            if ( ! aMainSchemaFile.canRead())
            {
                System.err.printf("can not read schema file\n");
                System.exit(1);
            }

            final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
            ParseSchemaFile(sMainSchemaFile, aSchema);
            maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
        }

        // Parsing a file can add more (imported) files to the work list.
        while ( ! maWorkList.isEmpty())
        {
            ParseSchemaFile(maWorkList.poll(), null);
        }
        final long nEndTime = System.currentTimeMillis();

        System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
            maSchemaFiles.size(),
            mnTotalLineCount,
            mnTotalByteCount,
            (nEndTime-nStartTime)/1000.0);
        System.out.printf(" found %d complex types and %d simple types\n",
            maSchemaBase.ComplexTypes.GetCount(),
            maSchemaBase.SimpleTypes.GetCount());

        int nTopLevelElementCount = 0;
        for (final Schema aSchema : maTopLevelSchemas.values())
            nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
        System.out.printf(" the %d top level schemas have %d elements\n",
            maTopLevelSchemas.size(),
            nTopLevelElementCount);
    }




    /** Parse a single schema file, update the line and byte counters and
     *  schedule the schema files that it imports for parsing.
     *  @param sSchemaFilename
     *      Name of the schema file to parse.
     *  @param aSchema
     *      The top level schema that is passed on to the parser.  Is null for
     *      imported schema files.
     */
    private void ParseSchemaFile (
        final String sSchemaFilename,
        final Schema aSchema)
        throws XMLStreamException
    {
        System.out.printf("parsing %s\n", sSchemaFilename);
        // NOTE(review): main schema files are recorded under the name given in
        // the driver file while imported files are recorded under their
        // absolute path.  A main schema file that is given as relative path
        // and is also imported is therefore parsed a second time.  Confirm
        // whether that is intended.
        maSchemaFiles.add(sSchemaFilename);

        final SchemaParser aParser = new SchemaParser(new File(sSchemaFilename), aSchema, maSchemaBase);
        aParser.Parse();

        mnTotalLineCount += aParser.GetLineCount();
        mnTotalByteCount += aParser.GetByteCount();
        for (final File aFile : aParser.GetImportedSchemaFilenames())
            AddSchemaReference(aFile.getAbsolutePath());
    }




    /** Schedule the given schema file for parsing unless it is already known.
     *  @throws RuntimeException
     *      when sSchemaFilename is null.
     */
    private void AddSchemaReference (final String sSchemaFilename)
    {
        if (sSchemaFilename == null)
            throw new RuntimeException();

        if ( ! maSchemaFiles.contains(sSchemaFilename))
        {
            // Mark the file as 'known' right away so that it is added to the
            // work list only once.
            maSchemaFiles.add(sSchemaFilename);
            maWorkList.add(sSchemaFilename);
        }
    }




    /** Split the given string at whitespace but not at whitespace inside double quotes.
     *  The double quotes themselves are not part of the result.  Text inside
     *  double quotes always forms a part of its own, even when it is empty.
     */
    private Vector<String> SplitLine (final String sLine)
    {
        final Vector<String> aParts = new Vector<>();

        // Splitting at the double quotes yields text that is alternately
        // outside and inside of quotes, starting outside.
        boolean bIsInsideQuotes = false;
        for (final String sPart : sLine.split("\""))
        {
            if (bIsInsideQuotes)
            {
                aParts.add(sPart);
            }
            else
            {
                for (final String sInnerPart : sPart.split("\\s+"))
                {
                    if ( ! sInnerPart.isEmpty())
                        aParts.add(sInnerPart);
                }
            }

            bIsInsideQuotes = ! bIsInsideQuotes;
        }

        return aParts;
    }




    /** Create a File object for a given file name.
     *  Check that the file is writable, i.e. its directory exists and that if
     *  the file already exists it can be replaced.
     *  Throws a RuntimeException when a check fails.
     */
    private File CreateCheckedOutputFile (final String sFilename)
    {
        final File aFile = new File(sFilename);
        // A relative name without directory part has no parent of its own.
        // Look at the absolute file to find the directory that it is in.
        final File aDirectory = aFile.getAbsoluteFile().getParentFile();
        if (aDirectory != null && ! aDirectory.exists())
            throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
        if (aFile.exists() && ! aFile.canWrite())
            throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
        return aFile;
    }




    /** Write a log of the unoptimized schema base to the given file.
     */
    private void WriteSchema (final File aOutputFile)
    {
        LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
    }




    /** Write a log of the optimized schema base to the given file.
     */
    private void WriteOptimizedSchema (final File aOutputFile)
    {
        LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
    }




    /** Create non-validating automatons for the optimized schema and
     *  generate parse tables from them.
     */
    private void WriteNonValidatingParseTables (
        final File aAutomatonLogFile,
        final File aSimpleTypeLogFile,
        final File aParseTableFile)
    {
        final long nStartTime = System.currentTimeMillis();
        final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
        final FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values());
        final long nEndTime = System.currentTimeMillis();
        System.out.printf(
            "created %d non-validating automatons with %d states and %d transitions in %fs\n",
            aAutomatons.GetAutomatonCount(),
            aAutomatons.GetStateCount(),
            aAutomatons.GetTransitionCount(),
            (nEndTime-nStartTime)/1000.0);

        GenerateParseTables(aAutomatons, aSimpleTypeLogFile, aParseTableFile);
    }




    /** Create validating automatons for the optimized schema, turn them into
     *  DFAs, minimize these and generate parse tables from the result.
     */
    private void WriteValidatingParseTables (
        final File aAutomatonLogFile,
        final File aSimpleTypeLogFile,
        final File aParseTableFile)
    {
        long nStartTime = System.currentTimeMillis();
        final ValidatingCreator aCreator = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
        FiniteAutomatonContainer aAutomatons = aCreator.Create();
        long nEndTime = System.currentTimeMillis();
        System.out.printf(
            "created %d validating stack automatons with %d states and %d transitions in %fs\n",
            aAutomatons.GetAutomatonCount(),
            aAutomatons.GetStateCount(),
            aAutomatons.GetTransitionCount(),
            (nEndTime-nStartTime)/1000.0);

        nStartTime = System.currentTimeMillis();
        aAutomatons = aAutomatons.CreateDFAs();
        nEndTime = System.currentTimeMillis();
        System.out.printf(
            "created %d deterministic automatons with %d states and %d transitions in %fs\n",
            aAutomatons.GetAutomatonCount(),
            aAutomatons.GetStateCount(),
            aAutomatons.GetTransitionCount(),
            (nEndTime-nStartTime)/1000.0);

        nStartTime = System.currentTimeMillis();
        aAutomatons = aAutomatons.MinimizeDFAs();
        nEndTime = System.currentTimeMillis();
        System.out.printf(
            "minimized automaton in %fs, there are now %d states and %d transitions\n",
            (nEndTime-nStartTime)/1000.0,
            aAutomatons.GetStateCount(),
            aAutomatons.GetTransitionCount());

        GenerateParseTables(aAutomatons, aSimpleTypeLogFile, aParseTableFile);
    }




    /** Common last step of writing parse tables: create the simple type
     *  descriptions for the optimized schema and generate the parse table
     *  file from them and the given automatons.
     */
    private void GenerateParseTables (
        final FiniteAutomatonContainer aAutomatons,
        final File aSimpleTypeLogFile,
        final File aParseTableFile)
    {
        final long nStartTime = System.currentTimeMillis();
        final SimpleTypeContainer aSimpleTypes = SimpleTypeContainer.Create(
            maOptimizedSchemaBase,
            aSimpleTypeLogFile);
        final long nEndTime = System.currentTimeMillis();
        System.out.printf(
            "created %d simple type descriptions in %fs\n",
            aSimpleTypes.GetSimpleTypeCount(),
            (nEndTime-nStartTime)/1000.0);

        new ParserTablesGenerator(
            aAutomatons,
            maOptimizedSchemaBase.Namespaces,
            aSimpleTypes,
            maOptimizedSchemaBase.AttributeValueToIdMap)
            .Generate(aParseTableFile);
    }




    /** Generate an HTML page for the optimized schema and report how long
     *  that took.
     */
    private void WriteHTMLPage (
        final File aHTMLPageFile)
    {
        final long nStartTime = System.currentTimeMillis();

        new HtmlGenerator(maOptimizedSchemaBase, maTopLevelSchemas, aHTMLPageFile).Generate();

        final long nEndTime = System.currentTimeMillis();
        System.out.printf(
            "created HTML page in %fs\n",
            (nEndTime-nStartTime)/1000.0);
    }




    /** Receives the types of all parsed schema files. */
    private final SchemaBase maSchemaBase;
    /** Result of optimizing maSchemaBase.  Set by Run(). */
    private SchemaBase maOptimizedSchemaBase;
    /** The top level schemas, indexed by the short names given in the driver file. */
    private final Map<String,Schema> maTopLevelSchemas;
    /** Pairs of short name and file name, one for each "schema" line of the driver file. */
    private final Vector<String[]> maMainSchemaFiles;
    /** Names of imported schema files that still have to be parsed. */
    private final Queue<String> maWorkList;
    /** The output operations requested by the driver file, in file order. */
    private final Vector<Runnable> maOutputOperations;
    /** Names of all schema files that have been parsed or are scheduled for parsing. */
    private final Set<String> maSchemaFiles;
    private int mnTotalLineCount;
    private int mnTotalByteCount;
}