1 files changed, 510 insertions, 0 deletions
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
new file mode 100644
index 000000000000..bf15b39d71e2
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
@@ -0,0 +1,510 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.Vector;
+
+import javax.xml.stream.XMLStreamException;
+
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
+import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
+import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
+import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
+import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
+import org.apache.openoffice.ooxml.schema.generator.html.HtmlGenerator;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
+import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
+import org.apache.openoffice.ooxml.schema.simple.SimpleTypeContainer;
+
+public class SchemaReader
+{
+    /** Command line entry point.
+     *  Expects exactly one argument, the name of the driver file.  With any
+     *  other number of arguments a usage message is written to stderr and the
+     *  process exits with status 1.
+     *
+     *  @param aArgumentList
+     *      The command line arguments.
+     */
+    public static void main (final String ... aArgumentList)
+    {
+        if (aArgumentList.length != 1)
+        {
+            // Keep this text in sync with the commands that ParseDriverFile() accepts.
+            final String[] aUsageLines = {
+                "usage: SchemaParser <driver-file>\n",
+                " driver file can contain these lines:\n",
+                "# Comments\n",
+                "    are ignored\n",
+                "schema <mark> <file-name>\n",
+                "    specifies a top-level schema file to read\n",
+                "output-schema <file-name>\n",
+                "    write schema information to file\n",
+                "output-optimized-schema <file-name>\n",
+                "    write information about optimized schema to file\n",
+                "output-nonvalidating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n",
+                "    write log files and parse tables for a non-validating parser\n",
+                "output-validating-parse-tables <automaton-log-file> <simple-type-log-file> <parse-table-file>\n",
+                "    write log files and parse tables for a validating parser\n",
+                "output-html-page <file-name>\n",
+                "    write an HTML page for the optimized schema to file\n",
+            };
+            for (final String sUsageLine : aUsageLines)
+                System.err.print(sUsageLine);
+            System.exit(1);
+        }
+
+        final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
+        aReader.Run();
+    }
+
+
+
+
+    /** Create a reader with empty containers and zeroed statistics and then
+     *  immediately parse the given driver file.
+     *
+     *  @param aDriverFile
+     *      The driver file; it is handed on unchanged to ParseDriverFile().
+     */
+    private SchemaReader (final File aDriverFile)
+    {
+        // Schema data.
+        this.maSchemaBase = new SchemaBase();
+        this.maTopLevelSchemas = new HashMap<String,Schema>();
+
+        // Bookkeeping of schema files.
+        this.maMainSchemaFiles = new Vector<String[]>();
+        this.maSchemaFiles = new HashSet<String>();
+        this.maWorkList = new LinkedList<String>();
+
+        // Output operations that are requested by the driver file.
+        this.maOutputOperations = new Vector<Runnable>();
+
+        // Statistics.
+        this.mnTotalLineCount = 0;
+        this.mnTotalByteCount = 0;
+
+        this.ParseDriverFile(aDriverFile);
+    }
+
+
+
+
+    /** Read and parse the driver file that specifies which schema files to read
+     *  and where the output should go.
+     *
+     *  Lines whose first non-whitespace character is # and lines that contain
+     *  only whitespace are skipped.  A line that ends with a backslash is
+     *  joined with the line that follows it.  Every other line is split with
+     *  SplitLine(); its first part is the command, the other parts are the
+     *  arguments.
+     *
+     *  "schema" lines are recorded in maMainSchemaFiles.  For every output
+     *  command a Runnable is appended to maOutputOperations; nothing is written
+     *  here.  The output files are checked with CreateCheckedOutputFile()
+     *  already while the driver file is parsed.
+     *
+     *  Every error (unreadable driver file, unknown command, missing argument,
+     *  failed output file check, I/O error) terminates the process with exit
+     *  status 1 so that processing never goes on with a partially read driver
+     *  file.
+     *
+     *  @param aDriverFile
+     *      The driver file to read.
+     */
+    private void ParseDriverFile (final File aDriverFile)
+    {
+        if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
+        {
+            System.err.printf("can not read driver file\n");
+            System.exit(1);
+        }
+
+        // try-with-resources closes the reader also when parsing fails.
+        try (final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile)))
+        {
+            String sLine;
+            while ((sLine = aIn.readLine()) != null)
+            {
+                // Lines starting with # are comment lines and are ignored.
+                // Lines containing only whitespace are also ignored.
+                if (sLine.matches("^\\s*#.*") || sLine.matches("^\\s*$"))
+                    continue;
+
+                // Handle line continuation.
+                while (sLine.endsWith("\\"))
+                {
+                    sLine = sLine.substring(0, sLine.length()-1);
+                    final String sNextLine = aIn.readLine();
+                    // A backslash at the end of the last line has nothing to
+                    // continue with.  Do not append the text "null".
+                    if (sNextLine == null)
+                        break;
+                    sLine += sNextLine;
+                }
+
+                final Vector<String> aParts = SplitLine(sLine);
+                // Can happen e.g. for a line that consists only of a backslash.
+                if (aParts.isEmpty())
+                    continue;
+
+                switch (aParts.get(0))
+                {
+                    case "schema":
+                        CheckArgumentCount(aParts, 2);
+                        maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
+                        break;
+
+                    case "output-schema":
+                        CheckArgumentCount(aParts, 1);
+                        maOutputOperations.add(new Runnable()
+                        {
+                            // Field initializers run right now, i.e. the file
+                            // is checked while the driver file is parsed.
+                            final File maFile = CreateCheckedOutputFile(aParts.get(1));
+                            @Override public void run()
+                            {
+                                WriteSchema(maFile);
+                            }
+                        });
+                        break;
+
+                    case "output-optimized-schema":
+                        CheckArgumentCount(aParts, 1);
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File maFile = CreateCheckedOutputFile(aParts.get(1));
+                            @Override public void run()
+                            {
+                                WriteOptimizedSchema(maFile);
+                            }
+                        });
+                        break;
+
+                    case "output-nonvalidating-parse-tables":
+                        CheckArgumentCount(aParts, 3);
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+                            final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
+                            final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
+                            @Override public void run()
+                            {
+                                WriteNonValidatingParseTables(
+                                    aAutomatonLogFile,
+                                    aSimpleTypeLogFile,
+                                    aParseTableFile);
+                            }
+                        });
+                        break;
+
+                    case "output-validating-parse-tables":
+                        CheckArgumentCount(aParts, 3);
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File aAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+                            final File aSimpleTypeLogFile = CreateCheckedOutputFile(aParts.get(2));
+                            final File aParseTableFile = CreateCheckedOutputFile(aParts.get(3));
+                            @Override public void run()
+                            {
+                                WriteValidatingParseTables(
+                                    aAutomatonLogFile,
+                                    aSimpleTypeLogFile,
+                                    aParseTableFile);
+                            }
+                        });
+                        break;
+
+                    case "output-html-page":
+                        CheckArgumentCount(aParts, 1);
+                        maOutputOperations.add(new Runnable()
+                        {
+                            final File aHTMLPageFile = CreateCheckedOutputFile(aParts.get(1));
+                            @Override public void run()
+                            {
+                                WriteHTMLPage(aHTMLPageFile);
+                            }
+                        });
+                        break;
+
+                    default:
+                        System.err.printf("unknown command '%s' in driver file\n", aParts.get(0));
+                        System.exit(1);
+                }
+            }
+        }
+        catch (final Exception aException)
+        {
+            // Going on after an error would silently drop schema files or
+            // output operations.  Fail like for the other driver file errors.
+            aException.printStackTrace();
+            System.exit(1);
+        }
+    }
+
+
+
+
+    /** Terminate the process with an error message when a driver file command
+     *  has fewer arguments than it needs.
+     *
+     *  @param aParts
+     *      The parts of one driver file line; the first part is the command.
+     *  @param nArgumentCount
+     *      The number of arguments that have to follow the command.
+     */
+    private static void CheckArgumentCount (
+        final Vector<String> aParts,
+        final int nArgumentCount)
+    {
+        if (aParts.size() <= nArgumentCount)
+        {
+            System.err.printf(
+                "command '%s' in driver file needs %d argument(s) but has only %d\n",
+                aParts.get(0),
+                nArgumentCount,
+                aParts.size()-1);
+            System.exit(1);
+        }
+    }
+
+
+
+
+    private void Run ()
+    {
+        try
+        {
+            ParseSchemaFiles();
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+
+        maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
+        for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
+            aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
+
+        System.out.printf("    optimization left %d complex types and %d simple types\n",
+            maOptimizedSchemaBase.ComplexTypes.GetCount(),
+            maOptimizedSchemaBase.SimpleTypes.GetCount());
+
+        for (final Runnable aOperation : maOutputOperations)
+        {
+            aOperation.run();
+        }
+    }
+
+
+
+
+    /** Parse all schema files and print statistics to stdout.
+     *  First the main schema files that are listed in maMainSchemaFiles are
+     *  parsed, each into its own Schema object that is then stored in
+     *  maTopLevelSchemas under its short name.  After that the files in
+     *  maWorkList are parsed until the list is empty; parsing a file can add
+     *  more files to the list.
+     *
+     *  The process exits with status 1 when a main schema file does not exist
+     *  or can not be read.
+     *
+     *  @throws XMLStreamException
+     *      Passed on from ParseSchemaFile().
+     */
+    private void ParseSchemaFiles ()
+        throws XMLStreamException
+    {
+        System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
+
+        // The file, line and byte counts that are reported below include the
+        // main schema files, so the measured time has to include them as well.
+        final long nStartTime = System.currentTimeMillis();
+
+        for (final String[] aEntry : maMainSchemaFiles)
+        {
+            final String sMainSchemaShortname = aEntry[0];
+            final String sMainSchemaFile = aEntry[1];
+            final File aMainSchemaFile = new File(sMainSchemaFile);
+            if ( ! aMainSchemaFile.exists())
+            {
+                System.err.printf("schema file '%s' does not exist\n", sMainSchemaFile);
+                System.exit(1);
+            }
+            if ( ! aMainSchemaFile.canRead())
+            {
+                System.err.printf("can not read schema file '%s'\n", sMainSchemaFile);
+                System.exit(1);
+            }
+
+            final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
+            ParseSchemaFile(sMainSchemaFile, aSchema);
+            maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
+        }
+
+        // Files from the work list are parsed without a Schema object of their own.
+        while ( ! maWorkList.isEmpty())
+        {
+            ParseSchemaFile(maWorkList.poll(), null);
+        }
+        final long nEndTime = System.currentTimeMillis();
+
+        System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
+            maSchemaFiles.size(),
+            mnTotalLineCount,
+            mnTotalByteCount,
+            (nEndTime-nStartTime)/1000.0);
+        System.out.printf("    found %d complex types and %d simple types\n",
+            maSchemaBase.ComplexTypes.GetCount(),
+            maSchemaBase.SimpleTypes.GetCount());
+
+        int nTopLevelElementCount = 0;
+        for (final Schema aSchema : maTopLevelSchemas.values())
+            nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
+        System.out.printf("    the %d top level schemas have %d elements\n",
+                maTopLevelSchemas.size(),
+                nTopLevelElementCount);
+    }
+
+
+
+
+    /** Parse a single schema file, add its line and byte counts to the totals
+     *  and register the files it imports via AddSchemaReference().
+     *
+     *  @param sSchemaFilename
+     *      Name of the file to parse.  It is recorded in maSchemaFiles.
+     *  @param aSchema
+     *      Handed on to the SchemaParser.  ParseSchemaFiles() passes null for
+     *      files that come from the work list.
+     *  @throws XMLStreamException
+     *      Passed on from the parser.
+     */
+    private void ParseSchemaFile (
+            final String sSchemaFilename,
+            final Schema aSchema)
+                    throws XMLStreamException
+    {
+        System.out.printf("parsing %s\n", sSchemaFilename);
+        maSchemaFiles.add(sSchemaFilename);
+
+        final File aSchemaFile = new File(sSchemaFilename);
+        final SchemaParser aSchemaParser = new SchemaParser(aSchemaFile, aSchema, maSchemaBase);
+        aSchemaParser.Parse();
+
+        // Update the statistics.
+        mnTotalLineCount += aSchemaParser.GetLineCount();
+        mnTotalByteCount += aSchemaParser.GetByteCount();
+
+        // Imported files are referenced by their absolute path.
+        for (final File aImportedFile : aSchemaParser.GetImportedSchemaFilenames())
+        {
+            final String sImportedPath = aImportedFile.getAbsolutePath();
+            AddSchemaReference(sImportedPath);
+        }
+    }
+
+
+
+
+    /** Queue a schema file for parsing unless it is already known.
+     *  A file is known when its name is in maSchemaFiles, i.e. when it has
+     *  been parsed or queued before.
+     *
+     *  @param sSchemaFilename
+     *      Name of the referenced schema file.  Must not be null.
+     *  @throws IllegalArgumentException
+     *      When sSchemaFilename is null.
+     */
+    private void AddSchemaReference (final String sSchemaFilename)
+    {
+        // Reject null before any container is touched.
+        if (sSchemaFilename == null)
+            throw new IllegalArgumentException("name of referenced schema file is null");
+
+        // add() marks the file as known and returns true only when the file
+        // was not known before.  Only in that case it has to be queued.
+        if (maSchemaFiles.add(sSchemaFilename))
+            maWorkList.add(sSchemaFilename);
+    }
+
+
+
+
+    /** Split the given string at whitespace but not at whitespace inside double quotes.
+     *  The quote characters are not part of the result.  Text between a pair
+     *  of quotes always becomes one part of its own, even when it is empty.
+     *  Text after an opening quote that is never closed is treated like quoted
+     *  text, unless it is empty.
+     *
+     *  @param sLine
+     *      The line to split.
+     *  @return
+     *      The parts in the order in which they occur in sLine.
+     */
+    private Vector<String> SplitLine (final String sLine)
+    {
+        final Vector<String> aParts = new Vector<>();
+
+        // Splitting at the quotes yields segments that alternate between
+        // unquoted text (even index) and quoted text (odd index).  The negative
+        // limit keeps trailing empty segments so that an empty quoted string
+        // at the end of the line is not lost.
+        final String[] aSegments = sLine.split("\"", -1);
+        for (int nIndex=0; nIndex<aSegments.length; ++nIndex)
+        {
+            final String sSegment = aSegments[nIndex];
+            final boolean bIsInsideQuotes = (nIndex % 2) == 1;
+            if (bIsInsideQuotes)
+            {
+                // A quoted segment is terminated by a closing quote exactly
+                // when another segment follows it.
+                final boolean bIsTerminated = nIndex+1 < aSegments.length;
+                if (bIsTerminated || ! sSegment.isEmpty())
+                    aParts.add(sSegment);
+            }
+            else
+            {
+                // split() can return empty strings, e.g. for leading whitespace.
+                for (final String sInnerPart : sSegment.split("\\s+"))
+                    if ( ! sInnerPart.isEmpty())
+                        aParts.add(sInnerPart);
+            }
+        }
+
+        return aParts;
+    }
+
+
+
+
+    /** Create a File object for a given file name.
+     *  Check that the file is writable, i.e. its directory exists and that if
+     *  the file already exists it can be replaced.
+     *  A file name without directory part is looked up in the current
+     *  directory.
+     *
+     *  @param sFilename
+     *      Name of the output file.
+     *  @return
+     *      A File object for sFilename as it was given.
+     *  @throws RuntimeException
+     *      When a check fails.
+     */
+    private File CreateCheckedOutputFile (final String sFilename)
+    {
+        final File aFile = new File(sFilename);
+
+        // getParentFile() returns null for a name without directory part.
+        // Make the path absolute first so that such a name is checked against
+        // the current directory instead of causing a NullPointerException.
+        final File aDirectory = aFile.getAbsoluteFile().getParentFile();
+        if (aDirectory == null || ! aDirectory.exists())
+            throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
+        if (aFile.exists() && ! aFile.canWrite())
+            throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
+        return aFile;
+    }
+
+
+
+
+    /** Output operation of the "output-schema" driver file command.
+     *  Hands maSchemaBase, i.e. the schema base that was filled by the schema
+     *  parsers, and the current values of maTopLevelSchemas to
+     *  LogGenerator.Write().
+     *
+     *  NOTE(review): Run() replaces the values of maTopLevelSchemas by their
+     *  optimized forms before it runs the output operations, so the top level
+     *  schemas that are passed here are presumably the optimized ones while
+     *  the schema base is not -- confirm that this is intended.
+     *
+     *  @param aOutputFile
+     *      The file that is passed to LogGenerator.Write() as output file.
+     */
+    private void WriteSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
+    }
+
+
+
+
+    /** Output operation of the "output-optimized-schema" driver file command.
+     *  Hands maOptimizedSchemaBase and the current values of
+     *  maTopLevelSchemas to LogGenerator.Write().  Run() sets
+     *  maOptimizedSchemaBase before it runs the output operations.
+     *
+     *  @param aOutputFile
+     *      The file that is passed to LogGenerator.Write() as output file.
+     */
+    private void WriteOptimizedSchema (final File aOutputFile)
+    {
+        LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
+    }
+
+
+
+
+    /** Output operation of the "output-nonvalidating-parse-tables" driver
+     *  file command.
+     *  Creates automatons with a NonValidatingCreator and simple type
+     *  descriptions with SimpleTypeContainer.Create(), both from the optimized
+     *  schema base, prints timing information to stdout and hands the results
+     *  to a ParserTablesGenerator.
+     *
+     *  @param aAutomatonLogFile
+     *      Passed to the NonValidatingCreator.
+     *  @param aSimpleTypeLogFile
+     *      Passed to SimpleTypeContainer.Create().
+     *  @param aParseTableFile
+     *      Passed to ParserTablesGenerator.Generate().
+     */
+    private void WriteNonValidatingParseTables (
+        final File aAutomatonLogFile,
+        final File aSimpleTypeLogFile,
+        final File aParseTableFile)
+    {
+        // Step 1: automatons.
+        final long nAutomatonStartTime = System.currentTimeMillis();
+        final NonValidatingCreator aAutomatonCreator = new NonValidatingCreator(
+            maOptimizedSchemaBase,
+            aAutomatonLogFile);
+        final FiniteAutomatonContainer aAutomatonContainer = aAutomatonCreator.Create(
+            maTopLevelSchemas.values());
+        final double nAutomatonSeconds = (System.currentTimeMillis()-nAutomatonStartTime)/1000.0;
+        System.out.printf(
+            "created %d non-validating automatons with %d states and %d transitions in %fs\n",
+            aAutomatonContainer.GetAutomatonCount(),
+            aAutomatonContainer.GetStateCount(),
+            aAutomatonContainer.GetTransitionCount(),
+            nAutomatonSeconds);
+
+        // Step 2: simple types.
+        final long nSimpleTypeStartTime = System.currentTimeMillis();
+        final SimpleTypeContainer aSimpleTypeContainer = SimpleTypeContainer.Create(
+            maOptimizedSchemaBase,
+            aSimpleTypeLogFile);
+        final double nSimpleTypeSeconds = (System.currentTimeMillis()-nSimpleTypeStartTime)/1000.0;
+        System.out.printf(
+            "created %d simple type descriptions in %fs\n",
+            aSimpleTypeContainer.GetSimpleTypeCount(),
+            nSimpleTypeSeconds);
+
+        // Step 3: parse tables.
+        final ParserTablesGenerator aTablesGenerator = new ParserTablesGenerator(
+            aAutomatonContainer,
+            maOptimizedSchemaBase.Namespaces,
+            aSimpleTypeContainer,
+            maOptimizedSchemaBase.AttributeValueToIdMap);
+        aTablesGenerator.Generate(aParseTableFile);
+    }
+
+
+
+
+    /** Output operation of the "output-validating-parse-tables" driver file
+     *  command.
+     *  Creates automatons with a ValidatingCreator, converts them with
+     *  CreateDFAs() and MinimizeDFAs(), creates simple type descriptions with
+     *  SimpleTypeContainer.Create(), prints timing information for every step
+     *  to stdout and hands the results to a ParserTablesGenerator.
+     *
+     *  @param aAutomatonLogFile
+     *      Passed to the ValidatingCreator.
+     *  @param aSimpleTypeLogFile
+     *      Passed to SimpleTypeContainer.Create().
+     *  @param aParseTableFile
+     *      Passed to ParserTablesGenerator.Generate().
+     */
+    private void WriteValidatingParseTables (
+        final File aAutomatonLogFile,
+        final File aSimpleTypeLogFile,
+        final File aParseTableFile)
+    {
+        // Step 1: automatons as they are created from the schema.
+        long nStepStartTime = System.currentTimeMillis();
+        final ValidatingCreator aAutomatonCreator = new ValidatingCreator(
+            maOptimizedSchemaBase,
+            aAutomatonLogFile);
+        FiniteAutomatonContainer aAutomatonContainer = aAutomatonCreator.Create();
+        double nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
+        System.out.printf(
+            "created %d validating stack automatons with %d states and %d transitions in %fs\n",
+            aAutomatonContainer.GetAutomatonCount(),
+            aAutomatonContainer.GetStateCount(),
+            aAutomatonContainer.GetTransitionCount(),
+            nStepSeconds);
+
+        // Step 2: DFAs.
+        nStepStartTime = System.currentTimeMillis();
+        aAutomatonContainer = aAutomatonContainer.CreateDFAs();
+        nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
+        System.out.printf(
+            "created %d deterministic automatons with %d states and %d transitions in %fs\n",
+            aAutomatonContainer.GetAutomatonCount(),
+            aAutomatonContainer.GetStateCount(),
+            aAutomatonContainer.GetTransitionCount(),
+            nStepSeconds);
+
+        // Step 3: minimized DFAs.
+        nStepStartTime = System.currentTimeMillis();
+        aAutomatonContainer = aAutomatonContainer.MinimizeDFAs();
+        nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
+        System.out.printf(
+            "minimized automaton in %fs, there are now %d states and %d transitions\n",
+            nStepSeconds,
+            aAutomatonContainer.GetStateCount(),
+            aAutomatonContainer.GetTransitionCount());
+
+        // Step 4: simple types.
+        nStepStartTime = System.currentTimeMillis();
+        final SimpleTypeContainer aSimpleTypeContainer = SimpleTypeContainer.Create(
+            maOptimizedSchemaBase,
+            aSimpleTypeLogFile);
+        nStepSeconds = (System.currentTimeMillis()-nStepStartTime)/1000.0;
+        System.out.printf(
+            "created %d simple type descriptions in %fs\n",
+            aSimpleTypeContainer.GetSimpleTypeCount(),
+            nStepSeconds);
+
+        // Step 5: parse tables.
+        final ParserTablesGenerator aTablesGenerator = new ParserTablesGenerator(
+            aAutomatonContainer,
+            maOptimizedSchemaBase.Namespaces,
+            aSimpleTypeContainer,
+            maOptimizedSchemaBase.AttributeValueToIdMap);
+        aTablesGenerator.Generate(aParseTableFile);
+    }
+
+
+
+
+    private void WriteHTMLPage (
+        final File aHTMLPageFile)
+    {
+        long nStartTime = System.currentTimeMillis();
+
+        new HtmlGenerator(maOptimizedSchemaBase, maTopLevelSchemas, aHTMLPageFile).Generate();
+
+        long nEndTime = System.currentTimeMillis();
+        System.out.printf(
+            "created HTML page in %fs\n",
+            (nEndTime-nStartTime)/1000.0);
+    }
+
+
+
+
+    /** The schema base that is passed to every Schema and SchemaParser. */
+    private final SchemaBase maSchemaBase;
+    /** Result of maSchemaBase.GetOptimizedSchema(); set by Run(). */
+    private SchemaBase maOptimizedSchemaBase;
+    /** The schemas of the main schema files, keyed by the short name from the
+     *  "schema" lines of the driver file.  Run() replaces the values by their
+     *  optimized forms.
+     */
+    private final Map<String,Schema> maTopLevelSchemas;
+    /** One {short name, file name} pair per "schema" line of the driver file. */
+    private final Vector<String[]> maMainSchemaFiles;
+    /** Names of referenced schema files that still have to be parsed. */
+    private final Queue<String> maWorkList;
+    /** The output operations from the driver file in the order of their definition. */
+    private final Vector<Runnable> maOutputOperations;
+    /** Names of all schema files that have been parsed or queued for parsing. */
+    private final Set<String> maSchemaFiles;
+    /** Sum of the line counts that the schema parsers reported. */
+    private int mnTotalLineCount;
+    /** Sum of the byte counts that the schema parsers reported. */
+    private int mnTotalByteCount;
+}