1 files changed, 1265 insertions, 0 deletions
diff --git a/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java b/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java
new file mode 100644
index 000000000000..d3372be5a757
--- /dev/null
+++ b/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java
@@ -0,0 +1,1265 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org.  If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.BufferedReader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.InputStreamReader;
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+import org.w3c.dom.Document;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.w3c.dom.NamedNodeMap;
+import org.xml.sax.SAXException;
+
+import javax.xml.transform.*;
+import javax.xml.transform.dom.*;
+import javax.xml.transform.stream.*;
+
+import org.openoffice.xmerge.util.Resources;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ *  An implementation of <code>Document</code> for
+ *  StarOffice documents.
+ */
+public abstract class OfficeDocument
+    implements org.openoffice.xmerge.Document,
+               OfficeConstants {
+
+    /** Factory for <code>DocumentBuilder</code> objects. */
+    private static DocumentBuilderFactory factory =
+       DocumentBuilderFactory.newInstance();
+
+    /** DOM <code>Document</code> of content.xml. */
+    private Document contentDoc = null;
+
+   /** DOM <code>Document</code> of meta.xml. */
+    private Document metaDoc = null;
+
+   /** DOM <code>Document</code> of settings.xml. */
+    private Document settingsDoc = null;
+
+    /** DOM <code>Document</code> of content.xml. */
+    private Document styleDoc = null;
+
+    /** DOM <code>Docuemtn</code> of META-INF/manifest.xml. */
+    private Document manifestDoc = null;
+
+    private String documentName = null;
+    private String fileName = null;
+
+    /** Resources object. */
+    private Resources res = null;
+
+    /**
+     *  <code>OfficeZip</code> object to store zip contents from
+     *  read <code>InputStream</code>.  Note that this member
+     *  will still be null if it was initialized using a template
+     *  file instead of reading from a StarOffice zipped
+     *  XML file.
+     */
+    private OfficeZip zip = null;
+
+    /** Collection to keep track of the embedded objects in the document. */
+    private Map embeddedObjects = null;
+
+    /**
+     *  Default constructor.
+     *
+     *  @param  name  <code>Document</code> name.
+     */
+    public OfficeDocument(String name)
+    {
+        this(name, true, false);
+    }
+
+
+    /**
+     *  Constructor with arguments to set <code>namespaceAware</code>
+     *  and <code>validating</code> flags.
+     *
+     *  @param  name            <code>Document</code> name (may or may not
+     *                          contain extension).
+     *  @param  namespaceAware  Value for <code>namespaceAware</code> flag.
+     *  @param  validating      Value for <code>validating</code> flag.
+     */
+    public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
+
+        res = Resources.getInstance();
+        factory.setValidating(validating);
+        factory.setNamespaceAware(namespaceAware);
+        this.documentName = trimDocumentName(name);
+        this.fileName = documentName + getFileExtension();
+    }
+
+
+    /**
+     *  Removes the file extension from the <code>Document</code>
+     *  name.
+     *
+     *  @param  name  Full <code>Document</code> name with extension.
+     *
+     *  @return  Name of <code>Document</code> without the extension.
+     */
+    private String trimDocumentName(String name) {
+        String temp = name.toLowerCase();
+        String ext = getFileExtension();
+
+        if (temp.endsWith(ext)) {
+            // strip the extension
+            int nlen = name.length();
+            int endIndex = nlen - ext.length();
+            name = name.substring(0,endIndex);
+        }
+
+        return name;
+    }
+
+
+    /**
+     *  Return a DOM <code>Document</code> object of the content.xml
+     *  file.  Note that a content DOM is not created when the constructor
+     *  is called.  So, either the <code>read</code> method or the
+     *  <code>initContentDOM</code> method will need to be called ahead
+     *  on this object before calling this method.
+     *
+     *  @return  DOM <code>Document</code> object.
+     */
+    public Document getContentDOM() {
+
+        return contentDoc;
+    }
+
+ /**
+     *  Return a DOM <code>Document</code> object of the meta.xml
+     *  file.  Note that a content DOM is not created when the constructor
+     *  is called.  So, either the <code>read</code> method or the
+     *  <code>initContentDOM</code> method will need to be called ahead
+     *  on this object before calling this method.
+     *
+     *  @return  DOM <code>Document</code> object.
+     */
+    public Document getMetaDOM() {
+
+        return metaDoc;
+    }
+
+
+ /**
+     *  Return a DOM <code>Document</code> object of the settings.xml
+     *  file.  Note that a content DOM is not created when the constructor
+     *  is called.  So, either the <code>read</code> method or the
+     *  <code>initContentDOM</code> method will need to be called ahead
+     *  on this object before calling this method.
+     *
+     *  @return  DOM <code>Document</code> object.
+     */
+    public Document getSettingsDOM() {
+
+        return settingsDoc;
+    }
+
+
+    /**
+     * Sets the content tree of the document.
+     *
+     * @param   newDom  <code>Node</code> containing the new content tree.
+     */
+    public void setContentDOM( Node newDom) {
+        contentDoc = (Document)newDom;
+    }
+
+
+    /**
+     * Sets the meta tree of the document.
+     *
+     * @param   newDom  <code>Node</code> containing the new meta tree.
+     */
+    public void setMetaDOM (Node newDom) {
+        metaDoc = (Document)newDom;
+    }
+
+
+    /**
+     * Sets the settings tree of the document.
+     *
+     * @param   newDom  <code>Node</code> containing the new settings tree.
+     */
+    public void setSettingsDOM (Node newDom) {
+        settingsDoc = (Document)newDom;
+    }
+
+
+    /**
+     * Sets the style tree of the document.
+     *
+     * @param   newDom  <code>Node</code> containing the new style tree.
+     */
+    public void setStyleDOM (Node newDom) {
+        styleDoc = (Document)newDom;
+    }
+
+
+    /**
+     *  Return a DOM <code>Document</code> object of the style.xml file.
+     *  Note that this may return null if there is no style DOM.
+     *  Note that a style DOM is not created when the constructor
+     *  is called.  Depending on the <code>InputStream</code>, a
+     *  <code>read</code> method may or may not build a style DOM.  When
+     *  creating a new style DOM, call the <code>initStyleDOM</code> method
+     *  first.
+     *
+     *  @return  DOM <code>Document</code> object.
+     */
+    public Document getStyleDOM() {
+
+        return styleDoc;
+    }
+
+
+    /**
+     *  Return the name of the <code>Document</code>.
+     *
+     *  @return  The name of <code>Document</code>.
+     */
+    public String getName() {
+
+        return documentName;
+    }
+
+
+    /**
+     *  Return the file name of the <code>Document</code>, possibly
+     *  with the standard extension.
+     *
+     *  @return  The file name of <code>Document</code>.
+     */
+    public String getFileName() {
+
+        return fileName;
+    }
+
+
+    /**
+     *  Returns the file extension for this type of
+     *  <code>Document</code>.
+     *
+     *  @return  The file extension of <code>Document</code>.
+     */
+    protected abstract String getFileExtension();
+
+
+    /**
+     * Returns all the embedded objects (graphics, formulae, etc.) present in
+     * this document.
+     *
+     * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
+     */
+    public Iterator getEmbeddedObjects() {
+
+        if (embeddedObjects == null && manifestDoc != null) {
+            embeddedObjects = new HashMap();
+
+            // Need to read the manifest file and construct a list of objects
+            NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
+
+            // Dont create the HashMap if there are no embedded objects
+            int len = nl.getLength();
+            for (int i = 0; i < len; i++) {
+                Node n = nl.item(i);
+
+                NamedNodeMap attrs = n.getAttributes();
+
+                String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
+                String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();
+
+
+                /*
+                 * According to OpenOffice.org XML File Format document (ver. 1)
+                 * there are only two types of embedded object:
+                 *
+                 *      Objects with an XML representation.
+                 *      Objects without an XML representation.
+                 *
+                 * The former are represented by one or more XML files.
+                 * The latter are in binary form.
+                 */
+                if (type.startsWith("application/vnd.sun.xml"))
+                {
+                    if (path.equals("/")) {
+                        // Exclude the main document entries
+                        continue;
+                    }
+                    // Take off the trailing '/'
+                    String name = path.substring(0, path.length() - 1);
+                    embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
+                }
+                else if (type.equals("text/xml")) {
+                    // XML entries are either embedded StarOffice doc entries or main
+                    // document entries
+                    continue;
+                }
+                else { // FIX (HJ): allows empty MIME type
+                    embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
+                }
+            }
+        }
+
+        return embeddedObjects.values().iterator();
+    }
+
+    /**
+     * Returns the embedded object corresponding to the name provided.
+     * The name should be stripped of any preceding path characters, such as
+     * '/', '.' or '#'.
+     *
+     * @param   name    The name of the embedded object to retrieve.
+     *
+     * @return  An <code>EmbeddedObject</code> instance representing the named
+     *          object.
+     */
+    public EmbeddedObject getEmbeddedObject(String name) {
+        if (name == null) {
+            return null;
+        }
+
+        if (embeddedObjects == null) {
+            getEmbeddedObjects();
+        }
+
+        if (embeddedObjects.containsKey(name)) {
+            return (EmbeddedObject)embeddedObjects.get(name);
+        }
+        else {
+            return null;
+        }
+    }
+
+
+    /**
+     * Adds a new embedded object to the document.
+     *
+     * @param   embObj  An instance of <code>EmbeddedObject</code>.
+     */
+    public void addEmbeddedObject(EmbeddedObject embObj) {
+        if (embObj == null) {
+            return;
+        }
+
+        if (embeddedObjects == null) {
+            embeddedObjects = new HashMap();
+        }
+
+        embeddedObjects.put(embObj.getName(), embObj);
+    }
+
+
+    /**
+     *  Read the Office <code>Document</code> from the given
+     *  <code>InputStream</code>.
+     *
+     *  @param  is  Office document <code>InputStream</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public void read(InputStream is) throws IOException {
+
+        Debug.log(Debug.INFO, "reading Office file");
+
+        DocumentBuilder builder = null;
+
+        try {
+            builder = factory.newDocumentBuilder();
+        } catch (ParserConfigurationException ex) {
+            throw new OfficeDocumentException(ex);
+        }
+
+        // read in Office zip file format
+
+        zip = new OfficeZip();
+        zip.read(is);
+
+        // grab the content.xml and
+        // parse it into contentDoc.
+
+        byte contentBytes[] = zip.getContentXMLBytes();
+
+        if (contentBytes == null) {
+
+            throw new OfficeDocumentException("Entry content.xml not found in file");
+        }
+
+        try {
+
+            contentDoc = parse(builder, contentBytes);
+
+        } catch (SAXException ex) {
+
+            throw new OfficeDocumentException(ex);
+        }
+
+        // if style.xml exists, grab the style.xml
+        // parse it into styleDoc.
+
+        byte styleBytes[] = zip.getStyleXMLBytes();
+
+        if (styleBytes != null) {
+
+            try {
+
+                styleDoc = parse(builder, styleBytes);
+
+            } catch (SAXException ex) {
+
+                throw new OfficeDocumentException(ex);
+            }
+        }
+
+    byte metaBytes[] = zip.getMetaXMLBytes();
+
+        if (metaBytes != null) {
+
+            try {
+
+                metaDoc = parse(builder, metaBytes);
+
+            } catch (SAXException ex) {
+
+                throw new OfficeDocumentException(ex);
+            }
+        }
+
+    byte settingsBytes[] = zip.getSettingsXMLBytes();
+
+        if (settingsBytes != null) {
+
+            try {
+
+                settingsDoc = parse(builder, settingsBytes);
+
+            } catch (SAXException ex) {
+
+                throw new OfficeDocumentException(ex);
+            }
+        }
+
+
+        // Read in the META-INF/manifest.xml file
+        byte manifestBytes[] = zip.getManifestXMLBytes();
+
+        if (manifestBytes != null) {
+
+            try {
+                manifestDoc = parse(builder, manifestBytes);
+            } catch (SAXException ex) {
+                throw new OfficeDocumentException(ex);
+            }
+        }
+
+    }
+
+
+    /**
+     *  Read the Office <code>Document</code> from the given
+     *  <code>InputStream</code>.
+     *
+     *  @param  is  Office document <code>InputStream</code>.
+     *  @param  isZip <code>boolean</code> Identifies whether
+     *                 a file is zipped or not
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public void read(InputStream is, boolean isZip) throws IOException {
+
+        Debug.log(Debug.INFO, "reading Office file");
+
+        DocumentBuilder builder = null;
+
+        try {
+            builder = factory.newDocumentBuilder();
+        } catch (ParserConfigurationException ex) {
+            throw new OfficeDocumentException(ex);
+        }
+
+    if (isZip)
+    {
+            read(is);
+    }
+    else{
+        try{
+        //System.out.println("\nParsing Input stream, validating?: "+builder.isValidating());
+        //contentDoc=  builder.parse((InputStream)is);
+
+               Reader r = secondHack(is);
+               InputSource ins = new InputSource(r);
+            org.w3c.dom.Document newDoc = builder.parse(ins);
+            //org.w3c.dom.Document newDoc = builder.parse((InputStream)is);
+            Element rootElement=newDoc.getDocumentElement();
+
+            NodeList nodeList;
+            Node tmpNode;
+            Node rootNode = (Node)rootElement;
+                if (newDoc !=null){
+            /*content*/
+                   contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
+                   rootElement=contentDoc.getDocumentElement();
+                   rootNode = (Node)rootElement;
+
+                   // FIX (HJ): Include office:font-decls in content DOM
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
+                   if (nodeList.getLength()>0){
+                       tmpNode = contentDoc.importNode(nodeList.item(0),true);
+                       rootNode.appendChild(tmpNode);
+                   }
+
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+                   if (nodeList.getLength()>0){
+                  tmpNode = contentDoc.importNode(nodeList.item(0),true);
+                  rootNode.appendChild(tmpNode);
+                   }
+
+                    nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY);
+                   if (nodeList.getLength()>0){
+                  tmpNode = contentDoc.importNode(nodeList.item(0),true);
+                  rootNode.appendChild(tmpNode);
+                   }
+
+           /*Styles*/
+                   styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
+                   rootElement=styleDoc.getDocumentElement();
+                   rootNode = (Node)rootElement;
+
+                   // FIX (HJ): Include office:font-decls in styles DOM
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
+                   if (nodeList.getLength()>0){
+                      tmpNode = styleDoc.importNode(nodeList.item(0),true);
+                      rootNode.appendChild(tmpNode);
+                   }
+
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+                   if (nodeList.getLength()>0){
+                  tmpNode = styleDoc.importNode(nodeList.item(0),true);
+                  rootNode.appendChild(tmpNode);
+                   }
+
+                   // FIX (HJ): Include office:automatic-styles in styles DOM
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+                   if (nodeList.getLength()>0){
+                      tmpNode = styleDoc.importNode(nodeList.item(0),true);
+                      rootNode.appendChild(tmpNode);
+                   }
+
+                   // FIX (HJ): Include office:master-styles in styles DOM
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
+                   if (nodeList.getLength()>0){
+                       tmpNode = styleDoc.importNode(nodeList.item(0),true);
+                       rootNode.appendChild(tmpNode);
+                   }
+
+           /*Settings*/
+                   settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
+                   rootElement=settingsDoc.getDocumentElement();
+                   rootNode = (Node)rootElement;
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
+                   if (nodeList.getLength()>0){
+                  tmpNode = settingsDoc.importNode(nodeList.item(0),true);
+                  rootNode.appendChild(tmpNode);
+                   }
+           /*Meta*/
+                   metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
+                   rootElement=metaDoc.getDocumentElement();
+                   rootNode = (Node)rootElement;
+                   nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META);
+                   if (nodeList.getLength()>0){
+                  tmpNode = metaDoc.importNode(nodeList.item(0),true);
+                  rootNode.appendChild(tmpNode);
+                   }
+                }
+        }
+        catch (SAXException ex) {
+        throw new OfficeDocumentException(ex);
+        }
+    }
+
+    }
+
+
+
+    /**
+     *  Parse given <code>byte</code> array into a DOM
+     *  <code>Document</code> object using the
+     *  <code>DocumentBuilder</code> object.
+     *
+     *  @param  builder  <code>DocumentBuilder</code> object for parsing.
+     *  @param  bytes    <code>byte</code> array for parsing.
+     *
+     *  @return  Resulting DOM <code>Document</code> object.
+     *
+     *  @throws  SAXException  If any parsing error occurs.
+     */
+    static Document parse(DocumentBuilder builder, byte bytes[])
+        throws SAXException, IOException {
+
+        Document doc = null;
+
+        ByteArrayInputStream is = new ByteArrayInputStream(bytes);
+
+        // TODO:  replace hack with a more appropriate fix.
+
+        Reader r = hack(is);
+        InputSource ins = new InputSource(r);
+        doc = builder.parse(ins);
+
+        return doc;
+    }
+
+
+    /**
+     * Method to return the MIME type of the document.
+     *
+     * @return  String  The document's MIME type.
+     */
+    protected abstract String getDocumentMimeType();
+
+
+    /**
+     *  Write out Office ZIP file format.
+     *
+     *  @param  os  XML <code>OutputStream</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public void write(OutputStream os) throws IOException {
+        if (zip == null) {
+            zip = new OfficeZip();
+        }
+
+        initManifestDOM();
+
+        Element domEntry;
+        Element manifestRoot = manifestDoc.getDocumentElement();
+
+        // The EmbeddedObjects come first.
+        Iterator embObjs = getEmbeddedObjects();
+        while (embObjs.hasNext()) {
+            EmbeddedObject obj = (EmbeddedObject)embObjs.next();
+            obj.writeManifestData(manifestDoc);
+
+            obj.write(zip);
+        }
+
+        // Add in the entry for the Pictures directory.  Always present.
+        domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/");
+        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "");
+        manifestRoot.appendChild(domEntry);
+
+    // Write content to the Zip file and then write any of the optional
+        // data, if it exists.
+    zip.setContentXMLBytes(docToBytes(contentDoc));
+
+        domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml");
+        domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
+
+        manifestRoot.appendChild(domEntry);
+
+    if (styleDoc != null) {
+            zip.setStyleXMLBytes(docToBytes(styleDoc));
+
+            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml");
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
+            manifestRoot.appendChild(domEntry);
+        }
+
+        if (metaDoc != null) {
+            zip.setMetaXMLBytes(docToBytes(metaDoc));
+
+            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml");
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
+            manifestRoot.appendChild(domEntry);
+        }
+
+        if (settingsDoc != null) {
+            zip.setSettingsXMLBytes(docToBytes(settingsDoc));
+
+            domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml");
+            domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
+            manifestRoot.appendChild(domEntry);
+        }
+
+        zip.setManifestXMLBytes(docToBytes(manifestDoc));
+
+        zip.write(os);
+    }
+
+
+     /**
+     *  Write out Office ZIP file format.
+     *
+     *  @param  os  XML <code>OutputStream</code>.
+     *  @param  isZip <code>boolean</code>
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public void write(OutputStream os, boolean isZip) throws IOException {
+
+        // Create an OfficeZip object if one does not exist.
+        if (isZip){
+            write(os);
+    }
+    else{
+        try{
+        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder builder= builderFactory.newDocumentBuilder();
+        DOMImplementation domImpl = builder.getDOMImplementation();
+        DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null);
+        org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
+
+
+        Element rootElement=newDoc.getDocumentElement();
+        rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office");
+        rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" );
+        rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text");
+        rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table");
+
+        rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing");
+        rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" );
+        rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" );
+        rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" );
+        rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" );
+        rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" );
+        rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" );
+        rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" );
+        rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" );
+        rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" );
+        rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" );
+        rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" );
+        rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" );
+        // #i41033# OASIS format needs the "office:class" set.
+        if(getDocumentMimeType() == SXC_MIME_TYPE)
+            rootElement.setAttribute("office:class","spreadsheet" );
+        else if(getDocumentMimeType() == SXW_MIME_TYPE)
+            rootElement.setAttribute("office:class","text" );
+        rootElement.setAttribute("office:version","1.0");
+
+
+        NodeList nodeList;
+        Node tmpNode;
+        Node rootNode = (Node)rootElement;
+        if (metaDoc !=null){
+            nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META);
+            if (nodeList.getLength()>0){
+            tmpNode = newDoc.importNode(nodeList.item(0),true);
+            rootNode.appendChild(tmpNode);
+            }
+        }if (styleDoc !=null){
+            nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+            if (nodeList.getLength()>0){
+            tmpNode = newDoc.importNode(nodeList.item(0),true);
+            rootNode.appendChild(tmpNode);
+                }
+
+        }if (settingsDoc !=null){
+            nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
+            if (nodeList.getLength()>0){
+            tmpNode = newDoc.importNode(nodeList.item(0),true);
+            rootNode.appendChild(tmpNode);
+            }
+        }
+        if (contentDoc !=null){
+            nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+            if (nodeList.getLength()>0){
+            tmpNode = newDoc.importNode(nodeList.item(0),true);
+                 rootNode.appendChild(tmpNode);
+            }
+
+            nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY);
+            if (nodeList.getLength()>0){
+            tmpNode = newDoc.importNode(nodeList.item(0),true);
+            rootNode.appendChild(tmpNode);
+            }
+        }
+
+        byte contentBytes[] = docToBytes(newDoc);
+        //System.out.println(new String(contentBytes));
+        os.write(contentBytes);
+            }
+            catch(Exception exc){
+        System.out.println("\nException in OfficeDocument.write():" +exc);
+            }
+        //byte contentBytes[] = docToBytes(contentDoc);
+    }
+    }
+
+
+    /**
+     *  <p>Write out a <code>org.w3c.dom.Document</code> object into a
+     *  <code>byte</code> array.</p>
+     *
+     *  <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument
+     *  package!</p>
+     *
+     *  @param  Document  DOM <code>Document</code> object.
+     *
+     *  @return  <code>byte</code> array of DOM <code>Document</code>
+     *           object.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    static byte[] docToBytes(Document doc)
+        throws IOException {
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+        java.lang.reflect.Constructor con;
+        java.lang.reflect.Method meth;
+
+        String domImpl = doc.getClass().getName();
+
+        /*
+         * We may have multiple XML parsers in the Classpath.
+         * Depending on which one is first, the actual type of
+         * doc may vary.  Need a way to find out which API is being
+         * used and use an appropriate serialization method.
+         */
+
+        try {
+            // First of all try for JAXP 1.0
+            if (domImpl.equals("com.sun.xml.tree.XmlDocument")) {
+
+                Debug.log(Debug.INFO, "Using JAXP");
+
+                Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument");
+
+                // The method is in the XMLDocument class itself, not a helper
+                meth = jaxpDoc.getMethod("write",
+                            new Class[] { Class.forName("java.io.OutputStream") } );
+
+                meth.invoke(doc, new Object [] { baos } );
+            }
+         else if (domImpl.equals("org.apache.crimson.tree.XmlDocument"))
+        {
+                Debug.log(Debug.INFO, "Using Crimson");
+
+         Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument");
+         // The method is in the XMLDocument class itself, not a helper
+                meth = crimsonDoc.getMethod("write",
+                            new Class[] { Class.forName("java.io.OutputStream") } );
+
+                meth.invoke(doc, new Object [] { baos } );
+        }
+            else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl")
+            || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
+
+                Debug.log(Debug.INFO, "Using Xerces");
+
+                // Try for Xerces
+                Class xercesSer =
+                        Class.forName("org.apache.xml.serialize.XMLSerializer");
+
+                // Get the OutputStream constructor
+                // May want to use the OutputFormat parameter at some stage too
+                con = xercesSer.getConstructor(new Class []
+                        { Class.forName("java.io.OutputStream"),
+                          Class.forName("org.apache.xml.serialize.OutputFormat") } );
+
+
+                // Get the serialize method
+                meth = xercesSer.getMethod("serialize",
+                            new Class [] { Class.forName("org.w3c.dom.Document") } );
+
+
+                // Get an instance
+                Object serializer = con.newInstance(new Object [] { baos, null } );
+
+
+                // Now call serialize to write the document
+                meth.invoke(serializer, new Object [] { doc } );
+            }
+            else if (domImpl.equals("gnu.xml.dom.DomDocument")) {
+                Debug.log(Debug.INFO, "Using GNU");
+
+                Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer");
+
+                // Get the serialize method
+                meth = gnuSer.getMethod("serialize",
+                            new Class [] { Class.forName("org.w3c.dom.Node"),
+                            Class.forName("java.io.OutputStream") } );
+
+                // Get an instance
+                Object serializer = gnuSer.newInstance();
+
+                // Now call serialize to write the document
+                meth.invoke(serializer, new Object [] { doc, baos } );
+            }
+            else {
+        try {
+            DOMSource domSource = new DOMSource(doc);
+            StringWriter writer = new StringWriter();
+            StreamResult result = new StreamResult(writer);
+            TransformerFactory tf = TransformerFactory.newInstance();
+            Transformer transformer = tf.newTransformer();
+            transformer.transform(domSource, result);
+            return writer.toString().getBytes();
+            }
+                catch (Exception e) {
+                    // We don't have another parser
+                    throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl);
+                }
+            }
+        }
+        catch (ClassNotFoundException cnfe) {
+            throw new IOException(cnfe.toString());
+        }
+        catch (Exception e) {
+            // We may get some other errors, but the bottom line is that
+            // the steps being executed no longer work
+            throw new IOException(e.toString());
+        }
+
+        byte bytes[] = baos.toByteArray();
+
+        return bytes;
+    }
+
+
+    /**
+     *  Initializes a new DOM <code>Document</code> with the content
+     *  containing minimum OpenOffice XML tags.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public final void initContentDOM() throws IOException {
+
+        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
+
+        // this is a work-around for a bug in Office6.0 - not really
+        // needed but StarCalc 6.0 will crash without this tag.
+        Element root = contentDoc.getDocumentElement();
+
+        Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS);
+        root.appendChild(child);
+
+        child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+        root.appendChild(child);
+
+        child = contentDoc.createElement(TAG_OFFICE_BODY);
+        root.appendChild(child);
+    }
+
+    /**
+     *  Initializes a new DOM <code>Document</code> with the content
+     *  containing minimum OpenOffice XML tags.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public final void initSettingsDOM() throws IOException {
+
+        settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
+
+        // this is a work-around for a bug in Office6.0 - not really
+        // needed but StarCalc 6.0 will crash without this tag.
+        Element root = settingsDoc.getDocumentElement();
+
+        Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS);
+        root.appendChild(child);
+    }
+
+    /**
+     *  Initializes a new DOM Document with styles
+     *  containing minimum OpenOffice XML tags.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public final void initStyleDOM() throws IOException {
+
+        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
+    }
+
+    /**
+     *  <p>Creates a new DOM <code>Document</code> containing minimum
+     *  OpenOffice XML tags.</p>
+     *
+     *  <p>This method uses the subclass
+     *  <code>getOfficeClassAttribute</code> method to get the
+     *  attribute for <i>office:class</i>.</p>
+     *
+     *  @param  rootName  root name of <code>Document</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private final Document createSettingsDOM(String rootName) throws IOException {
+
+        Document doc = null;
+
+        try {
+
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            doc = builder.newDocument();
+
+        } catch (ParserConfigurationException ex) {
+
+            throw new OfficeDocumentException(ex);
+
+        }
+
+        Element root = (Element) doc.createElement(rootName);
+        doc.appendChild(root);
+
+        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
+        root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink");
+        root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
+        root.setAttribute("office:version", "1.0");
+
+        return doc;
+    }
+
+
+    /**
+     *  <p>Creates a new DOM <code>Document</code> containing minimum
+     *  OpenOffice XML tags.</p>
+     *
+     *  <p>This method uses the subclass
+     *  <code>getOfficeClassAttribute</code> method to get the
+     *  attribute for <i>office:class</i>.</p>
+     *
+     *  @param  rootName  root name of <code>Document</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private final Document createDOM(String rootName) throws IOException {
+
+        Document doc = null;
+
+        try {
+
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            doc = builder.newDocument();
+
+        } catch (ParserConfigurationException ex) {
+
+            throw new OfficeDocumentException(ex);
+
+        }
+
+        Element root = (Element) doc.createElement(rootName);
+        doc.appendChild(root);
+
+        root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
+        root.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
+        root.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
+        root.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
+        root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
+        root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
+        root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
+        root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
+        root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
+        root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
+        root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
+        root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
+        root.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
+        root.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
+        root.setAttribute("office:class", getOfficeClassAttribute());
+        root.setAttribute("office:version", "1.0");
+
+        return doc;
+    }
+
+
+    /**
+     *  Return the <i>office:class</i> attribute value.
+     *
+     *  @return  The attribute value.
+     */
+    protected abstract String getOfficeClassAttribute();
+
+
+    /**
+     *  <p>Hacked code to filter <!DOCTYPE> tag before
+     *  sending stream to parser.</p>
+     *
+     *  <p>This hacked code needs to be changed later on.</p>
+     *
+     *  <p>Issue: using current jaxp1.0 parser, there is no way
+     *  to turn off processing of dtds.  Current set of dtds
+     *  have bugs, processing them will throw exceptions.</p>
+     *
+     *  <p>This is a simple hack that assumes the whole <!DOCTYPE>
+     *  tag are all in the same line.  This is sufficient for
+     *  current StarOffice 6.0 generated XML files.  Since this
+     *  hack really needs to go away, I don't want to spend
+     *  too much time in making it a perfect hack.</p>
+     *  FIX (HJ): Removed requirement for DOCTYPE to be in one line
+     *  FIX (HJ): No longer removes newlines
+     *
+     *  @param  is  <code>InputStream</code> to be filtered.
+     *
+     *  @return  Reader value without the <!DOCTYPE> tag.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private static Reader hack(InputStream is) throws IOException {
+
+        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+        StringBuffer buffer = new StringBuffer();
+
+        String str = null;
+
+        while ((str = br.readLine()) != null) {
+
+            int sIndex = str.indexOf("<!DOCTYPE");
+
+            if (sIndex > -1) {
+
+                buffer.append(str.substring(0, sIndex));
+
+                int eIndex = str.indexOf('>', sIndex + 8 );
+
+                if (eIndex > -1) {
+
+                    buffer.append(str.substring(eIndex + 1, str.length()));
+                    // FIX (HJ): Preserve the newline
+                    buffer.append("\n");
+
+                } else {
+
+                    // FIX (HJ): More than one line. Search for '>' in following lines
+                    boolean bOK = false;
+                    while ((str = br.readLine())!=null) {
+                        eIndex = str.indexOf('>');
+                        if (eIndex>-1) {
+                            buffer.append(str.substring(eIndex+1));
+                            // FIX (HJ): Preserve the newline
+                            buffer.append("\n");
+                            bOK = true;
+                            break;
+                        }
+                    }
+
+                    if (!bOK) { throw new IOException("Invalid XML"); }
+                }
+
+            } else {
+
+                buffer.append(str);
+                // FIX (HJ): Preserve the newline
+                buffer.append("\n");
+            }
+        }
+
+        StringReader r = new StringReader(buffer.toString());
+        return r;
+    }
+
+    /**
+     *  <p>Transform the InputStream to a Reader Stream.</p>
+     *
+     *  <p>This hacked code needs to be changed later on.</p>
+     *
+     *  <p>Issue: the new oasis input file stream means
+     *  that the old input stream fails. see #i33702# </p>
+     *
+     *  @param  is  <code>InputStream</code> to be filtered.
+     *
+     *  @return  Reader value of the InputStream().
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private static Reader secondHack(InputStream is) throws IOException {
+
+        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+        char[] charArray = new char[4096];
+        StringBuffer sBuf = new StringBuffer();
+        int n = 0;
+        while ((n=br.read(charArray, 0, charArray.length)) > 0)
+            sBuf.append(charArray, 0, n);
+
+        // ensure there is no trailing garbage after the end of the stream.
+        int sIndex = sBuf.lastIndexOf("</office:document>");
+        sBuf.delete(sIndex, sBuf.length());
+        sBuf.append("</office:document>");
+        StringReader r = new StringReader(sBuf.toString());
+        return r;
+    }
+
+
+    /**
+     * Method to create the initial entries in the manifest.xml file stored
+     * in an SX? file.
+     */
+    private void initManifestDOM() throws IOException {
+
+        try {
+            DocumentBuilder builder = factory.newDocumentBuilder();
+            DOMImplementation domImpl = builder.getDOMImplementation();
+
+            DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT,
+                                        "-//OpenOffice.org//DTD Manifest 1.0//EN",
+                                        "Manifest.dtd");
+        manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);
+        } catch (ParserConfigurationException ex) {
+            throw new OfficeDocumentException(ex);
+        }
+
+        // Add the <manifest:manifest> entry
+        Element manifestRoot = manifestDoc.getDocumentElement();
+
+        manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");
+
+        Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE);
+
+        docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
+        docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
+
+        manifestRoot.appendChild(docRoot);
+    }
+}
+