summaryrefslogtreecommitdiff
path: root/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java
diff options
context:
space:
mode:
Diffstat (limited to 'xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java')
-rw-r--r--xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java1265
1 files changed, 1265 insertions, 0 deletions
diff --git a/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java b/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java
new file mode 100644
index 000000000000..d3372be5a757
--- /dev/null
+++ b/xmerge/source/xmerge/java/org/openoffice/xmerge/converter/xml/OfficeDocument.java
@@ -0,0 +1,1265 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.BufferedReader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.InputStreamReader;
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.HashMap;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+import org.w3c.dom.Document;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.w3c.dom.NamedNodeMap;
+import org.xml.sax.SAXException;
+
+import javax.xml.transform.*;
+import javax.xml.transform.dom.*;
+import javax.xml.transform.stream.*;
+
+import org.openoffice.xmerge.util.Resources;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ * An implementation of <code>Document</code> for
+ * StarOffice documents.
+ */
+public abstract class OfficeDocument
+ implements org.openoffice.xmerge.Document,
+ OfficeConstants {
+
+ /**
+ * Factory for <code>DocumentBuilder</code> objects. It is static, so
+ * the validating/namespace-aware settings applied by the constructor
+ * are shared by every instance of this class.
+ */
+ private static DocumentBuilderFactory factory =
+ DocumentBuilderFactory.newInstance();
+
+ /** DOM <code>Document</code> of content.xml. */
+ private Document contentDoc = null;
+
+ /** DOM <code>Document</code> of meta.xml. */
+ private Document metaDoc = null;
+
+ /** DOM <code>Document</code> of settings.xml. */
+ private Document settingsDoc = null;
+
+ /** DOM <code>Document</code> of styles.xml. */
+ private Document styleDoc = null;
+
+ /** DOM <code>Document</code> of META-INF/manifest.xml. */
+ private Document manifestDoc = null;
+
+ /** <code>Document</code> name with the file extension stripped. */
+ private String documentName = null;
+ /** <code>documentName</code> with the file extension appended. */
+ private String fileName = null;
+
+ /** Resources object, obtained in the constructor. */
+ private Resources res = null;
+
+ /**
+ * <code>OfficeZip</code> object to store zip contents from
+ * read <code>InputStream</code>. Note that this member
+ * will still be null if it was initialized using a template
+ * file instead of reading from a StarOffice zipped
+ * XML file.
+ */
+ private OfficeZip zip = null;
+
+ /**
+ * Collection to keep track of the embedded objects in the document,
+ * keyed by object name. Stays null until an object is added or the
+ * manifest is scanned for the first time.
+ */
+ private Map embeddedObjects = null;
+
+ /**
+  * Creates a document with the given name. Delegates to the
+  * three-argument constructor with <code>namespaceAware</code> set to
+  * <code>true</code> and <code>validating</code> set to
+  * <code>false</code>.
+  *
+  * @param  name  <code>Document</code> name.
+  */
+ public OfficeDocument(String name) {
+     this(name, true, false);
+ }
+
+
+ /**
+ * Constructor with arguments to set <code>namespaceAware</code>
+ * and <code>validating</code> flags.
+ *
+ * @param name <code>Document</code> name (may or may not
+ * contain extension).
+ * @param namespaceAware Value for <code>namespaceAware</code> flag.
+ * @param validating Value for <code>validating</code> flag.
+ */
+ public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
+
+ res = Resources.getInstance();
+ factory.setValidating(validating);
+ factory.setNamespaceAware(namespaceAware);
+ this.documentName = trimDocumentName(name);
+ this.fileName = documentName + getFileExtension();
+ }
+
+
+ /**
+  * Removes the file extension from the <code>Document</code>
+  * name, if the name ends with it.
+  *
+  * <p>The comparison ignores case and does not depend on the default
+  * locale. Lower-casing the name with the default locale would, for
+  * example, turn <code>I</code> into a dotless i under a Turkish
+  * locale and make the extension test fail.</p>
+  *
+  * @param  name  Full <code>Document</code> name, with or without
+  *               extension.
+  *
+  * @return  Name of <code>Document</code> without the extension.
+  */
+ private String trimDocumentName(String name) {
+     String ext = getFileExtension();
+     int extStart = name.length() - ext.length();
+
+     // regionMatches returns false for a negative offset, so a name
+     // shorter than the extension is returned unchanged.
+     if (name.regionMatches(true, extStart, ext, 0, ext.length())) {
+         return name.substring(0, extStart);
+     }
+
+     return name;
+ }
+
+
+ /**
+  * Returns the DOM <code>Document</code> held for content.xml.
+  *
+  * <p>The constructor does not create a content DOM. The value is
+  * <code>null</code> until <code>read</code>,
+  * <code>initContentDOM</code> or <code>setContentDOM</code> has
+  * been called on this object.</p>
+  *
+  * @return  DOM <code>Document</code> object, possibly
+  *          <code>null</code>.
+  */
+ public Document getContentDOM() {
+     return this.contentDoc;
+ }
+
+ /**
+  * Returns the DOM <code>Document</code> held for meta.xml.
+  *
+  * <p>The constructor does not create a meta DOM. The value is
+  * <code>null</code> until a <code>read</code> method has found
+  * meta data or <code>setMetaDOM</code> has been called on this
+  * object.</p>
+  *
+  * @return  DOM <code>Document</code> object, possibly
+  *          <code>null</code>.
+  */
+ public Document getMetaDOM() {
+     return this.metaDoc;
+ }
+
+
+ /**
+  * Returns the DOM <code>Document</code> held for settings.xml.
+  *
+  * <p>The constructor does not create a settings DOM. The value is
+  * <code>null</code> until a <code>read</code> method has found
+  * settings data, or <code>initSettingsDOM</code> or
+  * <code>setSettingsDOM</code> has been called on this object.</p>
+  *
+  * @return  DOM <code>Document</code> object, possibly
+  *          <code>null</code>.
+  */
+ public Document getSettingsDOM() {
+     return this.settingsDoc;
+ }
+
+
+ /**
+  * Replaces the content tree of the document.
+  *
+  * @param  newDom  <code>Node</code> containing the new content tree.
+  *                 It is cast to <code>Document</code>, so any other
+  *                 node type causes a <code>ClassCastException</code>.
+  */
+ public void setContentDOM(Node newDom) {
+     this.contentDoc = (Document) newDom;
+ }
+
+
+ /**
+  * Replaces the meta tree of the document.
+  *
+  * @param  newDom  <code>Node</code> containing the new meta tree.
+  *                 It is cast to <code>Document</code>, so any other
+  *                 node type causes a <code>ClassCastException</code>.
+  */
+ public void setMetaDOM(Node newDom) {
+     this.metaDoc = (Document) newDom;
+ }
+
+
+ /**
+  * Replaces the settings tree of the document.
+  *
+  * @param  newDom  <code>Node</code> containing the new settings tree.
+  *                 It is cast to <code>Document</code>, so any other
+  *                 node type causes a <code>ClassCastException</code>.
+  */
+ public void setSettingsDOM(Node newDom) {
+     this.settingsDoc = (Document) newDom;
+ }
+
+
+ /**
+  * Replaces the style tree of the document.
+  *
+  * @param  newDom  <code>Node</code> containing the new style tree.
+  *                 It is cast to <code>Document</code>, so any other
+  *                 node type causes a <code>ClassCastException</code>.
+  */
+ public void setStyleDOM(Node newDom) {
+     this.styleDoc = (Document) newDom;
+ }
+
+
+ /**
+  * Returns the DOM <code>Document</code> held for the styles.
+  *
+  * <p>The constructor does not create a style DOM, and a
+  * <code>read</code> method only builds one when the input contains
+  * style data, so the result may be <code>null</code>. Call
+  * <code>initStyleDOM</code> first when a new style DOM is
+  * needed.</p>
+  *
+  * @return  DOM <code>Document</code> object, possibly
+  *          <code>null</code>.
+  */
+ public Document getStyleDOM() {
+     return this.styleDoc;
+ }
+
+
+ /**
+  * Returns the <code>Document</code> name, i.e. the name passed to
+  * the constructor with the file extension stripped.
+  *
+  * @return  The name of <code>Document</code>.
+  */
+ public String getName() {
+     return this.documentName;
+ }
+
+
+ /**
+  * Returns the file name of the <code>Document</code>: the document
+  * name followed by the extension reported by
+  * <code>getFileExtension</code>.
+  *
+  * @return  The file name of <code>Document</code>.
+  */
+ public String getFileName() {
+     return this.fileName;
+ }
+
+
+ /**
+ * Returns the file extension for this type of
+ * <code>Document</code>.
+ *
+ * <p>The constructor appends the returned value directly to the
+ * document name to build the file name, and strips it from the end
+ * of the name it is given. Implementations presumably include the
+ * leading separator (e.g. the dot) - confirm against subclasses.
+ * The method is invoked from the constructor, so it must not rely
+ * on subclass state initialised later.</p>
+ *
+ * @return The file extension of <code>Document</code>.
+ */
+ protected abstract String getFileExtension();
+
+
+ /**
+  * Returns all the embedded objects (graphics, formulae, etc.) present in
+  * this document.
+  *
+  * <p>On the first call after a manifest has been read, the manifest
+  * entries are scanned and cached. If no object has been added and no
+  * manifest is available, an empty <code>Iterator</code> is returned
+  * and nothing is cached, so a manifest read later is still
+  * picked up.</p>
+  *
+  * @return  An <code>Iterator</code> of <code>EmbeddedObject</code> objects,
+  *          never <code>null</code>.
+  */
+ public Iterator getEmbeddedObjects() {
+
+     if (embeddedObjects == null) {
+
+         if (manifestDoc == null) {
+             // Nothing added and nothing to scan.
+             return java.util.Collections.EMPTY_LIST.iterator();
+         }
+
+         embeddedObjects = new HashMap();
+
+         // Read the manifest file and construct the collection of objects
+         NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
+
+         int len = nl.getLength();
+         for (int i = 0; i < len; i++) {
+             NamedNodeMap attrs = nl.item(i).getAttributes();
+
+             String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
+             String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();
+
+             /*
+              * According to OpenOffice.org XML File Format document (ver. 1)
+              * there are only two types of embedded object:
+              *
+              *      Objects with an XML representation.
+              *      Objects without an XML representation.
+              *
+              * The former are represented by one or more XML files.
+              * The latter are in binary form.
+              */
+             if (type.startsWith("application/vnd.sun.xml")) {
+                 if (path.equals("/")) {
+                     // Exclude the main document entries
+                     continue;
+                 }
+                 // Take off the trailing '/', if there is one
+                 String name = path.endsWith("/")
+                         ? path.substring(0, path.length() - 1)
+                         : path;
+                 embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
+             }
+             else if (type.equals("text/xml")) {
+                 // XML entries are either embedded StarOffice doc entries or main
+                 // document entries
+                 continue;
+             }
+             else { // FIX (HJ): allows empty MIME type
+                 embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
+             }
+         }
+     }
+
+     return embeddedObjects.values().iterator();
+ }
+
+ /**
+ * Returns the embedded object corresponding to the name provided.
+ * The name should be stripped of any preceding path characters, such as
+ * '/', '.' or '#'.
+ *
+ * @param name The name of the embedded object to retrieve.
+ *
+ * @return An <code>EmbeddedObject</code> instance representing the named
+ * object.
+ */
+ public EmbeddedObject getEmbeddedObject(String name) {
+ if (name == null) {
+ return null;
+ }
+
+ if (embeddedObjects == null) {
+ getEmbeddedObjects();
+ }
+
+ if (embeddedObjects.containsKey(name)) {
+ return (EmbeddedObject)embeddedObjects.get(name);
+ }
+ else {
+ return null;
+ }
+ }
+
+
+ /**
+  * Registers a new embedded object with the document under the name
+  * reported by the object itself. A <code>null</code> argument is
+  * ignored.
+  *
+  * <p>NOTE(review): when the collection does not exist yet it is
+  * created empty here, after which <code>getEmbeddedObjects</code> no
+  * longer scans the manifest - confirm that this is intended for
+  * documents that were read from a file.</p>
+  *
+  * @param  embObj  An instance of <code>EmbeddedObject</code>.
+  */
+ public void addEmbeddedObject(EmbeddedObject embObj) {
+     if (embObj != null) {
+         if (embeddedObjects == null) {
+             embeddedObjects = new HashMap();
+         }
+         embeddedObjects.put(embObj.getName(), embObj);
+     }
+ }
+
+
+ /**
+  * Reads a zipped Office <code>Document</code> from the given
+  * <code>InputStream</code> and parses its XML entries into DOM trees.
+  *
+  * <p>content.xml is mandatory. The style, meta, settings and
+  * manifest entries are parsed only when the zip contains them.</p>
+  *
+  * @param  is  Office document <code>InputStream</code>.
+  *
+  * @throws  IOException  If any I/O error occurs. Parser configuration
+  *                       and parse failures, and a missing content.xml,
+  *                       are reported as
+  *                       <code>OfficeDocumentException</code>.
+  */
+ public void read(InputStream is) throws IOException {
+
+     Debug.log(Debug.INFO, "reading Office file");
+
+     final DocumentBuilder parser;
+     try {
+         parser = factory.newDocumentBuilder();
+     } catch (ParserConfigurationException ex) {
+         throw new OfficeDocumentException(ex);
+     }
+
+     // Load the whole zip before looking at individual entries.
+     zip = new OfficeZip();
+     zip.read(is);
+
+     byte[] contentBytes = zip.getContentXMLBytes();
+     if (contentBytes == null) {
+         throw new OfficeDocumentException("Entry content.xml not found in file");
+     }
+
+     try {
+         contentDoc = parse(parser, contentBytes);
+
+         // Everything below is optional in the zip.
+         byte[] styleBytes = zip.getStyleXMLBytes();
+         if (styleBytes != null) {
+             styleDoc = parse(parser, styleBytes);
+         }
+
+         byte[] metaBytes = zip.getMetaXMLBytes();
+         if (metaBytes != null) {
+             metaDoc = parse(parser, metaBytes);
+         }
+
+         byte[] settingsBytes = zip.getSettingsXMLBytes();
+         if (settingsBytes != null) {
+             settingsDoc = parse(parser, settingsBytes);
+         }
+
+         // META-INF/manifest.xml
+         byte[] manifestBytes = zip.getManifestXMLBytes();
+         if (manifestBytes != null) {
+             manifestDoc = parse(parser, manifestBytes);
+         }
+     } catch (SAXException ex) {
+         throw new OfficeDocumentException(ex);
+     }
+ }
+
+
+ /**
+ * Read the Office <code>Document</code> from the given
+ * <code>InputStream</code>.
+ *
+ * @param is Office document <code>InputStream</code>.
+ * @param isZip <code>boolean</code> Identifies whether
+ * a file is zipped or not
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ public void read(InputStream is, boolean isZip) throws IOException {
+
+ Debug.log(Debug.INFO, "reading Office file");
+
+ DocumentBuilder builder = null;
+
+ try {
+ builder = factory.newDocumentBuilder();
+ } catch (ParserConfigurationException ex) {
+ throw new OfficeDocumentException(ex);
+ }
+
+ if (isZip)
+ {
+ read(is);
+ }
+ else{
+ try{
+ //System.out.println("\nParsing Input stream, validating?: "+builder.isValidating());
+ //contentDoc= builder.parse((InputStream)is);
+
+ Reader r = secondHack(is);
+ InputSource ins = new InputSource(r);
+ org.w3c.dom.Document newDoc = builder.parse(ins);
+ //org.w3c.dom.Document newDoc = builder.parse((InputStream)is);
+ Element rootElement=newDoc.getDocumentElement();
+
+ NodeList nodeList;
+ Node tmpNode;
+ Node rootNode = (Node)rootElement;
+ if (newDoc !=null){
+ /*content*/
+ contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
+ rootElement=contentDoc.getDocumentElement();
+ rootNode = (Node)rootElement;
+
+ // FIX (HJ): Include office:font-decls in content DOM
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
+ if (nodeList.getLength()>0){
+ tmpNode = contentDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = contentDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY);
+ if (nodeList.getLength()>0){
+ tmpNode = contentDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ /*Styles*/
+ styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
+ rootElement=styleDoc.getDocumentElement();
+ rootNode = (Node)rootElement;
+
+ // FIX (HJ): Include office:font-decls in styles DOM
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
+ if (nodeList.getLength()>0){
+ tmpNode = styleDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = styleDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ // FIX (HJ): Include office:automatic-styles in styles DOM
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = styleDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ // FIX (HJ): Include office:master-styles in styles DOM
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = styleDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ /*Settings*/
+ settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
+ rootElement=settingsDoc.getDocumentElement();
+ rootNode = (Node)rootElement;
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
+ if (nodeList.getLength()>0){
+ tmpNode = settingsDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+ /*Meta*/
+ metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
+ rootElement=metaDoc.getDocumentElement();
+ rootNode = (Node)rootElement;
+ nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META);
+ if (nodeList.getLength()>0){
+ tmpNode = metaDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+ }
+ }
+ catch (SAXException ex) {
+ throw new OfficeDocumentException(ex);
+ }
+ }
+
+ }
+
+
+
+ /**
+  * Turns the given <code>byte</code> array into a DOM
+  * <code>Document</code> using the supplied
+  * <code>DocumentBuilder</code>. The bytes are passed through
+  * <code>hack</code> before they reach the parser.
+  *
+  * @param  builder  <code>DocumentBuilder</code> object for parsing.
+  * @param  bytes    <code>byte</code> array for parsing.
+  *
+  * @return  Resulting DOM <code>Document</code> object.
+  *
+  * @throws  SAXException  If any parsing error occurs.
+  * @throws  IOException   If any I/O error occurs.
+  */
+ static Document parse(DocumentBuilder builder, byte bytes[])
+     throws SAXException, IOException {
+
+     // TODO: replace hack with a more appropriate fix.
+     Reader filtered = hack(new ByteArrayInputStream(bytes));
+
+     return builder.parse(new InputSource(filtered));
+ }
+
+
+ /**
+ * Method to return the MIME type of the document.
+ *
+ * <p>The flat-XML <code>write</code> method checks the returned value
+ * against <code>SXC_MIME_TYPE</code> and <code>SXW_MIME_TYPE</code>
+ * to decide which <i>office:class</i> attribute to emit.</p>
+ *
+ * @return String The document's MIME type.
+ */
+ protected abstract String getDocumentMimeType();
+
+
+ /**
+  * Writes the document out in Office ZIP file format.
+  *
+  * <p>The manifest is initialised, every embedded object is written,
+  * then content.xml and - where present - the style, meta and
+  * settings trees are serialized into the zip, each with a matching
+  * manifest entry. The manifest itself is stored last.</p>
+  *
+  * @param  os  <code>OutputStream</code> receiving the zip.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public void write(OutputStream os) throws IOException {
+     if (zip == null) {
+         zip = new OfficeZip();
+     }
+
+     initManifestDOM();
+
+     Element manifestRoot = manifestDoc.getDocumentElement();
+
+     // The EmbeddedObjects come first.
+     for (Iterator it = getEmbeddedObjects(); it.hasNext();) {
+         EmbeddedObject embedded = (EmbeddedObject) it.next();
+         embedded.writeManifestData(manifestDoc);
+         embedded.write(zip);
+     }
+
+     // Entry for the Pictures directory. Always present.
+     appendManifestEntry(manifestRoot, "Pictures/", "");
+
+     // Content is mandatory; the remaining parts are optional.
+     zip.setContentXMLBytes(docToBytes(contentDoc));
+     appendManifestEntry(manifestRoot, "content.xml", "text/xml");
+
+     if (styleDoc != null) {
+         zip.setStyleXMLBytes(docToBytes(styleDoc));
+         appendManifestEntry(manifestRoot, "styles.xml", "text/xml");
+     }
+
+     if (metaDoc != null) {
+         zip.setMetaXMLBytes(docToBytes(metaDoc));
+         appendManifestEntry(manifestRoot, "meta.xml", "text/xml");
+     }
+
+     if (settingsDoc != null) {
+         zip.setSettingsXMLBytes(docToBytes(settingsDoc));
+         appendManifestEntry(manifestRoot, "settings.xml", "text/xml");
+     }
+
+     zip.setManifestXMLBytes(docToBytes(manifestDoc));
+
+     zip.write(os);
+ }
+
+
+ /**
+  * Appends one file entry element to the manifest root.
+  *
+  * @param  manifestRoot  Root element of the manifest DOM.
+  * @param  path          Value for the entry's path attribute.
+  * @param  type          Value for the entry's media type attribute.
+  */
+ private void appendManifestEntry(Element manifestRoot, String path, String type) {
+     Element entry = manifestDoc.createElement(TAG_MANIFEST_FILE);
+     entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, path);
+     entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, type);
+     manifestRoot.appendChild(entry);
+ }
+
+
+ /**
+ * Write out Office ZIP file format.
+ *
+ * @param os XML <code>OutputStream</code>.
+ * @param isZip <code>boolean</code>
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ public void write(OutputStream os, boolean isZip) throws IOException {
+
+ // Create an OfficeZip object if one does not exist.
+ if (isZip){
+ write(os);
+ }
+ else{
+ try{
+ DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder builder= builderFactory.newDocumentBuilder();
+ DOMImplementation domImpl = builder.getDOMImplementation();
+ DocumentType docType =domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null);
+ org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
+
+
+ Element rootElement=newDoc.getDocumentElement();
+ rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office");
+ rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" );
+ rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text");
+ rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table");
+
+ rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing");
+ rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" );
+ rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" );
+ rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" );
+ rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" );
+ rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" );
+ rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" );
+ rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" );
+ rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" );
+ rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" );
+ rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" );
+ rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" );
+ rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" );
+ // #i41033# OASIS format needs the "office:class" set.
+ if(getDocumentMimeType() == SXC_MIME_TYPE)
+ rootElement.setAttribute("office:class","spreadsheet" );
+ else if(getDocumentMimeType() == SXW_MIME_TYPE)
+ rootElement.setAttribute("office:class","text" );
+ rootElement.setAttribute("office:version","1.0");
+
+
+ NodeList nodeList;
+ Node tmpNode;
+ Node rootNode = (Node)rootElement;
+ if (metaDoc !=null){
+ nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META);
+ if (nodeList.getLength()>0){
+ tmpNode = newDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+ }if (styleDoc !=null){
+ nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = newDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ }if (settingsDoc !=null){
+ nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
+ if (nodeList.getLength()>0){
+ tmpNode = newDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+ }
+ if (contentDoc !=null){
+ nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+ if (nodeList.getLength()>0){
+ tmpNode = newDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+
+ nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY);
+ if (nodeList.getLength()>0){
+ tmpNode = newDoc.importNode(nodeList.item(0),true);
+ rootNode.appendChild(tmpNode);
+ }
+ }
+
+ byte contentBytes[] = docToBytes(newDoc);
+ //System.out.println(new String(contentBytes));
+ os.write(contentBytes);
+ }
+ catch(Exception exc){
+ System.out.println("\nException in OfficeDocument.write():" +exc);
+ }
+ //byte contentBytes[] = docToBytes(contentDoc);
+ }
+ }
+
+
+ /**
+ * <p>Write out a <code>org.w3c.dom.Document</code> object into a
+ * <code>byte</code> array.</p>
+ *
+ * <p>TODO: remove dependency on com.sun.xml.tree.XmlDocument
+ * package!</p>
+ *
+ * @param Document DOM <code>Document</code> object.
+ *
+ * @return <code>byte</code> array of DOM <code>Document</code>
+ * object.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ static byte[] docToBytes(Document doc)
+ throws IOException {
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ java.lang.reflect.Constructor con;
+ java.lang.reflect.Method meth;
+
+ String domImpl = doc.getClass().getName();
+
+ /*
+ * We may have multiple XML parsers in the Classpath.
+ * Depending on which one is first, the actual type of
+ * doc may vary. Need a way to find out which API is being
+ * used and use an appropriate serialization method.
+ */
+
+ try {
+ // First of all try for JAXP 1.0
+ if (domImpl.equals("com.sun.xml.tree.XmlDocument")) {
+
+ Debug.log(Debug.INFO, "Using JAXP");
+
+ Class jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument");
+
+ // The method is in the XMLDocument class itself, not a helper
+ meth = jaxpDoc.getMethod("write",
+ new Class[] { Class.forName("java.io.OutputStream") } );
+
+ meth.invoke(doc, new Object [] { baos } );
+ }
+ else if (domImpl.equals("org.apache.crimson.tree.XmlDocument"))
+ {
+ Debug.log(Debug.INFO, "Using Crimson");
+
+ Class crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument");
+ // The method is in the XMLDocument class itself, not a helper
+ meth = crimsonDoc.getMethod("write",
+ new Class[] { Class.forName("java.io.OutputStream") } );
+
+ meth.invoke(doc, new Object [] { baos } );
+ }
+ else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl")
+ || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
+
+ Debug.log(Debug.INFO, "Using Xerces");
+
+ // Try for Xerces
+ Class xercesSer =
+ Class.forName("org.apache.xml.serialize.XMLSerializer");
+
+ // Get the OutputStream constructor
+ // May want to use the OutputFormat parameter at some stage too
+ con = xercesSer.getConstructor(new Class []
+ { Class.forName("java.io.OutputStream"),
+ Class.forName("org.apache.xml.serialize.OutputFormat") } );
+
+
+ // Get the serialize method
+ meth = xercesSer.getMethod("serialize",
+ new Class [] { Class.forName("org.w3c.dom.Document") } );
+
+
+ // Get an instance
+ Object serializer = con.newInstance(new Object [] { baos, null } );
+
+
+ // Now call serialize to write the document
+ meth.invoke(serializer, new Object [] { doc } );
+ }
+ else if (domImpl.equals("gnu.xml.dom.DomDocument")) {
+ Debug.log(Debug.INFO, "Using GNU");
+
+ Class gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer");
+
+ // Get the serialize method
+ meth = gnuSer.getMethod("serialize",
+ new Class [] { Class.forName("org.w3c.dom.Node"),
+ Class.forName("java.io.OutputStream") } );
+
+ // Get an instance
+ Object serializer = gnuSer.newInstance();
+
+ // Now call serialize to write the document
+ meth.invoke(serializer, new Object [] { doc, baos } );
+ }
+ else {
+ try {
+ DOMSource domSource = new DOMSource(doc);
+ StringWriter writer = new StringWriter();
+ StreamResult result = new StreamResult(writer);
+ TransformerFactory tf = TransformerFactory.newInstance();
+ Transformer transformer = tf.newTransformer();
+ transformer.transform(domSource, result);
+ return writer.toString().getBytes();
+ }
+ catch (Exception e) {
+ // We don't have another parser
+ throw new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl);
+ }
+ }
+ }
+ catch (ClassNotFoundException cnfe) {
+ throw new IOException(cnfe.toString());
+ }
+ catch (Exception e) {
+ // We may get some other errors, but the bottom line is that
+ // the steps being executed no longer work
+ throw new IOException(e.toString());
+ }
+
+ byte bytes[] = baos.toByteArray();
+
+ return bytes;
+ }
+
+
+ /**
+ * Initializes a new DOM <code>Document</code> with the content
+ * containing minimum OpenOffice XML tags.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ public final void initContentDOM() throws IOException {
+
+ contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
+
+ // this is a work-around for a bug in Office6.0 - not really
+ // needed but StarCalc 6.0 will crash without this tag.
+ Element root = contentDoc.getDocumentElement();
+
+ Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS);
+ root.appendChild(child);
+
+ child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+ root.appendChild(child);
+
+ child = contentDoc.createElement(TAG_OFFICE_BODY);
+ root.appendChild(child);
+ }
+
+ /**
+  * Replaces the settings DOM with a new <code>Document</code> holding
+  * the minimum OpenOffice XML tags: an empty settings element under
+  * the settings root.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public final void initSettingsDOM() throws IOException {
+
+     settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
+
+     // this is a work-around for a bug in Office6.0 - not really
+     // needed but StarCalc 6.0 will crash without this tag.
+     settingsDoc.getDocumentElement().appendChild(
+         settingsDoc.createElement(TAG_OFFICE_SETTINGS));
+ }
+
+ /**
+  * Replaces the style DOM with a new <code>Document</code> that
+  * contains only the styles root element.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public final void initStyleDOM() throws IOException {
+     this.styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
+ }
+
+ /**
+  * <p>Creates a new DOM <code>Document</code> for settings,
+  * containing only a root element with the office, xlink and config
+  * namespace declarations and the <i>office:version</i>
+  * attribute.</p>
+  *
+  * <p>Unlike <code>createDOM</code>, no <i>office:class</i>
+  * attribute is set. The xlink prefix is bound to the W3C XLink
+  * namespace, the same URI the other documents produced by this
+  * class declare.</p>
+  *
+  * @param  rootName  root name of <code>Document</code>.
+  *
+  * @return  The new <code>Document</code>.
+  *
+  * @throws  IOException  If any I/O error occurs. A parser
+  *                       configuration failure is reported as
+  *                       <code>OfficeDocumentException</code>.
+  */
+ private final Document createSettingsDOM(String rootName) throws IOException {
+
+     Document doc = null;
+
+     try {
+
+         DocumentBuilder builder = factory.newDocumentBuilder();
+         doc = builder.newDocument();
+
+     } catch (ParserConfigurationException ex) {
+
+         throw new OfficeDocumentException(ex);
+
+     }
+
+     Element root = doc.createElement(rootName);
+     doc.appendChild(root);
+
+     root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
+     root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
+     root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
+     root.setAttribute("office:version", "1.0");
+
+     return doc;
+ }
+
+
+ /**
+  * <p>Builds an empty DOM <code>Document</code> whose root element
+  * carries the minimum set of OpenOffice XML namespace declarations.</p>
+  *
+  * <p>The value of the <i>office:class</i> attribute is obtained from
+  * the subclass through <code>getOfficeClassAttribute</code>.</p>
+  *
+  * @param rootName root name of <code>Document</code>.
+  *
+  * @return The newly created <code>Document</code>.
+  *
+  * @throws IOException If any I/O error occurs.
+  */
+ private final Document createDOM(String rootName) throws IOException {
+
+     // Namespace declarations, listed in the order they are set on the root.
+     final String[][] namespaces = {
+         { "xmlns:office", "http://openoffice.org/2000/office" },
+         { "xmlns:style", "http://openoffice.org/2000/style" },
+         { "xmlns:text", "http://openoffice.org/2000/text" },
+         { "xmlns:table", "http://openoffice.org/2000/table" },
+         { "xmlns:draw", "http://openoffice.org/2000/drawing" },
+         { "xmlns:fo", "http://www.w3.org/1999/XSL/Format" },
+         { "xmlns:xlink", "http://www.w3.org/1999/xlink" },
+         { "xmlns:number", "http://openoffice.org/2000/datastyle" },
+         { "xmlns:svg", "http://www.w3.org/2000/svg" },
+         { "xmlns:chart", "http://openoffice.org/2000/chart" },
+         { "xmlns:dr3d", "http://openoffice.org/2000/dr3d" },
+         { "xmlns:math", "http://www.w3.org/1998/Math/MathML" },
+         { "xmlns:form", "http://openoffice.org/2000/form" },
+         { "xmlns:script", "http://openoffice.org/2000/script" }
+     };
+
+     final Document dom;
+     try {
+         dom = factory.newDocumentBuilder().newDocument();
+     } catch (ParserConfigurationException ex) {
+         throw new OfficeDocumentException(ex);
+     }
+
+     Element rootElement = dom.createElement(rootName);
+     dom.appendChild(rootElement);
+
+     for (int i = 0; i < namespaces.length; i++) {
+         rootElement.setAttribute(namespaces[i][0], namespaces[i][1]);
+     }
+
+     // Document-level attributes follow the namespace declarations.
+     rootElement.setAttribute("office:class", getOfficeClassAttribute());
+     rootElement.setAttribute("office:version", "1.0");
+
+     return dom;
+ }
+
+
+ /**
+ * Return the <i>office:class</i> attribute value.
+ *
+ * <p>The returned value is set as the <i>office:class</i> attribute
+ * on the root element built by <code>createDOM</code>.</p>
+ *
+ * @return The attribute value.
+ */
+ protected abstract String getOfficeClassAttribute();
+
+
+ /**
+  * <p>Hacked code that strips the <code>&lt;!DOCTYPE&gt;</code>
+  * declaration from the stream before it is handed to the parser.</p>
+  *
+  * <p>This hacked code needs to be changed later on.</p>
+  *
+  * <p>Issue: using current jaxp1.0 parser, there is no way
+  * to turn off processing of dtds.  Current set of dtds
+  * have bugs, processing them will throw exceptions.</p>
+  *
+  * <p>The input is read line by line as UTF-8.  Text before the
+  * <code>&lt;!DOCTYPE</code> marker and text after the first
+  * <code>&gt;</code> that follows it is kept; the declaration may
+  * span several lines.  Every line written to the result is terminated
+  * with a single <code>\n</code>.</p>
+  *
+  * @param is <code>InputStream</code> to be filtered.
+  *
+  * @return Reader value without the <code>&lt;!DOCTYPE&gt;</code> tag.
+  *
+  * @throws IOException If any I/O error occurs, or if the stream ends
+  *                     before the declaration is closed.
+  */
+ private static Reader hack(InputStream is) throws IOException {
+
+     BufferedReader lines = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+     StringBuffer filtered = new StringBuffer();
+
+     for (String line = lines.readLine(); line != null; line = lines.readLine()) {
+
+         int start = line.indexOf("<!DOCTYPE");
+
+         if (start < 0) {
+             // Ordinary line: copy it through, restoring the newline.
+             filtered.append(line).append("\n");
+             continue;
+         }
+
+         // Keep whatever precedes the declaration on this line.
+         filtered.append(line.substring(0, start));
+
+         // Look for the closing '>' on this line first, then on the
+         // following lines; running out of input is an error.
+         int end = line.indexOf('>', start + 8);
+         while (end < 0) {
+             line = lines.readLine();
+             if (line == null) {
+                 throw new IOException("Invalid XML");
+             }
+             end = line.indexOf('>');
+         }
+
+         // Keep whatever follows the declaration, restoring the newline.
+         filtered.append(line.substring(end + 1)).append("\n");
+     }
+
+     return new StringReader(filtered.toString());
+ }
+
+ /**
+ * <p>Transform the InputStream to a Reader Stream.</p>
+ *
+ * <p>This hacked code needs to be changed later on.</p>
+ *
+ * <p>Issue: the new oasis input file stream means
+ * that the old input stream fails. see #i33702# </p>
+ *
+ * @param is <code>InputStream</code> to be filtered.
+ *
+ * @return Reader value of the InputStream().
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private static Reader secondHack(InputStream is) throws IOException {
+
+ BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+ char[] charArray = new char[4096];
+ StringBuffer sBuf = new StringBuffer();
+ int n = 0;
+ while ((n=br.read(charArray, 0, charArray.length)) > 0)
+ sBuf.append(charArray, 0, n);
+
+ // ensure there is no trailing garbage after the end of the stream.
+ int sIndex = sBuf.lastIndexOf("</office:document>");
+ sBuf.delete(sIndex, sBuf.length());
+ sBuf.append("</office:document>");
+ StringReader r = new StringReader(sBuf.toString());
+ return r;
+ }
+
+
+ /**
+ * Method to create the initial entries in the manifest.xml file stored
+ * in an SX? file.
+ */
+ private void initManifestDOM() throws IOException {
+
+ try {
+ DocumentBuilder builder = factory.newDocumentBuilder();
+ DOMImplementation domImpl = builder.getDOMImplementation();
+
+ DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT,
+ "-//OpenOffice.org//DTD Manifest 1.0//EN",
+ "Manifest.dtd");
+ manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);
+ } catch (ParserConfigurationException ex) {
+ throw new OfficeDocumentException(ex);
+ }
+
+ // Add the <manifest:manifest> entry
+ Element manifestRoot = manifestDoc.getDocumentElement();
+
+ manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");
+
+ Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE);
+
+ docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
+ docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
+
+ manifestRoot.appendChild(docRoot);
+ }
+}
+