1 files changed, 565 insertions, 0 deletions
diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
new file mode 100644
index 000000000000..29098b72cc17
--- /dev/null
+++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
@@ -0,0 +1,565 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org.  If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.w3c.dom.*;
+
+import java.io.IOException;
+import java.util.Enumeration;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PdbDecoder;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+
+import java.util.Vector;
+import java.io.ByteArrayInputStream;
+
+import org.openoffice.xmerge.converter.xml.*;
+import org.openoffice.xmerge.util.Debug;
+import org.openoffice.xmerge.util.XmlUtil;
+
+/**
+ *  <p>WordSmith implementation of
+ *  org.openoffice.xmerge.DocumentDeserializer
+ *  for the {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ *  PluginFactoryImpl}.</p>
+ *
+ *  The <code>deserialize</code> method uses a
+ *  <code>WSDecoder</code> to parse the WordSmith records into an array
+ *  of <code>Wse</code> objects, then it calls <code>buildDocument</code>
+ *  to create a <code>SxwDocument</code> object from them.
+ *
+ *  @author      Herbie Ong, David Proulx
+ */
+public final class DocumentDeserializerImpl
+implements DOCConstants, OfficeConstants, DocumentDeserializer {
+
+    /** A Decoder object for decoding WordSmith format. */
+    private WSDecoder decoder = null;
+
+    WseFontTable fontTable = null;    // set by buildDocument from a WseFontTable element
+    WseColorTable colorTable = null;  // set by buildDocument from a WseColorTable element
+    StyleCatalog styleCat = null;     // styles newly created while building a document
+    StyleCatalog oldStyleCat = null;  // styles read from the original document, if any
+
+    /** A <code>ConvertData</code> object assigned to this object. */
+    private ConvertData cd = null;
+
+
+    /**
+     *  Constructor that assigns the given <code>ConvertData</code>
+     *  to the object.
+     *
+     *  @param  cd  A <code>ConvertData</code> object to read data for
+     *              the conversion process by the deserialize method.
+     */
+    public DocumentDeserializerImpl(ConvertData cd) {
+        this.cd = cd;  // read later by the no-argument deserialize()
+    }
+
+
+    /**
+     *  Convert the given <code>ConvertData</code> into a
+     *  <code>SxwDocument</code> object.
+     *
+     *  @return  Resulting <code>Document</code> object.
+     *
+     *  @throws  ConvertException  If any conversion error occurs.
+     *  @throws  IOException       If any I/O error occurs.
+     */
+    public Document deserialize() throws ConvertException, IOException {
+        final Document noOriginal = null;  // no original document to take styles from
+        return deserialize(noOriginal, cd);
+    }
+
+
+    public Document deserialize(Document origDoc, ConvertData cd)
+    throws IOException {
+
+        // Every PalmDocument in the ConvertData is decoded and rebuilt in
+        // turn; the document built last is the one returned (null when
+        // the enumeration is empty).
+        Document result = null;
+        for (Enumeration docs = cd.getDocumentEnumeration();
+             docs.hasMoreElements(); ) {
+            PalmDocument source = (PalmDocument) docs.nextElement();
+            Record[] records    = source.getPdb().getRecords();
+            decoder             = new WSDecoder();
+            Wse[] elements      = decoder.parseDocument(records);
+            result = buildDocument(source.getName(), elements, origDoc);
+        }
+
+        return result;
+    }
+
+
+    /**
+     *  Temporary method to read existing <code>StyleCatalog</code>
+     *  as a starting point.
+     *
+     *  @param  parentDoc  The parent <code>Document</code>.
+     */
+    private void readStyleCatalog(Document parentDoc) {
+        try {
+            // Round-trip the parent document through a byte buffer so it
+            // can be re-read as an SxwDocument and its content DOM queried.
+            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
+            parentDoc.write(buffer);
+            SxwDocument reloaded = new SxwDocument("old");
+            reloaded.read(new ByteArrayInputStream(buffer.toByteArray()));
+            org.w3c.dom.Document contentDom = reloaded.getContentDOM();
+
+            // Style family names, paired index-by-index with the class
+            // handed to StyleCatalog.add for that family.
+            String families[] = { "text", "paragraph", "paragraph" };
+            Class classes[] = { TextStyle.class, ParaStyle.class, TextStyle.class };
+
+            // The first element found for each of these tags is added to
+            // the catalog of styles from the original document.
+            String containerTags[] = {
+                TAG_OFFICE_STYLES,
+                TAG_OFFICE_AUTOMATIC_STYLES,
+                TAG_OFFICE_MASTER_STYLES
+            };
+            for (int t = 0; t < containerTags.length; t++) {
+                NodeList found = contentDom.getElementsByTagName(containerTags[t]);
+                oldStyleCat.add(found.item(0), families, classes, null, false);
+            }
+        } catch (Exception e) {
+            Debug.log(Debug.ERROR, "", e);
+        }
+    }
+
+
+    /**
+     *  Given an array of paragraph <code>Style</code> objects, see if
+     *  there is exactly one which matches the text formatting
+     *  <code>Style</code> of <code>tStyle</code>.
+     *
+     *  @param  paraStyles  An array of paragraph <code>Style</code>
+     *                      objects.
+     *  @param  tStyle      Text <code>Style</code> to match.
+     *
+     *  @return  The paragraph <code>Style</code> that matches.
+     */
+    private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
+        // Styles in the original document's catalog that match tStyle.
+        Style candidates[] = (Style[]) oldStyleCat.getMatching(tStyle);
+
+        int hits = 0;      // how many (candidate, paraStyles entry) name matches
+        int lastHit = -1;  // index into paraStyles of the most recent match
+        for (int c = 0; c < candidates.length; c++) {
+            TextStyle candidate = (TextStyle) candidates[c];
+
+            // Only candidates of the "paragraph" family are compared.
+            if (candidate.getFamily().equals("paragraph")) {
+                for (int p = 0; p < paraStyles.length; p++) {
+                    if (candidate.getName().equals(paraStyles[p].getName())) {
+                        hits++;
+                        lastHit = p;
+                    }
+                }
+            }
+        }
+
+        // Anything other than exactly one match is ambiguous or absent.
+        return (hits == 1) ? (ParaStyle) paraStyles[lastHit] : null;
+    }
+
+
+    /**
+     *  Take a <code>String</code> of text and turn it into a sequence
+     *  of <code>Node</code> objects.
+     *
+     *  @param  text       <code>String</code> of text.
+     *  @param  parentDoc  Parent <code>Document</code>.
+     *
+     *  @return  Array of <code>Node</code> objects.
+     */
+    private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
+        Vector nodeVec = new Vector();
+
+        // Break up the text from the WordSmith text run into Open
+        // Office text runs.  There may be more runs in OO because
+        // tabs and runs of 2 or more spaces map to element nodes.
+        // Loop while either marker remains.  indexOf reports "not
+        // found" as -1, so both tests compare against -1; this also
+        // covers a tab at index 1, as in "a\tb".
+        while ((text.indexOf("  ") != -1) || (text.indexOf("\t") != -1)) {
+
+            // Find the indices of tabs and multiple spaces, and
+            // figure out which of them occurs first in the string.
+            int spaceIndex = text.indexOf("  ");
+            int tabIndex = text.indexOf("\t");
+            int closerIndex;  // Index of the first of these
+            if (spaceIndex == -1)
+                closerIndex = tabIndex;
+            else if (tabIndex == -1)
+                closerIndex = spaceIndex;
+            else
+                closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex;
+
+            // If there is any text prior to the first occurrence of a
+            // tab or spaces, create a text node from it, then chop it
+            // off the string we're working with.
+            if (closerIndex > 0) {
+                String beginningText = text.substring(0, closerIndex);
+                Text textNode = parentDoc.createTextNode(beginningText);
+                nodeVec.add(textNode);
+                log("<TEXT>");
+                log(beginningText);
+                log("</TEXT>");
+            }
+            text = text.substring(closerIndex);
+
+            // Handle either tab character or space sequence by creating
+            // an element for it, and then chopping out the text that
+            // represented it in "text".
+            if (closerIndex == tabIndex) {
+                Element tabNode = parentDoc.createElement(TAG_TAB_STOP);
+                nodeVec.add(tabNode);
+                text = text.substring(1);  // tab is always a single character
+                log("<TAB/>");
+            } else {
+                // Compute length of space sequence (at least 2 here).
+                int nrSpaces = 2;
+                while ((nrSpaces < text.length())
+                && (text.charAt(nrSpaces) == ' '))
+                    nrSpaces++;
+
+                Element spaceNode = parentDoc.createElement(TAG_SPACE);
+                spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, Integer.toString(nrSpaces));
+                nodeVec.add(spaceNode);
+                text = text.substring(nrSpaces);
+                log("<SPACE count=\"" + nrSpaces + "\" />");
+            }
+        }
+
+        // No more tabs or space sequences.  If there's any remaining
+        // text create a text node for it.
+        if (text.length() > 0) {
+            Text textNode = parentDoc.createTextNode(text);
+            nodeVec.add(textNode);
+            log("<TEXT>");
+            log(text);
+            log("</TEXT>");
+        }
+
+        // Now create and populate an array to return the nodes in,
+        // keeping the order in which they were collected.
+        Node nodes[] = new Node[nodeVec.size()];
+        nodeVec.copyInto(nodes);
+        return nodes;
+    }
+
+
+    /**
+     *  Builds a <code>SxwDocument</code> from decoded WordSmith elements.
+     *
+     *  @param  docName  <code>Document</code> name
+     *  @param  data     WordSmith elements, processed in array order
+     *  @param  origDoc  Original <code>Document</code> whose styles and
+     *                   font declarations are reused, or <code>null</code>
+     *
+     *  @return  Resulting <code>SxwDocument</code> object.
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
+    throws IOException {
+
+        // create minimum office xml document.
+        SxwDocument sxwDoc = new SxwDocument(docName);
+        sxwDoc.initContentDOM();
+
+        org.w3c.dom.Document doc = sxwDoc.getContentDOM();
+
+        // Grab hold of the office:body tag,
+        // Assume there should be one.
+        // This is where top level paragraphs will append to.
+        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
+        Node bodyNode = list.item(0);
+
+        styleCat = new StyleCatalog(50);
+        oldStyleCat = new StyleCatalog(50);
+        if (origDoc != null)
+            readStyleCatalog(origDoc);
+
+        Element currPara = null;
+        ParaStyle currParaStyle = null;
+        int newTextStyleNr = 0;
+        int newParaStyleNr = 0;
+
+        // Now write out the document body by running through
+        // the list of WordSmith elements and processing each one
+        // in turn.
+        for (int i = 0; i < data.length; i++) {
+
+            if (data[i].getClass() == WsePara.class) {
+
+                currPara = doc.createElement(TAG_PARAGRAPH);
+                log("</PARA>");
+                log("<PARA>");
+
+                WsePara p = (WsePara)data[i];
+
+                // Save info about the first text run, if there is one.
+                WseTextRun firstTextRun = null;
+
+                if ((data.length >= i + 2)
+                && (data[i+1].getClass() == WseTextRun.class))
+                    firstTextRun = (WseTextRun)data[i+1];
+
+                Style matches[] = oldStyleCat.getMatching(p.makeStyle());
+
+                // See if we can find a unique match in the catalog
+                // of existing styles from the original document.
+                ParaStyle pStyle = null;
+                if (matches.length == 1) {
+                    pStyle = (ParaStyle)matches[0];
+                    log("using an existing style");
+                } else if ((matches.length > 1) && (firstTextRun != null)) {
+                    pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                    log("resolved a para by looking @ text");
+                }
+
+                // If nothing found so far, try looking in the catalog
+                // of newly-created styles.
+                // DJP FIXME: if we need to add two para styles with the
+                // same para formatting info but different default text
+                // styles, this won't work!
+                if (pStyle == null) {
+                    log("had " + matches.length + " matches in old catalog");
+                    matches = styleCat.getMatching(p.makeStyle());
+                    if (matches.length == 0) {
+                        pStyle = p.makeStyle();
+                        String newName = "PPP" + ++newParaStyleNr;
+                        pStyle.setName(newName);
+                        styleCat.add(pStyle);
+                        // DJP: write in the text format info here
+                        log("created a new style");
+                    } else if (matches.length == 1) {
+                        pStyle = (ParaStyle)matches[0];
+                        log("re-using a new style");
+                    } else if (firstTextRun != null) {
+                        pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                        if (pStyle != null) {
+                            log("resolved a (new) para by looking @ text");
+                        } else
+                            log("Hey this shouldn't happen! - nr of matches is "
+                            + matches.length);
+                    }
+                }
+
+                // Specify the style in this paragraph's attributes, unless
+                // none was found (then the paragraph is left unstyled).
+                if (pStyle == null)
+                    log("Unable to figure out a para style");
+                else
+                    currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
+
+                bodyNode.appendChild(currPara);
+                currParaStyle = pStyle;
+            } else if (data[i].getClass() == WseTextRun.class) {
+                WseTextRun tr = (WseTextRun)data[i];
+                TextStyle trStyle = null;
+                Node trNodes[] = parseText(tr.getText(), doc);
+
+                // First see if the formatting of this text run matches
+                // the default text formatting for this paragraph (skipped
+                // if the paragraph has no style).  If it does, then just
+                // make the text node(s) children of the current paragraph.
+                Style[] cps = { currParaStyle };
+                if ((currParaStyle != null)
+                && (matchParaByText(cps, tr.makeStyle()) != null)) {
+                    for (int ii  = 0; ii < trNodes.length; ii++) {
+                        currPara.appendChild(trNodes[ii]);
+                    }
+                    continue;
+                }
+
+                // Check for existing, matching styles in the old style
+                // catalog.  If exactly one is found, use it.  Otherwise,
+                // check the new style catalog, and either use the style
+                // found or add this new one to it.
+                Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
+                if (matches.length == 1)
+                    trStyle = (TextStyle)matches[0];
+                else {
+                    matches = styleCat.getMatching(tr.makeStyle());
+                    if (matches.length == 0) {
+                        trStyle = tr.makeStyle();
+                        String newName = "TTT" + ++newTextStyleNr;
+                        trStyle.setName(newName);
+                        styleCat.add(trStyle);
+                    } else if (matches.length == 1)
+                        trStyle = (TextStyle)matches[0];
+                    else
+                        log("multiple text style matches from new catalog");
+                }
+
+                // Create a text span node, set its style attribute if a style
+                // was resolved, add the text node(s), and append it to currPara.
+                Element textSpanNode = doc.createElement(TAG_SPAN);
+                if (trStyle != null)
+                    textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
+                for (int ii  = 0; ii < trNodes.length; ii++) {
+                    textSpanNode.appendChild(trNodes[ii]);
+                }
+                currPara.appendChild(textSpanNode);
+                log("</SPAN>");
+            }
+
+            else if (data[i].getClass() == WseFontTable.class) {
+                fontTable = (WseFontTable)data[i];
+            }
+
+            else if (data[i].getClass() == WseColorTable.class) {
+                colorTable = (WseColorTable)data[i];
+            }
+        }
+
+
+        //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
+        NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
+        Node rootNode = r.item(0);
+
+        // read the original document
+        org.w3c.dom.NodeList nl;
+        if (origDoc != null) {
+            java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+            origDoc.write(bos);
+            SxwDocument origSxwDoc = new SxwDocument("old");
+            origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
+            org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();
+
+            XmlUtil xu = new XmlUtil();
+            org.w3c.dom.DocumentFragment df;
+            org.w3c.dom.Node newNode;
+
+            // copy font declarations from original document to the new document
+            nl = origDomDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
+            df = doc.createDocumentFragment();
+            newNode = xu.deepClone(df, nl.item(0));
+            rootNode.insertBefore(newNode, bodyNode);
+
+            // copy style catalog from original document to the new document
+            nl = origDomDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+            df = doc.createDocumentFragment();
+            newNode = xu.deepClone(df, nl.item(0));
+            rootNode.insertBefore(newNode, bodyNode);
+
+            nl = origDomDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+            df = doc.createDocumentFragment();
+            newNode = xu.deepClone(df, nl.item(0));
+            rootNode.insertBefore(newNode, bodyNode);
+
+            nl = origDomDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
+            df = doc.createDocumentFragment();
+            newNode = xu.deepClone(df, nl.item(0));
+            rootNode.insertBefore(newNode, bodyNode);
+        }
+
+        // Original document not specified.  We need to add font declarations.
+        // DJP: this might just be for debugging.  Merger will probably put
+        // the "real" ones in.
+        // DJP: if really doing it this way, do it right: gather font names
+        // from style catalog(s).
+        else {
+            org.w3c.dom.Node declNode;
+
+            log("<FONT-DECLS/>");
+
+            declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
+            rootNode.insertBefore(declNode, bodyNode);
+            org.w3c.dom.Element fontNode;
+
+            fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
+            fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial");
+            fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial");
+            fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
+            declNode.appendChild(fontNode);
+
+            fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
+            fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso");
+            fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso");
+            fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
+            declNode.appendChild(fontNode);
+        }
+
+
+        // Now add any new styles we have created in this document.
+        nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+        Node autoStylesNode = nl.item(0);
+        if (autoStylesNode == null) {
+            autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+            log("<OFFICE-AUTOMATIC-STYLES/>");
+            rootNode.insertBefore(autoStylesNode, bodyNode);
+        }
+
+        Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
+        nl = newStyleCatNode.getChildNodes();
+        int nNodes = nl.getLength();
+        for (int i = 0; i < nNodes; i++) {
+            autoStylesNode.appendChild(nl.item(0));  // moving a child shrinks the live list
+        }
+
+        oldStyleCat.dumpCSV(true);
+        styleCat.dumpCSV(true);
+        return sxwDoc;
+    }
+
+
+    /**
+     *  Sends message to the log object.
+     *
+     *  @param  str  Debug message.
+     */
+    private void log(String str) {
+        // Forward the message to Debug at TRACE level.
+         Debug.log(Debug.TRACE, str);
+    }
+
+
+    /*
+    public static void main(String args[]) {
+
+     //   DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());
+
+        Node nodes[] = parseText("Tab here:\tThen some more text");
+    }
+*/
+}
+