summary refs log tree commit diff
path: root/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
diff options
context:
space:
mode:
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java')
-rw-r--r-- xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java 565
1 files changed, 0 insertions, 565 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
deleted file mode 100644
index ad90541afbbb..000000000000
--- a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
+++ /dev/null
@@ -1,565 +0,0 @@
-/************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org. If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-
-package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
-
-import org.w3c.dom.*;
-
-import java.io.IOException;
-import java.util.Enumeration;
-
-import org.openoffice.xmerge.Document;
-import org.openoffice.xmerge.ConvertData;
-import org.openoffice.xmerge.ConvertException;
-import org.openoffice.xmerge.DocumentDeserializer;
-import org.openoffice.xmerge.converter.xml.OfficeConstants;
-import org.openoffice.xmerge.converter.palm.PalmDB;
-import org.openoffice.xmerge.converter.palm.Record;
-import org.openoffice.xmerge.converter.palm.PdbDecoder;
-import org.openoffice.xmerge.converter.palm.PalmDocument;
-import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
-
-import java.util.Vector;
-import java.io.ByteArrayInputStream;
-
-import org.openoffice.xmerge.converter.xml.*;
-import org.openoffice.xmerge.util.Debug;
-import org.openoffice.xmerge.util.XmlUtil;
-
-/**
- * <p>WordSmith implementation of
- * org.openoffice.xmerge.DocumentDeserializer
- * for the {@link
- * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
- * PluginFactoryImpl}.</p>
- *
- * The <code>deserialize</code> method uses a
- * <code>WSDecoder</code> to parse the WordSmith records into an
- * array of <code>Wse</code> elements, then it calls <code>buildDocument</code>
- * to create a <code>SxwDocument</code> object from them.
- *
- * @author Herbie Ong, David Proulx
- */
-public final class DocumentDeserializerImpl
-implements DOCConstants, OfficeConstants, DocumentDeserializer {
-
- /** A Decoder object for decoding WordSmith format. */
- private WSDecoder decoder = null;
-
- /** Font table element most recently seen by <code>buildDocument</code>. */
- WseFontTable fontTable = null;
- /** Color table element most recently seen by <code>buildDocument</code>. */
- WseColorTable colorTable = null;
- /** Catalog of styles newly created while building the current document. */
- StyleCatalog styleCat = null;
- /**
-  * Catalog of styles read from the original document by
-  * <code>readStyleCatalog</code>; stays empty when no original is given.
-  */
- StyleCatalog oldStyleCat = null;
-
- /** A <code>ConvertData</code> object assigned to this object. */
- private ConvertData cd = null;
-
-
-    /**
-     *  Creates a deserializer bound to the supplied conversion input.
-     *
-     *  @param  cd  The <code>ConvertData</code> whose documents are
-     *              converted by the no-argument <code>deserialize</code>
-     *              method.
-     */
-    public DocumentDeserializerImpl(ConvertData cd) {
-        this.cd = cd;
-    }
-
-
-    /**
-     *  Converts the <code>ConvertData</code> supplied at construction
-     *  time into a <code>SxwDocument</code>, without consulting any
-     *  original document.
-     *
-     *  @return  Resulting <code>Document</code> object.
-     *
-     *  @throws  ConvertException  If any conversion error occurs.
-     *  @throws  IOException       If any I/O error occurs.
-     */
-    public Document deserialize() throws ConvertException,
-        IOException {
-
-        // No original document is available on this code path.
-        final Document noOriginal = null;
-        return deserialize(noOriginal, cd);
-    }
-
-
-    /**
-     *  Converts every <code>PalmDocument</code> found in <code>cd</code>
-     *  by decoding its records and handing the result to
-     *  <code>buildDocument</code>.
-     *
-     *  <p>Only the document built from the last enumerated entry is
-     *  returned; <code>null</code> is returned when the enumeration is
-     *  empty.</p>
-     *
-     *  @param  origDoc  Original <code>Document</code> used as a source
-     *                   of existing styles, or <code>null</code>.
-     *  @param  cd       The <code>ConvertData</code> to read from.
-     *
-     *  @return  Resulting <code>Document</code> object.
-     *
-     *  @throws  IOException  If any I/O error occurs.
-     */
-    public Document deserialize(Document origDoc, ConvertData cd)
-        throws IOException {
-
-        Document result = null;
-
-        for (Enumeration docs = cd.getDocumentEnumeration();
-             docs.hasMoreElements(); ) {
-
-            PalmDocument source = (PalmDocument) docs.nextElement();
-            Record[] records = source.getPdb().getRecords();
-
-            // A fresh decoder is created for each document.
-            decoder = new WSDecoder();
-            Wse[] elements = decoder.parseDocument(records);
-
-            result = buildDocument(source.getName(), elements, origDoc);
-        }
-
-        return result;
-    }
-
-
-    /**
-     *  Temporary method to read the existing <code>StyleCatalog</code>
-     *  as a starting point.
-     *
-     *  <p>The parent document is written to memory, re-read as a
-     *  <code>SxwDocument</code>, and the first office styles,
-     *  automatic styles and master styles elements of its content DOM
-     *  are added to <code>oldStyleCat</code>.  Any exception is logged
-     *  and otherwise ignored, so the old catalog may be left empty or
-     *  only partially filled.</p>
-     *
-     *  @param  parentDoc  The parent <code>Document</code>.
-     */
-    private void readStyleCatalog(Document parentDoc) {
-        try {
-            java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
-            parentDoc.write(bos);
-            SxwDocument sxwDoc = new SxwDocument("old");
-            sxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
-            org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
-
-            // Family / class pairs handed to the catalog.  "paragraph" is
-            // listed twice, once per class; presumably so the text
-            // properties of paragraph styles are catalogued as well
-            // (matchParaByText looks for paragraph-family TextStyles).
-            String families[] = { "text", "paragraph", "paragraph" };
-            Class classes[] = { TextStyle.class, ParaStyle.class, TextStyle.class };
-
-            String containers[] = {
-                TAG_OFFICE_STYLES,
-                TAG_OFFICE_AUTOMATIC_STYLES,
-                TAG_OFFICE_MASTER_STYLES
-            };
-
-            for (int i = 0; i < containers.length; i++) {
-                NodeList nl = domDoc.getElementsByTagName(containers[i]);
-                Node container = nl.item(0);
-
-                // A missing container must not prevent the remaining
-                // ones from being read.
-                if (container != null) {
-                    oldStyleCat.add(container, families, classes, null, false);
-                }
-            }
-
-        } catch (Exception e) {
-            // Best effort: conversion continues without the old styles.
-            Debug.log(Debug.ERROR,
-                      "Unable to read style catalog from original document", e);
-        }
-    }
-
-
-    /**
-     *  Given an array of paragraph <code>Style</code> objects, see if
-     *  there is exactly one which matches the text formatting
-     *  <code>Style</code> of <code>tStyle</code>.
-     *
-     *  <p>The candidates are compared by name against the
-     *  paragraph-family styles in <code>oldStyleCat</code> that match
-     *  <code>tStyle</code>.</p>
-     *
-     *  @param  paraStyles  An array of paragraph <code>Style</code>
-     *                      objects.  <code>null</code> entries are
-     *                      ignored.
-     *  @param  tStyle      Text <code>Style</code> to match.
-     *
-     *  @return  The paragraph <code>Style</code> that matches, or
-     *           <code>null</code> if there is no match or more than
-     *           one match.
-     */
-    private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
-        int matchIndex = -1;
-        int matchCount = 0;
-        Style txtMatches[] = oldStyleCat.getMatching(tStyle);
-
-        for (int j = 0; j < txtMatches.length; j++) {
-            TextStyle t = (TextStyle)txtMatches[j];
-
-            // Only paragraph-family styles can name a paragraph style.
-            if (!"paragraph".equals(t.getFamily()))
-                continue;
-
-            for (int k = 0; k < paraStyles.length; k++) {
-                Style candidate = paraStyles[k];
-
-                // buildDocument may pass a slot that is still null when
-                // no paragraph style has been established yet.
-                if (candidate == null)
-                    continue;
-
-                if (t.getName().equals(candidate.getName())) {
-                    matchCount++;
-                    matchIndex = k;
-                }
-            }
-        }
-
-        if (matchCount == 1)
-            return (ParaStyle)paraStyles[matchIndex];
-        return null;
-    }
-
-
-    /**
-     *  Take a <code>String</code> of text and turn it into a sequence
-     *  of <code>Node</code> objects.
-     *
-     *  <p>Each tab character becomes a tab-stop element, each run of
-     *  two or more consecutive spaces becomes a space element carrying
-     *  the run length, and everything in between (including single
-     *  spaces) becomes a text node.</p>
-     *
-     *  @param  text       <code>String</code> of text.  A
-     *                     <code>null</code> value is treated like an
-     *                     empty string.
-     *  @param  parentDoc  Parent <code>Document</code>.
-     *
-     *  @return  Array of <code>Node</code> objects, possibly empty.
-     */
-    private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
-        Vector nodeVec = new Vector();
-
-        int len = (text == null) ? 0 : text.length();
-        int plainStart = 0;     // start of the pending plain-text stretch
-        int pos = 0;
-
-        // Scan character by character.  This replaces the earlier
-        // indexOf()-based loop, whose tab test compared against 1
-        // instead of -1 and therefore missed a tab at index 1.
-        while (pos < len) {
-            char c = text.charAt(pos);
-
-            if (c == '\t') {
-                addTextNode(nodeVec, parentDoc, text.substring(plainStart, pos));
-
-                nodeVec.add(parentDoc.createElement(TAG_TAB_STOP));
-                log("<TAB/>");
-
-                pos++;          // tab is always a single character
-                plainStart = pos;
-
-            } else if ((c == ' ') && (pos + 1 < len)
-                       && (text.charAt(pos + 1) == ' ')) {
-                addTextNode(nodeVec, parentDoc, text.substring(plainStart, pos));
-
-                // Compute length of space sequence (at least 2).
-                int end = pos + 2;
-                while ((end < len) && (text.charAt(end) == ' '))
-                    end++;
-                int nrSpaces = end - pos;
-
-                Element spaceNode = parentDoc.createElement(TAG_SPACE);
-                spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
-                                       Integer.toString(nrSpaces));
-                nodeVec.add(spaceNode);
-                log("<SPACE count=\"" + nrSpaces + "\" />");
-
-                pos = end;
-                plainStart = pos;
-
-            } else {
-                // Ordinary character (or a lone space): part of the text.
-                pos++;
-            }
-        }
-
-        // No more tabs or space sequences.  If there's any remaining
-        // text create a text node for it.
-        if (plainStart < len)
-            addTextNode(nodeVec, parentDoc, text.substring(plainStart));
-
-        // Now create and populate an array to return the nodes in.
-        Node nodes[] = new Node[nodeVec.size()];
-        nodeVec.copyInto(nodes);
-        return nodes;
-    }
-
-
-    /**
-     *  Appends a text node for <code>content</code> to
-     *  <code>nodeVec</code> and traces it; does nothing for an empty
-     *  string.
-     *
-     *  @param  nodeVec    Collector for the generated nodes.
-     *  @param  parentDoc  Parent <code>Document</code>.
-     *  @param  content    Text of the node.
-     */
-    private void addTextNode(Vector nodeVec, org.w3c.dom.Document parentDoc,
-                             String content) {
-        if (content.length() == 0)
-            return;
-
-        nodeVec.add(parentDoc.createTextNode(content));
-        log("<TEXT>");
-        log(content);
-        log("</TEXT>");
-    }
-
-
- /**
- * Parses the text content of a WordSmith format and builds a
- * <code>SXWDocument</code>.
- *
- * @param docName <code>Document</code> name
- * @param str Text content of WordSmith format
- *
- * @return Resulting <code>SXWDocument</code> object.
- *
- * @throws IOException If any I/O error occurs.
- */
- private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
- throws IOException {
-
- // create minimum office xml document.
- SxwDocument sxwDoc = new SxwDocument(docName);
- sxwDoc.initContentDOM();
-
- org.w3c.dom.Document doc = sxwDoc.getContentDOM();
-
- // Grab hold of the office:body tag,
- // Assume there should be one.
- // This is where top level paragraphs will append to.
- NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
- Node bodyNode = list.item(0);
-
- styleCat = new StyleCatalog(50);
- oldStyleCat = new StyleCatalog(50);
- if (origDoc != null)
- readStyleCatalog(origDoc);
-
- Element currPara = null;
- ParaStyle currParaStyle = null;
- int newTextStyleNr = 0;
- int newParaStyleNr = 0;
-
- // Now write out the document body by running through
- // the list of WordSmith elements and processing each one
- // in turn.
- for (int i = 0; i < data.length; i++) {
-
- if (data[i].getClass() == WsePara.class) {
-
- currPara = doc.createElement(TAG_PARAGRAPH);
- log("</PARA>");
- log("<PARA>");
-
- WsePara p = (WsePara)data[i];
-
- // Save info about the first text run, if there is one.
- WseTextRun firstTextRun = null;
-
- if ((data.length >= i + 2)
- && (data[i+1].getClass() == WseTextRun.class))
- firstTextRun = (WseTextRun)data[i+1];
-
- Style matches[] = oldStyleCat.getMatching(p.makeStyle());
-
- // See if we can find a unique match in the catalog
- // of existing styles from the original document.
- ParaStyle pStyle = null;
- if (matches.length == 1) {
- pStyle = (ParaStyle)matches[0];
- log("using an existing style");
- } else if ((matches.length > 1) && (firstTextRun != null)) {
- pStyle = matchParaByText(matches, firstTextRun.makeStyle());
- log("resolved a para by looking @ text");
- }
-
- // If nothing found so far, try looking in the catalog
- // of newly-created styles.
- // DJP FIXME: if we need to add two para styles with the
- // same para formatting info but different default text
- // styles, this won't work!
- if (pStyle == null) {
- log("had " + matches.length + " matches in old catalog");
- matches = styleCat.getMatching(p.makeStyle());
- if (matches.length == 0) {
- pStyle = p.makeStyle();
- String newName = new String("PPP" + ++newParaStyleNr);
- pStyle.setName(newName);
- styleCat.add(pStyle);
- // DJP: write in the text format info here
- log("created a new style");
- } else if (matches.length == 1) {
- pStyle = (ParaStyle)matches[0];
- log("re-using a new style");
- } else if (firstTextRun != null) {
- pStyle = matchParaByText(matches, firstTextRun.makeStyle());
- if (pStyle != null) {
- log("resolved a (new) para by looking @ text");
- } else
- log("Hey this shouldn't happen! - nr of matches is "
- + matches.length);
- }
- }
-
- if (pStyle == null)
- log("Unable to figure out a para style");
-
- // Figured out a style to use. Specify the style in this
- // paragraph's attributes.
- currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
-
- bodyNode.appendChild(currPara);
- currParaStyle = pStyle;
- } else if (data[i].getClass() == WseTextRun.class) {
- WseTextRun tr = (WseTextRun)data[i];
- TextStyle trStyle = null;
- Node trNodes[] = parseText(tr.getText(), doc);
-
- // First see if the formatting of this text run matches
- // the default text formatting for this paragraph. If
- // it does, then just make the text node(s) children of
- // the current paragraph.
- Style[] cps = new Style[1];
- cps[0] = currParaStyle;
- if (matchParaByText(cps, tr.makeStyle()) != null) {
- for (int ii = 0; ii < trNodes.length; ii++) {
- currPara.appendChild(trNodes[ii]);
- }
- continue;
- }
-
- // Check for existing, matching styles in the old style
- // catalog. If exactly one is found, use it. Otherwise,
- // check the new style catalog, and either use the style
- // found or add this new one to it.
- Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
- if (matches.length == 1)
- trStyle = (TextStyle)matches[0];
- else {
- matches = styleCat.getMatching(tr.makeStyle());
- if (matches.length == 0) {
- trStyle = tr.makeStyle();
- String newName = new String("TTT" + ++newTextStyleNr);
- trStyle.setName(newName);
- styleCat.add(trStyle);
- } else if (matches.length == 1)
- trStyle = (TextStyle)matches[0];
- else
- log("multiple text style matches from new catalog");
- }
-
- // Create a text span node, set the style attribute, make the
- // text node(s) its children, and append it to current paragraph's
- // list of children.
- Element textSpanNode = doc.createElement(TAG_SPAN);
- textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
- for (int ii = 0; ii < trNodes.length; ii++) {
- textSpanNode.appendChild(trNodes[ii]);
- }
- currPara.appendChild(textSpanNode);
- log("</SPAN>");
- }
-
- else if (data[i].getClass() == WseFontTable.class) {
- fontTable = (WseFontTable)data[i];
- }
-
- else if (data[i].getClass() == WseColorTable.class) {
- colorTable = (WseColorTable)data[i];
- }
- }
-
-
- //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
- NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
- Node rootNode = r.item(0);
-
- // read the original document
- org.w3c.dom.NodeList nl;
- if (origDoc != null) {
- java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
- origDoc.write(bos);
- SxwDocument origSxwDoc = new SxwDocument("old");
- origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
- org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();
-
- XmlUtil xu = new XmlUtil();
- org.w3c.dom.DocumentFragment df;
- org.w3c.dom.Node newNode;
-
- // copy font declarations from original document to the new document
- nl = origDomDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS);
- df = doc.createDocumentFragment();
- newNode = xu.deepClone(df, nl.item(0));
- rootNode.insertBefore(newNode, bodyNode);
-
- // copy style catalog from original document to the new document
- nl = origDomDoc.getElementsByTagName(TAG_OFFICE_STYLES);
- df = doc.createDocumentFragment();
- newNode = xu.deepClone(df, nl.item(0));
- rootNode.insertBefore(newNode, bodyNode);
-
- nl = origDomDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
- df = doc.createDocumentFragment();
- newNode = xu.deepClone(df, nl.item(0));
- rootNode.insertBefore(newNode, bodyNode);
-
- nl = origDomDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
- df = doc.createDocumentFragment();
- newNode = xu.deepClone(df, nl.item(0));
- rootNode.insertBefore(newNode, bodyNode);
- }
-
- // Original document not specified. We need to add font declarations.
- // DJP: this might just be for debugging. Merger will probably put
- // the "real" ones in.
- // DJP: if really doing it this way, do it right: gather font names
- // from style catalog(s).
- else {
- org.w3c.dom.Node declNode;
-
- log("<FONT-DECLS/>");
-
- declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
- rootNode.insertBefore(declNode, bodyNode);
- org.w3c.dom.Element fontNode;
-
- fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
- fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial");
- fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial");
- fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
- declNode.appendChild(fontNode);
-
- fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
- fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso");
- fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso");
- fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
- declNode.appendChild(fontNode);
- }
-
-
- // Now add any new styles we have created in this document.
- nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
- Node autoStylesNode = nl.item(0);
- if (autoStylesNode == null) {
- autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
- log("<OFFICE-AUTOMATIC-STYLES/>");
- rootNode.insertBefore(autoStylesNode, bodyNode);
- }
-
- Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
- nl = newStyleCatNode.getChildNodes();
- int nNodes = nl.getLength();
- for (int i = 0; i < nNodes; i++) {
- autoStylesNode.appendChild(nl.item(0));
- }
-
- oldStyleCat.dumpCSV(true);
- styleCat.dumpCSV(true);
- return sxwDoc;
- }
-
-
-    /**
-     *  Writes a trace-level message to the debug log.
-     *
-     *  @param  str  Debug message.
-     */
-    private void log(String str) {
-        Debug.log(Debug.TRACE, str);
-    }
-
-
- /*
- public static void main(String args[]) {
-
- // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());
-
- Node nodes[] = parseText("Tab here:\tThen some more text");
- }
-*/
-}
-