diff options
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java')
-rw-r--r-- | xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java | 565 |
1 files changed, 0 insertions, 565 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java deleted file mode 100644 index ad90541afbbb..000000000000 --- a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java +++ /dev/null @@ -1,565 +0,0 @@ -/************************************************************************ - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -package org.openoffice.xmerge.converter.xml.sxw.wordsmith; - -import org.w3c.dom.*; - -import java.io.IOException; -import java.util.Enumeration; - -import org.openoffice.xmerge.Document; -import org.openoffice.xmerge.ConvertData; -import org.openoffice.xmerge.ConvertException; -import org.openoffice.xmerge.DocumentDeserializer; -import org.openoffice.xmerge.converter.xml.OfficeConstants; -import org.openoffice.xmerge.converter.palm.PalmDB; -import org.openoffice.xmerge.converter.palm.Record; -import org.openoffice.xmerge.converter.palm.PdbDecoder; -import org.openoffice.xmerge.converter.palm.PalmDocument; -import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; - -import java.util.Vector; -import java.io.ByteArrayInputStream; - -import org.openoffice.xmerge.converter.xml.*; -import org.openoffice.xmerge.util.Debug; -import org.openoffice.xmerge.util.XmlUtil; - -/** - * <p>WordSmith implementation of - * org.openoffice.xmerge.DocumentDeserializer - * for the {@link - * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl - * PluginFactoryImpl}.</p> - * - * The <code>deserialize</code> method uses a - * <code>DocDecoder</code> to read the WordSmith format into a - * <code>String</code> object, then it calls <code>buildDocument</code> - * to create a <code>SxwDocument</code> object from it. - * - * @author Herbie Ong, David Proulx - */ -public final class DocumentDeserializerImpl -implements DOCConstants, OfficeConstants, DocumentDeserializer { - - /** A Decoder object for decoding WordSmith format. */ - private WSDecoder decoder = null; - - WseFontTable fontTable = null; - WseColorTable colorTable = null; - StyleCatalog styleCat = null; - StyleCatalog oldStyleCat = null; - - /** A <code>ConvertData</code> object assigned to this object. */ - private ConvertData cd = null; - - - /** - * Constructor that assigns the given <code>ConvertData</code> - * to the object. - * - * @param cd A <code>ConvertData</code> object to read data for - * the conversion process by the deserialize method. - */ - public DocumentDeserializerImpl(ConvertData cd) { - this.cd = cd; - } - - - /** - * Convert the given <code>ConvertData</code> into a - * <code>SxwDocument</code> object. - * - * @return Resulting <code>Document</code> object. - * - * @throws ConvertException If any conversion error occurs. - * @throws IOException If any I/O error occurs. - */ - public Document deserialize() throws ConvertException, - IOException { - return deserialize(null, cd); - } - - - public Document deserialize(Document origDoc, ConvertData cd) - throws IOException { - - Document doc = null; - PalmDocument palmDoc = null; - Enumeration e = cd.getDocumentEnumeration(); - - while(e.hasMoreElements()) { - palmDoc = (PalmDocument) e.nextElement(); - PalmDB pdb = palmDoc.getPdb(); - Record[] recs = pdb.getRecords(); - decoder = new WSDecoder(); - Wse[] b = decoder.parseDocument(recs); - String docName = palmDoc.getName(); - doc = buildDocument(docName, b, origDoc); - } - return doc; - } - - - /** - * Temporary method to read existing <code>StyleCatalog</code> - * as a starting point. - * - * @param parentDoc The parent <code>Document</code>. - */ - private void readStyleCatalog(Document parentDoc) { - Element rootNode = null; - try { - java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); - parentDoc.write(bos); - SxwDocument sxwDoc = new SxwDocument("old"); - sxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); - org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); - - String families[] = new String[3]; - families[0] = "text"; - families[1] = "paragraph"; - families[2] = "paragraph"; - Class classes[] = new Class[3]; - classes[0] = TextStyle.class; - classes[1] = ParaStyle.class; - classes[2] = TextStyle.class; - - NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); - oldStyleCat.add(nl.item(0), families, classes, null, false); - nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); - oldStyleCat.add(nl.item(0), families, classes, null, false); - nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); - oldStyleCat.add(nl.item(0), families, classes, null, false); - - } catch (Exception e) { - Debug.log(Debug.ERROR, "", e); - } - - } - - - /** - * Given an array of paragraph <code>Style</code> objects, see if - * there is exactly one which matches the text formatting - * <code>Style</code> of <code>tStyle</code>. - * - * @param paraStyles An array of paragraph <code>Style</code> - * objects. - * @param tStyle Text <code>Style</code> to match. - * - * @return The paragraph <code>Style</code> that matches. - */ - private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) { - int matchIndex = -1; - int matchCount = 0; - Style txtMatches[] = (Style[]) oldStyleCat.getMatching(tStyle); - if (txtMatches.length >= 1) { - for (int j = 0; j < txtMatches.length; j++) { - TextStyle t = (TextStyle)txtMatches[j]; - - if (!t.getFamily().equals("paragraph")) - continue; - - for (int k = 0; k < paraStyles.length; k++) { - if (t.getName().equals(paraStyles[k].getName())) { - matchCount++; - matchIndex = k; - } - } - } - } - if (matchCount == 1) - return (ParaStyle)paraStyles[matchIndex]; - else return null; - } - - - /** - * Take a <code>String</code> of text and turn it into a sequence - * of <code>Node</code> objects. - * - * @param text <code>String</code> of text. - * @param parentDoc Parent <code>Document</code>. - * - * @return Array of <code>Node</code> objects. - */ - private Node[] parseText(String text, org.w3c.dom.Document parentDoc) { - Vector nodeVec = new Vector(); - - // Break up the text from the WordSmith text run into Open - // Office text runs. There may be more runs in OO because - // runs of 2 or more spaces map to nodes. - while ((text.indexOf(" ") != -1) || (text.indexOf("\t") != 1)) { - - // Find the indices of tabs and multiple spaces, and - // figure out which of them occurs first in the string. - int spaceIndex = text.indexOf(" "); - int tabIndex = text.indexOf("\t"); - if ((spaceIndex == -1) && (tabIndex == -1)) - break; // DJP This should not be necessary. What is wrong - // with the while() stmt up above? - int closerIndex; // Index of the first of these - if (spaceIndex == -1) - closerIndex = tabIndex; - else if (tabIndex == -1) - closerIndex = spaceIndex; - else - closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex; - - // If there is any text prior to the first occurrence of a - // tab or spaces, create a text node from it, then chop it - // off the string we're working with. - if (closerIndex > 0) { - String beginningText = text.substring(0, closerIndex); - Text textNode = parentDoc.createTextNode(beginningText); - nodeVec.addElement(textNode); - log("<TEXT>"); - log(beginningText); - log("</TEXT>"); - } - text = text.substring(closerIndex); - - // Handle either tab character or space sequence by creating - // an element for it, and then chopping out the text that - // represented it in "text". - if (closerIndex == tabIndex) { - Element tabNode = parentDoc.createElement(TAG_TAB_STOP); - nodeVec.add(tabNode); - text = text.substring(1); // tab is always a single character - log("<TAB/>"); - } else { - // Compute length of space sequence. - int nrSpaces = 2; - while ((nrSpaces < text.length()) - && text.substring(nrSpaces, nrSpaces + 1).equals(" ")) - nrSpaces++; - - Element spaceNode = parentDoc.createElement(TAG_SPACE); - spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, new Integer(nrSpaces).toString()); - nodeVec.add(spaceNode); - text = text.substring(nrSpaces); - log("<SPACE count=\"" + nrSpaces + "\" />"); - } - } - - // No more tabs or space sequences. If there's any remaining - // text create a text node for it. - if (text.length() > 0) { - Text textNode = parentDoc.createTextNode(text); - nodeVec.add(textNode); - log("<TEXT>"); - log(text); - log("</TEXT>"); - } - - // Now create and populate an array to return the nodes in. - Node nodes[] = new Node[nodeVec.size()]; - for (int i = 0; i < nodeVec.size(); i++) - nodes[i] = (Node)nodeVec.elementAt(i); - return nodes; - } - - - /** - * Parses the text content of a WordSmith format and builds a - * <code>SXWDocument</code>. - * - * @param docName <code>Document</code> name - * @param str Text content of WordSmith format - * - * @return Resulting <code>SXWDocument</code> object. - * - * @throws IOException If any I/O error occurs. - */ - private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc) - throws IOException { - - // create minimum office xml document. - SxwDocument sxwDoc = new SxwDocument(docName); - sxwDoc.initContentDOM(); - - org.w3c.dom.Document doc = sxwDoc.getContentDOM(); - - // Grab hold of the office:body tag, - // Assume there should be one. - // This is where top level paragraphs will append to. - NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); - Node bodyNode = list.item(0); - - styleCat = new StyleCatalog(50); - oldStyleCat = new StyleCatalog(50); - if (origDoc != null) - readStyleCatalog(origDoc); - - Element currPara = null; - ParaStyle currParaStyle = null; - int newTextStyleNr = 0; - int newParaStyleNr = 0; - - // Now write out the document body by running through - // the list of WordSmith elements and processing each one - // in turn. - for (int i = 0; i < data.length; i++) { - - if (data[i].getClass() == WsePara.class) { - - currPara = doc.createElement(TAG_PARAGRAPH); - log("</PARA>"); - log("<PARA>"); - - WsePara p = (WsePara)data[i]; - - // Save info about the first text run, if there is one. - WseTextRun firstTextRun = null; - - if ((data.length >= i + 2) - && (data[i+1].getClass() == WseTextRun.class)) - firstTextRun = (WseTextRun)data[i+1]; - - Style matches[] = oldStyleCat.getMatching(p.makeStyle()); - - // See if we can find a unique match in the catalog - // of existing styles from the original document. - ParaStyle pStyle = null; - if (matches.length == 1) { - pStyle = (ParaStyle)matches[0]; - log("using an existing style"); - } else if ((matches.length > 1) && (firstTextRun != null)) { - pStyle = matchParaByText(matches, firstTextRun.makeStyle()); - log("resolved a para by looking @ text"); - } - - // If nothing found so far, try looking in the catalog - // of newly-created styles. - // DJP FIXME: if we need to add two para styles with the - // same para formatting info but different default text - // styles, this won't work! - if (pStyle == null) { - log("had " + matches.length + " matches in old catalog"); - matches = styleCat.getMatching(p.makeStyle()); - if (matches.length == 0) { - pStyle = p.makeStyle(); - String newName = new String("PPP" + ++newParaStyleNr); - pStyle.setName(newName); - styleCat.add(pStyle); - // DJP: write in the text format info here - log("created a new style"); - } else if (matches.length == 1) { - pStyle = (ParaStyle)matches[0]; - log("re-using a new style"); - } else if (firstTextRun != null) { - pStyle = matchParaByText(matches, firstTextRun.makeStyle()); - if (pStyle != null) { - log("resolved a (new) para by looking @ text"); - } else - log("Hey this shouldn't happen! - nr of matches is " - + matches.length); - } - } - - if (pStyle == null) - log("Unable to figure out a para style"); - - // Figured out a style to use. Specify the style in this - // paragraph's attributes. - currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); - - bodyNode.appendChild(currPara); - currParaStyle = pStyle; - } else if (data[i].getClass() == WseTextRun.class) { - WseTextRun tr = (WseTextRun)data[i]; - TextStyle trStyle = null; - Node trNodes[] = parseText(tr.getText(), doc); - - // First see if the formatting of this text run matches - // the default text formatting for this paragraph. If - // it does, then just make the text node(s) children of - // the current paragraph. - Style[] cps = new Style[1]; - cps[0] = currParaStyle; - if (matchParaByText(cps, tr.makeStyle()) != null) { - for (int ii = 0; ii < trNodes.length; ii++) { - currPara.appendChild(trNodes[ii]); - } - continue; - } - - // Check for existing, matching styles in the old style - // catalog. If exactly one is found, use it. Otherwise, - // check the new style catalog, and either use the style - // found or add this new one to it. - Style matches[] = oldStyleCat.getMatching(tr.makeStyle()); - if (matches.length == 1) - trStyle = (TextStyle)matches[0]; - else { - matches = styleCat.getMatching(tr.makeStyle()); - if (matches.length == 0) { - trStyle = tr.makeStyle(); - String newName = new String("TTT" + ++newTextStyleNr); - trStyle.setName(newName); - styleCat.add(trStyle); - } else if (matches.length == 1) - trStyle = (TextStyle)matches[0]; - else - log("multiple text style matches from new catalog"); - } - - // Create a text span node, set the style attribute, make the - // text node(s) its children, and append it to current paragraph's - // list of children. - Element textSpanNode = doc.createElement(TAG_SPAN); - textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName()); - for (int ii = 0; ii < trNodes.length; ii++) { - textSpanNode.appendChild(trNodes[ii]); - } - currPara.appendChild(textSpanNode); - log("</SPAN>"); - } - - else if (data[i].getClass() == WseFontTable.class) { - fontTable = (WseFontTable)data[i]; - } - - else if (data[i].getClass() == WseColorTable.class) { - colorTable = (WseColorTable)data[i]; - } - } - - - //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT); - NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT); - Node rootNode = r.item(0); - - // read the original document - org.w3c.dom.NodeList nl; - if (origDoc != null) { - java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); - origDoc.write(bos); - SxwDocument origSxwDoc = new SxwDocument("old"); - origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); - org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM(); - - XmlUtil xu = new XmlUtil(); - org.w3c.dom.DocumentFragment df; - org.w3c.dom.Node newNode; - - // copy font declarations from original document to the new document - nl = origDomDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); - df = doc.createDocumentFragment(); - newNode = xu.deepClone(df, nl.item(0)); - rootNode.insertBefore(newNode, bodyNode); - - // copy style catalog from original document to the new document - nl = origDomDoc.getElementsByTagName(TAG_OFFICE_STYLES); - df = doc.createDocumentFragment(); - newNode = xu.deepClone(df, nl.item(0)); - rootNode.insertBefore(newNode, bodyNode); - - nl = origDomDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); - df = doc.createDocumentFragment(); - newNode = xu.deepClone(df, nl.item(0)); - rootNode.insertBefore(newNode, bodyNode); - - nl = origDomDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); - df = doc.createDocumentFragment(); - newNode = xu.deepClone(df, nl.item(0)); - rootNode.insertBefore(newNode, bodyNode); - } - - // Original document not specified. We need to add font declarations. - // DJP: this might just be for debugging. Merger will probably put - // the "real" ones in. - // DJP: if really doing it this way, do it right: gather font names - // from style catalog(s). - else { - org.w3c.dom.Node declNode; - - log("<FONT-DECLS/>"); - - declNode = doc.createElement(TAG_OFFICE_FONT_DECLS); - rootNode.insertBefore(declNode, bodyNode); - org.w3c.dom.Element fontNode; - - fontNode = doc.createElement(TAG_STYLE_FONT_DECL); - fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial"); - fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial"); - fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); - declNode.appendChild(fontNode); - - fontNode = doc.createElement(TAG_STYLE_FONT_DECL); - fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso"); - fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso"); - fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); - declNode.appendChild(fontNode); - } - - - // Now add any new styles we have created in this document. - nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); - Node autoStylesNode = nl.item(0); - if (autoStylesNode == null) { - autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); - log("<OFFICE-AUTOMATIC-STYLES/>"); - rootNode.insertBefore(autoStylesNode, bodyNode); - } - - Node newStyleCatNode = styleCat.writeNode(doc, "dummy"); - nl = newStyleCatNode.getChildNodes(); - int nNodes = nl.getLength(); - for (int i = 0; i < nNodes; i++) { - autoStylesNode.appendChild(nl.item(0)); - } - - oldStyleCat.dumpCSV(true); - styleCat.dumpCSV(true); - return sxwDoc; - } - - - /** - * Sends message to the log object. - * - * @param str Debug message. - */ - private void log(String str) { - - Debug.log(Debug.TRACE, str); - } - - - /* - public static void main(String args[]) { - - // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream()); - - Node nodes[] = parseText("Tab here:\tThen some more text"); - } -*/ -} - |