diff options
Diffstat (limited to 'xmerge/source/pocketword')
14 files changed, 3093 insertions, 0 deletions
diff --git a/xmerge/source/pocketword/build.xml b/xmerge/source/pocketword/build.xml new file mode 100644 index 000000000000..51b198c329af --- /dev/null +++ b/xmerge/source/pocketword/build.xml @@ -0,0 +1,76 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + #************************************************************************* + # + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+
+ #*************************************************************************
+ -->
+
+<project name="pocketword" default="all" basedir=".">
+
+    <property file="../inc/antbuild.properties"/>
+
+    <path id="classpath">
+        <pathelement location="${build.dir}/xmerge.jar"/>
+    </path>
+
+
+    <target name="init" >
+        <mkdir dir="${target.dir}"/>
+    </target>
+
+    <!-- compile the java sources under ${src.dir} into ${target.dir},
+         using xmerge.jar from ${build.dir} as the classpath -->
+    <target name="compile" depends="init">
+        <javac srcdir="${src.dir}"
+               destdir="${target.dir}"
+               debug="${debug}"
+               deprecation="${deprecation}"
+               optimize="${optimize}">
+            <classpath refid="classpath"/>
+        </javac>
+    </target>
+
+    <!-- package to jar: every compiled class from ${target.dir}, plus
+         converter.xml from this directory placed under META-INF -->
+    <target name="jar" depends="compile">
+        <jar destfile="${target.jar}">
+            <fileset dir="${target.dir}"
+                     includes="**/*.class" />
+            <metainf dir="${basedir}">
+                <filename name="converter.xml"/>
+            </metainf>
+        </jar>
+    </target>
+
+    <!-- clean up: removes ${class.dir} and the generated jar.
+         NOTE(review): "init" and "compile" write to ${target.dir}, while
+         this target deletes ${class.dir}. Both are presumably defined in
+         antbuild.properties; confirm they name the same directory,
+         otherwise compiled classes survive a clean. -->
+    <target name="clean">
+        <delete dir="${class.dir}"/>
+        <delete file="${target.jar}"/>
+    </target>
+
+    <target name="all" depends="jar">
+    </target>
+
+</project>
+
diff --git a/xmerge/source/pocketword/converter.xml b/xmerge/source/pocketword/converter.xml
new file mode 100644
index 000000000000..56fcebfba6b1
--- /dev/null
+++ b/xmerge/source/pocketword/converter.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ #*************************************************************************
+ #
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2000, 2010 Oracle and/or its affiliates.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+ #*************************************************************************
+ -->
+
+
+<!-- Plugin descriptor: declares a single converter between the
+     "staroffice/sxw" office type and the "application/x-pocket-word"
+     device type, implemented by the PluginFactoryImpl class named below. -->
+<converters>
+    <converter type="staroffice/sxw" version="1.1">
+        <converter-display-name>
+            Pocket Word
+        </converter-display-name>
+        <converter-description>
+            OpenOffice Writer XML to/from Pocket Word conversion.
+        </converter-description>
+        <converter-vendor>
+            OpenOffice.org
+        </converter-vendor>
+        <converter-class-impl>
+            org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+        </converter-class-impl>
+        <converter-target type="application/x-pocket-word"/>
+    </converter>
+</converters>
+
diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java
new file mode 100644
index 000000000000..13437cc7bb7c
--- /dev/null
+++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java
@@ -0,0 +1,93 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +/** + * <p>PocketWord implementation of <code>ConverterCapabilities</code> for + * the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>Used with StarWriter XML to/from PocketWord conversions. 
The + * <code>ConverterCapibilies</code> specify which "Office" + * <code>Document</code> tags and attributes are supported on the + * "Device" <code>Document</code> format.</p> + */ +public final class ConverterCapabilitiesImpl + implements ConverterCapabilities { + + public boolean canConvertTag(String tag) { + + if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag)) + return true; + else if (OfficeConstants.TAG_PARAGRAPH.equals(tag)) + return true; + else if (OfficeConstants.TAG_HEADING.equals(tag)) + return true; + else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_ITEM.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_HEADER.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPAN.equals(tag)) + return true; + else if (OfficeConstants.TAG_HYPERLINK.equals(tag)) + return true; + else if (OfficeConstants.TAG_LINE_BREAK.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPACE.equals(tag)) + return true; + else if (OfficeConstants.TAG_TAB_STOP.equals(tag)) + return true; + + return false; + } + + public boolean canConvertAttribute(String tag, + String attribute) { + + if (OfficeConstants.TAG_SPACE.equals(tag)) { + + if (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute)) + return true; + } + + return false; + } +} + diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java new file mode 100644 index 000000000000..5e2f8a06e3a0 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java @@ -0,0 +1,235 @@ 
+/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.util.EndianConverter; + +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.io.IOException; + +import java.util.Vector; + + +/** + * This class to represent the data structure stored by a Pocket Word file that + * describes that file. + * + * The data structure is of variable length, beginning at the end of the + * font declarations and ending 10 bytes before the first instance of 0xFF 0xFF + * marking a paragraph block. + * + * The variable length component arises from an 8 byte structure describing each + * paragraph in the document. These paragraph descriptors appear at the end + * of the Document Descriptor. 
+ *
+ * @author  Mark Murnane
+ * @version 1.1
+ */
+class DocumentDescriptor {
+    private short numParagraphs = 0;    // paragraphs added so far
+    private short length = 0;           // sum of the stored paragraph lengths
+    private short numLines = 0;         // sum of the paragraph line counts
+
+    private Vector paragraphDesc = null;    // ParagraphDescriptor objects, in insertion order
+
+    DocumentDescriptor() {
+        paragraphDesc = new Vector(0, 1);
+    }
+
+
+
+    /**
+     * Updates the <code>DocumentDescriptor</code> to include details of another
+     * paragraph in the document.
+     *
+     * The running totals are held in <code>short</code> fields, so they wrap
+     * silently once a document exceeds the range of a <code>short</code>.
+     * The length added to the total is the stored paragraph length, which is
+     * <code>len + 1</code>.
+     *
+     * @param   len     The number of characters in the paragraph.
+     * @param   lines   The number of lines on screen that the paragraph uses.
+     */
+    public void addParagraph(short len, short lines) {
+        ParagraphDescriptor pd = new ParagraphDescriptor(len, lines);
+
+        paragraphDesc.add(pd);
+        numParagraphs++;
+        numLines += lines;
+        length += pd.length;
+    }
+
+
+    /**
+     * Retrieve the <code>DocumentDescriptor</code>'s data.  Due to the variable
+     * length nature of the descriptor, certain fields can only be
+     * calculated/written after the addition of all paragraphs.
+     *
+     * The output is the fixed header, the paragraph/length/line counters,
+     * one 8 byte entry per paragraph and a closing marker.
+     *
+     * @return  Byte array containing the Pocket Word representation of this
+     *          <code>DocumentDescriptor</code>.
+     */
+    public byte[] getDescriptor () {
+        ByteArrayOutputStream descStream = new ByteArrayOutputStream();
+
+        writeHeader(descStream);
+
+        /*
+         * This value seems to increment by 0x02 for each paragraph.
+         * For a single paragraph doc, the value is 0x08, 0x0A for two,
+         * 0x0C for three ...
+         */
+        try {
+            descStream.write(EndianConverter.writeShort((short)(6 +
+                                                    (numParagraphs * 2))));
+
+            descStream.write(EndianConverter.writeShort(numParagraphs));
+            descStream.write(EndianConverter.writeShort((short)0));
+            descStream.write(EndianConverter.writeShort(numParagraphs));
+
+            descStream.write(EndianConverter.writeShort((short)0));
+            descStream.write(EndianConverter.writeShort((short)length));
+            descStream.write(EndianConverter.writeShort((short)0));
+
+            descStream.write(EndianConverter.writeShort(numLines));
+            descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00 } );
+
+            for (int i = 0; i < paragraphDesc.size(); i++) {
+                ParagraphDescriptor pd = (ParagraphDescriptor)paragraphDesc.elementAt(i);
+
+                descStream.write(pd.getDescriptor());
+            }
+
+            // Byte sequence marking the end of this DocumentDescriptor
+            descStream.write(EndianConverter.writeShort((short)0));
+            descStream.write(EndianConverter.writeShort((short)0x41));
+        }
+        catch (IOException ioe) {
+            // Should never happen as this is a memory based stream.
+        }
+
+        return descStream.toByteArray();
+    }
+
+
+    /*
+     * This method loads the initial fixed portion of the descriptor and the
+     * mid-section.  The mid-section is variable but Pocket Word doesn't seem
+     * to mind default values.
+     *
+     * Any IOException from the stream is swallowed; the only caller in this
+     * class passes a ByteArrayOutputStream.
+     */
+    private void writeHeader(OutputStream descStream) {
+
+        try {
+            descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+                                          0x07, 0x00, 0x06, 0x00,
+                                          0x15, 0x00, 0x10, 0x00,
+                                          0x01, 0x00, (byte)0xD0, 0x2F,
+                                          0x00, 0x00, (byte)0xE0, 0x3D,
+                                          0x00, 0x00, (byte)0xF0, 0x00,
+                                          0x00, 0x00, (byte)0xA0, 0x05,
+                                          0x00, 0x00, (byte)0xA0, 0x05,
+                                          0x00, 0x00, (byte)0xA0, 0x05,
+                                          0x00, 0x00, (byte)0xA0, 0x05,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x0A, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x04, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x0A, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x04, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x08, 0x00,
+                                          0x07, 0x00, 0x10, 0x00,
+                                          0x01, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x12, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x1F, 0x04, 0x00, 0x00 } );
+
+            /*
+             * The next four bytes are variable, but a pattern hasn't yet been
+             * established.  Pocket Word seems to accept this constant value.
+             *
+             * The bytes are repeated after another 12 byte sequence which does
+             * not seem to change from one file to the next.
+             */
+            descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } );
+            descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x3D, 0x04, 0x00, 0x00 } );
+            descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } );
+
+            descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x00, 0x00, 0x00, 0x00,
+                                          0x40, 0x00, 0x08, 0x00 } );
+        }
+        catch (IOException ioe) {
+            /* Shouldn't happen with a ByteArrayOutputStream */
+        }
+    }
+
+
+    /**
+     * <code>ParagraphDescriptor</code> represents the data structure used to
+     * describe individual paragraphs within a <code>DocumentDescriptor</code>.
+     *
+     * Each descriptor is written as four shorts (8 bytes): filler, line
+     * count, length and a constant whose meaning is unknown.
+     *
+     * It is used solely by the <code>DocumentDescriptor</code> class.
+     */
+    private class ParagraphDescriptor {
+        private short filler = 0;
+        private short lines = 0;
+        private short length = 0;
+        private short unknown = 0x23;
+
+        public ParagraphDescriptor(short len, short numLines) {
+            lines = numLines;
+            length = (short)(len + 1);      // stored length is one more than the character count
+        }
+
+        public byte[] getDescriptor() {
+            ByteArrayOutputStream desc = new ByteArrayOutputStream();
+
+            try {
+                desc.write(EndianConverter.writeShort(filler));
+                desc.write(EndianConverter.writeShort(lines));
+                desc.write(EndianConverter.writeShort(length));
+                desc.write(EndianConverter.writeShort(unknown));
+            }
+            catch (IOException ioe) {
+                /* Should never happen */
+            }
+
+            return desc.toByteArray();
+        }
+    }
+}
\ No newline at end of file diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..6d7873cca96b --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java @@ -0,0 +1,294 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentDeserializer; + +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; +import org.openoffice.xmerge.converter.xml.StyleCatalog; + +import org.openoffice.xmerge.util.OfficeUtil; + +import java.io.IOException; + +import java.util.Enumeration; +import java.util.Vector; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.Element; + + +/** + * <p>Pocket Word implementation of <code>DocumentDeserializer</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts a Pocket Word file to an OpenOffice Writer XML DOM.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public final class DocumentDeserializerImpl + implements DocumentDeserializer, OfficeConstants { + + private PocketWordDocument pswDoc = null; + private SxwDocument sxwDoc = null; + private String docName; + + private StyleCatalog styleCat = null; + + + /** + * Initialises a new <code>DocumentDeserializerImpl</code> using the + * supplied <code>ConvertData</code>.</p> + * + * <p>The <code>Document</code> objects in the <code>ConvertData</code> + * should be {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument + * PocketWordDocument} objects.</p> + * + * @param cd ConvertData containing a <code>PocketWordDocument</code> + * for conversion. 
+ */ + public DocumentDeserializerImpl(ConvertData cd) { + Enumeration e = cd.getDocumentEnumeration(); + + // A Pocket Word file is composed of one binary file + while (e.hasMoreElements()) { + pswDoc = (PocketWordDocument)e.nextElement(); + } + + docName = pswDoc.getName(); + } + + + /** + * <p>Convert the data passed into the <code>DocumentDeserializer</code> + * constructor into the OpenOffice Writer <code>Document</code> + * format.</p> + * + * <p>This method may or may not be thread-safe. It is expected + * that the user code does not call this method in more than one + * thread. And for most cases, this method is only done once.</p> + * + * @return The resulting <code>Document</code> object from conversion. + * + * @throws ConvertException If any Convert error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws IOException, ConvertException { + Enumeration pe = pswDoc.getParagraphEnumeration(); + + sxwDoc = new SxwDocument (docName); + sxwDoc.initContentDOM(); + + // Default to an initial 5 entries in the catalog. + styleCat = new StyleCatalog(5); + + try { + buildDocument(pe); + } + catch (Exception e) { + e.printStackTrace(); + throw new ConvertException("Error building OpenOffice Writer DOM: " + + e.toString()); + + } + + return sxwDoc; + } + + + /** + * This method actually takes care of the conversion. + * + * @param data An Enumeration of all Paragraphs in the Pocket Word doc. + * + * @return The OpenOffice Writer XML representation of the data. + * + * @throws IOException If any I/O errors occur. + */ + private void buildDocument(Enumeration data) throws IOException { + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + /* + * There should be only one each of office:body and + * office:automatic-styles in each document. 
+ */ + Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0); + + // Not every document has an automatic style tag + Node autoStylesNode = doc.getElementsByTagName( + TAG_OFFICE_AUTOMATIC_STYLES).item(0); + if (autoStylesNode == null) { + autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); + doc.insertBefore(autoStylesNode, bodyNode); + } + + + // Needed for naming new styles + int paraStyles = 1; + int textStyles = 1; + + // Pocket Word has no concept of a list. + Element listNode = null; + + + // Down to business ... + while (data.hasMoreElements()) { + Paragraph p = (Paragraph)data.nextElement(); + Element paraNode = doc.createElement(TAG_PARAGRAPH); + + // Set paragraph style information here + ParaStyle pStyle = p.makeStyle(); + if (pStyle == null) { + paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, + PocketWordConstants.DEFAULT_STYLE); + } + else { + // Create paragraph style + pStyle.setName(new String("PS" + paraStyles++)); + paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); + styleCat.add(pStyle); + } + + + /* + * For each of the paragraphs, process each segment. + * There will always be at least one. 
+ */ + Enumeration paraData = p.getSegmentsEnumerator(); + Vector textSpans = new Vector(0, 1); + + do { + ParagraphTextSegment pts = (ParagraphTextSegment)paraData.nextElement(); + Element span = doc.createElement(OfficeConstants.TAG_SPAN); + + TextStyle ts = pts.getStyle(); + + if (ts != null) { + ts.setName(new String("TS" + textStyles++)); + span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName()); + styleCat.add(ts); + } + else { + span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, + PocketWordConstants.DEFAULT_STYLE); + } + + // If this isn't a blank paragraph + if (pts.getText() != null && !pts.getText().equals("")) { + Node[] children = OfficeUtil.parseText(pts.getText(), doc); + + for (int j = 0; j < children.length; j++) { + span.appendChild(children[j]); + } + } + + textSpans.add(span); + + } while (paraData.hasMoreElements()); + + + /* + * Special case for the first span. If it has no style, then + * it shouldn't be a span, so just add its children with style + * set as standard. + */ + Element firstSpan = (Element)textSpans.elementAt(0); + String styleName = firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME); + if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) { + NodeList nl = firstSpan.getChildNodes(); + int len = nl.getLength(); + + for (int i = 0; i < len; i++) { + /* + * Always take item 0 as the DOM tree event model will + * cause the NodeList to shrink as each Node is reparented. + * + * By taking the first item from the list, we essentially + * traverse the list in order. + */ + paraNode.appendChild(nl.item(0)); + } + } + else { + paraNode.appendChild(firstSpan); + } + + // The rest are spans, so just add them + for (int i = 1; i < textSpans.size(); i++) { + paraNode.appendChild((Node)textSpans.elementAt(i)); + } + + + /* + * Pocket Word doesn't support lists, but it does have bulleted + * paragraphs that are essentially the same thing. + * + * Unlike OpenOffice Writer, a blank paragraph can be bulleted + * as well. 
This will be handled by inserting a blank paragraph + * into the unordered list, but OpenOffice Writer will not display + * an item at that point in the list. + */ + if (p.isBulleted()) { + if (listNode == null) { + listNode = doc.createElement(TAG_UNORDERED_LIST); + } + Element listItem = doc.createElement(TAG_LIST_ITEM); + listItem.appendChild(paraNode); + listNode.appendChild(listItem); + } + else { + if (listNode != null) { + bodyNode.appendChild(listNode); + listNode = null; + } + bodyNode.appendChild(paraNode); + } + } // End processing paragraphs + + + // Now write the style catalog to the document + NodeList nl = styleCat.writeNode(doc, "dummy").getChildNodes(); + int nlLen = nl.getLength(); // nl.item reduces the length + for (int i = 0; i < nlLen; i++) { + autoStylesNode.appendChild(nl.item(0)); + } + } +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java new file mode 100644 index 000000000000..e3d09b5c2bcc --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java @@ -0,0 +1,97 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.w3c.dom.Document; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * PocketWord implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + } + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + 
Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java new file mode 100644 index 000000000000..08424a073803 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java @@ -0,0 +1,433 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.NamedNodeMap; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentSerializer; + +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; +import org.openoffice.xmerge.converter.xml.StyleCatalog; + +import java.io.IOException; + + +/** + * <p>Pocket Word implementation of <code>DocumentDeserializer</code> + * for use by {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts an OpenOffice Writer XML files to a Pocket Word file<.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public final class DocumentSerializerImpl + implements DocumentSerializer, OfficeConstants { + + private PocketWordDocument pswDoc; + private SxwDocument sxwDoc; + + private StyleCatalog styleCat = null; + + private boolean inList = false; + + + /** + * <p>Initialises a new <code>DocumentSerializerImpl</code> using the.<br> + * supplied <code>Document</code></p> + * + * <p>The supplied document should be an {@link + * org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument} + * object.</p> + * + * @param document The <code>Document</code> 
to convert. + */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument)doc; + pswDoc = new PocketWordDocument(sxwDoc.getName()); + } + + + /** + * <p>Convert the data passed into the <code>DocumentSerializerImpl</code> + * constructor into Pocket Word format.</p> + * + * <p>This method may or may not be thread-safe. It is expected + * that the user code does not call this method in more than one + * thread. And for most cases, this method is only done once.</p> + * + * @return <code>ConvertData</code> object to pass back the + * converted data. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() throws IOException, ConvertException { + ConvertData cd = new ConvertData(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Load any style info before traversing the document content tree + loadStyles(); + + NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + + int len = list.getLength(); + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + cd.addDocument(pswDoc); + + return cd; + } + + + /* + * Handles the loading of defined styles from the style.xml file as well + * as automatic styles from the content.xml file. + * + * Any change to a defined style, such as a short bold section, falls into + * the latter category. 
+ */ + private void loadStyles() { + org.w3c.dom.Document contentDom = sxwDoc.getContentDOM(); + org.w3c.dom.Document styleDom = sxwDoc.getStyleDOM(); + + styleCat = new StyleCatalog(25); + + NodeList nl = null; + String families[] = new String[] { PocketWordConstants.TEXT_STYLE_FAMILY, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY }; + Class classes[] = new Class[] { TextStyle.class, + ParaStyle.class, + TextStyle.class }; + + String[] styleTypes = new String[] { TAG_OFFICE_STYLES, + TAG_OFFICE_AUTOMATIC_STYLES, + TAG_OFFICE_MASTER_STYLES }; + + /* + * Documents converted from PSW -> SXW will not have a style.xml when + * being converted back to PSW. This would occur if a document was + * not modified within Writer between conversions. + * + * Any Writer modifications and saves create the style.xml and other + * portions of a complete Writer SXW file. + */ + if (styleDom != null) { + // Process the Style XML tree + for (int i = 0; i < styleTypes.length; i++ ) { + nl = styleDom.getElementsByTagName(styleTypes[i]); + if (nl.getLength() != 0) { + styleCat.add(nl.item(0), families, classes, null, false); + } + } + } + + /* + * Process the content XML for any other style info. + * Should only be automatic types here. + */ + for (int i = 0; i < styleTypes.length; i++ ) { + nl = contentDom.getElementsByTagName(styleTypes[i]); + if (nl.getLength() != 0) { + styleCat.add(nl.item(0), families, classes, null, false); + } + } + } + + + /* + * Process the office:body tag. 
+ */ + private void traverseBody(Node node) throws IOException, ConvertException { + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) + || nodeName.equals(TAG_HEADING)) { + traverseParagraph(child); + } + + if (nodeName.equals(TAG_UNORDERED_LIST) || + nodeName.equals(TAG_ORDERED_LIST)) { + traverseList(child); + } + } + } + } + } + + + /* + * Process a text:p tag + */ + private void traverseParagraph(Node node) throws IOException, ConvertException { + String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); + + ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, + ParaStyle.class); + if (pstyle != null) { + pstyle = (ParaStyle)pstyle.getResolved(); + } + + TextStyle tstyle = (TextStyle)styleCat.lookup(styleName, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, + TextStyle.class); + if (pstyle != null) { + tstyle = (TextStyle)tstyle.getResolved(); + } + + try { + pswDoc.addParagraph(pstyle, inList); + } + catch (Exception e) { + throw new ConvertException( + "Error adding paragraph to PocketWordDocument.\n" + + e.toString()); + } + + traverseParagraphContents(node, tstyle); + } + + + /* + * Process the contents of a paragraph. This method handles situations + * where the paragraph contains multiple children, each representing a + * differently formatted piece of text. 
+ */ + private void traverseParagraphContents (Node node, TextStyle defTextStyle) + throws IOException, ConvertException { + // First up, get the style of this little bit + String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); + TextStyle tStyle = (TextStyle)styleCat.lookup(styleName, + PocketWordConstants.TEXT_STYLE_FAMILY, null, + TextStyle.class); + + if (tStyle == null) { + tStyle = defTextStyle; + } + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nList.item(i); + short nodeType = child.getNodeType(); + + switch (nodeType) { + case Node.TEXT_NODE: + String s = child.getNodeValue(); + if (s.length() > 0) { + try { + pswDoc.addParagraphData(s, tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + break; + + case Node.ELEMENT_NODE: + if (child.getNodeName().equals(TAG_SPACE)) { + StringBuffer sb = new StringBuffer(""); + int count = 1; + + NamedNodeMap map = child.getAttributes(); + + if (map.getLength() > 0) { + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + count = Integer.parseInt(attr.getNodeValue().trim()); + } + + for ( ; count > 0; count--) { + sb.append(" "); + } + + /* + * May want to look at style info for spaces. Could + * be important when calculating font metrics. 
+ */ + try { + pswDoc.addParagraphData(sb.toString(), tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + else if (child.getNodeName().equals(TAG_TAB_STOP)) { + try { + pswDoc.addParagraphData("\t", tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + else if (child.getNodeName().equals(TAG_LINE_BREAK)) { + /* + * Pocket Word does not support soft line breaks. + * They are just new paragraphs. + */ + } + else if (child.getNodeName().equals(TAG_SPAN)) { + /* + * This is where the interesting ones, i.e. format + * changes occur. + */ + traverseParagraphContents (child, defTextStyle); + } + else if (child.getNodeName().equals(TAG_HYPERLINK)) { + traverseParagraphContents (child, defTextStyle); + } + else { + // Should maybe have a default in here. + } + break; + default: + // Do nothing + } + } + } + else { + /* + * If the node has no children, then it is a blank paragraph, but + * they still require an entry in the Paragraph class to make sense. + */ + pswDoc.addParagraphData("", tStyle); + } + } + + + /* + * Process a text:ordered-list or text:unordered-list tag. Pocket Word has + * no concept of a list so there is no need to differentiate between the + * two. + * + * Each item on the list contains a text:p node. + */ + private void traverseList (Node node) throws IOException, ConvertException { + inList = true; + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + traverseListItem(child); + } + } + } + } + + inList = false; + } + + + /* + * Process a text:list-item node. 
They usually contain have a single + * text:p child but can also have sections or other lists. + * + * For this case, only paragraphs are supported. + */ + private void traverseListItem (Node node) throws IOException, ConvertException { + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + traverseParagraph(child); + } + } + } + } + + } + + + /* + * Utility method to retrieve a Node attribute. + */ + private String getAttribute (Node node, String attribute) { + NamedNodeMap attrNodes = node.getAttributes(); + + if (attrNodes != null) { + Node attr = attrNodes.getNamedItem(attribute); + if (attr != null) { + return attr.getNodeValue(); + } + } + + return null; + } +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java new file mode 100644 index 000000000000..0302a5d6efba --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java @@ -0,0 +1,858 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import java.util.Vector; +import java.util.Enumeration; + +import java.awt.Color; + +import org.openoffice.xmerge.util.EndianConverter; +import org.openoffice.xmerge.util.ColourConverter; +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; + + +/** + * Represents a paragraph data structure within a Pocket Word document. + * + * @author Mark Murnane + * @version 1.1 + */ +class Paragraph implements PocketWordConstants { + /* + * The data elements of a Paragraph. + * + * As the 'unknown' values are not calculated they are declared static. + * They are not declared final because they do have a calculable value. 
+ */ + private static short unknown1 = 0x23; + private short dataWords = 0; + private short textLength = 0; + private short lengthWithFormatting = 0; + private short lines = 0; + + private static final short marker = (short)0xFFFF; + private static int unknown2 = 0x22; // May be two short values + + private short specialIndentation = 0; + private short leftIndentation = 0; + private short rightIndentation = 0; + + private byte bullets = 0; + private byte alignment = 0; + + private static int unknown3 = 0; + + // Will always have at least these formatting settings in each paragraph + private short defaultFont = 2; // Courier New for the time being + private short defaultSize = 10; + + + /* + * Remaining elements assist in calculating correct values for the paragraph + * representation. + */ + + private Vector textSegments = null; + + private Vector lineDescriptors = null; + + private ParaStyle pStyle = null; + + private boolean isLastParagraph = false; + + + /* + * Private class constructor used by all constructors. Ensures the proper + * initialisation of the Vector storing the paragraph's text. + */ + private Paragraph () { + textSegments = new Vector(0, 1); + } + + + /** + * <p>Constructor for use when converting from SXW format to Pocket Word + * format.</p> + * + * @param style Paragraph style object describing the formatting style + * of this paragraph. + */ + public Paragraph (ParaStyle style) { + this(); + + lineDescriptors = new Vector(0, 1); + pStyle = style; + } + + + /** + * <p>Constructor for use when converting from Pocket Word format to SXW + * format.</p> + * + * @param data Byte array containing byte data describing this paragraph + * from the Pocket Word file. 
+ */ + public Paragraph (byte[] data) { + this(); + + /* + * Read in all fixed data from the array + * + * unknown1 appears at data[0] and data[1] + */ + dataWords = EndianConverter.readShort(new byte[] { data[2], data[3] } ); + textLength = EndianConverter.readShort(new byte[] { data[4], data [5] } ); + lengthWithFormatting = EndianConverter.readShort( + new byte[] { data[6], data[7] } ); + lines = EndianConverter.readShort(new byte[] { data[8], data [9] } ); + + /* + * The marker appears at data[10] and data[11]. + * + * The value of unknown2 is at data[12], data[13], data[14] and data[15]. + */ + + specialIndentation = EndianConverter.readShort(new byte[] { data[16], data[17] } ); + leftIndentation = EndianConverter.readShort(new byte[] { data[18], data [19] } ); + rightIndentation = EndianConverter.readShort(new byte[] { data[20], data [21] } ); + + bullets = data[22]; + alignment = data[23]; + + // The value of unknown3 is at data[24], data[25], data[26] and data[27]. + + /* + * The actual paragraph data is in the remainder of the byte sequence. + * + * Only the actual text seqence with the embedded formatting tags is + * relevant to the conversion from Pocket Word to SXW format. + */ + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + bos.write(data, 28, lengthWithFormatting); + parseText(bos.toByteArray()); + } + + + /* + * Processes the text portion of the raw paragraph data from the Pocket Word + * file. This data also includes formatting settings for the text in the + * paragraph. + * + * Formatting changes appear like XML/HTML tags. Formatted blocks are + * preceded by a sequence of bytes switching on a formatting change and + * followed by a sequence switching off that formatting change. 
+ */ + private void parseText (byte[] data) { + + int totalLength = data.length; + + StringBuffer sb = new StringBuffer(""); + + // Setup text style information + int mask = TextStyle.BOLD | TextStyle.ITALIC | TextStyle.UNDERLINE + | TextStyle.STRIKETHRU; + + + String fontName = null; + int fontSize = 0; + Color textColour = null; + Color backColour = null; + int modifiers = 0; + + TextStyle ts = null; + + int attrsSet = 0; // If this is 0, we have no extra style + boolean inSequence = false; + boolean sawText = false; + + String s = new String(); // For debugging + + // Start from the very beginning + for (int i = 0; i < totalLength; i++) { + // Will encounter at least two codes first + if ((byte)(data[i] & 0xF0) == FORMATTING_TAG) { + if (sawText) { + // Style change so dump previous segment and style info + addTextSegment(sb.toString(), ts); + sb = new StringBuffer(""); + sawText = false; + } + + switch (data[i]) { + case FONT_TAG: + int index = EndianConverter.readShort( + new byte[] { data[i + 1], data[i + 2] } ); + + /* + * Standard font. + * + * Should really be one, but as the only supported font + * currently is Courier New, want to leave it at Courier + * New for round trip conversions. + * + * Also need to account for the fact that Tahoma is the + * correct standard font. 
+ */ + if (fontName == null || fontName.equals("2")) { + if (index != 2 && index != 1) { + fontName = String.valueOf(index); + attrsSet++; + } + } + else { + // Font is set, but not the default + if (index == 2 || index == 1) { + fontName = "2"; + attrsSet--; + } + else { + fontName = String.valueOf(index); + } + } + i += 2; + break; + + + case FONT_SIZE_TAG: + int size = EndianConverter.readShort( + new byte[] { data[i + 1], data[i + 2] } ); + + if (size == 0) { + // Flags the end of the last paragraph + isLastParagraph = true; + i += 2; + break; + } + + // Standard size + if (fontSize == 0 || fontSize == 10) { + if (size != 10) { + fontSize = size; + attrsSet++; + } + } + else { + // Font size is set, but not to standard + if (size == 10) { + fontSize = 10; + attrsSet--; + } + else { + fontSize = size; + } + } + i += 2; + break; + + + case COLOUR_TAG: + if (data[i + 1] != 0) { + ColourConverter cc = new ColourConverter(); + textColour = cc.convertToRGB( + EndianConverter.readShort(new byte[] { data[i + 1], + data[i + 2] } )); + attrsSet++; + } + else { + textColour = null; + attrsSet--; + } + i += 2; + break; + + + case FONT_WEIGHT_TAG: + if (data[i + 1] == FONT_WEIGHT_BOLD + || data[i + 1] == FONT_WEIGHT_THICK) { + modifiers |= TextStyle.BOLD; + attrsSet++; + } + else { + // Its a bit field so subtracting should work okay. 
+ modifiers ^= TextStyle.BOLD; + attrsSet--; + } + i += 2; + break; + + + case ITALIC_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.ITALIC; + attrsSet++; + } + else { + modifiers ^= TextStyle.ITALIC; + attrsSet--; + } + i++; + break; + + + case UNDERLINE_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.UNDERLINE; + attrsSet++; + } + else { + modifiers ^= TextStyle.UNDERLINE; + attrsSet--; + } + i++; + break; + + + case STRIKETHROUGH_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.STRIKETHRU; + attrsSet++; + } + else { + modifiers ^= TextStyle.STRIKETHRU; + attrsSet--; + } + i++; + break; + + case HIGHLIGHT_TAG: + /* + * Highlighting is treated by OpenOffice as a + * background colour. + */ + if (data[i + 1] == (byte)0x01) { + backColour = Color.yellow; + attrsSet++; + } + else { + backColour = null; + attrsSet--; + } + i++; + break; + } + + inSequence = true; + continue; + } + + if (inSequence) { + // Style information has been changed. Create new style here + + inSequence = false; + if (attrsSet > 0) { + ts = new TextStyle(null, TEXT_STYLE_FAMILY, DEFAULT_STYLE, + mask, modifiers, fontSize, fontName, null); + ts.setColors(textColour, backColour); + } + else { + ts = null; + } + } + + /* + * C4 xx seems to indicate a control code. C4 00 indicates the end + * of a paragraph; C4 04 indicates a tab space. Only these two + * have been seen so far. + */ + if (data[i] == (byte)0xC4) { + /* + * Redundant nodes are sometimes added to the last paragraph + * because a new sequence is being processed when the flag is + * set. + * + * To avoid this, do nothing with the last paragraph unless no + * text has been added for it already. In that case, add the + * empty text segment being process to ensure that all + * paragraphs have at least one text segment. 
+ */ + if (data[i + 1] == (byte)0x00) { + if (isLastParagraph && textSegments.size() > 0) { + return; + } + addTextSegment(sb.toString(), ts); + return; + } + sb.append("\t"); + sawText = true; + i++; + continue; + } + + sb.append((char)data[i]); + sawText = true; + s = sb.toString(); + } + } + + + /** + * <p>Adds details of a new text block to the <code>Paragraph</code> object. + * </p> + * + * @param text The text of the new block. + * @param style Text style object describing the formatting attached + * to this block of text. + */ + public void addTextSegment(String text, TextStyle style) { + textLength += text.length(); + textSegments.add(new ParagraphTextSegment(text, style)); + } + + + /** + * <p>This method alters the state of the <code>Paragraph</code> object to + * indicate whether or not it is the final paragraph in the document.</p> + * + * <p>It is used during conversion from SXW format to Pocket Word format. + * In Pocket Word files, the last paragraph finishes with a different byte + * sequence to other paragraphs.</p> + * + * @param isLast true if the Paragraph is the last in the document, + * false otherwise. + */ + public void setLastParagraph(boolean isLast) { + isLastParagraph = isLast; + } + + + /** + * <p>Complementary method to {@link #setLastParagraph(boolean) + * setLastParagraph}. Returns the terminal status of this + * <code>Paragraph</code> within the Pocket Word document.</p> + * + * @return true if the Paragraph is the last in the document; false otherwise. + */ + public boolean getLastParagraph () { + return isLastParagraph; + } + + + /** + * <p>This method returns the Pocket Word representation of this + * <code>Paragraph</code> in Little Endian byte order.</p> + * + * <p>Used when converting from SXW format to Pocket Word format.</p> + * + * @return <code>byte</code> array containing the formatted representation + * of this Paragraph. 
+ */ + public byte[] getParagraphData() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + postProcessText(); + + /* + * Need information about the paragraph segments in two places + * so calculate them first. + * + * The stream contains the text wrapped in any formatting sequences that + * are necessary. + */ + ByteArrayOutputStream segs = new ByteArrayOutputStream(); + + try { + for (int i = 0; i < textSegments.size(); i++) { + ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i); + segs.write(pts.getData()); + } + } + catch (IOException ioe) { + // Should never happen in a memory based stream + } + + /* + * Number of data words for this paragraph descriptor: + * + * 26 is the number of bytes prior to the start of the segment. + * 3 comes from the C4 00 00 termintating sequence. + */ + dataWords = (short)(26 + segs.size() + 3 + 4); + if (isLastParagraph) { + dataWords += 6; + } + if (dataWords % 4 != 0) { + dataWords += (4 - (dataWords % 4)); + } + dataWords /= 4; + + /* + * The 8 bytes are made up of E6 ?0 00 and E5 ?0 00 at the start of the + * text along with the C4 00 that terminates it. + * + * In the event that the paragraph is the last one E6 00 00 is also + * present at the end of the text. Also, as we currently use a font + * other than the first in the index (Tahoma) E5 01 00 is also present. + * + * Make sure this is accurate when font specifications change + */ + lengthWithFormatting = (short)(segs.size() + (isLastParagraph ? 
14 : 8)); + + try { + bos.write(EndianConverter.writeShort(unknown1)); + bos.write(EndianConverter.writeShort(dataWords)); + bos.write(EndianConverter.writeShort((short)(textLength + 1))); + bos.write(EndianConverter.writeShort(lengthWithFormatting)); + bos.write(EndianConverter.writeShort(lines)); + + bos.write(EndianConverter.writeShort(marker)); + bos.write(EndianConverter.writeInt(unknown2)); + + bos.write(EndianConverter.writeShort(specialIndentation)); + bos.write(EndianConverter.writeShort(leftIndentation)); + bos.write(EndianConverter.writeShort(rightIndentation)); + + bos.write(bullets); + + if (pStyle != null && pStyle.isAttributeSet(ParaStyle.TEXT_ALIGN)) { + switch (pStyle.getAttribute(ParaStyle.TEXT_ALIGN)) { + + case ParaStyle.ALIGN_RIGHT: + bos.write(0x01); + break; + + case ParaStyle.ALIGN_CENTER: + bos.write(0x02); + break; + + default: + bos.write(0x00); // Left align in all other circumstances + break; + } + } + else { + bos.write(0x00); + } + + bos.write(EndianConverter.writeInt(unknown3)); + + + /* + * Write out font and size. + * + * If font support is added then this should change as the information + * will have to be calculated from a Font table. + */ + bos.write(FONT_TAG); + bos.write(EndianConverter.writeShort(defaultFont)); + bos.write(FONT_SIZE_TAG); + bos.write(EndianConverter.writeShort(defaultSize)); + + // Write out the text segments + bos.write(segs.toByteArray()); + + /* + * If this is the last paragraph in the document then we need to make + * sure that the paragraph text is terminated correctly with an E6 00 00 + * before the C4 00 00. + */ + if (isLastParagraph) { + if (defaultFont != 1) { + // Must always go back to the first font. 
+ bos.write(FONT_TAG); + bos.write(EndianConverter.writeShort((short)0x01)); + } + bos.write(FONT_SIZE_TAG); + bos.write(EndianConverter.writeShort((short)0x00)); + } + + bos.write(new byte[] { (byte)0xC4, 0x00, 0x00 } ); + + int padding = 0; + if (bos.size() % 4 != 0) { + padding = 4 - (bos.size() % 4); + } + for (int i = 0; i < padding; i++) { + bos.write(0x00); + } + + // Third byte should match first byte after 0xFF 0xFF + bos.write(new byte[] { 0x42, 0x00, 0x22, 0x00} ); + + /* + * Meaning of last two bytes seems to be the number of words describing + * lines. This is calculated at 10 bytes per descriptor. + * + * May have two extra padding bytes that need to be accounted for too + * The division below may lose 2 bytes (integer result). + */ + int wordsRemaining = (lineDescriptors.size() * 10) / 4; + if ((lineDescriptors.size() * 10) % 4 != 0) { + wordsRemaining++; + } + bos.write(EndianConverter.writeShort((short)wordsRemaining)); + + + // Now write out the line descriptors + for (int i = 0; i < lineDescriptors.size(); i++) { + LineDescriptor ld = (LineDescriptor)lineDescriptors.elementAt(i); + + bos.write(ld.getDescriptorInfo()); + } + + + if (!isLastParagraph) { + /* + * There may be a need to pad this. Will be writing at + * either start of 4 byte block or 2 bytes into it. + */ + if (bos.size() % 4 != 2) { + bos.write(EndianConverter.writeShort((short)0)); + } + bos.write(EndianConverter.writeShort((short)0x41)); + } + } + catch (IOException ioe) { + // Should never occur for a memory based stream + } + + return bos.toByteArray(); + } + + + /* + * This method handles the calculation of correct values for line lengths + * in each individual descriptor and the number of lines in the document. + * + * TODO: Update to take account of different font metrics. + */ + private void postProcessText() { + /* + * The post-processing ... + * + * For each line, we need to add a line descriptor and increment + * the number of lines in the paragraph data structure. 
+ * + * To do this, make sure that no sequence goes over the given screen + * width unless the last char is a whitespace character. + */ + + // In courier, can have no more than 29 chars per line + + int chunkStart = 0; + StringBuffer sb = new StringBuffer(""); + + // Line Descriptor info should be eliminated each time + lineDescriptors = new Vector(1, 1); + lines = 0; + + for (int i = 0; i < textSegments.size(); i++) { + ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i); + sb.append(pts.getText()); + } + + if (sb.length() == 0) { + lines = 1; + lineDescriptors.add(new LineDescriptor((short)1, (short)0)); + return; + } + + while (chunkStart < sb.length()) { + String text = ""; + + try { + text = sb.substring(chunkStart, chunkStart + 30); + } + catch (StringIndexOutOfBoundsException sioobe) { + // We have less than one line left so just add it + text = sb.substring(chunkStart); + lineDescriptors.add(new LineDescriptor((short)(text.length() + 1), (short)(text.length() * 36))); + chunkStart += text.length(); + lines++; + continue; + } + + int lastWhitespace = -1; + + for (int i = 29; i >= 0; i--) { + if (Character.isWhitespace(text.charAt(i))) { + lastWhitespace = i; + break; + } + } + + if (lastWhitespace != -1) { + // The line can be split + lineDescriptors.add(new LineDescriptor((short)(lastWhitespace + 1), (short)(lastWhitespace * 36))); + chunkStart += lastWhitespace + 1; + lines++; + } + else { + // The line is completely occupied by a single word + lineDescriptors.add(new LineDescriptor((short)29, (short)(29 * 36))); + chunkStart += 29; + lines++; + } + } + } + + + /** + * <p>Returns the number of lines in the <code>Paragraph</code>.</p> + * + * @return The number of lines in the document. 
+ */ + public short getLines() { + postProcessText(); + + return lines; + } + + + /** + * <p>Toggles the flag indicating that the <code>Paragraph</code> is a + * bulleted paragraph.</p> + * + * @param isBulleted true to enable bulleting for this paragraph, false + * otherwise. + */ + public void setBullets(boolean isBulleted) { + if (isBulleted) { + bullets = (byte)0xFF; + } + else { + bullets = 0; + } + } + + /** + * <p>Returns the bulleting status of the <code>Paragraph</code>.</p> + * + * @return true if the paragraph is bulleted, false otherwise. + */ + public boolean isBulleted() { + if (bullets != 0) { + return true; + } + return false; + } + + + /** + * <p>Returns the number of text characters in the <code>Paragraph</code>, + * excluding formatting.</p> + * + * @return The length of the paragraph. + */ + public int getTextLength () { + return textLength; + } + + + /** + * <p>Returns an <code>Enumeration</code> over the individual text segments + * of the <code>Paragraph</code>.</p> + * + * @return An <code>Enumeration</code> of the text segments. + */ + public Enumeration getSegmentsEnumerator () { + return textSegments.elements(); + } + + + /** + * <p>Returns a paragraph style object that describes any of the paragraph + * level formatting used by this <code>Paragraph</code>.</p> + * + * @return Paragraph style object describing the <code>Paragraph</code>. + */ + public ParaStyle makeStyle() { + int attrs[] = new int[] { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT, + ParaStyle.TEXT_ALIGN }; + String values[] = new String[attrs.length]; + + /* + * Not interested in left or right indents just yet. Don't know + * how to calculate them. + */ + + switch (alignment) { + case 2: + values[2] = "center"; + break; + + case 1: + values[2] = "right"; + break; + + case 0: + default: + values[2] = "left"; + return null; // Not interested if its the default. 
+ } + + return new ParaStyle(null, PARAGRAPH_STYLE_FAMILY, null, attrs, + values, null); + } + + + /* + * Class describing the data structures which appear following the text + * of a Paragraph. For each line on screen that the Paragraph uses, a + * LineDescriptor details how many characters are on the line and how much + * screen space they occupy. + * + * The screen space and character breaks are calculated during post-processing + * of the paragraph. See postProcessText(). + * + * The unit of measurement used for screen space is currently unknown. + */ + private class LineDescriptor { + private short characters = 0; + private int filler = 0; + private short screen_space = 0; + private short marker = 0; + + private LineDescriptor(short chars, short space) { + characters = chars; + screen_space = space; + marker = (short)0x040C; // Not a constant. Depends on font used. + } + + + private byte[] getDescriptorInfo(){ + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + try { + bos.write(EndianConverter.writeShort(characters)); + bos.write(EndianConverter.writeInt(filler)); + bos.write(EndianConverter.writeShort(screen_space)); + bos.write(EndianConverter.writeShort(marker)); + } + catch (IOException ioe) { + // Should never happen in a memory based stream. + } + + return bos.toByteArray(); + } + } +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java new file mode 100644 index 000000000000..e17617f467c7 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java @@ -0,0 +1,202 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.converter.xml.TextStyle; + +import org.openoffice.xmerge.util.EndianConverter; + +import org.openoffice.xmerge.util.ColourConverter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +/** + * This class represents a portion of text with a particular formatting style. + * The style may differ from the default style of the paragraph of which it + * is part. + * + * @author Mark Murnane + * @version 1.1 + */ +class ParagraphTextSegment implements PocketWordConstants { + + private String pText; + private TextStyle pStyle; + + + /** + * <p>Initialise a new <code>ParagraphTextSegment</p>. + * <p>Both parameters may be <code>null</code>.</p> + * + * @param data The text of this segment. + * @param style The style describing this segment. 
+ */ + public ParagraphTextSegment (String data, TextStyle style) { + pText = data; + pStyle = style; + } + + /** + * <p>Sets the text for this segment.</p> + * + * @param data The text of this segment. + */ + public void setText (String data) { + pText = data; + } + + /** + * <p>Gets the text for this segment.</p> + * + * @return The text of this segment. + */ + public String getText () { + return pText; + } + + + /** + * <p>Sets the style for this segment.</p> + * + * @param style The style describing this segment. + */ + public void setStyle (TextStyle style) { + pStyle = style; + } + + + /** + * <p>Gets the style for this segment.</p> + * + * @return The style describing this segment. + */ + public TextStyle getStyle () { + return pStyle; + } + + + /** + * <p>Returns the string data for this text segment wrapped with the + * appropriate byte codes for the formatting settings used.</p> + * + * @return <code>byte</code> array containing formatted text in Pocket Word + * format. + */ + public byte[] getData () { + ByteArrayOutputStream data = new ByteArrayOutputStream(); + + boolean colourSet = false; + boolean boldSet = false; + boolean italicSet = false; + boolean underlineSet = false; + boolean strikeSet = false; + boolean highlightSet = false; + + // TODO: Font changes need to be worked out here + + try { + if (pStyle != null) { + if (pStyle.getFontColor() != null) { + ColourConverter cc = new ColourConverter(); + short colourCode = cc.convertFromRGB(pStyle.getFontColor()); + if (colourCode != 0) { // not black + data.write(COLOUR_TAG); + data.write(EndianConverter.writeShort(colourCode)); + colourSet = true; + } + } + if (pStyle.isSet(TextStyle.BOLD) && pStyle.getAttribute(TextStyle.BOLD)) { + data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_BOLD, 0x00 } ); + boldSet = true; + } + if (pStyle.isSet(TextStyle.ITALIC) && pStyle.getAttribute(TextStyle.ITALIC)) { + data.write(new byte[] { ITALIC_TAG, 0x01 } ); + italicSet = true; + } + if 
(pStyle.isSet(TextStyle.UNDERLINE) && pStyle.getAttribute(TextStyle.UNDERLINE)) { + data.write(new byte[] { UNDERLINE_TAG, 0x01 } ); + underlineSet = true; + } + if (pStyle.isSet(TextStyle.STRIKETHRU) && pStyle.getAttribute(TextStyle.STRIKETHRU)) { + data.write(new byte[] { STRIKETHROUGH_TAG, 0x01 } ); + strikeSet = true; + } + if (pStyle.getBackgroundColor() != null) { + data.write(new byte[] { HIGHLIGHT_TAG, 0x01 } ); + highlightSet = true; + } + } + + + // Now write out the data + if (pText == null) { + /* + * The constructor and setText() both accept null. A segment + * without text contributes its formatting codes only, instead + * of failing with a NullPointerException. + */ + } + else if (!pText.equals("\t")) { + data.write(pText.getBytes()); + } + else { + /* + * Tabs are a special case. They are represented by Pocket Word + * as the LE sequence 0xC4 0x04. + */ + data.write(new byte[] { (byte)0xC4, 0x04 } ); + } + + + // Now close out any of the settings changes + if (colourSet) { + /* + * Colours may change without changing back to black, but + * without knowing what the previous colour was, the only + * way to ensure correct conversion is to restore to black and + * let the next segment change the colour again. 
+ */ + data.write(new byte[] { COLOUR_TAG, 0x00, 0x00 } ); + } + if (boldSet) { + data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_NORMAL, 0x00 } ); + } + if (italicSet) { + data.write(new byte[] { ITALIC_TAG, 0x00 } ); + } + if (underlineSet) { + data.write(new byte[] { UNDERLINE_TAG, 0x00 } ); + } + if (strikeSet) { + data.write(new byte[] { STRIKETHROUGH_TAG, 0x00 } ); + } + if (highlightSet) { + data.write(new byte[] { HIGHLIGHT_TAG, 0x00 } ); + } + } + catch (IOException ioe) { + // Should never occur in a memory based stream + } + + return data.toByteArray(); + } +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java new file mode 100644 index 000000000000..963e7f489726 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java @@ -0,0 +1,164 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. 
If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + + +import java.io.InputStream; +import java.io.IOException; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.DocumentDeserializerFactory; +import org.openoffice.xmerge.DocumentSerializerFactory; +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.DocumentMergerFactory; +import org.openoffice.xmerge.ConverterCapabilities; + +import org.openoffice.xmerge.util.registry.ConverterInfo; + +import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory; + + +/** + * Factory class used to create converters to/from the Pocket Word format. + * + * @author Mark Murnane + * @version 1.1 + */ +public final class PluginFactoryImpl extends SxwPluginFactory + implements DocumentDeserializerFactory, DocumentSerializerFactory, + DocumentMergerFactory{ + + /** + * <p>Constructor that caches the <code>ConvertInfo</code> that + * corresponds to the registry information for this plug-in.</p> + * + * @param ci <code>ConvertInfo</code> object. + */ + public PluginFactoryImpl (ConverterInfo ci) { + super(ci); + } + + /** ConverterCapabilities object for this type of conversion. */ + private final static ConverterCapabilities converterCap = + new ConverterCapabilitiesImpl(); + + + /** + * <p>The <code>DocumentSerializer</code> is used to convert + * from the OpenOffice Writer <code>Document</code> format + * to the Pocket Word <code>Document</code> format.</p> + * + * <p>The <code>ConvertData</code> object is passed along to the + * created <code>DocumentSerializer</code> via its constructor. 
+ * The <code>ConvertData</code> is read and converted when the + * the <code>DocumentSerializer</code> object's + * <code>serialize</code> method is called.</p> + * + * @param doc <code>Document</code> object that the created + * <code>DocumentSerializer</code> object uses + * as input. + * + * @return A <code>DocumentSerializer</code> object. + */ + public DocumentSerializer createDocumentSerializer(Document doc) { + return new DocumentSerializerImpl(doc); + } + + + /** + * The <code>DocumentDeserializer</code> is used to convert + * from the Pocket Word <code>Document</code> format to + * the OpenOffice Writer <code>Document</code> format.</p> + * + * The <code>ConvertData</code> object is passed along to the + * created <code>DocumentDeserializer</code> via its constructor. + * The <code>ConvertData</code> is read and converted when the + * the <code>DocumentDeserializer</code> object's + * <code>deserialize</code> method is called. + * </p> + * + * @param cd <code>ConvertData</code> object that the created + * <code>DocumentDeserializer</code> object uses as + * input. + * + * @return A <code>DocumentDeserializer</code> object. + */ + public DocumentDeserializer createDocumentDeserializer(ConvertData cd) { + return new DocumentDeserializerImpl(cd); + } + + + /** + * <p>Create a <code>Document</code> object that corresponds to + * the Pocket Word data passed in via the <code>InputStream</code> + * object. + * + * <p>This method will read from the given <code>InputStream</code> + * object. The returned <code>Document</code> object will contain + * the necessary data for the other objects created by the + * <code>PluginFactoryImpl</code> to process, like the + * <code>DocumentSerializerImpl</code> object and a + * <code>DocumentMerger</code> object.</p> + * + * @param name The <code>Document</code> name. + * @param is <code>InputStream</code> object corresponding + * to the <code>Document</code>. 
+ * + * @return A <code>Document</code> object representing the + * Pocket Word format. + * + * @throws IOException If any I/O error occurs. + */ + + public Document createDeviceDocument(String name, InputStream is) + throws IOException { + PocketWordDocument pwd = new PocketWordDocument(name); + pwd.read(is); + return pwd; + } + + /** + * Returns an instance of <code>DocumentMergerImpl</code>, + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A DocumentMergerImpl object. + */ + public DocumentMerger createDocumentMerger(Document doc) { + ConverterCapabilities cc = converterCap; + DocumentMergerImpl merger = new DocumentMergerImpl(doc, cc); + return merger; + + } + +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java new file mode 100644 index 000000000000..207627398ca1 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java @@ -0,0 +1,94 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + + +/** + * Interface defining constants for Pocket Word attributes. + * + * @author Mark Murnane + * @version 1.1 + */ +public interface PocketWordConstants { + /** File extension for Pocket Word files. */ + public static final String FILE_EXTENSION = ".psw"; + + /** Name of the default style. */ + public static final String DEFAULT_STYLE = "Standard"; + + /** Family name for Paragraph styles. */ + public static final String PARAGRAPH_STYLE_FAMILY = "paragraph"; + + /** Family name for Text styles. */ + public static final String TEXT_STYLE_FAMILY = "text"; + + + /** + * Generic Pocket Word formatting code. + * + * Formatting codes are 0xEz, where z indicates the specific format code. + */ + public static final byte FORMATTING_TAG = (byte)0xE0; + + /** Font specification tag. The two bytes following indicate which font. */ + public static final byte FONT_TAG = (byte)0xE5; + + /** Font size tag. The two bytes following specify font size in points. */ + public static final byte FONT_SIZE_TAG = (byte)0xE6; + + /** Colour tag. Two bytes following index a 4-bit colour table. */ + public static final byte COLOUR_TAG = (byte)0xE7; + + /** Font weight tag. Two bytes following indicate weighting of font. */ + public static final byte FONT_WEIGHT_TAG = (byte)0xE8; + + /** Normal font weight value. */ + public static final byte FONT_WEIGHT_NORMAL = (byte)0x04; + + /** Fine font weight value. 
*/ + public static final byte FONT_WEIGHT_FINE = (byte)0x01; + + /** Bold font weight value. */ + public static final byte FONT_WEIGHT_BOLD = (byte)0x07; + + /** Thick font weight value. */ + public static final byte FONT_WEIGHT_THICK = (byte)0x09; + + /** Italic tag. Single byte following indicates whether italic is on. */ + public static final byte ITALIC_TAG = (byte)0xE9; + + /** Underline tag. Single byte following indicates whether underline is on. */ + public static final byte UNDERLINE_TAG = (byte)0xEA; + + /** Strikethrough tag. Single byte following indicates whether strikethrough is on. */ + public static final byte STRIKETHROUGH_TAG = (byte)0XEB; + + /** Highlighting tag. Single byte following indicates whether highlighting is on. */ + public static final byte HIGHLIGHT_TAG = (byte)0xEC; + +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java new file mode 100644 index 000000000000..bc77e0fa6988 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java @@ -0,0 +1,404 @@ +/************************************************************************* + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; + +import java.io.InputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; + +import java.util.Enumeration; +import java.util.Vector; + + +/** + * <p>Class representing a Pocket Word Document.</p> + * + * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents + * and to read existing data to allow for conversion to OpenOffice Writer + * format.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public class PocketWordDocument implements Document, PocketWordConstants { + private String docName; + + private byte[] preamble; + private Vector fonts; + private DocumentDescriptor descriptor; + private Vector paragraphs; + + private ParaStyle pStyle; + private Paragraph currentPara; + + /* + * The trailer currently appears to be constant, but if its found to + * have a variable component, then this initialisation should be moved + * to an initTrailer() method. + * + * Padding is sometimes needed before the trailer to ensure the file + * ends on a 4-byte boundary, but this is handled in write(). 
+ */ + private static final byte[] trailer = new byte[] { (byte)0x82, 0x00, + 0x09, 0x00, + 0x03, 0x00, + (byte)0x82, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00 }; + + + /** + * <p>Constructs a new Pocket Word Document.</p> + * + * <p>This new document does notcontain any information. Document data must + * either be added using appropriate methods, or an existing file can be + * {@link #read(InputStream) read} from an <code>InputStream</code>.</p> + * + * @param name The name of the <code>PocketWordDocument</code>. + */ + public PocketWordDocument(String name) { + + docName = trimDocumentName(name); + + preamble = new byte[52]; + fonts = new Vector(0, 1); + descriptor = new DocumentDescriptor(); + paragraphs = new Vector(0, 1); + } + + + /** + * <p>This method reads <code>byte</code> data from the InputStream and + * extracts font and paragraph data from the file.</p> + * + * @param is InputStream containing a Pocket Word data file. + * + * @throws IOException In case of any I/O errors. + */ + public void read(InputStream docData) throws IOException { + + if (docData == null) { + throw new IOException ("No input stream to convert"); + } + + // The preamble may become important for font declarations. + int readValue = docData.read(preamble); + // #i33702# check for an empty InputStream. + if(readValue == -1) { + System.err.println("Error:invalid input stream"); + return; + } + + byte[] font = new byte[80]; + int numfonts = 0; + do { + docData.read(font); + + String name = new String(font, 0, 64, "UTF-16LE"); + fonts.add(name.trim()); + + } while (!(font[76] == 5 && font[77] == 0 + && font[78] == 1 && font[79] == 0)); + + /* + * TODO: The document descriptor data that follows the fonts ends with + * a variable section containing data for each of the paragraphs. + * It may be possible to use this information to calculate staring + * positions for each paragraph rather than iterating through the + * entire byte stream. 
+ */ + + int value; + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + while ((value = docData.read()) != -1) { + bos.write(value); + } + + + byte[] contentData = bos.toByteArray(); + int start = 0, end = 0; + boolean sawMarker = false; + + /* + * Each step inspects bytes i + 2 and i + 3, so stop while both are + * still inside the array; content whose length is not a multiple + * of 4 would otherwise run off the end. + */ + for (int i = 0; i + 3 < contentData.length; i += 4) { + if (contentData[i + 2] == (byte)0xFF + && contentData[i + 3] == (byte)0xFF && !sawMarker) { + if (i < 8) { + throw new IOException("Malformed Pocket Word data: paragraph marker before offset 8"); + } + start = i - 8; + sawMarker = true; + continue; + } + + if (contentData[i + 2] == (byte)0xFF + && contentData[i + 3] == (byte)0xFF && sawMarker) { + end = i - 8; + ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); + paragraph.write(contentData, start, end - start); + paragraphs.add(new Paragraph(paragraph.toByteArray())); + + // Reset the markers + sawMarker = false; + i -= 4; // Skip back + } + + } + + /* + * Special case, the last paragraph + * If we got here, and the marker is set then we saw the start of the + * last paragraph, but no following paragraph + */ + if (contentData.length < 19) { + throw new IOException("Malformed Pocket Word data: content is shorter than the trailer"); + } + /* Skip 20 bytes at the end when the byte 19 from the end is zero, 18 otherwise. 
*/ + int tailLength = (contentData[contentData.length - 19] == 0) ? 20 : 18; + int lastLength = contentData.length - start - tailLength; + if (lastLength < 0) { + throw new IOException("Malformed Pocket Word data: last paragraph overlaps the trailer"); + } + ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); + paragraph.write(contentData, start, lastLength); + paragraphs.add(new Paragraph(paragraph.toByteArray())); + } + + + /* + * Utility method to make sure the document name is stripped of any file + * extensions before use. + */ + private String trimDocumentName(String name) { + String temp = name.toLowerCase(); + + if (temp.endsWith(FILE_EXTENSION)) { + // strip the extension + int nlen = name.length(); + int endIndex = nlen - FILE_EXTENSION.length(); + name = name.substring(0,endIndex); + } + + return name; + } + + + /** + * <p>Method to provide access to all of the <code>Paragraph</code> objects + * in the <code>Document</code>.</p> + * + * @return <code>Enumeration</code> over the paragraphs in the document. 
+ */ + public Enumeration getParagraphEnumeration() { + return paragraphs.elements(); + } + + + /** + * <p>Returns the <code>Document</code> name with no file extension.</p> + * + * @return The <code>Document</code> name with no file extension. + */ + public String getName() { + return docName; + } + + + /** + * <p>Returns the <code>Document</code> name with file extension.</p> + * + * @return The <code>Document</code> name with file extension. + */ + public String getFileName() { + return new String(docName + FILE_EXTENSION); + } + + + /** + * <p>Writes out the <code>Document</code> content to the specified + * <code>OutputStream</code>.</p> + * + * <p>This method may not be thread-safe. + * Implementations may or may not synchronize this + * method. User code (i.e. caller) must make sure that + * calls to this method are thread-safe.</p> + * + * @param os <code>OutputStream</code> to write out the + * <code>Document</code> content. + * + * @throws IOException If any I/O error occurs. + */ + public void write(OutputStream os) throws IOException { + DataOutputStream dos = new DataOutputStream(os); + + initPreamble(); + dos.write(preamble); + + loadFonts(); + for (int i = 0; i < fonts.size(); i++ ) { + ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i); + dos.write(fontData.toByteArray()); + } + + + for (int i = 0; i < paragraphs.size(); i++) { + Paragraph para = (Paragraph)paragraphs.elementAt(i); + descriptor.addParagraph((short)para.getTextLength(), para.getLines()); + } + dos.write(descriptor.getDescriptor()); + + for (int i = 0; i < paragraphs.size(); i++ ) { + Paragraph para = (Paragraph)paragraphs.elementAt(i); + + // Last paragraph has some extra data + if (i + 1 == paragraphs.size()) { + para.setLastParagraph(true); + } + dos.write(para.getParagraphData()); + } + + + /* + * Before we write out the trailer, we need to make sure that it will + * lead to the file ending on a 4 byte boundary. 
+ */ + if (dos.size() % 4 == 0) { + dos.write((byte)0x00); + dos.write((byte)0x00); + } + + dos.write(trailer); + + dos.flush(); + dos.close(); + } + + + /** + * <p>This method adds a new paragraph element to the document. No string + * data is added to the paragraph.</p> + * + * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and + * is used as the target for all subsequent calls to addParagraphData().</p> + * + * @param style Paragraph Style object describing the formatting for + * the new paragraph. Can be null. + * @param listElement true if this paragraph is to be bulleted; + * false otherwise. + */ + public void addParagraph(ParaStyle style, boolean listElement) { + /* For the moment, only support basic text entry in a single paragraph */ + Paragraph para = new Paragraph(style); + + paragraphs.add(para); + + pStyle = style; + currentPara = para; + + if (listElement) { + para.setBullets(true); + } + } + + + /** + * <p>This method adds text to the current paragraph.</p> + * + * <p>If no paragraphs exist within the document, it creates one.</p> + * + * @param data The string data for this segment. + * @param style Text Style object describing the formatting of this + * segment. Can be null. + */ + public void addParagraphData(String data, TextStyle style) { + if (currentPara == null) { + addParagraph(null, false); + } + currentPara.addTextSegment(data, style); + } + + + /* + * Preamble is the portion before font specification which never + * seems to change from one file, or one saved version, to the next. + * + * Bytes 18h and 19h seem to contain the number of fonts and should + * be modified when all of the fonts have been specified. + * These bytes are the first two on the fourth line below. + */ + private void initPreamble() { + preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00, + 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 Font?? 
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 # Fonts + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }; + } + + + /* + * This method writes the minimum font data that is used by the converter. + * Currently, all documents convert to 10 point Courier New. Tahoma is + * always mentioned in Pocket Word files, however, even if it is not used. + * + * The font list is rebuilt from scratch on every call: write() casts each + * entry to ByteArrayOutputStream, so font names left behind by read() or + * records left by an earlier write() must not remain in the list, and the + * preamble always declares exactly two fonts. + * + * TODO: Rewrite to allow for multiple fonts once font support issues + * have been resolved. + */ + private void loadFonts() { + fonts.removeAllElements(); + + ByteArrayOutputStream fontData = new ByteArrayOutputStream(); + + try { + fontData.write("Tahoma".getBytes("UTF-16LE")); + fontData.write(new byte[52]); // Rest of font name? 
+ fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); + + fonts.add(fontData); + + fontData = new ByteArrayOutputStream(); + + fontData.write("Courier New".getBytes("UTF-16LE")); + fontData.write(new byte[42]); + fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } ); + fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } ); + + // Next part indicates that this is the last font + fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } ); + + fonts.add(fontData); + } + catch (IOException ioe) { + // Shouldn't happen as this is a memory based stream + } + } +} diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html new file mode 100644 index 000000000000..e32357b85ef7 --- /dev/null +++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html @@ -0,0 +1,58 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 
3.2 Final//EN"> +<!-- + #************************************************************************* + # + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ + #************************************************************************* + --> +<html> +<head> +<title>org.openoffice.xmerge.converter.xml.sxw.pocketword package</title> +</head> + +<body bgcolor="white"> + +<p>Plugin for the conversion of documents between StarWriter XML and + Pocket Word format.</p> +<p>This plugin supports conversion of most features supported by Pocket Word.</p> +<ul> + <li>Bold, Italic, Underline</li> + <li>Strikethrough</li> + <li>Highlight</li> + <li>Colours</li> + <li>Lists</li> + <li>Alignments</li> +</ul> + +<p>Additionally, work on fonts is currently underway.</p> + +<p>This plugin is based on the Windows CE 3.0 version of Pocket Word.<br> + Testing was carried out using Pocket PC 2000 and Pocket PC 2002 devices.</p> + +<p>It follows the {@link org.openoffice.xmerge} framework +for the conversion process.</p> + +</body> +</html> diff --git a/xmerge/source/pocketword/makefile.mk b/xmerge/source/pocketword/makefile.mk new file mode 100644 index 000000000000..9db2d15000c6 --- /dev/null +++ b/xmerge/source/pocketword/makefile.mk @@ -0,0 +1,35 @@ +#************************************************************************* +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). 
+# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#************************************************************************* + +TARGET=pocketword +PRJ=../.. +PRJNAME=xmerge + +.INCLUDE : ant.mk +.IF "$(L10N_framework)"=="" +ALLTAR: ANTBUILD +.ENDIF |