summaryrefslogtreecommitdiff
path: root/xmerge/java/org/openoffice/xmerge/converter/xml/sxw
diff options
context:
space:
mode:
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw')
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java98
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java81
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java96
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java72
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java307
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java217
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java316
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java102
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java535
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java144
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml138
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml47
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk36
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html241
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml132
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk36
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html42
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java96
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java239
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java301
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java102
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java440
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java862
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java208
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java168
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java98
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java411
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml140
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml51
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html60
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java96
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java64
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java568
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java102
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java539
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java152
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java355
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java215
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java103
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java250
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java221
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java148
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java302
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java327
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml145
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml17
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk36
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java118
-rw-r--r--xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java71
49 files changed, 9645 insertions, 0 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java
new file mode 100644
index 000000000000..4888357a6de2
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java
@@ -0,0 +1,98 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: SxwDocument.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw;
+
+import org.w3c.dom.Document;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+
+/**
+ * This class is an implementation of <code>OfficeDocument</code> for
+ * the SXW format.
+ */
+public class SxwDocument extends OfficeDocument {
+
+
+ /**
+ * Constructor with arguments to set <code>name</code>.
+ *
+ * @param name The name of the <code>Document</code>
+ */
+ public SxwDocument(String name) {
+ super(name);
+ }
+
+
+ /**
+ * Constructor with arguments to set <code>name</code>, the
+ * <code>namespaceAware</code> flag, and the <code>validating</code>
+ * flag.
+ *
+ * @param name The name of the <code>Document</code>.
+ * @param namespaceAware The value of the namespaceAware flag.
+ * @param validating The value of the validating flag.
+ */
+ public SxwDocument(String name, boolean namespaceAware, boolean validating) {
+
+ super(name, namespaceAware, validating);
+ }
+
+
+ /**
+ * Returns the Office file extension for the SXW format.
+ *
+ * @return The Office file extension for the SXW format.
+ */
+ protected String getFileExtension() {
+ return OfficeConstants.SXW_FILE_EXTENSION;
+ }
+
+
+ /**
+ * Returns the Office attribute for the SXW format.
+ *
+ * @return The Office attribute for the SXW format.
+ */
+ protected String getOfficeClassAttribute() {
+ return OfficeConstants.SXW_TYPE;
+ }
+
+ /**
+ * Method to return the MIME type of the document.
+ *
+ * @return String The document's MIME type.
+ */
+ protected final String getDocumentMimeType() {
+ return OfficeConstants.SXW_MIME_TYPE;
+ }
+
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java
new file mode 100644
index 000000000000..06af85d94431
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java
@@ -0,0 +1,81 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: SxwPluginFactory.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.PluginFactory;
+import org.openoffice.xmerge.PluginFactory;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.util.registry.ConverterInfo;
+
+/**
+ * General implementation of the <code>PluginFactory</code> interface
+ * for SXW documents.
+ *
+ * @see org.openoffice.xmerge.DocumentDeserializer
+ * @see org.openoffice.xmerge.DocumentMerger
+ * @see org.openoffice.xmerge.DocumentSerializer
+ */
+public abstract class SxwPluginFactory extends PluginFactory {
+
+ /**
+ * Constructor that caches the <code>ConverterInfo</code> that
+ * corresponds to the registry information for this plug-in.
+ *
+ * @param ci <code>ConverterInfo</code> object.
+ */
+ public SxwPluginFactory (ConverterInfo ci) {
+ super(ci);
+ }
+
+
+ public Document createOfficeDocument(String name, InputStream is)
+ throws IOException {
+
+ // read zipped XML stream
+ SxwDocument doc = new SxwDocument(name);
+ doc.read(is);
+ return doc;
+ }
+
+ public Document createOfficeDocument(String name, InputStream is,boolean isZip)
+ throws IOException {
+
+ // read XML stream
+ SxwDocument doc = new SxwDocument(name);
+ doc.read(is,isZip);
+ return doc;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java
new file mode 100644
index 000000000000..852f8f6f4b49
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java
@@ -0,0 +1,96 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: ConverterCapabilitiesImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+
+/**
+ * <p>AportisDoc implementation of <code>ConverterCapabilities</code> for
+ * the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>Used with StarWriter XML to/from AportisDoc conversions. The
+ * <code>ConverterCapabilities</code> specify which &quot;Office&quot;
+ * <code>Document</code> tags and attributes are supported on the
+ * &quot;Device&quot; <code>Document</code> format.</p>
+ */
+public final class ConverterCapabilitiesImpl
+ implements ConverterCapabilities {
+
+ public boolean canConvertTag(String tag) {
+
+ if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_PARAGRAPH.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_HEADING.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LIST_ITEM.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LIST_HEADER.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_SPAN.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_HYPERLINK.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LINE_BREAK.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_SPACE.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_TAB_STOP.equals(tag))
+ return true;
+
+ return false;
+ }
+
+ public boolean canConvertAttribute(String tag,
+ String attribute) {
+
+ if (OfficeConstants.TAG_SPACE.equals(tag)) {
+
+ if (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute))
+ return true;
+ }
+
+ return false;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java
new file mode 100644
index 000000000000..6f097636a9ba
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java
@@ -0,0 +1,72 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocConstants.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.openoffice.xmerge.converter.palm.PdbUtil;
+
+/**
+ * Constants used for encoding and decoding the AportisDoc format.
+ *
+ * @author Herbie Ong
+ */
+interface DocConstants {
+
+ /** Creator id. */
+ public static final int CREATOR_ID = PdbUtil.intID("REAd");
+
+ /** Type id. */
+ public static final int TYPE_ID = PdbUtil.intID("TEXt");
+
+ /** Constant for uncompressed version. */
+ public static final short UNCOMPRESSED = 1;
+
+ /** Constant for compressed version. */
+ public static final short COMPRESSED = 2;
+
+ /** Constant used for spare fields. */
+ public static final int SPARE = 0;
+
+ /** AportisDoc record size. */
+ public static final short TEXT_RECORD_SIZE = 4096;
+
+ /** Constant for encoding scheme. */
+ public static final String ENCODING = "8859_1";
+
+ /** Constant for TAB character. */
+ public final static char TAB_CHAR = '\t';
+
+ /** Constant for EOL character. */
+ public final static char EOL_CHAR = '\n';
+
+ /** Constant for SPACE character. */
+ public final static char SPACE_CHAR = ' ';
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java
new file mode 100644
index 000000000000..f8631cccc78e
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java
@@ -0,0 +1,307 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocDecoder.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.util.Resources;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ * This class is used by {@link
+ * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentDeserializerImpl}
+ * to decode the AportisDoc format. It currently decodes
+ * the text content into a single <code>String</code> object.
+ *
+ * @author Herbie Ong
+ */
+final class DocDecoder implements DocConstants {
+
+ /** For decoding purposes. */
+ private final static int COUNT_BITS = 3;
+
+ /** Resources object for I18N. */
+ private Resources res = null;
+
+
+ /**
+ * Default constructor obtains the shared <code>Resources</code>
+ * instance used to look up the error message strings.
+ */
+ DocDecoder() {
+ res = Resources.getInstance();
+ }
+
+
+ /**
+ * Decode the text records into a single <code>String</code>
+ * of text content.
+ *
+ * @param recs <code>Record</code> array holding AportisDoc
+ * contents.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ String parseRecords(Record[] recs) throws IOException {
+
+ // read the header record
+ HeaderInfo header = readHeader(recs[0].getBytes());
+
+ dumpHeader(header);
+
+ // store all the characters in textBuffer
+ StringBuffer textBuffer = new StringBuffer(header.textLen);
+
+ switch (header.version) {
+
+ case COMPRESSED:
+ for (int i = 1; i <= header.textRecordCount; i++) {
+
+ byte[] bytes = decompress(recs[i].getBytes(),
+ header.textRecordSize);
+ log("processing " + bytes.length + " bytes");
+ String str = new String(bytes, ENCODING);
+ textBuffer.append(str);
+ }
+
+ break;
+
+ case UNCOMPRESSED:
+ for (int i = 1; i <= header.textRecordCount; i++) {
+
+ byte[] bytes = recs[i].getBytes();
+ log("processing " + bytes.length + " bytes");
+ String str = new String(bytes, ENCODING);
+ textBuffer.append(str);
+ }
+
+ break;
+
+ default:
+ throw new IOException(res.getString("UNKNOWN_DOC_VERSION"));
+
+ }
+
+ return textBuffer.toString();
+ }
+
+
+ /**
+ * <p>Decompress the <code>byte</code> array.</p>
+ *
+ * <p>The resulting uncompressed <code>byte</code> array should
+ * be within <code>textRecordSize</code> length, definitely
+ * within twice the size it claims, else treat it as a problem
+ * with the encoding of that PDB and throw
+ * <code>IOException</code>.</p>
+ *
+ * @param cBytes Compressed <code>byte</code> array.
+ * @param textRecordSize Size of uncompressed
+ * <code>byte</code> array.
+ *
+ * @throws IOException If the data is malformed or expands beyond
+ * twice <code>textRecordSize</code>.
+ */
+ private byte[] decompress(byte[] cBytes, int textRecordSize)
+ throws IOException {
+
+ // create byte array for storing uncompressed bytes
+ // it should be within textRecordSize range, definitely
+ // within twice of textRecordSize! if not, then
+ // an ArrayIndexOutOfBoundsException will get thrown,
+ // and it should be converted into an IOException, and
+ // treat it as a conversion error.
+ byte[] uBytes = new byte[textRecordSize*2];
+
+ int up = 0;
+ int cp = 0;
+
+ try {
+
+ while (cp < cBytes.length) {
+
+ int c = cBytes[cp++] & 0xff;
+
+ // codes 1...8 mean copy that many bytes
+ if (c > 0 && c < 9) {
+
+ while (c-- > 0)
+ uBytes[up++] = cBytes[cp++];
+ }
+
+ // codes 0, 9...0x7F represent themselves
+ else if (c < 0x80) {
+ uBytes[up++] = (byte) c;
+ }
+
+ // codes 0xC0...0xFF represent "space + ascii char"
+ else if (c >= 0xC0) {
+ uBytes[up++] = (byte) ' ';
+ uBytes[up++] = (byte) (c ^ 0x80);
+ }
+
+ // codes 0x80...0xBf represent sequences
+ else {
+ c <<= 8;
+ c += cBytes[cp++] & 0xff;
+ int m = (c & 0x3fff) >> COUNT_BITS;
+ int n = c & ((1 << COUNT_BITS) - 1);
+ n += COUNT_BITS;
+ while (n-- > 0) {
+ uBytes[up] = uBytes[up - m];
+ up++;
+ }
+ }
+ }
+
+ } catch (ArrayIndexOutOfBoundsException e) {
+
+ throw new IOException(
+ res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED"));
+ }
+
+ // note that uBytes may be larger than the amount of
+ // uncompressed bytes, so trim it to another byte array
+ // with the exact size.
+ byte[] textBytes = new byte[up];
+ System.arraycopy(uBytes, 0, textBytes, 0, up);
+
+ return textBytes;
+ }
+
+
+ /**
+ * Read the header <code>byte</code> array.
+ *
+ * @param bytes <code>byte</code> array containing header
+ * record data.
+ *
+ * @return <code>HeaderInfo</code> object.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private HeaderInfo readHeader(byte[] bytes) throws IOException {
+
+ HeaderInfo header = new HeaderInfo();
+
+ ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
+ DataInputStream dis = new DataInputStream(bis);
+
+ // Normally the first 2 bytes comprised of the version
+ // which should either be COMPRESSED or UNCOMPRESSED
+ // SmartDoc/Quickword would add a 0x01 to the first
+ // byte, thus their version would be 0x0101 for UNCOMPRESSED
+ // instead of 0x0001 and 0x0102 for COMPRESSED instead of
+ // 0x0002.
+
+ dis.readByte();
+ header.version = dis.readByte();
+
+ // read extra 2 unused bytes
+ dis.readShort();
+
+ // Read the text length, this should be unsigned 4 bytes.
+ // We could store the read value into a long, but then
+ // our current buffer limit is the max positive of an int.
+ // That is a large enough limit, thus we shall stay with
+ // storing the value in an int. If it exceeds, then
+ // an IOException should be thrown.
+ header.textLen = dis.readInt();
+ if (header.textLen < 0) {
+ throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED"));
+ }
+
+ // read the number of records - unsigned 2 bytes
+ header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff;
+
+ // read the record size - unsigned 2 bytes
+ header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff;
+
+ // read extra 4 unused bytes
+ dis.readInt();
+
+ return header;
+ }
+
+
+ /**
+ * Prints out header info into log. Used for debugging purposes only.
+ *
+ * @param header <code>HeaderInfo</code> structure.
+ */
+ private void dumpHeader(HeaderInfo header) {
+
+ log("<DOC_INFO ");
+ log("version=\"" + header.version + "\" ");
+ log("text-length=\"" + header.textLen + "\" ");
+ log("number-of-records=\"" + header.textRecordCount + "\" ");
+ log("record-size=\"" + header.textRecordSize + "\" />");
+ }
+
+
+ /**
+ * Sends message to the log object.
+ *
+ * @param str Debug string message.
+ */
+ private void log(String str) {
+ Debug.log(Debug.TRACE, str);
+ }
+
+
+ /**
+ * Inner class to store AportisDoc header information.
+ */
+ private class HeaderInfo {
+
+ /** length of text section */
+ int textLen = 0;
+
+ /** number of text records */
+ int textRecordCount = 0;
+
+ /**
+ * size of a text record. This is normally the same as
+ * TEXT_RECORD_SIZE, but some applications may modify this.
+ */
+ int textRecordSize = 0;
+
+ /** compression type */
+ int version = 0;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java
new file mode 100644
index 000000000000..cd08b9921a17
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java
@@ -0,0 +1,217 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocEncoder.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.openoffice.xmerge.converter.palm.Record;
+
+/**
+ * This class is used by {@link
+ * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentSerializerImpl
+ * DocumentSerializerImpl} to encode the AportisDoc format.
+ * It does not deal with any XML tags. It only knows how to encode
+ * from <code>String</code>.
+ *
+ * @author Herbie Ong
+ */
+final class DocEncoder implements DocConstants {
+
+ /** Text buffer to contain text section. */
+ private StringBuffer textBuffer = null;
+
+ /** Length of text section. */
+ private int textLen = 0;
+
+ /** Number of text records. */
+ private int textRecCount = 0;
+
+
+ /**
+ * Default constructor creates the text buffer
+ * that holds all the text in
+ * the AportisDoc database.
+ */
+ DocEncoder() {
+
+ textBuffer = new StringBuffer(TEXT_RECORD_SIZE);
+ }
+
+
+ /**
+ * This method appends text into the text section of
+ * the AportisDoc database.
+ *
+ * @param text <code>String</code> to append.
+ */
+ void addText(String text) {
+
+ textBuffer.append(text);
+ }
+
+
+ /**
+ * This method appends text into the text section of
+ * the AportisDoc database.
+ *
+ * @param text <code>char</code> array to append.
+ */
+ void addText(char[] text) {
+
+ textBuffer.append(text);
+ }
+
+
+ /**
+ * This method appends text character into the text
+ * section of the AportisDoc database.
+ *
+ * @param text <code>char</code> to append.
+ */
+ void addText(char text) {
+
+ textBuffer.append(text);
+ }
+
+
+ /**
+ * This method encodes the information given to a
+ * palm <code>Record</code> array in the AportisDoc
+ * database format.
+ *
+ * @return <code>Record</code> array holding AportisDoc
+ * contents.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ Record[] getRecords() throws IOException {
+
+ byte textBytes[] = processTextBuffer();
+ textLen = textBytes.length;
+ textRecCount = (short) (textBytes.length / TEXT_RECORD_SIZE);
+
+ // recBytes to hold a record of bytes at a time
+ byte recBytes[] = new byte[TEXT_RECORD_SIZE];
+ int pos = 0;
+
+ List textRecords = new ArrayList(textRecCount + 1);
+
+ // split textBytes into chunks of Record objects
+ // and store in textRecords object.
+ for (int i = 0; i < textRecCount; i++) {
+
+ System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length);
+ pos += recBytes.length;
+ Record zRec = new Record(recBytes);
+ textRecords.add(zRec);
+ }
+
+ // a final, shorter record is needed if the text length is not a multiple of TEXT_RECORD_SIZE
+
+ if (pos < textLen) {
+
+ textRecCount++;
+
+ recBytes = new byte[textLen - pos];
+ System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length);
+ Record rec = new Record(recBytes);
+ textRecords.add(rec);
+ }
+
+ // construct the Record array and copy
+ // references from textRecords.
+
+ Record[] allRecords = new Record[textRecords.size() + 1];
+
+ allRecords[0] = new Record(getHeaderBytes());
+
+ for (int i = 1; i < allRecords.length; i++) {
+
+ allRecords[i] = (Record) textRecords.get(i-1);
+ }
+
+ return allRecords;
+ }
+
+
+ /**
+ * This method converts the text buffer into a <code>byte</code>
+ * array with the proper encoding of the text section of the
+ * AportisDoc format.
+ *
+ * TODO: do compression.
+ *
+ * @return byte[] Converted <code>byte</code> array of text
+ * section.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private byte[] processTextBuffer() throws IOException
+ {
+ String str = textBuffer.toString();
+ byte bytes[] = str.getBytes(ENCODING);
+
+ return bytes;
+ }
+
+
+ /**
+ * This method produces the <code>byte</code> array for the header.
+ *
+ * @return <code>byte</code> array containing header record data.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private byte[] getHeaderBytes() throws IOException
+ {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(bos);
+
+ // TODO: for now, we shall use UNCOMPRESSED.
+ // later, we need to use COMPRESSED or a setting.
+ dos.writeShort(UNCOMPRESSED);
+ dos.writeShort(SPARE);
+ dos.writeInt(textLen);
+ dos.writeShort(textRecCount);
+ dos.writeShort(TEXT_RECORD_SIZE);
+ dos.writeInt(SPARE);
+
+ byte[] bytes = bos.toByteArray();
+
+ return bytes;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java
new file mode 100644
index 000000000000..a819f82c14c3
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java
@@ -0,0 +1,316 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentDeserializerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+import java.io.IOException;
+import java.util.Enumeration;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.PdbDecoder;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.util.Debug;
+
+/**
+ * <p>AportisDoc implementation of <code>DocumentDeserializer</code>
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>This converts a file in AportisDoc PDB format to StarOffice
+ * XML format.</p>
+ *
+ * <p>The <code>deserialize</code> method uses a <code>DocDecoder</code>
+ * to read the AportisDoc format into a <code>String</code> object, then
+ * it calls <code>buildDocument</code> to create a <code>SxwDocument</code>
+ * object from it.</p>
+ *
+ * @author Herbie Ong
+ */
+public final class DocumentDeserializerImpl
+ implements OfficeConstants, DocConstants, DocumentDeserializer {
+
+    /**
+     *  The <code>ConvertData</code> input that the
+     *  <code>deserialize</code> method reads from.
+     */
+    private ConvertData cd;
+
+
+    /**
+     *  Creates a deserializer that takes the given
+     *  <code>ConvertData</code> object as its input.
+     *
+     *  @param  cd  A <code>ConvertData</code> object holding the data
+     *              that the <code>deserialize</code> method converts.
+     */
+    public DocumentDeserializerImpl(ConvertData cd) {
+        this.cd = cd;
+    }
+
+
+    /**
+     *  Convert the given <code>ConvertData</code> object
+     *  into a <code>SxwDocument</code> object.
+     *
+     *  <p>Every <code>PalmDocument</code> found in the
+     *  <code>ConvertData</code> is decoded, but only the document built
+     *  from the last one is returned.</p>
+     *
+     *  @return  Resulting <code>SxwDocument</code> object, or
+     *           <code>null</code> if the <code>ConvertData</code>
+     *           contains no documents.
+     *
+     *  @throws  ConvertException  If any conversion error occurs.
+     *  @throws  IOException       If any I/O error occurs.
+     */
+    public Document deserialize() throws IOException, ConvertException {
+
+        Document doc = null;
+        Enumeration e = cd.getDocumentEnumeration();
+
+        while (e.hasMoreElements()) {
+            PalmDocument palmDoc = (PalmDocument) e.nextElement();
+            PalmDB pdb = palmDoc.getPdb();
+
+            log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+            log("<AportisDoc>");
+
+            // Decode the PDB records into plain text, then turn that
+            // text into an SXW document carrying the same name.
+            Record[] recs = pdb.getRecords();
+            String docName = palmDoc.getName();
+            DocDecoder decoder = new DocDecoder();
+            String text = decoder.parseRecords(recs);
+            doc = buildDocument(docName, text);
+
+            log("</AportisDoc>");
+        }
+
+        return doc;
+    }
+
+
+    /**
+     *  Parses the text content of an AportisDoc format and builds a
+     *  <code>SxwDocument</code> from it.
+     *
+     *  <p>The text is scanned one character at a time.  A
+     *  <code>TAB_CHAR</code> becomes a <code>TAG_TAB_STOP</code>
+     *  element, an <code>EOL_CHAR</code> closes the current paragraph
+     *  and opens a new one, and a run of two or more
+     *  <code>SPACE_CHAR</code> characters is encoded with a
+     *  <code>TAG_SPACE</code> element.  All other characters are
+     *  collected into Text nodes.</p>
+     *
+     *  <p>If <code>str</code> is empty, no paragraph is appended to the
+     *  body.</p>
+     *
+     *  @param  docName  Name of <code>Document</code>.
+     *  @param  str      Text content of AportisDoc format.
+     *
+     *  @return  Resulting <code>SxwDocument</code> object.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private SxwDocument buildDocument(String docName, String str)
+        throws IOException {
+
+        // create minimum office xml document.
+        SxwDocument sxwDoc = new SxwDocument(docName);
+        sxwDoc.initContentDOM();
+
+        org.w3c.dom.Document doc = sxwDoc.getContentDOM();
+
+        // Grab hold of the office:body tag,
+        // Assume there should be one.
+        // This is where top level paragraphs will append to.
+        NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
+        Node bodyNode = list.item(0);
+
+        // Store all the text in a character array.
+        char[] text = str.toCharArray();
+
+        // startIndex has 2 purposes:
+        // if value is -1, it means that there are no text characters
+        // needed to be processed for a Text node.  if value >= 0, it
+        // is the index of the starting position of a text section
+        // for a Text node.
+        int startIndex = -1;
+
+        // Create a paragraph node to start with.
+        Element paraNode = doc.createElement(TAG_PARAGRAPH);
+
+        log("<PARA>");
+
+        for (int i = 0; i < text.length; i++) {
+
+            switch (text[i]) {
+
+                case TAB_CHAR:
+
+                    // Check if there are text to be processed first.
+                    if (startIndex >= 0) {
+                        addTextNode(doc, paraNode, text, startIndex, i - 1);
+                        startIndex = -1;
+                    }
+
+                    // Then, add tab element.
+                    Element tabNode = doc.createElement(TAG_TAB_STOP);
+                    paraNode.appendChild(tabNode);
+
+                    log("<TAB/>");
+                    break;
+
+                case EOL_CHAR:
+
+                    // Check if there are text to be processed first.
+                    if (startIndex >= 0) {
+                        addTextNode(doc, paraNode, text, startIndex, i - 1);
+                        startIndex = -1;
+                    }
+
+                    // Then, add the current paragraph to body.
+                    bodyNode.appendChild(paraNode);
+
+                    // Create another paragraph element.
+                    paraNode = doc.createElement(TAG_PARAGRAPH);
+
+                    log("</PARA>");
+                    log("<PARA>");
+                    break;
+
+                case SPACE_CHAR:
+
+                    // count is the number of space chars that follow
+                    // the one at position i.
+                    int count = 0;
+
+                    // Do a look ahead and count the number of space
+                    // chars.  The bounds check keeps the look ahead
+                    // inside the array when the text ends with a space.
+                    while (i + 1 + count < text.length
+                           && text[i + 1 + count] == SPACE_CHAR) {
+                        count++;
+                    }
+
+                    // Need to build a space node ONLY if this space is
+                    // followed by at least one more space (count > 0).
+
+                    if (count > 0) {
+
+                        // Number of spaces the space element stands for.
+                        int spaceCount = count;
+
+                        if (startIndex >= 0) {
+
+                            // Pending text absorbs the first space of
+                            // the run; the element covers the rest.
+                            addTextNode(doc, paraNode, text,
+                                        startIndex, i);
+                            startIndex = -1;
+
+                        } else {
+
+                            // No pending text to carry the first space,
+                            // so the element must cover the whole run;
+                            // otherwise that space would be lost.
+                            spaceCount = count + 1;
+                        }
+
+                        // Then, create a space element
+                        // with the proper attribute.
+                        Element spaceNode = doc.createElement(TAG_SPACE);
+                        spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT,
+                            Integer.toString(spaceCount));
+
+                        paraNode.appendChild(spaceNode);
+
+                        // reposition i to the last space character.
+                        i += count;
+
+                        log("<SPACE count=\"" + spaceCount + "\" />");
+
+                    } else {
+
+                        // If there are no chars for text node yet,
+                        // consider this one.
+                        if (startIndex < 0) {
+
+                            startIndex = i;
+                            log("<TEXT>");
+                        }
+                    }
+
+                    break;
+
+                default:
+
+                    // If there are no chars for text node yet,
+                    // this should be the start.
+                    if (startIndex < 0) {
+
+                        startIndex = i;
+                        log("<TEXT>");
+                    }
+
+                    break;
+            }
+        }
+
+        int lastIndex = text.length - 1;
+
+        // Check if there are text to be processed first.
+
+        if (startIndex >= 0) {
+            addTextNode(doc, paraNode, text, startIndex, lastIndex);
+        }
+
+        // Then, add the last paragraph element if it is not added yet.
+        // The length check avoids indexing position -1 on empty text.
+        if (text.length > 0 && text[lastIndex] != EOL_CHAR) {
+            bodyNode.appendChild(paraNode);
+        }
+
+        log("</PARA>");
+
+        return sxwDoc;
+    }
+
+
+    /**
+     *  Appends a Text <code>Node</code> to the given paragraph node.
+     *  The node holds the characters of <code>text</code> from
+     *  <code>startPos</code> up to and including <code>endPos</code>.
+     *
+     *  @param  doc       <code>org.w3c.dom.Document</code> object used
+     *                    to create the Text <code>Node</code>.
+     *  @param  para      The paragraph <code>Node</code> that receives
+     *                    the Text <code>Node</code>.
+     *  @param  text      Array of characters containing text.
+     *  @param  startPos  Index of the first character to include.
+     *  @param  endPos    Index of the last character to include.
+     */
+    private void addTextNode(org.w3c.dom.Document doc, Node para, char text[],
+                             int startPos, int endPos) {
+
+        int length = endPos - startPos + 1;
+        String content = String.valueOf(text, startPos, length);
+
+        para.appendChild(doc.createTextNode(content));
+
+        log(content);
+        log("</TEXT>");
+    }
+
+    /**
+     *  Writes a message to the debug log at <code>Debug.TRACE</code>
+     *  level.
+     *
+     *  @param  str  Debug message.
+     */
+    private void log(String str) {
+        Debug.log(Debug.TRACE, str);
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java
new file mode 100644
index 000000000000..50fcfee02178
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java
@@ -0,0 +1,102 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentMergerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.MergeException;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.Difference;
+import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
+import org.openoffice.xmerge.merger.Iterator;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.diff.ParaNodeIterator;
+import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm;
+import org.openoffice.xmerge.merger.merge.DocumentMerge;
+import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge;
+import org.openoffice.xmerge.util.Debug;
+
+
+/**
+ *  <p>AportisDoc implementation of <code>DocumentMerger</code>
+ *  for the {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
+ *  PluginFactoryImpl}.</p>
+ */
+public class DocumentMergerImpl implements DocumentMerger {
+
+    /** Capabilities passed to the paragraph iterators and the merge. */
+    private final ConverterCapabilities cc_;
+
+    /** The original document that modifications are merged against. */
+    private final org.openoffice.xmerge.Document orig;
+
+    /**
+     *  Constructor.
+     *
+     *  @param  doc  The original <code>Document</code>.
+     *  @param  cc   The <code>ConverterCapabilities</code> to use
+     *               while merging.
+     */
+    public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) {
+        cc_ = cc;
+        this.orig = doc;
+    }
+
+    /**
+     *  Computes the paragraph level differences between the original
+     *  document and <code>modifiedDoc</code> and applies them.
+     *  Both documents are cast to <code>SxwDocument</code>.
+     *
+     *  @param  modifiedDoc  The modified <code>Document</code>.
+     *
+     *  @throws  MergeException  If any merge error occurs.
+     */
+    public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException {
+
+        SxwDocument wdoc1 = (SxwDocument) orig;
+        SxwDocument wdoc2 = (SxwDocument) modifiedDoc;
+
+        Document doc1 = wdoc1.getContentDOM();
+        Document doc2 = wdoc2.getContentDOM();
+
+        Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement());
+        Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement());
+
+        DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm();
+
+        // find out the paragraph level diffs
+        Difference[] diffTable = diffAlgo.computeDiffs(i1, i2);
+
+        if (Debug.isFlagSet(Debug.INFO)) {
+            Debug.log(Debug.INFO, "Diff Result: ");
+
+            for (int i = 0; i < diffTable.length; i++) {
+                Debug.log(Debug.INFO, diffTable[i].debug());
+            }
+        }
+
+        // merge the paragraphs
+        NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge();
+        DocumentMerge docMerge = new DocumentMerge(cc_, charMerge);
+
+        docMerge.applyDifference(i1, i2, diffTable);
+    }
+}
+
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java
new file mode 100644
index 000000000000..75edfbd78d7f
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java
@@ -0,0 +1,535 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentSerializerImpl.java,v $
+ * $Revision: 1.5 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import java.io.IOException;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.palm.PdbEncoder;
+import org.openoffice.xmerge.converter.palm.PdbDecoder;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.util.Debug;
+import org.openoffice.xmerge.util.XmlUtil;
+
+/**
+ * <p>AportisDoc implementation of
+ * org.openoffice.xmerge.DocumentSerializer
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>The <code>serialize</code> method traverses the DOM
+ * document from the given <code>Document</code> object. It uses a
+ * <code>DocEncoder</code> object for the actual conversion of
+ * contents to the AportisDoc format.</p>
+ *
+ * @author Herbie Ong
+ */
+
+
+public final class DocumentSerializerImpl
+ implements OfficeConstants, DocConstants, DocumentSerializer {
+
+    /**
+     *  The <code>DocEncoder</code> used for encoding to AportisDoc.
+     *  It is created by the <code>serialize</code> method.
+     */
+    private DocEncoder encoder;
+
+    /** The SXW <code>Document</code> that this converter processes. */
+    private SxwDocument sxwDoc;
+
+
+    /**
+     *  Constructor.  The given <code>Document</code> is cast to
+     *  <code>SxwDocument</code>.
+     *
+     *  @param  doc  A SXW <code>Document</code> to be converted.
+     */
+    public DocumentSerializerImpl(Document doc) {
+        this.sxwDoc = (SxwDocument) doc;
+    }
+
+
+    /**
+     *  <p>Converts the SXW <code>Document</code> into a PDB and hands
+     *  the result back wrapped in a <code>ConvertData</code>
+     *  object.</p>
+     *
+     *  <p>This method is not thread safe for performance reasons.
+     *  This method should not be called from within two threads.
+     *  It would be best to call this method only once per object
+     *  instance.</p>
+     *
+     *  @return  The <code>ConvertData</code> object containing the output.
+     *
+     *  @throws  ConvertException  If any conversion error occurs.
+     *  @throws  IOException       If any I/O error occurs.
+     */
+    public ConvertData serialize() throws ConvertException, IOException {
+
+        // The server document name and its DOM content.
+        String docName = sxwDoc.getName();
+        org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
+
+        encoder = new DocEncoder();
+
+        // Encode the first office:body element, if there is one.
+        // There should only be one.
+        NodeList bodies = domDoc.getElementsByTagName(TAG_OFFICE_BODY);
+
+        if (bodies.getLength() > 0) {
+            traverseBody(bodies.item(0));
+        }
+
+        // Wrap the encoded records in a PalmDocument and return it
+        // inside a new ConvertData object.
+        Record records[] = encoder.getRecords();
+
+        PalmDocument palmDoc = new PalmDocument(docName,
+            DocConstants.CREATOR_ID, DocConstants.TYPE_ID,
+            0, PalmDB.PDB_HEADER_ATTR_BACKUP, records);
+
+        ConvertData cd = new ConvertData();
+        cd.addDocument(palmDoc);
+
+        return cd;
+    }
+
+
+    /**
+     *  Walks the element children of the <i>office:body</i> element.
+     *  Paragraphs and headings go to <code>traverseParagraph</code>,
+     *  ordered and unordered lists go to <code>traverseList</code>,
+     *  and any other element is only logged.
+     *
+     *  @param  node  <i>office:body</i> <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseBody(Node node) throws IOException {
+
+        log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        log("<AportisDOC>");
+
+        for (Node child = node.getFirstChild(); child != null;
+             child = child.getNextSibling()) {
+
+            // Only element children are of interest here.
+            if (child.getNodeType() != Node.ELEMENT_NODE) {
+                continue;
+            }
+
+            String nodeName = child.getNodeName();
+
+            if (nodeName.equals(TAG_PARAGRAPH) ||
+                nodeName.equals(TAG_HEADING)) {
+
+                traverseParagraph(child);
+
+            } else if (nodeName.equals(TAG_UNORDERED_LIST) ||
+                       nodeName.equals(TAG_ORDERED_LIST)) {
+
+                traverseList(child);
+
+            } else {
+
+                log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />");
+            }
+        }
+
+        log("</AportisDOC>");
+    }
+
+
+    /**
+     *  Encodes one <i>text:p</i> or <i>text:h</i> element: its
+     *  contents are traversed first, then an <code>EOL_CHAR</code>
+     *  is added to the encoder to terminate the paragraph.
+     *
+     *  @param  node  A <i>text:p</i> or <i>text:h</i>
+     *                <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseParagraph(Node node) throws IOException {
+
+        log("<PARA>");
+
+        traverseParaContents(node);
+        encoder.addText(EOL_CHAR);
+
+        log("</PARA>");
+    }
+
+
+    /**
+     *  This method traverses a paragraph content.
+     *  Text nodes are added to the encoder, Element
+     *  <code>Node</code> objects are handed to the
+     *  <code>traverseParaElem</code> method, and entity reference
+     *  nodes are traversed recursively.  Any other kind of child
+     *  node is only logged.
+     *
+     *  @param  node  A paragraph or content <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseParaContents(Node node) throws IOException {
+
+        if (node.hasChildNodes()) {
+
+            NodeList nodeList = node.getChildNodes();
+            int len = nodeList.getLength();
+
+            for (int i = 0; i < len; i++) {
+
+                Node child = nodeList.item(i);
+                short nodeType = child.getNodeType();
+
+                switch (nodeType) {
+
+                    case Node.TEXT_NODE:
+                        // this is for grabbing text nodes.
+                        String s = child.getNodeValue();
+
+                        if (s.length() > 0) {
+                            encoder.addText(s);
+                        }
+
+                        log("<TEXT>");
+                        log(s);
+                        log("</TEXT>");
+
+                        break;
+
+                    case Node.ELEMENT_NODE:
+
+                        traverseParaElem(child);
+                        break;
+
+                    case Node.ENTITY_REFERENCE_NODE:
+
+                        log("<ENTITY_REFERENCE>");
+                        traverseParaContents(child);
+                        log("</ENTITY_REFERENCE>");
+                        break;
+
+                    default:
+                        // Report the child that was skipped, not the
+                        // parent that is being traversed.
+                        log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />");
+                        break;
+                }
+            }
+        }
+    }
+
+
+    /**
+     *  This method traverses an <code>Element</code> <code>Node</code>
+     *  within a paragraph.
+     *
+     *  <p>A <code>TAG_SPACE</code> element adds as many space
+     *  characters as its <code>ATTRIBUTE_SPACE_COUNT</code> attribute
+     *  says, or one if the attribute is absent.  A
+     *  <code>TAG_TAB_STOP</code> element adds a <code>TAB_CHAR</code>.
+     *  Line breaks and bookmarks add nothing.  Spans, hyperlinks and
+     *  text fields are traversed for their contents.  Any other
+     *  element is only logged.</p>
+     *
+     *  @param  node  <code>Element</code> <code>Node</code> within a
+     *                paragraph.
+     *
+     *  @throws  IOException  If any I/O error occurs, or if the space
+     *                        count attribute is not a valid integer.
+     */
+    private void traverseParaElem(Node node) throws IOException {
+
+        String nodeName = node.getNodeName();
+
+        if (nodeName.equals(TAG_SPACE)) {
+
+            // this is for text:s tags.
+            NamedNodeMap map = node.getAttributes();
+            Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
+            int count = 1;
+
+            if (attr != null) {
+
+                try {
+
+                    String countStr = attr.getNodeValue();
+                    count = Integer.parseInt(countStr.trim());
+
+                } catch (NumberFormatException e) {
+
+                    // TODO: for now, throw IOException.
+                    // later, perhaps will have to throw
+                    // some other conversion exception instead.
+                    IOException ioe = new IOException(e.getMessage());
+                    ioe.initCause(e);
+                    throw ioe;
+                }
+            }
+
+            // The buffer starts out empty.  Passing SPACE_CHAR to the
+            // constructor would select StringBuffer(int capacity), not
+            // seed the buffer with a space, so it is not done here.
+            StringBuffer space = new StringBuffer();
+
+            for (int j = 0; j < count; j++) {
+
+                space.append(SPACE_CHAR);
+            }
+
+            encoder.addText(space.toString());
+
+            log("<SPACE count=\"" + count + "\" />");
+
+        } else if (nodeName.equals(TAG_TAB_STOP)) {
+
+            // this is for text:tab-stop
+            encoder.addText(TAB_CHAR);
+
+            log("<TAB/>");
+
+        } else if (nodeName.equals(TAG_LINE_BREAK)) {
+
+            // commented out by Csaba: There is no point to convert a linebreak
+            // into a EOL, because it messes up the number of XML nodes and the
+            // merge won't work properly. Other solution would be to implement such
+            // nodemerger, which would be able to merge embedded tags in a paragraph
+
+            // this is for text:line-break
+            // encoder.addText(EOL_CHAR);
+
+            log("skipped <LINE-BREAK/>");
+
+        } else if (nodeName.equals(TAG_SPAN)) {
+
+            // this is for text:span
+            log("<SPAN>");
+            traverseParaContents(node);
+            log("</SPAN>");
+
+        } else if (nodeName.equals(TAG_HYPERLINK)) {
+
+            // this is for text:a
+            log("<HYPERLINK>");
+            traverseParaContents(node);
+            log("</HYPERLINK>");
+
+        } else if (nodeName.equals(TAG_BOOKMARK) ||
+                   nodeName.equals(TAG_BOOKMARK_START)) {
+
+            log("<BOOKMARK/>");
+
+        } else if (nodeName.equals(TAG_TEXT_VARIABLE_SET)
+                   || nodeName.equals(TAG_TEXT_VARIABLE_GET)
+                   || nodeName.equals(TAG_TEXT_EXPRESSION)
+                   || nodeName.equals(TAG_TEXT_USER_FIELD_GET)
+                   || nodeName.equals(TAG_TEXT_PAGE_VARIABLE_GET)
+                   || nodeName.equals(TAG_TEXT_SEQUENCE)
+                   || nodeName.equals(TAG_TEXT_VARIABLE_INPUT)
+                   || nodeName.equals(TAG_TEXT_TIME)
+                   || nodeName.equals(TAG_TEXT_PAGE_COUNT)
+                   || nodeName.equals(TAG_TEXT_PAGE_NUMBER)
+                   || nodeName.equals(TAG_TEXT_SUBJECT)
+                   || nodeName.equals(TAG_TEXT_TITLE)
+                   || nodeName.equals(TAG_TEXT_CREATION_TIME)
+                   || nodeName.equals(TAG_TEXT_DATE)
+                   || nodeName.equals(TAG_TEXT_TEXT_INPUT)
+                   || nodeName.equals(TAG_TEXT_AUTHOR_INITIALS)) {
+
+            log("<FIELD>");
+            traverseParaContents(node);
+            log("</FIELD>");
+
+        } else if (nodeName.startsWith(TAG_TEXT)) {
+
+            log("<Unknown text Field>");
+            traverseParaContents(node);
+            log("</Unknown text Field>");
+
+        } else {
+
+            log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />");
+        }
+    }
+
+
+    /**
+     *  Walks the element children of a <i>text:unordered-list</i> or
+     *  <i>text:ordered-list</i> element.  A list can only contain one
+     *  optional <i>text:list-header</i> and one or more
+     *  <i>text:list-item</i> elements; any other element is logged
+     *  as invalid.
+     *
+     *  @param  node  A list <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseList(Node node) throws IOException {
+
+        log("<LIST>");
+
+        for (Node child = node.getFirstChild(); child != null;
+             child = child.getNextSibling()) {
+
+            // Only element children are of interest here.
+            if (child.getNodeType() != Node.ELEMENT_NODE) {
+                continue;
+            }
+
+            String nodeName = child.getNodeName();
+
+            if (nodeName.equals(TAG_LIST_ITEM)) {
+
+                traverseListItem(child);
+
+            } else if (nodeName.equals(TAG_LIST_HEADER)) {
+
+                traverseListHeader(child);
+
+            } else {
+
+                log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
+            }
+        }
+
+        log("</LIST>");
+    }
+
+
+    /**
+     *  Walks the element children of a <i>text:list-header</i>
+     *  element.  It contains one or more <i>text:p</i> elements,
+     *  each of which is passed to <code>traverseParagraph</code>;
+     *  any other element is logged as invalid.
+     *
+     *  @param  node  A list header <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseListHeader(Node node) throws IOException {
+
+        log("<LIST-HEADER>");
+
+        for (Node child = node.getFirstChild(); child != null;
+             child = child.getNextSibling()) {
+
+            // Only element children are of interest here.
+            if (child.getNodeType() != Node.ELEMENT_NODE) {
+                continue;
+            }
+
+            if (child.getNodeName().equals(TAG_PARAGRAPH)) {
+
+                traverseParagraph(child);
+
+            } else {
+
+                log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
+            }
+        }
+
+        log("</LIST-HEADER>");
+    }
+
+
+    /**
+     *  <p>This method will traverse a <i>text:list-item</i>.
+     *  A list item may contain one or more of <i>text:p</i>,
+     *  <i>text:h</i>, <i>text:section</i>, <i>text:ordered-list</i>
+     *  and <i>text:unordered-list</i>.</p>
+     *
+     *  <p>This method currently only implements grabbing <i>text:p</i>,
+     *  <i>text:h</i>, <i>text:unordered-list</i> and
+     *  <i>text:ordered-list</i>.  Any other element is logged as
+     *  invalid.</p>
+     *
+     *  @param  node  The <code>Node</code>.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    private void traverseListItem(Node node) throws IOException {
+
+        log("<LIST-ITEM>");
+
+        if (node.hasChildNodes()) {
+
+            NodeList nodeList = node.getChildNodes();
+            int len = nodeList.getLength();
+
+            for (int i = 0; i < len; i++) {
+
+                Node child = nodeList.item(i);
+
+                if (child.getNodeType() == Node.ELEMENT_NODE) {
+
+                    String nodeName = child.getNodeName();
+
+                    // Headings are encoded like paragraphs, the same
+                    // way the office:body traversal treats them, so
+                    // that their text is not dropped from the output.
+                    if (nodeName.equals(TAG_PARAGRAPH) ||
+                        nodeName.equals(TAG_HEADING)) {
+
+                        traverseParagraph(child);
+
+                    } else if (nodeName.equals(TAG_UNORDERED_LIST)) {
+
+                        traverseList(child);
+
+                    } else if (nodeName.equals(TAG_ORDERED_LIST)) {
+
+                        traverseList(child);
+
+                    } else {
+
+                        log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />");
+                    }
+                }
+            }
+        }
+
+        log("</LIST-ITEM>");
+    }
+
+
+    /**
+     *  Writes a message to the debug log at <code>Debug.TRACE</code>
+     *  level.
+     *
+     *  @param  str  The debug message.
+     */
+    private void log(String str) {
+        Debug.log(Debug.TRACE, str);
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java
new file mode 100644
index 000000000000..3f6f9e26cd50
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java
@@ -0,0 +1,144 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: PluginFactoryImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.DocumentMergerFactory;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.DocumentSerializerFactory;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.DocumentDeserializerFactory;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.util.registry.ConverterInfo;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ *  <p>AportisDoc implementation of the <code>PluginFactory</code>.
+ *  This encapsulates conversion of StarWriter XML format to and from
+ *  AportisDoc format.</p>
+ *
+ *  <p>The superclass, {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory
+ *  SxwPluginFactory}, produces the particular
+ *  {@link org.openoffice.xmerge.Document Document} object, i.e. {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.SxwDocument
+ *  SxwDocument}, that the converters in this class work with.  This
+ *  class therefore only implements the methods that produce the
+ *  converters, i.e. {@link
+ *  org.openoffice.xmerge.DocumentSerializer
+ *  DocumentSerializer}, {@link
+ *  org.openoffice.xmerge.DocumentDeserializer
+ *  DocumentDeserializer} and {@link
+ *  org.openoffice.xmerge.DocumentMerger DocumentMerger}, the last one
+ *  being a {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl
+ *  DocumentMergerImpl} set up with the {@link
+ *  org.openoffice.xmerge.ConverterCapabilities
+ *  ConverterCapabilities} object that is specific to this format
+ *  conversion.  It also creates the device side
+ *  <code>Document</code>.</p>
+ *
+ *  @author      Herbie Ong
+ */
+public final class PluginFactoryImpl extends SxwPluginFactory
+    implements DocumentDeserializerFactory, DocumentSerializerFactory,
+    DocumentMergerFactory {
+
+    /** ConverterCapabilities object for this type of conversion. */
+    private final static ConverterCapabilities converterCap =
+        new ConverterCapabilitiesImpl();
+
+
+    /**
+     *  Constructor that hands the given <code>ConverterInfo</code>
+     *  to the superclass.
+     *
+     *  @param  ci  The <code>ConverterInfo</code> for this plug-in.
+     */
+    public PluginFactoryImpl(ConverterInfo ci) {
+        super(ci);
+    }
+
+
+    /**
+     *  Creates a <code>DocumentSerializerImpl</code>, the
+     *  implementation of the <code>DocumentSerializer</code>
+     *  interface for this conversion.
+     *
+     *  @param  doc  <code>Document</code> object to be
+     *               converted/serialized.
+     *
+     *  @return  A <code>DocumentSerializerImpl</code> object.
+     */
+    public DocumentSerializer createDocumentSerializer(Document doc) {
+        return new DocumentSerializerImpl(doc);
+    }
+
+
+    /**
+     *  Creates a <code>DocumentDeserializerImpl</code>, the
+     *  implementation of the <code>DocumentDeserializer</code>
+     *  interface for this conversion.
+     *
+     *  @param  cd  <code>ConvertData</code> object for reading data
+     *              which will be converted back to a
+     *              <code>Document</code> object.
+     *
+     *  @return  A <code>DocumentDeserializerImpl</code> object.
+     */
+    public DocumentDeserializer createDocumentDeserializer(ConvertData cd) {
+        return new DocumentDeserializerImpl(cd);
+    }
+
+
+    /**
+     *  Creates a <code>DocumentMergerImpl</code>, the implementation
+     *  of the <code>DocumentMerger</code> interface for this
+     *  conversion.  The merger is given the shared
+     *  <code>ConverterCapabilities</code> object of this class.
+     *
+     *  @param  doc  <code>Document</code> to merge.
+     *
+     *  @return  A <code>DocumentMergerImpl</code> object.
+     */
+    public DocumentMerger createDocumentMerger(Document doc) {
+        return new DocumentMergerImpl(doc, converterCap);
+    }
+
+
+    /**
+     *  Creates a <code>PalmDocument</code> from the given
+     *  <code>InputStream</code>.  The <code>name</code> argument is
+     *  not used by this implementation.
+     *
+     *  @param  name  Name of the <code>Document</code> (unused here).
+     *  @param  is    <code>InputStream</code> passed to the
+     *                <code>PalmDocument</code> constructor.
+     *
+     *  @return  A <code>PalmDocument</code> object.
+     *
+     *  @throws  IOException  If any I/O error occurs.
+     */
+    public Document createDeviceDocument(String name, InputStream is)
+        throws IOException {
+
+        return new PalmDocument(is);
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml
new file mode 100644
index 000000000000..5b34dc29ad9d
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: build.xml,v $
+
+ $Revision: 1.4 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<project name="xmrg_jooxcxs_aportisdoc" default="main" basedir=".">
+
+ <!-- ================================================================= -->
+ <!-- settings -->
+ <!-- ================================================================= -->
+
+ <!-- project prefix, used for targets and build.lst -->
+ <property name="prj.prefix" value="xmrg"/>
+
+ <!-- name of this sub target used in recursive builds -->
+ <property name="target" value="xmrg_jooxcxs_aportisdoc"/>
+
+ <!-- relative path to project directory -->
+ <property name="prj" value="../../../../../../../.."/>
+
+ <!-- start of java source code package structure -->
+ <property name="java.dir" value="${prj}/java"/>
+
+ <!-- path component for current java package -->
+ <property name="package"
+ value="org/openoffice/xmerge/converter/xml/sxw/aportisdoc"/>
+
+ <!-- define how to handle CLASSPATH environment -->
+ <property name="build.sysclasspath" value="ignore"/>
+
+ <!-- classpath settings for javac tasks -->
+ <path id="classpath">
+ <pathelement location="${build.class}"/>
+ <pathelement location="${solar.jar}/parser.jar"/>
+ <pathelement location="${solar.jar}/jaxp.jar"/>
+ <pathelement location="${solar.jar}/xerces.jar"/>
+ </path>
+
+ <!-- set whether we want to compile with or without deprecation -->
+ <property name="deprecation" value="on"/>
+
+ <!-- ================================================================= -->
+ <!-- solar build environment targets -->
+ <!-- ================================================================= -->
+
+ <target name="build_dir" unless="build.dir">
+ <property name="build.dir" value="${out}"/>
+ </target>
+
+ <target name="solar" depends="build_dir" if="solar.update">
+ <property name="solar.properties"
+ value="${solar.bin}/solar.properties"/>
+ </target>
+
+ <target name="init" depends="solar">
+ <property name="build.compiler" value="classic"/>
+ <property file="${solar.properties}"/>
+ <property file="${build.dir}/class/solar.properties"/>
+ </target>
+
+ <target name="info">
+ <echo message="--------------------"/>
+ <echo message="${target}"/>
+ <echo message="--------------------"/>
+ </target>
+
+
+ <!-- ================================================================= -->
+ <!-- custom targets -->
+ <!-- ================================================================= -->
+
+ <!-- the main target, called in recursive builds -->
+ <target name="main" depends="info,prepare,compile"/>
+
+ <!-- prepare output directories -->
+ <target name="prepare" depends="init" if="build.class">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.class}"/>
+ </target>
+
+ <!-- compile java sources in ${package} -->
+ <target name="compile" depends="prepare" if="build.class">
+ <javac srcdir="${java.dir}"
+ destdir="${build.class}"
+ debug="${debug}"
+ deprecation="${deprecation}"
+ optimize="${optimize}">
+ <classpath refid="classpath"/>
+ <include name="${package}/DocConstants.java"/>
+ <include name="${package}/DocDecoder.java"/>
+ <include name="${package}/DocEncoder.java"/>
+ <include name="${package}/DocumentDeserializerImpl.java"/>
+ <include name="${package}/DocumentSerializerImpl.java"/>
+ <include name="${package}/DocumentMergerImpl.java"/>
+ <include name="${package}/ConverterCapabilitiesImpl.java"/>
+ <include name="${package}/PluginFactoryImpl.java"/>
+ </javac>
+ </target>
+
+ <!-- clean up -->
+ <target name="clean" depends="prepare">
+ <delete includeEmptyDirs="true">
+ <fileset dir="${build.class}">
+ <patternset>
+ <include name="${package}/*.class"/>
+ </patternset>
+ </fileset>
+ </delete>
+ </target>
+
+</project>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml
new file mode 100644
index 000000000000..04ab76384dd0
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: converter.xml,v $
+
+ $Revision: 1.4 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<converters>
+ <converter type="staroffice/sxw" version="1.0">
+ <converter-display-name>
+ AportisDoc
+ </converter-display-name>
+ <converter-description>
+ StarWriter XML to/from AportisDoc conversion
+ </converter-description>
+ <converter-vendor>OpenOffice.org</converter-vendor>
+ <converter-class-impl>
+ org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
+ </converter-class-impl>
+ <converter-target type="application/x-aportisdoc" />
+ </converter>
+</converters>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk
new file mode 100644
index 000000000000..efbf2869009b
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk
@@ -0,0 +1,36 @@
+#***************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2008 by Sun Microsystems, Inc.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# $RCSfile: makefile.mk,v $
+#
+# $Revision: 1.3 $
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#***************************************************************************
+
+TARGET=xmrg_jooxcxs_aportisdoc
+PRJ=../../../../../../../..
+
+.INCLUDE : ant.mk
+ALLTAR: ANTBUILD
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html
new file mode 100644
index 000000000000..9d5d1bdc00cb
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html
@@ -0,0 +1,241 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: package.html,v $
+
+ $Revision: 1.3 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<html>
+<head>
+<title>org.openoffice.xmerge.converter.xml.sxw.aportisdoc package</title>
+</head>
+
+<body bgcolor="white">
+
+<p>Provides the tools for doing the conversion of StarWriter XML to
+and from AportisDoc format.</p>
+
+<p>It follows the {@link org.openoffice.xmerge} framework for the conversion process.</p>
+
+<p>Since it converts to/from a Palm application format, these converters
+follow the <a href="../../../../converter/palm/package-summary.html#streamformat">
+<code>PalmDB</code> stream format</a> for writing out to the Palm sync client or
+reading in from the Palm sync client.</p>
+
+<p>Note that <code>PluginFactoryImpl</code> also provides a
+<code>DocumentMerger</code> object, i.e. {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl DocumentMergerImpl}.
+This functionality was derived from its superclass
+{@link org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory
+SxwPluginFactory}.</p>
+
+<h2>AportisDoc pdb format - Doc</h2>
+
+<p>The AportisDoc pdb format is widely used by different Palm applications,
+e.g. QuickWord, AportisDoc Reader, MiniWrite, etc. Note that some
+of these applications put tweaks into the format. The converters will only
+support the default AportisDoc format, plus some very minor tweaks to accommodate
+other applications.</p>
+
+<p>The text content of the format is plain text, i.e. there are no styles
+or structures. There is no notion of lists, list items, paragraphs,
+headings, etc. The format does have support for bookmarks.</p>
+
+<p>For most Doc applications, the default character encoding supported is
+the extended ASCII character set, i.e. ISO-8859-1. StarWriter XML is in
+UTF-8 encoding scheme. Since UTF-8 encoding scheme covers more characters,
+converting UTF-8 strings into extended ASCII would mean that there can be
+possible loss of character mappings.</p>
+
+<p>Using JAXP, XML files can be parsed and read in as Java <code>String</code>s,
+which are in Unicode format, so there is no loss of character mapping from UTF-8
+to Java Strings. There is possible loss of character mapping in
+converting Java <code>String</code>s to ASCII bytes. Java characters that
+cannot be represented in extended ASCII are converted into the ASCII
+character '?' or x3F in hex digit via the <code>String.getBytes(encoding)</code>
+API.</p>
+
+<h2>SXW to DOC Conversion</h2>
+
+<p>The <code>DocumentSerializerImpl</code> class implements the
+<code>org.openoffice.xmerge.DocumentSerializer</code>.
+This class specifically provides the conversion process from a given
+<code>SxwDocument</code> object to DOC formatted records, which are
+then passed back to the client via the <code>ConvertData</code> object.</p>
+
+<p>The following XML tags are handled. [Note that some may not be implemented yet.]</p>
+<ul>
+<li>
+ <p>Paragraphs <tt>&lt;text:p&gt;</tt> and Headings <tt>&lt;text:h&gt;</tt></p>
+
+ <p>Heading elements are classified the same as paragraph
+ elements since both have the same possible elements inside.
+ Their main difference is that they refer to different types
+ of style information, which is outside of their element tags.
+ Since there are no styles on the DOC format, headings should
+ be treated the same way a paragraph is converted.</p>
+
+ <p>For paragraph elements, convert and transfer text nodes
+ that are essential. Text nodes directly contained within paragraph
+ nodes are such. There are also a number of elements that
+ a paragraph element may contain. These are explained in their
+ own context.</p>
+
+ <p>At the end of the paragraph, an EOL character is added by
+ the converter to provide a separation for each paragraph,
+ since the Doc format does not have a notion of a paragraph.</p>
+</li>
+<li>
+ <p>White spaces <tt>&lt;text:s&gt;</tt> and Tabs <tt>&lt;text:tab-stop&gt;</tt></p>
+
+ <p>In SXW, normally 2 or more white-space characters are collapsed into
+ a single space character. In order to make sure that the document
+ content really contains those white-space characters, there are special
+ elements assigned to them.</p>
+
+ <p>The space element specifies the number of spaces it represents.
+ Thus, converting it just means providing the specific number of spaces
+ that the element requires.</p>
+
+ <p>There is also the tab-stop element. This is a bit tricky. In a
+ StarWriter document, tab-stops are specified by a column position.
+ A tab is not an exact number of spaces, but rather a specific column
+ positioning. Say, regular tab-stops are set at every 5th column.
+ At column 4, if I hit a tab, it goes to column 5. At column 1, hitting
+ a tab would put the cursor at column 5 as well. SmartDoc and AportisDoc
+ applications go by columns for the ASCII tab character. The only problem
+ is that in StarWriter, one could specify a different tab-stop, but not
+ in most of these Doc applications, at least I have not seen one.
+ Solution for this is just to go with the converting to the ASCII tab
+ character and not do anything for different tab-stop positioning.</p>
+</li>
+<li>
+ <p>Line breaks <tt>&lt;text:line-break&gt;</tt></p>
+
+ <p>To represent line breaks, it is simplest to just put an ASCII LF
+ character. Note that the side effect of this is that an end of paragraph
+ also contains an ASCII LF character. Thus, for the DOC to SXW conversion,
+ line breaks are not distinguishable from specifying the end of a
+ paragraph.</p>
+</li>
+<li>
+ <p>Text spans <tt>&lt;text:span&gt;</tt></p>
+
+ <p>Text spans contain text that have different style attributes
+ from the paragraphs'. Text spans can be embedded within another
+ text span. Since it is purely for style tagging, we only needed
+ to convert and transfer the text elements within these.</p>
+</li>
+<li>
+ <p>Hyperlinks <tt>&lt;text:a&gt;</tt></p>
+
+ <p>Convert and transfer the text portion.</p>
+</li>
+<li>
+ <p>Bookmarks <tt>&lt;text:bookmark&gt;</tt> <tt>&lt;text:bookmark-start&gt;</tt>
+ <tt>&lt;text:bookmark-end&gt;</tt> [Not implemented yet]</p>
+
+ <p>In SXW, bookmark elements are embedded inside paragraph elements.
+ Bookmarks can either mark a text position or a text range. <tt>&lt;text:bookmark&gt;</tt>
+ marks a position while the pair <tt>&lt;text:bookmark-start&gt;</tt> and
+ <tt>&lt;text:bookmark-end&gt;</tt> marks a text range. The DOC format only
+ supports bookmarking a text position. Thus, for the conversion,
+ <tt>&lt;text:bookmark&gt;</tt> and <tt>&lt;text:bookmark-start&gt;</tt> will both mark
+ a text position.</p>
+</li>
+<li>
+ <p>Change Tracking <tt>&lt;text:tracked-changes&gt;</tt>
+ <tt>&lt;text:change*&gt;</tt> [Not implemented yet]</p>
+
+ <p>Change tracking elements are not supported yet on the current
+ OpenOffice XML filters, will have to watch out on this. The text
+ within these elements has to be interpreted properly during the
+ conversion process.</p>
+</li>
+<li>
+ <p>Lists <tt>&lt;text:unordered-list&gt;</tt> and
+ <tt>&lt;text:ordered-list&gt;</tt></p>
+
+ <p>A list can only contain one optional <tt>&lt;text:list-header&gt;</tt>
+ and one or more <tt>&lt;text:list-item&gt;</tt> elements.</p>
+
+ <p>A <tt>&lt;text:list-header&gt;</tt> contains one or more paragraph
+ elements. Since there are no styles, the conversion process does not
+ do anything special for list headers; conversion for the paragraphs
+ within list headers is the same as explained above.</p>
+
+ <p>A <tt>&lt;text:list-item&gt;</tt> may contain one or more of paragraphs,
+ headings, list, etc. Since the Doc format does not support any list
+ structure, there will not be any special handling for this element.
+ Conversion for elements within it shall be applied according to the
+ element type. Thus, lists with paragraphs within it will result in just
+ plain paragraphs. Sublists will not be identifiable. Paragraphs in
+ sublists will still appear.</p>
+</li>
+<li>
+ <p><tt>&lt;text:section&gt;</tt></p>
+
+ <p>I am not sure what this is yet, will need to investigate more on this.</p>
+</li>
+</ul>
+<p>There may be other tags that will still need to be addressed for this conversion.</p>
+
+<p>Refer to {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentSerializerImpl DocumentSerializerImpl}
+for details of implementation. It uses <code>DocEncoder</code> class to do the encoding
+part.</p>
+
+<h2>DOC to SXW Conversion</h2>
+
+<p>The <code>DocumentDeserializerImpl</code> class implements the
+<code>org.openoffice.xmerge.DocumentDeserializer</code>. It is
+passed the device document in the form of a <code>ConvertData</code> object.
+It will then create a <code>SxwDocument</code> object from the conversion of
+the DOC formatted records.</p>
+
+<p>The text content of the Doc format will be transferred as text. Paragraph
+elements will be formed based on the existence of an ASCII LF character. There
+will be at least one paragraph element.</p>
+
+<p>Bookmarks in the Doc format will be converted to the bookmark element
+<tt>&lt;text:bookmark&gt;</tt> [Not implemented yet].</p>
+
+
+<h2>Merging changes</h2>
+
+<p>As mentioned above, the <code>DocumentMerger</code> object produced by
+<code>PluginFactoryImpl</code> is <code>DocumentMergerImpl</code>.
+Refer to the javadocs for that package/class on its merging specifications.
+</p>
+
+<h2>TODO list</h2>
+
+<p><ol>
+<li>Investigate Palms with different character encodings.</li>
+<li>Investigate other StarWriter XML tags</li>
+</ol></p>
+
+</body>
+</html>
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml
new file mode 100644
index 000000000000..4a1c7fba5dca
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml
@@ -0,0 +1,132 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: build.xml,v $
+
+ $Revision: 1.4 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<project name="xmrg_jooxcx_sxw" default="main" basedir=".">
+
+ <!-- ================================================================= -->
+ <!-- settings -->
+ <!-- ================================================================= -->
+
+ <!-- project prefix, used for targets and build.lst -->
+ <property name="prj.prefix" value="xmrg"/>
+
+ <!-- name of this sub target used in recursive builds -->
+ <property name="target" value="xmrg_jooxcx_sxw"/>
+
+ <!-- relative path to project directory -->
+ <property name="prj" value="../../../../../../.."/>
+
+ <!-- start of java source code package structure -->
+ <property name="java.dir" value="${prj}/java"/>
+
+ <!-- path component for current java package -->
+ <property name="package"
+ value="org/openoffice/xmerge/converter/xml/sxw"/>
+
+ <!-- define how to handle CLASSPATH environment -->
+ <property name="build.sysclasspath" value="ignore"/>
+
+ <!-- classpath settings for javac tasks -->
+ <path id="classpath">
+ <pathelement location="${build.class}"/>
+ <pathelement location="${solar.jar}/parser.jar"/>
+ <pathelement location="${solar.jar}/jaxp.jar"/>
+ <pathelement location="${solar.jar}/xerces.jar"/>
+ </path>
+
+ <!-- set whether we want to compile with or without deprecation -->
+ <property name="deprecation" value="on"/>
+
+ <!-- ================================================================= -->
+ <!-- solar build environment targets -->
+ <!-- ================================================================= -->
+
+ <target name="build_dir" unless="build.dir">
+ <property name="build.dir" value="${out}"/>
+ </target>
+
+ <target name="solar" depends="build_dir" if="solar.update">
+ <property name="solar.properties"
+ value="${solar.bin}/solar.properties"/>
+ </target>
+
+ <target name="init" depends="solar">
+ <property name="build.compiler" value="classic"/>
+ <property file="${solar.properties}"/>
+ <property file="${build.dir}/class/solar.properties"/>
+ </target>
+
+ <target name="info">
+ <echo message="--------------------"/>
+ <echo message="${target}"/>
+ <echo message="--------------------"/>
+ </target>
+
+
+ <!-- ================================================================= -->
+ <!-- custom targets -->
+ <!-- ================================================================= -->
+
+ <!-- the main target, called in recursive builds -->
+ <target name="main" depends="info,prepare,compile"/>
+
+ <!-- prepare output directories -->
+ <target name="prepare" depends="init" if="build.class">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.class}"/>
+ </target>
+
+ <!-- compile java sources in ${package} -->
+ <target name="compile" depends="prepare" if="build.class">
+ <javac srcdir="${java.dir}"
+ destdir="${build.class}"
+ debug="${debug}"
+ deprecation="${deprecation}"
+ optimize="${optimize}">
+ <classpath refid="classpath"/>
+ <include name="${package}/SxwDocument.java"/>
+ <include name="${package}/SxwPluginFactory.java"/>
+ </javac>
+ </target>
+
+ <!-- clean up -->
+ <target name="clean" depends="prepare">
+ <delete includeEmptyDirs="true">
+ <fileset dir="${build.class}">
+ <patternset>
+ <include name="${package}/*.class"/>
+ </patternset>
+ </fileset>
+ </delete>
+ </target>
+
+</project>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk
new file mode 100644
index 000000000000..03db799f178c
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk
@@ -0,0 +1,36 @@
+#***************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2008 by Sun Microsystems, Inc.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# $RCSfile: makefile.mk,v $
+#
+# $Revision: 1.3 $
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#***************************************************************************
+
+TARGET=xmrg_jooxcx_sxw
+PRJ=../../../../../../..
+
+.INCLUDE : ant.mk
+ALLTAR: ANTBUILD
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html
new file mode 100644
index 000000000000..47a7e940fc36
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html
@@ -0,0 +1,42 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: package.html,v $
+
+ $Revision: 1.3 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<html>
+<head>
+<title>org.openoffice.xmerge.converter.xml.sxw package</title>
+</head>
+
+<body bgcolor="white">
+<p>Provides base implementation of StarWriter XML conversion to and from
+different &quot;Device&quot; <code>Document</code> formats.</p>
+
+</body>
+</html>
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java
new file mode 100644
index 000000000000..bc57ccc776ea
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java
@@ -0,0 +1,96 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: ConverterCapabilitiesImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+
+/**
+ * <p>PocketWord implementation of <code>ConverterCapabilities</code> for
+ * the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>Used with StarWriter XML to/from PocketWord conversions. The
+ * <code>ConverterCapabilities</code> specify which &quot;Office&quot;
+ * <code>Document</code> tags and attributes are supported on the
+ * &quot;Device&quot; <code>Document</code> format.</p>
+ */
+public final class ConverterCapabilitiesImpl
+ implements ConverterCapabilities {
+
+ public boolean canConvertTag(String tag) {
+
+ if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_PARAGRAPH.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_HEADING.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LIST_ITEM.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LIST_HEADER.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_SPAN.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_HYPERLINK.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_LINE_BREAK.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_SPACE.equals(tag))
+ return true;
+ else if (OfficeConstants.TAG_TAB_STOP.equals(tag))
+ return true;
+
+ return false;
+ }
+
+ public boolean canConvertAttribute(String tag,
+ String attribute) {
+
+ if (OfficeConstants.TAG_SPACE.equals(tag)) {
+
+ if (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute))
+ return true;
+ }
+
+ return false;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java
new file mode 100644
index 000000000000..312762308df1
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java
@@ -0,0 +1,239 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentDescriptor.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.util.EndianConverter;
+
+import java.io.ByteArrayOutputStream;
+import java.io.OutputStream;
+import java.io.IOException;
+
+import java.util.Vector;
+
+
+/**
+ * This class represents the data structure stored by a Pocket Word file that
+ * describes that file.
+ *
+ * The data structure is of variable length, beginning at the end of the
+ * font declarations and ending 10 bytes before the first instance of 0xFF 0xFF
+ * marking a paragraph block.
+ *
+ * The variable length component arises from an 8 byte structure describing each
+ * paragraph in the document. These paragraph descriptors appear at the end
+ * of the Document Descriptor.
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+class DocumentDescriptor {
+ private short numParagraphs = 0;
+ private short length = 0;
+ private short numLines = 0;
+
+ private Vector paragraphDesc = null;
+
+    /**
+     * Creates a descriptor that does not yet describe any paragraph.
+     */
+    DocumentDescriptor() {
+        // Start empty and grow one slot at a time as paragraphs are added.
+        this.paragraphDesc = new Vector(0, 1);
+    }
+
+
+
+    /**
+     * Records one more paragraph in this <code>DocumentDescriptor</code>
+     * and updates the running paragraph, line and length totals.
+     *
+     * @param   len     The number of characters in the paragraph.
+     * @param   lines   The number of lines on screen that the paragraph uses.
+     */
+    public void addParagraph(short len, short lines) {
+        ParagraphDescriptor entry = new ParagraphDescriptor(len, lines);
+        paragraphDesc.add(entry);
+
+        // The descriptor's own length (len + 1) is what gets accumulated,
+        // not the raw character count passed in.
+        length = (short)(length + entry.length);
+        numLines = (short)(numLines + lines);
+        numParagraphs = (short)(numParagraphs + 1);
+    }
+
+
+    /**
+     * Builds the binary form of this <code>DocumentDescriptor</code>.
+     * Because the descriptor is of variable length, several of its fields
+     * are only known once every paragraph has been added.
+     *
+     * @return  Byte array containing the Pocket Word representation of this
+     *          <code>DocumentDescriptor</code>.
+     */
+    public byte[] getDescriptor () {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+        // Fixed leading part of the descriptor.
+        writeHeader(out);
+
+        try {
+            /*
+             * This value seems to increment by 0x02 for each paragraph.
+             * For a single paragraph doc, the value is 0x08, 0x0A for two,
+             * 0x0C for three ...
+             */
+            short perParagraphValue = (short)(6 + (numParagraphs * 2));
+            out.write(EndianConverter.writeShort(perParagraphValue));
+
+            out.write(EndianConverter.writeShort(numParagraphs));
+            out.write(EndianConverter.writeShort((short)0));
+            out.write(EndianConverter.writeShort(numParagraphs));
+
+            out.write(EndianConverter.writeShort((short)0));
+            out.write(EndianConverter.writeShort(length));
+            out.write(EndianConverter.writeShort((short)0));
+
+            out.write(EndianConverter.writeShort(numLines));
+
+            // Eight zero bytes of padding.
+            out.write(new byte[8]);
+
+            // One fixed-size entry per paragraph, in insertion order.
+            int total = paragraphDesc.size();
+            for (int idx = 0; idx < total; idx++) {
+                ParagraphDescriptor entry =
+                        (ParagraphDescriptor)paragraphDesc.get(idx);
+                out.write(entry.getDescriptor());
+            }
+
+            // Byte sequence marking the end of this DocumentDescriptor
+            out.write(EndianConverter.writeShort((short)0));
+            out.write(EndianConverter.writeShort((short)0x41));
+        }
+        catch (IOException ignored) {
+            // Should never happen as this is a memory based stream.
+        }
+
+        return out.toByteArray();
+    }
+
+
+ /*
+ * This method writes the initial fixed portion of the descriptor and the
+ * mid-section to the supplied stream. The mid-section is variable but
+ * Pocket Word doesn't seem to mind default values.
+ *
+ * All bytes written here are constants; nothing depends on the paragraphs
+ * that have been added. Any IOException raised by the stream is caught
+ * and ignored, so a failing stream yields a silently truncated header.
+ *
+ * descStream is the stream receiving the bytes; the only caller in this
+ * class passes a ByteArrayOutputStream.
+ */
+ private void writeHeader(OutputStream descStream) {
+
+ try {
+ // Fixed leading block of the descriptor, written as one array.
+ descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+ 0x07, 0x00, 0x06, 0x00,
+ 0x15, 0x00, 0x10, 0x00,
+ 0x01, 0x00, (byte)0xD0, 0x2F,
+ 0x00, 0x00, (byte)0xE0, 0x3D,
+ 0x00, 0x00, (byte)0xF0, 0x00,
+ 0x00, 0x00, (byte)0xA0, 0x05,
+ 0x00, 0x00, (byte)0xA0, 0x05,
+ 0x00, 0x00, (byte)0xA0, 0x05,
+ 0x00, 0x00, (byte)0xA0, 0x05,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x0A, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x04, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x00,
+ 0x07, 0x00, 0x10, 0x00,
+ 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x12, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x1F, 0x04, 0x00, 0x00 } );
+
+ /*
+ * The next four bytes are variable, but a pattern hasn't yet been
+ * established. Pocket Word seems to accept this constant value.
+ *
+ * The bytes are repeated after another 12 byte sequence which does
+ * not seem to change from one file to the next.
+ */
+ descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } );
+ descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x3D, 0x04, 0x00, 0x00 } );
+ descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } );
+
+ // Final constant 16 bytes written by this method.
+ descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x40, 0x00, 0x08, 0x00 } );
+ }
+ catch (IOException ioe) {
+ /* Shouldn't happen with a ByteArrayOutputStream */
+ }
+ }
+
+
+    /**
+     * <code>ParagraphDescriptor</code> represents the data structure used to
+     * describe individual paragraphs within a <code>DocumentDescriptor</code>.
+     *
+     * It is used solely by the <code>DocumentDescriptor</code> class.
+     *
+     * The class is a static nested class because it never touches the state
+     * of the enclosing descriptor, so it needs no reference to it.
+     */
+    private static final class ParagraphDescriptor {
+        private final short filler  = 0;
+        private final short lines;
+        private final short length;
+        private final short unknown = 0x23;     // constant whose meaning is not known
+
+        /**
+         * @param   len         The number of characters in the paragraph.
+         * @param   numLines    The number of lines the paragraph uses.
+         */
+        public ParagraphDescriptor(short len, short numLines) {
+            lines = numLines;
+            // The stored length is one more than the character count.
+            length = (short)(len + 1);
+        }
+
+        /**
+         * Serialises this entry as four shorts, in the order filler, lines,
+         * length, unknown, each encoded by
+         * <code>EndianConverter.writeShort</code>.
+         *
+         * @return  Byte array holding the encoded entry.
+         */
+        public byte[] getDescriptor() {
+            ByteArrayOutputStream desc = new ByteArrayOutputStream();
+
+            try {
+                desc.write(EndianConverter.writeShort(filler));
+                desc.write(EndianConverter.writeShort(lines));
+                desc.write(EndianConverter.writeShort(length));
+                desc.write(EndianConverter.writeShort(unknown));
+            }
+            catch (IOException ignored) {
+                /* Should never happen: the stream is memory based. */
+            }
+
+            return desc.toByteArray();
+        }
+    }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java
new file mode 100644
index 000000000000..0378006661c1
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java
@@ -0,0 +1,301 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentDeserializerImpl.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.DocumentDeserializer;
+
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+
+import org.openoffice.xmerge.converter.xml.ParaStyle;
+import org.openoffice.xmerge.converter.xml.TextStyle;
+import org.openoffice.xmerge.converter.xml.StyleCatalog;
+
+import org.openoffice.xmerge.util.OfficeUtil;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.FileInputStream;
+import java.io.FileDescriptor;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.Element;
+
+
+/**
+ * <p>Pocket Word implementation of <code>DocumentDeserializer</code>
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>This converts a Pocket Word file to an OpenOffice Writer XML DOM.</p>
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+public final class DocumentDeserializerImpl
+ implements DocumentDeserializer, OfficeConstants {
+
+ private PocketWordDocument pswDoc = null;
+ private SxwDocument sxwDoc = null;
+ private String docName;
+
+ private StyleCatalog styleCat = null;
+
+
+    /**
+     * <p>Initialises a new <code>DocumentDeserializerImpl</code> using the
+     * supplied <code>ConvertData</code>.</p>
+     *
+     * <p>The <code>Document</code> objects in the <code>ConvertData</code>
+     * should be {@link
+     * org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument
+     * PocketWordDocument} objects. If several are present, the last one
+     * enumerated is the one that gets converted.</p>
+     *
+     * @param   cd      ConvertData containing a <code>PocketWordDocument</code>
+     *                  for conversion.
+     */
+    public DocumentDeserializerImpl(ConvertData cd) {
+        // A Pocket Word file is composed of one binary file
+        for (Enumeration docs = cd.getDocumentEnumeration();
+                docs.hasMoreElements(); ) {
+            pswDoc = (PocketWordDocument)docs.nextElement();
+        }
+
+        docName = pswDoc.getName();
+    }
+
+
+    /**
+     * <p>Converts the Pocket Word document handed to the constructor into
+     * an OpenOffice Writer <code>Document</code>.</p>
+     *
+     * <p>This method may or may not be thread-safe.  It is expected
+     * that the user code does not call this method in more than one
+     * thread.  And for most cases, this method is only done once.</p>
+     *
+     * @return  The resulting <code>Document</code> object from conversion.
+     *
+     * @throws  ConvertException   If any Convert error occurs, including
+     *                             any exception raised while building the
+     *                             Writer DOM.
+     * @throws  IOException        If any I/O error occurs.
+     */
+    public Document deserialize() throws IOException, ConvertException {
+        Enumeration paragraphs = pswDoc.getParagraphEnumeration();
+
+        sxwDoc = new SxwDocument (docName);
+        sxwDoc.initContentDOM();
+
+        // Default to an initial 5 entries in the catalog.
+        styleCat = new StyleCatalog(5);
+
+        try {
+            buildDocument(paragraphs);
+        }
+        catch (Exception ex) {
+            // Every failure is reported to the caller as a ConvertException.
+            ex.printStackTrace();
+            String message = "Error building OpenOffice Writer DOM: "
+                                + ex.toString();
+            throw new ConvertException(message);
+        }
+
+        return sxwDoc;
+    }
+
+
+    /**
+     * This method actually takes care of the conversion.  Each Pocket Word
+     * paragraph becomes a paragraph element in the body of the content DOM
+     * held by <code>sxwDoc</code>; runs of consecutive bulleted paragraphs
+     * are wrapped in a single unordered list.  The styles gathered along the
+     * way are written under the automatic styles element.
+     *
+     * @param   data    An Enumeration of all Paragraphs in the Pocket Word doc.
+     *
+     * @throws  IOException     If any I/O errors occur.
+     */
+    private void buildDocument(Enumeration data) throws IOException {
+
+        org.w3c.dom.Document doc = sxwDoc.getContentDOM();
+
+        /*
+         * There should be only one each of office:body and
+         * office:automatic-styles in each document.
+         */
+        Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0);
+
+        // Not every document has an automatic style tag
+        Node autoStylesNode = doc.getElementsByTagName(
+                                        TAG_OFFICE_AUTOMATIC_STYLES).item(0);
+        if (autoStylesNode == null) {
+            autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+            /*
+             * insertBefore must be invoked on the node that actually owns
+             * the reference child; calling it on the Document with a
+             * reference node that is not its direct child is a DOM error.
+             */
+            bodyNode.getParentNode().insertBefore(autoStylesNode, bodyNode);
+        }
+
+
+        // Needed for naming new styles
+        int paraStyles = 1;
+        int textStyles = 1;
+
+        // Pocket Word has no concept of a list.
+        Element listNode = null;
+
+
+        // Down to business ...
+        while (data.hasMoreElements()) {
+            Paragraph p = (Paragraph)data.nextElement();
+            Element paraNode  = doc.createElement(TAG_PARAGRAPH);
+
+            // Set paragraph style information here
+            ParaStyle pStyle = p.makeStyle();
+            if (pStyle == null) {
+                paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
+                                      PocketWordConstants.DEFAULT_STYLE);
+            }
+            else {
+                // Create paragraph style
+                pStyle.setName("PS" + paraStyles++);
+                paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
+                styleCat.add(pStyle);
+            }
+
+
+            /*
+             * For each of the paragraphs, process each segment.
+             * There will always be at least one; the do/while below relies
+             * on that and fails on a paragraph without segments.
+             */
+            Enumeration paraData = p.getSegmentsEnumerator();
+            Vector textSpans = new Vector(0, 1);
+
+            do {
+                ParagraphTextSegment pts = (ParagraphTextSegment)paraData.nextElement();
+                Element span = doc.createElement(OfficeConstants.TAG_SPAN);
+
+                TextStyle ts = pts.getStyle();
+
+                if (ts != null) {
+                    ts.setName("TS" + textStyles++);
+                    span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName());
+                    styleCat.add(ts);
+                }
+                else {
+                    span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME,
+                                      PocketWordConstants.DEFAULT_STYLE);
+                }
+
+                // If this isn't a blank paragraph
+                if (pts.getText() != null && !pts.getText().equals("")) {
+                    Node[] children = OfficeUtil.parseText(pts.getText(), doc);
+
+                    for (int j = 0; j < children.length; j++) {
+                        span.appendChild(children[j]);
+                    }
+                }
+
+                textSpans.add(span);
+
+            } while (paraData.hasMoreElements());
+
+
+            /*
+             * Special case for the first span.  If it has no style, then
+             * it shouldn't be a span, so just add its children with style
+             * set as standard.
+             */
+            Element firstSpan = (Element)textSpans.elementAt(0);
+            String  styleName = firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME);
+            if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) {
+                NodeList nl = firstSpan.getChildNodes();
+                int len = nl.getLength();
+
+                for (int i = 0; i < len; i++) {
+                    /*
+                     * Always take item 0 as the DOM tree event model will
+                     * cause the NodeList to shrink as each Node is reparented.
+                     *
+                     * By taking the first item from the list, we essentially
+                     * traverse the list in order.
+                     */
+                    paraNode.appendChild(nl.item(0));
+                }
+            }
+            else {
+                paraNode.appendChild(firstSpan);
+            }
+
+            // The rest are spans, so just add them
+            for (int i = 1; i < textSpans.size(); i++) {
+                paraNode.appendChild((Node)textSpans.elementAt(i));
+            }
+
+
+            /*
+             * Pocket Word doesn't support lists, but it does have bulleted
+             * paragraphs that are essentially the same thing.
+             *
+             * Unlike OpenOffice Writer, a blank paragraph can be bulleted
+             * as well.  This will be handled by inserting a blank paragraph
+             * into the unordered list, but OpenOffice Writer will not display
+             * an item at that point in the list.
+             */
+            if (p.isBulleted()) {
+                if (listNode == null) {
+                    listNode = doc.createElement(TAG_UNORDERED_LIST);
+                }
+                Element listItem = doc.createElement(TAG_LIST_ITEM);
+                listItem.appendChild(paraNode);
+                listNode.appendChild(listItem);
+            }
+            else {
+                // A non-bulleted paragraph closes any list in progress.
+                if (listNode != null) {
+                    bodyNode.appendChild(listNode);
+                    listNode = null;
+                }
+                bodyNode.appendChild(paraNode);
+            }
+        } // End processing paragraphs
+
+        /*
+         * A document that ends with bulleted paragraphs leaves a list still
+         * open here; attach it, otherwise those paragraphs would be lost.
+         */
+        if (listNode != null) {
+            bodyNode.appendChild(listNode);
+        }
+
+
+        // Now write the style catalog to the document
+        NodeList nl = styleCat.writeNode(doc, "dummy").getChildNodes();
+        int nlLen = nl.getLength();     // nl.item reduces the length
+        for (int i = 0; i < nlLen; i++) {
+            autoStylesNode.appendChild(nl.item(0));
+        }
+    }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java
new file mode 100644
index 000000000000..ae087d65b6a2
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java
@@ -0,0 +1,102 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentMergerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.MergeException;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.Difference;
+import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
+import org.openoffice.xmerge.merger.Iterator;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.diff.ParaNodeIterator;
+import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm;
+import org.openoffice.xmerge.merger.merge.DocumentMerge;
+import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge;
+import org.openoffice.xmerge.util.Debug;
+
+
+/**
+ *  <p>PocketWord implementation of <code>DocumentMerger</code>
+ *  for the {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+ *  PluginFactoryImpl}.</p>
+ */
+public class DocumentMergerImpl implements DocumentMerger {
+
+    private final ConverterCapabilities cc_;
+    private final org.openoffice.xmerge.Document orig;
+
+    /**
+     *  Creates a merger for the given original document.
+     *
+     *  @param  doc  The original <code>Document</code>; {@link #merge}
+     *               casts it to <code>SxwDocument</code>.
+     *  @param  cc   The <code>ConverterCapabilities</code> handed to the
+     *               paragraph iterators and the merge algorithm.
+     */
+    public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) {
+        cc_ = cc;
+        this.orig = doc;
+    }
+
+    /**
+     *  Computes the paragraph level differences between the original
+     *  document and <code>modifiedDoc</code> and applies them through a
+     *  <code>DocumentMerge</code>.
+     *
+     *  @param  modifiedDoc  The modified <code>Document</code>; it is cast
+     *                       to <code>SxwDocument</code>.
+     *
+     *  @throws  MergeException  If the merge reports an error.
+     */
+    public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException {
+
+        SxwDocument wdoc1 = (SxwDocument) orig;
+        SxwDocument wdoc2 = (SxwDocument) modifiedDoc;
+
+        Document doc1 = wdoc1.getContentDOM();
+        Document doc2 = wdoc2.getContentDOM();
+
+        Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement());
+        Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement());
+
+        DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm();
+
+        // find out the paragraph level diffs
+        Difference[] diffTable = diffAlgo.computeDiffs(i1, i2);
+
+        if (Debug.isFlagSet(Debug.INFO)) {
+            Debug.log(Debug.INFO, "Diff Result: ");
+
+            for (int i = 0; i < diffTable.length; i++) {
+                Debug.log(Debug.INFO, diffTable[i].debug());
+            }
+        }
+
+        // merge the paragraphs
+        NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge();
+        DocumentMerge docMerge = new DocumentMerge(cc_, charMerge);
+
+        docMerge.applyDifference(i1, i2, diffTable);
+    }
+}
+
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java
new file mode 100644
index 000000000000..2a715a7871cd
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java
@@ -0,0 +1,440 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentSerializerImpl.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.DocumentSerializer;
+
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+
+import org.openoffice.xmerge.converter.xml.ParaStyle;
+import org.openoffice.xmerge.converter.xml.TextStyle;
+import org.openoffice.xmerge.converter.xml.StyleCatalog;
+
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+
+/**
+ * <p>Pocket Word implementation of <code>DocumentSerializer</code>
+ * for use by {@link
+ * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>This converts an OpenOffice Writer XML file to a Pocket Word file.</p>
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+public final class DocumentSerializerImpl
+ implements DocumentSerializer, OfficeConstants {
+
+ private PocketWordDocument pswDoc;
+ private SxwDocument sxwDoc;
+
+ private StyleCatalog styleCat = null;
+
+ private boolean inList = false;
+
+
+    /**
+     *  <p>Initialises a new <code>DocumentSerializerImpl</code> using the
+     *     supplied <code>Document</code>.</p>
+     *
+     * <p>The supplied document should be an {@link
+     *    org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument}
+     *    object.  An empty <code>PocketWordDocument</code> carrying the same
+     *    name is created to receive the converted content.</p>
+     *
+     *  @param  doc  The <code>Document</code> to convert.
+     */
+    public DocumentSerializerImpl(Document doc) {
+        this.sxwDoc = (SxwDocument)doc;
+        this.pswDoc = new PocketWordDocument(this.sxwDoc.getName());
+    }
+
+
+    /**
+     *  <p>Converts the Writer document handed to the constructor into
+     *  Pocket Word format.  Styles are loaded first, then the first
+     *  office body element found in the content DOM, if any, is
+     *  traversed.</p>
+     *
+     *  <p>This method may or may not be thread-safe.  It is expected
+     *  that the user code does not call this method in more than one
+     *  thread.  And for most cases, this method is only done once.</p>
+     *
+     *  @return  <code>ConvertData</code> object to pass back the
+     *           converted data.
+     *
+     *  @throws  ConvertException  If any conversion error occurs.
+     *  @throws  IOException       If any I/O error occurs.
+     */
+    public ConvertData serialize() throws IOException, ConvertException {
+        ConvertData result = new ConvertData();
+
+        org.w3c.dom.Document contentDom = sxwDoc.getContentDOM();
+
+        // Load any style info before traversing the document content tree
+        loadStyles();
+
+        NodeList bodies = contentDom.getElementsByTagName(TAG_OFFICE_BODY);
+        if (bodies.getLength() > 0) {
+            traverseBody(bodies.item(0));
+        }
+
+        result.addDocument(pswDoc);
+        return result;
+    }
+
+
+    /*
+     * Fills the style catalog with the defined styles from the style.xml
+     * file as well as the automatic styles from the content.xml file.
+     *
+     * Any change to a defined style, such as a short bold section, falls into
+     * the latter category.
+     */
+    private void loadStyles() {
+        org.w3c.dom.Document contentDom = sxwDoc.getContentDOM();
+        org.w3c.dom.Document styleDom   = sxwDoc.getStyleDOM();
+
+        styleCat = new StyleCatalog(25);
+
+        // families[i] is paired with classes[i] when styles are catalogued.
+        String[] families = { PocketWordConstants.TEXT_STYLE_FAMILY,
+                              PocketWordConstants.PARAGRAPH_STYLE_FAMILY,
+                              PocketWordConstants.PARAGRAPH_STYLE_FAMILY };
+        Class[] classes   = { TextStyle.class,
+                              ParaStyle.class,
+                              TextStyle.class };
+
+        String[] styleTypes = { TAG_OFFICE_STYLES,
+                                TAG_OFFICE_AUTOMATIC_STYLES,
+                                TAG_OFFICE_MASTER_STYLES };
+
+        /*
+         * Documents converted from PSW -> SXW will not have a style.xml when
+         * being converted back to PSW.  This would occur if a document was
+         * not modified within Writer between conversions.
+         *
+         * Any Writer modifications and saves create the style.xml and other
+         * portions of a complete Writer SXW file.
+         */
+        if (styleDom != null) {
+            // Process the Style XML tree
+            addStylesFrom(styleDom, styleTypes, families, classes);
+        }
+
+        /*
+         * Process the content XML for any other style info.
+         * Should only be automatic types here.
+         */
+        addStylesFrom(contentDom, styleTypes, families, classes);
+    }
+
+
+    /*
+     * Adds to the style catalog the first element found in the given DOM
+     * for each of the listed style container tags.
+     */
+    private void addStylesFrom(org.w3c.dom.Document dom, String[] styleTypes,
+                               String[] families, Class[] classes) {
+        for (int idx = 0; idx < styleTypes.length; idx++) {
+            NodeList containers = dom.getElementsByTagName(styleTypes[idx]);
+            if (containers.getLength() != 0) {
+                styleCat.add(containers.item(0), families, classes, null, false);
+            }
+        }
+    }
+
+
+    /*
+     * Process the office:body tag.  Paragraph and heading children are
+     * handled as paragraphs, ordered and unordered list children as lists;
+     * every other child is ignored.
+     */
+    private void traverseBody(Node node) throws IOException, ConvertException {
+
+        if (!node.hasChildNodes()) {
+            return;
+        }
+
+        NodeList children = node.getChildNodes();
+        int total = children.getLength();
+
+        for (int idx = 0; idx < total; idx++) {
+            Node child = children.item(idx);
+
+            if (child.getNodeType() != Node.ELEMENT_NODE) {
+                continue;
+            }
+
+            String name = child.getNodeName();
+
+            boolean isParagraph = name.equals(TAG_PARAGRAPH)
+                                    || name.equals(TAG_HEADING);
+            if (isParagraph) {
+                traverseParagraph(child);
+            }
+
+            boolean isList = name.equals(TAG_UNORDERED_LIST)
+                                || name.equals(TAG_ORDERED_LIST);
+            if (isList) {
+                traverseList(child);
+            }
+        }
+    }
+
+
+    /*
+     * Process a text:p tag (the body traversal also routes headings here).
+     *
+     * The element's style name is looked up twice in the paragraph style
+     * family: once as a ParaStyle, used to start a new paragraph in the
+     * Pocket Word document, and once as a TextStyle, used as the default
+     * text style for the paragraph's contents.  Either lookup may return
+     * null, in which case null is passed on.
+     */
+    private void traverseParagraph(Node node) throws IOException, ConvertException {
+        String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);
+
+        ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName,
+                                PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
+                                ParaStyle.class);
+        if (pstyle != null) {
+            pstyle = (ParaStyle)pstyle.getResolved();
+        }
+
+        TextStyle tstyle = (TextStyle)styleCat.lookup(styleName,
+                                PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null,
+                                TextStyle.class);
+        // Guard on tstyle itself: the two lookups succeed or fail
+        // independently, so testing pstyle here could dereference null.
+        if (tstyle != null) {
+            tstyle = (TextStyle)tstyle.getResolved();
+        }
+
+        try {
+            pswDoc.addParagraph(pstyle, inList);
+        }
+        catch (Exception e) {
+            throw new ConvertException(
+                                "Error adding paragraph to PocketWordDocument.\n"
+                                + e.toString());
+        }
+
+        traverseParagraphContents(node, tstyle);
+    }
+
+
+    /*
+     * Process the contents of a paragraph.  This method handles situations
+     * where the paragraph contains multiple children, each representing a
+     * differently formatted piece of text.
+     *
+     * node is the paragraph, span or hyperlink element whose children are
+     * processed; defTextStyle is the text style used when the node's own
+     * style name does not resolve to a TextStyle.
+     *
+     * Text nodes, space and tab elements add text to the current Pocket Word
+     * paragraph; spans and hyperlinks are processed recursively; line breaks
+     * and unknown elements are skipped.
+     */
+    private void traverseParagraphContents (Node node, TextStyle defTextStyle)
+        throws IOException, ConvertException {
+        // First up, get the style of this little bit
+        String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME);
+        TextStyle tStyle = (TextStyle)styleCat.lookup(styleName,
+                                PocketWordConstants.TEXT_STYLE_FAMILY, null,
+                                TextStyle.class);
+
+        if (tStyle == null) {
+            tStyle = defTextStyle;
+        }
+
+        if (node.hasChildNodes()) {
+            NodeList nList = node.getChildNodes();
+            int len = nList.getLength();
+
+            for (int i = 0; i < len; i++) {
+
+                Node child = nList.item(i);
+                short nodeType = child.getNodeType();
+
+                switch (nodeType) {
+                    case Node.TEXT_NODE:
+                        String s = child.getNodeValue();
+                        if (s.length() > 0) {
+                            addText(s, tStyle);
+                        }
+                        break;
+
+                    case Node.ELEMENT_NODE:
+                        String childName = child.getNodeName();
+
+                        if (childName.equals(TAG_SPACE)) {
+                            /*
+                             * A space element stands for one space unless it
+                             * carries a space-count attribute.  The element
+                             * may have other attributes without having that
+                             * one, so test the attribute itself rather than
+                             * the size of the attribute map.
+                             */
+                            int count = 1;
+                            String countValue =
+                                    getAttribute(child, ATTRIBUTE_SPACE_COUNT);
+                            if (countValue != null) {
+                                count = Integer.parseInt(countValue.trim());
+                            }
+
+                            StringBuffer sb = new StringBuffer("");
+                            for ( ; count > 0; count--) {
+                                sb.append(" ");
+                            }
+
+                            /*
+                             * May want to look at style info for spaces.  Could
+                             * be important when calculating font metrics.
+                             */
+                            addText(sb.toString(), tStyle);
+                        }
+                        else if (childName.equals(TAG_TAB_STOP)) {
+                            addText("\t", tStyle);
+                        }
+                        else if (childName.equals(TAG_LINE_BREAK)) {
+                            /*
+                             * Pocket Word does not support soft line breaks.
+                             * They are just new paragraphs.
+                             */
+                        }
+                        else if (childName.equals(TAG_SPAN)) {
+                            /*
+                             * This is where the interesting ones, i.e. format
+                             * changes occur.
+                             */
+                            traverseParagraphContents (child, defTextStyle);
+                        }
+                        else if (childName.equals(TAG_HYPERLINK)) {
+                            traverseParagraphContents (child, defTextStyle);
+                        }
+                        else {
+                            // Should maybe have a default in here.
+                        }
+                        break;
+                    default:
+                        // Do nothing
+                }
+            }
+        }
+        else {
+            /*
+             * If the node has no children, then it is a blank paragraph, but
+             * they still require an entry in the Paragraph class to make sense.
+             * Exceptions from this call are deliberately left unwrapped.
+             */
+            pswDoc.addParagraphData("", tStyle);
+        }
+    }
+
+
+    /*
+     * Adds a run of text to the current Pocket Word paragraph, reporting
+     * any failure as a ConvertException.
+     */
+    private void addText(String text, TextStyle style) throws ConvertException {
+        try {
+            pswDoc.addParagraphData(text, style);
+        }
+        catch (Exception e) {
+            throw new ConvertException(
+                            "Error adding data to paragraph in " +
+                            "PocketWordDocument.\n" + e.toString());
+        }
+    }
+
+
+    /*
+     * Process a text:ordered-list or text:unordered-list tag.  Pocket Word has
+     * no concept of a list so there is no need to differentiate between the
+     * two.
+     *
+     * Each item on the list contains a text:p node.  The inList flag is
+     * raised while the items are processed and lowered again afterwards.
+     */
+    private void traverseList (Node node) throws IOException, ConvertException {
+        inList = true;
+
+        NodeList items = node.hasChildNodes() ? node.getChildNodes() : null;
+        if (items != null) {
+            int total = items.getLength();
+
+            for (int idx = 0; idx < total; idx++) {
+                Node item = items.item(idx);
+
+                boolean isListItem = item.getNodeType() == Node.ELEMENT_NODE
+                                        && item.getNodeName().equals(TAG_LIST_ITEM);
+                if (isListItem) {
+                    traverseListItem(item);
+                }
+            }
+        }
+
+        inList = false;
+    }
+
+
+    /*
+     * Process a text:list-item node.  They usually have a single
+     * text:p child but can also have sections or other lists.
+     *
+     * For this case, only paragraphs are supported; any other child is
+     * ignored.
+     */
+    private void traverseListItem (Node node) throws IOException, ConvertException {
+        if (!node.hasChildNodes()) {
+            return;
+        }
+
+        NodeList entries = node.getChildNodes();
+        int total = entries.getLength();
+
+        for (int idx = 0; idx < total; idx++) {
+            Node entry = entries.item(idx);
+
+            boolean isParagraph = entry.getNodeType() == Node.ELEMENT_NODE
+                                    && entry.getNodeName().equals(TAG_PARAGRAPH);
+            if (isParagraph) {
+                traverseParagraph(entry);
+            }
+        }
+    }
+
+
+    /*
+     * Utility method to retrieve the value of a Node attribute.  Returns
+     * null when the node has no attribute map or no such attribute.
+     */
+    private String getAttribute (Node node, String attribute) {
+        NamedNodeMap attributes = node.getAttributes();
+        if (attributes == null) {
+            return null;
+        }
+
+        Node match = attributes.getNamedItem(attribute);
+        return (match == null) ? null : match.getNodeValue();
+    }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java
new file mode 100644
index 000000000000..3dee1eeff3b5
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java
@@ -0,0 +1,862 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: Paragraph.java,v $
+ * $Revision: 1.9 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import java.util.Vector;
+import java.util.Enumeration;
+
+import java.awt.Color;
+
+import org.openoffice.xmerge.util.EndianConverter;
+import org.openoffice.xmerge.util.ColourConverter;
+import org.openoffice.xmerge.converter.xml.ParaStyle;
+import org.openoffice.xmerge.converter.xml.TextStyle;
+
+
+/**
+ * Represents a paragraph data structure within a Pocket Word document.
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+class Paragraph implements PocketWordConstants {
+ /*
+ * The data elements of a Paragraph.
+ *
+ * As the 'unknown' values are not calculated they are declared static.
+ * They are not declared final because they do have a calculable value.
+ */
+ private static short unknown1 = 0x23;
+ private short dataWords = 0;
+ private short textLength = 0;
+ private short lengthWithFormatting = 0;
+ private short lines = 0;
+
+ private static final short marker = (short)0xFFFF;
+ private static int unknown2 = 0x22; // May be two short values
+
+ private short specialIndentation = 0;
+ private short leftIndentation = 0;
+ private short rightIndentation = 0;
+
+ private byte bullets = 0;
+ private byte alignment = 0;
+
+ private static int unknown3 = 0;
+
+ // Will always have at least these formatting settings in each paragraph
+ private short defaultFont = 2; // Courier New for the time being
+ private short defaultSize = 10;
+
+
+ /*
+ * Remaining elements assist in calculating correct values for the paragraph
+ * representation.
+ */
+
+ private Vector textSegments = null;
+
+ private Vector lineDescriptors = null;
+
+ private ParaStyle pStyle = null;
+
+ private boolean isLastParagraph = false;
+
+
+    /*
+     * Common initialisation shared by the public constructors: creates the
+     * (initially empty) Vector that holds the paragraph's text segments.
+     */
+    private Paragraph () {
+        this.textSegments = new Vector(0, 1);
+    }
+
+
+    /**
+     * <p>Creates an empty paragraph for the SXW to Pocket Word direction.
+     * Text is added afterwards with
+     * {@link #addTextSegment(String, TextStyle) addTextSegment}.</p>
+     *
+     * @param   style   Paragraph style object describing the formatting style
+     *                  of this paragraph.
+     */
+    public Paragraph (ParaStyle style) {
+        this();
+
+        this.pStyle = style;
+        this.lineDescriptors = new Vector(0, 1);
+    }
+
+
+    /**
+     * <p>Constructor for use when converting from Pocket Word format to SXW
+     * format.</p>
+     *
+     * <p>The fixed header fields are read from the first 28 bytes; the
+     * <code>lengthWithFormatting</code> bytes that follow are then parsed into
+     * text segments.</p>
+     *
+     * @param   data    Byte array containing byte data describing this paragraph
+     *                  from the Pocket Word file.
+     */
+    public Paragraph (byte[] data) {
+        this();
+
+        /*
+         * Read in all fixed data from the array
+         *
+         * unknown1 appears at data[0] and data[1]
+         */
+        dataWords = EndianConverter.readShort(new byte[] { data[2], data[3] } );
+
+        /*
+         * The stored text length at data[4] and data[5] is deliberately not
+         * copied into textLength.  addTextSegment() adds the length of every
+         * segment produced by parseText() to that field, so seeding it from
+         * the header as well would count the paragraph's text twice.
+         */
+
+        lengthWithFormatting = EndianConverter.readShort(
+                                        new byte[] { data[6], data[7] } );
+        lines = EndianConverter.readShort(new byte[] { data[8], data [9] } );
+
+        /*
+         * The marker appears at data[10] and data[11].
+         *
+         * The value of unknown2 is at data[12], data[13], data[14] and data[15].
+         */
+
+        specialIndentation = EndianConverter.readShort(new byte[] { data[16], data[17] } );
+        leftIndentation = EndianConverter.readShort(new byte[] { data[18], data [19] } );
+        rightIndentation = EndianConverter.readShort(new byte[] { data[20], data [21] } );
+
+        bullets = data[22];
+        alignment = data[23];
+
+        // The value of unknown3 is at data[24], data[25], data[26] and data[27].
+
+        /*
+         * The actual paragraph data is in the remainder of the byte sequence.
+         *
+         * Only the actual text sequence with the embedded formatting tags is
+         * relevant to the conversion from Pocket Word to SXW format.
+         */
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        bos.write(data, 28, lengthWithFormatting);
+        parseText(bos.toByteArray());
+    }
+
+
+    /*
+     * Processes the text portion of the raw paragraph data from the Pocket Word
+     * file.  This data also includes formatting settings for the text in the
+     * paragraph.
+     *
+     * Formatting changes appear like XML/HTML tags.  Formatted blocks are
+     * preceded by a sequence of bytes switching on a formatting change and
+     * followed by a sequence switching off that formatting change.
+     *
+     * Each run of text between tag sequences is handed to addTextSegment()
+     * together with the style in force for that run (null when nothing
+     * differs from the defaults).  Parsing stops at the C4 00 terminator.
+     */
+    private void parseText (byte[] data) {
+
+        StringBuffer sb = new StringBuffer("");
+
+        // Mask passed to every TextStyle created below
+        int mask = TextStyle.BOLD | TextStyle.ITALIC | TextStyle.UNDERLINE
+                    | TextStyle.STRIKETHRU;
+
+        // Formatting state currently in force
+        String fontName = null;
+        int fontSize = 0;
+        Color textColour = null;
+        Color backColour = null;
+        int modifiers = 0;
+
+        TextStyle ts = null;
+
+        boolean inSequence = false;
+        boolean sawText = false;
+
+        // Start from the very beginning
+        for (int i = 0; i < data.length; i++) {
+            // Will encounter at least two codes first
+            if ((byte)(data[i] & 0xF0) == FORMATTING_TAG) {
+                if (sawText) {
+                    // Style change so dump previous segment and style info
+                    addTextSegment(sb.toString(), ts);
+                    sb = new StringBuffer("");
+                    sawText = false;
+                }
+
+                switch (data[i]) {
+                    case FONT_TAG:
+                        int index = EndianConverter.readShort(
+                                new byte[] { data[i + 1], data[i + 2] } );
+
+                        /*
+                         * Standard font.
+                         *
+                         * Should really be one, but as the only supported font
+                         * currently is Courier New, want to leave it at Courier
+                         * New for round trip conversions.
+                         *
+                         * Also need to account for the fact that Tahoma is the
+                         * correct standard font.  Indices 1 and 2 are therefore
+                         * both treated as the default, recorded as "2" once a
+                         * font has been set explicitly.
+                         */
+                        if (index == 1 || index == 2) {
+                            if (fontName != null) {
+                                fontName = "2";
+                            }
+                        }
+                        else {
+                            fontName = String.valueOf(index);
+                        }
+                        i += 2;
+                        break;
+
+
+                    case FONT_SIZE_TAG:
+                        int size = EndianConverter.readShort(
+                                new byte[] { data[i + 1], data[i + 2] } );
+
+                        if (size == 0) {
+                            // Flags the end of the last paragraph
+                            isLastParagraph = true;
+                            i += 2;
+                            break;
+                        }
+
+                        // 10 is the standard size; 0 means "never set"
+                        if (size != 10) {
+                            fontSize = size;
+                        }
+                        else if (fontSize != 0) {
+                            fontSize = 10;
+                        }
+                        i += 2;
+                        break;
+
+
+                    case COLOUR_TAG:
+                        if (data[i + 1] != 0) {
+                            ColourConverter cc = new ColourConverter();
+                            textColour = cc.convertToRGB(
+                               EndianConverter.readShort(new byte[] { data[i + 1],
+                                                                   data[i + 2] } ));
+                        }
+                        else {
+                            textColour = null;
+                        }
+                        i += 2;
+                        break;
+
+
+                    case FONT_WEIGHT_TAG:
+                        if (data[i + 1] == FONT_WEIGHT_BOLD
+                                || data[i + 1] == FONT_WEIGHT_THICK) {
+                            modifiers |= TextStyle.BOLD;
+                        }
+                        else {
+                            /*
+                             * Clear the bit.  XOR would switch bold ON when
+                             * a "normal weight" tag arrives while bold is not
+                             * currently set.
+                             */
+                            modifiers &= ~TextStyle.BOLD;
+                        }
+                        i += 2;
+                        break;
+
+
+                    case ITALIC_TAG:
+                        if (data[i + 1] == (byte)0x01) {
+                            modifiers |= TextStyle.ITALIC;
+                        }
+                        else {
+                            modifiers &= ~TextStyle.ITALIC;
+                        }
+                        i++;
+                        break;
+
+
+                    case UNDERLINE_TAG:
+                        if (data[i + 1] == (byte)0x01) {
+                            modifiers |= TextStyle.UNDERLINE;
+                        }
+                        else {
+                            modifiers &= ~TextStyle.UNDERLINE;
+                        }
+                        i++;
+                        break;
+
+
+                    case STRIKETHROUGH_TAG:
+                        if (data[i + 1] == (byte)0x01) {
+                            modifiers |= TextStyle.STRIKETHRU;
+                        }
+                        else {
+                            modifiers &= ~TextStyle.STRIKETHRU;
+                        }
+                        i++;
+                        break;
+
+                    case HIGHLIGHT_TAG:
+                        /*
+                         * Highlighting is treated by OpenOffice as a
+                         * background colour.
+                         */
+                        if (data[i + 1] == (byte)0x01) {
+                            backColour = Color.yellow;
+                        }
+                        else {
+                            backColour = null;
+                        }
+                        i++;
+                        break;
+                }
+
+                inSequence = true;
+                continue;
+            }
+
+            if (inSequence) {
+                // Style information has been changed.  Create new style here
+
+                inSequence = false;
+
+                /*
+                 * Decide from the state itself whether anything differs from
+                 * the defaults.  A running counter of on/off tags goes wrong
+                 * as soon as tags are repeated or arrive unpaired.
+                 */
+                boolean styled = modifiers != 0
+                        || textColour != null
+                        || backColour != null
+                        || (fontName != null && !fontName.equals("2"))
+                        || (fontSize != 0 && fontSize != 10);
+
+                if (styled) {
+                    ts = new TextStyle(null, TEXT_STYLE_FAMILY, DEFAULT_STYLE,
+                                        mask, modifiers, fontSize, fontName, null);
+                    ts.setColors(textColour, backColour);
+                }
+                else {
+                    ts = null;
+                }
+            }
+
+            /*
+             * C4 xx seems to indicate a control code.  C4 00 indicates the end
+             * of a paragraph; C4 04 indicates a tab space.  Only these two
+             * have been seen so far.
+             */
+            if (data[i] == (byte)0xC4) {
+                /*
+                 * Redundant nodes are sometimes added to the last paragraph
+                 * because a new sequence is being processed when the flag is
+                 * set.
+                 *
+                 * To avoid this, do nothing with the last paragraph unless no
+                 * text has been added for it already.  In that case, add the
+                 * empty text segment being processed to ensure that all
+                 * paragraphs have at least one text segment.
+                 */
+                if (data[i + 1] == (byte)0x00) {
+                    if (isLastParagraph && textSegments.size() > 0) {
+                        return;
+                    }
+                    addTextSegment(sb.toString(), ts);
+                    return;
+                }
+                sb.append("\t");
+                sawText = true;
+                i++;
+                continue;
+            }
+
+            // Mask to avoid sign extension: bytes >= 0x80 must not become 0xFFxx
+            sb.append((char)(data[i] & 0xFF));
+            sawText = true;
+        }
+    }
+
+
+ /**
+ * <p>Adds details of a new text block to the <code>Paragraph</code> object.
+ * </p>
+ *
+ * @param text The text of the new block.
+ * @param style Text style object describing the formatting attached
+ * to this block of text.
+ */
+ public void addTextSegment(String text, TextStyle style) {
+ textLength += text.length();
+ textSegments.add(new ParagraphTextSegment(text, style));
+ }
+
+
+    /**
+     * <p>Marks this <code>Paragraph</code> as being, or not being, the final
+     * paragraph in the document.</p>
+     *
+     * <p>Used during conversion from SXW format to Pocket Word format, where
+     * the last paragraph finishes with a different byte sequence to other
+     * paragraphs.</p>
+     *
+     * @param   isLast  true if the Paragraph is the last in the document,
+     *                  false otherwise.
+     */
+    public void setLastParagraph(boolean isLast) {
+        this.isLastParagraph = isLast;
+    }
+
+
+    /**
+     * <p>Counterpart of {@link #setLastParagraph(boolean) setLastParagraph}.
+     * Reports whether this <code>Paragraph</code> is flagged as the last one
+     * in the Pocket Word document.</p>
+     *
+     * @return  true if the Paragraph is the last in the document; false otherwise.
+     */
+    public boolean getLastParagraph () {
+        return this.isLastParagraph;
+    }
+
+
+ /**
+ * <p>This method returns the Pocket Word representation of this
+ * <code>Paragraph</code> in Little Endian byte order.</p>
+ *
+ * <p>Used when converting from SXW format to Pocket Word format.</p>
+ *
+ * <p>The line descriptors and line count are recalculated on every call
+ * before the paragraph header, the default font and size tags, the text
+ * segments, the terminator and the line descriptors are written out, in
+ * that order.</p>
+ *
+ * @return <code>byte</code> array containing the formatted representation
+ * of this Paragraph.
+ */
+ public byte[] getParagraphData() {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+
+ postProcessText();
+
+ /*
+ * Need information about the paragraph segments in two places
+ * so calculate them first.
+ *
+ * The stream contains the text wrapped in any formatting sequences that
+ * are necessary.
+ */
+ ByteArrayOutputStream segs = new ByteArrayOutputStream();
+
+ try {
+ for (int i = 0; i < textSegments.size(); i++) {
+ ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i);
+ segs.write(pts.getData());
+ }
+ }
+ catch (IOException ioe) {
+ // Should never happen in a memory based stream
+ }
+
+ /*
+ * Number of data words for this paragraph descriptor:
+ *
+ * 26 is the number of bytes prior to the start of the segment.
+ * 3 comes from the C4 00 00 terminating sequence.
+ *
+ * The last paragraph carries 6 extra bytes. The byte total is rounded
+ * up to a multiple of 4 and then stored as a count of 4-byte words.
+ */
+ dataWords = (short)(26 + segs.size() + 3 + 4);
+ if (isLastParagraph) {
+ dataWords += 6;
+ }
+ if (dataWords % 4 != 0) {
+ dataWords += (4 - (dataWords % 4));
+ }
+ dataWords /= 4;
+
+ /*
+ * The 8 bytes are made up of E6 ?0 00 and E5 ?0 00 at the start of the
+ * text along with the C4 00 that terminates it.
+ *
+ * In the event that the paragraph is the last one E6 00 00 is also
+ * present at the end of the text. Also, as we currently use a font
+ * other than the first in the index (Tahoma) E5 01 00 is also present.
+ *
+ * Make sure this is accurate when font specifications change
+ */
+ lengthWithFormatting = (short)(segs.size() + (isLastParagraph ? 14 : 8));
+
+ try {
+ bos.write(EndianConverter.writeShort(unknown1));
+ bos.write(EndianConverter.writeShort(dataWords));
+ bos.write(EndianConverter.writeShort((short)(textLength + 1)));
+ bos.write(EndianConverter.writeShort(lengthWithFormatting));
+ bos.write(EndianConverter.writeShort(lines));
+
+ bos.write(EndianConverter.writeShort(marker));
+ bos.write(EndianConverter.writeInt(unknown2));
+
+ bos.write(EndianConverter.writeShort(specialIndentation));
+ bos.write(EndianConverter.writeShort(leftIndentation));
+ bos.write(EndianConverter.writeShort(rightIndentation));
+
+ bos.write(bullets);
+
+ if (pStyle != null && pStyle.isAttributeSet(ParaStyle.TEXT_ALIGN)) {
+ switch (pStyle.getAttribute(ParaStyle.TEXT_ALIGN)) {
+
+ case ParaStyle.ALIGN_RIGHT:
+ bos.write(0x01);
+ break;
+
+ case ParaStyle.ALIGN_CENTER:
+ bos.write(0x02);
+ break;
+
+ default:
+ bos.write(0x00); // Left align in all other circumstances
+ break;
+ }
+ }
+ else {
+ bos.write(0x00);
+ }
+
+ bos.write(EndianConverter.writeInt(unknown3));
+
+
+ /*
+ * Write out font and size.
+ *
+ * If font support is added then this should change as the information
+ * will have to be calculated from a Font table.
+ */
+ bos.write(FONT_TAG);
+ bos.write(EndianConverter.writeShort(defaultFont));
+ bos.write(FONT_SIZE_TAG);
+ bos.write(EndianConverter.writeShort(defaultSize));
+
+ // Write out the text segments
+ bos.write(segs.toByteArray());
+
+ /*
+ * If this is the last paragraph in the document then we need to make
+ * sure that the paragraph text is terminated correctly with an E6 00 00
+ * before the C4 00 00.
+ */
+ if (isLastParagraph) {
+ if (defaultFont != 1) {
+ // Must always go back to the first font.
+ bos.write(FONT_TAG);
+ bos.write(EndianConverter.writeShort((short)0x01));
+ }
+ bos.write(FONT_SIZE_TAG);
+ bos.write(EndianConverter.writeShort((short)0x00));
+ }
+
+ bos.write(new byte[] { (byte)0xC4, 0x00, 0x00 } );
+
+ int padding = 0;
+ if (bos.size() % 4 != 0) {
+ padding = 4 - (bos.size() % 4);
+ }
+ for (int i = 0; i < padding; i++) {
+ bos.write(0x00);
+ }
+
+ // Third byte should match first byte after 0xFF 0xFF
+ bos.write(new byte[] { 0x42, 0x00, 0x22, 0x00} );
+
+ /*
+ * Meaning of last two bytes seems to be the number of words describing
+ * lines. This is calculated at 10 bytes per descriptor.
+ *
+ * May have two extra padding bytes that need to be accounted for too
+ * The division below may lose 2 bytes (integer result).
+ */
+ int wordsRemaining = (lineDescriptors.size() * 10) / 4;
+ if ((lineDescriptors.size() * 10) % 4 != 0) {
+ wordsRemaining++;
+ }
+ bos.write(EndianConverter.writeShort((short)wordsRemaining));
+
+
+ // Now write out the line descriptors
+ for (int i = 0; i < lineDescriptors.size(); i++) {
+ LineDescriptor ld = (LineDescriptor)lineDescriptors.elementAt(i);
+
+ bos.write(ld.getDescriptorInfo());
+ }
+
+
+ if (!isLastParagraph) {
+ /*
+ * There may be a need to pad this. Will be writing at
+ * either start of 4 byte block or 2 bytes into it.
+ */
+ if (bos.size() % 4 != 2) {
+ bos.write(EndianConverter.writeShort((short)0));
+ }
+ bos.write(EndianConverter.writeShort((short)0x41));
+ }
+ }
+ catch (IOException ioe) {
+ // Should never occur for a memory based stream
+ }
+
+ return bos.toByteArray();
+ }
+
+
+    /*
+     * This method handles the calculation of correct values for line lengths
+     * in each individual descriptor and the number of lines in the paragraph.
+     *
+     * The text of all segments is concatenated and cut into lines.  A line
+     * is broken after the last whitespace character found within the next 30
+     * characters; if there is none, the line is cut after 29 characters.  A
+     * line descriptor is recorded for every line, and previous descriptors
+     * are discarded on each call.
+     *
+     * TODO: Update to take account of different font metrics.
+     */
+    private void postProcessText() {
+        // Line Descriptor info should be eliminated each time
+        lineDescriptors = new Vector(1, 1);
+        lines = 0;
+
+        StringBuffer sb = new StringBuffer("");
+        for (int i = 0; i < textSegments.size(); i++) {
+            ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i);
+            sb.append(pts.getText());
+        }
+
+        // An empty paragraph still occupies one (empty) line
+        if (sb.length() == 0) {
+            lines = 1;
+            lineDescriptors.add(new LineDescriptor((short)1, (short)0));
+            return;
+        }
+
+        // In courier, can have no more than 29 chars per line
+        int chunkStart = 0;
+
+        while (chunkStart < sb.length()) {
+            /*
+             * Fewer than 30 characters left: the remainder fits on one line.
+             * Checked explicitly rather than by catching the exception that
+             * substring() would throw for an out-of-range end index.
+             */
+            if (sb.length() - chunkStart < 30) {
+                String rest = sb.substring(chunkStart);
+                lineDescriptors.add(new LineDescriptor((short)(rest.length() + 1), (short)(rest.length() * 36)));
+                chunkStart += rest.length();
+                lines++;
+                continue;
+            }
+
+            String text = sb.substring(chunkStart, chunkStart + 30);
+
+            // Search backwards for the last place the line can be broken
+            int lastWhitespace = -1;
+
+            for (int i = 29; i >= 0; i--) {
+                if (Character.isWhitespace(text.charAt(i))) {
+                    lastWhitespace = i;
+                    break;
+                }
+            }
+
+            if (lastWhitespace != -1) {
+                // The line can be split
+                lineDescriptors.add(new LineDescriptor((short)(lastWhitespace + 1), (short)(lastWhitespace * 36)));
+                chunkStart += lastWhitespace + 1;
+                lines++;
+            }
+            else {
+                // The line is completely occupied by a single word
+                lineDescriptors.add(new LineDescriptor((short)29, (short)(29 * 36)));
+                chunkStart += 29;
+                lines++;
+            }
+        }
+    }
+
+
+    /**
+     * <p>Returns the number of lines in the <code>Paragraph</code>.  The line
+     * layout is recalculated before the count is returned.</p>
+     *
+     * @return  The number of lines in the paragraph.
+     */
+    public short getLines() {
+        this.postProcessText();
+        return this.lines;
+    }
+
+
+ /**
+ * <p>Toggles the flag indicating that the <code>Paragraph</code> is a
+ * bulleted paragraph.</p>
+ *
+ * @param isBulleted true to enable bulleting for this paragraph, false
+ * otherwise.
+ */
+ public void setBullets(boolean isBulleted) {
+ if (isBulleted) {
+ bullets = (byte)0xFF;
+ }
+ else {
+ bullets = 0;
+ }
+ }
+
+    /**
+     * <p>Reports whether the <code>Paragraph</code> is bulleted, i.e. whether
+     * its bullet flag byte is non-zero.</p>
+     *
+     * @return  true if the paragraph is bulleted, false otherwise.
+     */
+    public boolean isBulleted() {
+        return bullets != 0;
+    }
+
+
+    /**
+     * <p>Returns the text length recorded for the <code>Paragraph</code>,
+     * which does not include formatting sequences.</p>
+     *
+     * @return  The length of the paragraph.
+     */
+    public int getTextLength () {
+        return this.textLength;
+    }
+
+
+ /**
+ * <p>Returns an <code>Enumeration</code> over the individual text segments
+ * of the <code>Paragraph</code>.</p>
+ *
+ * @return An <code>Enumeration</code> of the text segments.
+ */
+ public Enumeration getSegmentsEnumerator () {
+ return textSegments.elements();
+ }
+
+
+    /**
+     * <p>Builds a paragraph style object for the paragraph level formatting
+     * used by this <code>Paragraph</code>.  Only the alignment is filled in;
+     * the margin attributes are listed but left without values.</p>
+     *
+     * @return  Paragraph style object describing the <code>Paragraph</code>,
+     *          or <code>null</code> when the alignment is the default (left).
+     */
+    public ParaStyle makeStyle() {
+        String alignName;
+
+        if (alignment == 2) {
+            alignName = "center";
+        }
+        else if (alignment == 1) {
+            alignName = "right";
+        }
+        else {
+            // Left (0) or anything unrecognised: not interested in the default.
+            return null;
+        }
+
+        /*
+         * Not interested in left or right indents just yet.  Don't know
+         * how to calculate them.
+         */
+        int attrs[] = new int[] { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT,
+                                  ParaStyle.TEXT_ALIGN };
+        String values[] = new String[attrs.length];
+        values[2] = alignName;
+
+        return new ParaStyle(null, PARAGRAPH_STYLE_FAMILY, null, attrs,
+                              values, null);
+    }
+
+
+    /*
+     * Describes one of the records that follow the text of a Paragraph.  For
+     * every on-screen line the Paragraph uses, a LineDescriptor records how
+     * many characters are on the line and how much screen space they occupy.
+     *
+     * Both values are worked out while the paragraph is post-processed; see
+     * postProcessText().
+     *
+     * The unit of measurement used for screen space is currently unknown.
+     */
+    private class LineDescriptor {
+        private short characters = 0;
+        private int filler = 0;
+        private short screen_space = 0;
+        private short marker = 0;
+
+        private LineDescriptor(short charCount, short width) {
+            this.characters = charCount;
+            this.screen_space = width;
+            this.marker = (short)0x040C;    // Not a constant.  Depends on font used.
+        }
+
+
+        /*
+         * Serialises the descriptor: character count, filler, screen space
+         * and marker, in that order.
+         */
+        private byte[] getDescriptorInfo(){
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+            try {
+                out.write(EndianConverter.writeShort(this.characters));
+                out.write(EndianConverter.writeInt(this.filler));
+                out.write(EndianConverter.writeShort(this.screen_space));
+                out.write(EndianConverter.writeShort(this.marker));
+            }
+            catch (IOException ioe) {
+                // Should never happen in a memory based stream.
+            }
+
+            return out.toByteArray();
+        }
+    }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java
new file mode 100644
index 000000000000..d2a460037771
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java
@@ -0,0 +1,208 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: ParagraphTextSegment.java,v $
+ * $Revision: 1.5 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.converter.xml.TextStyle;
+
+import org.openoffice.xmerge.util.EndianConverter;
+
+import org.openoffice.xmerge.util.ColourConverter;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import java.awt.Color;
+
+/**
+ * This class represents a portion of text with a particular formatting style.
+ * The style may differ from the default style of the paragraph of which it
+ * is part.
+ *
+ * @author  Mark Murnane
+ * @version 1.1
+ */
+class ParagraphTextSegment implements PocketWordConstants {
+
+    private String pText;
+    private TextStyle pStyle;
+
+
+    /**
+     * <p>Initialise a new <code>ParagraphTextSegment</code>.</p>
+     * <p>Both parameters may be <code>null</code>.</p>
+     *
+     * @param   data    The text of this segment.
+     * @param   style   The style describing this segment.
+     */
+    public ParagraphTextSegment (String data, TextStyle style) {
+        pText = data;
+        pStyle = style;
+    }
+
+    /**
+     * <p>Sets the text for this segment.</p>
+     *
+     * @param   data    The text of this segment.
+     */
+    public void setText (String data) {
+        pText = data;
+    }
+
+    /**
+     * <p>Gets the text for this segment.</p>
+     *
+     * @return  The text of this segment.
+     */
+    public String getText () {
+        return pText;
+    }
+
+
+    /**
+     * <p>Sets the style for this segment.</p>
+     *
+     * @param   style   The style describing this segment.
+     */
+    public void setStyle (TextStyle style) {
+        pStyle = style;
+    }
+
+
+    /**
+     * <p>Gets the style for this segment.</p>
+     *
+     * @return  The style describing this segment.
+     */
+    public TextStyle getStyle () {
+        return pStyle;
+    }
+
+
+    /**
+     * <p>Returns the string data for this text segment wrapped with the
+     * appropriate byte codes for the formatting settings used.</p>
+     *
+     * <p>Every formatting code that is switched on before the text is
+     * switched off again after it.  A segment whose text is
+     * <code>null</code> contributes no text bytes.</p>
+     *
+     * @return  <code>byte</code> array containing formatted text in Pocket Word
+     *          format.
+     */
+    public byte[] getData () {
+        ByteArrayOutputStream data = new ByteArrayOutputStream();
+
+        boolean colourSet    = false;
+        boolean boldSet      = false;
+        boolean italicSet    = false;
+        boolean underlineSet = false;
+        boolean strikeSet    = false;
+        boolean highlightSet = false;
+
+        // TODO: Font changes need to be worked out here
+
+        try {
+            if (pStyle != null) {
+                if (pStyle.getFontColor() != null) {
+                    ColourConverter cc = new ColourConverter();
+                    short colourCode = cc.convertFromRGB(pStyle.getFontColor());
+                    if (colourCode != 0) {  // not black
+                        data.write(COLOUR_TAG);
+                        data.write(EndianConverter.writeShort(colourCode));
+                        colourSet = true;
+                    }
+                }
+                if (pStyle.isSet(TextStyle.BOLD) && pStyle.getAttribute(TextStyle.BOLD)) {
+                    data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_BOLD, 0x00 } );
+                    boldSet = true;
+                }
+                if (pStyle.isSet(TextStyle.ITALIC) && pStyle.getAttribute(TextStyle.ITALIC)) {
+                    data.write(new byte[] { ITALIC_TAG, 0x01 } );
+                    italicSet = true;
+                }
+                if (pStyle.isSet(TextStyle.UNDERLINE) && pStyle.getAttribute(TextStyle.UNDERLINE)) {
+                    data.write(new byte[] { UNDERLINE_TAG, 0x01 } );
+                    underlineSet = true;
+                }
+                if (pStyle.isSet(TextStyle.STRIKETHRU) && pStyle.getAttribute(TextStyle.STRIKETHRU)) {
+                    data.write(new byte[] { STRIKETHROUGH_TAG, 0x01 } );
+                    strikeSet = true;
+                }
+                if (pStyle.getBackgroundColor() != null) {
+                    data.write(new byte[] { HIGHLIGHT_TAG, 0x01 } );
+                    highlightSet = true;
+                }
+            }
+
+
+            // Now write out the data
+            if ("\t".equals(pText)) {
+                /*
+                 * Tabs are a special case.  They are represented by Pocket Word
+                 * as the LE sequence 0xC4 0x04.
+                 */
+                data.write(new byte[] { (byte)0xC4, 0x04 } );
+            }
+            else if (pText != null) {
+                // NOTE(review): getBytes() encodes with the platform default
+                // charset - confirm this matches what Pocket Word expects.
+                data.write(pText.getBytes());
+            }
+            // A null text is permitted by the constructor: nothing to write.
+
+
+            // Now close out any of the settings changes
+            if (colourSet) {
+                /*
+                 * Colours may change without changing back to black, but
+                 * without knowing what the previous colour was, the only
+                 * way to ensure correct conversion is to restore to black and
+                 * let the next segment change the colour again.
+                 */
+                data.write(new byte[] { COLOUR_TAG, 0x00, 0x00 } );
+            }
+            if (boldSet) {
+                data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_NORMAL, 0x00 } );
+            }
+            if (italicSet) {
+                data.write(new byte[] { ITALIC_TAG, 0x00 } );
+            }
+            if (underlineSet) {
+                data.write(new byte[] { UNDERLINE_TAG, 0x00 } );
+            }
+            if (strikeSet) {
+                data.write(new byte[] { STRIKETHROUGH_TAG, 0x00 } );
+            }
+            if (highlightSet) {
+                data.write(new byte[] { HIGHLIGHT_TAG, 0x00 } );
+            }
+        }
+        catch (IOException ioe) {
+            // Should never occur in a memory based stream
+        }
+
+        return data.toByteArray();
+    }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java
new file mode 100644
index 000000000000..35e6b89a9ec6
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java
@@ -0,0 +1,168 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: PluginFactoryImpl.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+
+import java.io.InputStream;
+import java.io.IOException;
+
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.DocumentDeserializerFactory;
+import org.openoffice.xmerge.DocumentSerializerFactory;
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.DocumentMergerFactory;
+import org.openoffice.xmerge.ConverterCapabilities;
+
+import org.openoffice.xmerge.util.registry.ConverterInfo;
+
+import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory;
+
+
+/**
+ * Factory class used to create converters to/from the Pocket Word format.
+ *
+ * @author  Mark Murnane
+ * @version 1.1
+ */
+public final class PluginFactoryImpl extends SxwPluginFactory
+    implements DocumentDeserializerFactory, DocumentSerializerFactory,
+               DocumentMergerFactory{
+
+    /** ConverterCapabilities object for this type of conversion. */
+    private final static ConverterCapabilities converterCap =
+        new ConverterCapabilitiesImpl();
+
+
+    /**
+     * <p>Constructor that passes the <code>ConverterInfo</code> describing
+     * this plug-in on to the superclass.</p>
+     *
+     * @param   ci  <code>ConverterInfo</code> object.
+     */
+    public PluginFactoryImpl (ConverterInfo ci) {
+        super(ci);
+    }
+
+
+    /**
+     * <p>Creates the <code>DocumentSerializer</code> used to convert
+     * from the OpenOffice Writer <code>Document</code> format
+     * to the Pocket Word <code>Document</code> format.</p>
+     *
+     * <p>The <code>Document</code> object is passed along to the
+     * created <code>DocumentSerializer</code> via its constructor.</p>
+     *
+     * @param   doc  <code>Document</code> object that the created
+     *               <code>DocumentSerializer</code> object uses
+     *               as input.
+     *
+     * @return  A <code>DocumentSerializer</code> object.
+     */
+    public DocumentSerializer createDocumentSerializer(Document doc) {
+        DocumentSerializer serializer = new DocumentSerializerImpl(doc);
+        return serializer;
+    }
+
+
+    /**
+     * <p>Creates the <code>DocumentDeserializer</code> used to convert
+     * from the Pocket Word <code>Document</code> format to
+     * the OpenOffice Writer <code>Document</code> format.</p>
+     *
+     * <p>The <code>ConvertData</code> object is passed along to the
+     * created <code>DocumentDeserializer</code> via its constructor.</p>
+     *
+     * @param   cd  <code>ConvertData</code> object that the created
+     *              <code>DocumentDeserializer</code> object uses as
+     *              input.
+     *
+     * @return  A <code>DocumentDeserializer</code> object.
+     */
+    public DocumentDeserializer createDocumentDeserializer(ConvertData cd) {
+        DocumentDeserializer deserializer = new DocumentDeserializerImpl(cd);
+        return deserializer;
+    }
+
+
+    /**
+     * <p>Create a <code>Document</code> object that corresponds to
+     * the Pocket Word data passed in via the <code>InputStream</code>
+     * object.</p>
+     *
+     * <p>A new <code>PocketWordDocument</code> with the given name is
+     * created and asked to read its content from the stream.</p>
+     *
+     * @param   name  The <code>Document</code> name.
+     * @param   is    <code>InputStream</code> object corresponding
+     *                to the <code>Document</code>.
+     *
+     * @return  A <code>Document</code> object representing the
+     *          Pocket Word format.
+     *
+     * @throws   IOException   If any I/O error occurs.
+     */
+    public Document createDeviceDocument(String name, InputStream is)
+            throws IOException {
+        PocketWordDocument deviceDoc = new PocketWordDocument(name);
+        deviceDoc.read(is);
+        return deviceDoc;
+    }
+
+
+    /**
+     * Returns an instance of <code>DocumentMergerImpl</code>,
+     * which is an implementation of the <code>DocumentMerger</code>
+     * interface.  The merger is given the shared converter capabilities
+     * of this plug-in.
+     *
+     * @param   doc  <code>Document</code> to merge.
+     *
+     * @return  A DocumentMergerImpl object.
+     */
+    public DocumentMerger createDocumentMerger(Document doc) {
+        return new DocumentMergerImpl(doc, converterCap);
+    }
+
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java
new file mode 100644
index 000000000000..03af731f2122
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java
@@ -0,0 +1,98 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: PocketWordConstants.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+
+/**
+ * Constants describing Pocket Word file attributes and the in-stream
+ * formatting codes used by the converter.
+ *
+ * <p>Fields declared in an interface are implicitly
+ * <code>public static final</code>, so the modifiers are omitted.</p>
+ *
+ * @author  Mark Murnane
+ * @version 1.1
+ */
+public interface PocketWordConstants {
+
+    // ---- File and style names -------------------------------------------
+
+    /** File extension for Pocket Word files. */
+    String FILE_EXTENSION = ".psw";
+
+    /** Name of the default style. */
+    String DEFAULT_STYLE = "Standard";
+
+    /** Family name for Paragraph styles. */
+    String PARAGRAPH_STYLE_FAMILY = "paragraph";
+
+    /** Family name for Text styles. */
+    String TEXT_STYLE_FAMILY = "text";
+
+
+    // ---- Formatting codes -----------------------------------------------
+
+    /**
+     * Generic Pocket Word formatting code.
+     *
+     * Formatting codes are 0xEz, where z indicates the specific format code.
+     */
+    byte FORMATTING_TAG = (byte) 0xE0;
+
+    /** Font specification tag.  The two bytes following indicate which font. */
+    byte FONT_TAG = (byte) 0xE5;
+
+    /** Font size tag.  The two bytes following specify font size in points. */
+    byte FONT_SIZE_TAG = (byte) 0xE6;
+
+    /** Colour tag.  Two bytes following index a 4-bit colour table. */
+    byte COLOUR_TAG = (byte) 0xE7;
+
+    /** Font weight tag.  Two bytes following indicate weighting of font. */
+    byte FONT_WEIGHT_TAG = (byte) 0xE8;
+
+    /** Fine font weight value. */
+    byte FONT_WEIGHT_FINE = (byte) 0x01;
+
+    /** Normal font weight value. */
+    byte FONT_WEIGHT_NORMAL = (byte) 0x04;
+
+    /** Bold font weight value. */
+    byte FONT_WEIGHT_BOLD = (byte) 0x07;
+
+    /** Thick font weight value. */
+    byte FONT_WEIGHT_THICK = (byte) 0x09;
+
+    /** Italic tag.  Single byte following indicates whether italic is on. */
+    byte ITALIC_TAG = (byte) 0xE9;
+
+    /** Underline tag.  Single byte following indicates whether underline is on. */
+    byte UNDERLINE_TAG = (byte) 0xEA;
+
+    /** Strikethrough tag.  Single byte following indicates whether strikethrough is on. */
+    byte STRIKETHROUGH_TAG = (byte) 0xEB;
+
+    /** Highlighting tag.  Single byte following indicates whether highlighting is on. */
+    byte HIGHLIGHT_TAG = (byte) 0xEC;
+
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java
new file mode 100644
index 000000000000..8d4ad63fa82a
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java
@@ -0,0 +1,411 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: PocketWordDocument.java,v $
+ * $Revision: 1.4 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.converter.xml.ParaStyle;
+import org.openoffice.xmerge.converter.xml.TextStyle;
+
+import java.awt.Font;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+
+/**
+ * <p>Class representing a Pocket Word Document.</p>
+ *
+ * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
+ * and to read existing data to allow for conversion to OpenOffice Writer
+ * format.</p>
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+public class PocketWordDocument implements Document, PocketWordConstants {
+ // Document name with any ".psw" extension removed (see trimDocumentName()).
+ private String docName;
+
+ // Header block preceding the font table: 52 bytes, filled by read() and
+ // regenerated by initPreamble() whenever write() is called.
+ private byte[] preamble;
+ // Font table.  read() adds font names (String) while loadFonts() adds
+ // encoded font records (ByteArrayOutputStream), so the element type
+ // depends on which of the two populated it.
+ private Vector fonts;
+ // Receives one addParagraph() entry per paragraph during write().
+ private DocumentDescriptor descriptor;
+ // Paragraph objects, in the order they were read or added.
+ private Vector paragraphs;
+
+ // Style given to the most recent addParagraph() call; it is stored here
+ // but not read anywhere else in this class.
+ private ParaStyle pStyle;
+ // Paragraph that addParagraphData() appends to; null until the first
+ // addParagraph() call.
+ private Paragraph currentPara;
+
+ /*
+ * The trailer currently appears to be constant, but if it is found to
+ * have a variable component, then this initialisation should be moved
+ * to an initTrailer() method.
+ *
+ * Padding is sometimes needed before the trailer to ensure the file
+ * ends on a 4-byte boundary, but this is handled in write().
+ *
+ * The trailer is 18 bytes long; read() relies on that length when it
+ * strips the trailer from the last paragraph.
+ */
+ private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
+ 0x09, 0x00,
+ 0x03, 0x00,
+ (byte)0x82, 0x00,
+ 0x00, 0x00,
+ 0x00, 0x00,
+ 0x00, 0x00,
+ 0x00, 0x00,
+ 0x00, 0x00 };
+
+
+ /**
+  * <p>Constructs a new, empty Pocket Word Document.</p>
+  *
+  * <p>The new document does not contain any information.  Document data
+  * must either be added using the appropriate methods, or an existing file
+  * can be {@link #read(InputStream) read} from an
+  * <code>InputStream</code>.</p>
+  *
+  * @param   name    The name of the <code>PocketWordDocument</code>.  A
+  *                  trailing Pocket Word file extension is removed.
+  */
+ public PocketWordDocument(String name) {
+     // Containers start empty and grow one element at a time.
+     paragraphs = new Vector(0, 1);
+     fonts = new Vector(0, 1);
+
+     descriptor = new DocumentDescriptor();
+     preamble = new byte[52];
+
+     docName = trimDocumentName(name);
+ }
+
+
+ /**
+  * <p>This method reads <code>byte</code> data from the InputStream and
+  * extracts font and paragraph data from the file.</p>
+  *
+  * <p>The stream is expected to contain, in order: the fixed-size
+  * preamble, one or more 80-byte font records (the last one flagged by the
+  * byte sequence 05 00 01 00 at its end), and then the descriptor and
+  * paragraph data followed by the 18-byte trailer.  Paragraph starts are
+  * located by scanning 4-byte words for an FF FF marker in the upper two
+  * bytes; the paragraph begins 8 bytes before that word.</p>
+  *
+  * <p>An empty stream is reported on <code>System.err</code> and leaves
+  * the document unchanged.  A stream that ends part-way through the
+  * preamble, the font table or the trailer is rejected with an
+  * <code>IOException</code>.</p>
+  *
+  * @param   docData InputStream containing a Pocket Word data file.
+  *
+  * @throws  IOException     In case of any I/O errors, or if the data is
+  *                          truncated or malformed.
+  */
+ public void read(InputStream docData) throws IOException {
+
+     if (docData == null) {
+         throw new IOException ("No input stream to convert");
+     }
+
+     // The preamble may become important for font declarations.
+     int preambleBytes = readFully(docData, preamble);
+     // #i33702# check for an empty InputStream.
+     if (preambleBytes == 0) {
+         System.err.println("Error:invalid input stream");
+         return;
+     }
+     if (preambleBytes < preamble.length) {
+         throw new IOException("Truncated Pocket Word preamble");
+     }
+
+     // Font records are 80 bytes: a 64 byte UTF-16LE name plus 16 bytes of
+     // attributes.  The last record ends with 05 00 01 00.
+     byte[] font = new byte[80];
+     do {
+         // Without this check a truncated file would loop forever, since
+         // a failed read leaves the previous record in the buffer.
+         if (readFully(docData, font) < font.length) {
+             throw new IOException("Truncated Pocket Word font table");
+         }
+
+         String name = new String(font, 0, 64, "UTF-16LE");
+         fonts.add(name.trim());
+
+     } while (!(font[76] == 5 && font[77] == 0
+                && font[78] == 1 && font[79] == 0));
+
+     /*
+      * TODO:  The document descriptor data that follows the fonts ends with
+      *        a variable section containing data for each of the paragraphs.
+      *        It may be possible to use this information to calculate starting
+      *        positions for each paragraph rather than iterating through the
+      *        entire byte stream.
+      */
+
+     // Slurp the remainder of the stream in chunks.
+     ByteArrayOutputStream bos = new ByteArrayOutputStream();
+     byte[] chunk = new byte[4096];
+     int count;
+     while ((count = docData.read(chunk)) != -1) {
+         bos.write(chunk, 0, count);
+     }
+
+     byte[] contentData = bos.toByteArray();
+     int start = 0;
+     boolean sawMarker = false;
+
+     // Only look at complete 4-byte words so that i + 3 is always valid.
+     for (int i = 0; i + 3 < contentData.length; i += 4) {
+         if (contentData[i + 2] != (byte)0xFF
+                 || contentData[i + 3] != (byte)0xFF) {
+             continue;
+         }
+
+         // A paragraph starts 8 bytes before its marker word.
+         if (i < 8) {
+             throw new IOException("Malformed Pocket Word paragraph header");
+         }
+
+         if (!sawMarker) {
+             start = i - 8;
+             sawMarker = true;
+         }
+         else {
+             // This marker belongs to the next paragraph, so the current
+             // one ends where the next one starts.
+             int end = i - 8;
+             paragraphs.add(new Paragraph(slice(contentData, start, end - start)));
+
+             // Reset the markers
+             sawMarker = false;
+             i -= 4;     // Skip back so this word is seen again as a start
+         }
+     }
+
+     /*
+      * Special case, the last paragraph
+      * If we got here, and the marker is set then we saw the start of the
+      * last paragraph, but no following paragraph.  Everything from start
+      * up to the trailer (and its optional two padding bytes) belongs to it.
+      */
+     if (contentData.length < 19) {
+         throw new IOException("Truncated Pocket Word document trailer");
+     }
+     // A zero byte just before the 18 byte trailer indicates 2 padding bytes.
+     int trailerLength = (contentData[contentData.length - 19] == 0) ? 20 : 18;
+     int lastLength = contentData.length - start - trailerLength;
+     if (lastLength < 0) {
+         throw new IOException("Truncated Pocket Word document trailer");
+     }
+     paragraphs.add(new Paragraph(slice(contentData, start, lastLength)));
+ }
+
+
+ /*
+  * Reads from the stream until the buffer is full or the stream ends.
+  * Returns the number of bytes actually stored in the buffer.
+  */
+ private static int readFully(InputStream in, byte[] buffer)
+         throws IOException {
+     int total = 0;
+     while (total < buffer.length) {
+         int n = in.read(buffer, total, buffer.length - total);
+         if (n == -1) {
+             break;
+         }
+         total += n;
+     }
+     return total;
+ }
+
+
+ /*
+  * Returns a copy of length bytes of src, starting at offset from.
+  */
+ private static byte[] slice(byte[] src, int from, int length) {
+     byte[] copy = new byte[length];
+     System.arraycopy(src, from, copy, 0, length);
+     return copy;
+ }
+
+
+ /*
+  * Returns the given name without a trailing Pocket Word file extension.
+  * The extension is matched case-insensitively; names that do not end
+  * with it are returned untouched.
+  */
+ private String trimDocumentName(String name) {
+     boolean hasExtension = name.toLowerCase().endsWith(FILE_EXTENSION);
+     if (!hasExtension) {
+         return name;
+     }
+
+     // Cut the extension off the end of the original (case-preserved) name.
+     int baseLength = name.length() - FILE_EXTENSION.length();
+     return name.substring(0, baseLength);
+ }
+
+
+ /**
+ * <p>Method to provide access to all of the <code>Paragraph</code> objects
+ * in the <code>Document</code>.</p>
+ *
+ * @return <code>Enumeration</code> over the paragraphs in the document.
+ */
+ public Enumeration getParagraphEnumeration() {
+ return paragraphs.elements();
+ }
+
+
+ /**
+  * <p>Gets the name of this <code>Document</code>, without any file
+  * extension.</p>
+  *
+  * @return  The <code>Document</code> name with no file extension.
+  */
+ public String getName() {
+     return this.docName;
+ }
+
+
+ /**
+ * <p>Returns the <code>Document</code> name with file extension.</p>
+ *
+ * @return The <code>Document</code> name with file extension.
+ */
+ public String getFileName() {
+ return new String(docName + FILE_EXTENSION);
+ }
+
+
+ /**
+  * <p>Writes out the <code>Document</code> content to the specified
+  * <code>OutputStream</code>.</p>
+  *
+  * <p>The output consists of the preamble, the font table, the document
+  * descriptor, the data of every paragraph, optional padding and finally
+  * the trailer.  The font table and the descriptor are rebuilt on every
+  * call, so the method can be invoked more than once, and also on a
+  * document that was populated through {@link #read(InputStream) read}.</p>
+  *
+  * <p>The supplied stream is flushed and closed before this method
+  * returns normally.</p>
+  *
+  * <p>This method may not be thread-safe.
+  * Implementations may or may not synchronize this
+  * method.  User code (i.e. caller) must make sure that
+  * calls to this method are thread-safe.</p>
+  *
+  * @param   os  <code>OutputStream</code> to write out the
+  *              <code>Document</code> content.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public void write(OutputStream os) throws IOException {
+     DataOutputStream dos = new DataOutputStream(os);
+
+     initPreamble();
+     dos.write(preamble);
+
+     // Start from an empty font table.  After read() it holds font name
+     // Strings, which cannot be cast below, and after an earlier write()
+     // it still holds the records that loadFonts() is about to add again.
+     fonts.removeAllElements();
+     loadFonts();
+     for (int i = 0; i < fonts.size(); i++ ) {
+         ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i);
+         dos.write(fontData.toByteArray());
+     }
+
+
+     // Use a fresh descriptor so that repeated calls do not register the
+     // same paragraphs more than once.
+     descriptor = new DocumentDescriptor();
+     for (int i = 0; i < paragraphs.size(); i++) {
+         Paragraph para = (Paragraph)paragraphs.elementAt(i);
+         descriptor.addParagraph((short)para.getTextLength(), para.getLines());
+     }
+     dos.write(descriptor.getDescriptor());
+
+     for (int i = 0; i < paragraphs.size(); i++ ) {
+         Paragraph para = (Paragraph)paragraphs.elementAt(i);
+
+         // Last paragraph has some extra data
+         if (i + 1 == paragraphs.size()) {
+             para.setLastParagraph(true);
+         }
+         dos.write(para.getParagraphData());
+     }
+
+
+     /*
+      * Before we write out the trailer, we need to make sure that it will
+      * lead to the file ending on a 4 byte boundary.  The trailer is 18
+      * bytes long, so when the data so far is already 4-byte aligned two
+      * padding bytes are needed to keep the total a multiple of four.
+      */
+     if (dos.size() % 4 == 0) {
+         dos.write((byte)0x00);
+         dos.write((byte)0x00);
+     }
+
+     dos.write(trailer);
+
+     dos.flush();
+     dos.close();
+ }
+
+
+ /**
+  * <p>Appends a new, empty paragraph element to the document.  No string
+  * data is added to the paragraph.</p>
+  *
+  * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph
+  * and is used as the target for all subsequent calls to
+  * addParagraphData().</p>
+  *
+  * @param   style       Paragraph Style object describing the formatting for
+  *                      the new paragraph.  Can be null.
+  * @param   listElement true if this paragraph is to be bulleted;
+  *                      false otherwise.
+  */
+ public void addParagraph(ParaStyle style, boolean listElement) {
+     /* For the moment, only support basic text entry in a single paragraph */
+     Paragraph added = new Paragraph(style);
+     paragraphs.add(added);
+
+     // Remember the new paragraph (and its style) as the current one.
+     currentPara = added;
+     pStyle = style;
+
+     if (!listElement) {
+         return;
+     }
+     added.setBullets(true);
+ }
+
+
+ /**
+ * <p>This method adds text to the current paragraph.</p>
+ *
+ * <p>If no paragraphs exist within the document, it creates one.</p>
+ *
+ * @param data The string data for this segment.
+ * @param style Text Style object describing the formatting of this
+ * segment. Can be null.
+ */
+ public void addParagraphData(String data, TextStyle style) {
+ if (currentPara == null) {
+ addParagraph(null, false);
+ }
+ currentPara.addTextSegment(data, style);
+ }
+
+
+ /*
+ * Resets the preamble field to the fixed 52 byte header written at the
+ * start of every file produced by write().
+ *
+ * Preamble is the portion before font specification which never
+ * seems to change from one file, or one saved version, to the next.
+ * Its first five bytes are the ASCII characters "{\pwi".
+ *
+ * Bytes 18h and 19h seem to contain the number of fonts and should
+ * be modified when all of the fonts have been specified.
+ * These bytes are the first two on the fourth line below.
+ */
+ private void initPreamble() {
+ preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
+ 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 of this row: possibly font related (unconfirmed)
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 of this row: apparently the number of fonts (2)
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 };
+ }
+
+
+ /*
+  * Appends the minimum font table used by the converter to the fonts
+  * list: one 80 byte record for Tahoma followed by one for Courier New.
+  * Each record is the UTF-16LE font name, zero padded to 64 bytes, plus
+  * 16 bytes of attributes.
+  *
+  * Currently, all documents convert to 10 point Courier New.  Tahoma is
+  * always mentioned in Pocket Word files, however, even if it is not used.
+  *
+  * TODO:    Rewrite to allow for multiple fonts once font support issues
+  *          have been resolved.
+  */
+ private void loadFonts() {
+     try {
+         // Record 1: Tahoma (12 name bytes + 52 padding bytes).
+         ByteArrayOutputStream tahoma = new ByteArrayOutputStream();
+         tahoma.write("Tahoma".getBytes("UTF-16LE"));
+         tahoma.write(new byte[52]);        // Rest of font name?
+         tahoma.write(new byte[] { 0x02, 0x00, 0x01, 0x00,
+                                   0x00, 0x00, 0x01, 0x00,
+                                   0x00, 0x00, 0x00, 0x00,
+                                   0x00, 0x00, 0x00, 0x00 } );
+         fonts.add(tahoma);
+
+         // Record 2: Courier New (22 name bytes + 42 padding bytes).
+         ByteArrayOutputStream courier = new ByteArrayOutputStream();
+         courier.write("Courier New".getBytes("UTF-16LE"));
+         courier.write(new byte[42]);
+         courier.write(new byte[] { 0x14, 0x00, 0x04, 0x00,
+                                    0x01, 0x00, 0x00, 0x00,
+                                    0x00, 0x00, 0x15, 0x00,
+                                    // Next part indicates that this is the last font
+                                    0x05, 0x00, 0x01, 0x00 } );
+         fonts.add(courier);
+     }
+     catch (IOException ignored) {
+         // Shouldn't happen as this is a memory based stream
+     }
+ }
+}
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml
new file mode 100644
index 000000000000..f12db13e755c
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: build.xml,v $
+
+ $Revision: 1.6 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<project name="xmrg_jooxcxs_pocketword" default="main" basedir=".">
+
+ <!-- ================================================================= -->
+ <!-- settings -->
+ <!-- ================================================================= -->
+
+ <!-- project prefix, used for targets and build.lst -->
+ <property name="prj.prefix" value="xmrg"/>
+
+ <!-- name of this sub target used in recursive builds -->
+ <property name="target" value="xmrg_jooxcxs_pocketword"/>
+
+ <!-- relative path to project directory -->
+ <property name="prj" value="../../../../../../../.."/>
+
+ <!-- start of java source code package structure -->
+ <property name="java.dir" value="${prj}/java"/>
+
+ <!-- path component for current java package -->
+ <property name="package"
+ value="org/openoffice/xmerge/converter/xml/sxw/pocketword"/>
+
+ <!-- define how to handle CLASSPATH environment -->
+ <property name="build.sysclasspath" value="ignore"/>
+
+ <!-- classpath settings for javac tasks -->
+ <path id="classpath">
+ <pathelement location="${build.class}"/>
+ <pathelement location="${solar.jar}/parser.jar"/>
+ <pathelement location="${solar.jar}/jaxp.jar"/>
+ <pathelement location="${solar.jar}/xerces.jar"/>
+ </path>
+
+ <!-- set whether we want to compile with or without deprecation -->
+ <property name="deprecation" value="on"/>
+
+ <!-- ================================================================= -->
+ <!-- solar build environment targets -->
+ <!-- ================================================================= -->
+
+ <target name="build_dir" unless="build.dir">
+ <property name="build.dir" value="${out}"/>
+ </target>
+
+ <target name="solar" depends="build_dir" if="solar.update">
+ <property name="solar.properties"
+ value="${solar.bin}/solar.properties"/>
+ </target>
+
+ <target name="init" depends="solar">
+ <property name="build.compiler" value="classic"/>
+ <property file="${solar.properties}"/>
+ <property file="${build.dir}/class/solar.properties"/>
+ </target>
+
+ <target name="info">
+ <echo message="--------------------"/>
+ <echo message="${target}"/>
+ <echo message="--------------------"/>
+ </target>
+
+
+ <!-- ================================================================= -->
+ <!-- custom targets -->
+ <!-- ================================================================= -->
+
+ <!-- the main target, called in recursive builds -->
+ <target name="main" depends="info,prepare,compile"/>
+
+ <!-- prepare output directories -->
+ <target name="prepare" depends="init" if="build.class">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.class}"/>
+ </target>
+
+ <!-- compile java sources in ${package} -->
+ <target name="compile" depends="prepare" if="build.class">
+ <javac srcdir="${java.dir}"
+ destdir="${build.class}"
+ debug="${debug}"
+ deprecation="${deprecation}"
+ optimize="${optimize}">
+ <classpath refid="classpath"/>
+ <include name="${package}/DocumentDescriptor.java"/>
+ <include name="${package}/DocumentDeserializerImpl.java"/>
+ <include name="${package}/DocumentSerializerImpl.java"/>
+ <include name="${package}/Paragraph.java"/>
+ <include name="${package}/ParagraphTextSegment.java"/>
+ <include name="${package}/PluginFactoryImpl.java"/>
+ <include name="${package}/PocketWordConstants.java"/>
+ <include name="${package}/PocketWordDocument.java"/>
+ <include name="${package}/DocumentMergerImpl.java"/>
+ <include name="${package}/ConverterCapabilitiesImpl.java"/>
+ </javac>
+ </target>
+
+ <!-- clean up -->
+ <target name="clean" depends="prepare">
+ <delete includeEmptyDirs="true">
+ <fileset dir="${build.class}">
+ <patternset>
+ <include name="${package}/*.class"/>
+ </patternset>
+ </fileset>
+ </delete>
+ </target>
+
+</project>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml
new file mode 100644
index 000000000000..e9ee658d27f3
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: converter.xml,v $
+
+ $Revision: 1.4 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+
+
+<converters>
+ <converter type="staroffice/sxw" version="1.1">
+ <converter-display-name>
+ Pocket Word
+ </converter-display-name>
+ <converter-description>
+ OpenOffice Writer XML to/from Pocket Word conversion.
+ </converter-description>
+ <converter-vendor>
+ OpenOffice.org
+ </converter-vendor>
+ <converter-class-impl>
+ org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl
+ </converter-class-impl>
+ <converter-target type="application/x-pocket-word"/>
+ </converter>
+</converters>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html
new file mode 100644
index 000000000000..65454f24773c
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html
@@ -0,0 +1,60 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: package.html,v $
+
+ $Revision: 1.3 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<html>
+<head>
+<title>org.openoffice.xmerge.converter.xml.sxw.pocketword package</title>
+</head>
+
+<body bgcolor="white">
+
+<p>Plugin for the conversion of documents between StarWriter XML and
+ Pocket Word format.</p>
+<p>This plugin supports conversion of most features supported by Pocket Word.</p>
+<ul>
+ <li>Bold, Italic, Underline</li>
+ <li>Strikethrough</li>
+ <li>Highlight</li>
+ <li>Colours</li>
+ <li>Lists</li>
+ <li>Alignments</li>
+</ul>
+
+<p>Additionally, work on fonts is currently underway.</p>
+
+<p>This plugin is based on the Windows CE 3.0 version of Pocket Word.<br>
+ Testing was carried out using Pocket PC 2000 and Pocket PC 2002 devices.</p>
+
+<p>It follows the {@link org.openoffice.xmerge} framework
+for the conversion process.</p>
+
+</body>
+</html>
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java
new file mode 100644
index 000000000000..5470025a934f
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java
@@ -0,0 +1,96 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: ConverterCapabilitiesImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+
+/**
+ *  <p>WordSmith implementation of <code>ConverterCapabilities</code> for
+ *  the {@link
+ *  org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ *  PluginFactoryImpl}.</p>
+ *
+ *  <p>Used with StarWriter XML to/from WordSmith conversions.
+ *  The <code>ConverterCapabilities</code> specify which &quot;Office&quot;
+ *  <code>Document</code> tags and attributes are supported on the
+ *  &quot;Device&quot; <code>Document</code> format.</p>
+ */
+public final class ConverterCapabilitiesImpl
+    implements ConverterCapabilities {
+
+    /**
+     *  Reports whether the given &quot;Office&quot; tag is one of the
+     *  document, body, paragraph, heading, list, span, hyperlink,
+     *  line break, space or tab stop tags handled by this converter.
+     *
+     *  @param  tag  The tag name to check.
+     *
+     *  @return  <code>true</code> if the tag is supported,
+     *           <code>false</code> otherwise.
+     */
+    public boolean canConvertTag(String tag) {
+
+        return OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)
+            || OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)
+            || OfficeConstants.TAG_OFFICE_BODY.equals(tag)
+            || OfficeConstants.TAG_PARAGRAPH.equals(tag)
+            || OfficeConstants.TAG_HEADING.equals(tag)
+            || OfficeConstants.TAG_ORDERED_LIST.equals(tag)
+            || OfficeConstants.TAG_UNORDERED_LIST.equals(tag)
+            || OfficeConstants.TAG_LIST_ITEM.equals(tag)
+            || OfficeConstants.TAG_LIST_HEADER.equals(tag)
+            || OfficeConstants.TAG_SPAN.equals(tag)
+            || OfficeConstants.TAG_HYPERLINK.equals(tag)
+            || OfficeConstants.TAG_LINE_BREAK.equals(tag)
+            || OfficeConstants.TAG_SPACE.equals(tag)
+            || OfficeConstants.TAG_TAB_STOP.equals(tag);
+    }
+
+    /**
+     *  Reports whether the given attribute of the given tag is supported.
+     *  Only the space count attribute of the space tag is.
+     *
+     *  @param  tag        The tag name to check.
+     *  @param  attribute  The attribute name to check.
+     *
+     *  @return  <code>true</code> if the attribute is supported,
+     *           <code>false</code> otherwise.
+     */
+    public boolean canConvertAttribute(String tag,
+                                       String attribute) {
+
+        return OfficeConstants.TAG_SPACE.equals(tag)
+            && OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute);
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java
new file mode 100644
index 000000000000..ef066386d4a4
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java
@@ -0,0 +1,64 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DOCConstants.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+/**
+ *  Constants shared by the classes that encode and decode the
+ *  WordSmith format.
+ *
+ *  <p>Fields declared in an interface are implicitly
+ *  <code>public static final</code>, so the modifiers are omitted.</p>
+ *
+ *  @author   Herbie Ong, David Proulx
+ */
+interface DOCConstants {
+
+    /** Constant for uncompressed version. */
+    short UNCOMPRESSED = 1;
+
+    /** Constant for compressed version. */
+    short COMPRESSED = 2;
+
+    /** Constant used for spare fields. */
+    int SPARE = 0;
+
+    /** WordSmith record size. */
+    short TEXT_RECORD_SIZE = 4096;
+
+    /** Constant for encoding scheme. */
+    String ENCODING = "8859_1";
+
+    /** Constant for TAB character. */
+    char TAB_CHAR = '\t';
+
+    /** Constant for EOL character. */
+    char EOL_CHAR = '\n';
+
+    /** Constant for SPACE character. */
+    char SPACE_CHAR = ' ';
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
new file mode 100644
index 000000000000..4c6c48822093
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java
@@ -0,0 +1,568 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentDeserializerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.w3c.dom.*;
+
+import java.io.IOException;
+import java.util.Enumeration;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PdbDecoder;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+
+import java.util.Vector;
+import java.io.ByteArrayInputStream;
+
+import org.openoffice.xmerge.converter.xml.*;
+import org.openoffice.xmerge.util.Debug;
+import org.openoffice.xmerge.util.XmlUtil;
+
+/**
+ * <p>WordSmith implementation of
+ * org.openoffice.xmerge.DocumentDeserializer
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>The <code>deserialize</code> method uses a
+ * <code>WSDecoder</code> to parse the WordSmith records into an
+ * array of <code>Wse</code> elements, then it calls
+ * <code>buildDocument</code> to create a <code>SxwDocument</code>
+ * object from them.</p>
+ *
+ * @author Herbie Ong, David Proulx
+ */
+public final class DocumentDeserializerImpl
+implements DOCConstants, OfficeConstants, DocumentDeserializer {
+
+ /** A Decoder object for decoding WordSmith format. */
+ private WSDecoder decoder = null;
+
+ /** Last <code>WseFontTable</code> element met while building a document. */
+ WseFontTable fontTable = null;
+ /** Last <code>WseColorTable</code> element met while building a document. */
+ WseColorTable colorTable = null;
+ /** Catalog of the styles newly created for the document being built. */
+ StyleCatalog styleCat = null;
+ /** Catalog filled from the original document's styles, when one is supplied. */
+ StyleCatalog oldStyleCat = null;
+
+ /** A <code>ConvertData</code> object assigned to this object. */
+ private ConvertData cd = null;
+
+
+ /**
+ * Constructor that assigns the given <code>ConvertData</code>
+ * to the object. Nothing is read from it here; it is only stored
+ * for the no-argument <code>deserialize</code> method.
+ *
+ * @param cd A <code>ConvertData</code> object to read data for
+ * the conversion process by the deserialize method.
+ */
+ public DocumentDeserializerImpl(ConvertData cd) {
+ this.cd = cd;
+ }
+
+
+ /**
+ * Convert the <code>ConvertData</code> given at construction into a
+ * <code>SxwDocument</code> object. This delegates to
+ * <code>deserialize(Document, ConvertData)</code> without an
+ * original document.
+ *
+ * @return Resulting <code>Document</code> object.
+ *
+ * @throws ConvertException If any conversion error occurs.
+ * @throws IOException If any I/O error occurs.
+ */
+ public Document deserialize() throws ConvertException,
+ IOException {
+ // null: there is no original document to take styles from.
+ return deserialize(null, cd);
+ }
+
+
+ /**
+  * Converts every <code>PalmDocument</code> found in the given
+  * <code>ConvertData</code> and returns the result of the last one
+  * converted.
+  *
+  * @param  origDoc  Original <code>Document</code> passed on to
+  *                  <code>buildDocument</code>; may be
+  *                  <code>null</code>.
+  * @param  cd       <code>ConvertData</code> holding the
+  *                  <code>PalmDocument</code> objects to convert.
+  *
+  * @return  The <code>Document</code> built from the last entry, or
+  *          <code>null</code> if the enumeration is empty.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public Document deserialize(Document origDoc, ConvertData cd)
+         throws IOException {
+
+     Document result = null;
+
+     for (Enumeration docs = cd.getDocumentEnumeration(); docs.hasMoreElements();) {
+         PalmDocument source = (PalmDocument) docs.nextElement();
+         Record[] records = source.getPdb().getRecords();
+
+         // A fresh decoder is used for each document.
+         decoder = new WSDecoder();
+         Wse[] elements = decoder.parseDocument(records);
+
+         result = buildDocument(source.getName(), elements, origDoc);
+     }
+
+     return result;
+ }
+
+
+ /**
+  * Temporary method that fills <code>oldStyleCat</code> with the
+  * styles of an existing document, to be used as a starting point.
+  *
+  * <p>The document is written to a byte buffer and read back into a
+  * scratch <code>SxwDocument</code>; the first element of each style
+  * container tag is then added to the catalog.  Any exception is
+  * logged and otherwise ignored.</p>
+  *
+  * @param  parentDoc  The parent <code>Document</code>.
+  */
+ private void readStyleCatalog(Document parentDoc) {
+     try {
+         // Round-trip through memory to obtain a DOM of the document.
+         java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
+         parentDoc.write(buffer);
+         SxwDocument scratch = new SxwDocument("old");
+         scratch.read(new ByteArrayInputStream(buffer.toByteArray()));
+         org.w3c.dom.Document dom = scratch.getContentDOM();
+
+         String[] families = { "text", "paragraph", "paragraph" };
+         Class[] classes = { TextStyle.class, ParaStyle.class, TextStyle.class };
+
+         String[] containerTags = {
+             TAG_OFFICE_STYLES,
+             TAG_OFFICE_AUTOMATIC_STYLES,
+             TAG_OFFICE_MASTER_STYLES
+         };
+
+         for (int t = 0; t < containerTags.length; t++) {
+             NodeList found = dom.getElementsByTagName(containerTags[t]);
+             oldStyleCat.add(found.item(0), families, classes, null, false);
+         }
+     } catch (Exception e) {
+         Debug.log(Debug.ERROR, "", e);
+     }
+ }
+
+
+ /**
+  * Looks for exactly one paragraph <code>Style</code> in
+  * <code>paraStyles</code> whose name equals that of a
+  * "paragraph"-family style in <code>oldStyleCat</code> matching the
+  * text formatting of <code>tStyle</code>.
+  *
+  * @param  paraStyles  An array of paragraph <code>Style</code>
+  *                     objects.
+  * @param  tStyle      Text <code>Style</code> to match.
+  *
+  * @return  The paragraph <code>Style</code> that matches, or
+  *          <code>null</code> when there is no match or more than
+  *          one.
+  */
+ private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
+     Style[] candidates = (Style[]) oldStyleCat.getMatching(tStyle);
+
+     int hits = 0;
+     int lastHit = -1;
+
+     for (int c = 0; c < candidates.length; c++) {
+         TextStyle candidate = (TextStyle) candidates[c];
+
+         // Only text styles registered under the paragraph family count.
+         if (candidate.getFamily().equals("paragraph")) {
+             for (int p = 0; p < paraStyles.length; p++) {
+                 if (candidate.getName().equals(paraStyles[p].getName())) {
+                     hits++;
+                     lastHit = p;
+                 }
+             }
+         }
+     }
+
+     return (hits == 1) ? (ParaStyle) paraStyles[lastHit] : null;
+ }
+
+
+ /**
+ * Take a <code>String</code> of text and turn it into a sequence
+ * of <code>Node</code> objects.
+ *
+ * @param text <code>String</code> of text.
+ * @param parentDoc Parent <code>Document</code>.
+ *
+ * @return Array of <code>Node</code> objects.
+ */
+ private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
+ Vector nodeVec = new Vector();
+
+ // Break up the text from the WordSmith text run into Open
+ // Office text runs. There may be more runs in OO because
+ // runs of 2 or more spaces map to nodes.
+ while ((text.indexOf(" ") != -1) || (text.indexOf("\t") != 1)) {
+
+ // Find the indices of tabs and multiple spaces, and
+ // figure out which of them occurs first in the string.
+ int spaceIndex = text.indexOf(" ");
+ int tabIndex = text.indexOf("\t");
+ if ((spaceIndex == -1) && (tabIndex == -1))
+ break; // DJP This should not be necessary. What is wrong
+ // with the while() stmt up above?
+ int closerIndex; // Index of the first of these
+ if (spaceIndex == -1)
+ closerIndex = tabIndex;
+ else if (tabIndex == -1)
+ closerIndex = spaceIndex;
+ else
+ closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex;
+
+ // If there is any text prior to the first occurrence of a
+ // tab or spaces, create a text node from it, then chop it
+ // off the string we're working with.
+ if (closerIndex > 0) {
+ String beginningText = text.substring(0, closerIndex);
+ Text textNode = parentDoc.createTextNode(beginningText);
+ nodeVec.addElement(textNode);
+ log("<TEXT>");
+ log(beginningText);
+ log("</TEXT>");
+ }
+ text = text.substring(closerIndex);
+
+ // Handle either tab character or space sequence by creating
+ // an element for it, and then chopping out the text that
+ // represented it in "text".
+ if (closerIndex == tabIndex) {
+ Element tabNode = parentDoc.createElement(TAG_TAB_STOP);
+ nodeVec.add(tabNode);
+ text = text.substring(1); // tab is always a single character
+ log("<TAB/>");
+ } else {
+ // Compute length of space sequence.
+ int nrSpaces = 2;
+ while ((nrSpaces < text.length())
+ && text.substring(nrSpaces, nrSpaces + 1).equals(" "))
+ nrSpaces++;
+
+ Element spaceNode = parentDoc.createElement(TAG_SPACE);
+ spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, new Integer(nrSpaces).toString());
+ nodeVec.add(spaceNode);
+ text = text.substring(nrSpaces);
+ log("<SPACE count=\"" + nrSpaces + "\" />");
+ }
+ }
+
+ // No more tabs or space sequences. If there's any remaining
+ // text create a text node for it.
+ if (text.length() > 0) {
+ Text textNode = parentDoc.createTextNode(text);
+ nodeVec.add(textNode);
+ log("<TEXT>");
+ log(text);
+ log("</TEXT>");
+ }
+
+ // Now create and populate an array to return the nodes in.
+ Node nodes[] = new Node[nodeVec.size()];
+ for (int i = 0; i < nodeVec.size(); i++)
+ nodes[i] = (Node)nodeVec.elementAt(i);
+ return nodes;
+ }
+
+
+ /**
+  * Builds a <code>SxwDocument</code> from a sequence of decoded
+  * WordSmith elements.
+  *
+  * <p>Each <code>WsePara</code> starts a new paragraph element and
+  * each <code>WseTextRun</code> adds text to the current paragraph,
+  * either directly or wrapped in a span.  A style from the original
+  * document's catalog is used when exactly one matches; otherwise a
+  * style from, or newly added to, this document's own catalog is
+  * used.  When no style can be determined the element is written
+  * without a style name rather than failing.</p>
+  *
+  * @param  docName  <code>Document</code> name.
+  * @param  data     Decoded WordSmith elements, in document order.
+  * @param  origDoc  Original <code>Document</code> whose styles and
+  *                  font declarations are reused; may be
+  *                  <code>null</code>.
+  *
+  * @return  Resulting <code>SxwDocument</code> object.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
+         throws IOException {
+
+     // create minimum office xml document.
+     SxwDocument sxwDoc = new SxwDocument(docName);
+     sxwDoc.initContentDOM();
+
+     org.w3c.dom.Document doc = sxwDoc.getContentDOM();
+
+     // Grab hold of the office:body tag,
+     // Assume there should be one.
+     // This is where top level paragraphs will append to.
+     NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
+     Node bodyNode = list.item(0);
+
+     styleCat = new StyleCatalog(50);
+     oldStyleCat = new StyleCatalog(50);
+     if (origDoc != null) {
+         readStyleCatalog(origDoc);
+     }
+
+     Element currPara = null;
+     ParaStyle currParaStyle = null;
+     int newTextStyleNr = 0;
+     int newParaStyleNr = 0;
+
+     // Now write out the document body by running through
+     // the list of WordSmith elements and processing each one
+     // in turn.
+     for (int i = 0; i < data.length; i++) {
+
+         if (data[i].getClass() == WsePara.class) {
+
+             currPara = doc.createElement(TAG_PARAGRAPH);
+             log("</PARA>");
+             log("<PARA>");
+
+             WsePara p = (WsePara)data[i];
+
+             // Save info about the first text run, if there is one.
+             WseTextRun firstTextRun = null;
+
+             if ((data.length >= i + 2)
+                 && (data[i+1].getClass() == WseTextRun.class)) {
+                 firstTextRun = (WseTextRun)data[i+1];
+             }
+
+             Style matches[] = oldStyleCat.getMatching(p.makeStyle());
+
+             // See if we can find a unique match in the catalog
+             // of existing styles from the original document.
+             ParaStyle pStyle = null;
+             if (matches.length == 1) {
+                 pStyle = (ParaStyle)matches[0];
+                 log("using an existing style");
+             } else if ((matches.length > 1) && (firstTextRun != null)) {
+                 pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                 log("resolved a para by looking @ text");
+             }
+
+             // If nothing found so far, try looking in the catalog
+             // of newly-created styles.
+             // DJP FIXME: if we need to add two para styles with the
+             // same para formatting info but different default text
+             // styles, this won't work!
+             if (pStyle == null) {
+                 log("had " + matches.length + " matches in old catalog");
+                 matches = styleCat.getMatching(p.makeStyle());
+                 if (matches.length == 0) {
+                     pStyle = p.makeStyle();
+                     String newName = "PPP" + ++newParaStyleNr;
+                     pStyle.setName(newName);
+                     styleCat.add(pStyle);
+                     // DJP: write in the text format info here
+                     log("created a new style");
+                 } else if (matches.length == 1) {
+                     pStyle = (ParaStyle)matches[0];
+                     log("re-using a new style");
+                 } else if (firstTextRun != null) {
+                     pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                     if (pStyle != null) {
+                         log("resolved a (new) para by looking @ text");
+                     } else {
+                         log("Hey this shouldn't happen! - nr of matches is "
+                             + matches.length);
+                     }
+                 }
+             }
+
+             if (pStyle == null) {
+                 // No style could be determined: leave the paragraph
+                 // without a style name instead of dereferencing null.
+                 log("Unable to figure out a para style");
+             } else {
+                 // Figured out a style to use.  Specify the style in
+                 // this paragraph's attributes.
+                 currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
+             }
+
+             bodyNode.appendChild(currPara);
+             currParaStyle = pStyle;
+
+         } else if (data[i].getClass() == WseTextRun.class) {
+             WseTextRun tr = (WseTextRun)data[i];
+             TextStyle trStyle = null;
+             Node trNodes[] = parseText(tr.getText(), doc);
+
+             // A text run that is not preceded by any paragraph element
+             // still needs a parent; give it an unstyled paragraph.
+             if (currPara == null) {
+                 currPara = doc.createElement(TAG_PARAGRAPH);
+                 bodyNode.appendChild(currPara);
+             }
+
+             // First see if the formatting of this text run matches
+             // the default text formatting for this paragraph.  If
+             // it does, then just make the text node(s) children of
+             // the current paragraph.  This is skipped when the
+             // paragraph has no known style to compare against.
+             if (currParaStyle != null) {
+                 Style[] cps = new Style[1];
+                 cps[0] = currParaStyle;
+                 if (matchParaByText(cps, tr.makeStyle()) != null) {
+                     for (int ii = 0; ii < trNodes.length; ii++) {
+                         currPara.appendChild(trNodes[ii]);
+                     }
+                     continue;
+                 }
+             }
+
+             // Check for existing, matching styles in the old style
+             // catalog.  If exactly one is found, use it.  Otherwise,
+             // check the new style catalog, and either use the style
+             // found or add this new one to it.
+             Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
+             if (matches.length == 1) {
+                 trStyle = (TextStyle)matches[0];
+             } else {
+                 matches = styleCat.getMatching(tr.makeStyle());
+                 if (matches.length == 0) {
+                     trStyle = tr.makeStyle();
+                     String newName = "TTT" + ++newTextStyleNr;
+                     trStyle.setName(newName);
+                     styleCat.add(trStyle);
+                 } else if (matches.length == 1) {
+                     trStyle = (TextStyle)matches[0];
+                 } else {
+                     log("multiple text style matches from new catalog");
+                 }
+             }
+
+             // Create a text span node, set the style attribute, make the
+             // text node(s) its children, and append it to current paragraph's
+             // list of children.  The attribute is omitted when the style
+             // is ambiguous (trStyle still null).
+             Element textSpanNode = doc.createElement(TAG_SPAN);
+             if (trStyle != null) {
+                 textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
+             }
+             for (int ii = 0; ii < trNodes.length; ii++) {
+                 textSpanNode.appendChild(trNodes[ii]);
+             }
+             currPara.appendChild(textSpanNode);
+             log("</SPAN>");
+
+         } else if (data[i].getClass() == WseFontTable.class) {
+             fontTable = (WseFontTable)data[i];
+
+         } else if (data[i].getClass() == WseColorTable.class) {
+             colorTable = (WseColorTable)data[i];
+         }
+     }
+
+
+     //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
+     NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
+     Node rootNode = r.item(0);
+
+     org.w3c.dom.NodeList nl;
+     if (origDoc != null) {
+         // read the original document
+         java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+         origDoc.write(bos);
+         SxwDocument origSxwDoc = new SxwDocument("old");
+         origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray()));
+         org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();
+
+         XmlUtil xu = new XmlUtil();
+
+         // Copy the font declarations and the style catalog from the
+         // original document to the new document, in this order.
+         String[] copiedTags = {
+             TAG_OFFICE_FONT_DECLS,
+             TAG_OFFICE_STYLES,
+             TAG_OFFICE_AUTOMATIC_STYLES,
+             TAG_OFFICE_MASTER_STYLES
+         };
+         for (int t = 0; t < copiedTags.length; t++) {
+             nl = origDomDoc.getElementsByTagName(copiedTags[t]);
+             org.w3c.dom.Node section = nl.item(0);
+             if (section == null) {
+                 // The original document has no such section.
+                 continue;
+             }
+             org.w3c.dom.DocumentFragment df = doc.createDocumentFragment();
+             org.w3c.dom.Node newNode = xu.deepClone(df, section);
+             rootNode.insertBefore(newNode, bodyNode);
+         }
+     }
+
+     // Original document not specified.  We need to add font declarations.
+     // DJP: this might just be for debugging.  Merger will probably put
+     // the "real" ones in.
+     // DJP: if really doing it this way, do it right: gather font names
+     // from style catalog(s).
+     else {
+         log("<FONT-DECLS/>");
+
+         org.w3c.dom.Node declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
+         rootNode.insertBefore(declNode, bodyNode);
+
+         String[] fontNames = { "Arial", "Arioso" };
+         for (int f = 0; f < fontNames.length; f++) {
+             org.w3c.dom.Element fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
+             fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, fontNames[f]);
+             fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, fontNames[f]);
+             fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
+             declNode.appendChild(fontNode);
+         }
+     }
+
+
+     // Now add any new styles we have created in this document.
+     nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+     Node autoStylesNode = nl.item(0);
+     if (autoStylesNode == null) {
+         autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+         log("<OFFICE-AUTOMATIC-STYLES/>");
+         rootNode.insertBefore(autoStylesNode, bodyNode);
+     }
+
+     Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
+     nl = newStyleCatNode.getChildNodes();
+     int nNodes = nl.getLength();
+     for (int i = 0; i < nNodes; i++) {
+         // Always item(0): appending a child elsewhere removes it from
+         // this child list, so the next one moves to the front.
+         autoStylesNode.appendChild(nl.item(0));
+     }
+
+     oldStyleCat.dumpCSV(true);
+     styleCat.dumpCSV(true);
+     return sxwDoc;
+ }
+
+
+ /**
+ * Sends message to the log object at <code>Debug.TRACE</code> level.
+ *
+ * @param str Debug message.
+ */
+ private void log(String str) {
+
+ Debug.log(Debug.TRACE, str);
+ }
+
+
+ /*
+ public static void main(String args[]) {
+
+ // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());
+
+ Node nodes[] = parseText("Tab here:\tThen some more text");
+ }
+*/
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java
new file mode 100644
index 000000000000..09c2b998f5c1
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java
@@ -0,0 +1,102 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentMergerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.MergeException;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.Difference;
+import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
+import org.openoffice.xmerge.merger.Iterator;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.diff.ParaNodeIterator;
+import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm;
+import org.openoffice.xmerge.merger.merge.DocumentMerge;
+import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge;
+import org.openoffice.xmerge.util.Debug;
+
+
+/**
+ * <p>Wordsmith implementation of <code>DocumentMerger</code>
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ */
+public class DocumentMergerImpl implements DocumentMerger {
+
+    /** Capabilities handed to the paragraph iterators and to the merge. */
+    private ConverterCapabilities cc_;
+
+    /** Document supplied at construction; first operand of the diff. */
+    private org.openoffice.xmerge.Document orig = null;
+
+    /**
+     * Stores the document and the converter capabilities for a later
+     * call to <code>merge</code>.
+     *
+     * @param  doc  Document used as the first operand of the diff.
+     * @param  cc   Converter capabilities used by the iterators and
+     *              the merge.
+     */
+    public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) {
+        this.cc_ = cc;
+        this.orig = doc;
+    }
+
+    /**
+     * Computes the paragraph-level differences between the stored
+     * document and <code>modifiedDoc</code> and applies them.  Both
+     * documents are cast to <code>SxwDocument</code>.
+     *
+     * @param  modifiedDoc  Second operand of the diff.
+     *
+     * @throws  MergeException  If the merge fails.
+     */
+    public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException {
+
+        SxwDocument origSxw = (SxwDocument) orig;
+        SxwDocument modifiedSxw = (SxwDocument) modifiedDoc;
+
+        Document origDom = origSxw.getContentDOM();
+        Document modifiedDom = modifiedSxw.getContentDOM();
+
+        Iterator origParas = new ParaNodeIterator(cc_, origDom.getDocumentElement());
+        Iterator modifiedParas = new ParaNodeIterator(cc_, modifiedDom.getDocumentElement());
+
+        // find out the paragraph level diffs
+        DiffAlgorithm lcs = new IteratorLCSAlgorithm();
+        Difference[] diffs = lcs.computeDiffs(origParas, modifiedParas);
+
+        if (Debug.isFlagSet(Debug.INFO)) {
+            Debug.log(Debug.INFO, "Diff Result: ");
+
+            for (int d = 0; d < diffs.length; d++) {
+                Debug.log(Debug.INFO, diffs[d].debug());
+            }
+        }
+
+        // merge the paragraphs
+        NodeMergeAlgorithm paragraphMerge = new CharacterBaseParagraphMerge();
+        DocumentMerge merger = new DocumentMerge(cc_, paragraphMerge);
+        merger.applyDifference(origParas, modifiedParas, diffs);
+    }
+}
+
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java
new file mode 100644
index 000000000000..207f38d263a3
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java
@@ -0,0 +1,539 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: DocumentSerializerImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import java.io.IOException;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.PdbEncoder;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PdbUtil;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.util.*;
+import org.openoffice.xmerge.converter.xml.*;
+
+/**
+ * <p>WordSmith implementation of
+ * org.openoffice.xmerge.DocumentSerializer
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>The <code>serialize</code> method traverses the DOM
+ * document from the given <code>Document</code> object. It uses a
+ * <code>WSEncoder</code> object for the actual conversion of
+ * contents to the WordSmith format.</p>
+ *
+ * @author Herbie Ong, David Proulx
+ */
+
+// DJP: take out "implements OfficeConstants"
+public final class DocumentSerializerImpl
+implements OfficeConstants, DocumentSerializer {
+
+ /** A WSEncoder object for encoding to WordSmith. */
+ private WSEncoder encoder = null;
+
+ /** The <code>StyleCatalog</code>. */
+ private StyleCatalog styleCat = null;
+
+ /** Font table element; added to the encoder and passed to every text run. */
+ private WseFontTable fontTable = new WseFontTable();
+ /** Color table element; added to the encoder and passed to every text run. */
+ private WseColorTable colorTable = new WseColorTable();
+
+ /**
+ * The <code>SxwDocument</code> object that this converter
+ * processes.
+ */
+ private SxwDocument sxwDoc = null;
+
+ /**
+ * Constructor. The given document is cast to
+ * <code>SxwDocument</code>, so passing any other kind of
+ * <code>Document</code> fails with a <code>ClassCastException</code>.
+ *
+ * @param doc The <code>Document</code> to convert.
+ */
+ public DocumentSerializerImpl(Document doc) {
+ sxwDoc = (SxwDocument) doc;
+ }
+
+
+ /**
+  * <p>Method to convert the <code>SxwDocument</code> given at
+  * construction into a <code>PalmDocument</code>.</p>
+  *
+  * <p>This method assigns the <code>encoder</code> and
+  * <code>styleCat</code> fields and is not thread safe.  It would
+  * be best to call this method only once per object instance.</p>
+  *
+  * @return  A <code>ConvertData</code> object holding the resulting
+  *          <code>PalmDocument</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ public ConvertData serialize()
+         throws IOException {
+
+     // server document name and DOM document
+     String docName = sxwDoc.getName();
+     org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
+
+     // Create WordSmith encoder object.  The font table and the color
+     // table are the first elements added to it.
+     encoder = new WSEncoder();
+     encoder.addElement(fontTable);
+     encoder.addElement(colorTable);
+
+     // Read the styles into the style catalog
+     String[] families = { "text", "paragraph", "paragraph" };
+     Class[] classes = { TextStyle.class, ParaStyle.class, TextStyle.class };
+     styleCat = new StyleCatalog(25);
+
+     // DJP todo: eliminate multiple calls to add() when it can
+     // recurse properly.
+     String[] containerTags = {
+         TAG_OFFICE_STYLES,
+         TAG_OFFICE_AUTOMATIC_STYLES,
+         TAG_OFFICE_MASTER_STYLES
+     };
+     for (int t = 0; t < containerTags.length; t++) {
+         NodeList containers = domDoc.getElementsByTagName(containerTags[t]);
+         styleCat.add(containers.item(0), families, classes, null, false);
+     }
+
+     // Traverse to the office:body element.
+     // There should only be one.
+     NodeList bodies = domDoc.getElementsByTagName(TAG_OFFICE_BODY);
+     if (bodies.getLength() > 0) {
+         traverseBody(bodies.item(0));
+     }
+
+     // create a PalmDB object and ConvertData object.
+     Record[] records = encoder.getRecords();
+
+     ConvertData cd = new ConvertData();
+     PalmDocument palmDoc = new PalmDocument(
+         docName,
+         PdbUtil.intID("WrdS"),
+         PdbUtil.intID("BDOC"),
+         0,
+         PalmDB.PDB_HEADER_ATTR_BACKUP,
+         records);
+     cd.addDocument(palmDoc);
+     return cd;
+ }
+
+
+ /**
+  * Walks the element children of the <i>office:body</i> element and
+  * dispatches paragraphs, headings and lists to their handlers.
+  * Any other element is only reported in the log.
+  *
+  * @param  node  <i>office:body</i> <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseBody(Node node) throws IOException {
+
+     if (!node.hasChildNodes()) {
+         return;
+     }
+
+     NodeList children = node.getChildNodes();
+     int count = children.getLength();
+
+     for (int idx = 0; idx < count; idx++) {
+
+         Node child = children.item(idx);
+         if (child.getNodeType() != Node.ELEMENT_NODE) {
+             continue;
+         }
+
+         String tag = child.getNodeName();
+         boolean isParagraph = tag.equals(TAG_PARAGRAPH) || tag.equals(TAG_HEADING);
+
+         if (isParagraph) {
+             traverseParagraph(child);
+         } else if (tag.equals(TAG_UNORDERED_LIST) || tag.equals(TAG_ORDERED_LIST)) {
+             // both kinds of list are handled the same way
+             traverseList(child);
+         } else {
+             Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />");
+         }
+     }
+ }
+
+
+ /**
+  * Encodes one <i>text:p</i> or <i>text:h</i> element: a
+  * <code>WsePara</code> for its resolved paragraph style, followed
+  * by its contents.
+  *
+  * <p>When the style named by the element is not in the catalog, a
+  * style named "CONVERTER-DEFAULT" is used; it is added to the
+  * catalog the first time it is needed.</p>
+  *
+  * @param  node  A <i>text:p</i> or <i>text:h</i> <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseParagraph(Node node) throws IOException {
+
+     String paraStyleName = findAttribute(node, "text:style-name");
+     ParaStyle paraStyle = (ParaStyle)
+         styleCat.lookup(paraStyleName, "paragraph", null, ParaStyle.class);
+
+     if (paraStyle == null) {
+         // Unknown style: fall back to the converter's default style,
+         // creating its paragraph and text halves on first use.
+         paraStyleName = "CONVERTER-DEFAULT";
+         paraStyle = (ParaStyle)
+             styleCat.lookup(paraStyleName, "paragraph", null, ParaStyle.class);
+
+         if (paraStyle == null) {
+             paraStyle = new ParaStyle(paraStyleName, "paragraph", null,
+                                       (String [])null, null, styleCat);
+             styleCat.add(paraStyle);
+
+             TextStyle fallbackText = new TextStyle(paraStyleName, "paragraph", null,
+                                                    0, 0, 12, "Times-Roman", styleCat);
+             styleCat.add(fallbackText);
+         }
+     }
+
+     ParaStyle resolved = (ParaStyle) paraStyle.getResolved();
+     encoder.addElement(new WsePara(resolved, styleCat));
+
+     // Text style registered under the same name in the paragraph
+     // family; used as the default for the paragraph's contents.
+     TextStyle defParaTextStyle = (TextStyle)
+         styleCat.lookup(paraStyleName, "paragraph", null, TextStyle.class);
+
+     traverseParaContents(node, defParaTextStyle);
+ }
+
+
+ /**
+  * This method traverses a paragraph content.  Note that this
+  * method may recurse to call itself.
+  *
+  * <p>The text style named by the node's <i>text:style-name</i>
+  * attribute is used for everything directly inside the node:
+  * text, space runs, tabs and line breaks alike.  When the node
+  * names no known text style, <code>defTextStyle</code> is used
+  * instead.  The style in effect is handed down to nested spans and
+  * hyperlinks as their default.</p>
+  *
+  * @param  node          A paragraph or content <code>Node</code>.
+  * @param  defTextStyle  Text style to use when the node does not
+  *                       name a known one.
+  */
+ private void traverseParaContents(Node node, TextStyle defTextStyle) {
+
+     String styleName = findAttribute(node, "text:style-name");
+     TextStyle style = (TextStyle)
+         styleCat.lookup(styleName, "text", null, TextStyle.class);
+
+     // One style for every run emitted from this node, so that spaces,
+     // tabs and line breaks do not drop the formatting of the text
+     // around them.
+     TextStyle runStyle = (style != null) ? style : defTextStyle;
+
+     if (!node.hasChildNodes()) {
+         return;
+     }
+
+     NodeList nodeList = node.getChildNodes();
+     int nChildren = nodeList.getLength();
+
+     for (int i = 0; i < nChildren; i++) {
+         Node child = nodeList.item(i);
+
+         if (child.getNodeType() == Node.TEXT_NODE) {
+
+             // this is for grabbing text nodes.
+             String s = child.getNodeValue();
+
+             if (s.length() > 0) {
+                 encoder.addElement(new WseTextRun(s, runStyle, styleCat,
+                                                   fontTable, colorTable));
+             }
+
+         } else if (child.getNodeType() == Node.ELEMENT_NODE) {
+
+             String childNodeName = child.getNodeName();
+
+             if (childNodeName.equals(TAG_SPACE)) {
+
+                 // this is for text:s tags.
+                 NamedNodeMap map = child.getAttributes();
+                 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
+                 StringBuffer space = new StringBuffer(" ");
+                 int count = 1;
+
+                 if (attr != null) {
+                     try {
+                         String countStr = attr.getNodeValue();
+                         count = Integer.parseInt(countStr.trim());
+                     } catch (NumberFormatException e) {
+                         // keep the default of one space
+                         Debug.log(Debug.ERROR, "Problem parsing space tag", e);
+                     }
+                 }
+
+                 // the buffer already holds the first space
+                 for (int j = 1; j < count; j++) {
+                     space.append(" ");
+                 }
+
+                 encoder.addElement(new WseTextRun(space.toString(), runStyle,
+                                                   styleCat, fontTable, colorTable));
+                 Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />");
+
+             } else if (childNodeName.equals(TAG_TAB_STOP)) {
+
+                 // this is for text:tab-stop
+                 encoder.addElement(new WseTextRun("\t", runStyle, styleCat,
+                                                   fontTable, colorTable));
+
+                 Debug.log(Debug.INFO, "<TAB/>");
+
+             } else if (childNodeName.equals(TAG_LINE_BREAK)) {
+
+                 // this is for text:line-break
+                 encoder.addElement(new WseTextRun("\n", runStyle, styleCat,
+                                                   fontTable, colorTable));
+
+                 Debug.log(Debug.INFO, "<LINE-BREAK/>");
+
+             } else if (childNodeName.equals(TAG_SPAN)) {
+
+                 // this is for text:span
+                 Debug.log(Debug.INFO, "<SPAN>");
+                 traverseParaContents(child, runStyle);
+                 Debug.log(Debug.INFO, "</SPAN>");
+
+             } else if (childNodeName.equals(TAG_HYPERLINK)) {
+
+                 // this is for text:a
+                 Debug.log(Debug.INFO, "<HYPERLINK>");
+                 traverseParaContents(child, runStyle);
+                 Debug.log(Debug.INFO, "</HYPERLINK>");
+
+             } else if (childNodeName.equals(TAG_BOOKMARK) ||
+                        childNodeName.equals(TAG_BOOKMARK_START)) {
+
+                 Debug.log(Debug.INFO, "<BOOKMARK/>");
+
+             } else {
+
+                 Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />");
+             }
+         }
+     }
+ }
+
+
+ /**
+  * Walks a <i>text:unordered-list</i> or <i>text:ordered-list</i>
+  * element.  Each <i>text:list-item</i> child is handed to
+  * {@link #traverseListItem} and each <i>text:list-header</i> child
+  * to {@link #traverseListHeader}; any other element child is
+  * reported as an error in the log.  Non-element children are
+  * ignored.
+  *
+  * @param   node  A list <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseList(Node node) throws IOException {
+
+     Debug.log(Debug.TRACE, "<LIST>");
+
+     // Visit the children in document order via the sibling chain.
+     for (Node item = node.getFirstChild(); item != null;
+              item = item.getNextSibling()) {
+
+         if (item.getNodeType() != Node.ELEMENT_NODE) {
+             continue;
+         }
+
+         String tag = item.getNodeName();
+
+         if (tag.equals(TAG_LIST_ITEM)) {
+             traverseListItem(item);
+         } else if (tag.equals(TAG_LIST_HEADER)) {
+             traverseListHeader(item);
+         } else {
+             Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+         }
+     }
+
+     Debug.log(Debug.TRACE, "</LIST>");
+ }
+
+
+ /**
+  * This method traverses a <i>text:list-header</i> element.
+  * Every <i>text:p</i> child is passed to
+  * <code>traverseParagraph</code>; any other element child is
+  * logged as an error, at the same level the other list traversal
+  * methods use for unexpected children.
+  *
+  * @param   node  A list header <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseListHeader(Node node) throws IOException {
+
+     Debug.log(Debug.TRACE, "<LIST-HEADER>");
+
+     if (node.hasChildNodes()) {
+
+         NodeList nodeList = node.getChildNodes();
+         int len = nodeList.getLength();
+
+         for (int i = 0; i < len; i++) {
+
+             Node child = nodeList.item(i);
+
+             if (child.getNodeType() == Node.ELEMENT_NODE) {
+
+                 String nodeName = child.getNodeName();
+
+                 if (nodeName.equals(TAG_PARAGRAPH)) {
+
+                     traverseParagraph(child);
+
+                 } else {
+
+                     // Unexpected content is an error, not trace noise.
+                     Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+                 }
+             }
+         }
+     }
+
+     Debug.log(Debug.TRACE, "</LIST-HEADER>");
+ }
+
+
+ /**
+  * Walks a <i>text:list-item</i> element.  The format allows a
+  * list item to contain <i>text:p</i>, <i>text:h</i>,
+  * <i>text:section</i>, <i>text:ordered-list</i> and
+  * <i>text:unordered-list</i> children.
+  *
+  * Only <i>text:p</i> (passed to <code>traverseParagraph</code>)
+  * and the two list kinds (passed to {@link #traverseList}) are
+  * processed here; every other element child is logged as an
+  * error.  Non-element children are ignored.
+  *
+  * @param   node  <code>Node</code> to traverse.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseListItem(Node node) throws IOException {
+
+     Debug.log(Debug.TRACE, "<LIST-ITEM>");
+
+     // Visit the children in document order via the sibling chain.
+     for (Node part = node.getFirstChild(); part != null;
+              part = part.getNextSibling()) {
+
+         if (part.getNodeType() != Node.ELEMENT_NODE) {
+             continue;
+         }
+
+         String tag = part.getNodeName();
+
+         if (tag.equals(TAG_PARAGRAPH)) {
+             traverseParagraph(part);
+         } else if (tag.equals(TAG_UNORDERED_LIST)
+                    || tag.equals(TAG_ORDERED_LIST)) {
+             // nested list of either kind
+             traverseList(part);
+         } else {
+             Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+         }
+     }
+
+     Debug.log(Debug.TRACE, "</LIST-ITEM>");
+ }
+
+
+ /**
+  * Returns the value of the attribute called <code>name</code> on
+  * <code>node</code>.
+  *
+  * @param   node  The <code>Node</code> whose attributes are searched.
+  * @param   name  The attribute name to match against each
+  *                attribute's node name.
+  *
+  * @return  The value of the first attribute whose name equals
+  *          <code>name</code>, or <code>null</code> if the node has
+  *          no attribute map or no such attribute.
+  */
+ private String findAttribute(Node node, String name) {
+     NamedNodeMap attributes = node.getAttributes();
+     if (attributes == null) {
+         return null;
+     }
+
+     String value = null;
+     int count = attributes.getLength();
+     for (int idx = 0; idx < count && value == null; idx++) {
+         Node candidate = attributes.item(idx);
+         if (candidate.getNodeName().equals(name)) {
+             return candidate.getNodeValue();
+         }
+     }
+     return value;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java
new file mode 100644
index 000000000000..56dd2a4fb53f
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java
@@ -0,0 +1,152 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: PluginFactoryImpl.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.DocumentMergerFactory;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.DocumentSerializerFactory;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.DocumentDeserializerFactory;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.util.registry.ConverterInfo;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+
+/**
+ * <p>WordSmith implementation of a <code>PluginFactory</code> that
+ * encapsulates conversion of StarWriter XML format to and from
+ * WordSmith format.</p>
+ *
+ * <p>The superclass produces a particular
+ * {@link org.openoffice.xmerge.Document Document}
+ * object, i.e.
+ * {@link org.openoffice.xmerge.converter.xml.sxw.SxwDocument
+ * SxwDocument} that the converters in this class work with. Thus,
+ * this class only implements the methods that produce the converters,
+ * i.e. {@link
+ * org.openoffice.xmerge.DocumentSerializer
+ * DocumentSerializer} and {@link
+ * org.openoffice.xmerge.DocumentDeserializer
+ * DocumentDeserializer};
+ * as well as the {@link
+ * org.openoffice.xmerge.ConverterCapabilities
+ * ConverterCapabilities} object that is specific to this format
+ * conversion. That superclass also produces a {@link
+ * org.openoffice.xmerge.DocumentMerger DocumentMerger}
+ * object, i.e. {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.DocumentMergerImpl
+ * DocumentMergerImpl} from which this class derives its functionality.</p>
+ *
+ * @author Herbie Ong, Dave Proulx
+ */
+ public final class PluginFactoryImpl extends SxwPluginFactory
+     implements DocumentDeserializerFactory, DocumentSerializerFactory,
+     DocumentMergerFactory {
+
+     /**
+      * Single <code>ConverterCapabilities</code> instance handed to
+      * every merger created by this factory.
+      */
+     private final static ConverterCapabilities converterCap =
+         new ConverterCapabilitiesImpl();
+
+
+     /**
+      * Creates the factory, forwarding the converter description to
+      * the <code>SxwPluginFactory</code> constructor.
+      *
+      * @param  ci  <code>ConverterInfo</code> passed to the superclass.
+      */
+     public PluginFactoryImpl(ConverterInfo ci) {
+         super(ci);
+     }
+
+
+     /**
+      * Creates the serializer for the given document.
+      *
+      * @param   doc  <code>Document</code> object to be
+      *               converted/serialized.
+      *
+      * @return  A new <code>DocumentSerializerImpl</code> wrapping
+      *          <code>doc</code>.
+      */
+     public DocumentSerializer createDocumentSerializer(Document doc) {
+         return new DocumentSerializerImpl(doc);
+     }
+
+
+     /**
+      * Creates the deserializer for the given device data.
+      *
+      * @param   cd  <code>ConvertData</code> object for reading data
+      *              which will be converted back to a
+      *              <code>Document</code> object.
+      *
+      * @return  A new <code>DocumentDeserializerImpl</code> wrapping
+      *          <code>cd</code>.
+      */
+     public DocumentDeserializer createDocumentDeserializer(ConvertData cd) {
+         return new DocumentDeserializerImpl(cd);
+     }
+
+
+     /**
+      * Creates the merger for the given document, configured with
+      * this factory's shared <code>ConverterCapabilities</code>.
+      *
+      * @param   doc  <code>Document</code> to merge.
+      *
+      * @return  A new <code>DocumentMergerImpl</code> object.
+      */
+     public DocumentMerger createDocumentMerger(Document doc) {
+         return new DocumentMergerImpl(doc, converterCap);
+     }
+
+
+     /**
+      * Creates the device-side document by constructing a
+      * <code>PalmDocument</code> from the supplied stream.
+      *
+      * @param   name  Name of the document; not used by this
+      *                implementation.
+      * @param   is    <code>InputStream</code> passed to the
+      *                <code>PalmDocument</code> constructor.
+      *
+      * @return  The new <code>PalmDocument</code>.
+      *
+      * @throws  IOException  Declared by this method; presumably
+      *                       raised while the
+      *                       <code>PalmDocument</code> is built.
+      */
+     public Document createDeviceDocument(String name, InputStream is)
+         throws IOException {
+         return new PalmDocument(is);
+     }
+ }
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java
new file mode 100644
index 000000000000..77ba70f6ac2a
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java
@@ -0,0 +1,355 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WSDecoder.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.FileInputStream;
+import java.io.UnsupportedEncodingException;
+import org.openoffice.xmerge.util.Debug;
+
+import org.openoffice.xmerge.converter.palm.*;
+import org.openoffice.xmerge.util.Resources;
+
+/**
+ * This class is used by {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.DocumentDeserializerImpl
+ * DocumentDeserializerImpl} to decode a WordSmith format. It currently
+ * decodes the text content into a single <code>String</code> object.
+ *
+ * @author Herbie Ong, David Proulx
+ */
+final class WSDecoder implements DOCConstants {
+
+ /** For decoding purposes. */
+ private final static int COUNT_BITS = 3;
+
+ /** Resources object for I18N. */
+ private Resources res = null;
+
+ /**
+ * Default constructor.  Obtains the shared
+ * <code>Resources</code> instance, which is used to look up
+ * the messages of the exceptions thrown while decoding.
+ */
+ WSDecoder() {
+ res = Resources.getInstance();
+ }
+
+ /**
+  * Decode the text records into a single <code>byte</code> array.
+  * Record 0 is read as the header; every following record is
+  * either decompressed or taken as-is, depending on the header
+  * version, and the results are concatenated in record order.
+  *
+  * @param   recs  <code>Record</code> array holding WordSmith
+  *                contents.  The first entry must be the header
+  *                record.
+  *
+  * @return  The concatenated text record data; empty if there
+  *          are no records after the header.
+  *
+  * @throws  IOException  If <code>recs</code> is <code>null</code>
+  *                       or empty, if the header version is not
+  *                       recognised, or if any other I/O error
+  *                       occurs.
+  */
+ byte[] parseRecords(Record[] recs) throws IOException {
+
+     // Without a header record nothing can be decoded.  Report it as
+     // a conversion error instead of failing on recs[0].
+     if (recs == null || recs.length == 0) {
+         throw new IOException("WordSmith document has no header record");
+     }
+
+     // read the header record
+     HeaderInfo header = readHeader(recs[0].getBytes());
+     dumpHeader(header);
+
+     // One slot per text record, i.e. everything after the header.
+     int nTextRecs = recs.length - 1;
+     byte[][] byteArrays = new byte[nTextRecs][];
+
+     switch (header.version & ~4) { // DJP: "4" indicates OOB data is present.
+     // Add a constant to handle this, might also need code to handle it.
+
+     case COMPRESSED:
+     case 3: // DJP: determined this empirically. Are Herbie's constants wrong?
+         for (int i = 1; i < recs.length; i++) {
+             byteArrays[i-1] = decompress(recs[i].getBytes(),
+                                          header.textRecordSize);
+             Debug.log(Debug.INFO, "processing " + byteArrays[i-1].length + " bytes");
+         }
+
+         break;
+
+     case UNCOMPRESSED:
+         for (int i = 1; i < recs.length; i++) {
+             byteArrays[i-1] = recs[i].getBytes();
+             Debug.log(Debug.INFO, "processing " + byteArrays[i-1].length + " bytes");
+         }
+
+         break;
+
+     default:
+         throw new IOException(res.getString("UNKNOWN_DOC_VERSION"));
+
+     }
+
+     // Concatenate byteArrays[][] into a single byte array.
+     int length = 0;
+     for (int i = 0; i < nTextRecs; i++)
+         length += byteArrays[i].length;
+     byte bigArray[] = new byte[length];
+     int offset = 0;
+     for (int i = 0; i < nTextRecs; i++) {
+         System.arraycopy(byteArrays[i], 0, bigArray, offset,
+                          byteArrays[i].length);
+         offset += byteArrays[i].length;
+     }
+     return bigArray;
+ }
+
+
+ /**
+ * Decode the text records into a <code>Wse</code> array.
+ *
+ * @param Record[] <code>Record</code> array holding DOC
+ * contents.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ Wse[] parseDocument(Record[] recs) throws IOException {
+
+ java.util.Vector v = new java.util.Vector(20, 20);
+ WseFontTable fontTable = null;
+ WseColorTable colorTable = null;
+
+ // rawData is the document data to be parsed.
+ byte rawData[] = parseRecords(recs);
+
+ // beginning of document has some header information, including
+ // optional font and color tables.
+ // DJP: maybe should add a new WSelement (docHeader) to hold
+ // header info.
+ // DJP: finish code here to parse header
+ if (rawData[0] != 2) throw new IOException();
+ int nParagraphs = util.intFrom4bytes(rawData, 2);
+ int nAtoms = util.intFrom4bytes(rawData, 6);
+ int nChars = util.intFrom4bytes(rawData, 10);
+ int miscSize = util.intFrom4bytes(rawData, 14);
+ int curIndex = 18;
+
+ while (curIndex < rawData.length) {
+ if (WsePara.isValid(rawData, curIndex)) {
+ v.add(new WsePara(rawData, curIndex));
+ curIndex = WsePara.computeNewIndex(rawData, curIndex);
+ } else if (WseTextRun.isValid(rawData, curIndex)) {
+ v.add(new WseTextRun(rawData, curIndex, fontTable, colorTable));
+ curIndex = WseTextRun.computeNewIndex(rawData, curIndex);
+ } else if (WseFontTable.isValid(rawData, curIndex)) {
+ fontTable = new WseFontTable(rawData, curIndex);
+ v.add(fontTable);
+ curIndex = WseFontTable.computeNewIndex(rawData, curIndex);
+ } else if (WseColorTable.isValid(rawData, curIndex)) {
+ colorTable = new WseColorTable(rawData, curIndex);
+ v.add(colorTable);
+ curIndex = WseColorTable.computeNewIndex(rawData, curIndex);
+ } else {
+ Debug.log(Debug.ERROR, "Unknown code " + rawData[curIndex]);
+ throw new IOException();
+ }
+ }
+
+ return (Wse[])v.toArray(new Wse[2]);
+ }
+
+
+ /**
+ * <p>Decompress the <code>byte</code> array.</p>
+ *
+ * <p>The resulting uncompressed <code>byte</code> array
+ * should be within <code>textRecordSize</code> length,
+ * definitely within twice the size it claims, else treat
+ * it as a problem with the encoding of that PDB and
+ * throw <code>IOException</code>.</p>
+ *
+ * @param cBytes Compressed <code>byte</code> array
+ * @param textRecordSize Size of uncompressed <code>byte</code>
+ * array
+ *
+ * @return A new <code>byte</code> array holding exactly the
+ * uncompressed bytes.
+ *
+ * @throws IOException If decoding indexes outside either array,
+ * e.g. the output needs more than twice
+ * <code>textRecordSize</code> bytes, the compressed input ends in
+ * the middle of a code, or a sequence code refers to a position
+ * before the start of the output.
+ */
+ private byte[] decompress(byte[] cBytes, int textRecordSize)
+ throws IOException {
+
+ // create byte array for storing uncompressed bytes
+ // it should be within textRecordSize range, definitely
+ // within twice of textRecordSize! if not, then
+ // an ArrayIndexOutOfBoundsException will get thrown,
+ // and it should be converted into an IOException, and
+ // treat it as a conversion error.
+ byte[] uBytes = new byte[textRecordSize*2];
+
+ int up = 0;
+ int cp = 0;
+
+ try {
+
+ while (cp < cBytes.length) {
+
+ int c = cBytes[cp++] & 0xff;
+
+ // codes 1...8 mean copy that many bytes
+ if (c > 0 && c < 9) {
+
+ while (c-- > 0)
+ uBytes[up++] = cBytes[cp++];
+ }
+
+ // codes 0, 9...0x7F represent themselves
+ else if (c < 0x80) {
+ uBytes[up++] = (byte) c;
+ }
+
+ // codes 0xC0...0xFF represent "space + ascii char"
+ // (the char is the code with its top bit cleared)
+ else if (c >= 0xC0) {
+ uBytes[up++] = (byte) ' ';
+ uBytes[up++] = (byte) (c ^ 0x80);
+ }
+
+ // codes 0x80...0xBF represent sequences: together with the
+ // next byte they give a distance m back into the output and
+ // a count n of bytes to repeat from there
+ else {
+ c <<= 8;
+ c += cBytes[cp++] & 0xff;
+ int m = (c & 0x3fff) >> COUNT_BITS;
+ int n = c & ((1 << COUNT_BITS) - 1);
+ n += COUNT_BITS;
+ while (n-- > 0) {
+ uBytes[up] = uBytes[up - m];
+ up++;
+ }
+ }
+ }
+
+ } catch (ArrayIndexOutOfBoundsException e) {
+
+ throw new IOException(
+ res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED"));
+ }
+
+ // note that uBytes may be larger than the amount of
+ // uncompressed bytes, so trim it to another byte array
+ // with the exact size.
+ byte[] textBytes = new byte[up];
+ System.arraycopy(uBytes, 0, textBytes, 0, up);
+
+ return textBytes;
+ }
+
+
+ /**
+  * Parses the header record into a <code>HeaderInfo</code>.
+  * The fields are read in order: one ignored byte, one version
+  * byte, two unused bytes, a 4-byte text length, a 2-byte record
+  * count, a 2-byte record size and four unused bytes.
+  *
+  * @param   bytes  <code>byte</code> array containing header
+  *                 record data.
+  *
+  * @return  <code>HeaderInfo</code> object filled from
+  *          <code>bytes</code>.
+  *
+  * @throws  IOException  If the text length does not fit in a
+  *                       positive <code>int</code>, or if any
+  *                       other I/O error occurs while reading.
+  */
+ private HeaderInfo readHeader(byte[] bytes) throws IOException {
+
+     DataInputStream in =
+         new DataInputStream(new ByteArrayInputStream(bytes));
+     HeaderInfo info = new HeaderInfo();
+
+     // The version normally occupies the first 2 bytes and is either
+     // COMPRESSED or UNCOMPRESSED.  SmartDoc/Quickword add 0x01 to
+     // the first byte, giving 0x0101 instead of 0x0001 for
+     // UNCOMPRESSED and 0x0102 instead of 0x0002 for COMPRESSED.
+     // Only the second byte is therefore kept.
+     in.readByte();
+     info.version = in.readByte();
+
+     // 2 unused bytes
+     in.readShort();
+
+     // The text length is an unsigned 4-byte value.  It is kept in an
+     // int because the buffers used here cannot exceed the largest
+     // positive int anyway; a value that reads back as negative is
+     // beyond that limit and is rejected.
+     int textLength = in.readInt();
+     info.textLen = textLength;
+     if (textLength < 0) {
+         throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED"));
+     }
+
+     // number of records, then record size - unsigned 2 bytes each
+     info.textRecordCount = in.readUnsignedShort();
+     info.textRecordSize = in.readUnsignedShort();
+
+     // 4 unused bytes
+     in.readInt();
+
+     return info;
+ }
+
+
+ /**
+ * Intended to print header info into the log, for debugging
+ * purposes only.  Currently a no-op: the logging statements in
+ * the body are commented out.
+ *
+ * @param header <code>HeaderInfo</code> structure.
+ */
+ private void dumpHeader(HeaderInfo header) {
+ /*
+ log("<DOC_INFO ");
+ log("version=\"" + header.version + "\" ");
+ log("text-length=\"" + header.textLen + "\" ");
+ log("number-of-records=\"" + header.textRecordCount + "\" ");
+ log("record-size=\"" + header.textRecordSize + "\" />\n");
+ */
+ }
+
+
+ /**
+  * Nested class to store DOC header information.  It is a plain
+  * value holder that never touches the enclosing decoder, so it
+  * is declared <code>static</code> to avoid carrying a hidden
+  * reference to a <code>WSDecoder</code> instance.
+  */
+ private static class HeaderInfo {
+
+     /** length of text section */
+     int textLen = 0;
+
+     /** number of text records */
+     int textRecordCount = 0;
+
+     /**
+      * size of a text record. This is normally the same as
+      * TEXT_RECORD_SIZE, but some applications may modify this.
+      */
+     int textRecordSize = 0;
+
+     /** compression type */
+     int version = 0;
+ }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java
new file mode 100644
index 000000000000..58df6112a8f4
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java
@@ -0,0 +1,215 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WSEncoder.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.*;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Vector;
+
+import org.openoffice.xmerge.converter.palm.*;
+
+/**
+ * This class is used by {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.DocumentDeserializerImpl
+ * DocumentDeserializerImpl} to encode the WordSmith format.
+ *
+ * @author David Proulx
+ */
+
+// DJP: replace 4096 w/ a defined constant
+
+final class WSEncoder {
+
+ /* DJP: These should probably go somewhere else! */
+ /** Constant for uncompressed version. */
+ public static final short UNCOMPRESSED = 1;
+
+ /** Constant for compressed version. */
+ public static final short COMPRESSED = 2;
+
+ /** Constant used for spare fields. */
+ public static final int SPARE = 0;
+
+ /* WordSmith Header information. */
+ private short version;
+ private int textLen;
+ private short maxRecSize;
+ private int textRecCount = 0;
+
+
+ /* WordSmith document elements. */
+ WseHeader header = null;
+ WseFontTable ft = null;
+ WseColorTable ct = null;
+ private Vector elements; // paragraphs & text runs
+
+ /* Totals for the WordSmith document. */
+ int nrParagraphs = 0;
+ int nrAtoms = 0;
+ int nrChars = 0;
+
+
+ /**
+  * Default constructor.  Starts with an empty element list and
+  * sets the record-0 header fields to their initial values:
+  * uncompressed version, zero text length and a maximum record
+  * size of 4096 bytes.
+  */
+ WSEncoder() {
+     elements = new Vector();
+     maxRecSize = 4096;
+     textLen = 0;
+     version = UNCOMPRESSED;
+ }
+
+
+ /**
+ * This method adds a new element to the WordSmith document.
+ *
+ * @param elem WordSmith document element to add
+ */
+ void addElement(Wse elem) {
+ if (elem.getClass() == WseHeader.class)
+ header = (WseHeader)elem;
+ else if (elem.getClass() == WseFontTable.class)
+ ft = (WseFontTable)elem;
+ else if (elem.getClass() == WseColorTable.class)
+ ct = (WseColorTable)elem;
+ else
+ elements.addElement(elem);
+ }
+
+
+ /**
+ * This method encodes the information given to
+ * an array of palm Records in the WordSmith database format.
+ *
+ * @return <code>Record</code> array holding WordSmith contents.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ Record[] getRecords() throws IOException {
+
+ Vector allRecs = new Vector();
+ int nElements = elements.size();
+
+ // Count up the number of paragraphs, atoms, and characters.
+ int currElement = 0;
+ while (currElement < nElements) {
+ Wse e = (Wse)elements.elementAt(currElement++);
+ if (e.getClass() == WsePara.class)
+ nrParagraphs++;
+ if (e.getClass() == WseTextRun.class) {
+ nrAtoms++;
+ nrChars += ((WseTextRun)e).getText().length();
+ }
+ }
+
+ byte[] currRec = new byte[4096];
+ int currRecLen = 0;
+
+ // This code assumes that the WordSmith header, font table,
+ // and color table total less than 4096 bytes.
+ header = new WseHeader(nrParagraphs, nrAtoms, nrChars, ft, ct);
+ System.arraycopy(header.getBytes(), 0,
+ currRec, currRecLen, header.getByteCount());
+ currRecLen += header.getByteCount();
+
+ if (ft != null) {
+ System.arraycopy(ft.getBytes(), 0, currRec, currRecLen,
+ ft.getByteCount());
+ currRecLen += ft.getByteCount();
+ }
+ if (ct != null) {
+ System.arraycopy(ct.getBytes(), 0, currRec, currRecLen,
+ ct.getByteCount());
+ currRecLen += ct.getByteCount();
+ }
+
+ currElement = 0;
+ while (currElement < nElements) {
+ Wse e = (Wse)elements.elementAt(currElement++);
+ int length = e.getByteCount();
+ if ((length + currRecLen) <= 4096) {
+ System.arraycopy(e.getBytes(), 0, currRec, currRecLen, length);
+ currRecLen += length;
+ } else {
+ // Copy in enough to get to full size, then create a
+ // new Record and add it to the Vector.
+ int firstPartLen = 4096 - currRecLen;
+ System.arraycopy(e.getBytes(), 0, currRec, currRecLen,
+ firstPartLen);
+ Record r = new Record(currRec);
+ allRecs.addElement(r);
+
+ // Put the remainder at the beginning of the next record
+ currRecLen = 0;
+ System.arraycopy(e.getBytes(), firstPartLen, currRec,
+ currRecLen, length - firstPartLen);
+ currRecLen += length - firstPartLen;
+ }
+ }
+
+ // Processed all the elements. Write out any remaining partial record.
+ if (currRecLen > 0) {
+ byte[] partial = new byte[currRecLen];
+ System.arraycopy(currRec, 0, partial, 0, currRecLen);
+ Record rr = new Record(partial);
+ allRecs.addElement(rr);
+ }
+
+
+ // Record 0 is the WordSmith header. Do it last since it
+ // contains totals for the entire document. It goes
+ // before everything else.
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(bos);
+ dos.writeShort(version);
+ dos.writeShort(0);
+ dos.writeInt(textLen);
+ dos.writeShort(allRecs.size());
+ dos.writeShort(maxRecSize);
+ dos.writeInt(0);
+ allRecs.insertElementAt(new Record(bos.toByteArray()), 0);
+
+ // Convert Vector of Records to an array and return it.
+ int nRecs = allRecs.size();
+ Record recs[] = new Record[nRecs];
+ for (int i = 0; i < nRecs; i++)
+ recs[i] = (Record)allRecs.elementAt(i);
+ return recs;
+ }
+
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java
new file mode 100644
index 000000000000..1df80a427594
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java
@@ -0,0 +1,103 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: Wse.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.IOException;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.xml.*;
+
+
+/**
+ * This is the superclass for all elements in a WordSmith document.
+ * Elements can be paragraphs, text runs, font tables, or color tables.
+ *
+ * @author David Proulx
+ */
+ abstract class Wse {
+
+ /**
+ * Return true if <code>dataArray[startIndex]</code> is the start
+ * of a valid element of this type.
+ *
+ * This base implementation always returns false.  The method is
+ * static, so a subclass cannot override it; callers invoke the
+ * version declared on the concrete element class by name.
+ *
+ * @param dataArray <code>byte</code> array.
+ * @param startIndex The start index.
+ *
+ * @return true if <code>dataArray[startIndex]</code> is the
+ * start of a valid element of this type, false otherwise.
+ */
+ static boolean isValid(byte dataArray[], int startIndex) {
+ return false;
+ }
+
+
+ /**
+ * Compute and return the index of the first <code>byte</code>
+ * following this element. It is assumed that the element
+ * starting at <code>dataArray[startIndex]</code> is valid.
+ *
+ * This base implementation always returns 0.  The method is
+ * static, so a subclass cannot override it; callers invoke the
+ * version declared on the concrete element class by name.
+ *
+ * @param dataArray <code>byte</code> array.
+ * @param startIndex The start index.
+ *
+ * @return The index of the first <code>byte</code> following
+ * this element.
+ */
+ static int computeNewIndex(byte dataArray[], int startIndex) {
+ return 0;
+ }
+
+
+ /**
+ * Return the total number of bytes needed to represent this
+ * object.
+ *
+ * @return The total number of bytes needed to represent this
+ * object.
+ */
+ abstract int getByteCount();
+
+
+ /**
+ * Return a <code>byte</code> array representing this element.
+ *
+ * @return A <code>byte</code> array representing this element.
+ */
+ abstract byte[] getBytes();
+ }
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java
new file mode 100644
index 000000000000..8d36c5d8a9a8
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java
@@ -0,0 +1,250 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WseColorTable.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.IOException;
+import java.awt.Color;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.xml.*;
+
+/**
+ * This class represents a color table in a WordSmith document.
+ *
+ * @author David Proulx
+ */
+class WseColorTable extends Wse {
+
+    /** Tag byte (decimal 64) that marks the start of a color table. */
+    private static final byte TABLE_TAG = 0x40;
+
+    /** Number of slots in each of the two palettes. */
+    private static final int PALETTE_SIZE = 16;
+
+    /** Bytes per stored entry: one index byte, then red, green, blue. */
+    private static final int ENTRY_SIZE = 4;
+
+    /** Payload length written by getBytes(): both palettes together. */
+    private static final int TABLE_LENGTH = 2 * PALETTE_SIZE * ENTRY_SIZE;
+
+    private Color fgColors[];
+    private Color bgColors[];
+
+    /**
+     * Constructor to use when going from DOM to WordSmith.  Slot 0 of
+     * the foreground palette is preset to black and slot 0 of the
+     * background palette to white; all other slots start out empty.
+     */
+    public WseColorTable() {
+        fgColors = new Color[PALETTE_SIZE];
+        bgColors = new Color[PALETTE_SIZE];
+
+        // Always need these two!
+        fgColors[0] = Color.black;
+        bgColors[0] = Color.white;
+    }
+
+    /**
+     * Constructor to use when going from WordSmith to DOM.  Reads 16
+     * foreground entries followed by 16 background entries, starting
+     * two bytes after <code>i</code> (past the tag and length bytes).
+     * The stored length byte is not consulted.
+     *
+     * @param  dataArray  <code>byte</code> array.
+     * @param  i          Index of the first byte of the color table.
+     *
+     * @throws  ArrayIndexOutOfBoundsException  if the array is too
+     *          short to hold both palettes.
+     */
+    public WseColorTable(byte dataArray[], int i) {
+        int offset = i + 2;  // Skip leading "64" and table length field.
+        fgColors = readPalette(dataArray, offset);
+        bgColors = readPalette(dataArray,
+                               offset + PALETTE_SIZE * ENTRY_SIZE);
+    }
+
+
+    /**
+     * Decode one palette of 4-byte entries.  The first byte of each
+     * entry (the index byte) is skipped.
+     */
+    private static Color[] readPalette(byte data[], int offset) {
+        Color palette[] = new Color[PALETTE_SIZE];
+        for (int k = 0; k < PALETTE_SIZE; k++) {
+            int base = offset + k * ENTRY_SIZE;
+            // Mask each component so bytes above 0x7F are not negative.
+            palette[k] = new Color(data[base + 1] & 0xFF,
+                                   data[base + 2] & 0xFF,
+                                   data[base + 3] & 0xFF);
+        }
+        return palette;
+    }
+
+
+    /**
+     * Compute the index of the first <code>byte</code> following the
+     * color table, assuming that <code>dataArray[startIndex]</code>
+     * is the beginning of a valid color table.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  The index of the first <code>byte</code> following the
+     *          color table.
+     */
+    static int computeNewIndex(byte dataArray[], int startIndex) {
+        // Mask to avoid sign-extension of the length byte.
+        int tableLen = dataArray[startIndex + 1] & 0xFF;
+        return startIndex + tableLen + 2;
+    }
+
+
+    /**
+     * Return true if <code>dataArray[startIndex]</code> is the start
+     * of a valid color table: the tag byte is 64 and the table, whose
+     * payload length is given by the following byte, fits entirely
+     * inside the array.  A table that ends exactly at the end of the
+     * array is accepted.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  Start index.
+     *
+     * @return  true if <code>dataArray[startIndex]</code> is the start
+     *          of a valid color table, false otherwise.
+     */
+    static boolean isValid(byte dataArray[], int startIndex) {
+        if (startIndex < 0)
+            return false;
+        int remaining = dataArray.length - startIndex;
+        if (remaining < 2)
+            return false;   // no room for the tag and length bytes
+        if (dataArray[startIndex] != TABLE_TAG)
+            return false;
+        // Mask to avoid sign-extension of the length byte.
+        int len = dataArray[startIndex + 1] & 0xFF;
+        // The table occupies the tag, the length byte and len payload
+        // bytes; its last byte is at startIndex + len + 1.
+        return remaining >= len + 2;
+    }
+
+
+    /**
+     * Return the number of bytes needed to represent this color table.
+     *
+     * @return  The byte count (tag byte, length byte and 32 entries of
+     *          4 bytes each).
+     */
+    int getByteCount() {
+        return TABLE_LENGTH + 1 + 1;
+    }
+
+
+    /**
+     * Return a <code>byte</code> array representing this color table.
+     * Entries are numbered 0-15 for the foreground palette and 16-31
+     * for the background palette.  Empty foreground slots are written
+     * as black (0, 0, 0) and empty background slots as white
+     * (0xFF, 0xFF, 0xFF).
+     *
+     * @return  <code>byte</code> array representing this color table.
+     */
+    byte[] getBytes() {
+        byte b[] = new byte[TABLE_LENGTH + 1 + 1];
+        b[0] = TABLE_TAG;
+        b[1] = (byte)TABLE_LENGTH;
+
+        int next = writePalette(b, 2, fgColors, 0, (byte)0);
+        writePalette(b, next, bgColors, PALETTE_SIZE, (byte)0xFF);
+
+        return b;
+    }
+
+
+    /**
+     * Write one palette into <code>dest</code> starting at
+     * <code>offset</code> and return the offset following it.
+     * <code>emptyComponent</code> is used for the red, green and blue
+     * bytes of slots that hold no color.
+     */
+    private static int writePalette(byte dest[], int offset,
+                                    Color palette[], int firstIndex,
+                                    byte emptyComponent) {
+        int pos = offset;
+        for (int j = 0; j < PALETTE_SIZE; j++) {
+            dest[pos++] = (byte)(firstIndex + j);
+            Color entry = palette[j];
+            if (entry != null) {
+                dest[pos++] = (byte)entry.getRed();
+                dest[pos++] = (byte)entry.getGreen();
+                dest[pos++] = (byte)entry.getBlue();
+            } else {
+                dest[pos++] = emptyComponent;
+                dest[pos++] = emptyComponent;
+                dest[pos++] = emptyComponent;
+            }
+        }
+        return pos;
+    }
+
+
+    /**
+     * Return the index of the specified foreground or background
+     * <code>Color</code>.  Colors are compared with
+     * <code>equals</code>.  If the color is not already in the table
+     * it is stored in the first empty slot.
+     *
+     * If the palette is full and does not contain the color, 0 is
+     * returned and the table is left unchanged.
+     * DJP: how to handle table overflow?
+     *
+     * @param  c           The <code>Color</code>.
+     * @param  foreground  true if foreground color, false if
+     *                     background color.
+     *
+     * @return  The index of the specified foreground or background
+     *          <code>Color</code>.
+     */
+    int findColor(Color c, boolean foreground) {
+
+        Color palette[] = foreground ? fgColors : bgColors;
+
+        for (int slot = 0; slot < PALETTE_SIZE; slot++) {
+            if (palette[slot] == null) {
+                // First empty slot: no more colors in the table, so
+                // the color was not found.  Add it here.
+                palette[slot] = c;
+                return slot;
+            }
+            if (palette[slot].equals(c))
+                return slot;
+        }
+        return 0;  // Table full and color not present.
+    }
+
+
+    /**
+     * Given an index, return the <code>Color</code> from the table.
+     *
+     * @param  index       The index.
+     * @param  foreground  true if foreground color, false if
+     *                     background color.
+     *
+     * @return  The <code>Color</code> at the specified index; null if
+     *          that slot is empty.
+     *
+     * @throws  ArrayIndexOutOfBoundsException  if <code>index</code>
+     *          is outside the 16-slot palette.
+     */
+    Color getColor(int index, boolean foreground) {
+
+        Color palette[] = foreground ? fgColors : bgColors;
+        return palette[index];
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java
new file mode 100644
index 000000000000..120dfb86a16f
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java
@@ -0,0 +1,221 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WseFontTable.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.IOException;
+
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.xml.*;
+
+/**
+ * <p>This class represents a font table in a WordSmith document.
+ * A font table is represented as follows:</p>
+ *
+ * <p><blockquote>
+ * binary "3"<br>
+ * two-byte length of the table of strings which follows<br>
+ * string table (null-terminated strings) representing font names
+ * </blockquote></p>
+ *
+ * @author David Proulx
+ */
+class WseFontTable extends Wse {
+
+    /** Tag byte that marks the start of a font table. */
+    private static final byte TABLE_TAG = 3;
+
+    /** Size of the tag byte plus the two-byte length field. */
+    private static final int PREFIX_SIZE = 3;
+
+    java.util.Vector fontNames = new java.util.Vector(10);
+
+
+    /**
+     * Constructor for use when going from DOM to WordSmith.
+     */
+    public WseFontTable() {
+    }
+
+
+    /**
+     * Constructor for use when going from WordSmith to DOM.  Reads
+     * the two-byte table length that follows the tag byte, then
+     * collects null-terminated names until that many bytes have been
+     * consumed.  Names are decoded with the platform default charset.
+     *
+     * @param  dataArray  <code>byte</code> array.
+     * @param  i          Index of the first byte of the font table.
+     *
+     * @throws  ArrayIndexOutOfBoundsException  if a name is not
+     *          null-terminated before the end of the array.
+     */
+    public WseFontTable(byte dataArray[], int i) {
+        // Unsigned read: a length of 0x8000 or more must not turn
+        // negative, or the loop below would read no names at all.
+        int tableLen = readLength(dataArray, i + 1);
+        int pos = i + PREFIX_SIZE;
+        while (tableLen > 0) {
+            int nameLen = 0;
+            while (dataArray[pos + nameLen] != 0) nameLen++;
+            fontNames.add(new String(dataArray, pos, nameLen));
+            tableLen -= (nameLen + 1);  // name plus its trailing "0"
+            pos += (nameLen + 1);
+        }
+    }
+
+
+    /**
+     * Read the unsigned, big-endian two-byte length stored at
+     * <code>data[offset]</code> and <code>data[offset + 1]</code>.
+     */
+    private static int readLength(byte data[], int offset) {
+        return ((data[offset] & 0xFF) << 8) | (data[offset + 1] & 0xFF);
+    }
+
+
+    /**
+     * Add a new font to the table.  A null name is ignored.
+     *
+     * @param  newFontName  The new font name.
+     */
+    public void add(String newFontName) {
+        if (newFontName != null)
+            fontNames.add(newFontName);
+    }
+
+
+    /**
+     * Return a font name from the table, or null if invalid index.
+     *
+     * @param  index  The font name index.
+     *
+     * @return  The font name, or null if <code>index</code> is
+     *          negative or not less than the number of names.
+     */
+    public String getFontName(int index) {
+        if (index < 0 || index >= fontNames.size())
+            return null;
+        return (String)fontNames.elementAt(index);
+    }
+
+    /**
+     * Return the index of a font name in the table, or -1 if not found.
+     *
+     * @param  fontName  The font name.
+     *
+     * @return  The index of the first matching font name, or -1 if
+     *          not found.
+     */
+    public int getFontIndex(String fontName) {
+        int len = fontNames.size();
+        for (int i = 0; i < len; i++) {
+            String name = (String)fontNames.elementAt(i);
+            if (name.equals(fontName))
+                return i;
+        }
+        return -1;
+    }
+
+
+    /**
+     * Compute the index of the first <code>byte</code> following the
+     * font table, assuming that <code>dataArray[startIndex]</code>
+     * is the beginning of a valid font table.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  The index of the first <code>byte</code> following the
+     *          font table.
+     */
+    static int computeNewIndex(byte dataArray[], int startIndex) {
+        int tableLen = readLength(dataArray, startIndex + 1);
+        return startIndex + PREFIX_SIZE + tableLen;
+    }
+
+
+    /**
+     * Return true if <code>dataArray[startIndex]</code> is the start
+     * of a valid font table: the tag byte is 3, the two-byte length
+     * is present, and the byte at offset <code>length + 2</code> from
+     * the tag (the last byte of the string table) exists and is 0.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  Start index.
+     *
+     * @return  true if <code>dataArray[startIndex]</code> is the start
+     *          of a valid font table, false otherwise.
+     */
+    static boolean isValid(byte dataArray[], int startIndex) {
+        if (startIndex < 0)
+            return false;
+        int remaining = dataArray.length - startIndex;
+        if (remaining < 1)
+            return false;
+        if (dataArray[startIndex] != TABLE_TAG)
+            return false;
+        if (remaining < PREFIX_SIZE)
+            return false;   // length field is truncated
+        int len = readLength(dataArray, startIndex + 1);
+        int lastOffset = len + 2;
+        if (lastOffset >= remaining)
+            return false;   // table runs past the end of the array
+        return dataArray[startIndex + lastOffset] == 0;
+    }
+
+
+    /**
+     * Return the number of bytes needed to represent this font table.
+     * Each name is measured in encoded bytes (platform default
+     * charset), so that the result always matches the array produced
+     * by <code>getBytes()</code>.
+     *
+     * @return  The number of bytes needed to represent this font table.
+     */
+    int getByteCount() {
+
+        int length = PREFIX_SIZE;  // leading "3" plus 2 bytes for length.
+        int nFonts = fontNames.size();
+        for (int i = 0; i < nFonts; i++) {
+            String name = (String)fontNames.elementAt(i);
+            length += name.getBytes().length + 1;  // extra byte is for trailing "0"
+        }
+        return length;
+    }
+
+    /**
+     * Return a <code>byte</code> array representing this font table:
+     * the tag byte, the two-byte big-endian length of the string
+     * table, then each name followed by a 0 byte.
+     *
+     * Names are encoded with the platform default charset.
+     * NOTE(review): confirm which charset WordSmith expects.
+     *
+     * @return  A <code>byte</code> array representing this font table.
+     */
+    byte[] getBytes() {
+
+        int nFonts = fontNames.size();
+
+        // Encode every name once so that the size computation and the
+        // copy below cannot disagree.
+        byte encoded[][] = new byte[nFonts][];
+        int tableLen = 0;
+        for (int i = 0; i < nFonts; i++) {
+            encoded[i] = ((String)fontNames.elementAt(i)).getBytes();
+            tableLen += encoded[i].length + 1;
+        }
+
+        byte b[] = new byte[PREFIX_SIZE + tableLen];
+        b[0] = TABLE_TAG;
+        b[1] = (byte)(tableLen >> 8);
+        b[2] = (byte)(tableLen & 0xFF);
+        int indx = PREFIX_SIZE;
+        for (int i = 0; i < nFonts; i++) {
+            System.arraycopy(encoded[i], 0, b, indx, encoded[i].length);
+            indx += encoded[i].length;
+            b[indx++] = 0;
+        }
+        return b;
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java
new file mode 100644
index 000000000000..009f1a975b2f
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java
@@ -0,0 +1,148 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WseHeader.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.IOException;
+import java.io.DataOutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * This class represents a WordSmith document header.
+ *
+ * @author David Proulx
+ */
+class WseHeader extends Wse {
+
+    /** Serialized size: two indicator bytes plus four 32-bit counts. */
+    private static final int HEADER_SIZE = 18;
+
+    /** First header byte: binary document indicator. */
+    private static final byte DOC_INDICATOR = 2;
+
+    /** Second header byte: binary header indicator. */
+    private static final byte HEADER_INDICATOR = 4;
+
+    private int nParagraphs = 0;
+    private int nAtoms = 0;
+    private int nChars = 0;
+    private int miscSize = 0;
+
+    /**
+     * Constructor for use when going from DOM to WordSmith.  The
+     * "misc" size stored in the header is the sum of the byte counts
+     * of whichever of the two tables are non-null.
+     *
+     * @param  nPara   The number of paragraphs.
+     * @param  nAtoms  The number of atoms.
+     * @param  nChars  The number of characters.
+     * @param  ft      The font table; may be null.
+     * @param  ct      The color table; may be null.
+     */
+    public WseHeader(int nPara, int nAtoms, int nChars, WseFontTable ft,
+                     WseColorTable ct) {
+        nParagraphs = nPara;
+        this.nAtoms = nAtoms;
+        this.nChars = nChars;
+        if (ft != null) miscSize += ft.getByteCount();
+        if (ct != null) miscSize += ct.getByteCount();
+    }
+
+
+    /**
+     * Constructor for use when going from WordSmith to DOM.  Not yet
+     * implemented: the arguments are ignored and all counts stay 0.
+     *
+     * @param  dataArray  <code>byte</code> array.
+     * @param  i          Index.
+     */
+    public WseHeader(byte dataArray[], int i) {
+        // DJP: write this!
+    }
+
+    /**
+     * Return true if <code>dataArray[startIndex]</code> is the start
+     * of a document header, i.e. the bytes 2 and 4 appear at
+     * <code>startIndex</code> and <code>startIndex + 1</code>.
+     * Returns false, rather than throwing, when fewer than two bytes
+     * are available at <code>startIndex</code>.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The index.
+     *
+     * @return  true if <code>dataArray[startIndex]</code> is the start
+     *          of a document header, false otherwise.
+     */
+    static boolean isValid(byte dataArray[], int startIndex) {
+        if (startIndex < 0 || dataArray.length - startIndex < 2)
+            return false;
+        return ((dataArray[startIndex] == DOC_INDICATOR)
+             && (dataArray[startIndex + 1] == HEADER_INDICATOR));
+    }
+
+
+    /**
+     * Compute and return the index of the first <code>byte</code>
+     * following this element.  It is assumed that the element
+     * starting at <code>dataArray[startIndex]</code> is valid.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  The index of the first <code>byte</code> following
+     *          this element.
+     */
+    static int computeNewIndex(byte dataArray[], int startIndex) {
+        return startIndex + HEADER_SIZE;
+    }
+
+
+    /**
+     * Return the total number of bytes needed to represent this
+     * header.
+     *
+     * @return  The total number of bytes needed to represent this
+     *          header (always 18).
+     */
+    int getByteCount() {
+        return HEADER_SIZE;
+    }
+
+
+    /**
+     * Return a <code>byte</code> array representing this header: the
+     * bytes 2 and 4, followed by the paragraph count, atom count,
+     * character count and misc size, each as a big-endian 32-bit
+     * integer.
+     *
+     * @return  An 18-byte array representing this header.
+     */
+    byte[] getBytes() {
+        byte b[] = new byte[HEADER_SIZE];
+
+        b[0] = DOC_INDICATOR;
+        b[1] = HEADER_INDICATOR;
+
+        putInt(b, 2, nParagraphs);
+        putInt(b, 6, nAtoms);
+        putInt(b, 10, nChars);
+        putInt(b, 14, miscSize);
+
+        return b;
+    }
+
+
+    /**
+     * Store <code>value</code> at <code>dest[offset..offset+3]</code>,
+     * most significant byte first.
+     */
+    private static void putInt(byte dest[], int offset, int value) {
+        dest[offset]     = (byte)(value >>> 24);
+        dest[offset + 1] = (byte)(value >>> 16);
+        dest[offset + 2] = (byte)(value >>> 8);
+        dest[offset + 3] = (byte)value;
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java
new file mode 100644
index 000000000000..8947aa7a1f47
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java
@@ -0,0 +1,302 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WsePara.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import java.io.IOException;
+
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.xml.*;
+
+
+/**
+ * This class represents a paragraph in a WordSmith document.
+ * (A paragraph is "5" followed by 12 bytes of attributes.)
+ *
+ * @author David Proulx
+ */
+class WsePara extends Wse {
+
+    private byte spaceBefore = 0;
+    private byte spaceAfter = 0;
+    private byte leftIndent = 0;
+    private byte firstIndent = 0;
+    private byte rightIndent = 0;
+    private byte misc = 0;
+    private byte style = 0;
+    private byte lineSpace = 0;
+    private byte outline = 0;
+    private byte reserved = 0;
+
+    private static final byte LS_EXACTLY = (byte)0xC0;
+    private static final byte LS_ATLEAST = (byte)0x80;
+    private static final byte LS_MULTIPLE = (byte)0x40;
+    private static final byte LS_VALUEMASK = (byte)0x3F;
+
+    private static final byte ALIGN_RIGHT = (byte)2;
+    private static final byte ALIGN_LEFT = (byte)0;
+    private static final byte ALIGN_CENTER = (byte)1;
+    private static final byte ALIGN_JUST = (byte)3;
+
+    private StyleCatalog sc = null;
+
+
+    /**
+     * Constructor for use when going from DOM to WordSmith.  The
+     * style is resolved first; each margin, indent, line-height and
+     * alignment attribute that is set on the resolved style is then
+     * converted to its one-byte WordSmith form.  Attributes that are
+     * not set leave the corresponding byte at 0.
+     *
+     * @param  p   The paragraph style.
+     * @param  sc  The <code>StyleCatalog</code>.
+     */
+    public WsePara(ParaStyle p, StyleCatalog sc) {
+        this.sc = sc;
+        ParaStyle resolved = (ParaStyle)p.getResolved();
+
+        if (resolved.isAttributeSet(ParaStyle.MARGIN_LEFT))
+            leftIndent = scale(resolved.getAttribute(ParaStyle.MARGIN_LEFT));
+
+        if (resolved.isAttributeSet(ParaStyle.MARGIN_RIGHT))
+            rightIndent = scale(resolved.getAttribute(ParaStyle.MARGIN_RIGHT));
+
+        if (resolved.isAttributeSet(ParaStyle.TEXT_INDENT))
+            firstIndent = scale(resolved.getAttribute(ParaStyle.TEXT_INDENT));
+
+        if (resolved.isAttributeSet(ParaStyle.MARGIN_TOP))
+            spaceBefore = scale(resolved.getAttribute(ParaStyle.MARGIN_TOP));
+
+        if (resolved.isAttributeSet(ParaStyle.MARGIN_BOTTOM))
+            spaceAfter = scale(resolved.getAttribute(ParaStyle.MARGIN_BOTTOM));
+
+        if (resolved.isAttributeSet(ParaStyle.LINE_HEIGHT)) {
+            int lh = resolved.getAttribute(ParaStyle.LINE_HEIGHT);
+            if ((lh & ~ParaStyle.LH_VALUEMASK) == 0) {
+                // No flag bits set: use the value itself.
+                lineSpace = (byte)(LS_MULTIPLE | (lh * 2));
+            } else if ((lh & ParaStyle.LH_PCT) != 0) {
+                int multiple = (lh & ParaStyle.LH_VALUEMASK) / 100;
+                lineSpace = (byte)(LS_MULTIPLE | (multiple * 2));
+            }
+            // DJP: handle other cases....
+        }
+
+        if (resolved.isAttributeSet(ParaStyle.TEXT_ALIGN)) {
+            int align = resolved.getAttribute(ParaStyle.TEXT_ALIGN);
+
+            if (align == ParaStyle.ALIGN_RIGHT) {
+                misc = ALIGN_RIGHT;
+            } else if (align == ParaStyle.ALIGN_LEFT) {
+                misc = ALIGN_LEFT;
+            } else if (align == ParaStyle.ALIGN_CENTER) {
+                misc = ALIGN_CENTER;
+            } else if (align == ParaStyle.ALIGN_JUST) {
+                misc = ALIGN_JUST;
+            }
+        }
+    }
+
+
+    /**
+     * Convert a style attribute value to a one-byte WordSmith
+     * measurement: multiply by 1.6 / 100, truncate to a
+     * <code>byte</code>, and add one when the discarded fraction
+     * exceeds 0.5.  (Presumably the input is in hundredths of a
+     * millimetre, since makeStyle() divides by 1.6 and labels the
+     * result "mm" -- confirm against ParaStyle.)
+     */
+    private static byte scale(int raw) {
+        double exact = raw * 1.6 / 100;
+        byte units = (byte) exact;
+        if ((exact - units) > 0.5) units++;
+        return units;
+    }
+
+
+    /**
+     * Constructor for use when going from WordSmith to DOM.
+     * Assumes <code>dataArray[startIndex]</code> is the first
+     * <code>byte</code> of a valid WordSmith paragraph descriptor.
+     * The nine attribute bytes at offsets 1 through 9 are copied; the
+     * remaining three bytes of the descriptor are not read, and no
+     * <code>StyleCatalog</code> is associated with the paragraph.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     */
+    public WsePara(byte dataArray[], int startIndex) {
+        int at = startIndex;
+        spaceBefore = dataArray[++at];
+        spaceAfter  = dataArray[++at];
+        leftIndent  = dataArray[++at];
+        firstIndent = dataArray[++at];
+        rightIndent = dataArray[++at];
+        misc        = dataArray[++at];
+        style       = dataArray[++at];
+        lineSpace   = dataArray[++at];
+        outline     = dataArray[++at];
+    }
+
+
+    /**
+     * Compute the index of the first <code>byte</code> following the
+     * paragraph descriptor, assuming that
+     * <code>dataArray[startIndex]</code> is the beginning of a valid
+     * paragraph descriptor.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  The index of the first <code>byte</code> following the
+     *          paragraph descriptor (13 bytes past the start).
+     */
+    static int computeNewIndex(byte dataArray[], int startIndex) {
+        return 13 + startIndex;
+    }
+
+
+    /**
+     * Return true if <code>dataArray[startIndex]</code> is the start
+     * of a valid paragraph descriptor, i.e. holds the value 5.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  true if <code>dataArray[startIndex]</code> is the start
+     *          of a valid paragraph descriptor, false otherwise.
+     */
+    static boolean isValid(byte dataArray[], int startIndex) {
+        byte tag = dataArray[startIndex];
+        return tag == 5;
+    }
+
+    /**
+     * Return the number of bytes needed to represent this paragraph.
+     *
+     * @return  The number of bytes needed to represent this paragraph
+     *          (always 13).
+     */
+    int getByteCount() {
+        return 13;
+    }
+
+    /**
+     * Return a <code>byte</code> array representing this paragraph:
+     * the tag byte 5, the ten attribute bytes, and two trailing zero
+     * bytes.
+     *
+     * @return  A <code>byte</code> array representing this paragraph.
+     */
+    byte[] getBytes() {
+        return new byte[] {
+            5,
+            spaceBefore,
+            spaceAfter,
+            leftIndent,
+            firstIndent,
+            rightIndent,
+            misc,
+            style,
+            lineSpace,
+            outline,
+            reserved,
+            0,
+            0
+        };
+    }
+
+    /**
+     * Return a <code>ParaStyle</code> that reflects the formatting of
+     * this paragraph.
+     *
+     * Csaba: LINE_HEIGHT is deliberately left out, because there was
+     * no incoming data for that style.  It was resulting in a zero
+     * line height in the xml document, ie. the doc looked empty.
+     *
+     * @return  A <code>ParaStyle</code> that reflects the formatting
+     *          of this paragraph.
+     */
+    ParaStyle makeStyle() {
+        int attrs[] = { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT,
+                        ParaStyle.TEXT_INDENT,
+                        ParaStyle.MARGIN_TOP, ParaStyle.MARGIN_BOTTOM,
+                        ParaStyle.TEXT_ALIGN };
+        String values[] = new String[attrs.length];
+
+        values[0] = millimetres(leftIndent);
+        values[1] = millimetres(rightIndent);
+        values[2] = millimetres(firstIndent);
+        values[3] = millimetres(spaceBefore);
+        values[4] = millimetres(spaceAfter);
+
+        // Any other value of misc leaves the alignment entry null.
+        if (misc == ALIGN_RIGHT) {
+            values[5] = "right";
+        } else if (misc == ALIGN_LEFT) {
+            values[5] = "left";
+        } else if (misc == ALIGN_CENTER) {
+            values[5] = "center";
+        } else if (misc == ALIGN_JUST) {
+            values[5] = "justified";
+        }
+
+        return new ParaStyle(null, "paragraph", null, attrs, values, sc);
+    }
+
+
+    /**
+     * Format a one-byte WordSmith measurement as a length string:
+     * the value divided by 1.6, followed by "mm".
+     */
+    private static String millimetres(byte units) {
+        double mm = units / 1.6;
+        return Double.toString(mm) + "mm";
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java
new file mode 100644
index 000000000000..b1b6fa934e78
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java
@@ -0,0 +1,327 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: WseTextRun.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.openoffice.xmerge.converter.xml.TextStyle;
+import org.openoffice.xmerge.converter.xml.StyleCatalog;
+import java.awt.Color;
+
+/**
+ * <p>This class represents a text run (aka text atom) in a WordSmith
+ * document.</p>
+ *
+ * <p>WordSmith represents a text run as follows:</p>
+ *
+ * <p><ul><li>
+ * 1 byte Value of "1", indicating beginning of a text atom
+ * </li><li>
+ * 2 bytes Length of text (does not include attributes, this length field,
+ * etc)
+ * </li><li>
+ * 1 byte Font index - Index in the font table of font to be used
+ * </li><li>
+ * 1 byte Font size (DJP: get details of representation)
+ * </li><li>
+ * 1 byte Color index - Index in the color table of font color to be used
+ * </li><li>
+ * 1 byte Modifiers - bit flags for bold, italic, etc
+ * </li><li>
+ * n bytes Text - the actual text
+ * </li></ul></p>
+ *
+ * @author David Proulx
+ */
+class WseTextRun extends Wse {
+
+    /**
+     * Number of bytes that precede the text in the encoded form:
+     * marker (1), text length (2), font index (1), font size (1),
+     * color index (1) and modifiers (1).
+     */
+    private static final int HEADER_SIZE = 7;
+
+    /** Font specifier.  This is an index into the font table. */
+    private byte fontIndex = 0;
+    private String fontName = null;
+
+    /** Size of the font, stored as twice the value used by TextStyle. */
+    private byte fontSize = 0;
+
+    /**
+     * Color of the font.  This is an index into the color table.
+     * High nibble is background color index, low nibble is font color
+     * index.
+     */
+    private byte colorIndex = 0;
+
+    /**
+     * Reference to color table for color lookups.
+     */
+    private WseColorTable ct;
+
+    /**
+     * The modifiers for the text run.  (Mostly) Bitwise flags.  The "_TOKEN"
+     * values are not yet implemented in this converter.  They may not even
+     * be implemented in WordSmith yet.
+     */
+    private byte modifiers = 0;
+    final public static int BOLD             = 0x01;
+    final public static int ITALIC           = 0x02;
+    final public static int UNDERLINE        = 0x04;
+    final public static int STRIKETHRU       = 0x08;
+    final public static int SUPERSCRIPT      = 0x10;
+    final public static int SUBSCRIPT        = 0x20;
+    final public static int LINK             = 0x40;
+    final public static int CUSTOM_TOKEN     = 0x80;
+    final public static int IMAGE_TOKEN      = 0x80;
+    final public static int BOOKMARK_TOKEN   = 0x81;
+    final public static int ANNOTATION_TOKEN = 0x82;
+    final public static int LINK_TOKEN       = 0x83;
+
+    /** The actual text. */
+    private String text;
+
+    /**
+     * Style catalog handed to the <code>TextStyle</code> created by
+     * {@link #makeStyle()}.  Only the DOM-to-WordSmith constructor sets it;
+     * it stays <code>null</code> otherwise.
+     */
+    StyleCatalog sc;
+
+
+    /**
+     * Constructor for use when going from DOM to WordSmith.
+     *
+     * @param  txt  The text.
+     * @param  t    The text style.
+     * @param  sc   The <code>StyleCatalog</code>.
+     * @param  ft   The font table.
+     * @param  ct   The color Table.
+     */
+    public WseTextRun(String txt, TextStyle t, StyleCatalog sc,
+                      WseFontTable ft, WseColorTable ct) {
+
+        this.sc = sc;
+        this.ct = ct;
+
+        TextStyle ts = (TextStyle)t.getResolved();
+
+        // Translate every attribute that is both set and true into the
+        // corresponding WordSmith modifier bit.
+        if (ts.isSet(TextStyle.BOLD) && ts.getAttribute(TextStyle.BOLD))
+            modifiers |= BOLD;
+        if (ts.isSet(TextStyle.ITALIC) && ts.getAttribute(TextStyle.ITALIC))
+            modifiers |= ITALIC;
+        if (ts.isSet(TextStyle.UNDERLINE) && ts.getAttribute(TextStyle.UNDERLINE))
+            modifiers |= UNDERLINE;
+        if (ts.isSet(TextStyle.STRIKETHRU) && ts.getAttribute(TextStyle.STRIKETHRU))
+            modifiers |= STRIKETHRU;
+        if (ts.isSet(TextStyle.SUPERSCRIPT) && ts.getAttribute(TextStyle.SUPERSCRIPT))
+            modifiers |= SUPERSCRIPT;
+        if (ts.isSet(TextStyle.SUBSCRIPT) && ts.getAttribute(TextStyle.SUBSCRIPT))
+            modifiers |= SUBSCRIPT;
+
+        // The size is stored doubled; makeStyle() halves it again.
+        fontSize = (byte)(ts.getFontSize() * 2);
+        fontName = ts.getFontName();
+        fontIndex = (byte)ft.getFontIndex(fontName);
+        if (fontIndex == -1) {
+            // Font not in the table yet: register it, then look it up again.
+            ft.add(fontName);
+            fontIndex = (byte)ft.getFontIndex(fontName);
+        }
+
+        // Figure out the color index.  Font color goes into the low nibble,
+        // background color into the high nibble.
+        // NOTE(review): colors are read from the unresolved style "t" while
+        // all other attributes come from the resolved "ts" - confirm that
+        // this is intentional.
+        Color c = t.getFontColor();
+        if (c == null)
+            c = Color.black;
+        colorIndex = (byte)ct.findColor(c, true);
+        c = t.getBackgroundColor();
+        if (c == null)
+            c = Color.white;
+        colorIndex |= (byte)(ct.findColor(c, false) << 4);
+
+        text = txt;
+    }
+
+
+    /**
+     * Standard constructor for use when going from WordSmith to DOM.
+     * The text bytes are decoded with the platform default charset.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     * @param  ft          The font table.
+     * @param  ct          The color table.
+     */
+    public WseTextRun(byte dataArray[], int startIndex, WseFontTable ft,
+                      WseColorTable ct) {
+
+        this.ct = ct;
+
+        int pos = startIndex + 1;      // Skip the leading "1"
+
+        // Both length bytes are masked so that a high byte >= 0x80 is not
+        // sign-extended into a negative length.
+        int textLen = (((dataArray[pos] & 0xFF) << 8)
+                       | (dataArray[pos + 1] & 0xFF));
+        pos += 2;
+
+        fontIndex = dataArray[pos++];
+        if (ft != null)
+            fontName = ft.getFontName(fontIndex);
+
+        fontSize = dataArray[pos++];
+
+        colorIndex = dataArray[pos++];
+        modifiers = dataArray[pos++];
+
+        text = new String(dataArray, pos, textLen);
+    }
+
+
+    /**
+     * Given a <code>byte</code> sequence, assumed to be a text run,
+     * compute the index of the first byte past the text run.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index
+     *
+     * @return  The index of the first <code>byte</code> past the
+     *          text run.
+     */
+    public static int computeNewIndex(byte dataArray[], int startIndex) {
+
+        // The 2-byte length follows the leading "1"; mask both bytes so the
+        // value is read as unsigned.
+        int textLen = (((dataArray[startIndex + 1] & 0xFF) << 8)
+                       | (dataArray[startIndex + 2] & 0xFF));
+
+        // marker + length + attributes, then the text itself
+        return startIndex + HEADER_SIZE + textLen;
+    }
+
+
+    /**
+     * Return true if the sequence starting at
+     * <code>dataArray[startIndex]</code> is a valid text run.
+     *
+     * @param  dataArray   <code>byte</code> array.
+     * @param  startIndex  The start index.
+     *
+     * @return  true if the sequence starting at
+     *          <code>dataArray[startIndex]</code> is a valid
+     *          text run, false otherwise.
+     */
+    public static boolean isValid(byte dataArray[], int startIndex) {
+        return (dataArray[startIndex] == 1);
+    }
+
+    /**
+     * Return the number of bytes needed to represent this text run.
+     * The text is measured in encoded bytes (platform default charset),
+     * which is what {@link #getBytes()} actually writes.
+     *
+     * @return  The number of bytes needed to represent this text run.
+     */
+    int getByteCount() {
+        return text.getBytes().length + HEADER_SIZE;
+    }
+
+
+    /**
+     * Return an <code>byte</code> array representing this text run.
+     * The length field holds the number of encoded text bytes, matching
+     * how the decoding constructor interprets it.
+     *
+     * @return  An <code>byte</code> array representing this text run.
+     *
+     * @throws  IllegalStateException  If the encoded text does not fit
+     *                                 into the 2-byte length field.
+     */
+    byte[] getBytes() {
+        byte[] txtBytes = text.getBytes();
+        int textLen = txtBytes.length;
+        if (textLen > 0xFFFF) {
+            throw new IllegalStateException(
+                "Text run too long for a 2-byte length field: " + textLen);
+        }
+
+        byte b[] = new byte[textLen + HEADER_SIZE];
+        b[0] = 1;
+        b[1] = (byte)(textLen >> 8);
+        b[2] = (byte)(textLen & 0xFF);
+        b[3] = fontIndex;
+        b[4] = fontSize;
+        b[5] = colorIndex;
+        b[6] = modifiers;
+        System.arraycopy(txtBytes, 0, b, HEADER_SIZE, textLen);
+        return b;
+    }
+
+
+    /**
+     * Return the text of this run.
+     *
+     * @return  The text of this run.
+     */
+    public String getText() {
+        return text;
+    }
+
+
+    /**
+     * Return a <code>TextStyle</code> that reflects the formatting
+     * of this run.
+     *
+     * @return  A <code>TextStyle</code> that reflects the formatting
+     *          of this run.
+     */
+    public TextStyle makeStyle() {
+        int mod = 0;
+        if ((modifiers & BOLD) != 0) mod |= TextStyle.BOLD;
+        if ((modifiers & ITALIC) != 0) mod |= TextStyle.ITALIC;
+        if ((modifiers & UNDERLINE) != 0) mod |= TextStyle.UNDERLINE;
+        if ((modifiers & STRIKETHRU) != 0)
+            mod |= TextStyle.STRIKETHRU;
+        if ((modifiers & SUPERSCRIPT) != 0) mod |= TextStyle.SUPERSCRIPT;
+        if ((modifiers & SUBSCRIPT) != 0) mod |= TextStyle.SUBSCRIPT;
+
+        int mask = TextStyle.BOLD | TextStyle.ITALIC
+                   | TextStyle.UNDERLINE
+                   | TextStyle.STRIKETHRU | TextStyle.SUPERSCRIPT
+                   | TextStyle.SUBSCRIPT;
+
+        TextStyle x = new TextStyle(null, "text", null, mask,
+                                    mod, (int)(fontSize/2), fontName, sc);
+
+        // If color table is available, set the colors.
+        if (ct != null) {
+            Color fc = ct.getColor(colorIndex & 0xF, true);
+            // Mask after shifting: colorIndex is a signed byte, so a high
+            // nibble >= 8 would otherwise produce a negative index.
+            Color bc = ct.getColor((colorIndex >> 4) & 0xF, false);
+            x.setColors(fc, bc);
+        }
+
+        return x;
+    }
+
+
+    /**
+     * Display debug information.
+     */
+    public void dump() {
+        System.out.print("TEXT RUN: fontIndex = " + fontIndex
+                         + " fontsize = " + fontSize
+                         + " colorIndex = " + colorIndex
+                         + " ");
+        if ((modifiers & BOLD) != 0) System.out.print("BOLD,");
+        if ((modifiers & ITALIC) != 0) System.out.print("ITALIC,");
+        if ((modifiers & UNDERLINE) != 0) System.out.print("UNDERLINE,");
+        if ((modifiers & STRIKETHRU) != 0) System.out.print("STRIKETHRU,");
+        if ((modifiers & SUPERSCRIPT) != 0) System.out.print("SUPERSCRIPT,");
+        if ((modifiers & SUBSCRIPT) != 0) System.out.print("SUBSCRIPT,");
+        System.out.println("\n" + text);
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml
new file mode 100644
index 000000000000..5b48ae6ddc1e
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml
@@ -0,0 +1,145 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+ Copyright 2008 by Sun Microsystems, Inc.
+
+ OpenOffice.org - a multi-platform office productivity suite
+
+ $RCSfile: build.xml,v $
+
+ $Revision: 1.3 $
+
+ This file is part of OpenOffice.org.
+
+ OpenOffice.org is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 3
+ only, as published by the Free Software Foundation.
+
+ OpenOffice.org is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License version 3 for more details
+ (a copy is included in the LICENSE file that accompanied this code).
+
+ You should have received a copy of the GNU Lesser General Public License
+ version 3 along with OpenOffice.org. If not, see
+ <http://www.openoffice.org/license.html>
+ for a copy of the LGPLv3 License.
+
+-->
+<project name="xmrg_jooxcxs_wordsmith" default="main" basedir=".">
+
+ <!-- ================================================================= -->
+ <!-- settings -->
+ <!-- ================================================================= -->
+
+ <!-- project prefix, used for targets and build.lst -->
+ <property name="prj.prefix" value="xmrg"/>
+
+ <!-- name of this sub target used in recursive builds -->
+ <property name="target" value="xmrg_jooxcxs_wordsmith"/>
+
+ <!-- relative path to project directory -->
+ <property name="prj" value="../../../../../../../.."/>
+
+ <!-- start of java source code package structure -->
+ <property name="java.dir" value="${prj}/java"/>
+
+ <!-- path component for current java package -->
+ <property name="package"
+ value="org/openoffice/xmerge/converter/xml/sxw/wordsmith"/>
+
+ <!-- define how to handle CLASSPATH environment -->
+ <property name="build.sysclasspath" value="ignore"/>
+
+ <!-- classpath settings for javac tasks -->
+ <path id="classpath">
+ <pathelement location="${build.class}"/>
+ <pathelement location="${solar.jar}/parser.jar"/>
+ <pathelement location="${solar.jar}/jaxp.jar"/>
+ </path>
+
+ <!-- set whether we want to compile with or without deprecation -->
+ <property name="deprecation" value="on"/>
+
+ <!-- ================================================================= -->
+ <!-- solar build environment targets -->
+ <!-- ================================================================= -->
+
+ <target name="build_dir" unless="build.dir">
+ <property name="build.dir" value="${out}"/>
+ </target>
+
+ <target name="solar" depends="build_dir" if="solar.update">
+ <property name="solar.properties"
+ value="${solar.bin}/solar.properties"/>
+ </target>
+
+ <target name="init" depends="solar">
+ <property name="build.compiler" value="classic"/>
+ <property file="${solar.properties}"/>
+ <property file="${build.dir}/class/solar.properties"/>
+ </target>
+
+ <target name="info">
+ <echo message="--------------------"/>
+ <echo message="${target}"/>
+ <echo message="--------------------"/>
+ </target>
+
+
+ <!-- ================================================================= -->
+ <!-- custom targets -->
+ <!-- ================================================================= -->
+
+ <!-- the main target, called in recursive builds -->
+ <target name="main" depends="info,prepare,compile"/>
+
+ <!-- prepare output directories -->
+ <target name="prepare" depends="init" if="build.class">
+ <mkdir dir="${build.dir}"/>
+ <mkdir dir="${build.class}"/>
+ </target>
+
+ <!-- compile java sources in ${package} -->
+ <target name="compile" depends="prepare" if="build.class">
+ <javac srcdir="${java.dir}"
+ destdir="${build.class}"
+ debug="${debug}"
+ deprecation="${deprecation}"
+ optimize="${optimize}">
+ <classpath refid="classpath"/>
+ <include name="${package}/DOCConstants.java"/>
+ <include name="${package}/textRecord.java"/>
+ <include name="${package}/util.java"/>
+ <include name="${package}/WSDecoder.java"/>
+ <include name="${package}/WseColorTable.java"/>
+ <include name="${package}/WseFontTable.java"/>
+ <include name="${package}/Wse.java"/>
+ <include name="${package}/WseHeader.java"/>
+ <include name="${package}/WSEncoder.java"/>
+ <include name="${package}/WsePara.java"/>
+ <include name="${package}/WseTextRun.java"/>
+ <include name="${package}/DocumentMergerImpl.java"/>
+ <include name="${package}/DocumentSerializerImpl.java"/>
+ <include name="${package}/DocumentDeserializerImpl.java"/>
+ <include name="${package}/ConverterCapabilitiesImpl.java"/>
+ <include name="${package}/PluginFactoryImpl.java"/>
+ </javac>
+ </target>
+
+ <!-- clean up -->
+ <target name="clean" depends="prepare">
+ <delete includeEmptyDirs="true">
+ <fileset dir="${build.class}">
+ <patternset>
+ <include name="${package}/*.class"/>
+ </patternset>
+ </fileset>
+ </delete>
+ </target>
+
+</project>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml
new file mode 100644
index 000000000000..9285730569db
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<converters>
+ <converter type="staroffice/sxw" version="1.0">
+ <converter-display-name>
+ WordSmith
+ </converter-display-name>
+ <converter-description>
+ StarWriter XML to/from WordSmith conversion
+ </converter-description>
+ <converter-vendor>OpenOffice.org</converter-vendor>
+ <converter-class-impl>
+ org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ </converter-class-impl>
+ <converter-target type="application/x-wordsmith" />
+ </converter>
+</converters>
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk
new file mode 100644
index 000000000000..6cfbb307ba85
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk
@@ -0,0 +1,36 @@
+#*************************************************************************
+#
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# Copyright 2008 by Sun Microsystems, Inc.
+#
+# OpenOffice.org - a multi-platform office productivity suite
+#
+# $RCSfile: makefile.mk,v $
+#
+# $Revision: 1.3 $
+#
+# This file is part of OpenOffice.org.
+#
+# OpenOffice.org is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License version 3
+# only, as published by the Free Software Foundation.
+#
+# OpenOffice.org is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License version 3 for more details
+# (a copy is included in the LICENSE file that accompanied this code).
+#
+# You should have received a copy of the GNU Lesser General Public License
+# version 3 along with OpenOffice.org. If not, see
+# <http://www.openoffice.org/license.html>
+# for a copy of the LGPLv3 License.
+#
+#*************************************************************************
+PRJNAME=converter
+TARGET=cv_jcsscdcxs_wordsmith
+PRJ=../../../../../../../../../..
+
+.INCLUDE : ant.mk
+ALLTAR: ANTBUILD
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java
new file mode 100644
index 000000000000..0083899dce0e
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java
@@ -0,0 +1,118 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: textRecord.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+import org.openoffice.xmerge.util.Debug;
+import java.io.IOException;
+import java.io.DataOutputStream;
+import java.io.ByteArrayOutputStream;
+
+/**
+ * This class represents a single text record in a WordSmith document.
+ * A record is composed of one or more "WordSmith elements", which
+ * include: WordSmith header, font table, color table, paragraphs,
+ * and text runs.
+ *
+ * @author David Proulx
+ */
+
+class textRecord {
+
+    /** The elements added to this record, in insertion order. */
+    java.util.Vector elements;
+
+
+    /**
+     *  Default constructor
+     */
+    textRecord() {
+        elements = new java.util.Vector(10);
+    }
+
+
+    /**
+     *  Add an element
+     *
+     *  @param  elem  The element to add
+     */
+    void addElement(Wse elem) {
+        elements.add(elem);
+    }
+
+
+    /**
+     *  Return the number of bytes needed to represent the current
+     *  contents of this text record.
+     *
+     *  @return  The number of bytes needed to represent the current
+     *           contents of this text record.
+     */
+    int getByteCount() {
+        int totalBytes = 0;
+        int nElements = elements.size();
+        for (int i = 0; i < nElements; i++) {
+            Wse e = (Wse)elements.elementAt(i);
+            totalBytes += e.getByteCount();
+        }
+        return totalBytes;
+    }
+
+
+    /**
+     *  Return the contents of this record as a <code>byte</code> array.
+     *  The bytes of each element are concatenated in insertion order.
+     *
+     *  @return the contents of this record as a <code>byte</code> array
+     *          (empty if the record has no elements).
+     */
+    byte[] getBytes() {
+        ByteArrayOutputStream bs = new ByteArrayOutputStream();
+
+        int nElements = elements.size();
+        for (int i = 0; i < nElements; i++) {
+            Wse e = (Wse)elements.elementAt(i);
+            byte[] chunk = e.getBytes();
+            // The three-argument write of ByteArrayOutputStream declares no
+            // IOException, so no try/catch is needed around it.
+            bs.write(chunk, 0, chunk.length);
+        }
+
+        return bs.toByteArray();
+    }
+}
+
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java
new file mode 100644
index 000000000000..d123c0a72b56
--- /dev/null
+++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java
@@ -0,0 +1,71 @@
+/************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2008 by Sun Microsystems, Inc.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * $RCSfile: util.java,v $
+ * $Revision: 1.3 $
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+
+/**
+ * WordSmith utility class.
+ *
+ * @author David Proulx
+ */
+class util {
+
+ /**
+ * Convert 2 bytes to an integer.
+ *
+ * @param data <code>byte</code> data to convert.
+ * @param index Index to convert.
+ *
+ * @return Converted integer.
+ */
+ static int intFrom2bytes(byte[] data, int index) {
+ return (((data[index] & 0xFF) << 8)
+ | (data[index+1] & 0xFF));
+
+ }
+
+
+ /**
+ * Convert 4 bytes to an integer.
+ *
+ * @param data <code>byte</code> data to convert.
+ * @param index Index to convert.
+ *
+ * @return Converted integer.
+ */
+ static int intFrom4bytes(byte[] data, int index) {
+ return (((data[index] & 0xFF) << 24)
+ | ((data[index + 1] & 0xFF) << 16)
+ | ((data[index + 2] & 0xFF) << 8)
+ | (data[index+3] & 0xFF));
+
+ }
+}
+