diff options
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw')
49 files changed, 9489 insertions, 0 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java new file mode 100644 index 000000000000..839a63e8bec9 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwDocument.java @@ -0,0 +1,95 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw; + +import org.w3c.dom.Document; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +/** + * This class is an implementation of <code>OfficeDocument</code> for + * the SXW format. + */ +public class SxwDocument extends OfficeDocument { + + + /** + * Constructor with arguments to set <code>name</code>. 
+ * + * @param name The name of the <code>Document</code>. + */ + public SxwDocument(String name) { + super(name); + } + + + /** + * Constructor with arguments to set <code>name</code>, the + * <code>namespaceAware</code> flag, and the <code>validating</code> + * flag. + * + * @param name The name of the <code>Document</code>. + * @param namespaceAware The value of the namespaceAware flag. + * @param validating The value of the validating flag. + */ + public SxwDocument(String name, boolean namespaceAware, boolean validating) { + + super(name, namespaceAware, validating); + } + + + /** + * Returns the Office file extension for the SXW format. + * + * @return The Office file extension for the SXW format. + */ + protected String getFileExtension() { + return OfficeConstants.SXW_FILE_EXTENSION; + } + + + /** + * Returns the Office class attribute for the SXW format. + * + * @return The Office class attribute for the SXW format. + */ + protected String getOfficeClassAttribute() { + return OfficeConstants.SXW_TYPE; + } + + /** + * Returns the MIME type for the SXW format. + * + * @return The MIME type for the SXW format. + */ + protected final String getDocumentMimeType() { + return OfficeConstants.SXW_MIME_TYPE; + } + +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java new file mode 100644 index 000000000000..23e2608c242e --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/SxwPluginFactory.java @@ -0,0 +1,78 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. 
+ * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw; + +import java.io.InputStream; +import java.io.IOException; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.PluginFactory; +import org.openoffice.xmerge.PluginFactory; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.util.registry.ConverterInfo; + +/** + * General implementation of the <code>PluginFactory</code> class + * for SXW documents. + * + * @see org.openoffice.xmerge.DocumentDeserializer + * @see org.openoffice.xmerge.DocumentMerger + * @see org.openoffice.xmerge.DocumentSerializer + */ +public abstract class SxwPluginFactory extends PluginFactory { + + /** + * Constructor that passes on the <code>ConverterInfo</code> that + * corresponds to the registry information for this plug-in. + * + * @param ci <code>ConverterInfo</code> object. 
+ */ + public SxwPluginFactory (ConverterInfo ci) { + super(ci); + } + + + public Document createOfficeDocument(String name, InputStream is) + throws IOException { + + // read zipped XML stream + SxwDocument doc = new SxwDocument(name); + doc.read(is); + return doc; + } + + public Document createOfficeDocument(String name, InputStream is,boolean isZip) + throws IOException { + + // read XML stream + SxwDocument doc = new SxwDocument(name); + doc.read(is,isZip); + return doc; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java new file mode 100644 index 000000000000..671ae420bcfc --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java @@ -0,0 +1,93 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +/** + * <p>AportisDoc implementation of <code>ConverterCapabilities</code> for + * the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>Used with StarWriter XML to/from AportisDoc conversions. The + * <code>ConverterCapabilities</code> specify which "Office" + * <code>Document</code> tags and attributes are supported on the + * "Device" <code>Document</code> format.</p> + */ +public final class ConverterCapabilitiesImpl + implements ConverterCapabilities { + + public boolean canConvertTag(String tag) { + + if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag)) + return true; + else if (OfficeConstants.TAG_PARAGRAPH.equals(tag)) + return true; + else if (OfficeConstants.TAG_HEADING.equals(tag)) + return true; + else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_ITEM.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_HEADER.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPAN.equals(tag)) + return true; + else if (OfficeConstants.TAG_HYPERLINK.equals(tag)) + return true; + else if (OfficeConstants.TAG_LINE_BREAK.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPACE.equals(tag)) + return true; + else if (OfficeConstants.TAG_TAB_STOP.equals(tag)) + return true; + + return false; + } + + public boolean canConvertAttribute(String tag, + String attribute) { + + if (OfficeConstants.TAG_SPACE.equals(tag)) { + + if 
+ (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute)) + return true; + } + + return false; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java new file mode 100644 index 000000000000..86627c6d7ed3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java @@ -0,0 +1,69 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.openoffice.xmerge.converter.palm.PdbUtil; + +/** + * Constants used for encoding and decoding the AportisDoc format. + * + * @author Herbie Ong + */ +interface DocConstants { + + /** Creator id. */ + public static final int CREATOR_ID = PdbUtil.intID("REAd"); + + /** Type id. 
+ */ + public static final int TYPE_ID = PdbUtil.intID("TEXt"); + + /** Constant for uncompressed version. */ + public static final short UNCOMPRESSED = 1; + + /** Constant for compressed version. */ + public static final short COMPRESSED = 2; + + /** Constant used for spare fields. */ + public static final int SPARE = 0; + + /** AportisDoc record size. */ + public static final short TEXT_RECORD_SIZE = 4096; + + /** Constant for encoding scheme. */ + public static final String ENCODING = "8859_1"; + + /** Constant for TAB character. */ + public final static char TAB_CHAR = '\t'; + + /** Constant for EOL character. */ + public final static char EOL_CHAR = '\n'; + + /** Constant for SPACE character. */ + public final static char SPACE_CHAR = ' '; +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java new file mode 100644 index 000000000000..9651e5b10b4d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java @@ -0,0 +1,304 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; + +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.util.Resources; +import org.openoffice.xmerge.util.Debug; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentDeserializerImpl} + * to decode the AportisDoc format. It currently decodes + * the text content into a single <code>String</code> object. + * + * @author Herbie Ong + */ +final class DocDecoder implements DocConstants { + + /** Number of low-order bits in a sequence code that hold the repeat count. */ + private final static int COUNT_BITS = 3; + + /** Resources object for I18N. */ + private Resources res = null; + + + /** + * Default constructor obtains the <code>Resources</code> instance + * used for the localized error messages. + */ + DocDecoder() { + res = Resources.getInstance(); + } + + + /** + * Decode the text records into a single <code>String</code> + * of text content. + * + * @param recs <code>Record</code> array holding AportisDoc + * contents. + * + * @throws IOException If any I/O error occurs. 
+ */ + String parseRecords(Record[] recs) throws IOException { + + // read the header record + HeaderInfo header = readHeader(recs[0].getBytes()); + + dumpHeader(header); + + // store all the characters in textBuffer + StringBuffer textBuffer = new StringBuffer(header.textLen); + + switch (header.version) { + + case COMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = decompress(recs[i].getBytes(), + header.textRecordSize); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + case UNCOMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = recs[i].getBytes(); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + default: + throw new IOException(res.getString("UNKNOWN_DOC_VERSION")); + + } + + return textBuffer.toString(); + } + + + /** + * <p>Decompress the <code>byte</code> array.</p> + * + * <p>The resulting uncompressed <code>byte</code> array should + * be within <code>textRecordSize</code> length, definitely + * within twice the size it claims, else treat it as a problem + * with the encoding of that PDB and throw + * <code>IOException</code>.</p> + * + * @param cBytes Compressed <code>byte</code> array. + * @param textRecordSize Size of uncompressed + * <code>byte</code> array. + * + * @throws IOException If decoding indexes outside either array, e.g. the + * output exceeds twice <code>textRecordSize</code>. + */ + private byte[] decompress(byte[] cBytes, int textRecordSize) + throws IOException { + + // create byte array for storing uncompressed bytes + // it should be within textRecordSize range, definitely + // within twice of textRecordSize! if not, then + // an ArrayIndexOutOfBoundsException will get thrown, + // and it should be converted into an IOException, and + // treat it as a conversion error. 
+ byte[] uBytes = new byte[textRecordSize*2]; + + int up = 0; + int cp = 0; + + try { + + while (cp < cBytes.length) { + + int c = cBytes[cp++] & 0xff; + + // codes 1...8 mean copy that many bytes + if (c > 0 && c < 9) { + + while (c-- > 0) + uBytes[up++] = cBytes[cp++]; + } + + // codes 0, 9...0x7F represent themselves + else if (c < 0x80) { + uBytes[up++] = (byte) c; + } + + // codes 0xC0...0xFF represent "space + ascii char" + else if (c >= 0xC0) { + uBytes[up++] = (byte) ' '; + uBytes[up++] = (byte) (c ^ 0x80); + } + + // codes 0x80...0xBf represent sequences + else { + c <<= 8; + c += cBytes[cp++] & 0xff; + int m = (c & 0x3fff) >> COUNT_BITS; + int n = c & ((1 << COUNT_BITS) - 1); + n += COUNT_BITS; + while (n-- > 0) { + uBytes[up] = uBytes[up - m]; + up++; + } + } + } + + } catch (ArrayIndexOutOfBoundsException e) { + + throw new IOException( + res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED")); + } + + // note that ubytes may be larger than the amount of + // uncompressed bytes, so trim it to another byte array + // with the exact size. + byte[] textBytes = new byte[up]; + System.arraycopy(uBytes, 0, textBytes, 0, up); + + return textBytes; + } + + + /** + * Read the header <code>byte</code> array. + * + * @param bytes <code>byte</code> array containing header + * record data. + * + * @return <code>HeaderInfo</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private HeaderInfo readHeader(byte[] bytes) throws IOException { + + HeaderInfo header = new HeaderInfo(); + + ByteArrayInputStream bis = new ByteArrayInputStream(bytes); + DataInputStream dis = new DataInputStream(bis); + + // Normally the first 2 bytes comprised of the version + // which should either be COMPRESSED or UNCOMPRESSED + // SmartDoc/Quickword would add a 0x01 to the first + // byte, thus their version would be 0x0101 for UNCOMPRESSED + // instead of 0x0001 and 0x0102 for COMPRESSED instead of + // 0x0002. 
+ + dis.readByte(); + header.version = dis.readByte(); + + // read extra 2 unused bytes + dis.readShort(); + + // Read the text length, this should be unsigned 4 bytes. + // We could store the read value into a long, but then + // our current buffer limit is the max positive of an int. + // That is a large enough limit, thus we shall stay with + // storing the value in an int. If it exceeds, then + // an IOException should be thrown. + header.textLen = dis.readInt(); + if (header.textLen < 0) { + throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED")); + } + + // read the number of records - unsigned 2 bytes + header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff; + + // read the record size - unsigned 2 bytes + header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff; + + // read extra 4 unused bytes + dis.readInt(); + + return header; + } + + + /** + * Prints out header info into log. Used for debugging purposes only. + * + * @param header <code>HeaderInfo</code> structure. + */ + private void dumpHeader(HeaderInfo header) { + + log("<DOC_INFO "); + log("version=\"" + header.version + "\" "); + log("text-length=\"" + header.textLen + "\" "); + log("number-of-records=\"" + header.textRecordCount + "\" "); + log("record-size=\"" + header.textRecordSize + "\" />"); + } + + + /** + * Sends message to the log object. + * + * @param str Debug string message. + */ + private void log(String str) { + Debug.log(Debug.TRACE, str); + } + + + /** + * Inner class to store AportisDoc header information. + */ + private class HeaderInfo { + + /** length of text section */ + int textLen = 0; + + /** number of text records */ + int textRecordCount = 0; + + /** + * size of a text record. This is normally the same as + * TEXT_RECORD_SIZE, but some applications may modify this. 
+ */ + int textRecordSize = 0; + + /** compression type */ + int version = 0; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java new file mode 100644 index 000000000000..90cf0e5cd1f1 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java @@ -0,0 +1,214 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; + +import org.openoffice.xmerge.converter.palm.Record; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.DocumentSerializerImpl + * DocumentSerializerImpl} to encode the AportisDoc format. + * It does not deal with any XML tags. It only knows how to encode + * from <code>String</code>. + * + * @author Herbie Ong + */ +final class DocEncoder implements DocConstants { + + /** Text buffer to contain text section. */ + private StringBuffer textBuffer = null; + + /** Length of text section. */ + private int textLen = 0; + + /** Number of text records. */ + private int textRecCount = 0; + + + /** + * Default constructor creates + * a text buffer for holding all the text in + * the AportisDoc database. + */ + DocEncoder() { + + textBuffer = new StringBuffer(TEXT_RECORD_SIZE); + } + + + /** + * This method appends text into the text section of + * the AportisDoc database. + * + * @param text <code>String</code> to append. + */ + void addText(String text) { + + textBuffer.append(text); + } + + + /** + * This method appends text into the text section of + * the AportisDoc database. + * + * @param text <code>char</code> array to append. + */ + void addText(char[] text) { + + textBuffer.append(text); + } + + + /** + * This method appends text character into the text + * section of the AportisDoc database. + * + * @param text <code>char</code> to append. + */ + void addText(char text) { + + textBuffer.append(text); + } + + + /** + * This method encodes the information given to a + * palm <code>Record</code> array in the AportisDoc + * database format. 
+ * + * @return <code>Record</code> array holding AportisDoc + * contents. + * + * @throws IOException If any I/O error occurs. + */ + Record[] getRecords() throws IOException { + + byte textBytes[] = processTextBuffer(); + textLen = textBytes.length; + textRecCount = (short) (textBytes.length / TEXT_RECORD_SIZE); + + // recBytes holds one record of bytes at a time; the same array is refilled on every pass of the loop below (NOTE(review): presumably Record copies it - confirm) + byte recBytes[] = new byte[TEXT_RECORD_SIZE]; + int pos = 0; + + List textRecords = new ArrayList(textRecCount + 1); + + // split textBytes into chunks of Record objects + // and store in textRecords object. + for (int i = 0; i < textRecCount; i++) { + + System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length); + pos += recBytes.length; + Record zRec = new Record(recBytes); + textRecords.add(zRec); + } + + // there's more if ... + + if (pos < textLen) { + + textRecCount++; + + recBytes = new byte[textLen - pos]; + System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length); + Record rec = new Record(recBytes); + textRecords.add(rec); + } + + // construct the Record array and copy + // references from textRecords. + + Record[] allRecords = new Record[textRecords.size() + 1]; + + allRecords[0] = new Record(getHeaderBytes()); + + for (int i = 1; i < allRecords.length; i++) { + + allRecords[i] = (Record) textRecords.get(i-1); + } + + return allRecords; + } + + + /** + * This method converts the text buffer into a <code>byte</code> + * array with the proper encoding of the text section of the + * AportisDoc format. + * + * TODO: do compression. + * + * @return byte[] Converted <code>byte</code> array of text + * section. + * + * @throws IOException If any I/O error occurs. 
+ */ + private byte[] processTextBuffer() throws IOException + { + String str = textBuffer.toString(); + byte bytes[] = str.getBytes(ENCODING); + + return bytes; + } + + + /** + * This method produces the <code>byte</code> array for the header. + * + * @return <code>byte</code> array containing header record data. 
+ * + * @throws IOException If any I/O error occurs. + */ + private byte[] getHeaderBytes() throws IOException + { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // TODO: for now, we shall use UNCOMPRESSED. + // later, we need to use COMPRESSED or a setting. + dos.writeShort(UNCOMPRESSED); + dos.writeShort(SPARE); + dos.writeInt(textLen); + dos.writeShort(textRecCount); + dos.writeShort(TEXT_RECORD_SIZE); + dos.writeInt(SPARE); + + byte[] bytes = bos.toByteArray(); + + return bytes; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..98022bcf47d9 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java @@ -0,0 +1,313 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. 
If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.Element; +import org.w3c.dom.Text; + +import java.io.IOException; +import java.util.Enumeration; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.Debug; + +/** + * <p>AportisDoc implementation of <code>DocumentDeserializer</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts an file in AportisDoc PDB format to StarOffice + * XML format.</p> + * + * <p>The <code>deserialize</code> method uses a <code>DocDecoder</code> + * to read the AportisDoc format into a <code>String</code> object, then + * it calls <code>buildDocument</code> to create a <code>SxwDocument</code> + * object from it.</p> + * + * @author Herbie Ong + */ +public final class DocumentDeserializerImpl + implements OfficeConstants, DocConstants, DocumentDeserializer { + + /** A <code>ConvertData</code> object assigned to this object. */ + private ConvertData cd = null; + + + /** + * Constructor that assigns the given <code>ConvertData</code> + * to this object as input. 
+ * + * @param cd A <code>ConvertData</code> object to read data for + * the conversion process by the <code>deserialize</code> + * method. + */ + public DocumentDeserializerImpl(ConvertData cd) { + this.cd = cd; + } + + + /** + * Convert the given <code>ConvertData</code> object + * into a <code>SxwDocument</code> object. + * + * @return Resulting <code>SxwDocument</code> object. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws IOException, ConvertException { + + int numberOfPDBs = cd.getNumDocuments(); + Document doc = null; + int i=0; + ConvertData cdOut; + Enumeration e = cd.getDocumentEnumeration(); + while (e.hasMoreElements()) { + PalmDocument palmDoc = (PalmDocument) e.nextElement(); + PalmDB pdb = palmDoc.getPdb(); + + log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); + log("<AportisDoc>"); + + Record[] recs = pdb.getRecords(); + String docName = palmDoc.getName(); + DocDecoder decoder = new DocDecoder(); + String text = decoder.parseRecords(recs); + doc = buildDocument(docName, text); + + log("</AportisDoc>"); + } + + return doc; + } + + + /** + * Parses the text content of an AportisDoc format and build a + * <code>SxwDocument</code>. + * + * @param docName Name of <code>Document</code>. + * @param str Text content of AportisDoc format. + * + * @return Resulting <code>SxwDocument</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private SxwDocument buildDocument(String docName, String str) + throws IOException { + + // create minimum office xml document. + SxwDocument sxwDoc = new SxwDocument(docName); + sxwDoc.initContentDOM(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Grab hold of the office:body tag, + // Assume there should be one. + // This is where top level paragraphs will append to. 
+ NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + Node bodyNode = list.item(0); + + // Store all the text in a character array. + char[] text = str.toCharArray(); + + // startIndex has 2 purposes: + // if value is -1, it means that there are no text characters + // needed to be processed for a Text node. if value >= 0, it + // is the index of the starting position of a text section + // for a Text node. + int startIndex = -1; + + // Create a paragraph node to start with. + Element paraNode = doc.createElement(TAG_PARAGRAPH); + + log("<PARA>"); + + for (int i = 0; i < text.length; i++) { + + switch (text[i]) { + + case TAB_CHAR: + + // Check if there are text to be processed first. + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, i - 1); + startIndex = -1; + } + + // Then, add tab element. + Element tabNode = doc.createElement(TAG_TAB_STOP); + paraNode.appendChild(tabNode); + + log("<TAB/>"); + break; + + case EOL_CHAR: + + // Check if there are text to be processed first. + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, i - 1); + startIndex = -1; + } + + // Then, add the current paragraph to body. + bodyNode.appendChild(paraNode); + + // Create another paragraph element. + paraNode = doc.createElement(TAG_PARAGRAPH); + + log("</PARA>"); + log("<PARA>"); + break; + + case SPACE_CHAR: + + // count is the number of space chars from i + int count = 0; + + // Do a look ahead and count the number of space chars + while (text[i + 1 + count] == SPACE_CHAR) { + count++; + } + + // Need to build a space node ONLY if count is > 1. + + if (count > 0) { + + // Check if there are text to be processed first + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, + startIndex, i); + startIndex = -1; + } + + // Then, create a space element + // with the proper attribute. 
+ Element spaceNode = doc.createElement(TAG_SPACE); + spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, + Integer.toString(count)); + + paraNode.appendChild(spaceNode); + + // reposition i to the last space character. + i += count; + + log("<SPACE count=\"" + count + "\" />"); + + } else { + + // If there are no chars for text node yet, + // consider this one. + if (startIndex < 0) { + + startIndex = i; + log("<TEXT>"); + } + } + + break; + + default: + + // If there are no chars for text node yet, + // this should be the start. + if (startIndex < 0) { + + startIndex = i; + log("<TEXT>"); + } + + break; + } + } + + int lastIndex = text.length - 1; + + // Check if there are text to be processed first. + + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, lastIndex); + } + + // Then, add the last paragraph element if it is not added yet. + if (text[lastIndex] != EOL_CHAR) { + bodyNode.appendChild(paraNode); + } + + log("</PARA>"); + + return sxwDoc; + } + + + /** + * Add a Text <code>Node</code> to the given paragraph node with the + * text starting at the given <code>startPos</code> until + * <code>endPos</code>. + * + * @param doc <code>org.w3c.dom.Document</code> object for creating + * <code>Node</code> objects. + * @param para The current paragraph <code>Node</code> to append + * text <code>Node</code>. + * @param text Array of characters containing text. + * @param startPos Starting index position for text value. + * @param endPos End index position for text value. + */ + private void addTextNode(org.w3c.dom.Document doc, Node para, char text[], + int startPos, int endPos) { + + String str = new String(text, startPos, endPos - startPos + 1); + Text textNode = doc.createTextNode(str); + para.appendChild(textNode); + log(str); + log("</TEXT>"); + } + + /** + * Sends message to the log object. + * + * @param str Debug message. 
+ */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java new file mode 100644 index 000000000000..23b236b41e6a --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java @@ -0,0 +1,99 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * AportisDoc implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + } + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = 
diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java new file mode 100644 index 000000000000..a2652df792b3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java @@ -0,0 +1,532 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import java.io.IOException; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PdbEncoder; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.Debug; +import org.openoffice.xmerge.util.XmlUtil; + +/** + * <p>AportisDoc implementation of + * org.openoffice.xmerge.DocumentSerializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>The <code>serialize</code> method traverses the DOM + * document from the given <code>Document</code> object. It uses a + * <code>DocEncoder</code> object for the actual conversion of + * contents to the AportisDoc format.</p> + * + * @author Herbie Ong + */ + + +public final class DocumentSerializerImpl + implements OfficeConstants, DocConstants, DocumentSerializer { + + /** A <code>DocEncoder</code> object for encoding to AportisDoc. */ + private DocEncoder encoder = null; + + /** SXW <code>Document</code> object that this converter processes. */ + private SxwDocument sxwDoc = null; + + + /** + * Constructor. + * + * @param doc A SXW <code>Document</code> to be converted. 
+ */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument) doc; + } + + + /** + * <p>Method to convert a <code>Document</code> into a PDB. + * It passes back the converted data as a <code>ConvertData</code> + * object.</p> + * + * <p>This method is not thread safe for performance reasons. + * This method should not be called from within two threads. + * It would be best to call this method only once per object + * instance.</p> + * + * @return The <code>ConvertData</code> object containing the output. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() throws ConvertException, IOException { + + + // get the server document name + + String docName = sxwDoc.getName(); + + // get DOM document + + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + encoder = new DocEncoder(); + + // Traverse to the office:body element. + // There should only be one. + + NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); + int len = list.getLength(); + + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + // create a ConvertData object. + // + Record records[] = encoder.getRecords(); + ConvertData cd = new ConvertData(); + + PalmDocument palmDoc = new PalmDocument(docName, + DocConstants.CREATOR_ID, DocConstants.TYPE_ID, + 0, PalmDB.PDB_HEADER_ATTR_BACKUP, records); + + cd.addDocument(palmDoc); + return cd; + } + + + /** + * This method traverses <i>office:body</i> element. + * + * @param node <i>office:body</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseBody(Node node) throws IOException { + + log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); + log("<AportisDOC>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) || + nodeName.equals(TAG_HEADING)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</AportisDOC>"); + } + + + /** + * This method traverses the <i>text:p</i> and <i>text:h</i> + * element <code>Node</code> objects. + * + * @param node A <i>text:p</i> or <i>text:h</i> + * <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParagraph(Node node) throws IOException { + + log("<PARA>"); + traverseParaContents(node); + encoder.addText(EOL_CHAR); + log("</PARA>"); + } + + + /** + * This method traverses a paragraph content. + * It uses the <code>traverseParaElem</code> method to + * traverse into Element <code>Node</code> objects. + * + * @param node A paragraph or content <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParaContents(Node node) throws IOException { + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + short nodeType = child.getNodeType(); + + switch (nodeType) { + + case Node.TEXT_NODE: + // this is for grabbing text nodes. 
+ String s = child.getNodeValue(); + + if (s.length() > 0) { + encoder.addText(s); + } + + log("<TEXT>"); + log(s); + log("</TEXT>"); + + break; + + case Node.ELEMENT_NODE: + + traverseParaElem(child); + break; + + case Node.ENTITY_REFERENCE_NODE: + + log("<ENTITY_REFERENCE>"); + traverseParaContents(child); + log("<ENTITY_REFERENCE/>"); + break; + + default: + log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); + } + } + } + } + + + /** + * This method traverses an <code>Element</code> <code>Node</code> + * within a paragraph. + * + * @param node <code>Element</code> <code>Node</code> within a + * paragraph. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParaElem(Node node) throws IOException { + + String nodeName = node.getNodeName(); + + if (nodeName.equals(TAG_SPACE)) { + + // this is for text:s tags. + NamedNodeMap map = node.getAttributes(); + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + StringBuffer space = new StringBuffer(SPACE_CHAR); + int count = 1; + + if (attr != null) { + + try { + + String countStr = attr.getNodeValue(); + count = Integer.parseInt(countStr.trim()); + + } catch (NumberFormatException e) { + + // TODO: for now, throw IOException. + // later, perhaps will have to throw + // some other conversion exception instead. + throw new IOException(e.getMessage()); + } + } + + for (int j = 0; j < count; j++) { + + space.append(SPACE_CHAR); + } + + encoder.addText(space.toString()); + + log("<SPACE count=\"" + count + "\" />"); + + } else if (nodeName.equals(TAG_TAB_STOP)) { + + // this is for text:tab-stop + encoder.addText(TAB_CHAR); + + log("<TAB/>"); + + } else if (nodeName.equals(TAG_LINE_BREAK)) { + + // commented out by Csaba: There is no point to convert a linebreak + // into a EOL, because it messes up the number of XML nodes and the + // merge won't work properly. 
Other solution would be to implement such + // nodemerger, which would be able to merge embedded tags in a paragraph + + // this is for text:line-break + // encoder.addText(EOL_CHAR); + + log("skipped <LINE-BREAK/>"); + + } else if (nodeName.equals(TAG_SPAN)) { + + // this is for text:span + log("<SPAN>"); + traverseParaContents(node); + log("</SPAN>"); + + } else if (nodeName.equals(TAG_HYPERLINK)) { + + // this is for text:a + log("<HYPERLINK>"); + traverseParaContents(node); + log("<HYPERLINK/>"); + + } else if (nodeName.equals(TAG_BOOKMARK) || + nodeName.equals(TAG_BOOKMARK_START)) { + + log("<BOOKMARK/>"); + + } else if (nodeName.equals(TAG_TEXT_VARIABLE_SET) + || nodeName.equals(TAG_TEXT_VARIABLE_GET) + || nodeName.equals(TAG_TEXT_EXPRESSION) + || nodeName.equals(TAG_TEXT_USER_FIELD_GET) + || nodeName.equals(TAG_TEXT_PAGE_VARIABLE_GET) + || nodeName.equals(TAG_TEXT_SEQUENCE) + || nodeName.equals( TAG_TEXT_VARIABLE_INPUT) + || nodeName.equals(TAG_TEXT_TIME) + || nodeName.equals( TAG_TEXT_PAGE_COUNT) + || nodeName.equals(TAG_TEXT_PAGE_NUMBER ) + || nodeName.equals(TAG_TEXT_SUBJECT) + || nodeName.equals(TAG_TEXT_TITLE) + || nodeName.equals(TAG_TEXT_CREATION_TIME) + || nodeName.equals(TAG_TEXT_DATE) + || nodeName.equals(TAG_TEXT_TEXT_INPUT) + || nodeName.equals(TAG_TEXT_AUTHOR_INITIALS)) { + log("<FIELD>"); + traverseParaContents(node); + log("</FIELD>"); + + }else if (nodeName.startsWith(TAG_TEXT)) { + log("<Unknown text Field>"); + traverseParaContents(node); + log("</Unknown text Field>"); + + }else { + + log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); + } + } + + + /** + * This method traverses list tags <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. A list can only contain one optional + * <i>text:list-header</i> and one or more <i>text:list-item</i> + * elements. + * + * @param node A list <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseList(Node node) throws IOException { + + log("<LIST>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + + traverseListItem(child); + + } else if (nodeName.equals(TAG_LIST_HEADER)) { + + traverseListHeader(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST>"); + } + + + /** + * This method traverses a <i>text:list-header</i> element. + * It contains one or more <i>text:p</i> elements. + * + * @param node A list header <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseListHeader(Node node) throws IOException { + + log("<LIST-HEADER>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST-HEADER>"); + } + + + /** + * <p>This method will traverse a <i>text:list-item</i>. + * A list item may contain one or more of <i>text:p</i>, + * <i>text:h</i>, <i>text:section</i>, <i>text:ordered-list</i> + * and <i>text:unordered-list</i>.</p> + * + * <p>This method currently only implements grabbing <i>text:p</i>, + * <i>text:h</i>, <i>text:unordered-list</i> and + * <i>text:ordered-list</i>.</p> + * + * @param node The <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseListItem(Node node) throws IOException { + + log("<LIST-ITEM>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST-ITEM>"); + } + + + /** + * Logs debug messages. + * + * @param str The debug message. + */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java new file mode 100644 index 000000000000..d1de0b19a6ab --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java @@ -0,0 +1,141 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.DocumentMergerFactory; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.DocumentSerializerFactory; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.DocumentDeserializerFactory; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.registry.ConverterInfo; +import java.io.IOException; +import java.io.InputStream; + +/** + * <p>AportisDoc implementation of the <code>PluginFactory</code>. + * This encapsulates conversion of StarWriter XML format to and from + * AportisDoc format.</p> + * + * <p>The superclass produces a particular + * {@link org.openoffice.xmerge.Document Document} + * object, i.e. {@link + * org.openoffice.xmerge.converter.xml.sxw.SxwDocument + * SxwDocument} that the converters in this class works with. Thus, + * this class only implements the methods that produces the converters, + * i.e. 
{@link + * org.openoffice.xmerge.DocumentSerializer + * DocumentSerializer} and {@link + * org.openoffice.xmerge.DocumentDeserializer + * DocumentDeserializer}; + * as well as the {@link + * org.openoffice.xmerge.ConverterCapabilities + * ConverterCapabilities} object that is specific to this format + * conversion. That superclass also produces a {@link + * org.openoffice.xmerge.DocumentMerger DocumentMerger} + * object, i.e. {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl + * DocumentMergerImpl} which this class derives the functionality.</p> + * + * @author Herbie Ong + */ +public final class PluginFactoryImpl extends SxwPluginFactory + implements DocumentDeserializerFactory, DocumentSerializerFactory, + DocumentMergerFactory { + + public PluginFactoryImpl (ConverterInfo ci) { + super(ci); + } + + /** ConverterCapabilities object for this type of conversion. */ + private final static ConverterCapabilities converterCap = + new ConverterCapabilitiesImpl(); + + + /** + * Returns an instance of <code>DocumentSerializerImpl</code>, + * which is an implementation of the <code>DocumentSerializer</code> + * interface. + * + * @param doc <code>Document</code> object to be + * converted/serialized. + * + * @return A <code>DocumentSerializerImpl</code> object. + */ + public DocumentSerializer createDocumentSerializer(Document doc) { + + return new DocumentSerializerImpl(doc); + } + + + /** + * Returns an instance of <code>DocumentDeserializerImpl</code>, + * which is an implementation of the <code>DocumentDeserializer</code> + * interface. + * + * @param cd <code>ConvertData</code> object for reading data + * which will be converted back to a + * <code>Document</code> object. + * + * @return A DocumentDeserializerImpl object. 
+ */ + public DocumentDeserializer createDocumentDeserializer(ConvertData cd) { + + return new DocumentDeserializerImpl(cd); + } + + + /** + * Returns an instance of <code>DocumentMergerImpl</code>, + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A DocumentMergerImpl object. + */ + public DocumentMerger createDocumentMerger(Document doc) { + + ConverterCapabilities cc = converterCap; + DocumentMergerImpl merger = new DocumentMergerImpl(doc, cc); + return merger; + } + + public Document createDeviceDocument(String name, InputStream is) + throws IOException { + + PalmDocument palmDoc = new PalmDocument(is); + return palmDoc; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml new file mode 100644 index 000000000000..b6efd3e2bec3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml @@ -0,0 +1,134 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. 
If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> +<project name="xmrg_jooxcxs_aportisdoc" default="main" basedir="."> + + <!-- ================================================================= --> + <!-- settings --> + <!-- ================================================================= --> + + <!-- project prefix, used for targets and build.lst --> + <property name="prj.prefix" value="xmrg"/> + + <!-- name of this sub target used in recursive builds --> + <property name="target" value="xmrg_jooxcxs_aportisdoc"/> + + <!-- relative path to project directory --> + <property name="prj" value="../../../../../../../.."/> + + <!-- start of java source code package structure --> + <property name="java.dir" value="${prj}/java"/> + + <!-- path component for current java package --> + <property name="package" + value="org/openoffice/xmerge/converter/xml/sxw/aportisdoc"/> + + <!-- define how to handle CLASSPATH environment --> + <property name="build.sysclasspath" value="ignore"/> + + <!-- classpath settings for javac tasks --> + <path id="classpath"> + <pathelement location="${build.class}"/> + <pathelement location="${solar.jar}/parser.jar"/> + <pathelement location="${solar.jar}/jaxp.jar"/> + <pathelement location="${solar.jar}/xerces.jar"/> + </path> + + <!-- set wether we want to compile with or without deprecation --> + <property name="deprecation" value="on"/> + + <!-- ================================================================= --> + <!-- solar build environment targets --> + <!-- ================================================================= --> + + <target name="build_dir" unless="build.dir"> + <property name="build.dir" value="${out}"/> + </target> + + <target name="solar" depends="build_dir" if="solar.update"> + <property name="solar.properties" + value="${solar.bin}/solar.properties"/> + </target> + + <target name="init" depends="solar"> + <property name="build.compiler" value="classic"/> + <property 
file="${solar.properties}"/> + <property file="${build.dir}/class/solar.properties"/> + </target> + + <target name="info"> + <echo message="--------------------"/> + <echo message="${target}"/> + <echo message="--------------------"/> + </target> + + + <!-- ================================================================= --> + <!-- custom targets --> + <!-- ================================================================= --> + + <!-- the main target, called in recursive builds --> + <target name="main" depends="info,prepare,compile"/> + + <!-- prepare output directories --> + <target name="prepare" depends="init" if="build.class"> + <mkdir dir="${build.dir}"/> + <mkdir dir="${build.class}"/> + </target> + + <!-- compile java sources in ${package} --> + <target name="compile" depends="prepare" if="build.class"> + <javac srcdir="${java.dir}" + destdir="${build.class}" + debug="${debug}" + deprecation="${deprecation}" + optimize="${optimize}"> + <classpath refid="classpath"/> + <include name="${package}/DocConstants.java"/> + <include name="${package}/DocDecoder.java"/> + <include name="${package}/DocEncoder.java"/> + <include name="${package}/DocumentDeserializerImpl.java"/> + <include name="${package}/DocumentSerializerImpl.java"/> + <include name="${package}/DocumentMergerImpl.java"/> + <include name="${package}/ConverterCapabilitiesImpl.java"/> + <include name="${package}/PluginFactoryImpl.java"/> + </javac> + </target> + + <!-- clean up --> + <target name="clean" depends="prepare"> + <delete includeEmptyDirs="true"> + <fileset dir="${build.class}"> + <patternset> + <include name="${package}/*.class"/> + </patternset> + </fileset> + </delete> + </target> + +</project> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml new file mode 100644 index 000000000000..7942295c004a --- /dev/null +++ 
b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml @@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<converters> + <converter type="staroffice/sxw" version="1.0"> + <converter-display-name> + AportisDoc + </converter-display-name> + <converter-description> + StarWriter XML to/from AportisDoc conversion + </converter-description> + <converter-vendor>OpenOffice.org</converter-vendor> + <converter-class-impl> + org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + </converter-class-impl> + <converter-target type="application/x-aportisdoc" /> + </converter> +</converters> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk new file mode 100644 index 000000000000..5b3f3fea509d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk @@ -0,0 +1,32 @@ +#*************************************************************************** +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. 
+# +#*************************************************************************** + +TARGET=xmrg_jooxcxs_aportisdoc +PRJ=../../../../../../../.. + +.INCLUDE : ant.mk +ALLTAR: ANTBUILD diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html new file mode 100644 index 000000000000..78cfe79bfbbf --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html @@ -0,0 +1,237 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<html> +<head> +<title>org.openoffice.xmerge.converter.xml.sxw.aportisdoc package</title> +</head> + +<body bgcolor="white"> + +<p>Provides the tools for doing the conversion of StarWriter XML to +and from AportisDoc format.</p> + +<p>It follows the {@link org.openoffice.xmerge} framework for the conversion process.</p> + +<p>Since it converts to/from a Palm application format, these converters +follow the <a href=../../../../converter/palm/package-summary.html#streamformat> +<code>PalmDB</code> stream format</a> for writing out to the Palm sync client or +reading in from the Palm sync client.</p> + +<p>Note that <code>PluginFactoryImpl</code> also provides a +<code>DocumentMerger</code> object, i.e. {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl DocumentMergerImpl}. +This functionality was derived from its superclass +{@link org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory +SxwPluginFactory}.</p> + +<h2>AportisDoc pdb format - Doc</h2> + +<p>The AportisDoc pdb format is widely used by different Palm applications, +e.g. QuickWord, AportisDoc Reader, MiniWrite, etc. Note that some +of these applications put tweaks into the format. The converters will only +support the default AportisDoc format, plus some very minor tweaks to accommodate +other applications.</p> + +<p>The text content of the format is plain text, i.e. there are no styles +or structures. There is no notion of lists, list items, paragraphs, +headings, etc. The format does have support for bookmarks.</p> + +<p>For most Doc applications, the default character encoding supported is +the extended ASCII character set, i.e. ISO-8859-1. StarWriter XML is in +UTF-8 encoding scheme. 
Since the UTF-8 encoding scheme covers more characters, +converting UTF-8 strings into extended ASCII would mean that there can be +possible loss of character mappings.</p> + +<p>Using JAXP, XML files can be parsed and read in as Java <code>String</code>s, +which are in Unicode format, so there is no loss of character mapping from UTF-8 +to Java Strings. There is possible loss of character mapping in +converting Java <code>String</code>s to ASCII bytes. Java characters that +cannot be represented in extended ASCII are converted into the ASCII +character '?' (0x3F in hexadecimal) via the <code>String.getBytes(encoding)</code> +API.</p> + +<h2>SXW to DOC Conversion</h2> + +<p>The <code>DocumentSerializerImpl</code> class implements the +<code>org.openoffice.xmerge.DocumentSerializer</code>. +This class specifically provides the conversion process from a given +<code>SxwDocument</code> object to DOC formatted records, which are +then passed back to the client via the <code>ConvertData</code> object.</p> + +<p>The following XML tags are handled. [Note that some may not be implemented yet.]</p> +<ul> +<li> + <p>Paragraphs <tt><text:p></tt> and Headings <tt><text:h></tt></p> + + <p>Heading elements are classified the same as paragraph + elements since both have the same possible elements inside. + Their main difference is that they refer to different types + of style information, which is outside of their element tags. + Since there are no styles in the DOC format, headings should + be treated the same way a paragraph is converted.</p> + + <p>For paragraph elements, convert and transfer text nodes + that are essential. Text nodes directly contained within paragraph + nodes are such. There are also a number of elements that + a paragraph element may contain. 
These are explained in their + own context.</p> + + <p>At the end of the paragraph, an EOL character is added by + the converter to provide a separation for each paragraph, + since the Doc format does not have a notion of a paragraph.</p> +</li> +<li> + <p>White spaces <tt><text:s></tt> and Tabs <tt><text:tab-stop></tt></p> + + <p>In SXW, normally 2 or more white-space characters are collapsed into + a single space character. In order to make sure that the document + content really contains those white-space characters, there are special + elements assigned to them.</p> + + <p>The space element specifies the number of spaces it represents. + Thus, converting it just means providing the specific number of spaces + that the element requires.</p> + + <p>There is also the tab-stop element. This is a bit tricky. In a + StarWriter document, tab-stops are specified by a column position. + A tab is not an exact number of spaces, but rather a specific column + positioning. Say, regular tab-stops are set at every 5th column. + At column 4, if I hit a tab, it goes to column 5. At column 1, hitting + a tab would put the cursor at column 5 as well. SmartDoc and AportisDoc + applications go by columns for the ASCII tab character. The only problem + is that in StarWriter, one could specify a different tab-stop, but not + in most of these Doc applications, at least I have not seen one. + The solution for this is simply to convert to the ASCII tab + character and not do anything for different tab-stop positioning.</p> +</li> +<li> + <p>Line breaks <tt><text:line-break></tt></p> + + <p>To represent line breaks, it is simplest to just put an ASCII LF + character. Note that the side effect of this is that an end of paragraph + also contains an ASCII LF character. 
Thus, for the DOC to SXW conversion, + line breaks are not distinguishable from specifying the end of a + paragraph.</p> +</li> +<li> + <p>Text spans <tt><text:span></tt></p> + + <p>Text spans contain text that has different style attributes + from the paragraphs'. Text spans can be embedded within another + text span. Since it is purely for style tagging, we only need + to convert and transfer the text elements within these.</p> +</li> +<li> + <p>Hyperlinks <tt><text:a></tt></p> + + <p>Convert and transfer the text portion.</p> +</li> +<li> + <p>Bookmarks <tt><text:bookmark></tt> <tt><text:bookmark-start></tt> + <tt><text:bookmark-end></tt> [Not implemented yet]</p> + + <p>In SXW, bookmark elements are embedded inside paragraph elements. + Bookmarks can either mark a text position or a text range. <tt><text:bookmark></tt> + marks a position while the pair <tt><text:bookmark-start></tt> and + <tt><text:bookmark-end></tt> marks a text range. The DOC format only + supports bookmarking a text position. Thus, for the conversion, + <tt><text:bookmark></tt> and <tt><text:bookmark-start></tt> will both mark + a text position.</p> +</li> +<li> + <p>Change Tracking <tt><text:tracked-changes></tt> + <tt><text:change*></tt> [Not implemented yet]</p> + + <p>Change tracking elements are not yet supported by the current + OpenOffice XML filters; this will need to be watched. The text + within these elements has to be interpreted properly during the + conversion process.</p> +</li> +<li> + <p>Lists <tt><text:unordered-list></tt> and + <tt><text:ordered-list></tt></p> + + <p>A list can only contain one optional <tt><text:list-header></tt> + and one or more <tt><text:list-item></tt> elements.</p> + + <p>A <tt><text:list-header></tt> contains one or more paragraph + elements. 
Since there are no styles, the conversion process does not + do anything special for list headers; conversion of the paragraphs + within list headers is the same as explained above.</p> + + <p>A <tt><text:list-item></tt> may contain one or more paragraphs, + headings, lists, etc. Since the Doc format does not support any list + structure, there will not be any special handling for this element. + Conversion for elements within it shall be applied according to the + element type. Thus, lists with paragraphs within them will result in just + plain paragraphs. Sublists will not be identifiable. Paragraphs in + sublists will still appear.</p> +</li> +<li> + <p><tt><text:section></tt></p> + + <p>I am not sure what this is yet; will need to investigate more on this.</p> +</li> +</ul> +<p>There may be other tags that will still need to be addressed for this conversion.</p> + +<p>Refer to {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentSerializerImpl DocumentSerializerImpl} +for details of implementation. It uses the <code>DocEncoder</code> class to do the encoding +part.</p> + +<h2>DOC to SXW Conversion</h2> + +<p>The <code>DocumentDeserializerImpl</code> class implements the +<code>org.openoffice.xmerge.DocumentDeserializer</code>. It is +passed the device document in the form of a <code>ConvertData</code> object. +It will then create a <code>SxwDocument</code> object from the conversion of +the DOC formatted records.</p> + +<p>The text content of the Doc format will be transferred as text. Paragraph +elements will be formed based on the existence of an ASCII LF character. There +will be at least one paragraph element.</p> + +<p>Bookmarks in the Doc format will be converted to the bookmark element +<tt><text:bookmark></tt> [Not implemented yet].</p> + + +<h2>Merging changes</h2> + +<p>As mentioned above, the <code>DocumentMerger</code> object produced by +<code>PluginFactoryImpl</code> is <code>DocumentMergerImpl</code>. 
+Refer to the javadocs for that package/class on its merging specifications. +</p> + +<h2>TODO list</h2> + +<p><ol> +<li>Investigate Palm's with different character encodings.</li> +<li>Investigate other StarWriter XML tags</li> +</ol></p> + +</body> +</html> diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml new file mode 100644 index 000000000000..02936516e7c9 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/build.xml @@ -0,0 +1,128 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<project name="xmrg_jooxcx_sxw" default="main" basedir="."> + + <!-- ================================================================= --> + <!-- settings --> + <!-- ================================================================= --> + + <!-- project prefix, used for targets and build.lst --> + <property name="prj.prefix" value="xmrg"/> + + <!-- name of this sub target used in recursive builds --> + <property name="target" value="xmrg_jooxcx_sxw"/> + + <!-- relative path to project directory --> + <property name="prj" value="../../../../../../.."/> + + <!-- start of java source code package structure --> + <property name="java.dir" value="${prj}/java"/> + + <!-- path component for current java package --> + <property name="package" + value="org/openoffice/xmerge/converter/xml/sxw"/> + + <!-- define how to handle CLASSPATH environment --> + <property name="build.sysclasspath" value="ignore"/> + + <!-- classpath settings for javac tasks --> + <path id="classpath"> + <pathelement location="${build.class}"/> + <pathelement location="${solar.jar}/parser.jar"/> + <pathelement location="${solar.jar}/jaxp.jar"/> + <pathelement location="${solar.jar}/xerces.jar"/> + </path> + + <!-- set whether we want to compile with or without deprecation --> + <property name="deprecation" value="on"/> + + <!-- ================================================================= --> + <!-- solar build environment targets --> + <!-- ================================================================= --> + + <target name="build_dir" unless="build.dir"> + <property name="build.dir" value="${out}"/> + </target> + + <target name="solar" depends="build_dir" if="solar.update"> + <property name="solar.properties" + value="${solar.bin}/solar.properties"/> + </target> + + <target name="init" depends="solar"> + <property name="build.compiler" value="classic"/> + <property file="${solar.properties}"/> + <property file="${build.dir}/class/solar.properties"/> + </target> + + <target name="info"> + 
<echo message="--------------------"/> + <echo message="${target}"/> + <echo message="--------------------"/> + </target> + + + <!-- ================================================================= --> + <!-- custom targets --> + <!-- ================================================================= --> + + <!-- the main target, called in recursive builds --> + <target name="main" depends="info,prepare,compile"/> + + <!-- prepare output directories --> + <target name="prepare" depends="init" if="build.class"> + <mkdir dir="${build.dir}"/> + <mkdir dir="${build.class}"/> + </target> + + <!-- compile java sources in ${package} --> + <target name="compile" depends="prepare" if="build.class"> + <javac srcdir="${java.dir}" + destdir="${build.class}" + debug="${debug}" + deprecation="${deprecation}" + optimize="${optimize}"> + <classpath refid="classpath"/> + <include name="${package}/SxwDocument.java"/> + <include name="${package}/SxwPluginFactory.java"/> + </javac> + </target> + + <!-- clean up --> + <target name="clean" depends="prepare"> + <delete includeEmptyDirs="true"> + <fileset dir="${build.class}"> + <patternset> + <include name="${package}/*.class"/> + </patternset> + </fileset> + </delete> + </target> + +</project> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk new file mode 100644 index 000000000000..c4953812ef58 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/makefile.mk @@ -0,0 +1,32 @@ +#*************************************************************************** +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. 
+# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#*************************************************************************** + +TARGET=xmrg_jooxcx_sxw +PRJ=../../../../../../.. + +.INCLUDE : ant.mk +ALLTAR: ANTBUILD diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html new file mode 100644 index 000000000000..409f041fffb2 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/package.html @@ -0,0 +1,38 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> +<html> +<head> +<title>org.openoffice.xmerge.converter.xml.sxw package</title> +</head> + +<body bgcolor="white"> +<p>Provides base implementation of StarWriter XML conversion to and from +different "Device" <code>Document</code> formats.</p> + +</body> +</html> diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java new file mode 100644 index 000000000000..4e4aaf164e7a --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ConverterCapabilitiesImpl.java @@ -0,0 +1,93 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. 
If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +/** + * <p>PocketWord implementation of <code>ConverterCapabilities</code> for + * the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>Used with StarWriter XML to/from PocketWord conversions. The + * <code>ConverterCapabilities</code> specify which "Office" + * <code>Document</code> tags and attributes are supported on the + * "Device" <code>Document</code> format.</p> + */ +public final class ConverterCapabilitiesImpl + implements ConverterCapabilities { + /** + * Reports whether the given "Office" <code>Document</code> tag is handled by + * the PocketWord converter. The tag is compared against the document, + * document-content, body, paragraph, heading, ordered/unordered list, + * list-item, list-header, span, hyperlink, line-break, space and tab-stop + * constants of <code>OfficeConstants</code>. + * + * @param tag The tag name to test. + * + * @return <code>true</code> if <code>tag</code> equals one of those + * constants, <code>false</code> otherwise. + */ + public boolean canConvertTag(String tag) { + + if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag)) + return true; + else if (OfficeConstants.TAG_PARAGRAPH.equals(tag)) + return true; + else if (OfficeConstants.TAG_HEADING.equals(tag)) + return true; + else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_ITEM.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_HEADER.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPAN.equals(tag)) + return true; + else if (OfficeConstants.TAG_HYPERLINK.equals(tag)) + return true; + else if (OfficeConstants.TAG_LINE_BREAK.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPACE.equals(tag)) + return true; + else if (OfficeConstants.TAG_TAB_STOP.equals(tag)) + return true; + + return false; + } + /** + * Reports whether the given attribute of the given tag is handled by + * the PocketWord converter. Only the space-count attribute + * (<code>ATTRIBUTE_SPACE_COUNT</code>) of the space tag + * (<code>TAG_SPACE</code>) is accepted. + * + * @param tag The tag name the attribute belongs to. + * @param attribute The attribute name to test. + * + * @return <code>true</code> for the space-count attribute of the + * space tag, <code>false</code> for every other combination. + */ + public boolean canConvertAttribute(String tag, + String 
attribute) { + + if (OfficeConstants.TAG_SPACE.equals(tag)) { + + if (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute)) + return true; + } + + return false; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java new file mode 100644 index 000000000000..3e5f6e6827ff --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDescriptor.java @@ -0,0 +1,236 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.util.EndianConverter; + +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.io.IOException; + +import java.util.Vector; + + +/** + * This class represents the data structure stored by a Pocket Word file that + * describes that file. + * + * The data structure is of variable length, beginning at the end of the + * font declarations and ending 10 bytes before the first instance of 0xFF 0xFF + * marking a paragraph block. + * + * The variable length component arises from an 8 byte structure describing each + * paragraph in the document. These paragraph descriptors appear at the end + * of the Document Descriptor. + * + * @author Mark Murnane + * @version 1.1 + */ +class DocumentDescriptor { + private short numParagraphs = 0; + private short length = 0; + private short numLines = 0; + + private Vector paragraphDesc = null; + + DocumentDescriptor() { + paragraphDesc = new Vector(0, 1); + } + + + + /** + * Updates the <code>DocumentDescriptor</code> to include details of another + * paragraph in the document. + * + * The paragraph is stored with a length of <code>len + 1</code>, and it is + * that stored value which is added to the document's running length. + * + * NOTE(review): the paragraph, line and length totals are all + * <code>short</code> fields, so large documents could wrap them - confirm + * whether the file format bounds these values. + * + * @param len The number of characters in the paragraph. + * @param lines The number of lines on screen that the paragraph uses. + */ + public void addParagraph(short len, short lines) { + ParagraphDescriptor pd = new ParagraphDescriptor(len, lines); + + paragraphDesc.add(pd); + numParagraphs++; + numLines += lines; + length += pd.length; + } + + + /** + * Retrieve the <code>DocumentDescriptor's</code> data. Due to the variable + * length nature of the descriptor, certain fields can only be + * calculated/written after the addition of all paragraphs. + * + * @return Byte array containing the Pocket Word representation of this + * <code>DocumentDescriptor</code>. 
+ */ + public byte[] getDescriptor () { + ByteArrayOutputStream descStream = new ByteArrayOutputStream(); + + writeHeader(descStream); + + /* + * This value seems to increment by 0x02 for each paragraph. + * For a single paragraph doc, the value is 0x08, 0x0A for two, + * 0x0C for three ... + */ + try { + descStream.write(EndianConverter.writeShort((short)(6 + + (numParagraphs * 2)))); + + descStream.write(EndianConverter.writeShort(numParagraphs)); + descStream.write(EndianConverter.writeShort((short)0)); + descStream.write(EndianConverter.writeShort(numParagraphs)); + + descStream.write(EndianConverter.writeShort((short)0)); + descStream.write(EndianConverter.writeShort((short)length)); + descStream.write(EndianConverter.writeShort((short)0)); + + descStream.write(EndianConverter.writeShort(numLines)); + descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 } ); + + for (int i = 0; i < paragraphDesc.size(); i++) { + ParagraphDescriptor pd = (ParagraphDescriptor)paragraphDesc.elementAt(i); + + descStream.write(pd.getDescriptor()); + } + + // Byte sequence marking the end of this DocumentDescriptor + descStream.write(EndianConverter.writeShort((short)0)); + descStream.write(EndianConverter.writeShort((short)0x41)); + } + catch (IOException ioe) { + // Should never happen as this is a memory based stream. + } + + return descStream.toByteArray(); + } + + + /* + * This method loads the initial fixed portion of the descriptor and the + * mid-section. The mid-section is variable but Pocket Word doesn't seem + * to mind default values. 
+ */ + private void writeHeader(OutputStream descStream) { + + try { + descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x06, 0x00, + 0x15, 0x00, 0x10, 0x00, + 0x01, 0x00, (byte)0xD0, 0x2F, + 0x00, 0x00, (byte)0xE0, 0x3D, + 0x00, 0x00, (byte)0xF0, 0x00, + 0x00, 0x00, (byte)0xA0, 0x05, + 0x00, 0x00, (byte)0xA0, 0x05, + 0x00, 0x00, (byte)0xA0, 0x05, + 0x00, 0x00, (byte)0xA0, 0x05, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x0A, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x00, + 0x07, 0x00, 0x10, 0x00, + 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x1F, 0x04, 0x00, 0x00 } ); + + /* + * The next four bytes are variable, but a pattern hasn't yet been + * established. Pocket Word seems to accept this constant value. + * + * The bytes are repeated after another 12 byte sequence which does + * not seem to change from one file to the next. + */ + descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } ); + descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x3D, 0x04, 0x00, 0x00 } ); + descStream.write(new byte[] { (byte)0xE2, 0x02, 0x00, 0x00 } ); + + descStream.write(new byte[] { 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x08, 0x00 } ); + } + catch (IOException ioe) { + /* Shouldn't happen with a ByteArrayOutputStream */ + } + } + + + /** + * <code>ParagraphDescriptor</code> represents the data structure used to + * describe individual paragraphs within a <code>DocumentDescriptor</code>. + * + * It is used solely by the <code>DocumentDescriptor</code> class. + * + * Each descriptor is written out as four shorts, in this order: a zero + * filler, the line count, the stored length (<code>len + 1</code>) and a + * constant of unknown meaning (0x23). 
+ */ + private class ParagraphDescriptor { + private short filler = 0; + private short lines = 0; + private short length = 0; + private short unknown = 0x23; + + public ParagraphDescriptor(short len, short numLines) { + lines = numLines; + length = (short)(len + 1); + } + + public byte[] getDescriptor() { + ByteArrayOutputStream desc = new ByteArrayOutputStream(); + + try { + desc.write(EndianConverter.writeShort(filler)); + desc.write(EndianConverter.writeShort(lines)); + desc.write(EndianConverter.writeShort(length)); + desc.write(EndianConverter.writeShort(unknown)); + } + catch (IOException ioe) { + /* Should never happen */ + } + + return desc.toByteArray(); + } + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..373df77886f8 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentDeserializerImpl.java @@ -0,0 +1,298 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentDeserializer; + +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; +import org.openoffice.xmerge.converter.xml.StyleCatalog; + +import org.openoffice.xmerge.util.OfficeUtil; + +import java.io.InputStream; +import java.io.IOException; +import java.io.FileInputStream; +import java.io.FileDescriptor; + +import java.util.Enumeration; +import java.util.Vector; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.Element; + + +/** + * <p>Pocket Word implementation of <code>DocumentDeserializer</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts a Pocket Word file to an OpenOffice Writer XML DOM.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public final class DocumentDeserializerImpl + implements DocumentDeserializer, OfficeConstants { + + private PocketWordDocument pswDoc = null; + private SxwDocument sxwDoc = null; + private String docName; + + private StyleCatalog styleCat = null; + + + /** + * Initialises a new <code>DocumentDeserializerImpl</code> using the + * supplied <code>ConvertData</code>.</p> + * + * <p>The <code>Document</code> objects in the <code>ConvertData</code> + * should be {@link + * 
org.openoffice.xmerge.converter.xml.sxw.pocketword.PocketWordDocument + * PocketWordDocument} objects.</p> + * + * @param cd ConvertData containing a <code>PocketWordDocument</code> + * for conversion. + */ + public DocumentDeserializerImpl(ConvertData cd) { + Enumeration e = cd.getDocumentEnumeration(); + + // A Pocket Word file is composed of one binary file + while (e.hasMoreElements()) { + pswDoc = (PocketWordDocument)e.nextElement(); + } + + docName = pswDoc.getName(); + } + + + /** + * <p>Convert the data passed into the <code>DocumentDeserializer</code> + * constructor into the OpenOffice Writer <code>Document</code> + * format.</p> + * + * <p>This method may or may not be thread-safe. It is expected + * that the user code does not call this method in more than one + * thread. And for most cases, this method is only done once.</p> + * + * @return The resulting <code>Document</code> object from conversion. + * + * @throws ConvertException If any Convert error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws IOException, ConvertException { + Enumeration pe = pswDoc.getParagraphEnumeration(); + + sxwDoc = new SxwDocument (docName); + sxwDoc.initContentDOM(); + + // Default to an initial 5 entries in the catalog. + styleCat = new StyleCatalog(5); + + try { + buildDocument(pe); + } + catch (Exception e) { + e.printStackTrace(); + throw new ConvertException("Error building OpenOffice Writer DOM: " + + e.toString()); + + } + + return sxwDoc; + } + + + /** + * This method actually takes care of the conversion. + * + * @param data An Enumeration of all Paragraphs in the Pocket Word doc. + * + * @return The OpenOffice Writer XML representation of the data. + * + * @throws IOException If any I/O errors occur. 
+ */ + private void buildDocument(Enumeration data) throws IOException { + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + /* + * There should be only one each of office:body and + * office:automatic-styles in each document. + */ + Node bodyNode = doc.getElementsByTagName(TAG_OFFICE_BODY).item(0); + + // Not every document has an automatic style tag + Node autoStylesNode = doc.getElementsByTagName( + TAG_OFFICE_AUTOMATIC_STYLES).item(0); + if (autoStylesNode == null) { + autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); + doc.insertBefore(autoStylesNode, bodyNode); + } + + + // Needed for naming new styles + int paraStyles = 1; + int textStyles = 1; + + // Pocket Word has no concept of a list. + Element listNode = null; + + + // Down to business ... + while (data.hasMoreElements()) { + Paragraph p = (Paragraph)data.nextElement(); + Element paraNode = doc.createElement(TAG_PARAGRAPH); + + // Set paragraph style information here + ParaStyle pStyle = p.makeStyle(); + if (pStyle == null) { + paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, + PocketWordConstants.DEFAULT_STYLE); + } + else { + // Create paragraph style + pStyle.setName(new String("PS" + paraStyles++)); + paraNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); + styleCat.add(pStyle); + } + + + /* + * For each of the paragraphs, process each segment. + * There will always be at least one. 
+ */ + Enumeration paraData = p.getSegmentsEnumerator(); + Vector textSpans = new Vector(0, 1); + + do { + ParagraphTextSegment pts = (ParagraphTextSegment)paraData.nextElement(); + Element span = doc.createElement(OfficeConstants.TAG_SPAN); + + TextStyle ts = pts.getStyle(); + + if (ts != null) { + ts.setName(new String("TS" + textStyles++)); + span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, ts.getName()); + styleCat.add(ts); + } + else { + span.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, + PocketWordConstants.DEFAULT_STYLE); + } + + // If this isn't a blank paragraph + if (pts.getText() != null && !pts.getText().equals("")) { + Node[] children = OfficeUtil.parseText(pts.getText(), doc); + + for (int j = 0; j < children.length; j++) { + span.appendChild(children[j]); + } + } + + textSpans.add(span); + + } while (paraData.hasMoreElements()); + + + /* + * Special case for the first span. If it has no style, then + * it shouldn't be a span, so just add its children with style + * set as standard. + */ + Element firstSpan = (Element)textSpans.elementAt(0); + String styleName = firstSpan.getAttribute(ATTRIBUTE_TEXT_STYLE_NAME); + if (styleName.equals(PocketWordConstants.DEFAULT_STYLE)) { + NodeList nl = firstSpan.getChildNodes(); + int len = nl.getLength(); + + for (int i = 0; i < len; i++) { + /* + * Always take item 0 as the DOM tree event model will + * cause the NodeList to shrink as each Node is reparented. + * + * By taking the first item from the list, we essentially + * traverse the list in order. + */ + paraNode.appendChild(nl.item(0)); + } + } + else { + paraNode.appendChild(firstSpan); + } + + // The rest are spans, so just add them + for (int i = 1; i < textSpans.size(); i++) { + paraNode.appendChild((Node)textSpans.elementAt(i)); + } + + + /* + * Pocket Word doesn't support lists, but it does have bulleted + * paragraphs that are essentially the same thing. + * + * Unlike OpenOffice Writer, a blank paragraph can be bulleted + * as well. 
This will be handled by inserting a blank paragraph + * into the unordered list, but OpenOffice Writer will not display + * an item at that point in the list. + */ + if (p.isBulleted()) { + if (listNode == null) { + listNode = doc.createElement(TAG_UNORDERED_LIST); + } + Element listItem = doc.createElement(TAG_LIST_ITEM); + listItem.appendChild(paraNode); + listNode.appendChild(listItem); + } + else { + if (listNode != null) { + bodyNode.appendChild(listNode); + listNode = null; + } + bodyNode.appendChild(paraNode); + } + } // End processing paragraphs + + + // Now write the style catalog to the document + NodeList nl = styleCat.writeNode(doc, "dummy").getChildNodes(); + int nlLen = nl.getLength(); // nl.item reduces the length + for (int i = 0; i < nlLen; i++) { + autoStylesNode.appendChild(nl.item(0)); + } + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java new file mode 100644 index 000000000000..c6a14ba2877d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentMergerImpl.java @@ -0,0 +1,99 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * PocketWord implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + } + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + 
Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java new file mode 100644 index 000000000000..2604e903c564 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/DocumentSerializerImpl.java @@ -0,0 +1,437 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentSerializer; + +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; +import org.openoffice.xmerge.converter.xml.StyleCatalog; + +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + + +/** + * <p>Pocket Word implementation of <code>DocumentDeserializer</code> + * for use by {@link + * org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts an OpenOffice Writer XML files to a Pocket Word file<.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public final class DocumentSerializerImpl + implements DocumentSerializer, OfficeConstants { + + private PocketWordDocument pswDoc; + private SxwDocument sxwDoc; + + private StyleCatalog styleCat = null; + + private boolean inList = false; + + + /** + * <p>Initialises a new <code>DocumentSerializerImpl</code> using the.<br> + * supplied <code>Document</code></p> + * + * <p>The supplied document should be an {@link + * 
org.openoffice.xmerge.converter.xml.sxw.SxwDocument SxwDocument} + * object.</p> + * + * @param document The <code>Document</code> to convert. + */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument)doc; + pswDoc = new PocketWordDocument(sxwDoc.getName()); + } + + + /** + * <p>Convert the data passed into the <code>DocumentSerializerImpl</code> + * constructor into Pocket Word format.</p> + * + * <p>This method may or may not be thread-safe. It is expected + * that the user code does not call this method in more than one + * thread. And for most cases, this method is only done once.</p> + * + * @return <code>ConvertData</code> object to pass back the + * converted data. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() throws IOException, ConvertException { + ConvertData cd = new ConvertData(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Load any style info before traversing the document content tree + loadStyles(); + + NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + + int len = list.getLength(); + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + cd.addDocument(pswDoc); + + return cd; + } + + + /* + * Handles the loading of defined styles from the style.xml file as well + * as automatic styles from the content.xml file. + * + * Any change to a defined style, such as a short bold section, falls into + * the latter category. 
+ */ + private void loadStyles() { + org.w3c.dom.Document contentDom = sxwDoc.getContentDOM(); + org.w3c.dom.Document styleDom = sxwDoc.getStyleDOM(); + + styleCat = new StyleCatalog(25); + + NodeList nl = null; + String families[] = new String[] { PocketWordConstants.TEXT_STYLE_FAMILY, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY }; + Class classes[] = new Class[] { TextStyle.class, + ParaStyle.class, + TextStyle.class }; + + String[] styleTypes = new String[] { TAG_OFFICE_STYLES, + TAG_OFFICE_AUTOMATIC_STYLES, + TAG_OFFICE_MASTER_STYLES }; + + /* + * Documents converted from PSW -> SXW will not have a style.xml when + * being converted back to PSW. This would occur if a document was + * not modified within Writer between conversions. + * + * Any Writer modifications and saves create the style.xml and other + * portions of a complete Writer SXW file. + */ + if (styleDom != null) { + // Process the Style XML tree + for (int i = 0; i < styleTypes.length; i++ ) { + nl = styleDom.getElementsByTagName(styleTypes[i]); + if (nl.getLength() != 0) { + styleCat.add(nl.item(0), families, classes, null, false); + } + } + } + + /* + * Process the content XML for any other style info. + * Should only be automatic types here. + */ + for (int i = 0; i < styleTypes.length; i++ ) { + nl = contentDom.getElementsByTagName(styleTypes[i]); + if (nl.getLength() != 0) { + styleCat.add(nl.item(0), families, classes, null, false); + } + } + } + + + /* + * Process the office:body tag. 
+ */ + private void traverseBody(Node node) throws IOException, ConvertException { + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) + || nodeName.equals(TAG_HEADING)) { + traverseParagraph(child); + } + + if (nodeName.equals(TAG_UNORDERED_LIST) || + nodeName.equals(TAG_ORDERED_LIST)) { + traverseList(child); + } + } + } + } + } + + + /* + * Process a text:p tag + */ + private void traverseParagraph(Node node) throws IOException, ConvertException { + String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); + + ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, + ParaStyle.class); + if (pstyle != null) { + pstyle = (ParaStyle)pstyle.getResolved(); + } + + TextStyle tstyle = (TextStyle)styleCat.lookup(styleName, + PocketWordConstants.PARAGRAPH_STYLE_FAMILY, null, + TextStyle.class); + if (pstyle != null) { + tstyle = (TextStyle)tstyle.getResolved(); + } + + try { + pswDoc.addParagraph(pstyle, inList); + } + catch (Exception e) { + throw new ConvertException( + "Error adding paragraph to PocketWordDocument.\n" + + e.toString()); + } + + traverseParagraphContents(node, tstyle); + } + + + /* + * Process the contents of a paragraph. This method handles situations + * where the paragraph contains multiple children, each representing a + * differently formatted piece of text. 
+ */ + private void traverseParagraphContents (Node node, TextStyle defTextStyle) + throws IOException, ConvertException { + // First up, get the style of this little bit + String styleName = getAttribute(node, ATTRIBUTE_TEXT_STYLE_NAME); + TextStyle tStyle = (TextStyle)styleCat.lookup(styleName, + PocketWordConstants.TEXT_STYLE_FAMILY, null, + TextStyle.class); + + if (tStyle == null) { + tStyle = defTextStyle; + } + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nList.item(i); + short nodeType = child.getNodeType(); + + switch (nodeType) { + case Node.TEXT_NODE: + String s = child.getNodeValue(); + if (s.length() > 0) { + try { + pswDoc.addParagraphData(s, tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + break; + + case Node.ELEMENT_NODE: + if (child.getNodeName().equals(TAG_SPACE)) { + StringBuffer sb = new StringBuffer(""); + int count = 1; + + NamedNodeMap map = child.getAttributes(); + + if (map.getLength() > 0) { + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + count = Integer.parseInt(attr.getNodeValue().trim()); + } + + for ( ; count > 0; count--) { + sb.append(" "); + } + + /* + * May want to look at style info for spaces. Could + * be important when calculating font metrics. 
+ */ + try { + pswDoc.addParagraphData(sb.toString(), tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + else if (child.getNodeName().equals(TAG_TAB_STOP)) { + try { + pswDoc.addParagraphData("\t", tStyle); + } + catch (Exception e) { + throw new ConvertException( + "Error adding data to paragraph in " + + "PocketWordDocument.\n" + e.toString()); + + } + } + else if (child.getNodeName().equals(TAG_LINE_BREAK)) { + /* + * Pocket Word does not support soft line breaks. + * They are just new paragraphs. + */ + } + else if (child.getNodeName().equals(TAG_SPAN)) { + /* + * This is where the interesting ones, i.e. format + * changes occur. + */ + traverseParagraphContents (child, defTextStyle); + } + else if (child.getNodeName().equals(TAG_HYPERLINK)) { + traverseParagraphContents (child, defTextStyle); + } + else { + // Should maybe have a default in here. + } + break; + default: + // Do nothing + } + } + } + else { + /* + * If the node has no children, then it is a blank paragraph, but + * they still require an entry in the Paragraph class to make sense. + */ + pswDoc.addParagraphData("", tStyle); + } + } + + + /* + * Process a text:ordered-list or text:unordered-list tag. Pocket Word has + * no concept of a list so there is no need to differentiate between the + * two. + * + * Each item on the list contains a text:p node. + */ + private void traverseList (Node node) throws IOException, ConvertException { + inList = true; + + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + traverseListItem(child); + } + } + } + } + + inList = false; + } + + + /* + * Process a text:list-item node. 
They usually contain have a single + * text:p child but can also have sections or other lists. + * + * For this case, only paragraphs are supported. + */ + private void traverseListItem (Node node) throws IOException, ConvertException { + if (node.hasChildNodes()) { + NodeList nList = node.getChildNodes(); + int len = nList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + traverseParagraph(child); + } + } + } + } + + } + + + /* + * Utility method to retrieve a Node attribute. + */ + private String getAttribute (Node node, String attribute) { + NamedNodeMap attrNodes = node.getAttributes(); + + if (attrNodes != null) { + Node attr = attrNodes.getNamedItem(attribute); + if (attr != null) { + return attr.getNodeValue(); + } + } + + return null; + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java new file mode 100644 index 000000000000..c2249766b5e5 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java @@ -0,0 +1,859 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import java.io.ByteArrayOutputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import java.util.Vector; +import java.util.Enumeration; + +import java.awt.Color; + +import org.openoffice.xmerge.util.EndianConverter; +import org.openoffice.xmerge.util.ColourConverter; +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; + + +/** + * Represents a paragraph data structure within a Pocket Word document. + * + * @author Mark Murnane + * @version 1.1 + */ +class Paragraph implements PocketWordConstants { + /* + * The data elements of a Paragraph. + * + * As the 'unknown' values are not calculated they are declared static. + * They are not declared final because they do have a calcuable value. 
+ */ + private static short unknown1 = 0x23; + private short dataWords = 0; + private short textLength = 0; + private short lengthWithFormatting = 0; + private short lines = 0; + + private static final short marker = (short)0xFFFF; + private static int unknown2 = 0x22; // May be two short values + + private short specialIndentation = 0; + private short leftIndentation = 0; + private short rightIndentation = 0; + + private byte bullets = 0; + private byte alignment = 0; + + private static int unknown3 = 0; + + // Will always have at least these formatting settings in each paragraph + private short defaultFont = 2; // Courier New for the time being + private short defaultSize = 10; + + + /* + * Remaining elements assist in calculating correct values for the paragraph + * representation. + */ + + private Vector textSegments = null; + + private Vector lineDescriptors = null; + + private ParaStyle pStyle = null; + + private boolean isLastParagraph = false; + + + /* + * Private class constructor used by all constructors. Ensures the proper + * initialisation of the Vector storing the paragraph's text. + */ + private Paragraph () { + textSegments = new Vector(0, 1); + } + + + /** + * <p>Constructor for use when converting from SXW format to Pocket Word + * format.</p> + * + * @param style Paragraph style object describing the formatting style + * of this paragraph. + */ + public Paragraph (ParaStyle style) { + this(); + + lineDescriptors = new Vector(0, 1); + pStyle = style; + } + + + /** + * <p>Constructor for use when converting from Pocket Word format to SXW + * format.</p> + * + * @param data Byte array containing byte data describing this paragraph + * from the Pocket Word file. 
+ */ + public Paragraph (byte[] data) { + this(); + + /* + * Read in all fixed data from the array + * + * unknown1 appears at data[0] and data[1] + */ + dataWords = EndianConverter.readShort(new byte[] { data[2], data[3] } ); + textLength = EndianConverter.readShort(new byte[] { data[4], data [5] } ); + lengthWithFormatting = EndianConverter.readShort( + new byte[] { data[6], data[7] } ); + lines = EndianConverter.readShort(new byte[] { data[8], data [9] } ); + + /* + * The marker appears at data[10] and data[11]. + * + * The value of unknown2 is at data[12], data[13], data[14] and data[15]. + */ + + specialIndentation = EndianConverter.readShort(new byte[] { data[16], data[17] } ); + leftIndentation = EndianConverter.readShort(new byte[] { data[18], data [19] } ); + rightIndentation = EndianConverter.readShort(new byte[] { data[20], data [21] } ); + + bullets = data[22]; + alignment = data[23]; + + // The value of unknown3 is at data[24], data[25], data[26] and data[27]. + + /* + * The actual paragraph data is in the remainder of the byte sequence. + * + * Only the actual text seqence with the embedded formatting tags is + * relevant to the conversion from Pocket Word to SXW format. + */ + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + bos.write(data, 28, lengthWithFormatting); + parseText(bos.toByteArray()); + } + + + /* + * Processes the text portion of the raw paragraph data from the Pocket Word + * file. This data also includes formatting settings for the text in the + * paragraph. + * + * Formatting changes appear like XML/HTML tags. Formatted blocks are + * preceded by a sequence of bytes switching on a formatting change and + * followed by a sequence switching off that formatting change. 
+ */ + private void parseText (byte[] data) { + + int totalLength = data.length; + + StringBuffer sb = new StringBuffer(""); + + // Setup text style information + int mask = TextStyle.BOLD | TextStyle.ITALIC | TextStyle.UNDERLINE + | TextStyle.STRIKETHRU; + + + String fontName = null; + int fontSize = 0; + Color textColour = null; + Color backColour = null; + int modifiers = 0; + + TextStyle ts = null; + + int attrsSet = 0; // If this is 0, we have no extra style + boolean inSequence = false; + boolean sawText = false; + + String s = new String(); // For debugging + + // Start from the very beginning + for (int i = 0; i < totalLength; i++) { + // Will encounter at least two codes first + if ((byte)(data[i] & 0xF0) == FORMATTING_TAG) { + if (sawText) { + // Style change so dump previous segment and style info + addTextSegment(sb.toString(), ts); + sb = new StringBuffer(""); + sawText = false; + } + + switch (data[i]) { + case FONT_TAG: + int index = EndianConverter.readShort( + new byte[] { data[i + 1], data[i + 2] } ); + + /* + * Standard font. + * + * Should really be one, but as the only supported font + * currently is Courier New, want to leave it at Courier + * New for round trip conversions. + * + * Also need to account for the fact that Tahoma is the + * correct standard font. 
+ */ + if (fontName == null || fontName.equals("2")) { + if (index != 2 && index != 1) { + fontName = String.valueOf(index); + attrsSet++; + } + } + else { + // Font is set, but not the default + if (index == 2 || index == 1) { + fontName = "2"; + attrsSet--; + } + else { + fontName = String.valueOf(index); + } + } + i += 2; + break; + + + case FONT_SIZE_TAG: + int size = EndianConverter.readShort( + new byte[] { data[i + 1], data[i + 2] } ); + + if (size == 0) { + // Flags the end of the last paragraph + isLastParagraph = true; + i += 2; + break; + } + + // Standard size + if (fontSize == 0 || fontSize == 10) { + if (size != 10) { + fontSize = size; + attrsSet++; + } + } + else { + // Font size is set, but not to standard + if (size == 10) { + fontSize = 10; + attrsSet--; + } + else { + fontSize = size; + } + } + i += 2; + break; + + + case COLOUR_TAG: + if (data[i + 1] != 0) { + ColourConverter cc = new ColourConverter(); + textColour = cc.convertToRGB( + EndianConverter.readShort(new byte[] { data[i + 1], + data[i + 2] } )); + attrsSet++; + } + else { + textColour = null; + attrsSet--; + } + i += 2; + break; + + + case FONT_WEIGHT_TAG: + if (data[i + 1] == FONT_WEIGHT_BOLD + || data[i + 1] == FONT_WEIGHT_THICK) { + modifiers |= TextStyle.BOLD; + attrsSet++; + } + else { + // Its a bit field so subtracting should work okay. 
+ modifiers ^= TextStyle.BOLD; + attrsSet--; + } + i += 2; + break; + + + case ITALIC_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.ITALIC; + attrsSet++; + } + else { + modifiers ^= TextStyle.ITALIC; + attrsSet--; + } + i++; + break; + + + case UNDERLINE_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.UNDERLINE; + attrsSet++; + } + else { + modifiers ^= TextStyle.UNDERLINE; + attrsSet--; + } + i++; + break; + + + case STRIKETHROUGH_TAG: + if (data[i + 1] == (byte)0x01) { + modifiers |= TextStyle.STRIKETHRU; + attrsSet++; + } + else { + modifiers ^= TextStyle.STRIKETHRU; + attrsSet--; + } + i++; + break; + + case HIGHLIGHT_TAG: + /* + * Highlighting is treated by OpenOffice as a + * background colour. + */ + if (data[i + 1] == (byte)0x01) { + backColour = Color.yellow; + attrsSet++; + } + else { + backColour = null; + attrsSet--; + } + i++; + break; + } + + inSequence = true; + continue; + } + + if (inSequence) { + // Style information has been changed. Create new style here + + inSequence = false; + if (attrsSet > 0) { + ts = new TextStyle(null, TEXT_STYLE_FAMILY, DEFAULT_STYLE, + mask, modifiers, fontSize, fontName, null); + ts.setColors(textColour, backColour); + } + else { + ts = null; + } + } + + /* + * C4 xx seems to indicate a control code. C4 00 indicates the end + * of a paragraph; C4 04 indicates a tab space. Only these two + * have been seen so far. + */ + if (data[i] == (byte)0xC4) { + /* + * Redundant nodes are sometimes added to the last paragraph + * because a new sequence is being processed when the flag is + * set. + * + * To avoid this, do nothing with the last paragraph unless no + * text has been added for it already. In that case, add the + * empty text segment being process to ensure that all + * paragraphs have at least one text segment. 
+ */ + if (data[i + 1] == (byte)0x00) { + if (isLastParagraph && textSegments.size() > 0) { + return; + } + addTextSegment(sb.toString(), ts); + return; + } + sb.append("\t"); + sawText = true; + i++; + continue; + } + + sb.append((char)data[i]); + sawText = true; + s = sb.toString(); + } + } + + + /** + * <p>Adds details of a new text block to the <code>Paragraph</code> object. + * </p> + * + * @param text The text of the new block. + * @param style Text style object describing the formatting attached + * to this block of text. + */ + public void addTextSegment(String text, TextStyle style) { + textLength += text.length(); + textSegments.add(new ParagraphTextSegment(text, style)); + } + + + /** + * <p>This method alters the state of the <code>Paragraph</code> object to + * indicate whether or not it is the final paragraph in the document.</p> + * + * <p>It is used during conversion from SXW format to Pocket Word format. + * In Pocket Word files, the last paragraph finishes with a different byte + * sequence to other paragraphs.</p> + * + * @param isLast true if the Paragraph is the last in the document, + * false otherwise. + */ + public void setLastParagraph(boolean isLast) { + isLastParagraph = isLast; + } + + + /** + * <p>Complementary method to {@link #setLastParagraph(boolean) + * setLastParagraph}. Returns the terminal status of this + * <code>Paragraph</code> within the Pocket Word document.</p> + * + * @return true if the Paragraph is the last in the document; false otherwise. + */ + public boolean getLastParagraph () { + return isLastParagraph; + } + + + /** + * <p>This method returns the Pocket Word representation of this + * <code>Paragraph</code> in Little Endian byte order.</p> + * + * <p>Used when converting from SXW format to Pocket Word format.</p> + * + * @return <code>byte</code> array containing the formatted representation + * of this Paragraph. 
+ */ + public byte[] getParagraphData() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + postProcessText(); + + /* + * Need information about the paragraph segments in two places + * so calculate them first. + * + * The stream contains the text wrapped in any formatting sequences that + * are necessary. + */ + ByteArrayOutputStream segs = new ByteArrayOutputStream(); + + try { + for (int i = 0; i < textSegments.size(); i++) { + ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i); + segs.write(pts.getData()); + } + } + catch (IOException ioe) { + // Should never happen in a memory based stream + } + + /* + * Number of data words for this paragraph descriptor: + * + * 26 is the number of bytes prior to the start of the segment. + * 3 comes from the C4 00 00 termintating sequence. + */ + dataWords = (short)(26 + segs.size() + 3 + 4); + if (isLastParagraph) { + dataWords += 6; + } + if (dataWords % 4 != 0) { + dataWords += (4 - (dataWords % 4)); + } + dataWords /= 4; + + /* + * The 8 bytes are made up of E6 ?0 00 and E5 ?0 00 at the start of the + * text along with the C4 00 that terminates it. + * + * In the event that the paragraph is the last one E6 00 00 is also + * present at the end of the text. Also, as we currently use a font + * other than the first in the index (Tahoma) E5 01 00 is also present. + * + * Make sure this is accurate when font specifications change + */ + lengthWithFormatting = (short)(segs.size() + (isLastParagraph ? 
14 : 8)); + + try { + bos.write(EndianConverter.writeShort(unknown1)); + bos.write(EndianConverter.writeShort(dataWords)); + bos.write(EndianConverter.writeShort((short)(textLength + 1))); + bos.write(EndianConverter.writeShort(lengthWithFormatting)); + bos.write(EndianConverter.writeShort(lines)); + + bos.write(EndianConverter.writeShort(marker)); + bos.write(EndianConverter.writeInt(unknown2)); + + bos.write(EndianConverter.writeShort(specialIndentation)); + bos.write(EndianConverter.writeShort(leftIndentation)); + bos.write(EndianConverter.writeShort(rightIndentation)); + + bos.write(bullets); + + if (pStyle != null && pStyle.isAttributeSet(ParaStyle.TEXT_ALIGN)) { + switch (pStyle.getAttribute(ParaStyle.TEXT_ALIGN)) { + + case ParaStyle.ALIGN_RIGHT: + bos.write(0x01); + break; + + case ParaStyle.ALIGN_CENTER: + bos.write(0x02); + break; + + default: + bos.write(0x00); // Left align in all other circumstances + break; + } + } + else { + bos.write(0x00); + } + + bos.write(EndianConverter.writeInt(unknown3)); + + + /* + * Write out font and size. + * + * If font support is added then this should change as the information + * will have to be calculated from a Font table. + */ + bos.write(FONT_TAG); + bos.write(EndianConverter.writeShort(defaultFont)); + bos.write(FONT_SIZE_TAG); + bos.write(EndianConverter.writeShort(defaultSize)); + + // Write out the text segments + bos.write(segs.toByteArray()); + + /* + * If this is the last paragraph in the document then we need to make + * sure that the paragraph text is terminated correctly with an E6 00 00 + * before the C4 00 00. + */ + if (isLastParagraph) { + if (defaultFont != 1) { + // Must always go back to the first font. 
+ bos.write(FONT_TAG); + bos.write(EndianConverter.writeShort((short)0x01)); + } + bos.write(FONT_SIZE_TAG); + bos.write(EndianConverter.writeShort((short)0x00)); + } + + bos.write(new byte[] { (byte)0xC4, 0x00, 0x00 } ); + + int padding = 0; + if (bos.size() % 4 != 0) { + padding = 4 - (bos.size() % 4); + } + for (int i = 0; i < padding; i++) { + bos.write(0x00); + } + + // Third byte should match first byte after 0xFF 0xFF + bos.write(new byte[] { 0x42, 0x00, 0x22, 0x00} ); + + /* + * Meaning of last two bytes seems to be the number of words describing + * lines. This is calculated at 10 bytes per descriptor. + * + * May have two extra padding bytes that need to be accounted for too + * The division below may lose 2 bytes (integer result). + */ + int wordsRemaining = (lineDescriptors.size() * 10) / 4; + if ((lineDescriptors.size() * 10) % 4 != 0) { + wordsRemaining++; + } + bos.write(EndianConverter.writeShort((short)wordsRemaining)); + + + // Now write out the line descriptors + for (int i = 0; i < lineDescriptors.size(); i++) { + LineDescriptor ld = (LineDescriptor)lineDescriptors.elementAt(i); + + bos.write(ld.getDescriptorInfo()); + } + + + if (!isLastParagraph) { + /* + * There may be a need to pad this. Will be writing at + * either start of 4 byte block or 2 bytes into it. + */ + if (bos.size() % 4 != 2) { + bos.write(EndianConverter.writeShort((short)0)); + } + bos.write(EndianConverter.writeShort((short)0x41)); + } + } + catch (IOException ioe) { + // Should never occur for a memory based stream + } + + return bos.toByteArray(); + } + + + /* + * This method handles the calculation of correct values for line lengths + * in each individual descriptor and the number of lines in the document. + * + * TODO: Update to take account of different font metrics. + */ + private void postProcessText() { + /* + * The post-processing ... + * + * For each line, we need to add a line descriptor and increment + * the number of lines in the paragraph data structure. 
+ * + * To do this, make sure that no sequence goes over the given screen + * width unless the last char is a whitespace character. + */ + + // In courier, can have no more than 29 chars per line + + int chunkStart = 0; + StringBuffer sb = new StringBuffer(""); + + // Line Descriptor info should be eliminated each time + lineDescriptors = new Vector(1, 1); + lines = 0; + + for (int i = 0; i < textSegments.size(); i++) { + ParagraphTextSegment pts = (ParagraphTextSegment)textSegments.elementAt(i); + sb.append(pts.getText()); + } + + if (sb.length() == 0) { + lines = 1; + lineDescriptors.add(new LineDescriptor((short)1, (short)0)); + return; + } + + while (chunkStart < sb.length()) { + String text = ""; + + try { + text = sb.substring(chunkStart, chunkStart + 30); + } + catch (StringIndexOutOfBoundsException sioobe) { + // We have less than one line left so just add it + text = sb.substring(chunkStart); + lineDescriptors.add(new LineDescriptor((short)(text.length() + 1), (short)(text.length() * 36))); + chunkStart += text.length(); + lines++; + continue; + } + + int lastWhitespace = -1; + + for (int i = 29; i >= 0; i--) { + if (Character.isWhitespace(text.charAt(i))) { + lastWhitespace = i; + break; + } + } + + if (lastWhitespace != -1) { + // The line can be split + lineDescriptors.add(new LineDescriptor((short)(lastWhitespace + 1), (short)(lastWhitespace * 36))); + chunkStart += lastWhitespace + 1; + lines++; + } + else { + // The line is completely occupied by a single word + lineDescriptors.add(new LineDescriptor((short)29, (short)(29 * 36))); + chunkStart += 29; + lines++; + } + } + } + + + /** + * <p>Returns the number of lines in the <code>Paragraph</code>.</p> + * + * @return The number of lines in the document. 
+ */ + public short getLines() { + postProcessText(); + + return lines; + } + + + /** + * <p>Toggles the flag indicating that the <code>Paragraph</code> is a + * bulleted paragraph.</p> + * + * @param isBulleted true to enable bulleting for this paragraph, false + * otherwise. + */ + public void setBullets(boolean isBulleted) { + if (isBulleted) { + bullets = (byte)0xFF; + } + else { + bullets = 0; + } + } + + /** + * <p>Returns the bulleting status of the <code>Paragraph</code>.</p> + * + * @return true if the paragraph is bulleted, false otherwise. + */ + public boolean isBulleted() { + if (bullets != 0) { + return true; + } + return false; + } + + + /** + * <p>Returns the number of text characters in the <code>Paragraph</code>, + * excluding formatting.</p> + * + * @return The length of the paragraph. + */ + public int getTextLength () { + return textLength; + } + + + /** + * <p>Returns an <code>Enumeration</code> over the individual text segments + * of the <code>Paragraph</code>.</p> + * + * @return An <code>Enumeration</code> of the text segments. + */ + public Enumeration getSegmentsEnumerator () { + return textSegments.elements(); + } + + + /** + * <p>Returns a paragraph style object that describes any of the paragraph + * level formatting used by this <code>Paragraph</code>.</p> + * + * @return Paragraph style object describing the <code>Paragraph</code>. + */ + public ParaStyle makeStyle() { + int attrs[] = new int[] { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT, + ParaStyle.TEXT_ALIGN }; + String values[] = new String[attrs.length]; + + /* + * Not interested in left or right indents just yet. Don't know + * how to calculate them. + */ + + switch (alignment) { + case 2: + values[2] = "center"; + break; + + case 1: + values[2] = "right"; + break; + + case 0: + default: + values[2] = "left"; + return null; // Not interested if its the default. 
+ } + + return new ParaStyle(null, PARAGRAPH_STYLE_FAMILY, null, attrs, + values, null); + } + + + /* + * Class describing the data structures which appear following the text + * of a Paragraph. For each line on screen that the Paragraph uses, a + * LineDescriptor details how many characters are on the line and how much + * screen space they occupy. + * + * The screen space and character breaks are calculated during post-processing + * of the paragraph. See postProcessText(). + * + * The unit of measurement used for screen space is currently unknown. + */ + private class LineDescriptor { + private short characters = 0; + private int filler = 0; + private short screen_space = 0; + private short marker = 0; + + private LineDescriptor(short chars, short space) { + characters = chars; + screen_space = space; + marker = (short)0x040C; // Not a constant. Depends on font used. + } + + + private byte[] getDescriptorInfo(){ + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + + try { + bos.write(EndianConverter.writeShort(characters)); + bos.write(EndianConverter.writeInt(filler)); + bos.write(EndianConverter.writeShort(screen_space)); + bos.write(EndianConverter.writeShort(marker)); + } + catch (IOException ioe) { + // Should never happen in a memory based stream. + } + + return bos.toByteArray(); + } + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java new file mode 100644 index 000000000000..442cbf2f53dc --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/ParagraphTextSegment.java @@ -0,0 +1,205 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.converter.xml.TextStyle; + +import org.openoffice.xmerge.util.EndianConverter; + +import org.openoffice.xmerge.util.ColourConverter; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import java.awt.Color; + +/** + * This class represents a portion of text with a particular formatting style. + * The style may differ from the default style of the paragraph of which it + * is part. + * + * @author Mark Murnane + * @version 1.1 + */ +class ParagraphTextSegment implements PocketWordConstants { + + private String pText; + private TextStyle pStyle; + + + /** + * <p>Initialise a new <code>ParagraphTextSegment</p>. + * <p>Both parameters may be <code>null</code>.</p> + * + * @param data The text of this segment. + * @param style The style describing this segment. 
+ */ + public ParagraphTextSegment (String data, TextStyle style) { + pText = data; + pStyle = style; + } + + /** + * <p>Sets the text for this segment.</p> + * + * @param data The text of this segment. + */ + public void setText (String data) { + pText = data; + } + + /** + * <p>Gets the text for this segment.</p> + * + * @return The text of this segment. + */ + public String getText () { + return pText; + } + + + /** + * <p>Sets the style for this segment.</p> + * + * @param data The style describing this segment. + */ + public void setStyle (TextStyle style) { + pStyle = style; + } + + + /** + * <p>Gets the style for this segment.</p> + * + * @return The style describing this segment. + */ + public TextStyle getStyle () { + return pStyle; + } + + + /** + * <p>Returns the string data for this text segment wrapped with the + * appropriate byte codes for the formatting settings used.</p> + * + * @return <code>byte</code> array containing formatted text in Pocket Word + * format. + */ + public byte[] getData () { + ByteArrayOutputStream data = new ByteArrayOutputStream(); + + boolean colourSet = false; + boolean boldSet = false; + boolean italicSet = false; + boolean underlineSet = false; + boolean strikeSet = false; + boolean highlightSet = false; + + // TODO: Font changes need to be worked out here + + try { + if (pStyle != null) { + if (pStyle.getFontColor() != null) { + ColourConverter cc = new ColourConverter(); + short colourCode = cc.convertFromRGB(pStyle.getFontColor()); + if (colourCode != 0) { // not black + data.write(COLOUR_TAG); + data.write(EndianConverter.writeShort(colourCode)); + colourSet = true; + } + } + if (pStyle.isSet(TextStyle.BOLD) && pStyle.getAttribute(TextStyle.BOLD)) { + data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_BOLD, 0x00 } ); + boldSet = true; + } + if (pStyle.isSet(TextStyle.ITALIC) && pStyle.getAttribute(TextStyle.ITALIC)) { + data.write(new byte[] { ITALIC_TAG, 0x01 } ); + italicSet = true; + } + if 
(pStyle.isSet(TextStyle.UNDERLINE) && pStyle.getAttribute(TextStyle.UNDERLINE)) { + data.write(new byte[] { UNDERLINE_TAG, 0x01 } ); + underlineSet = true; + } + if (pStyle.isSet(TextStyle.STRIKETHRU) && pStyle.getAttribute(TextStyle.STRIKETHRU)) { + data.write(new byte[] { STRIKETHROUGH_TAG, 0x01 } ); + strikeSet = true; + } + if (pStyle.getBackgroundColor() != null) { + data.write(new byte[] { HIGHLIGHT_TAG, 0x01 } ); + highlightSet = true; + } + } + + + // Now write out the data + if (!pText.equals("\t")) { + data.write(pText.getBytes()); + } + else { + /* + * Tabs are a special case. They are represented by Pocket Word + * as the LE sequence 0xC4 0x04. + */ + data.write(new byte[] { (byte)0xC4, 0x04 } ); + } + + + // Now close out any of the settings changes + if (colourSet) { + /* + * Colours may change without changing back to black, but + * without knowing what the previous colour was, the only + * way to ensure correct conversion is to restore to black and + * let the next segment change the colour again. 
+ */ + data.write(new byte[] { COLOUR_TAG, 0x00, 0x00 } ); + } + if (boldSet) { + data.write(new byte[] { FONT_WEIGHT_TAG, FONT_WEIGHT_NORMAL, 0x00 } ); + } + if (italicSet) { + data.write(new byte[] { ITALIC_TAG, 0x00 } ); + } + if (underlineSet) { + data.write(new byte[] { UNDERLINE_TAG, 0x00 } ); + } + if (strikeSet) { + data.write(new byte[] { STRIKETHROUGH_TAG, 0x00 } ); + } + if (highlightSet) { + data.write(new byte[] { HIGHLIGHT_TAG, 0x00 } ); + } + } + catch (IOException ioe) { + // Should never occur in a memory based stream + } + + return data.toByteArray(); + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java new file mode 100644 index 000000000000..d02c9eeddf53 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PluginFactoryImpl.java @@ -0,0 +1,165 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. 
If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + + +import java.io.InputStream; +import java.io.IOException; + +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.DocumentDeserializerFactory; +import org.openoffice.xmerge.DocumentSerializerFactory; +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.DocumentMergerFactory; +import org.openoffice.xmerge.ConverterCapabilities; + +import org.openoffice.xmerge.util.registry.ConverterInfo; + +import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory; + + +/** + * Factory class used to create converters to/from the Pocket Word format. + * + * @author Mark Murnane + * @version 1.1 + */ +public final class PluginFactoryImpl extends SxwPluginFactory + implements DocumentDeserializerFactory, DocumentSerializerFactory, + DocumentMergerFactory{ + + /** + * <p>Constructor that caches the <code>ConvertInfo</code> that + * corresponds to the registry information for this plug-in.</p> + * + * @param ci <code>ConvertInfo</code> object. + */ + public PluginFactoryImpl (ConverterInfo ci) { + super(ci); + } + + /** ConverterCapabilities object for this type of conversion. */ + private final static ConverterCapabilities converterCap = + new ConverterCapabilitiesImpl(); + + + /** + * <p>The <code>DocumentSerializer</code> is used to convert + * from the OpenOffice Writer <code>Document</code> format + * to the Pocket Word <code>Document</code> format.</p> + * + * <p>The <code>ConvertData</code> object is passed along to the + * created <code>DocumentSerializer</code> via its constructor. 
+ * The <code>ConvertData</code> is read and converted when the + * the <code>DocumentSerializer</code> object's + * <code>serialize</code> method is called.</p> + * + * @param doc <code>Document</code> object that the created + * <code>DocumentSerializer</code> object uses + * as input. + * + * @return A <code>DocumentSerializer</code> object. + */ + public DocumentSerializer createDocumentSerializer(Document doc) { + return new DocumentSerializerImpl(doc); + } + + + /** + * The <code>DocumentDeserializer</code> is used to convert + * from the Pocket Word <code>Document</code> format to + * the OpenOffice Writer <code>Document</code> format.</p> + * + * The <code>ConvertData</code> object is passed along to the + * created <code>DocumentDeserializer</code> via its constructor. + * The <code>ConvertData</code> is read and converted when the + * the <code>DocumentDeserializer</code> object's + * <code>deserialize</code> method is called. + * </p> + * + * @param cd <code>ConvertData</code> object that the created + * <code>DocumentDeserializer</code> object uses as + * input. + * + * @return A <code>DocumentDeserializer</code> object. + */ + public DocumentDeserializer createDocumentDeserializer(ConvertData cd) { + return new DocumentDeserializerImpl(cd); + } + + + /** + * <p>Create a <code>Document</code> object that corresponds to + * the Pocket Word data passed in via the <code>InputStream</code> + * object. + * + * <p>This method will read from the given <code>InputStream</code> + * object. The returned <code>Document</code> object will contain + * the necessary data for the other objects created by the + * <code>PluginFactoryImpl</code> to process, like the + * <code>DocumentSerializerImpl</code> object and a + * <code>DocumentMerger</code> object.</p> + * + * @param name The <code>Document</code> name. + * @param is <code>InputStream</code> object corresponding + * to the <code>Document</code>. 
+ * + * @return A <code>Document</code> object representing the + * Pocket Word format. + * + * @throws IOException If any I/O error occurs. + */ + + public Document createDeviceDocument(String name, InputStream is) + throws IOException { + PocketWordDocument pwd = new PocketWordDocument(name); + pwd.read(is); + return pwd; + } + + /** + * Returns an instance of <code>DocumentMergerImpl</code>, + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A DocumentMergerImpl object. + */ + public DocumentMerger createDocumentMerger(Document doc) { + ConverterCapabilities cc = converterCap; + DocumentMergerImpl merger = new DocumentMergerImpl(doc, cc); + return merger; + + } + +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java new file mode 100644 index 000000000000..de67eeec31a7 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordConstants.java @@ -0,0 +1,95 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * Constants shared by the classes that read and write Pocket Word data.
 *
 * <p>Fields of an interface are implicitly <code>public static final</code>,
 * so the modifiers are not repeated on each declaration.</p>
 *
 * @author  Mark Murnane
 * @version 1.1
 */
public interface PocketWordConstants {

    /** File extension for Pocket Word files. */
    String FILE_EXTENSION = ".psw";

    /** Name of the default style. */
    String DEFAULT_STYLE = "Standard";

    /** Family name for Paragraph styles. */
    String PARAGRAPH_STYLE_FAMILY = "paragraph";

    /** Family name for Text styles. */
    String TEXT_STYLE_FAMILY = "text";


    /**
     * Generic Pocket Word formatting code.
     *
     * Formatting codes are 0xEz, where z indicates the specific format code.
     */
    byte FORMATTING_TAG = (byte)0xE0;

    /** Font specification tag.  The two bytes following indicate which font. */
    byte FONT_TAG = (byte)0xE5;

    /** Font size tag.  The two bytes following specify font size in points. */
    byte FONT_SIZE_TAG = (byte)0xE6;

    /** Colour tag.  Two bytes following index a 4-bit colour table. */
    byte COLOUR_TAG = (byte)0xE7;

    /** Font weight tag.  Two bytes following indicate weighting of font. */
    byte FONT_WEIGHT_TAG = (byte)0xE8;

    /** Fine font weight value. */
    byte FONT_WEIGHT_FINE = 0x01;

    /** Normal font weight value. */
    byte FONT_WEIGHT_NORMAL = 0x04;

    /** Bold font weight value. */
    byte FONT_WEIGHT_BOLD = 0x07;

    /** Thick font weight value. */
    byte FONT_WEIGHT_THICK = 0x09;

    /** Italic tag.  Single byte following indicates whether italic is on. */
    byte ITALIC_TAG = (byte)0xE9;

    /** Underline tag.  Single byte following indicates whether underline is on. */
    byte UNDERLINE_TAG = (byte)0xEA;

    /** Strikethrough tag.  Single byte following indicates whether strikethrough is on. */
    byte STRIKETHROUGH_TAG = (byte)0xEB;

    /** Highlighting tag.  Single byte following indicates whether highlighting is on. */
    byte HIGHLIGHT_TAG = (byte)0xEC;

}
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + + +package org.openoffice.xmerge.converter.xml.sxw.pocketword; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.converter.xml.ParaStyle; +import org.openoffice.xmerge.converter.xml.TextStyle; + +import java.awt.Font; + +import java.io.InputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; + +import java.util.Enumeration; +import java.util.Vector; + + +/** + * <p>Class representing a Pocket Word Document.</p> + * + * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents + * and to read existing data to allow for conversion to OpenOffice Writer + * format.</p> + * + * @author Mark Murnane + * @version 1.1 + */ +public class PocketWordDocument implements Document, PocketWordConstants { + private String docName; + + private byte[] preamble; + private Vector fonts; + private DocumentDescriptor descriptor; + private Vector paragraphs; + + private ParaStyle pStyle; + private Paragraph currentPara; + + /* + * The trailer currently appears to be constant, but if its found to + * have a variable component, then this initialisation should be moved + * to an initTrailer() method. + * + * Padding is sometimes needed before the trailer to ensure the file + * ends on a 4-byte boundary, but this is handled in write(). 
+ */ + private static final byte[] trailer = new byte[] { (byte)0x82, 0x00, + 0x09, 0x00, + 0x03, 0x00, + (byte)0x82, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00, + 0x00, 0x00 }; + + + /** + * <p>Constructs a new Pocket Word Document.</p> + * + * <p>This new document does notcontain any information. Document data must + * either be added using appropriate methods, or an existing file can be + * {@link #read(InputStream) read} from an <code>InputStream</code>.</p> + * + * @param name The name of the <code>PocketWordDocument</code>. + */ + public PocketWordDocument(String name) { + + docName = trimDocumentName(name); + + preamble = new byte[52]; + fonts = new Vector(0, 1); + descriptor = new DocumentDescriptor(); + paragraphs = new Vector(0, 1); + } + + + /** + * <p>This method reads <code>byte</code> data from the InputStream and + * extracts font and paragraph data from the file.</p> + * + * @param is InputStream containing a Pocket Word data file. + * + * @throws IOException In case of any I/O errors. + */ + public void read(InputStream docData) throws IOException { + + if (docData == null) { + throw new IOException ("No input stream to convert"); + } + + // The preamble may become important for font declarations. + int readValue = docData.read(preamble); + // #i33702# check for an empty InputStream. + if(readValue == -1) { + System.err.println("Error:invalid input stream"); + return; + } + + byte[] font = new byte[80]; + int numfonts = 0; + do { + docData.read(font); + + String name = new String(font, 0, 64, "UTF-16LE"); + fonts.add(name.trim()); + + } while (!(font[76] == 5 && font[77] == 0 + && font[78] == 1 && font[79] == 0)); + + /* + * TODO: The document descriptor data that follows the fonts ends with + * a variable section containing data for each of the paragraphs. + * It may be possible to use this information to calculate staring + * positions for each paragraph rather than iterating through the + * entire byte stream. 
+ */ + + int value; + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + while ((value = docData.read()) != -1) { + bos.write(value); + } + + + byte[] contentData = bos.toByteArray(); + int start = 0, end = 0; + boolean sawMarker = false; + + for (int i = 0; i < contentData.length; i += 4) { + if (contentData[i + 2] == (byte)0xFF + && contentData[i + 3] == (byte)0xFF && !sawMarker) { + start = i - 8; + sawMarker = true; + continue; + } + + if (contentData[i + 2] == (byte)0xFF + && contentData[i + 3] == (byte)0xFF && sawMarker) { + end = i - 8; + ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); + paragraph.write(contentData, start, end - start); + paragraphs.add(new Paragraph(paragraph.toByteArray())); + + // Reset the markers + sawMarker = false; + i -= 4; // Skip back + } + + } + + /* + * Special case, the last paragraph + * If we got here, and the marker is set then we saw the start of the + * last paragraph, but no following paragraph + */ + ByteArrayOutputStream paragraph = new ByteArrayOutputStream(); + if (contentData[contentData.length - 19] == 0) { + paragraph.write(contentData, start, contentData.length - start - 20); + } + else { + paragraph.write(contentData, start, contentData.length - start - 18); + } + paragraphs.add(new Paragraph(paragraph.toByteArray())); + } + + + /* + * Utility method to make sure the document name is stripped of any file + * extensions before use. + */ + private String trimDocumentName(String name) { + String temp = name.toLowerCase(); + + if (temp.endsWith(FILE_EXTENSION)) { + // strip the extension + int nlen = name.length(); + int endIndex = nlen - FILE_EXTENSION.length(); + name = name.substring(0,endIndex); + } + + return name; + } + + + /** + * <p>Method to provide access to all of the <code>Paragraph</code> objects + * in the <code>Document</code>.</p> + * + * @return <code>Enumeration</code> over the paragraphs in the document. 
+ */ + public Enumeration getParagraphEnumeration() { + return paragraphs.elements(); + } + + + /** + * <p>Returns the <code>Document</code> name with no file extension.</p> + * + * @return The <code>Document</code> name with no file extension. + */ + public String getName() { + return docName; + } + + + /** + * <p>Returns the <code>Document</code> name with file extension.</p> + * + * @return The <code>Document</code> name with file extension. + */ + public String getFileName() { + return new String(docName + FILE_EXTENSION); + } + + + /** + * <p>Writes out the <code>Document</code> content to the specified + * <code>OutputStream</code>.</p> + * + * <p>This method may not be thread-safe. + * Implementations may or may not synchronize this + * method. User code (i.e. caller) must make sure that + * calls to this method are thread-safe.</p> + * + * @param os <code>OutputStream</code> to write out the + * <code>Document</code> content. + * + * @throws IOException If any I/O error occurs. + */ + public void write(OutputStream os) throws IOException { + DataOutputStream dos = new DataOutputStream(os); + + initPreamble(); + dos.write(preamble); + + loadFonts(); + for (int i = 0; i < fonts.size(); i++ ) { + ByteArrayOutputStream fontData = (ByteArrayOutputStream)fonts.elementAt(i); + dos.write(fontData.toByteArray()); + } + + + for (int i = 0; i < paragraphs.size(); i++) { + Paragraph para = (Paragraph)paragraphs.elementAt(i); + descriptor.addParagraph((short)para.getTextLength(), para.getLines()); + } + dos.write(descriptor.getDescriptor()); + + for (int i = 0; i < paragraphs.size(); i++ ) { + Paragraph para = (Paragraph)paragraphs.elementAt(i); + + // Last paragraph has some extra data + if (i + 1 == paragraphs.size()) { + para.setLastParagraph(true); + } + dos.write(para.getParagraphData()); + } + + + /* + * Before we write out the trailer, we need to make sure that it will + * lead to the file ending on a 4 byte boundary. 
+ */ + if (dos.size() % 4 == 0) { + dos.write((byte)0x00); + dos.write((byte)0x00); + } + + dos.write(trailer); + + dos.flush(); + dos.close(); + } + + + /** + * <p>This method adds a new paragraph element to the document. No string + * data is added to the paragraph.</p> + * + * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and + * is used as the target for all subsequent calls to addParagraphData().</p> + * + * @param style Paragraph Style object describing the formatting for + * the new paragraph. Can be null. + * @param listElement true if this paragraph is to be bulleted; + * false otherwise. + */ + public void addParagraph(ParaStyle style, boolean listElement) { + /* For the moment, only support basic text entry in a single paragraph */ + Paragraph para = new Paragraph(style); + + paragraphs.add(para); + + pStyle = style; + currentPara = para; + + if (listElement) { + para.setBullets(true); + } + } + + + /** + * <p>This method adds text to the current paragraph.</p> + * + * <p>If no paragraphs exist within the document, it creates one.</p> + * + * @param data The string data for this segment. + * @param style Text Style object describing the formatting of this + * segment. Can be null. + */ + public void addParagraphData(String data, TextStyle style) { + if (currentPara == null) { + addParagraph(null, false); + } + currentPara.addTextSegment(data, style); + } + + + /* + * Preamble is the portion before font specification which never + * seems to change from one file, or one saved version, to the next. + * + * Bytes 18h and 19h seem to contain the number of fonts and should + * be modified when all of the fonts have been specified. + * These bytes are the first two on the fourth line below. + */ + private void initPreamble() { + preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00, + 0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00, // Bytes 3-4 Font?? 
+ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bytes 1-2 # Fonts + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }; + } + + + /* + * This method writes the minimum font data that is used by the converter. + * Currently, all documents convert to 10 point Courier New. Tahoma is + * always mentioned in Pocket Word files, however, even if it is not used. + * + * TODO: Rewrite to allow for multiple fonts once font support issues + * have been resolved. + */ + private void loadFonts() { + ByteArrayOutputStream fontData = new ByteArrayOutputStream(); + + try { + fontData.write(new String("Tahoma").getBytes("UTF-16LE")); + fontData.write(new byte[52]); // Rest of font name? + fontData.write(new byte[] { 0x02, 0x00, 0x01, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x01, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x00, 0x00 } ); + + fonts.add(fontData); + + fontData = new ByteArrayOutputStream(); + + fontData.write(new String("Courier New").getBytes("UTF-16LE")); + fontData.write(new byte[42]); + fontData.write(new byte[] { 0x14, 0x00, 0x04, 0x00 } ); + fontData.write(new byte[] { 0x01, 0x00, 0x00, 0x00 } ); + fontData.write(new byte[] { 0x00, 0x00, 0x15, 0x00 } ); + + // Next part indicates that this is the last font + fontData.write(new byte[] { 0x05, 0x00, 0x01, 0x00 } ); + + fonts.add(fontData); + } + catch (IOException ioe) { + // Shouldn't happen as this is a memory based stream + } + } +} diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml new file mode 100644 index 000000000000..9253aa503359 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/build.xml @@ -0,0 +1,136 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE 
HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> +<project name="xmrg_jooxcxs_pocketword" default="main" basedir="."> + + <!-- ================================================================= --> + <!-- settings --> + <!-- ================================================================= --> + + <!-- project prefix, used for targets and build.lst --> + <property name="prj.prefix" value="xmrg"/> + + <!-- name of this sub target used in recursive builds --> + <property name="target" value="xmrg_jooxcxs_pocketword"/> + + <!-- relative path to project directory --> + <property name="prj" value="../../../../../../../.."/> + + <!-- start of java source code package structure --> + <property name="java.dir" value="${prj}/java"/> + + <!-- path component for current java package --> + <property name="package" + value="org/openoffice/xmerge/converter/xml/sxw/pocketword"/> + + <!-- define how to handle CLASSPATH environment --> + <property name="build.sysclasspath" value="ignore"/> + + <!-- classpath settings for javac tasks --> + <path id="classpath"> + <pathelement location="${build.class}"/> + <pathelement 
location="${solar.jar}/parser.jar"/> + <pathelement location="${solar.jar}/jaxp.jar"/> + <pathelement location="${solar.jar}/xerces.jar"/> + </path> + + <!-- set wether we want to compile with or without deprecation --> + <property name="deprecation" value="on"/> + + <!-- ================================================================= --> + <!-- solar build environment targets --> + <!-- ================================================================= --> + + <target name="build_dir" unless="build.dir"> + <property name="build.dir" value="${out}"/> + </target> + + <target name="solar" depends="build_dir" if="solar.update"> + <property name="solar.properties" + value="${solar.bin}/solar.properties"/> + </target> + + <target name="init" depends="solar"> + <property name="build.compiler" value="classic"/> + <property file="${solar.properties}"/> + <property file="${build.dir}/class/solar.properties"/> + </target> + + <target name="info"> + <echo message="--------------------"/> + <echo message="${target}"/> + <echo message="--------------------"/> + </target> + + + <!-- ================================================================= --> + <!-- custom targets --> + <!-- ================================================================= --> + + <!-- the main target, called in recursive builds --> + <target name="main" depends="info,prepare,compile"/> + + <!-- prepare output directories --> + <target name="prepare" depends="init" if="build.class"> + <mkdir dir="${build.dir}"/> + <mkdir dir="${build.class}"/> + </target> + + <!-- compile java sources in ${package} --> + <target name="compile" depends="prepare" if="build.class"> + <javac srcdir="${java.dir}" + destdir="${build.class}" + debug="${debug}" + deprecation="${deprecation}" + optimize="${optimize}"> + <classpath refid="classpath"/> + <include name="${package}/DocumentDescriptor.java"/> + <include name="${package}/DocumentDeserializerImpl.java"/> + <include name="${package}/DocumentSerializerImpl.java"/> + 
<include name="${package}/Paragraph.java"/> + <include name="${package}/ParagraphTextSegment.java"/> + <include name="${package}/PluginFactoryImpl.java"/> + <include name="${package}/PocketWordConstants.java"/> + <include name="${package}/PocketWordDocument.java"/> + <include name="${package}/DocumentMergerImpl.java"/> + <include name="${package}/ConverterCapabilitiesImpl.java"/> + </javac> + </target> + + <!-- clean up --> + <target name="clean" depends="prepare"> + <delete includeEmptyDirs="true"> + <fileset dir="${build.class}"> + <patternset> + <include name="${package}/*.class"/> + </patternset> + </fileset> + </delete> + </target> + +</project> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml new file mode 100644 index 000000000000..aaa0dcc32e0d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/converter.xml @@ -0,0 +1,47 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. 
If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> + + +<converters> + <converter type="staroffice/sxw" version="1.1"> + <converter-display-name> + Pocket Word + </converter-display-name> + <converter-description> + OpenOffice Writer XML to/from Pocket Word conversion. + </converter-description> + <converter-vendor> + OpenOffice.org + </converter-vendor> + <converter-class-impl> + org.openoffice.xmerge.converter.xml.sxw.pocketword.PluginFactoryImpl + </converter-class-impl> + <converter-target type="application/x-pocket-word"/> + </converter> +</converters> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html new file mode 100644 index 000000000000..ecc7d6105d83 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/package.html @@ -0,0 +1,56 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<html> +<head> +<title>org.openoffice.xmerge.converter.xml.sxw.pocketword package</title> +</head> + +<body bgcolor="white"> + +<p>Plugin for the conversion of documents between StarWriter XML and + Pocket Word format.</p> +<p>This plugin suports conversion of most features supported by Pocket Word.</p> +<ul> + <li>Bold, Italic, Underline</li> + <li>Strikethrough</li> + <li>Highlight</li> + <li>Colours</li> + <li>Lists</li> + <li>Alignments</li> +</ul> + +<p>Additionally, work on fonts is currently underway.</p> + +<p>This plugin is based on the Windows CE 3.0 version of Pocket Word.<br> + Testing was carried out using Pocket PC 2000 and Pocket PC 2002 devices.</p> + +<p>It follows the {@link org.openoffice.xmerge} framework +for the conversion process.</p> + +</body> +</html> diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java new file mode 100644 index 000000000000..510f983e8248 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ConverterCapabilitiesImpl.java @@ -0,0 +1,93 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +/** + * <p>WordSmith implementation of <code>ConverterCapabilities</code> for + * the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>Used with StarWriter XML to/from WordSmith conversions. + * The <code>ConverterCapibilies</code> specify which "Office" + * <code>Document</code> tags and attributes are supported on the + * "Device" <code>Document</code> format.</p> + */ +public final class ConverterCapabilitiesImpl + implements ConverterCapabilities { + + public boolean canConvertTag(String tag) { + + if (OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)) + return true; + else if (OfficeConstants.TAG_OFFICE_BODY.equals(tag)) + return true; + else if (OfficeConstants.TAG_PARAGRAPH.equals(tag)) + return true; + else if (OfficeConstants.TAG_HEADING.equals(tag)) + return true; + else if (OfficeConstants.TAG_ORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_UNORDERED_LIST.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_ITEM.equals(tag)) + return true; + else if (OfficeConstants.TAG_LIST_HEADER.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPAN.equals(tag)) + return true; + else if (OfficeConstants.TAG_HYPERLINK.equals(tag)) + return true; + else if 
(OfficeConstants.TAG_LINE_BREAK.equals(tag)) + return true; + else if (OfficeConstants.TAG_SPACE.equals(tag)) + return true; + else if (OfficeConstants.TAG_TAB_STOP.equals(tag)) + return true; + + return false; + } + + public boolean canConvertAttribute(String tag, + String attribute) { + + if (OfficeConstants.TAG_SPACE.equals(tag)) { + + if (OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute)) + return true; + } + + return false; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java new file mode 100644 index 000000000000..546d3060eb9b --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DOCConstants.java @@ -0,0 +1,61 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +/** + * Constants used for encoding and decoding the WordSmith format. + * + * @author Herbie Ong, David Proulx + */ +interface DOCConstants { + + /** Constant for uncompressed version. */ + public static final short UNCOMPRESSED = 1; + + /** Constant for compressed version. */ + public static final short COMPRESSED = 2; + + /** Constant used for spare fields. */ + public static final int SPARE = 0; + + /** WordSmith record size. */ + public static final short TEXT_RECORD_SIZE = 4096; + + /** Constant for encoding scheme. */ + public static final String ENCODING = "8859_1"; + + /** Constant for TAB character. */ + public final static char TAB_CHAR = '\t'; + + /** Constant for EOL character. */ + public final static char EOL_CHAR = '\n'; + + /** Constant for SPACE character. */ + public final static char SPACE_CHAR = ' '; +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..ad90541afbbb --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java @@ -0,0 +1,565 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.*; + +import java.io.IOException; +import java.util.Enumeration; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import java.util.Vector; +import java.io.ByteArrayInputStream; + +import org.openoffice.xmerge.converter.xml.*; +import org.openoffice.xmerge.util.Debug; +import org.openoffice.xmerge.util.XmlUtil; + +/** + * <p>WordSmith implementation of + * org.openoffice.xmerge.DocumentDeserializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * The <code>deserialize</code> method uses a + * <code>DocDecoder</code> to read the WordSmith format into a + * <code>String</code> object, then it calls <code>buildDocument</code> + * to create a <code>SxwDocument</code> object from it. 
+ * + * @author Herbie Ong, David Proulx + */ +public final class DocumentDeserializerImpl +implements DOCConstants, OfficeConstants, DocumentDeserializer { + + /** A Decoder object for decoding WordSmith format. */ + private WSDecoder decoder = null; + + WseFontTable fontTable = null; + WseColorTable colorTable = null; + StyleCatalog styleCat = null; + StyleCatalog oldStyleCat = null; + + /** A <code>ConvertData</code> object assigned to this object. */ + private ConvertData cd = null; + + + /** + * Constructor that assigns the given <code>ConvertData</code> + * to the object. + * + * @param cd A <code>ConvertData</code> object to read data for + * the conversion process by the deserialize method. + */ + public DocumentDeserializerImpl(ConvertData cd) { + this.cd = cd; + } + + + /** + * Convert the given <code>ConvertData</code> into a + * <code>SxwDocument</code> object. + * + * @return Resulting <code>Document</code> object. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws ConvertException, + IOException { + return deserialize(null, cd); + } + + + public Document deserialize(Document origDoc, ConvertData cd) + throws IOException { + + Document doc = null; + PalmDocument palmDoc = null; + Enumeration e = cd.getDocumentEnumeration(); + + while(e.hasMoreElements()) { + palmDoc = (PalmDocument) e.nextElement(); + PalmDB pdb = palmDoc.getPdb(); + Record[] recs = pdb.getRecords(); + decoder = new WSDecoder(); + Wse[] b = decoder.parseDocument(recs); + String docName = palmDoc.getName(); + doc = buildDocument(docName, b, origDoc); + } + return doc; + } + + + /** + * Temporary method to read existing <code>StyleCatalog</code> + * as a starting point. + * + * @param parentDoc The parent <code>Document</code>. 
+ */ + private void readStyleCatalog(Document parentDoc) { + Element rootNode = null; + try { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + parentDoc.write(bos); + SxwDocument sxwDoc = new SxwDocument("old"); + sxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + String families[] = new String[3]; + families[0] = "text"; + families[1] = "paragraph"; + families[2] = "paragraph"; + Class classes[] = new Class[3]; + classes[0] = TextStyle.class; + classes[1] = ParaStyle.class; + classes[2] = TextStyle.class; + + NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + + } catch (Exception e) { + Debug.log(Debug.ERROR, "", e); + } + + } + + + /** + * Given an array of paragraph <code>Style</code> objects, see if + * there is exactly one which matches the text formatting + * <code>Style</code> of <code>tStyle</code>. + * + * @param paraStyles An array of paragraph <code>Style</code> + * objects. + * @param tStyle Text <code>Style</code> to match. + * + * @return The paragraph <code>Style</code> that matches. 
+ */ + private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) { + int matchIndex = -1; + int matchCount = 0; + Style txtMatches[] = (Style[]) oldStyleCat.getMatching(tStyle); + if (txtMatches.length >= 1) { + for (int j = 0; j < txtMatches.length; j++) { + TextStyle t = (TextStyle)txtMatches[j]; + + if (!t.getFamily().equals("paragraph")) + continue; + + for (int k = 0; k < paraStyles.length; k++) { + if (t.getName().equals(paraStyles[k].getName())) { + matchCount++; + matchIndex = k; + } + } + } + } + if (matchCount == 1) + return (ParaStyle)paraStyles[matchIndex]; + else return null; + } + + + /** + * Take a <code>String</code> of text and turn it into a sequence + * of <code>Node</code> objects. + * + * @param text <code>String</code> of text. + * @param parentDoc Parent <code>Document</code>. + * + * @return Array of <code>Node</code> objects. + */ + private Node[] parseText(String text, org.w3c.dom.Document parentDoc) { + Vector nodeVec = new Vector(); + + // Break up the text from the WordSmith text run into Open + // Office text runs. There may be more runs in OO because + // runs of 2 or more spaces map to nodes. + while ((text.indexOf(" ") != -1) || (text.indexOf("\t") != 1)) { + + // Find the indices of tabs and multiple spaces, and + // figure out which of them occurs first in the string. + int spaceIndex = text.indexOf(" "); + int tabIndex = text.indexOf("\t"); + if ((spaceIndex == -1) && (tabIndex == -1)) + break; // DJP This should not be necessary. What is wrong + // with the while() stmt up above? + int closerIndex; // Index of the first of these + if (spaceIndex == -1) + closerIndex = tabIndex; + else if (tabIndex == -1) + closerIndex = spaceIndex; + else + closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex; + + // If there is any text prior to the first occurrence of a + // tab or spaces, create a text node from it, then chop it + // off the string we're working with. 
+ if (closerIndex > 0) { + String beginningText = text.substring(0, closerIndex); + Text textNode = parentDoc.createTextNode(beginningText); + nodeVec.addElement(textNode); + log("<TEXT>"); + log(beginningText); + log("</TEXT>"); + } + text = text.substring(closerIndex); + + // Handle either tab character or space sequence by creating + // an element for it, and then chopping out the text that + // represented it in "text". + if (closerIndex == tabIndex) { + Element tabNode = parentDoc.createElement(TAG_TAB_STOP); + nodeVec.add(tabNode); + text = text.substring(1); // tab is always a single character + log("<TAB/>"); + } else { + // Compute length of space sequence. + int nrSpaces = 2; + while ((nrSpaces < text.length()) + && text.substring(nrSpaces, nrSpaces + 1).equals(" ")) + nrSpaces++; + + Element spaceNode = parentDoc.createElement(TAG_SPACE); + spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, new Integer(nrSpaces).toString()); + nodeVec.add(spaceNode); + text = text.substring(nrSpaces); + log("<SPACE count=\"" + nrSpaces + "\" />"); + } + } + + // No more tabs or space sequences. If there's any remaining + // text create a text node for it. + if (text.length() > 0) { + Text textNode = parentDoc.createTextNode(text); + nodeVec.add(textNode); + log("<TEXT>"); + log(text); + log("</TEXT>"); + } + + // Now create and populate an array to return the nodes in. + Node nodes[] = new Node[nodeVec.size()]; + for (int i = 0; i < nodeVec.size(); i++) + nodes[i] = (Node)nodeVec.elementAt(i); + return nodes; + } + + + /** + * Parses the text content of a WordSmith format and builds a + * <code>SXWDocument</code>. + * + * @param docName <code>Document</code> name + * @param str Text content of WordSmith format + * + * @return Resulting <code>SXWDocument</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc) + throws IOException { + + // create minimum office xml document. 
+ SxwDocument sxwDoc = new SxwDocument(docName); + sxwDoc.initContentDOM(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Grab hold of the office:body tag, + // Assume there should be one. + // This is where top level paragraphs will append to. + NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + Node bodyNode = list.item(0); + + styleCat = new StyleCatalog(50); + oldStyleCat = new StyleCatalog(50); + if (origDoc != null) + readStyleCatalog(origDoc); + + Element currPara = null; + ParaStyle currParaStyle = null; + int newTextStyleNr = 0; + int newParaStyleNr = 0; + + // Now write out the document body by running through + // the list of WordSmith elements and processing each one + // in turn. + for (int i = 0; i < data.length; i++) { + + if (data[i].getClass() == WsePara.class) { + + currPara = doc.createElement(TAG_PARAGRAPH); + log("</PARA>"); + log("<PARA>"); + + WsePara p = (WsePara)data[i]; + + // Save info about the first text run, if there is one. + WseTextRun firstTextRun = null; + + if ((data.length >= i + 2) + && (data[i+1].getClass() == WseTextRun.class)) + firstTextRun = (WseTextRun)data[i+1]; + + Style matches[] = oldStyleCat.getMatching(p.makeStyle()); + + // See if we can find a unique match in the catalog + // of existing styles from the original document. + ParaStyle pStyle = null; + if (matches.length == 1) { + pStyle = (ParaStyle)matches[0]; + log("using an existing style"); + } else if ((matches.length > 1) && (firstTextRun != null)) { + pStyle = matchParaByText(matches, firstTextRun.makeStyle()); + log("resolved a para by looking @ text"); + } + + // If nothing found so far, try looking in the catalog + // of newly-created styles. + // DJP FIXME: if we need to add two para styles with the + // same para formatting info but different default text + // styles, this won't work! 
+ if (pStyle == null) { + log("had " + matches.length + " matches in old catalog"); + matches = styleCat.getMatching(p.makeStyle()); + if (matches.length == 0) { + pStyle = p.makeStyle(); + String newName = new String("PPP" + ++newParaStyleNr); + pStyle.setName(newName); + styleCat.add(pStyle); + // DJP: write in the text format info here + log("created a new style"); + } else if (matches.length == 1) { + pStyle = (ParaStyle)matches[0]; + log("re-using a new style"); + } else if (firstTextRun != null) { + pStyle = matchParaByText(matches, firstTextRun.makeStyle()); + if (pStyle != null) { + log("resolved a (new) para by looking @ text"); + } else + log("Hey this shouldn't happen! - nr of matches is " + + matches.length); + } + } + + if (pStyle == null) + log("Unable to figure out a para style"); + + // Figured out a style to use. Specify the style in this + // paragraph's attributes. + currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); + + bodyNode.appendChild(currPara); + currParaStyle = pStyle; + } else if (data[i].getClass() == WseTextRun.class) { + WseTextRun tr = (WseTextRun)data[i]; + TextStyle trStyle = null; + Node trNodes[] = parseText(tr.getText(), doc); + + // First see if the formatting of this text run matches + // the default text formatting for this paragraph. If + // it does, then just make the text node(s) children of + // the current paragraph. + Style[] cps = new Style[1]; + cps[0] = currParaStyle; + if (matchParaByText(cps, tr.makeStyle()) != null) { + for (int ii = 0; ii < trNodes.length; ii++) { + currPara.appendChild(trNodes[ii]); + } + continue; + } + + // Check for existing, matching styles in the old style + // catalog. If exactly one is found, use it. Otherwise, + // check the new style catalog, and either use the style + // found or add this new one to it. 
+ Style matches[] = oldStyleCat.getMatching(tr.makeStyle()); + if (matches.length == 1) + trStyle = (TextStyle)matches[0]; + else { + matches = styleCat.getMatching(tr.makeStyle()); + if (matches.length == 0) { + trStyle = tr.makeStyle(); + String newName = new String("TTT" + ++newTextStyleNr); + trStyle.setName(newName); + styleCat.add(trStyle); + } else if (matches.length == 1) + trStyle = (TextStyle)matches[0]; + else + log("multiple text style matches from new catalog"); + } + + // Create a text span node, set the style attribute, make the + // text node(s) its children, and append it to current paragraph's + // list of children. + Element textSpanNode = doc.createElement(TAG_SPAN); + textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName()); + for (int ii = 0; ii < trNodes.length; ii++) { + textSpanNode.appendChild(trNodes[ii]); + } + currPara.appendChild(textSpanNode); + log("</SPAN>"); + } + + else if (data[i].getClass() == WseFontTable.class) { + fontTable = (WseFontTable)data[i]; + } + + else if (data[i].getClass() == WseColorTable.class) { + colorTable = (WseColorTable)data[i]; + } + } + + + //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT); + NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT); + Node rootNode = r.item(0); + + // read the original document + org.w3c.dom.NodeList nl; + if (origDoc != null) { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + origDoc.write(bos); + SxwDocument origSxwDoc = new SxwDocument("old"); + origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); + org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM(); + + XmlUtil xu = new XmlUtil(); + org.w3c.dom.DocumentFragment df; + org.w3c.dom.Node newNode; + + // copy font declarations from original document to the new document + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, 
bodyNode); + + // copy style catalog from original document to the new document + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + } + + // Original document not specified. We need to add font declarations. + // DJP: this might just be for debugging. Merger will probably put + // the "real" ones in. + // DJP: if really doing it this way, do it right: gather font names + // from style catalog(s). + else { + org.w3c.dom.Node declNode; + + log("<FONT-DECLS/>"); + + declNode = doc.createElement(TAG_OFFICE_FONT_DECLS); + rootNode.insertBefore(declNode, bodyNode); + org.w3c.dom.Element fontNode; + + fontNode = doc.createElement(TAG_STYLE_FONT_DECL); + fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial"); + fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial"); + fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); + declNode.appendChild(fontNode); + + fontNode = doc.createElement(TAG_STYLE_FONT_DECL); + fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso"); + fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso"); + fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); + declNode.appendChild(fontNode); + } + + + // Now add any new styles we have created in this document. 
+ nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + Node autoStylesNode = nl.item(0); + if (autoStylesNode == null) { + autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); + log("<OFFICE-AUTOMATIC-STYLES/>"); + rootNode.insertBefore(autoStylesNode, bodyNode); + } + + Node newStyleCatNode = styleCat.writeNode(doc, "dummy"); + nl = newStyleCatNode.getChildNodes(); + int nNodes = nl.getLength(); + for (int i = 0; i < nNodes; i++) { + autoStylesNode.appendChild(nl.item(0)); + } + + oldStyleCat.dumpCSV(true); + styleCat.dumpCSV(true); + return sxwDoc; + } + + + /** + * Sends message to the log object. + * + * @param str Debug message. + */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } + + + /* + public static void main(String args[]) { + + // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream()); + + Node nodes[] = parseText("Tab here:\tThen some more text"); + } +*/ +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java new file mode 100644 index 000000000000..c6dd88589fe5 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java @@ -0,0 +1,99 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * Wordsmith implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + 
} + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java new file mode 100644 index 000000000000..edbf7f5b4370 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java @@ -0,0 +1,536 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import java.io.IOException; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.PdbEncoder; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PdbUtil; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.util.*; +import org.openoffice.xmerge.converter.xml.*; + +/** + * <p>WordSmith implementation of + * org.openoffice.xmerge.DocumentSerializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>The <code>serialize</code> method traverses the DOM + * document from the given <code>Document</code> object. 
It uses a + * <code>DocEncoder</code> object for the actual conversion of + * contents to the WordSmith format.</p> + * + * @author Herbie Ong, David Proulx + */ + +// DJP: take out "implements OfficeConstants" +public final class DocumentSerializerImpl +implements OfficeConstants, DocumentSerializer { + + /** A WSEncoder object for encoding to WordSmith. */ + private WSEncoder encoder = null; + + /** The <code>StyleCatalog</code>. */ + private StyleCatalog styleCat = null; + + private WseFontTable fontTable = new WseFontTable(); + private WseColorTable colorTable = new WseColorTable(); + + /** + * The <code>SxwDocument</code> object that this converter + * processes. + */ + private SxwDocument sxwDoc = null; + + /** + * Constructor. + * + * @param doc The <code>Document</code> to convert. + */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument) doc; + } + + + /** + * <p>Method to convert a <code>Document</code> into a + * <code>PalmDocument</code>.</p> + * + * <p>This method is not thread safe for performance reasons. + * This method should not be called from within two threads. + * It would be best to call this method only once per object + * instance.</p> + * + * <p>Note that the doc parameter needs to be an XML + * <code>Document</code>, else this method will throw a + * <code>ClassCastException</code>. I think this is a hack, + * but this is the only way to not modify most of the existing + * code right now.</p> + * + * @param doc Input should be an XML <code>Document</code> + * object + * @param os Output of <code>PalmDB</code> object + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() + throws IOException { + + + // get the server document name + String docName = sxwDoc.getName(); + + // get DOM document + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + // Create WordSmith encoder object. 
Add WordSmith header, + // empty font table to it. + encoder = new WSEncoder(); + encoder.addElement(fontTable); + encoder.addElement(colorTable); + + // Read the styles into the style catalog + String families[] = new String[3]; + families[0] = "text"; + families[1] = "paragraph"; + families[2] = "paragraph"; + Class classes[] = new Class[3]; + classes[0] = TextStyle.class; + classes[1] = ParaStyle.class; + classes[2] = TextStyle.class; + styleCat = new StyleCatalog(25); + + // Parse the input document + // DJP todo: eliminate multiple calls to add() when it can + // recurse properly. + NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + + // Traverse to the office:body element. + // There should only be one. + NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); + int len = list.getLength(); + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + // create a PalmDB object and ConvertData object. + // + Record records[] = encoder.getRecords(); + + ConvertData cd = new ConvertData(); + PalmDocument palmDoc = new PalmDocument(docName, + PdbUtil.intID("WrdS"), PdbUtil.intID("BDOC"), 0, + PalmDB.PDB_HEADER_ATTR_BACKUP, records); + cd.addDocument(palmDoc); + return cd; + } + + + /** + * This method traverses <i>office:body</i> element. + * + * @param node <i>office:body</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseBody(Node node) throws IOException { + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) || + nodeName.equals(TAG_HEADING)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); + } + } + } + } + + } + + + /** + * This method traverses the <i>text:p</i> and <i>text:h</i> + * element <code>Node</code> objects. + * + * @param node A <i>text:p</i> or <i>text:h</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParagraph(Node node) throws IOException { + + String styleName = findAttribute(node, "text:style-name"); + ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", + null, ParaStyle.class); + + // If the style does not exist in the style catalog for some reason, + // make up a default style and use it. We'll have to add this default + // style to the style catalog the first time it is used. 
+ if (pstyle == null) { + styleName = "CONVERTER-DEFAULT"; + pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", null, + ParaStyle.class); + if (pstyle == null) { + pstyle = new ParaStyle(styleName, "paragraph", null, + (String [])null, null, styleCat); + styleCat.add(pstyle); + styleCat.add(new TextStyle(styleName, "paragraph", null, + 0, 0, 12, "Times-Roman", styleCat)); + } + } + + pstyle = (ParaStyle)pstyle.getResolved(); + encoder.addElement(new WsePara(pstyle, styleCat)); + TextStyle defParaTextStyle = (TextStyle) + styleCat.lookup(styleName, "paragraph", null, TextStyle.class); + + traverseParaContents(node, defParaTextStyle); + } + + + /** + * This method traverses a paragraph content. Note that this + * method may recurse to call itself. + * + * @param node A paragraph or content <code>Node</code> + */ + private void traverseParaContents(Node node, TextStyle defTextStyle) { + + String styleName = findAttribute(node, "text:style-name"); + TextStyle style = (TextStyle) + styleCat.lookup(styleName, "text", null, TextStyle.class); + + if (node.hasChildNodes()) { + NodeList nodeList = node.getChildNodes(); + int nChildren = nodeList.getLength(); + + for (int i = 0; i < nChildren; i++) { + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.TEXT_NODE) { + + // this is for grabbing text nodes. + String s = child.getNodeValue(); + + if (s.length() > 0) { + if (style != null) + encoder.addElement(new WseTextRun(s, style, styleCat, + fontTable, colorTable)); + else + encoder.addElement(new WseTextRun(s, defTextStyle, + styleCat, fontTable, colorTable)); + } + + } else if (child.getNodeType() == Node.ELEMENT_NODE) { + + String childNodeName = child.getNodeName(); + + if (childNodeName.equals(TAG_SPACE)) { + + // this is for text:s tags. 
+ NamedNodeMap map = child.getAttributes(); + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + StringBuffer space = new StringBuffer(" "); + int count = 1; + + if (attr != null) { + try { + String countStr = attr.getNodeValue(); + count = Integer.parseInt(countStr.trim()); + } catch (NumberFormatException e) { + Debug.log(Debug.ERROR, "Problem parsing space tag", e); + } + } + + for (int j = 1; j < count; j++) + space.append(" "); + + encoder.addElement(new WseTextRun(space.toString(), + defTextStyle, + styleCat, fontTable, colorTable)); + Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />"); + + } else if (childNodeName.equals(TAG_TAB_STOP)) { + + // this is for text:tab-stop + encoder.addElement(new WseTextRun("\t", defTextStyle, styleCat, + fontTable, colorTable)); + + Debug.log(Debug.INFO, "<TAB/>"); + + } else if (childNodeName.equals(TAG_LINE_BREAK)) { + + // this is for text:line-break + encoder.addElement(new WseTextRun("\n", defTextStyle, + styleCat, fontTable, colorTable)); + + Debug.log(Debug.INFO, "<LINE-BREAK/>"); + + } else if (childNodeName.equals(TAG_SPAN)) { + + // this is for text:span + Debug.log(Debug.INFO, "<SPAN>"); + traverseParaContents(child, defTextStyle); + Debug.log(Debug.INFO, "</SPAN>"); + + } else if (childNodeName.equals(TAG_HYPERLINK)) { + + // this is for text:a + Debug.log(Debug.INFO, "<HYPERLINK>"); + traverseParaContents(child, defTextStyle); + Debug.log(Debug.INFO, "<HYPERLINK/>"); + + } else if (childNodeName.equals(TAG_BOOKMARK) || + childNodeName.equals(TAG_BOOKMARK_START)) { + + Debug.log(Debug.INFO, "<BOOKMARK/>"); + + } else { + + Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); + } + + } + + } + } + } + + + /** + * This method traverses list tags <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. A list can only contain one optional + * <i>text:list-header</i> and one or more <i>text:list-item</i> + * elements. + * + * @param node A list <code>Node</code>. 
+ * + * @throws IOException If any I/O error occurs. + */ + private void traverseList(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + + traverseListItem(child); + + } else if (nodeName.equals(TAG_LIST_HEADER)) { + + traverseListHeader(child); + + } else { + + Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST>"); + } + + + /** + * This method traverses a <i>text:list-header</i> element. + * It contains one or more <i>text:p</i> elements. + * + * @param node A list header <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseListHeader(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST-HEADER>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else { + + Debug.log(Debug.TRACE, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST-HEADER>"); + } + + + /** + * This method will traverse a <i>text:list-item</i>. + * A list item may contain one or more of <i>text:p</i>, + * <i>text:h</i>, <i>text:section</i>, + * <i>text:ordered-list</i> and <i>text:unordered-list</i>. + * + * This method currently only implements grabbing <i>text:p</i>, + * <i>text:h</i>, <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. + * + * @param Node <code>Node</code> to traverse. 
+ * + * @throws IOException If any I/O error occurs. + */ + private void traverseListItem(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST-ITEM>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST-ITEM>"); + } + + + /** + * Look up a <code>Node</code> object's named attribute and return + * its value + * + * @param node The <code>Node</code>. + * @param name The attribute name. + * + * @return The value of the named attribute + */ + private String findAttribute(Node node, String name) { + NamedNodeMap attrNodes = node.getAttributes(); + if (attrNodes != null) { + int len = attrNodes.getLength(); + for (int i = 0; i < len; i++) { + Node attr = attrNodes.item(i); + if (attr.getNodeName().equals(name)) + return attr.getNodeValue(); + } + } + return null; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java new file mode 100644 index 000000000000..f8df638bd69f --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/PluginFactoryImpl.java @@ -0,0 +1,149 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. 
+ * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.DocumentMergerFactory; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.DocumentSerializerFactory; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.DocumentDeserializerFactory; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.registry.ConverterInfo; + +import java.io.InputStream; +import java.io.IOException; + + +/** + * <p>WordSmith implementation of a <code>PluginFactory</code> that + * encapsulates conversion of StarWriter XML format to and from + * WordSmith format.</p> + * + * The superclass produces a particular + * {@link org.openoffice.xmerge.Document Document} + * 
object, i.e. + * {@link org.openoffice.xmerge.converter.xml.sxw.SxwDocument + * SxwDocument} that the converters in this class works with. Thus, + * this class only implements the methods that produces the converters, + * i.e. {@link + * org.openoffice.xmerge.DocumentSerializer + * DocumentSerializer} and {@link + * org.openoffice.xmerge.DocumentDeserializer + * DocumentDeserializer}; + * as well as the {@link + * org.openoffice.xmerge.ConverterCapabilities + * ConverterCapabilities} object that is specific to this format + * conversion. That superclass also produces a {@link + * org.openoffice.xmerge.DocumentMerger DocumentMerger} + * object, i.e. {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.DocumentMergerImpl + * DocumentMergerImpl} which this class derives the functionality.</p> + * + * @author Herbie Ong, Dave Proulx + */ +public final class PluginFactoryImpl extends SxwPluginFactory + implements DocumentDeserializerFactory, DocumentSerializerFactory, + DocumentMergerFactory { + + public PluginFactoryImpl(ConverterInfo ci) { + super(ci); + } + + /** ConverterCapabilities object for this type of conversion. */ + private final static ConverterCapabilities converterCap = + new ConverterCapabilitiesImpl(); + + + /** + * Returns an instance of <code>DocumentSerializerImpl</code>, which is + * an implementation of <code>DocumentSerializer</code> interface. + * + * @param doc <code>Document</code> object to be converted/serialized. + * + * @return A <code>DocumentSerializerImpl</code> object. + */ + public DocumentSerializer createDocumentSerializer(Document doc) { + + return new DocumentSerializerImpl(doc); + } + + + /** + * Returns an instance of <code>DocumentDeserializerImpl</code>, + * which is an implementation of <code>DocumentDeserializer</code> + * interface. + * + * @param cd <code>ConvertData</code> object for reading data + * which will be converted back to a + * <code>Document</code> object. 
+ * + * @return A <code>DocumentDeserializerImpl</code> object. + */ + public DocumentDeserializer createDocumentDeserializer(ConvertData cd) { + + return new DocumentDeserializerImpl(cd); + } + + /** + * Returns an instance of <code>DocumentMergerImpl</code>, + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A DocumentMergerImpl object. + */ + public DocumentMerger createDocumentMerger(Document doc) { + + ConverterCapabilities cc = converterCap; + DocumentMergerImpl merger = new DocumentMergerImpl(doc, cc); + return merger; + } + + /** + * Returns an instance of the DeviceDocument + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A Device Document object + */ + public Document createDeviceDocument(String name, InputStream is) + throws IOException { + + PalmDocument palmDoc = new PalmDocument(is); + return palmDoc; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java new file mode 100644 index 000000000000..07e8ea97ea43 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSDecoder.java @@ -0,0 +1,352 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. 
+ * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.FileInputStream; +import java.io.UnsupportedEncodingException; +import org.openoffice.xmerge.util.Debug; + +import org.openoffice.xmerge.converter.palm.*; +import org.openoffice.xmerge.util.Resources; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.DocumentDeserializerImpl + * DocumentDeserializerImpl} to decode a WordSmith format. It currently + * decodes the text content into a single <code>String</code> object. + * + * @author Herbie Ong, David Proulx + */ +final class WSDecoder implements DOCConstants { + + /** For decoding purposes. */ + private final static int COUNT_BITS = 3; + + /** Resources object for I18N. */ + private Resources res = null; + + /** + * Default constructor creates a header and + * a text buffer for holding all the text in + * the DOC db. + */ + WSDecoder() { + res = Resources.getInstance(); + } + + /** + * Decode the text records into a single <code>byte</code> array. + * + * @param Record <code>Record</code> array holding WordSmith + * contents. + * + * @throws IOException If any I/O error occurs. 
+ */ + byte[] parseRecords(Record[] recs) throws IOException { + + // read the header record + HeaderInfo header = readHeader(recs[0].getBytes()); + dumpHeader(header); + byte[][] byteArrays = new byte[recs.length - 1][]; + for (int i = 0; i < recs.length - 1; i++) byteArrays[i] = null; + + switch (header.version & ~4) { // DJP: "4" indicates OOB data is present. + // Add a constant to handle this, might also need code to handle it. + + case COMPRESSED: + case 3: // DJP: determined this empirically. Are Herbie's constants wrong? + for (int i = 1; i < recs.length; i++) { + byteArrays[i-1] = decompress(recs[i].getBytes(), + header.textRecordSize); + Debug.log(Debug.INFO, "processing " + byteArrays[i-1].length + " bytes"); + } + + break; + + case UNCOMPRESSED: + for (int i = 1; i < recs.length; i++) { + byteArrays[i-1] = recs[i].getBytes(); + Debug.log(Debug.INFO, "processing " + byteArrays[i-1].length + " bytes"); + } + + break; + + default: + throw new IOException(res.getString("UNKNOWN_DOC_VERSION")); + + } + + // Concatenate byteArrays[][] into a single byte array. + int length = 0; + for (int i = 0; i < recs.length - 1; i++) + length += byteArrays[i].length; + byte bigArray[] = new byte[length]; + int offset = 0; + for (int i = 0; i < recs.length - 1; i++) { + System.arraycopy(byteArrays[i], 0, bigArray, offset, + byteArrays[i].length); + offset += byteArrays[i].length; + } + return bigArray; + } + + + /** + * Decode the text records into a <code>Wse</code> array. + * + * @param Record[] <code>Record</code> array holding DOC + * contents. + * + * @throws IOException If any I/O error occurs. + */ + Wse[] parseDocument(Record[] recs) throws IOException { + + java.util.Vector v = new java.util.Vector(20, 20); + WseFontTable fontTable = null; + WseColorTable colorTable = null; + + // rawData is the document data to be parsed. + byte rawData[] = parseRecords(recs); + + // beginning of document has some header information, including + // optional font and color tables. 
+ // DJP: maybe should add a new WSelement (docHeader) to hold + // header info. + // DJP: finish code here to parse header + if (rawData[0] != 2) throw new IOException(); + int nParagraphs = util.intFrom4bytes(rawData, 2); + int nAtoms = util.intFrom4bytes(rawData, 6); + int nChars = util.intFrom4bytes(rawData, 10); + int miscSize = util.intFrom4bytes(rawData, 14); + int curIndex = 18; + + while (curIndex < rawData.length) { + if (WsePara.isValid(rawData, curIndex)) { + v.add(new WsePara(rawData, curIndex)); + curIndex = WsePara.computeNewIndex(rawData, curIndex); + } else if (WseTextRun.isValid(rawData, curIndex)) { + v.add(new WseTextRun(rawData, curIndex, fontTable, colorTable)); + curIndex = WseTextRun.computeNewIndex(rawData, curIndex); + } else if (WseFontTable.isValid(rawData, curIndex)) { + fontTable = new WseFontTable(rawData, curIndex); + v.add(fontTable); + curIndex = WseFontTable.computeNewIndex(rawData, curIndex); + } else if (WseColorTable.isValid(rawData, curIndex)) { + colorTable = new WseColorTable(rawData, curIndex); + v.add(colorTable); + curIndex = WseColorTable.computeNewIndex(rawData, curIndex); + } else { + Debug.log(Debug.ERROR, "Unknown code " + rawData[curIndex]); + throw new IOException(); + } + } + + return (Wse[])v.toArray(new Wse[2]); + } + + + /** + * <p>Decompress the <code>byte</code> array.</p> + * + * <p>The resulting uncompressed <code>byte</code> array + * should be within <code>textRecordSize</code> length, + * definitely within twice the size it claims, else treat + * it as a problem with the encoding of that PDB and + * throw <code>IOException</code>.</p> + * + * @param bytes Compressed <code>byte</code> array + * @param textRecordSize Size of uncompressed <code>byte</code> + * array + * + * @throws IOException If <code>textRecordSize</codeL < + * <code>cBytes.length</code>. 
+ */ + private byte[] decompress(byte[] cBytes, int textRecordSize) + throws IOException { + + // create byte array for storing uncompressed bytes + // it should be within textRecordSize range, definitely + // within twice of textRecordSize! if not, then + // an ArrayIndexOutOfBoundsException will get thrown, + // and it should be converted into an IOException, and + // treat it as a conversion error. + byte[] uBytes = new byte[textRecordSize*2]; + + int up = 0; + int cp = 0; + + try { + + while (cp < cBytes.length) { + + int c = cBytes[cp++] & 0xff; + + // codes 1...8 mean copy that many bytes + if (c > 0 && c < 9) { + + while (c-- > 0) + uBytes[up++] = cBytes[cp++]; + } + + // codes 0, 9...0x7F represent themselves + else if (c < 0x80) { + uBytes[up++] = (byte) c; + } + + // codes 0xC0...0xFF represent "space + ascii char" + else if (c >= 0xC0) { + uBytes[up++] = (byte) ' '; + uBytes[up++] = (byte) (c ^ 0x80); + } + + // codes 0x80...0xBf represent sequences + else { + c <<= 8; + c += cBytes[cp++] & 0xff; + int m = (c & 0x3fff) >> COUNT_BITS; + int n = c & ((1 << COUNT_BITS) - 1); + n += COUNT_BITS; + while (n-- > 0) { + uBytes[up] = uBytes[up - m]; + up++; + } + } + } + + } catch (ArrayIndexOutOfBoundsException e) { + + throw new IOException( + res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED")); + } + + // note that ubytes may be larger that the amount of + // uncompressed bytes, so trim it to another byte array + // with the exact size. + byte[] textBytes = new byte[up]; + System.arraycopy(uBytes, 0, textBytes, 0, up); + + return textBytes; + } + + + /** + * Read the header <code>byte</code> array. + * + * @param bytes <code>byte</code> array containing header + * record data. + * + * @return <code>HeaderInfo</code> object. + * + * @throws IOException If any I/O error occurs. 
+ */ + private HeaderInfo readHeader(byte[] bytes) throws IOException { + + HeaderInfo header = new HeaderInfo(); + + ByteArrayInputStream bis = new ByteArrayInputStream(bytes); + DataInputStream dis = new DataInputStream(bis); + + // Normally the first 2 bytes comprised of the version + // which should either be COMPRESSED or UNCOMPRESSED + // SmartDoc/Quickword would add a 0x01 to the first + // byte, thus their version would be 0x0101 for UNCOMPRESSED + // instead of 0x0001 and 0x0102 for UNCOMPRESSED instead of + // 0x0002. + + dis.readByte(); + header.version = dis.readByte(); + + // read extra 2 unused bytes + dis.readShort(); + + // Read the text length, this should be unsigned 4 bytes. + // We could store the read value into a long, but then + // our current buffer limit is the max positive of an int. + // That is a large enough limit, thus we shall stay with + // storing the value in an int. If it exceeds, then + // an IOException should be thrown. + header.textLen = dis.readInt(); + if (header.textLen < 0) { + throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED")); + } + + // read the number of records - unsigned 2 bytes + header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff; + + // read the record size - unsigned 2 bytes + header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff; + + // read extra 4 unused bytes + dis.readInt(); + + return header; + } + + + /** + * Prints out header info into log. + * Used for debugging purposes only. + * + * @param header <code>HeaderInfo</code> structure. + */ + private void dumpHeader(HeaderInfo header) { + /* + log("<DOC_INFO "); + log("version=\"" + header.version + "\" "); + log("text-length=\"" + header.textLen + "\" "); + log("number-of-records=\"" + header.textRecordCount + "\" "); + log("record-size=\"" + header.textRecordSize + "\" />\n"); + */ + } + + + /** + * Inner class to store DOC header information. 
+ */ + private class HeaderInfo { + + /** length of text section */ + int textLen = 0; + + /** number of text records */ + int textRecordCount = 0; + + /** + * size of a text record. This is normally the same as + * TEXT_RECORD_SIZE, but some applications may modify this. + */ + int textRecordSize = 0; + + /** compression type */ + int version = 0; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java new file mode 100644 index 000000000000..476e34c72e1f --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WSEncoder.java @@ -0,0 +1,212 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.*; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; +import java.util.Vector; + +import org.openoffice.xmerge.converter.palm.*; +
/**
 *  This class encodes WordSmith document elements into the records
 *  of a WordSmith database.
 *
 *  @author   David Proulx
 */
final class WSEncoder {

    /* DJP: These should probably go somewhere else! */
    /** Constant for uncompressed version. */
    public static final short UNCOMPRESSED = 1;

    /** Constant for compressed version. */
    public static final short COMPRESSED = 2;

    /** Constant used for spare fields. */
    public static final int SPARE = 0;

    /** Maximum number of bytes stored in a single text record. */
    private static final int MAX_RECORD_SIZE = 4096;

    /* WordSmith Header information. */
    private short version;
    private int textLen;
    private short maxRecSize;
    private int textRecCount = 0;


    /* WordSmith document elements. */
    WseHeader header = null;
    WseFontTable ft = null;
    WseColorTable ct = null;
    private Vector elements;      // paragraphs & text runs

    /* Totals for the WordSmith document. */
    int nrParagraphs = 0;
    int nrAtoms = 0;
    int nrChars = 0;


    /**
     *  Default constructor.  Selects the uncompressed version, the
     *  default record size, and starts with an empty element list.
     */
    WSEncoder() {
        version = 1;
        textLen = 0;
        maxRecSize = MAX_RECORD_SIZE;
        elements = new Vector();
    }


    /**
     *  This method adds a new element to the WordSmith document.
     *  A header, font table, or color table replaces the one stored
     *  before; every other element is appended to the element list.
     *
     *  @param  elem  WordSmith document element to add
     */
    void addElement(Wse elem) {
        if (elem.getClass() == WseHeader.class)
            header = (WseHeader)elem;
        else if (elem.getClass() == WseFontTable.class)
            ft = (WseFontTable)elem;
        else if (elem.getClass() == WseColorTable.class)
            ct = (WseColorTable)elem;
        else
            elements.addElement(elem);
    }


    /**
     *  This method encodes the information given to
     *  an array of palm Records in the WordSmith database format.
     *  Record 0 is the database header; the following records hold
     *  the document header, font table, color table, and elements,
     *  cut into chunks of at most 4096 bytes.
     *
     *  @return  <code>Record</code> array holding WordSmith contents.
     *
     *  @throws  IOException  If any I/O error occurs.
     */
    Record[] getRecords() throws IOException {

        int nElements = elements.size();

        // Count up the number of paragraphs, atoms, and characters.
        // Start from zero so that a repeated call does not add to the
        // totals of the previous call.
        nrParagraphs = 0;
        nrAtoms = 0;
        nrChars = 0;
        for (int i = 0; i < nElements; i++) {
            Wse e = (Wse)elements.elementAt(i);
            if (e.getClass() == WsePara.class)
                nrParagraphs++;
            if (e.getClass() == WseTextRun.class) {
                nrAtoms++;
                nrChars += ((WseTextRun)e).getText().length();
            }
        }

        // Serialize the whole document into one stream first.  This
        // removes any limit on the size of the header/tables or of a
        // single element.
        header = new WseHeader(nrParagraphs, nrAtoms, nrChars, ft, ct);
        ByteArrayOutputStream body = new ByteArrayOutputStream();
        appendElement(body, header);
        if (ft != null) {
            appendElement(body, ft);
        }
        if (ct != null) {
            appendElement(body, ct);
        }
        for (int i = 0; i < nElements; i++) {
            appendElement(body, (Wse)elements.elementAt(i));
        }

        // Cut the stream into records.  Every record gets its own
        // array, so no two records ever share a buffer.
        byte[] all = body.toByteArray();
        Vector allRecs = new Vector();
        for (int off = 0; off < all.length; off += MAX_RECORD_SIZE) {
            int len = Math.min(MAX_RECORD_SIZE, all.length - off);
            byte[] chunk = new byte[len];
            System.arraycopy(all, off, chunk, 0, len);
            allRecs.addElement(new Record(chunk));
        }

        // Record 0 is the WordSmith header.  Do it last since it
        // contains totals for the entire document.  It goes
        // before everything else.
        // NOTE(review): textLen is never updated and is always
        // written as 0 - confirm whether readers rely on it.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        dos.writeShort(version);
        dos.writeShort(0);
        dos.writeInt(textLen);
        dos.writeShort(allRecs.size());
        dos.writeShort(maxRecSize);
        dos.writeInt(0);
        allRecs.insertElementAt(new Record(bos.toByteArray()), 0);

        // Convert Vector of Records to an array and return it.
        Record recs[] = new Record[allRecs.size()];
        allRecs.copyInto(recs);
        return recs;
    }


    /**
     *  Appends the first <code>getByteCount()</code> bytes of the
     *  element's byte representation to the stream.
     *
     *  @param  out   Stream collecting the document bytes.
     *  @param  elem  Element to append.
     */
    private static void appendElement(ByteArrayOutputStream out, Wse elem) {
        out.write(elem.getBytes(), 0, elem.getByteCount());
    }

}

+ diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java new file mode 100644 index 000000000000..778ff5d38f57 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/Wse.java @@ -0,0 +1,100 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.IOException; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.xml.*; +
/**
 *  Common base class of every WordSmith document element
 *  (paragraphs, text runs, font tables, and color tables).
 *
 *  @author   David Proulx
 */
abstract class Wse {

    /**
     *  Tells whether an element of this type starts at
     *  <code>dataArray[startIndex]</code>.  This base version
     *  recognizes nothing and always answers <code>false</code>.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  The start index.
     *
     *  @return  Always <code>false</code> in this class.
     */
    static boolean isValid(byte[] dataArray, int startIndex) {
        return false;
    }


    /**
     *  Computes the index of the first <code>byte</code> behind the
     *  element that starts at <code>dataArray[startIndex]</code>.
     *  This base version does no computation and returns 0.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  The start index.
     *
     *  @return  Always 0 in this class.
     */
    static int computeNewIndex(byte[] dataArray, int startIndex) {
        return 0;
    }


    /**
     *  Returns the number of bytes in the binary form of this
     *  element.
     *
     *  @return  The total number of bytes needed to represent this
     *           object.
     */
    abstract int getByteCount();


    /**
     *  Returns the binary form of this element.
     *
     *  @return  A <code>byte</code> array representing this element.
     */
    abstract byte[] getBytes();
}

+ diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java new file mode 100644 index 000000000000..e5af0337283b --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseColorTable.java @@ -0,0 +1,247 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.IOException; +import java.awt.Color; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.xml.*; +
/**
 *  A WordSmith color table: 16 foreground and 16 background colors.
 *  In binary form it is the byte 64, a one-byte length, and 32
 *  four-byte entries (an index byte followed by red, green, blue).
 *
 *  @author   David Proulx
 */
class WseColorTable extends Wse {

    /** Number of entries in each of the two color arrays. */
    private static final int TABLE_SIZE = 16;

    /** Bytes per entry: index byte plus red, green, blue. */
    private static final int ENTRY_SIZE = 4;

    private Color fgColors[];
    private Color bgColors[];


    /**
     *  Constructor to use when going from DOM to WordSmith.  Entry 0
     *  is preset to black (foreground) and white (background).
     */
    public WseColorTable() {
        fgColors = new Color[TABLE_SIZE];
        bgColors = new Color[TABLE_SIZE];

        // Always need these two!
        fgColors[0] = Color.black;
        bgColors[0] = Color.white;
    }


    /**
     *  Constructor to use when going from WordSmith to DOM.
     *
     *  @param  dataArray  <code>byte</code> array.
     *  @param  i          Index of the first byte of the table.
     */
    public WseColorTable(byte dataArray[], int i) {
        // Skip leading "64" and table length field.
        int fgStart = i + 2;
        int bgStart = fgStart + TABLE_SIZE * ENTRY_SIZE;

        fgColors = readColors(dataArray, fgStart);
        bgColors = readColors(dataArray, bgStart);
    }


    /**
     *  Reads 16 consecutive entries.  The first byte of every entry
     *  (the index byte) is skipped.
     *
     *  @param  data   <code>byte</code> array.
     *  @param  start  Index of the first entry.
     *
     *  @return  The colors read.
     */
    private static Color[] readColors(byte data[], int start) {
        Color[] colors = new Color[TABLE_SIZE];
        for (int k = 0; k < TABLE_SIZE; k++) {
            int base = start + k * ENTRY_SIZE;
            int red   = data[base + 1] & 0xFF;
            int green = data[base + 2] & 0xFF;
            int blue  = data[base + 3] & 0xFF;
            colors[k] = new Color(red, green, blue);
        }
        return colors;
    }


    /**
     *  Compute the index of the first <code>byte</code> following
     *  the color table, assuming that
     *  <code>dataArray[startIndex]</code> is the beginning of a
     *  valid color table.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  The start index.
     *
     *  @return  The index of the first <code>byte</code> following
     *           the color table.
     */
    static int computeNewIndex(byte dataArray[], int startIndex) {
        // mask to read the length byte as an unsigned value
        int tableLen = dataArray[startIndex + 1] & 0xFF;
        return startIndex + tableLen + 2;
    }


    /**
     *  Return true if <code>dataArray[startIndex]</code> is the
     *  start of a color table: the byte there is 64 and the table,
     *  plus the byte that follows it, lies inside the array.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  Start index.
     *
     *  @return  true if a color table starts at
     *           <code>startIndex</code>, false otherwise.
     */
    static boolean isValid(byte dataArray[], int startIndex) {
        int limit = dataArray.length;

        if (startIndex < 0 || startIndex >= limit) {
            return false;
        }
        if (dataArray[startIndex] != 64) {
            return false;
        }
        if (startIndex + 1 >= limit) {
            return false;
        }

        int len = dataArray[startIndex + 1] & 0xFF;
        // probe end of table
        return startIndex + len + 2 < limit;
    }


    /**
     *  Return the number of bytes needed to represent this color
     *  table.
     *
     *  @return  The byte count.
     */
    int getByteCount() {
        return (32 * 4) + 1 + 1;
    }


    /**
     *  Return a <code>byte</code> array representing this color
     *  table.  Unset foreground entries are written as black, unset
     *  background entries as white.
     *
     *  @return  <code>byte</code> array representing this color
     *           table.
     */
    byte[] getBytes() {
        byte[] b = new byte[(32 * 4) + 1 + 1];
        b[0] = 0x40;
        b[1] = (byte)128;

        int fgStart = 2;
        int bgStart = fgStart + TABLE_SIZE * ENTRY_SIZE;
        writeColors(b, fgStart, 0, fgColors, (byte)0);
        writeColors(b, bgStart, TABLE_SIZE, bgColors, (byte)0xFF);

        return b;
    }


    /**
     *  Writes 16 entries into <code>dest</code>.
     *
     *  @param  dest        Destination array.
     *  @param  start       Index in <code>dest</code> of the first
     *                      entry.
     *  @param  firstIndex  Value of the index byte of the first
     *                      entry; it grows by one per entry.
     *  @param  colors      The colors to write.
     *  @param  unset       Value used for all three components of an
     *                      entry that is <code>null</code>.
     */
    private static void writeColors(byte dest[], int start, int firstIndex,
                                    Color colors[], byte unset) {
        int pos = start;
        for (int j = 0; j < TABLE_SIZE; j++) {
            Color c = colors[j];
            dest[pos++] = (byte)(firstIndex + j);
            dest[pos++] = (c != null) ? (byte)c.getRed()   : unset;
            dest[pos++] = (c != null) ? (byte)c.getGreen() : unset;
            dest[pos++] = (c != null) ? (byte)c.getBlue()  : unset;
        }
    }


    /**
     *  Return the index of the specified foreground or background
     *  <code>Color</code>.  If the color is not found before the
     *  first free entry, it is stored in that entry.
     *
     *  @param  c           The <code>Color</code>.
     *  @param  foreground  true if foreground color, false if
     *                      background color
     *
     *  @return  The index of the specified foreground or background
     *           <code>Color</code>, or 0 when the color is not in
     *           the table and the table has no free entry.
     *
     *  DJP: how to handle table overflow?
     */
    int findColor(Color c, boolean foreground) {

        Color colorArray[] = foreground ? fgColors : bgColors;

        for (int i = 0; i < TABLE_SIZE; i++) {
            if (colorArray[i] == null) {
                // first free entry - no more colors in table, add it here
                colorArray[i] = c;
                return i;
            }
            if (colorArray[i].equals(c)) {
                return i;
            }
        }
        return 0;  // table is full and the color is not in it
    }


    /**
     *  Given an index, return the <code>Color</code> from the table.
     *
     *  @param  index       The index
     *  @param  foreground  true if foreground color, false if
     *                      background color
     *
     *  @return  The <code>Color</code> at the specified index.
     */
    Color getColor(int index, boolean foreground) {
        if (foreground) {
            return fgColors[index];
        }
        return bgColors[index];
    }
}

+ diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java new file mode 100644 index 000000000000..d5e7a84fb6bf --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseFontTable.java @@ -0,0 +1,218 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.IOException; + +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.xml.*; +
/**
 *  <p>This class represents a font table in a WordSmith document.
 *  A font table is represented as follows:</p>
 *
 *  <p><blockquote>
 *  binary "3"<br>
 *  two-byte length of the table of strings which follows<br>
 *  string table (null-terminated strings) representing font names
 *  </blockquote></p>
 *
 *  <p>Font names are converted to and from bytes with the platform
 *  default charset.</p>
 *
 *  @author   David Proulx
 */
class WseFontTable extends Wse {

    java.util.Vector fontNames = new java.util.Vector(10);


    /**
     *  Constructor for use when going from DOM to WordSmith.
     */
    public WseFontTable() {
    }


    /**
     *  Constructor for use when going from WordSmith to DOM.
     *
     *  @param  dataArray  <code>byte</code> array.
     *  @param  i          Index of the leading "3" of the table.
     */
    public WseFontTable(byte dataArray[], int i) {
        int pos = i + 1;      // Skip the leading "3"

        // The length is an unsigned 16-bit value; mask it so that a
        // high byte >= 0x80 does not make it negative.
        int tableLen = ((dataArray[pos] << 8) | (dataArray[pos + 1] & 0xFF))
                       & 0xFFFF;
        pos += 2;

        // Never scan beyond the table or beyond the array, even if
        // the last name lacks its terminating 0.
        int end = Math.min(pos + tableLen, dataArray.length);
        while (pos < end) {
            int nameEnd = pos;
            while (nameEnd < end && dataArray[nameEnd] != 0) {
                nameEnd++;
            }
            fontNames.add(new String(dataArray, pos, nameEnd - pos));
            pos = nameEnd + 1;      // skip the terminating 0
        }
    }


    /**
     *  Add a new font to the table.  A <code>null</code> name is
     *  ignored.
     *
     *  @param  newFontName  The new font name.
     */
    public void add(String newFontName) {
        if (newFontName != null)
            fontNames.add(newFontName);
    }


    /**
     *  Return a font name from the table, or null if invalid index.
     *
     *  @param  index  The font name index.
     *
     *  @return  The font name, or <code>null</code>.
     */
    public String getFontName(int index) {
        if (index < 0 || index >= fontNames.size()) {
            return null;
        }
        return (String)fontNames.elementAt(index);
    }


    /**
     *  Return the index of a font name in the table, or -1 if not
     *  found.
     *
     *  @param  fontName  The font name.
     *
     *  @return  The index of the font name, or -1 if not found.
     */
    public int getFontIndex(String fontName) {
        int len = fontNames.size();
        for (int i = 0; i < len; i++) {
            String name = (String) fontNames.elementAt(i);
            if (name.equals(fontName))
                return i;
        }
        return -1;
    }


    /**
     *  Compute the index of the first <code>byte</code> following
     *  the font table, assuming that
     *  <code>dataArray[startIndex]</code> is the beginning of a
     *  valid font table.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  The start index.
     *
     *  @return  The index of the first <code>byte</code> following
     *           the font table.
     */
    static int computeNewIndex(byte dataArray[], int startIndex) {
        startIndex++;       // Skip the leading "3"
        int tableLen = ((dataArray[startIndex] << 8) | (dataArray[startIndex+1] & 0xFF));
        tableLen &= 0xFFFF;    // eliminate problems with sign-extension
        return startIndex + tableLen + 2;
    }


    /**
     *  Return true if <code>dataArray[startIndex]</code> is the
     *  start of a font table: the byte there is 3, the table lies
     *  inside the array, and its last byte is 0.
     *
     *  @param  dataArray   <code>byte</code> array.
     *  @param  startIndex  Start index.
     *
     *  @return  true if a font table starts at
     *           <code>startIndex</code>, false otherwise.
     */
    static boolean isValid(byte dataArray[], int startIndex) {
        int limit = dataArray.length;

        if (startIndex < 0 || startIndex >= limit) {
            return false;
        }
        if (dataArray[startIndex] != 3) {
            return false;
        }
        if (startIndex + 2 >= limit) {
            return false;       // length field is incomplete
        }

        int len = ((dataArray[startIndex+1] << 8)
                   | (dataArray[startIndex+2] & 0xFF));
        len &= 0xFFFF;    // eliminate problems with sign-extension

        int last = startIndex + len + 2;    // last byte of the table
        return last < limit && dataArray[last] == 0;
    }


    /**
     *  Return the number of bytes needed to represent this font
     *  table.  The count is based on the encoded length of each
     *  name, so it always matches the array built by
     *  <code>getBytes</code>.
     *
     *  @return  The number of bytes needed to represent this font
     *           table.
     */
    int getByteCount() {

        int length = 3;                // leading "3" plus 2 bytes for length.
        int nFonts = fontNames.size();
        for (int i = 0; i < nFonts; i++) {
            String name = (String)fontNames.elementAt(i);
            // encoded length, not character count; extra byte is for
            // trailing "0"
            length += name.getBytes().length + 1;
        }
        return length;
    }


    /**
     *  Return a <code>byte</code> array representing this font table.
     *
     *  @return  A <code>byte</code> array representing this font
     *           table.
     */
    byte[] getBytes() {

        int length = getByteCount();
        int nFonts = fontNames.size();
        byte b[] = new byte[length];
        b[0] = 3;
        length -= 3;                    // length of the string table only
        b[1] = (byte)(length >> 8);
        b[2] = (byte)(length & 0xFF);
        int indx = 3;
        for (int i = 0; i < nFonts; i++) {
            String name = (String)fontNames.elementAt(i);
            byte bname[] = name.getBytes();
            System.arraycopy(bname, 0, b, indx, bname.length);
            indx += bname.length;
            b[indx++] = 0;
        }
        return b;
    }
}

+ diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java new file mode 100644 index 000000000000..52fffba259a8 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseHeader.java @@ -0,0 +1,145 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.IOException; +import java.io.DataOutputStream; +import java.io.ByteArrayOutputStream; + +/** + * This class represents a WordSmith document header. + * + * @author David Proulx + */ +class WseHeader extends Wse { + + private int nParagraphs = 0; + private int nAtoms = 0; + private int nChars = 0; + private int miscSize = 0; + + /** + * Constructor for use when going from DOM to WordSmith. + * + * @param nPara The number of paragraphs. + * @param nAtoms The number of atoms. + * @param nChars The number of characters. + * @param ft The font table. + * @param ct The color table. 
+ */ + public WseHeader(int nPara, int nAtoms, int nChars, WseFontTable ft, + WseColorTable ct) { + nParagraphs = nPara; + this.nAtoms = nAtoms; + this.nChars = nChars; + if (ft != null) miscSize += ft.getByteCount(); + if (ct != null) miscSize += ct.getByteCount(); + } + + + /** + * Constructor for use when going from WordSmith to DOM. + * + * @param dataArray <code>byte</code> array. + * @param i Index. + */ + public WseHeader(byte dataArray[], int i) { + // DJP: write this! + } + + /** + * Return true if <code>dataArray[startIndex]</code> is the start + * of a document header. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The index. + * + * @return true if <code>dataArray[startIndex]</code> is the start + * of a document header, false otherwise. + */ + static boolean isValid(byte dataArray[], int startIndex) { + return ((dataArray[startIndex] == 2) + && (dataArray[startIndex + 1] == 4)); + } + + + /** + * Compute and return the index of the first <code>byte</code> + * following this element. It is assumed that the element + * starting at <code>dataArray[startIndex]</code> is valid. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. + * + * @return The first <code>byte</code> following this element. + */ + static int computeNewIndex(byte dataArray[], int startIndex) { + return startIndex + 18; + } + + + /** + * Return the total number of bytes needed to represent this. + * + * @return The total number of bytes needed to represent this. + */ + int getByteCount() { + return 18; + } + + + /** + * Return a <code>byte</code> array representing this element. + * + * @return A <code>byte</code> array representing this element. 
+ */ + byte[] getBytes() { + DataOutputStream os; // Used for storing the data + ByteArrayOutputStream bs = null; // Used for storing the data + + try { + bs = new ByteArrayOutputStream(); + os = new DataOutputStream(bs); + os.write(2); // binary doc indicator + os.write(4); // binary header indicator + + os.writeInt(nParagraphs); + os.writeInt(nAtoms); + os.writeInt(nChars); + os.writeInt(miscSize); + + } catch (IOException e) { + e.printStackTrace(); + } + + if (bs != null) { + return bs.toByteArray(); + } else return null; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java new file mode 100644 index 000000000000..91530174211f --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WsePara.java @@ -0,0 +1,299 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import java.io.IOException; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.xml.*; + + +/** + * This class represents a paragraph in a WordSmith document. + * (A paragraph is "5" followed by 12 bytes of attributes.) + * + * @author David Proulx + */ +class WsePara extends Wse { + + private byte spaceBefore = 0; + private byte spaceAfter = 0; + private byte leftIndent = 0; + private byte firstIndent = 0; + private byte rightIndent = 0; + private byte misc = 0; + private byte style = 0; + private byte lineSpace = 0; + private byte outline = 0; + private byte reserved = 0; + + private static final byte LS_EXACTLY = (byte)0xC0; + private static final byte LS_ATLEAST = (byte)0x80; + private static final byte LS_MULTIPLE = (byte)0x40; + private static final byte LS_VALUEMASK = (byte)0x3F; + + private static final byte ALIGN_RIGHT = (byte)2; + private static final byte ALIGN_LEFT = (byte)0; + private static final byte ALIGN_CENTER = (byte)1; + private static final byte ALIGN_JUST = (byte)3; + + private StyleCatalog sc = null; + + + /** + * Constructor for use when going from DOM to WordSmith. + * + * @param p The paragraph style. + * @param sc The <code>StyleCatalog</code>. 
+ */ + public WsePara(ParaStyle p, StyleCatalog sc) { + this.sc = sc; + ParaStyle ps = (ParaStyle)p.getResolved(); + + if (ps.isAttributeSet(ParaStyle.MARGIN_LEFT)) { + double temp = ps.getAttribute(ParaStyle.MARGIN_LEFT) * 1.6 / 100; + leftIndent = (byte) temp; + if ((temp - leftIndent) > 0.5) leftIndent++; + } + + if (ps.isAttributeSet(ParaStyle.MARGIN_RIGHT)) { + double temp = ps.getAttribute(ParaStyle.MARGIN_RIGHT) * 1.6 / 100; + rightIndent = (byte) temp; + if ((temp - rightIndent) > 0.5) rightIndent++; + } + + if (ps.isAttributeSet(ParaStyle.TEXT_INDENT)) { + double temp = ps.getAttribute(ParaStyle.TEXT_INDENT) * 1.6 / 100; + firstIndent = (byte) temp; + if ((temp - firstIndent) > 0.5) firstIndent++; + } + + if (ps.isAttributeSet(ParaStyle.MARGIN_TOP)) { + double temp = ps.getAttribute(ParaStyle.MARGIN_TOP) * 1.6 / 100; + spaceBefore = (byte) temp; + if ((temp - spaceBefore) > 0.5) spaceBefore++; + } + + if (ps.isAttributeSet(ParaStyle.MARGIN_BOTTOM)) { + double temp = ps.getAttribute(ParaStyle.MARGIN_BOTTOM) * 1.6 / 100; + spaceAfter = (byte) temp; + if ((temp - spaceAfter) > 0.5) spaceAfter++; + } + + if (ps.isAttributeSet(ParaStyle.LINE_HEIGHT)) { + int lh = ps.getAttribute(ParaStyle.LINE_HEIGHT); + if ((lh & ~ParaStyle.LH_VALUEMASK) == 0) + lineSpace = (byte)(LS_MULTIPLE | (lh * 2)); + else if ((lh & ParaStyle.LH_PCT) != 0) { + lh = (lh & ParaStyle.LH_VALUEMASK) / 100; + lineSpace = (byte)(LS_MULTIPLE | (lh * 2)); + } + // DJP: handle other cases.... + } + + if (ps.isAttributeSet(ParaStyle.TEXT_ALIGN)) { + + int val = ps.getAttribute(ParaStyle.TEXT_ALIGN); + + switch (val) { + case ParaStyle.ALIGN_RIGHT: + misc = ALIGN_RIGHT; + break; + case ParaStyle.ALIGN_LEFT: + misc = ALIGN_LEFT; + break; + case ParaStyle.ALIGN_CENTER: + misc = ALIGN_CENTER; + break; + case ParaStyle.ALIGN_JUST: + misc = ALIGN_JUST; + break; + } + } + + } + + + /** + * Constructor for use when going from WordSmith to DOM. 
+ * Assumes <code>dataArray[startIndex]</code> is the first + * <code>byte</code> of a valid WordSmith paragraph descriptor. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. + */ + public WsePara(byte dataArray[], int startIndex) { + spaceBefore = dataArray[startIndex + 1]; + spaceAfter = dataArray[startIndex + 2]; + leftIndent = dataArray[startIndex + 3]; + firstIndent = dataArray[startIndex + 4]; + rightIndent = dataArray[startIndex + 5]; + misc = dataArray[startIndex + 6]; + style = dataArray[startIndex + 7]; + lineSpace = dataArray[startIndex + 8]; + outline = dataArray[startIndex + 9]; + } + + + /** + * Compute the index of the first <code>byte</code> following the + * paragraph descriptor, assuming that + * <code>dataArray[startIndex]</code> is the beginning of a valid + * paragraph descriptor. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. + * + * @return The index of the first <code>byte</code> following the + * paragraph description. + */ + static int computeNewIndex(byte dataArray[], int startIndex) { + return startIndex + 13; + } + + + /** + * Return true if <code>dataArray[startIndex]</code> is the start + * of a valid paragraph descriptor. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. + * + * @return true if <code>dataArray[startIndex]</code> is the start + * of a valid paragraph descriptor, false otherwise. + */ + static boolean isValid(byte dataArray[], int startIndex) { + return (dataArray[startIndex] == 5); + } + + /** + * Return the number of bytes needed to represent this paragraph. + * + * @return The number of bytes needed to represent this paragraph. + */ + int getByteCount() { + return 13; + } + + /** + * Return an <code>byte</code> array representing this paragraph. + * + * @return An <code>byte</code> array representing this paragraph. 
+ */ + byte[] getBytes() { + byte b[] = new byte[13]; + + b[0] = 5; + b[1] = spaceBefore; + b[2] = spaceAfter; + b[3] = leftIndent; + b[4] = firstIndent; + b[5] = rightIndent; + b[6] = misc; + b[7] = style; + b[8] = lineSpace; + b[9] = outline; + b[10] = reserved; + b[11] = 0; + b[12] = 0; + + return b; + } + + /** + * Return a <code>ParaStyle</code> that reflects the formatting of + * this run. + * + * @return A <code>ParaStyle</code> that reflects the formatting + * of this run. + */ + ParaStyle makeStyle() { + /* Csaba: Commented out the LINE_HEIGHT syle, because there was no + incoming data for that style. It was resulting a zero line + height in the xml document, ie. the doc looked empty. + */ + int attrs[] = { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT, + ParaStyle.TEXT_INDENT, //ParaStyle.LINE_HEIGHT, + ParaStyle.MARGIN_TOP, ParaStyle.MARGIN_BOTTOM, + ParaStyle.TEXT_ALIGN }; + String values[] = new String[attrs.length]; + double temp; + + temp = leftIndent / 1.6; + values[0] = (new Double(temp)).toString() + "mm"; + + temp = rightIndent / 1.6; + values[1] = (new Double(temp)).toString() + "mm"; + + temp = firstIndent / 1.6; + values[2] = (new Double(temp)).toString() + "mm"; + +/* if ((lineSpace & LS_MULTIPLE) != 0) { + temp = (lineSpace & LS_VALUEMASK) / 2; + temp *= 100; + values[3] = (new Double(temp)).toString() + "%"; + } else { + values[3] = (new Double(temp)).toString() + "mm"; + // DJP: handle other cases + } +*/ + temp = spaceBefore / 1.6; +// values[4] = (new Double(temp)).toString() + "mm"; + values[3] = (new Double(temp)).toString() + "mm"; + + temp = spaceAfter / 1.6; +// values[5] = (new Double(temp)).toString() + "mm"; + values[4] = (new Double(temp)).toString() + "mm"; + + switch (misc) { + +// case ALIGN_RIGHT: values[6] = "right"; break; +// case ALIGN_LEFT: values[6] = "left"; break; +// case ALIGN_CENTER:values[6] = "center"; break; +// case ALIGN_JUST: values[6] = "justified"; break; + + case ALIGN_RIGHT: values[5] = "right"; break; + 
case ALIGN_LEFT: values[5] = "left"; break; + case ALIGN_CENTER:values[5] = "center"; break; + case ALIGN_JUST: values[5] = "justified"; break; + } + ParaStyle x = new ParaStyle(null, "paragraph", null, attrs, + values, sc); + + return x; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java new file mode 100644 index 000000000000..86626dd4d2c7 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/WseTextRun.java @@ -0,0 +1,324 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.openoffice.xmerge.converter.xml.TextStyle; +import org.openoffice.xmerge.converter.xml.StyleCatalog; +import java.awt.Color; + +/** + * <p>This class represents a text run (aka text atom) in a WordSmith + * document.</p> + * + * <p>WordSmith represents a text run as follows:</p> + * + * <p><ul><li> + * 1 byte Value of "1", indicating beginning of a text atom + * </li><li> + * 2 bytes Length of text (does not include attributes, this length field, + * etc) + * </li><li> + * 1 byte Font index - Index in the font table of font to be used + * </li><li> + * 1 byte Font size (DJP: get details of representation) + * </li><li> + * 1 byte Color index - Index in the color table of font color to be used + * </li><li> + * 1 byte Modifiers - bit flags for bold, italic, etc + * </li><li> + * n bytes Text - the actual text + * </li></ul></p> + * + * @author David Proulx + */ +class WseTextRun extends Wse { + + /** Font specifier. This is an index into the font table. */ + private byte fontIndex = 0; + private String fontName = null; + + /** Size of the font. */ + private byte fontSize = 0; + + /** + * Color of the font. This is an index into the color table. + * High nibble is background color index, low nibble is font color + * index. + */ + private byte colorIndex = 0; + + /** + * Reference to color table for color lookups. + */ + private WseColorTable ct; + + /** + * The modifiers for the text run. (Mostly) Bitwise flags. The "_TOKEN" + * values are not yet implemented in this converter. They may not even + * be implemented in WordSmith yet. 
+ */ + private byte modifiers = 0; + final public static int BOLD = 0x01; + final public static int ITALIC = 0x02; + final public static int UNDERLINE = 0x04; + final public static int STRIKETHRU = 0x08; + final public static int SUPERSCRIPT = 0x10; + final public static int SUBSCRIPT = 0x20; + final public static int LINK = 0x40; + final public static int CUSTOM_TOKEN = 0x80; + final public static int IMAGE_TOKEN = 0x80; + final public static int BOOKMARK_TOKEN = 0x81; + final public static int ANNOTATION_TOKEN = 0x82; + final public static int LINK_TOKEN = 0x83; + + /** The actual text. */ + private String text; + + StyleCatalog sc; + + + /** + * Constructor for use when going from DOM to WordSmith. + * + * @param txt The text. + * @param t The text style. + * @param sc The <code>StyleCatalog</code>. + * @param ft The font table. + * @param ct The color Table. + */ + public WseTextRun(String txt, TextStyle t, StyleCatalog sc, + WseFontTable ft, WseColorTable ct) { + + this.sc = sc; + this.ct = ct; + + TextStyle ts = (TextStyle)t.getResolved(); + + if (ts.isSet(TextStyle.BOLD) && ts.getAttribute(TextStyle.BOLD)) + modifiers |= BOLD; + if (ts.isSet(TextStyle.ITALIC) && ts.getAttribute(TextStyle.ITALIC)) + modifiers |= ITALIC; + if (ts.isSet(TextStyle.UNDERLINE) && ts.getAttribute(TextStyle.UNDERLINE)) + modifiers |= UNDERLINE; + if (ts.isSet(TextStyle.STRIKETHRU) && ts.getAttribute(TextStyle.STRIKETHRU)) + modifiers |= STRIKETHRU; + if (ts.isSet(TextStyle.SUPERSCRIPT) && ts.getAttribute(TextStyle.SUPERSCRIPT)) + modifiers |= SUPERSCRIPT; + if (ts.isSet(TextStyle.SUBSCRIPT) && ts.getAttribute(TextStyle.SUBSCRIPT)) + modifiers |= SUBSCRIPT; + + fontSize = (byte)(ts.getFontSize() * 2); + fontName = ts.getFontName(); + fontIndex = (byte)ft.getFontIndex(fontName); + if (fontIndex == -1) { + ft.add(fontName); + fontIndex = (byte)ft.getFontIndex(fontName); + } + + // Figure out the color index. 
+ Color c = t.getFontColor(); + if (c == null) + c = Color.black; + colorIndex = (byte)ct.findColor(c, true); + c = t.getBackgroundColor(); + if (c == null) + c = Color.white; + colorIndex |= (byte)(ct.findColor(c, false) << 4); + + text = txt; + } + + + /** + * Standard constructor for use when going from WordSmith to DOM. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. + * @param ft The font table. + * @param ct The color table. + */ + public WseTextRun(byte dataArray[], int startIndex, WseFontTable ft, + WseColorTable ct) { + + this.ct = ct; + + startIndex++; // Skip the leading "1" + + int textLen = ((dataArray[startIndex] << 8) + | (dataArray[startIndex+1] & 0xFF)); + startIndex += 2; + + fontIndex = dataArray[startIndex++]; + if (ft != null) + fontName = ft.getFontName(fontIndex); + + fontSize = dataArray[startIndex++]; + + colorIndex = dataArray[startIndex++]; + modifiers = dataArray[startIndex++]; + + text = new String(dataArray, startIndex, textLen); + startIndex += textLen; // skip the text + } + + + /** + * Given a <code>byte</code> sequence, assumed to be a text run, + * compute the index of the first byte past the text run. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index + * + * @return The index of the first <code>byte</code> past the + * text run. + */ + public static int computeNewIndex(byte dataArray[], int startIndex) { + + startIndex++; // Skip the leading "1" + + int textLen = ((dataArray[startIndex] << 8) + | (dataArray[startIndex+1] & 0xFF)); + startIndex += 2; + + startIndex += 4; // skip attributes + // text = new String(dataArray, startIndex, textLen); + startIndex += textLen; // skip the text + return startIndex; + } + + + /** + * Return true if the sequence starting at + * <code>dataArray[startIndex]</code> is a valid text run. + * + * @param dataArray <code>byte</code> array. + * @param startIndex The start index. 
+ * + * @return true if the sequence starting at + * <code>dataArray[startIndex]</code> is a valid + * text run, false otherwise. + */ + public static boolean isValid(byte dataArray[], int startIndex) { + return (dataArray[startIndex] == 1); + } + + /** + * Return the number of bytes needed to represent this text run. + * + * @return The number of bytes needed to represent this text run. + */ + int getByteCount() { + return text.length() + 7; + } + + + /** + * Return an <code>byte</code> array representing this text run. + * + * @return An <code>byte</code> array representing this text run. + */ + byte[] getBytes() { + short textLen = (short)text.length(); + byte b[] = new byte[textLen + 7]; + b[0] = 1; + b[1] = (byte)(textLen >> 8); + b[2] = (byte)(textLen & 0xFF); + b[3] = fontIndex; + b[4] = fontSize; + b[5] = colorIndex; + b[6] = modifiers; + byte[] txtBytes = text.getBytes(); + System.arraycopy(txtBytes, 0, b, 7, textLen); + return b; + } + + + /** + * Return the text of this run. + * + * @return The text of this run. + */ + public String getText() { + return text; + } + + + /** + * Return a <code>TextStyle</code> that reflects the formatting + * of this run. + * + * @return A <code>TextStyle</code> that reflects the formatting + * of this run. + */ + public TextStyle makeStyle() { + int mod = 0; + if ((modifiers & BOLD) != 0) mod |= TextStyle.BOLD; + if ((modifiers & ITALIC) != 0) mod |= TextStyle.ITALIC; + if ((modifiers & UNDERLINE) != 0) mod |= TextStyle.UNDERLINE; + if ((modifiers & STRIKETHRU) != 0) + mod |= TextStyle.STRIKETHRU; + if ((modifiers & SUPERSCRIPT) != 0) mod |= TextStyle.SUPERSCRIPT; + if ((modifiers & SUBSCRIPT) != 0) mod |= TextStyle.SUBSCRIPT; + + int mask = TextStyle.BOLD | TextStyle.ITALIC + | TextStyle.UNDERLINE + | TextStyle.STRIKETHRU | TextStyle.SUPERSCRIPT + | TextStyle.SUBSCRIPT; + + TextStyle x = new TextStyle(null, "text", null, mask, + mod, (int)(fontSize/2), fontName, sc); + + // If color table is available, set the colors. 
+ if (ct != null) { + Color fc = ct.getColor(colorIndex & 0xF, true); + Color bc = ct.getColor(colorIndex >> 4, false); + x.setColors(fc, bc); + } + + return x; + } + + + /** + * Display debug information. + */ + public void dump() { + System.out.print("TEXT RUN: fontIndex = " + fontIndex + + " fontsize = " + fontSize + + " colorIndex = " + colorIndex + + " "); + if ((modifiers & BOLD) != 0) System.out.print("BOLD,"); + if ((modifiers & ITALIC) != 0) System.out.print("ITALIC,"); + if ((modifiers & UNDERLINE) != 0) System.out.print("UNDERLINE,"); + if ((modifiers & STRIKETHRU) != 0) System.out.print("STRIKETHRU,"); + if ((modifiers & SUPERSCRIPT) != 0) System.out.print("SUPERSCRIPT,"); + if ((modifiers & SUBSCRIPT) != 0) System.out.print("SUBSCRIPT,"); + System.out.println("\n" + text); + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml new file mode 100644 index 000000000000..aa889d4d3f2e --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/build.xml @@ -0,0 +1,141 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). 
+ + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> +<project name="xmrg_jooxcxs_wordsmith" default="main" basedir="."> + + <!-- ================================================================= --> + <!-- settings --> + <!-- ================================================================= --> + + <!-- project prefix, used for targets and build.lst --> + <property name="prj.prefix" value="xmrg"/> + + <!-- name of this sub target used in recursive builds --> + <property name="target" value="xmrg_jooxcxs_wordsmith"/> + + <!-- relative path to project directory --> + <property name="prj" value="../../../../../../../.."/> + + <!-- start of java source code package structure --> + <property name="java.dir" value="${prj}/java"/> + + <!-- path component for current java package --> + <property name="package" + value="org/openoffice/xmerge/converter/xml/sxw/wordsmith"/> + + <!-- define how to handle CLASSPATH environment --> + <property name="build.sysclasspath" value="ignore"/> + + <!-- classpath settings for javac tasks --> + <path id="classpath"> + <pathelement location="${build.class}"/> + <pathelement location="${solar.jar}/parser.jar"/> + <pathelement location="${solar.jar}/jaxp.jar"/> + </path> + + <!-- set whether we want to compile with or without deprecation --> + <property name="deprecation" value="on"/> + + <!-- ================================================================= --> + <!-- solar build environment targets --> + <!-- ================================================================= --> + + <target name="build_dir" unless="build.dir"> + <property name="build.dir" value="${out}"/> + </target> + + <target name="solar" depends="build_dir" if="solar.update"> + <property name="solar.properties" + value="${solar.bin}/solar.properties"/> + </target> + + <target name="init" depends="solar"> + 
<property name="build.compiler" value="classic"/> + <property file="${solar.properties}"/> + <property file="${build.dir}/class/solar.properties"/> + </target> + + <target name="info"> + <echo message="--------------------"/> + <echo message="${target}"/> + <echo message="--------------------"/> + </target> + + + <!-- ================================================================= --> + <!-- custom targets --> + <!-- ================================================================= --> + + <!-- the main target, called in recursive builds --> + <target name="main" depends="info,prepare,compile"/> + + <!-- prepare output directories --> + <target name="prepare" depends="init" if="build.class"> + <mkdir dir="${build.dir}"/> + <mkdir dir="${build.class}"/> + </target> + + <!-- compile java sources in ${package} --> + <target name="compile" depends="prepare" if="build.class"> + <javac srcdir="${java.dir}" + destdir="${build.class}" + debug="${debug}" + deprecation="${deprecation}" + optimize="${optimize}"> + <classpath refid="classpath"/> + <include name="${package}/DOCConstants.java"/> + <include name="${package}/textRecord.java"/> + <include name="${package}/util.java"/> + <include name="${package}/WSDecoder.java"/> + <include name="${package}/WseColorTable.java"/> + <include name="${package}/WseFontTable.java"/> + <include name="${package}/Wse.java"/> + <include name="${package}/WseHeader.java"/> + <include name="${package}/WSEncoder.java"/> + <include name="${package}/WsePara.java"/> + <include name="${package}/WseTextRun.java"/> + <include name="${package}/DocumentMergerImpl.java"/> + <include name="${package}/DocumentSerializerImpl.java"/> + <include name="${package}/DocumentDeserializerImpl.java"/> + <include name="${package}/ConverterCapabilitiesImpl.java"/> + <include name="${package}/PluginFactoryImpl.java"/> + </javac> + </target> + + <!-- clean up --> + <target name="clean" depends="prepare"> + <delete includeEmptyDirs="true"> + <fileset 
dir="${build.class}"> + <patternset> + <include name="${package}/*.class"/> + </patternset> + </fileset> + </delete> + </target> + +</project> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml new file mode 100644 index 000000000000..9285730569db --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/converter.xml @@ -0,0 +1,17 @@ +<?xml version="1.0"?> +<converters> + <converter type="staroffice/sxw" version="1.0"> + <converter-display-name> + WordSmith + </converter-display-name> + <converter-description> + StarWriter XML to/from WordSmith conversion + </converter-description> + <converter-vendor>OpenOffice.org</converter-vendor> + <converter-class-impl> + org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + </converter-class-impl> + <converter-target type="application/x-wordsmith" /> + </converter> +</converters> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk new file mode 100644 index 000000000000..c64e26894dac --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/makefile.mk @@ -0,0 +1,32 @@ +#************************************************************************* +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. 
+# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. +# +#************************************************************************* +PRJNAME=converter +TARGET=cv_jcsscdcxs_wordsmith +PRJ=../../../../../../../../../.. + +.INCLUDE : ant.mk +ALLTAR: ANTBUILD diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java new file mode 100644 index 000000000000..7651767920fc --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/textRecord.java @@ -0,0 +1,115 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). 
+ * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.openoffice.xmerge.util.Debug; +import java.io.IOException; +import java.io.DataOutputStream; +import java.io.ByteArrayOutputStream; + +/** + * This class represents a single text record in a WordSmith document. + * A record is composed of one or more "WordSmith elements", which + * include: WordSmith header, font table, color table, paragraphs, + * and text runs. + * + * @author David Proulx + */ + +class textRecord { + + java.util.Vector elements; + + + /** + * Default constructor + */ + textRecord() { + elements = new java.util.Vector(10); + } + + + /** + * Add an element + * + * @param elem The element to add + */ + void addElement(Wse elem) { + elements.add(elem); + } + + + /** + * Return the number of bytes needed to represent the current + * contents of this text record. + * + * @return The number of bytes needed to represent the current + * contents of this text record. + */ + int getByteCount() { + int totalBytes = 0; + int nElements = elements.size(); + for (int i = 0; i < nElements; i++) { + Wse e = (Wse)elements.elementAt(i); + totalBytes += e.getByteCount(); + } + return totalBytes; + } + + + /** + * Return the contents of this record as a <code>byte</code> array. + * + * @return the contents of this record as a <code>byte</code> array. 
+ */ + byte[] getBytes() { + DataOutputStream os = null; // Used for storing the data + ByteArrayOutputStream bs = null; // Used for storing the data + byte ftBytes[] = null; + byte ctBytes[] = null; + + try { + bs = new ByteArrayOutputStream(); + os = new DataOutputStream(bs); + int nElements = elements.size(); + for (int i = 0; i < nElements; i++) { + Wse e = (Wse)elements.get(i); + os.write(e.getBytes()); + } + + } catch (IOException e) { + e.printStackTrace(); + } + + if (bs != null) + return bs.toByteArray(); + else + return null; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java new file mode 100644 index 000000000000..0c1af8d5a8ec --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/util.java @@ -0,0 +1,68 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
/**
 *  WordSmith utility class.  Provides static helpers that assemble an
 *  <code>int</code> from consecutive bytes of an array, treating the
 *  byte at the lowest index as the most significant one.
 *
 *  @author   David Proulx
 */
class util {

    /**
     *  Convert 2 bytes to an integer.
     *
     *  @param  data   <code>byte</code> array to read from.
     *  @param  index  Position of the first (most significant) byte.
     *
     *  @return  The unsigned 16-bit value formed by
     *           <code>data[index]</code> and <code>data[index+1]</code>.
     */
    static int intFrom2bytes(byte[] data, int index) {
        // Mask each byte so that sign extension does not leak into the result.
        int high = data[index] & 0xFF;
        int low = data[index + 1] & 0xFF;
        return (high << 8) | low;
    }


    /**
     *  Convert 4 bytes to an integer.
     *
     *  @param  data   <code>byte</code> array to read from.
     *  @param  index  Position of the first (most significant) byte.
     *
     *  @return  The 32-bit value formed by <code>data[index]</code>
     *           through <code>data[index+3]</code>.
     */
    static int intFrom4bytes(byte[] data, int index) {
        int value = 0;
        // Shift the accumulated value up one byte, then append the next byte.
        for (int offset = 0; offset < 4; offset++) {
            value = (value << 8) | (data[index + offset] & 0xFF);
        }
        return value;
    }
}