diff options
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc')
12 files changed, 2211 insertions, 0 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java new file mode 100644 index 000000000000..671ae420bcfc --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/ConverterCapabilitiesImpl.java @@ -0,0 +1,93 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/

package org.openoffice.xmerge.converter.xml.sxw.aportisdoc;

import org.openoffice.xmerge.ConverterCapabilities;
import org.openoffice.xmerge.converter.xml.OfficeConstants;

/**
 *  <p>AportisDoc implementation of <code>ConverterCapabilities</code> for
 *  the {@link
 *  org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl
 *  PluginFactoryImpl}.</p>
 *
 *  <p>Used with StarWriter XML to/from AportisDoc conversions.  The
 *  <code>ConverterCapabilities</code> specify which "Office"
 *  <code>Document</code> tags and attributes are supported on the
 *  "Device" <code>Document</code> format.</p>
 */
public final class ConverterCapabilitiesImpl
    implements ConverterCapabilities {

    /**
     *  Reports whether the given "Office" tag is one of the tags this
     *  converter handles.
     *
     *  @param  tag  Tag name to test.  A <code>null</code> value simply
     *               matches nothing, because every comparison is made
     *               from the constant side.
     *
     *  @return  <code>true</code> if the tag is in the supported list
     *           below, <code>false</code> otherwise.
     */
    public boolean canConvertTag(String tag) {

        return OfficeConstants.TAG_OFFICE_DOCUMENT.equals(tag)
            || OfficeConstants.TAG_OFFICE_DOCUMENT_CONTENT.equals(tag)
            || OfficeConstants.TAG_OFFICE_BODY.equals(tag)
            || OfficeConstants.TAG_PARAGRAPH.equals(tag)
            || OfficeConstants.TAG_HEADING.equals(tag)
            || OfficeConstants.TAG_ORDERED_LIST.equals(tag)
            || OfficeConstants.TAG_UNORDERED_LIST.equals(tag)
            || OfficeConstants.TAG_LIST_ITEM.equals(tag)
            || OfficeConstants.TAG_LIST_HEADER.equals(tag)
            || OfficeConstants.TAG_SPAN.equals(tag)
            || OfficeConstants.TAG_HYPERLINK.equals(tag)
            || OfficeConstants.TAG_LINE_BREAK.equals(tag)
            || OfficeConstants.TAG_SPACE.equals(tag)
            || OfficeConstants.TAG_TAB_STOP.equals(tag);
    }

    /**
     *  Reports whether the given attribute of the given tag is
     *  supported.  The only supported combination is the space-count
     *  attribute on the space tag.
     *
     *  @param  tag        Tag name owning the attribute.
     *  @param  attribute  Attribute name to test.
     *
     *  @return  <code>true</code> only for the space tag together with
     *           its space-count attribute.
     */
    public boolean canConvertAttribute(String tag,
                                       String attribute) {

        return OfficeConstants.TAG_SPACE.equals(tag)
            && OfficeConstants.ATTRIBUTE_SPACE_COUNT.equals(attribute);
    }
}

diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java new file mode 100644 index 000000000000..86627c6d7ed3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocConstants.java @@ -0,0 +1,69 @@ +/************************************************************************ + * + * DO NOT ALTER OR 
REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.openoffice.xmerge.converter.palm.PdbUtil; + +/** + * Constants used for encoding and decoding the AportisDoc format. + * + * @author Herbie Ong + */ +interface DocConstants { + + /** Creator id. */ + public static final int CREATOR_ID = PdbUtil.intID("REAd"); + + /** Type id. */ + public static final int TYPE_ID = PdbUtil.intID("TEXt"); + + /** Constant for uncompressed version. */ + public static final short UNCOMPRESSED = 1; + + /** Constant for compressed version. */ + public static final short COMPRESSED = 2; + + /** Constant used for spare fields. */ + public static final int SPARE = 0; + + /** AportisDoc record size. */ + public static final short TEXT_RECORD_SIZE = 4096; + + /** Constant for encoding scheme. */ + public static final String ENCODING = "8859_1"; + + /** Constant for TAB character. 
*/ + public final static char TAB_CHAR = '\t'; + + /** Constant for EOL character. */ + public final static char EOL_CHAR = '\n'; + + /** Constant for SPACE character. */ + public final static char SPACE_CHAR = ' '; +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java new file mode 100644 index 000000000000..9651e5b10b4d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java @@ -0,0 +1,304 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; + +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.util.Resources; +import org.openoffice.xmerge.util.Debug; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.DocumentDeserializerImpl} + * to decode the AportisDoc format. It currently decodes + * the text content into a single <code>String</code> object. + * + * @author Herbie Ong + */ +final class DocDecoder implements DocConstants { + + /** For decoding purposes. */ + private final static int COUNT_BITS = 3; + + /** Resources object for I18N. */ + private Resources res = null; + + + /** + * Default constructor creates a header and a text buffer + * for holding all the text in the AportisDoc database. + */ + DocDecoder() { + res = Resources.getInstance(); + } + + + /** + * Decode the text records into a single <code>String</code> + * of text content. + * + * @param Record <code>Record</code> array holding AportisDoc + * contents. + * + * @throws IOException If any I/O error occurs. 
+ */ + String parseRecords(Record[] recs) throws IOException { + + // read the header record + HeaderInfo header = readHeader(recs[0].getBytes()); + + dumpHeader(header); + + // store all the characters in textBuffer + StringBuffer textBuffer = new StringBuffer(header.textLen); + + switch (header.version) { + + case COMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = decompress(recs[i].getBytes(), + header.textRecordSize); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + case UNCOMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = recs[i].getBytes(); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + default: + throw new IOException(res.getString("UNKNOWN_DOC_VERSION")); + + } + + return textBuffer.toString(); + } + + + /** + * <p>Decompress the <code>byte</code> array.</p> + * + * <p>The resulting uncompressed <code>byte</code> array should + * be within <code>textRecordSize</code> length, definitely + * within twice the size it claims, else treat it as a problem + * with the encoding of that PDB and throw + * <code>IOException</code>.</p> + * + * @param bytes Compressed <code>byte</code> array. + * @param textRecordSize Size of uncompressed + * <code>byte</code> array. + * + * @throws IOException If <code>textRecordSize</code> < + * <code>cBytes.length</code>. + */ + private byte[] decompress(byte[] cBytes, int textRecordSize) + throws IOException { + + // create byte array for storing uncompressed bytes + // it should be within textRecordSize range, definitely + // within twice of textRecordSize! if not, then + // an ArrayIndexOutOfBoundsException will get thrown, + // and it should be converted into an IOException, and + // treat it as a conversion error. 
+ byte[] uBytes = new byte[textRecordSize*2]; + + int up = 0; + int cp = 0; + + try { + + while (cp < cBytes.length) { + + int c = cBytes[cp++] & 0xff; + + // codes 1...8 mean copy that many bytes + if (c > 0 && c < 9) { + + while (c-- > 0) + uBytes[up++] = cBytes[cp++]; + } + + // codes 0, 9...0x7F represent themselves + else if (c < 0x80) { + uBytes[up++] = (byte) c; + } + + // codes 0xC0...0xFF represent "space + ascii char" + else if (c >= 0xC0) { + uBytes[up++] = (byte) ' '; + uBytes[up++] = (byte) (c ^ 0x80); + } + + // codes 0x80...0xBf represent sequences + else { + c <<= 8; + c += cBytes[cp++] & 0xff; + int m = (c & 0x3fff) >> COUNT_BITS; + int n = c & ((1 << COUNT_BITS) - 1); + n += COUNT_BITS; + while (n-- > 0) { + uBytes[up] = uBytes[up - m]; + up++; + } + } + } + + } catch (ArrayIndexOutOfBoundsException e) { + + throw new IOException( + res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED")); + } + + // note that ubytes may be larger that the amount of + // uncompressed bytes, so trim it to another byte array + // with the exact size. + byte[] textBytes = new byte[up]; + System.arraycopy(uBytes, 0, textBytes, 0, up); + + return textBytes; + } + + + /** + * Read the header <code>byte</code> array. + * + * @param bytes <code>byte</code> array containing header + * record data. + * + * @return <code>HeaderInfo</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private HeaderInfo readHeader(byte[] bytes) throws IOException { + + HeaderInfo header = new HeaderInfo(); + + ByteArrayInputStream bis = new ByteArrayInputStream(bytes); + DataInputStream dis = new DataInputStream(bis); + + // Normally the first 2 bytes comprised of the version + // which should either be COMPRESSED or UNCOMPRESSED + // SmartDoc/Quickword would add a 0x01 to the first + // byte, thus their version would be 0x0101 for UNCOMPRESSED + // instead of 0x0001 and 0x0102 for UNCOMPRESSED instead of + // 0x0002. 
+ + dis.readByte(); + header.version = dis.readByte(); + + // read extra 2 unused bytes + dis.readShort(); + + // Read the text length, this should be unsigned 4 bytes. + // We could store the read value into a long, but then + // our current buffer limit is the max positive of an int. + // That is a large enough limit, thus we shall stay with + // storing the value in an int. If it exceeds, then + // an IOException should be thrown. + header.textLen = dis.readInt(); + if (header.textLen < 0) { + throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED")); + } + + // read the number of records - unsigned 2 bytes + header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff; + + // read the record size - unsigned 2 bytes + header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff; + + // read extra 4 unused bytes + dis.readInt(); + + return header; + } + + + /** + * Prints out header info into log. Used for debugging purposes only. + * + * @param header <code>HeaderInfo</code> structure. + */ + private void dumpHeader(HeaderInfo header) { + + log("<DOC_INFO "); + log("version=\"" + header.version + "\" "); + log("text-length=\"" + header.textLen + "\" "); + log("number-of-records=\"" + header.textRecordCount + "\" "); + log("record-size=\"" + header.textRecordSize + "\" />"); + } + + + /** + * Sends message to the log object. + * + * @param str Debug string message. + */ + private void log(String str) { + Debug.log(Debug.TRACE, str); + } + + + /** + * Inner class to store AportisDoc header information. + */ + private class HeaderInfo { + + /** length of text section */ + int textLen = 0; + + /** number of text records */ + int textRecordCount = 0; + + /** + * size of a text record. This is normally the same as + * TEXT_RECORD_SIZE, but some applications may modify this. 
+ */ + int textRecordSize = 0; + + /** compression type */ + int version = 0; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java new file mode 100644 index 000000000000..90cf0e5cd1f1 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocEncoder.java @@ -0,0 +1,214 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; + +import org.openoffice.xmerge.converter.palm.Record; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.DocumentSerializerImpl + * DocumentSerializerImpl} to encode the AportisDoc format. + * It does not deal with any XML tags. It only knows how to encode + * from <code>String</code>. + * + * @author Herbie Ong + */ +final class DocEncoder implements DocConstants { + + /** Text buffer to contain text section. */ + private StringBuffer textBuffer = null; + + /** Length of text section. */ + private int textLen = 0; + + /** Number of text records. */ + private int textRecCount = 0; + + + /** + * Default constructor creates a header and + * a text buffer for holding all the text in + * the AportisDoc database. + */ + DocEncoder() { + + textBuffer = new StringBuffer(TEXT_RECORD_SIZE); + } + + + /** + * This method appends text into the text section of + * the AportisDoc database. + * + * @param text <code>String</code> to append. + */ + void addText(String text) { + + textBuffer.append(text); + } + + + /** + * This method appends text into the text section of + * the AportisDoc database. + * + * @param text <code>char</code> array to append. + */ + void addText(char[] text) { + + textBuffer.append(text); + } + + + /** + * This method appends text character into the text + * section of the AportisDoc database. + * + * @param text <code>char</code> to append. + */ + void addText(char text) { + + textBuffer.append(text); + } + + + /** + * This method encodes the information given to a + * palm <code>Record</code> array in the AportisDoc + * database format. 
+ * + * @return <code>Record</code> array holding AportisDoc + * contents. + * + * @throws IOException If any I/O error occurs. + */ + Record[] getRecords() throws IOException { + + byte textBytes[] = processTextBuffer(); + textLen = textBytes.length; + textRecCount = (short) (textBytes.length / TEXT_RECORD_SIZE); + + // recBytes to hold a record of bytes at a time + byte recBytes[] = new byte[TEXT_RECORD_SIZE]; + int pos = 0; + + List textRecords = new ArrayList(textRecCount + 1); + + // split textBytes into chunks of Record objects + // and store in textRecords object. + for (int i = 0; i < textRecCount; i++) { + + System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length); + pos += recBytes.length; + Record zRec = new Record(recBytes); + textRecords.add(zRec); + } + + // there's more if ... + + if (pos < textLen) { + + textRecCount++; + + recBytes = new byte[textLen - pos]; + System.arraycopy(textBytes, pos, recBytes, 0, recBytes.length); + Record rec = new Record(recBytes); + textRecords.add(rec); + } + + // construct the Record array and copy + // references from textRecords. + + Record[] allRecords = new Record[textRecords.size() + 1]; + + allRecords[0] = new Record(getHeaderBytes()); + + for (int i = 1; i < allRecords.length; i++) { + + allRecords[i] = (Record) textRecords.get(i-1); + } + + return allRecords; + } + + + /** + * This method converts the text buffer into a <code>byte</code> + * array with the proper encoding of the text section of the + * AportisDoc format. + * + * TODO: do compression. + * + * @return byte[] Converted <code>byte</code> array of text + * section. + * + * @throws IOException If any I/O error occurs. + */ + private byte[] processTextBuffer() throws IOException + { + String str = textBuffer.toString(); + byte bytes[] = str.getBytes(ENCODING); + + return bytes; + } + + + /** + * This method produces the <code>byte</code> array for the header. + * + * @return <code>byte</code> array containing header record data. 
+ * + * @throws IOException If any I/O error occurs. + */ + private byte[] getHeaderBytes() throws IOException + { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(bos); + + // TODO: for now, we shall use UNCOMPRESSED. + // later, we need to use COMPRESSED or a setting. + dos.writeShort(UNCOMPRESSED); + dos.writeShort(SPARE); + dos.writeInt(textLen); + dos.writeShort(textRecCount); + dos.writeShort(TEXT_RECORD_SIZE); + dos.writeInt(SPARE); + + byte[] bytes = bos.toByteArray(); + + return bytes; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..98022bcf47d9 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentDeserializerImpl.java @@ -0,0 +1,313 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. 
If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.Element; +import org.w3c.dom.Text; + +import java.io.IOException; +import java.util.Enumeration; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.Debug; + +/** + * <p>AportisDoc implementation of <code>DocumentDeserializer</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>This converts an file in AportisDoc PDB format to StarOffice + * XML format.</p> + * + * <p>The <code>deserialize</code> method uses a <code>DocDecoder</code> + * to read the AportisDoc format into a <code>String</code> object, then + * it calls <code>buildDocument</code> to create a <code>SxwDocument</code> + * object from it.</p> + * + * @author Herbie Ong + */ +public final class DocumentDeserializerImpl + implements OfficeConstants, DocConstants, DocumentDeserializer { + + /** A <code>ConvertData</code> object assigned to this object. */ + private ConvertData cd = null; + + + /** + * Constructor that assigns the given <code>ConvertData</code> + * to this object as input. 
+ * + * @param cd A <code>ConvertData</code> object to read data for + * the conversion process by the <code>deserialize</code> + * method. + */ + public DocumentDeserializerImpl(ConvertData cd) { + this.cd = cd; + } + + + /** + * Convert the given <code>ConvertData</code> object + * into a <code>SxwDocument</code> object. + * + * @return Resulting <code>SxwDocument</code> object. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws IOException, ConvertException { + + int numberOfPDBs = cd.getNumDocuments(); + Document doc = null; + int i=0; + ConvertData cdOut; + Enumeration e = cd.getDocumentEnumeration(); + while (e.hasMoreElements()) { + PalmDocument palmDoc = (PalmDocument) e.nextElement(); + PalmDB pdb = palmDoc.getPdb(); + + log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); + log("<AportisDoc>"); + + Record[] recs = pdb.getRecords(); + String docName = palmDoc.getName(); + DocDecoder decoder = new DocDecoder(); + String text = decoder.parseRecords(recs); + doc = buildDocument(docName, text); + + log("</AportisDoc>"); + } + + return doc; + } + + + /** + * Parses the text content of an AportisDoc format and build a + * <code>SxwDocument</code>. + * + * @param docName Name of <code>Document</code>. + * @param str Text content of AportisDoc format. + * + * @return Resulting <code>SxwDocument</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private SxwDocument buildDocument(String docName, String str) + throws IOException { + + // create minimum office xml document. + SxwDocument sxwDoc = new SxwDocument(docName); + sxwDoc.initContentDOM(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Grab hold of the office:body tag, + // Assume there should be one. + // This is where top level paragraphs will append to. 
+ NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + Node bodyNode = list.item(0); + + // Store all the text in a character array. + char[] text = str.toCharArray(); + + // startIndex has 2 purposes: + // if value is -1, it means that there are no text characters + // needed to be processed for a Text node. if value >= 0, it + // is the index of the starting position of a text section + // for a Text node. + int startIndex = -1; + + // Create a paragraph node to start with. + Element paraNode = doc.createElement(TAG_PARAGRAPH); + + log("<PARA>"); + + for (int i = 0; i < text.length; i++) { + + switch (text[i]) { + + case TAB_CHAR: + + // Check if there are text to be processed first. + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, i - 1); + startIndex = -1; + } + + // Then, add tab element. + Element tabNode = doc.createElement(TAG_TAB_STOP); + paraNode.appendChild(tabNode); + + log("<TAB/>"); + break; + + case EOL_CHAR: + + // Check if there are text to be processed first. + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, i - 1); + startIndex = -1; + } + + // Then, add the current paragraph to body. + bodyNode.appendChild(paraNode); + + // Create another paragraph element. + paraNode = doc.createElement(TAG_PARAGRAPH); + + log("</PARA>"); + log("<PARA>"); + break; + + case SPACE_CHAR: + + // count is the number of space chars from i + int count = 0; + + // Do a look ahead and count the number of space chars + while (text[i + 1 + count] == SPACE_CHAR) { + count++; + } + + // Need to build a space node ONLY if count is > 1. + + if (count > 0) { + + // Check if there are text to be processed first + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, + startIndex, i); + startIndex = -1; + } + + // Then, create a space element + // with the proper attribute. 
+ Element spaceNode = doc.createElement(TAG_SPACE); + spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, + Integer.toString(count)); + + paraNode.appendChild(spaceNode); + + // reposition i to the last space character. + i += count; + + log("<SPACE count=\"" + count + "\" />"); + + } else { + + // If there are no chars for text node yet, + // consider this one. + if (startIndex < 0) { + + startIndex = i; + log("<TEXT>"); + } + } + + break; + + default: + + // If there are no chars for text node yet, + // this should be the start. + if (startIndex < 0) { + + startIndex = i; + log("<TEXT>"); + } + + break; + } + } + + int lastIndex = text.length - 1; + + // Check if there are text to be processed first. + + if (startIndex >= 0) { + addTextNode(doc, paraNode, text, startIndex, lastIndex); + } + + // Then, add the last paragraph element if it is not added yet. + if (text[lastIndex] != EOL_CHAR) { + bodyNode.appendChild(paraNode); + } + + log("</PARA>"); + + return sxwDoc; + } + + + /** + * Add a Text <code>Node</code> to the given paragraph node with the + * text starting at the given <code>startPos</code> until + * <code>endPos</code>. + * + * @param doc <code>org.w3c.dom.Document</code> object for creating + * <code>Node</code> objects. + * @param para The current paragraph <code>Node</code> to append + * text <code>Node</code>. + * @param text Array of characters containing text. + * @param startPos Starting index position for text value. + * @param endPos End index position for text value. + */ + private void addTextNode(org.w3c.dom.Document doc, Node para, char text[], + int startPos, int endPos) { + + String str = new String(text, startPos, endPos - startPos + 1); + Text textNode = doc.createTextNode(str); + para.appendChild(textNode); + log(str); + log("</TEXT>"); + } + + /** + * Sends message to the log object. + * + * @param str Debug message. 
+ */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java new file mode 100644 index 000000000000..23b236b41e6a --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentMergerImpl.java @@ -0,0 +1,99 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * AportisDoc implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + } + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = 
diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java new file mode 100644 index 000000000000..a2652df792b3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocumentSerializerImpl.java @@ -0,0 +1,532 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. 
+ * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import java.io.IOException; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PdbEncoder; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.Debug; +import org.openoffice.xmerge.util.XmlUtil; + +/** + * <p>AportisDoc implementation of + * org.openoffice.xmerge.DocumentSerializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>The <code>serialize</code> method traverses the DOM + * document from the given <code>Document</code> object. It uses a + * <code>DocEncoder</code> object for the actual conversion of + * contents to the AportisDoc format.</p> + * + * @author Herbie Ong + */ + + +public final class DocumentSerializerImpl + implements OfficeConstants, DocConstants, DocumentSerializer { + + /** A <code>DocEncoder</code> object for encoding to AportisDoc. */ + private DocEncoder encoder = null; + + /** SXW <code>Document</code> object that this converter processes. */ + private SxwDocument sxwDoc = null; + + + /** + * Constructor. + * + * @param doc A SXW <code>Document</code> to be converted. 
+ */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument) doc; + } + + + /** + * <p>Method to convert a <code>Document</code> into a PDB. + * It passes back the converted data as a <code>ConvertData</code> + * object.</p> + * + * <p>This method is not thread safe for performance reasons. + * This method should not be called from within two threads. + * It would be best to call this method only once per object + * instance.</p> + * + * @return The <code>ConvertData</code> object containing the output. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() throws ConvertException, IOException { + + + // get the server document name + + String docName = sxwDoc.getName(); + + // get DOM document + + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + encoder = new DocEncoder(); + + // Traverse to the office:body element. + // There should only be one. + + NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); + int len = list.getLength(); + + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + // create a ConvertData object. + // + Record records[] = encoder.getRecords(); + ConvertData cd = new ConvertData(); + + PalmDocument palmDoc = new PalmDocument(docName, + DocConstants.CREATOR_ID, DocConstants.TYPE_ID, + 0, PalmDB.PDB_HEADER_ATTR_BACKUP, records); + + cd.addDocument(palmDoc); + return cd; + } + + + /** + * This method traverses <i>office:body</i> element. + * + * @param node <i>office:body</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseBody(Node node) throws IOException { + + log("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); + log("<AportisDOC>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) || + nodeName.equals(TAG_HEADING)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + log("<OTHERS " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</AportisDOC>"); + } + + + /** + * This method traverses the <i>text:p</i> and <i>text:h</i> + * element <code>Node</code> objects. + * + * @param node A <i>text:p</i> or <i>text:h</i> + * <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParagraph(Node node) throws IOException { + + log("<PARA>"); + traverseParaContents(node); + encoder.addText(EOL_CHAR); + log("</PARA>"); + } + + + /** + * This method traverses a paragraph content. + * It uses the <code>traverseParaElem</code> method to + * traverse into Element <code>Node</code> objects. + * + * @param node A paragraph or content <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParaContents(Node node) throws IOException { + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + short nodeType = child.getNodeType(); + + switch (nodeType) { + + case Node.TEXT_NODE: + // this is for grabbing text nodes. 
+ String s = child.getNodeValue(); + + if (s.length() > 0) { + encoder.addText(s); + } + + log("<TEXT>"); + log(s); + log("</TEXT>"); + + break; + + case Node.ELEMENT_NODE: + + traverseParaElem(child); + break; + + case Node.ENTITY_REFERENCE_NODE: + + log("<ENTITY_REFERENCE>"); + traverseParaContents(child); + log("<ENTITY_REFERENCE/>"); + break; + + default: + log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); + } + } + } + } + + + /** + * This method traverses an <code>Element</code> <code>Node</code> + * within a paragraph. + * + * @param node <code>Element</code> <code>Node</code> within a + * paragraph. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParaElem(Node node) throws IOException { + + String nodeName = node.getNodeName(); + + if (nodeName.equals(TAG_SPACE)) { + + // this is for text:s tags. + NamedNodeMap map = node.getAttributes(); + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + StringBuffer space = new StringBuffer(SPACE_CHAR); + int count = 1; + + if (attr != null) { + + try { + + String countStr = attr.getNodeValue(); + count = Integer.parseInt(countStr.trim()); + + } catch (NumberFormatException e) { + + // TODO: for now, throw IOException. + // later, perhaps will have to throw + // some other conversion exception instead. + throw new IOException(e.getMessage()); + } + } + + for (int j = 0; j < count; j++) { + + space.append(SPACE_CHAR); + } + + encoder.addText(space.toString()); + + log("<SPACE count=\"" + count + "\" />"); + + } else if (nodeName.equals(TAG_TAB_STOP)) { + + // this is for text:tab-stop + encoder.addText(TAB_CHAR); + + log("<TAB/>"); + + } else if (nodeName.equals(TAG_LINE_BREAK)) { + + // commented out by Csaba: There is no point to convert a linebreak + // into a EOL, because it messes up the number of XML nodes and the + // merge won't work properly. 
Other solution would be to implement such + // nodemerger, which would be able to merge embedded tags in a paragraph + + // this is for text:line-break + // encoder.addText(EOL_CHAR); + + log("skipped <LINE-BREAK/>"); + + } else if (nodeName.equals(TAG_SPAN)) { + + // this is for text:span + log("<SPAN>"); + traverseParaContents(node); + log("</SPAN>"); + + } else if (nodeName.equals(TAG_HYPERLINK)) { + + // this is for text:a + log("<HYPERLINK>"); + traverseParaContents(node); + log("<HYPERLINK/>"); + + } else if (nodeName.equals(TAG_BOOKMARK) || + nodeName.equals(TAG_BOOKMARK_START)) { + + log("<BOOKMARK/>"); + + } else if (nodeName.equals(TAG_TEXT_VARIABLE_SET) + || nodeName.equals(TAG_TEXT_VARIABLE_GET) + || nodeName.equals(TAG_TEXT_EXPRESSION) + || nodeName.equals(TAG_TEXT_USER_FIELD_GET) + || nodeName.equals(TAG_TEXT_PAGE_VARIABLE_GET) + || nodeName.equals(TAG_TEXT_SEQUENCE) + || nodeName.equals( TAG_TEXT_VARIABLE_INPUT) + || nodeName.equals(TAG_TEXT_TIME) + || nodeName.equals( TAG_TEXT_PAGE_COUNT) + || nodeName.equals(TAG_TEXT_PAGE_NUMBER ) + || nodeName.equals(TAG_TEXT_SUBJECT) + || nodeName.equals(TAG_TEXT_TITLE) + || nodeName.equals(TAG_TEXT_CREATION_TIME) + || nodeName.equals(TAG_TEXT_DATE) + || nodeName.equals(TAG_TEXT_TEXT_INPUT) + || nodeName.equals(TAG_TEXT_AUTHOR_INITIALS)) { + log("<FIELD>"); + traverseParaContents(node); + log("</FIELD>"); + + }else if (nodeName.startsWith(TAG_TEXT)) { + log("<Unknown text Field>"); + traverseParaContents(node); + log("</Unknown text Field>"); + + }else { + + log("<OTHERS " + XmlUtil.getNodeInfo(node) + " />"); + } + } + + + /** + * This method traverses list tags <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. A list can only contain one optional + * <i>text:list-header</i> and one or more <i>text:list-item</i> + * elements. + * + * @param node A list <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseList(Node node) throws IOException { + + log("<LIST>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + + traverseListItem(child); + + } else if (nodeName.equals(TAG_LIST_HEADER)) { + + traverseListHeader(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST>"); + } + + + /** + * This method traverses a <i>text:list-header</i> element. + * It contains one or more <i>text:p</i> elements. + * + * @param node A list header <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseListHeader(Node node) throws IOException { + + log("<LIST-HEADER>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST-HEADER>"); + } + + + /** + * <p>This method will traverse a <i>text:list-item</i>. + * A list item may contain one or more of <i>text:p</i>, + * <i>text:h</i>, <i>text:section</i>, <i>text:ordered-list</i> + * and <i>text:unordered-list</i>.</p> + * + * <p>This method currently only implements grabbing <i>text:p</i>, + * <i>text:h</i>, <i>text:unordered-list</i> and + * <i>text:ordered-list</i>.</p> + * + * @param node The <code>Node</code>. + * + * @throws IOException If any I/O error occurs. 
+ */ + private void traverseListItem(Node node) throws IOException { + + log("<LIST-ITEM>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + log("<INVALID-XML-BUG " + XmlUtil.getNodeInfo(child) + " />"); + } + } + } + } + + log("</LIST-ITEM>"); + } + + + /** + * Logs debug messages. + * + * @param str The debug message. + */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java new file mode 100644 index 000000000000..d1de0b19a6ab --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/PluginFactoryImpl.java @@ -0,0 +1,141 @@ +/************************************************************************ + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * Copyright 2000, 2010 Oracle and/or its affiliates. + * + * OpenOffice.org - a multi-platform office productivity suite + * + * This file is part of OpenOffice.org. + * + * OpenOffice.org is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3 + * only, as published by the Free Software Foundation. + * + * OpenOffice.org is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License version 3 for more details + * (a copy is included in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU Lesser General Public License + * version 3 along with OpenOffice.org. If not, see + * <http://www.openoffice.org/license.html> + * for a copy of the LGPLv3 License. + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.DocumentMergerFactory; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.DocumentSerializerFactory; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.DocumentDeserializerFactory; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.util.registry.ConverterInfo; +import java.io.IOException; +import java.io.InputStream; + +/** + * <p>AportisDoc implementation of the <code>PluginFactory</code>. + * This encapsulates conversion of StarWriter XML format to and from + * AportisDoc format.</p> + * + * <p>The superclass produces a particular + * {@link org.openoffice.xmerge.Document Document} + * object, i.e. {@link + * org.openoffice.xmerge.converter.xml.sxw.SxwDocument + * SxwDocument} that the converters in this class works with. Thus, + * this class only implements the methods that produces the converters, + * i.e. 
{@link + * org.openoffice.xmerge.DocumentSerializer + * DocumentSerializer} and {@link + * org.openoffice.xmerge.DocumentDeserializer + * DocumentDeserializer}; + * as well as the {@link + * org.openoffice.xmerge.ConverterCapabilities + * ConverterCapabilities} object that is specific to this format + * conversion. That superclass also produces a {@link + * org.openoffice.xmerge.DocumentMerger DocumentMerger} + * object, i.e. {@link + * org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl + * DocumentMergerImpl} which this class derives the functionality.</p> + * + * @author Herbie Ong + */ +public final class PluginFactoryImpl extends SxwPluginFactory + implements DocumentDeserializerFactory, DocumentSerializerFactory, + DocumentMergerFactory { + + public PluginFactoryImpl (ConverterInfo ci) { + super(ci); + } + + /** ConverterCapabilities object for this type of conversion. */ + private final static ConverterCapabilities converterCap = + new ConverterCapabilitiesImpl(); + + + /** + * Returns an instance of <code>DocumentSerializerImpl</code>, + * which is an implementation of the <code>DocumentSerializer</code> + * interface. + * + * @param doc <code>Document</code> object to be + * converted/serialized. + * + * @return A <code>DocumentSerializerImpl</code> object. + */ + public DocumentSerializer createDocumentSerializer(Document doc) { + + return new DocumentSerializerImpl(doc); + } + + + /** + * Returns an instance of <code>DocumentDeserializerImpl</code>, + * which is an implementation of the <code>DocumentDeserializer</code> + * interface. + * + * @param cd <code>ConvertData</code> object for reading data + * which will be converted back to a + * <code>Document</code> object. + * + * @return A DocumentDeserializerImpl object. 
+ */ + public DocumentDeserializer createDocumentDeserializer(ConvertData cd) { + + return new DocumentDeserializerImpl(cd); + } + + + /** + * Returns an instance of <code>DocumentMergerImpl</code>, + * which is an implementation of the <code>DocumentMerger</code> + * interface. + * + * @param doc <code>Document</code> to merge. + * + * @return A DocumentMergerImpl object. + */ + public DocumentMerger createDocumentMerger(Document doc) { + + ConverterCapabilities cc = converterCap; + DocumentMergerImpl merger = new DocumentMergerImpl(doc, cc); + return merger; + } + + public Document createDeviceDocument(String name, InputStream is) + throws IOException { + + PalmDocument palmDoc = new PalmDocument(is); + return palmDoc; + } +} + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml new file mode 100644 index 000000000000..b6efd3e2bec3 --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/build.xml @@ -0,0 +1,134 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. 
If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. + +--> +<project name="xmrg_jooxcxs_aportisdoc" default="main" basedir="."> + + <!-- ================================================================= --> + <!-- settings --> + <!-- ================================================================= --> + + <!-- project prefix, used for targets and build.lst --> + <property name="prj.prefix" value="xmrg"/> + + <!-- name of this sub target used in recursive builds --> + <property name="target" value="xmrg_jooxcxs_aportisdoc"/> + + <!-- relative path to project directory --> + <property name="prj" value="../../../../../../../.."/> + + <!-- start of java source code package structure --> + <property name="java.dir" value="${prj}/java"/> + + <!-- path component for current java package --> + <property name="package" + value="org/openoffice/xmerge/converter/xml/sxw/aportisdoc"/> + + <!-- define how to handle CLASSPATH environment --> + <property name="build.sysclasspath" value="ignore"/> + + <!-- classpath settings for javac tasks --> + <path id="classpath"> + <pathelement location="${build.class}"/> + <pathelement location="${solar.jar}/parser.jar"/> + <pathelement location="${solar.jar}/jaxp.jar"/> + <pathelement location="${solar.jar}/xerces.jar"/> + </path> + + <!-- set wether we want to compile with or without deprecation --> + <property name="deprecation" value="on"/> + + <!-- ================================================================= --> + <!-- solar build environment targets --> + <!-- ================================================================= --> + + <target name="build_dir" unless="build.dir"> + <property name="build.dir" value="${out}"/> + </target> + + <target name="solar" depends="build_dir" if="solar.update"> + <property name="solar.properties" + value="${solar.bin}/solar.properties"/> + </target> + + <target name="init" depends="solar"> + <property name="build.compiler" value="classic"/> + <property 
file="${solar.properties}"/> + <property file="${build.dir}/class/solar.properties"/> + </target> + + <target name="info"> + <echo message="--------------------"/> + <echo message="${target}"/> + <echo message="--------------------"/> + </target> + + + <!-- ================================================================= --> + <!-- custom targets --> + <!-- ================================================================= --> + + <!-- the main target, called in recursive builds --> + <target name="main" depends="info,prepare,compile"/> + + <!-- prepare output directories --> + <target name="prepare" depends="init" if="build.class"> + <mkdir dir="${build.dir}"/> + <mkdir dir="${build.class}"/> + </target> + + <!-- compile java sources in ${package} --> + <target name="compile" depends="prepare" if="build.class"> + <javac srcdir="${java.dir}" + destdir="${build.class}" + debug="${debug}" + deprecation="${deprecation}" + optimize="${optimize}"> + <classpath refid="classpath"/> + <include name="${package}/DocConstants.java"/> + <include name="${package}/DocDecoder.java"/> + <include name="${package}/DocEncoder.java"/> + <include name="${package}/DocumentDeserializerImpl.java"/> + <include name="${package}/DocumentSerializerImpl.java"/> + <include name="${package}/DocumentMergerImpl.java"/> + <include name="${package}/ConverterCapabilitiesImpl.java"/> + <include name="${package}/PluginFactoryImpl.java"/> + </javac> + </target> + + <!-- clean up --> + <target name="clean" depends="prepare"> + <delete includeEmptyDirs="true"> + <fileset dir="${build.class}"> + <patternset> + <include name="${package}/*.class"/> + </patternset> + </fileset> + </delete> + </target> + +</project> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml new file mode 100644 index 000000000000..7942295c004a --- /dev/null +++ 
b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/converter.xml @@ -0,0 +1,43 @@ +<?xml version="1.0"?> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<converters> + <converter type="staroffice/sxw" version="1.0"> + <converter-display-name> + AportisDoc + </converter-display-name> + <converter-description> + StarWriter XML to/from AportisDoc conversion + </converter-description> + <converter-vendor>OpenOffice.org</converter-vendor> + <converter-class-impl> + org.openoffice.xmerge.converter.xml.sxw.aportisdoc.PluginFactoryImpl + </converter-class-impl> + <converter-target type="application/x-aportisdoc" /> + </converter> +</converters> + diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk new file mode 100644 index 000000000000..5b3f3fea509d --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/makefile.mk @@ -0,0 +1,32 @@ +#*************************************************************************** +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# Copyright 2000, 2010 Oracle and/or its affiliates. +# +# OpenOffice.org - a multi-platform office productivity suite +# +# This file is part of OpenOffice.org. +# +# OpenOffice.org is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License version 3 +# only, as published by the Free Software Foundation. +# +# OpenOffice.org is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License version 3 for more details +# (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU Lesser General Public License +# version 3 along with OpenOffice.org. If not, see +# <http://www.openoffice.org/license.html> +# for a copy of the LGPLv3 License. 
+# +#*************************************************************************** + +TARGET=xmrg_jooxcxs_aportisdoc +PRJ=../../../../../../../.. + +.INCLUDE : ant.mk +ALLTAR: ANTBUILD diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html new file mode 100644 index 000000000000..78cfe79bfbbf --- /dev/null +++ b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/package.html @@ -0,0 +1,237 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- + + DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + + Copyright 2000, 2010 Oracle and/or its affiliates. + + OpenOffice.org - a multi-platform office productivity suite + + This file is part of OpenOffice.org. + + OpenOffice.org is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 3 + only, as published by the Free Software Foundation. + + OpenOffice.org is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License version 3 for more details + (a copy is included in the LICENSE file that accompanied this code). + + You should have received a copy of the GNU Lesser General Public License + version 3 along with OpenOffice.org. If not, see + <http://www.openoffice.org/license.html> + for a copy of the LGPLv3 License. 
+ +--> +<html> +<head> +<title>org.openoffice.xmerge.converter.xml.sxw.aportisdoc package</title> +</head> + +<body bgcolor="white"> + +<p>Provides the tools for doing the conversion of StarWriter XML to +and from AportisDoc format.</p> + +<p>It follows the {@link org.openoffice.xmerge} framework for the conversion process.</p> + +<p>Since it converts to/from a Palm application format, these converters +follow the <a href=../../../../converter/palm/package-summary.html#streamformat> +<code>PalmDB</code> stream format</a> for writing out to the Palm sync client or +reading in from the Palm sync client.</p> + +<p>Note that <code>PluginFactoryImpl</code> also provides a +<code>DocumentMerger</code> object, i.e. {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentMergerImpl DocumentMergerImpl}. +This functionality was derived from its superclass +{@link org.openoffice.xmerge.converter.xml.sxw.SxwPluginFactory +SxwPluginFactory}.</p> + +<h2>AportisDoc pdb format - Doc</h2> + +<p>The AportisDoc pdb format is widely used by different Palm applications, +e.g. QuickWord, AportisDoc Reader, MiniWrite, etc. Note that some +of these applications put tweaks into the format. The converters will only +support the default AportisDoc format, plus some very minor tweaks to accommodate +other applications.</p> + +<p>The text content of the format is plain text, i.e. there are no styles +or structures. There is no notion of lists, list items, paragraphs, +headings, etc. The format does have support for bookmarks.</p> + +<p>For most Doc applications, the default character encoding supported is +the extended ASCII character set, i.e. ISO-8859-1. StarWriter XML is in +UTF-8 encoding scheme. 
Since UTF-8 encoding scheme covers more characters, +converting UTF-8 strings into extended ASCII would mean that there can be +possible loss of character mappings.</p> + +<p>Using JAXP, XML files can be parsed and read in as Java <code>String</code>s, +which are in Unicode format, so there is no loss of character mapping from UTF-8 +to Java Strings. There is possible loss of character mapping in +converting Java <code>String</code>s to ASCII bytes. Java characters that +cannot be represented in extended ASCII are converted into the ASCII +character '?' (0x3F in hexadecimal) via the <code>String.getBytes(encoding)</code> +API.</p> + +<h2>SXW to DOC Conversion</h2> + +<p>The <code>DocumentSerializerImpl</code> class implements the +<code>org.openoffice.xmerge.DocumentSerializer</code>. +This class specifically provides the conversion process from a given +<code>SxwDocument</code> object to DOC formatted records, which are +then passed back to the client via the <code>ConvertData</code> object.</p> + +<p>The following XML tags are handled. [Note that some may not be implemented yet.]</p> +<ul> +<li> + <p>Paragraphs <tt>&lt;text:p&gt;</tt> and Headings <tt>&lt;text:h&gt;</tt></p> + + <p>Heading elements are classified the same as paragraph + elements since both have the same possible elements inside. + Their main difference is that they refer to different types + of style information, which is outside of their element tags. + Since there are no styles in the DOC format, headings should + be treated the same way a paragraph is converted.</p> + + <p>For paragraph elements, convert and transfer text nodes + that are essential. Text nodes directly contained within paragraph + nodes are such. There are also a number of elements that + a paragraph element may contain. 
These are explained in their + own context.</p> + + <p>At the end of the paragraph, an EOL character is added by + the converter to provide a separation for each paragraph, + since the Doc format does not have a notion of a paragraph.</p> +</li> +<li> + <p>White spaces <tt><text:s></tt> and Tabs <tt><text:tab-stop></tt></p> + + <p>In SXW, normally 2 or more white-space characters are collapsed into + a single space character. In order to make sure that the document + content really contains those white-space characters, there are special + elements assigned to them.</p> + + <p>The space element specifies the number of spaces it represents. + Thus, converting it just means providing the specific number of spaces + that the element requires.</p> + + <p>There is also the tab-stop element. This is a bit tricky. In a + StarWriter document, tab-stops are specified by a column position. + A tab is not an exact number of spaces, but rather a specific column + positioning. Say, regular tab-stops are set at every 5th column. + At column 4, if I hit a tab, it goes to column 5. At column 1, hitting + a tab would put the cursor at column 5 as well. SmartDoc and AportisDoc + applications go by columns for the ASCII tab character. The only problem + is that in StarWriter, one could specify a different tab-stop, but not + in most of these Doc applications, at least I have not seen one. + The solution for this is just to convert to the ASCII tab + character and not do anything for different tab-stop positioning.</p> +</li> +<li> + <p>Line breaks <tt><text:line-break></tt></p> + + <p>To represent line breaks, it is simplest to just put an ASCII LF + character. Note that the side effect of this is that an end of paragraph + also contains an ASCII LF character. 
Thus, for the DOC to SXW conversion, + line breaks are not distinguishable from specifying the end of a + paragraph.</p> +</li> +<li> + <p>Text spans <tt><text:span></tt></p> + + <p>Text spans contain text that has different style attributes + from the paragraphs'. Text spans can be embedded within another + text span. Since it is purely for style tagging, we only need + to convert and transfer the text elements within these.</p> +</li> +<li> + <p>Hyperlinks <tt><text:a></tt></p> + + <p>Convert and transfer the text portion.</p> +</li> +<li> + <p>Bookmarks <tt><text:bookmark></tt> <tt><text:bookmark-start></tt> + <tt><text:bookmark-end></tt> [Not implemented yet]</p> + + <p>In SXW, bookmark elements are embedded inside paragraph elements. + Bookmarks can either mark a text position or a text range. <tt><text:bookmark></tt> + marks a position while the pair <tt><text:bookmark-start></tt> and + <tt><text:bookmark-end></tt> marks a text range. The DOC format only + supports bookmarking a text position. Thus, for the conversion, + <tt><text:bookmark></tt> and <tt><text:bookmark-start></tt> will both mark + a text position.</p> +</li> +<li> + <p>Change Tracking <tt><text:tracked-changes></tt> + <tt><text:change*></tt> [Not implemented yet]</p> + + <p>Change tracking elements are not yet supported by the current + OpenOffice XML filters, so this will need to be watched. The text + within these elements has to be interpreted properly during the + conversion process.</p> +</li> +<li> + <p>Lists <tt><text:unordered-list></tt> and + <tt><text:ordered-list></tt></p> + + <p>A list can only contain one optional <tt><text:list-header></tt> + and one or more <tt><text:list-item></tt> elements.</p> + + <p>A <tt><text:list-header></tt> contains one or more paragraph +elements. 
Since there are no styles, the conversion process does not + do anything special for list headers; conversion for the paragraphs + within list headers is the same as explained above.</p> + + <p>A <tt><text:list-item></tt> may contain one or more paragraphs, + headings, lists, etc. Since the Doc format does not support any list + structure, there will not be any special handling for this element. + Conversion for elements within it shall be applied according to the + element type. Thus, lists with paragraphs within them will result in just + plain paragraphs. Sublists will not be identifiable. Paragraphs in + sublists will still appear.</p> +</li> +<li> + <p><tt><text:section></tt></p> + + <p>I am not sure what this is yet; more investigation is needed on this.</p> +</li> +</ul> +<p>There may be other tags that will still need to be addressed for this conversion.</p> + +<p>Refer to {@link org.openoffice.xmerge.converter.xml.sxw.aportisdoc.DocumentSerializerImpl DocumentSerializerImpl} +for details of implementation. It uses the <code>DocEncoder</code> class to do the encoding +part.</p> + +<h2>DOC to SXW Conversion</h2> + +<p>The <code>DocumentDeserializerImpl</code> class implements the +<code>org.openoffice.xmerge.DocumentDeserializer</code>. It is +passed the device document in the form of a <code>ConvertData</code> object. +It will then create an <code>SxwDocument</code> object from the conversion of +the DOC formatted records.</p> + +<p>The text content of the Doc format will be transferred as text. Paragraph +elements will be formed based on the existence of an ASCII LF character. There +will be at least one paragraph element.</p> + +<p>Bookmarks in the Doc format will be converted to the bookmark element +<tt><text:bookmark></tt> [Not implemented yet].</p> + + +<h2>Merging changes</h2> + +<p>As mentioned above, the <code>DocumentMerger</code> object produced by +<code>PluginFactoryImpl</code> is <code>DocumentMergerImpl</code>. 
+Refer to the javadocs for that package/class for its merging specifications. +</p> + +<h2>TODO list</h2> + +<ol> +<li>Investigate Palm devices with different character encodings.</li> +<li>Investigate other StarWriter XML tags.</li> +</ol> + +</body> +</html> |