summaryrefslogtreecommitdiff
path: root/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java
diff options
context:
space:
mode:
Diffstat (limited to 'xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java')
-rw-r--r--xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java404
1 files changed, 404 insertions, 0 deletions
diff --git a/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java
new file mode 100644
index 000000000000..bc77e0fa6988
--- /dev/null
+++ b/xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java
@@ -0,0 +1,404 @@
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+package org.openoffice.xmerge.converter.xml.sxw.pocketword;
+
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.converter.xml.ParaStyle;
+import org.openoffice.xmerge.converter.xml.TextStyle;
+
+import java.io.InputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+
+/**
+ * <p>Class representing a Pocket Word Document.</p>
+ *
+ * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
+ * and to read existing data to allow for conversion to OpenOffice Writer
+ * format.</p>
+ *
+ * <p>The on-disk layout handled by this class is: a 52 byte preamble, a table
+ * of 80 byte font records, the document descriptor, the paragraph data and
+ * finally a fixed trailer (optionally preceded by two padding bytes).</p>
+ *
+ * @author Mark Murnane
+ * @version 1.1
+ */
+public class PocketWordDocument implements Document, PocketWordConstants {
+
+    /** Size in bytes of the preamble that starts every file. */
+    private static final int PW_PREAMBLE_LENGTH = 52;
+
+    /** Size in bytes of one entry in the font table. */
+    private static final int PW_FONT_RECORD_LENGTH = 80;
+
+    /** Number of leading bytes of a font record that hold the UTF-16LE name. */
+    private static final int PW_FONT_NAME_LENGTH = 64;
+
+    /** Number of paragraph bytes that precede the word containing the FF FF marker. */
+    private static final int PW_PARAGRAPH_LEAD_LENGTH = 8;
+
+    /** Number of zero bytes write() may insert in front of the trailer. */
+    private static final int PW_TRAILER_PADDING_LENGTH = 2;
+
+    private String docName;
+
+    private byte[] preamble;
+    // Names (String) of the fonts declared by a file loaded through read().
+    private Vector fonts;
+    private DocumentDescriptor descriptor;
+    // Paragraph objects in document order.
+    private Vector paragraphs;
+
+    private ParaStyle pStyle;
+    private Paragraph currentPara;
+
+    /*
+     * The trailer currently appears to be constant, but if it is found to
+     * have a variable component, then this initialisation should be moved
+     * to an initTrailer() method.
+     *
+     * Padding is sometimes needed before the trailer to ensure the file
+     * ends on a 4-byte boundary, but this is handled in write().
+     */
+    private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
+                                                       0x09, 0x00,
+                                                       0x03, 0x00,
+                                                       (byte)0x82, 0x00,
+                                                       0x00, 0x00,
+                                                       0x00, 0x00,
+                                                       0x00, 0x00,
+                                                       0x00, 0x00,
+                                                       0x00, 0x00 };
+
+
+    /**
+     * <p>Constructs a new Pocket Word Document.</p>
+     *
+     * <p>This new document does not contain any information.  Document data must
+     * either be added using appropriate methods, or an existing file can be
+     * {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
+     *
+     * @param   name    The name of the <code>PocketWordDocument</code>.  A
+     *                  trailing file extension is removed.
+     */
+    public PocketWordDocument(String name) {
+
+        docName = trimDocumentName(name);
+
+        preamble = new byte[PW_PREAMBLE_LENGTH];
+        fonts = new Vector(0, 1);
+        descriptor = new DocumentDescriptor();
+        paragraphs = new Vector(0, 1);
+    }
+
+
+    /**
+     * <p>This method reads <code>byte</code> data from the InputStream and
+     * extracts font and paragraph data from the file.</p>
+     *
+     * <p>An empty stream is reported on <code>System.err</code> and leaves the
+     * document unchanged.</p>
+     *
+     * @param   docData InputStream containing a Pocket Word data file.
+     *
+     * @throws  IOException     In case of any I/O errors, if
+     *                          <code>docData</code> is <code>null</code>, or if
+     *                          the data is truncated or malformed.
+     */
+    public void read(InputStream docData) throws IOException {
+
+        if (docData == null) {
+            throw new IOException ("No input stream to convert");
+        }
+
+        // The preamble may become important for font declarations.
+        int preambleBytes = readFully(docData, preamble);
+        // #i33702# check for an empty InputStream.
+        if (preambleBytes == 0) {
+            System.err.println("Error:invalid input stream");
+            return;
+        }
+        if (preambleBytes < preamble.length) {
+            throw new IOException("Truncated Pocket Word file: incomplete preamble");
+        }
+
+        /*
+         * Font table: fixed size records, the last of which is flagged by the
+         * byte sequence 05 00 01 00 at its end.  A short read means the file
+         * ended before that flag was seen, so stop instead of re-testing a
+         * stale buffer forever.
+         */
+        byte[] font = new byte[PW_FONT_RECORD_LENGTH];
+        do {
+            if (readFully(docData, font) < font.length) {
+                throw new IOException("Truncated Pocket Word file: incomplete font table");
+            }
+
+            String name = new String(font, 0, PW_FONT_NAME_LENGTH, "UTF-16LE");
+            fonts.add(name.trim());
+
+        } while (!(font[76] == 5 && font[77] == 0
+                    && font[78] == 1 && font[79] == 0));
+
+        /*
+         * TODO:  The document descriptor data that follows the fonts ends with
+         *        a variable section containing data for each of the paragraphs.
+         *        It may be possible to use this information to calculate starting
+         *        positions for each paragraph rather than iterating through the
+         *        entire byte stream.
+         */
+
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        byte[] chunk = new byte[4096];
+        int count;
+        while ((count = docData.read(chunk)) != -1) {
+            bos.write(chunk, 0, count);
+        }
+        byte[] contentData = bos.toByteArray();
+
+        /*
+         * Scan the remaining data one 4 byte word at a time.  A word whose last
+         * two bytes are FF FF marks a paragraph that begins 8 bytes earlier;
+         * each marker therefore also closes the paragraph opened by the
+         * previous one.  Words that would run past the end of the data are
+         * not examined.
+         */
+        int start = -1;
+        for (int i = 0; i + 3 < contentData.length; i += 4) {
+            if (contentData[i + 2] != (byte)0xFF
+                    || contentData[i + 3] != (byte)0xFF) {
+                continue;
+            }
+
+            int paragraphStart = i - PW_PARAGRAPH_LEAD_LENGTH;
+            if (paragraphStart < 0) {
+                throw new IOException("Malformed Pocket Word file: paragraph marker at offset "
+                                        + i + " is too close to the start of the data");
+            }
+
+            if (start >= 0) {
+                paragraphs.add(new Paragraph(slice(contentData, start, paragraphStart)));
+            }
+            start = paragraphStart;
+        }
+
+        if (start < 0) {
+            throw new IOException("Malformed Pocket Word file: no paragraph data found");
+        }
+
+        /*
+         * Special case, the last paragraph.
+         * It runs up to the trailer.  A zero byte immediately in front of the
+         * trailer is taken to mean that the two padding bytes are present.
+         */
+        int paddingProbe = contentData.length - trailer.length - 1;
+        if (paddingProbe < 0) {
+            throw new IOException("Truncated Pocket Word file: missing trailer");
+        }
+
+        int end = contentData.length - trailer.length;
+        if (contentData[paddingProbe] == 0) {
+            end -= PW_TRAILER_PADDING_LENGTH;
+        }
+        if (end < start) {
+            throw new IOException("Truncated Pocket Word file: last paragraph overlaps the trailer");
+        }
+        paragraphs.add(new Paragraph(slice(contentData, start, end)));
+    }
+
+
+    /*
+     * Fills the buffer from the stream, looping because a single read() call
+     * is allowed to deliver fewer bytes than requested.  Returns the number of
+     * bytes stored, which is less than buffer.length only if the stream ended.
+     */
+    private static int readFully(InputStream in, byte[] buffer) throws IOException {
+        int total = 0;
+        while (total < buffer.length) {
+            int count = in.read(buffer, total, buffer.length - total);
+            if (count < 0) {
+                break;
+            }
+            total += count;
+        }
+        return total;
+    }
+
+
+    /*
+     * Returns a copy of source[from] .. source[to - 1].
+     */
+    private static byte[] slice(byte[] source, int from, int to) {
+        byte[] copy = new byte[to - from];
+        System.arraycopy(source, from, copy, 0, copy.length);
+        return copy;
+    }
+
+
+    /*
+     * Utility method to make sure the document name is stripped of any file
+     * extensions before use.  The extension is matched without regard to case
+     * and independently of the default locale.
+     */
+    private String trimDocumentName(String name) {
+        int stemLength = name.length() - FILE_EXTENSION.length();
+
+        // regionMatches() returns false for a negative offset, i.e. when the
+        // name is shorter than the extension.
+        if (name.regionMatches(true, stemLength, FILE_EXTENSION, 0,
+                               FILE_EXTENSION.length())) {
+            // strip the extension
+            return name.substring(0, stemLength);
+        }
+
+        return name;
+    }
+
+
+    /**
+     * <p>Method to provide access to all of the <code>Paragraph</code> objects
+     * in the <code>Document</code>.</p>
+     *
+     * @return <code>Enumeration</code> over the paragraphs in the document.
+     */
+    public Enumeration getParagraphEnumeration() {
+        return paragraphs.elements();
+    }
+
+
+    /**
+     * <p>Returns the <code>Document</code> name with no file extension.</p>
+     *
+     * @return  The <code>Document</code> name with no file extension.
+     */
+    public String getName() {
+        return docName;
+    }
+
+
+    /**
+     * <p>Returns the <code>Document</code> name with file extension.</p>
+     *
+     * @return  The <code>Document</code> name with file extension.
+     */
+    public String getFileName() {
+        return docName + FILE_EXTENSION;
+    }
+
+
+    /**
+     * <p>Writes out the <code>Document</code> content to the specified
+     * <code>OutputStream</code>.</p>
+     *
+     * <p>The stream is flushed and closed once the document has been written.
+     * The method may be called more than once, and also on a document that
+     * was populated through {@link #read(InputStream) read}.</p>
+     *
+     * <p>This method may not be thread-safe.
+     * Implementations may or may not synchronize this
+     * method.  User code (i.e. caller) must make sure that
+     * calls to this method are thread-safe.</p>
+     *
+     * @param   os      <code>OutputStream</code> to write out the
+     *                  <code>Document</code> content.
+     *
+     * @throws  IOException     If any I/O error occurs.
+     */
+    public void write(OutputStream os) throws IOException {
+        DataOutputStream dos = new DataOutputStream(os);
+
+        initPreamble();
+        dos.write(preamble);
+
+        // The records are built afresh for every call and kept apart from the
+        // font names collected by read(), which are plain Strings.
+        byte[][] fontRecords = loadFonts();
+        for (int i = 0; i < fontRecords.length; i++) {
+            dos.write(fontRecords[i]);
+        }
+
+        // Start from an empty descriptor so that a repeated write() does not
+        // register every paragraph a second time.
+        descriptor = new DocumentDescriptor();
+        for (int i = 0; i < paragraphs.size(); i++) {
+            Paragraph para = (Paragraph)paragraphs.elementAt(i);
+            descriptor.addParagraph((short)para.getTextLength(), para.getLines());
+        }
+        dos.write(descriptor.getDescriptor());
+
+        for (int i = 0; i < paragraphs.size(); i++ ) {
+            Paragraph para = (Paragraph)paragraphs.elementAt(i);
+
+            // Last paragraph has some extra data
+            if (i + 1 == paragraphs.size()) {
+                para.setLastParagraph(true);
+            }
+            dos.write(para.getParagraphData());
+        }
+
+
+        /*
+         * Before we write out the trailer, we need to make sure that it will
+         * lead to the file ending on a 4 byte boundary.  The trailer is 18
+         * bytes long, so two padding bytes are needed when the data written
+         * so far is already aligned.
+         */
+        if (dos.size() % 4 == 0) {
+            dos.write((byte)0x00);
+            dos.write((byte)0x00);
+        }
+
+        dos.write(trailer);
+
+        dos.flush();
+        dos.close();
+    }
+
+
+    /**
+     * <p>This method adds a new paragraph element to the document.  No string
+     * data is added to the paragraph.</p>
+     *
+     * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
+     * is used as the target for all subsequent calls to addParagraphData().</p>
+     *
+     * @param   style       Paragraph Style object describing the formatting for
+     *                      the new paragraph.  Can be null.
+     * @param   listElement true if this paragraph is to be bulleted;
+     *                      false otherwise.
+     */
+    public void addParagraph(ParaStyle style, boolean listElement) {
+        /* For the moment, only support basic text entry in a single paragraph */
+        Paragraph para = new Paragraph(style);
+
+        paragraphs.add(para);
+
+        pStyle = style;
+        currentPara = para;
+
+        if (listElement) {
+            para.setBullets(true);
+        }
+    }
+
+
+    /**
+     * <p>This method adds text to the current paragraph.</p>
+     *
+     * <p>If no paragraph has been added through
+     * {@link #addParagraph(ParaStyle, boolean) addParagraph} yet, it creates
+     * one.</p>
+     *
+     * @param   data        The string data for this segment.
+     * @param   style       Text Style object describing the formatting of this
+     *                      segment.  Can be null.
+     */
+    public void addParagraphData(String data, TextStyle style) {
+        if (currentPara == null) {
+            addParagraph(null, false);
+        }
+        currentPara.addTextSegment(data, style);
+    }
+
+
+    /*
+     * Preamble is the portion before font specification which never
+     * seems to change from one file, or one saved version, to the next.
+     *
+     * Bytes 18h and 19h seem to contain the number of fonts and should
+     * be modified when all of the fonts have been specified.
+     * These bytes are the first two on the fourth line below.
+     */
+    private void initPreamble() {
+        preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
+                                0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00,  // Bytes 3-4 Font??
+                                0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // Bytes 1-2  # Fonts
+                                0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
+                                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                                0x00, 0x00, 0x00, 0x00 };
+    }
+
+
+    /*
+     * This method builds the minimum font data that is used by the converter
+     * and returns it as one 80 byte record per font, in file order.
+     * Currently, all documents convert to 10 point Courier New.  Tahoma is
+     * always mentioned in Pocket Word files, however, even if it is not used.
+     *
+     * TODO:    Rewrite to allow for multiple fonts once font support issues
+     *          have been resolved.
+     */
+    private byte[][] loadFonts() throws IOException {
+        return new byte[][] {
+            fontRecord("Tahoma",
+                       new byte[] { 0x02, 0x00, 0x01, 0x00,
+                                    0x00, 0x00, 0x01, 0x00,
+                                    0x00, 0x00, 0x00, 0x00,
+                                    0x00, 0x00, 0x00, 0x00 }),
+            fontRecord("Courier New",
+                       new byte[] { 0x14, 0x00, 0x04, 0x00,
+                                    0x01, 0x00, 0x00, 0x00,
+                                    0x00, 0x00, 0x15, 0x00,
+                                    // Next part indicates that this is the last font
+                                    0x05, 0x00, 0x01, 0x00 })
+        };
+    }
+
+
+    /*
+     * Lays out a single font record: the UTF-16LE name, zero filled up to 64
+     * bytes, followed by the 16 attribute bytes.
+     */
+    private static byte[] fontRecord(String name, byte[] attributes) throws IOException {
+        byte[] nameBytes = name.getBytes("UTF-16LE");
+        byte[] record = new byte[PW_FONT_RECORD_LENGTH];    // zero filled by the JVM
+
+        System.arraycopy(nameBytes, 0, record, 0, nameBytes.length);
+        System.arraycopy(attributes, 0, record, PW_FONT_NAME_LENGTH, attributes.length);
+
+        return record;
+    }
+}