diff options
Diffstat (limited to 'xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java')
-rw-r--r-- | xmerge/source/pocketword/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/PocketWordDocument.java | 404 |
1 files changed, 404 insertions, 0 deletions
/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2000, 2010 Oracle and/or its affiliates.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/

package org.openoffice.xmerge.converter.xml.sxw.pocketword;

import org.openoffice.xmerge.Document;
import org.openoffice.xmerge.converter.xml.ParaStyle;
import org.openoffice.xmerge.converter.xml.TextStyle;

import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

import java.util.Enumeration;
import java.util.Vector;


/**
 * <p>Class representing a Pocket Word Document.</p>
 *
 * <p><code>PocketWordDocument</code> is used to create new Pocket Word documents
 * and to read existing data to allow for conversion to OpenOffice Writer
 * format.</p>
 *
 * <p>Layout handled by this class, in file order: a 52 byte preamble, one
 * 80 byte entry per font, the document descriptor, the paragraph data and
 * finally an 18 byte trailer (optionally preceded by two padding bytes).</p>
 *
 * @author      Mark Murnane
 * @version 1.1
 */
public class PocketWordDocument implements Document, PocketWordConstants {

    /** Size in bytes of the preamble that precedes the font table. */
    private static final int PREAMBLE_SIZE = 52;

    /** Size in bytes of a single font table entry. */
    private static final int FONT_ENTRY_SIZE = 80;

    /** Leading bytes of a font entry that hold the UTF-16LE font name. */
    private static final int FONT_NAME_SIZE = 64;

    private String docName;

    private byte[] preamble;
    /** Font names (<code>String</code>) collected by {@link #read(InputStream)}. */
    private Vector fonts;
    private DocumentDescriptor descriptor;
    /** The <code>Paragraph</code> objects of this document, in order. */
    private Vector paragraphs;

    private ParaStyle pStyle;
    private Paragraph currentPara;

    /*
     * The trailer currently appears to be constant, but if its found to
     * have a variable component, then this initialisation should be moved
     * to an initTrailer() method.
     *
     * Padding is sometimes needed before the trailer to ensure the file
     * ends on a 4-byte boundary, but this is handled in write().
     */
    private static final byte[] trailer = new byte[] { (byte)0x82, 0x00,
                                                             0x09, 0x00,
                                                             0x03, 0x00,
                                                       (byte)0x82, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00,
                                                             0x00, 0x00 };


    /**
     * <p>Constructs a new Pocket Word Document.</p>
     *
     * <p>This new document does not contain any information.  Document data must
     *    either be added using appropriate methods, or an existing file can be
     *    {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
     *
     * @param   name    The name of the <code>PocketWordDocument</code>.  A
     *                  trailing file extension, if present, is removed.
     */
    public PocketWordDocument(String name) {

        docName = trimDocumentName(name);

        preamble = new byte[PREAMBLE_SIZE];
        fonts = new Vector(0, 1);
        descriptor = new DocumentDescriptor();
        paragraphs = new Vector(0, 1);
    }


    /**
     * <p>This method reads <code>byte</code> data from the InputStream and
     *    extracts font and paragraph data from the file.</p>
     *
     * <p>An entirely empty stream is reported on <code>System.err</code> and
     *    leaves the document unchanged.  A stream that ends part way through
     *    the preamble, the font table or the paragraph data is rejected with
     *    an <code>IOException</code>.</p>
     *
     * @param   docData InputStream containing a Pocket Word data file.
     *
     * @throws  IOException     In case of any I/O errors, or if the stream
     *                          is <code>null</code>, truncated or malformed.
     */
    public void read(InputStream docData) throws IOException {

        if (docData == null) {
            throw new IOException ("No input stream to convert");
        }

        // The preamble may become important for font declarations.
        int preambleBytes = readFully(docData, preamble);
        // #i33702# check for an empty InputStream.
        if (preambleBytes == 0) {
            System.err.println("Error:invalid input stream");
            return;
        }
        if (preambleBytes < preamble.length) {
            throw new IOException("Truncated Pocket Word data: incomplete preamble");
        }

        /*
         * Font entries follow the preamble.  The last four bytes of the final
         * entry are 05 00 01 00, which terminates the loop.  Each entry must
         * be read completely: a short read would leave stale bytes in the
         * buffer and the terminator might never be seen.
         */
        byte[] font = new byte[FONT_ENTRY_SIZE];
        do {
            if (readFully(docData, font) < font.length) {
                throw new IOException("Truncated Pocket Word data: incomplete font table");
            }

            String name = new String(font, 0, FONT_NAME_SIZE, "UTF-16LE");
            fonts.add(name.trim());

        } while (!(font[76] == 5 && font[77] == 0
                   && font[78] == 1 && font[79] == 0));

        /*
         * TODO:  The document descriptor data that follows the fonts ends with
         *        a variable section containing data for each of the paragraphs.
         *        It may be possible to use this information to calculate staring
         *        positions for each paragraph rather than iterating through the
         *        entire byte stream.
         */

        // Slurp the remainder of the stream (descriptor, paragraphs, trailer).
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int count;
        while ((count = docData.read(chunk)) != -1) {
            bos.write(chunk, 0, count);
        }
        byte[] contentData = bos.toByteArray();

        /*
         * Scan 4 byte words for the FF FF marker in their upper two bytes.
         * A paragraph starts 8 bytes before the word carrying the marker and
         * runs until 8 bytes before the word carrying the next marker.
         */
        int start = 0;
        boolean sawMarker = false;

        for (int i = 0; i + 3 < contentData.length; i += 4) {
            boolean marker = contentData[i + 2] == (byte)0xFF
                             && contentData[i + 3] == (byte)0xFF;
            if (!marker) {
                continue;
            }

            if (sawMarker) {
                // This marker closes the paragraph opened by the previous one.
                paragraphs.add(new Paragraph(copyRange(contentData, start, i - 8)));
            }

            // Every marker also opens a new paragraph.
            start = i - 8;
            sawMarker = true;
        }

        /*
         * Special case, the last paragraph.  It has no following marker, so
         * it runs up to the trailer.  The trailer is 18 bytes long and may be
         * preceded by two zero padding bytes (see write()); a zero byte just
         * before the trailer is taken to mean that the padding is present.
         *
         * NOTE(review): if no marker was seen at all, start is still 0 and
         * everything before the trailer is handed over as one paragraph, as
         * the original code did - confirm whether that input can occur.
         */
        if (contentData.length < trailer.length + 1) {
            throw new IOException("Truncated Pocket Word data: missing trailer");
        }
        int tailLength = (contentData[contentData.length - 19] == 0) ? 20 : 18;
        paragraphs.add(new Paragraph(
                copyRange(contentData, start, contentData.length - tailLength)));
    }


    /*
     * Reads from the stream until the buffer is full or the stream ends.
     * Returns the number of bytes actually stored, which is less than the
     * buffer length only if the end of the stream was reached.
     */
    private static int readFully(InputStream in, byte[] buffer)
            throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }


    /*
     * Returns a copy of data[start..end).  Offsets computed from file content
     * are validated here so that malformed input surfaces as an IOException
     * rather than as an unchecked index error.
     */
    private static byte[] copyRange(byte[] data, int start, int end)
            throws IOException {
        if (start < 0 || end < start || end > data.length) {
            throw new IOException("Malformed Pocket Word data: invalid paragraph bounds ["
                                  + start + ", " + end + ")");
        }
        byte[] copy = new byte[end - start];
        System.arraycopy(data, start, copy, 0, copy.length);
        return copy;
    }


    /*
     * Utility method to make sure the document name is stripped of any file
     * extensions before use.  The extension is matched without regard to
     * case and independently of the default locale.
     */
    private String trimDocumentName(String name) {
        int extLength = FILE_EXTENSION.length();
        int endIndex = name.length() - extLength;

        if (endIndex >= 0
                && name.regionMatches(true, endIndex, FILE_EXTENSION, 0, extLength)) {
            // strip the extension
            return name.substring(0, endIndex);
        }

        return name;
    }


    /**
     * <p>Method to provide access to all of the <code>Paragraph</code> objects
     *    in the <code>Document</code>.</p>
     *
     * @return <code>Enumeration</code> over the paragraphs in the document.
     */
    public Enumeration getParagraphEnumeration() {
        return paragraphs.elements();
    }


    /**
     * <p>Returns the <code>Document</code> name with no file extension.</p>
     *
     * @return  The <code>Document</code> name with no file extension.
     */
    public String getName() {
        return docName;
    }


    /**
     * <p>Returns the <code>Document</code> name with file extension.</p>
     *
     * @return  The <code>Document</code> name with file extension.
     */
    public String getFileName() {
        return docName + FILE_EXTENSION;
    }


    /**
     * <p>Writes out the <code>Document</code> content to the specified
     * <code>OutputStream</code>.</p>
     *
     * <p>The stream is flushed and closed before this method returns.</p>
     *
     * <p>This method may not be thread-safe.
     * Implementations may or may not synchronize this
     * method.  User code (i.e. caller) must make sure that
     * calls to this method are thread-safe.</p>
     *
     * @param  os  <code>OutputStream</code> to write out the
     *             <code>Document</code> content.
     *
     * @throws  IOException  If any I/O error occurs.
     */
    public void write(OutputStream os) throws IOException {
        DataOutputStream dos = new DataOutputStream(os);

        initPreamble();
        dos.write(preamble);

        // Font entries are built locally; the fonts field only ever holds
        // the names collected by read() and must not be mixed with raw data.
        byte[][] fontEntries = loadFonts();
        for (int i = 0; i < fontEntries.length; i++) {
            dos.write(fontEntries[i]);
        }

        // Start from an empty descriptor so that writing the same document
        // more than once does not register each paragraph repeatedly.
        descriptor = new DocumentDescriptor();
        for (int i = 0; i < paragraphs.size(); i++) {
            Paragraph para = (Paragraph)paragraphs.elementAt(i);
            descriptor.addParagraph((short)para.getTextLength(), para.getLines());
        }
        dos.write(descriptor.getDescriptor());

        for (int i = 0; i < paragraphs.size(); i++ ) {
            Paragraph para = (Paragraph)paragraphs.elementAt(i);

            // Last paragraph has some extra data
            if (i + 1 == paragraphs.size()) {
                para.setLastParagraph(true);
            }
            dos.write(para.getParagraphData());
        }


        /*
         * Before we write out the trailer, we need to make sure that it will
         * lead to the file ending on a 4 byte boundary.  The trailer is 18
         * bytes, so two padding bytes are needed when the data written so
         * far is itself a multiple of four.
         */
        if (dos.size() % 4 == 0) {
            dos.write((byte)0x00);
            dos.write((byte)0x00);
        }

        dos.write(trailer);

        dos.flush();
        dos.close();
    }


    /**
     * <p>This method adds a new paragraph element to the document.  No string
     *    data is added to the paragraph.</p>
     *
     * <p><b>N.B.</b> The newly added paragraph becomes the current paragraph and
     *    is used as the target for all subsequent calls to addParagraphData().</p>
     *
     * @param   style       Paragraph Style object describing the formatting for
     *                      the new paragraph.  Can be null.
     * @param   listElement true if this paragraph is to be bulleted;
     *                      false otherwise.
     */
    public void addParagraph(ParaStyle style, boolean listElement) {
        /* For the moment, only support basic text entry in a single paragraph */
        Paragraph para = new Paragraph(style);

        paragraphs.add(para);

        pStyle = style;
        currentPara = para;

        if (listElement) {
            para.setBullets(true);
        }
    }


    /**
     * <p>This method adds text to the current paragraph.</p>
     *
     * <p>If no paragraphs exist within the document, it creates one.</p>
     *
     * @param   data        The string data for this segment.
     * @param   style       Text Style object describing the formatting of this
     *                      segment.  Can be null.
     */
    public void addParagraphData(String data, TextStyle style) {
        if (currentPara == null) {
            addParagraph(null, false);
        }
        currentPara.addTextSegment(data, style);
    }


    /*
     * Preamble is the portion before font specification which never
     * seems to change from one file, or one saved version, to the next.
     *
     * Bytes 18h and 19h seem to contain the number of fonts and should
     * be modified when all of the fonts have been specified.
     * These bytes are the first two on the fourth line below.
     */
    private void initPreamble() {
        preamble = new byte[] { 0x7B, 0x5C, 0x70, 0x77, 0x69, 0x15, 0x00, 0x00,
                                0x01, 0x01, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
                                0x02, 0x00, 0x2C, 0x00, 0x01, 0x00, 0x0A, 0x00,  // Bytes 3-4 Font??
                                0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // Bytes 1-2  # Fonts
                                0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00,
                                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                                0x00, 0x00, 0x00, 0x00 };
    }


    /*
     * This method builds the minimum font data that is used by the converter.
     * Currently, all documents convert to 10 point Courier New.  Tahoma is
     * always mentioned in Pocket Word files, however, even if it is not used.
     *
     * Returns one 80 byte entry per font, in the order they must be written.
     *
     * TODO:  Rewrite to allow for multiple fonts once font support issues
     *        have been resolved.
     */
    private byte[][] loadFonts() throws IOException {
        byte[] tahoma = buildFontEntry("Tahoma",
                new byte[] { 0x02, 0x00, 0x01, 0x00,
                             0x00, 0x00, 0x01, 0x00,
                             0x00, 0x00, 0x00, 0x00,
                             0x00, 0x00, 0x00, 0x00 });

        byte[] courierNew = buildFontEntry("Courier New",
                new byte[] { 0x14, 0x00, 0x04, 0x00,
                             0x01, 0x00, 0x00, 0x00,
                             0x00, 0x00, 0x15, 0x00,
                             // Next part indicates that this is the last font
                             0x05, 0x00, 0x01, 0x00 });

        return new byte[][] { tahoma, courierNew };
    }


    /*
     * Builds a single font entry: the UTF-16LE encoded name, zero padded to
     * 64 bytes, followed by the given attribute bytes.
     */
    private static byte[] buildFontEntry(String name, byte[] attributes)
            throws IOException {
        byte[] entry = new byte[FONT_ENTRY_SIZE];
        byte[] encodedName = name.getBytes("UTF-16LE");

        System.arraycopy(encodedName, 0, entry, 0, encodedName.length);
        System.arraycopy(attributes, 0, entry, FONT_NAME_SIZE, attributes.length);

        return entry;
    }
}