diff options
Diffstat (limited to 'xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java')
-rw-r--r-- | xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java | 859 |
1 files changed, 0 insertions, 859 deletions
diff --git a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java b/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java deleted file mode 100644 index c2249766b5e5..000000000000 --- a/xmerge/java/org/openoffice/xmerge/converter/xml/sxw/pocketword/Paragraph.java +++ /dev/null @@ -1,859 +0,0 @@ -/************************************************************************ - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/

package org.openoffice.xmerge.converter.xml.sxw.pocketword;

import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;

import java.util.Vector;
import java.util.Enumeration;

import java.awt.Color;

import org.openoffice.xmerge.util.EndianConverter;
import org.openoffice.xmerge.util.ColourConverter;
import org.openoffice.xmerge.converter.xml.ParaStyle;
import org.openoffice.xmerge.converter.xml.TextStyle;


/**
 * Represents a paragraph data structure within a Pocket Word document.
 *
 * <p>A <code>Paragraph</code> is built either from a {@link ParaStyle}
 * (SXW to Pocket Word direction, text added through
 * {@link #addTextSegment(String, TextStyle)} and serialised with
 * {@link #getParagraphData()}) or from the raw bytes of a Pocket Word
 * paragraph (Pocket Word to SXW direction, text segments read back through
 * {@link #getSegmentsEnumerator()}).</p>
 *
 * @author  Mark Murnane
 * @version 1.1
 */
class Paragraph implements PocketWordConstants {

    /** Number of fixed header bytes that precede the paragraph text. */
    private static final int PARA_HEADER_BYTES = 28;

    /** Maximum number of characters placed on one line (Courier New). */
    private static final int LINE_CAPACITY = 29;

    /** Screen space attributed to one character; the unit is unknown. */
    private static final int CHAR_SCREEN_UNITS = 36;

    /*
     * The data elements of a Paragraph.
     *
     * As the 'unknown' values are not calculated they are declared static.
     * They are not declared final because they do have a calculable value.
     */
    private static short unknown1 = 0x23;
    private short dataWords = 0;
    private short textLength = 0;
    private short lengthWithFormatting = 0;
    private short lines = 0;

    private static final short marker = (short)0xFFFF;
    private static int unknown2 = 0x22;      // May be two short values

    private short specialIndentation = 0;
    private short leftIndentation = 0;
    private short rightIndentation = 0;

    private byte bullets = 0;
    private byte alignment = 0;

    private static int unknown3 = 0;

    // Will always have at least these formatting settings in each paragraph
    private short defaultFont = 2;          // Courier New for the time being
    private short defaultSize = 10;


    /*
     * Remaining elements assist in calculating correct values for the
     * paragraph representation.
     */

    private Vector textSegments = null;

    private Vector lineDescriptors = null;

    private ParaStyle pStyle = null;

    private boolean isLastParagraph = false;


    /*
     * Private class constructor used by all constructors.  Ensures the
     * proper initialisation of the Vector storing the paragraph's text.
     */
    private Paragraph () {
        textSegments = new Vector(0, 1);
    }


    /**
     * <p>Constructor for use when converting from SXW format to Pocket Word
     * format.</p>
     *
     * @param   style   Paragraph style object describing the formatting style
     *                  of this paragraph.
     */
    public Paragraph (ParaStyle style) {
        this();

        lineDescriptors = new Vector(0, 1);
        pStyle = style;
    }


    /**
     * <p>Constructor for use when converting from Pocket Word format to SXW
     * format.</p>
     *
     * <p>The fixed header fields are read from the first 28 bytes and the
     * remaining bytes (text with embedded formatting tags) are parsed into
     * text segments.</p>
     *
     * @param   data    Byte array containing byte data describing this
     *                  paragraph from the Pocket Word file.  Must hold at
     *                  least the fixed header fields.
     */
    public Paragraph (byte[] data) {
        this();

        /*
         * Read in all fixed data from the array
         *
         * unknown1 appears at data[0] and data[1]
         */
        dataWords = EndianConverter.readShort(new byte[] { data[2], data[3] } );

        /*
         * The stored text length sits at data[4] and data[5].  It is
         * deliberately not loaded into textLength: parseText() rebuilds that
         * counter through addTextSegment(), and keeping the stored value as
         * well would count every character twice.
         */

        lengthWithFormatting = EndianConverter.readShort(
                                        new byte[] { data[6], data[7] } );
        lines = EndianConverter.readShort(new byte[] { data[8], data[9] } );

        /*
         * The marker appears at data[10] and data[11].
         *
         * The value of unknown2 is at data[12], data[13], data[14] and
         * data[15].
         */

        specialIndentation = EndianConverter.readShort(
                                        new byte[] { data[16], data[17] } );
        leftIndentation = EndianConverter.readShort(
                                        new byte[] { data[18], data[19] } );
        rightIndentation = EndianConverter.readShort(
                                        new byte[] { data[20], data[21] } );

        bullets = data[22];
        alignment = data[23];

        // The value of unknown3 is at data[24], data[25], data[26] and data[27].

        /*
         * The actual paragraph data is in the remainder of the byte sequence.
         *
         * Only the actual text sequence with the embedded formatting tags is
         * relevant to the conversion from Pocket Word to SXW format.
         *
         * The length is treated as an unsigned 16 bit quantity (a length
         * cannot be negative) and is clamped to the bytes really present so
         * that a corrupt header cannot trigger an out-of-bounds copy.
         */
        int textBytes = lengthWithFormatting & 0xFFFF;
        int available = data.length - PARA_HEADER_BYTES;
        if (textBytes > available) {
            textBytes = available;
        }
        if (textBytes < 0) {
            textBytes = 0;
        }

        byte[] text = new byte[textBytes];
        if (textBytes > 0) {
            System.arraycopy(data, PARA_HEADER_BYTES, text, 0, textBytes);
        }
        parseText(text);
    }


    /*
     * Returns the number of operand bytes that follow a formatting tag byte:
     * two for font, font size, colour and font weight tags, one for italic,
     * underline, strikethrough and highlight tags, and none for any other
     * byte.
     */
    private static int operandLength(byte tag) {
        switch (tag) {
            case FONT_TAG:
            case FONT_SIZE_TAG:
            case COLOUR_TAG:
            case FONT_WEIGHT_TAG:
                return 2;

            case ITALIC_TAG:
            case UNDERLINE_TAG:
            case STRIKETHROUGH_TAG:
            case HIGHLIGHT_TAG:
                return 1;

            default:
                return 0;
        }
    }


    /*
     * Processes the text portion of the raw paragraph data from the Pocket
     * Word file.  This data also includes formatting settings for the text in
     * the paragraph.
     *
     * Formatting changes appear like XML/HTML tags.  Formatted blocks are
     * preceded by a sequence of bytes switching on a formatting change and
     * followed by a sequence switching off that formatting change.
     *
     * Each run of text between formatting sequences becomes one text segment.
     * A segment carries a TextStyle only while at least one attribute differs
     * from the defaults (font 1 or 2, size 10, no colour, no modifiers).
     */
    private void parseText (byte[] data) {

        int totalLength = data.length;

        StringBuffer sb = new StringBuffer();

        // Setup text style information
        int mask = TextStyle.BOLD | TextStyle.ITALIC | TextStyle.UNDERLINE
                    | TextStyle.STRIKETHRU;

        String fontName = null;
        int fontSize = 0;
        Color textColour = null;
        boolean colourSet = false;
        Color backColour = null;
        int modifiers = 0;

        TextStyle ts = null;

        boolean inSequence = false;
        boolean sawText = false;

        // Start from the very beginning
        for (int i = 0; i < totalLength; i++) {
            // Will encounter at least two codes first
            if ((byte)(data[i] & 0xF0) == FORMATTING_TAG) {
                if (sawText) {
                    // Style change so dump previous segment and style info
                    addTextSegment(sb.toString(), ts);
                    sb = new StringBuffer();
                    sawText = false;
                }

                int operands = operandLength(data[i]);
                if (i + operands >= totalLength) {
                    // Tag is truncated: nothing sensible left to read.
                    break;
                }

                switch (data[i]) {
                    case FONT_TAG: {
                        int index = EndianConverter.readShort(
                                new byte[] { data[i + 1], data[i + 2] } );

                        /*
                         * Standard font.
                         *
                         * Should really be one, but as the only supported
                         * font currently is Courier New, want to leave it at
                         * Courier New for round trip conversions.
                         *
                         * Also need to account for the fact that Tahoma is
                         * the correct standard font.
                         */
                        if (index == 1 || index == 2) {
                            // Back to the default; "2" records that a font
                            // had been set explicitly before.
                            if (fontName != null) {
                                fontName = "2";
                            }
                        }
                        else {
                            fontName = String.valueOf(index);
                        }
                        break;
                    }

                    case FONT_SIZE_TAG: {
                        int size = EndianConverter.readShort(
                                new byte[] { data[i + 1], data[i + 2] } );

                        if (size == 0) {
                            // Flags the end of the last paragraph
                            isLastParagraph = true;
                        }
                        else if (size == 10) {
                            // Standard size
                            if (fontSize != 0) {
                                fontSize = 10;
                            }
                        }
                        else {
                            fontSize = size;
                        }
                        break;
                    }

                    case COLOUR_TAG:
                        if (data[i + 1] != 0) {
                            ColourConverter cc = new ColourConverter();
                            textColour = cc.convertToRGB(
                                EndianConverter.readShort(
                                    new byte[] { data[i + 1], data[i + 2] } ));
                            colourSet = true;
                        }
                        else {
                            textColour = null;
                            colourSet = false;
                        }
                        break;

                    case FONT_WEIGHT_TAG:
                        if (data[i + 1] == FONT_WEIGHT_BOLD
                                || data[i + 1] == FONT_WEIGHT_THICK) {
                            modifiers |= TextStyle.BOLD;
                        }
                        else {
                            // Clear the bit.  XOR would switch bold ON when
                            // an "off" tag arrives without a matching "on".
                            modifiers &= ~TextStyle.BOLD;
                        }
                        break;

                    case ITALIC_TAG:
                        if (data[i + 1] == (byte)0x01) {
                            modifiers |= TextStyle.ITALIC;
                        }
                        else {
                            modifiers &= ~TextStyle.ITALIC;
                        }
                        break;

                    case UNDERLINE_TAG:
                        if (data[i + 1] == (byte)0x01) {
                            modifiers |= TextStyle.UNDERLINE;
                        }
                        else {
                            modifiers &= ~TextStyle.UNDERLINE;
                        }
                        break;

                    case STRIKETHROUGH_TAG:
                        if (data[i + 1] == (byte)0x01) {
                            modifiers |= TextStyle.STRIKETHRU;
                        }
                        else {
                            modifiers &= ~TextStyle.STRIKETHRU;
                        }
                        break;

                    case HIGHLIGHT_TAG:
                        /*
                         * Highlighting is treated by OpenOffice as a
                         * background colour.
                         */
                        if (data[i + 1] == (byte)0x01) {
                            backColour = Color.yellow;
                        }
                        else {
                            backColour = null;
                        }
                        break;
                }

                // Skip the operand bytes consumed by this tag.
                i += operands;
                inSequence = true;
                continue;
            }

            if (inSequence) {
                // Style information has been changed.  Create new style here

                inSequence = false;

                /*
                 * Decide from the attribute state itself whether a style is
                 * needed.  A running counter drifts when a tag is repeated
                 * (e.g. colour changed to another colour) or when an "off"
                 * tag has no matching "on" tag.
                 */
                boolean fontChanged = fontName != null && !fontName.equals("2");
                boolean sizeChanged = fontSize != 0 && fontSize != 10;

                if (fontChanged || sizeChanged || colourSet
                        || backColour != null || modifiers != 0) {
                    ts = new TextStyle(null, TEXT_STYLE_FAMILY, DEFAULT_STYLE,
                                        mask, modifiers, fontSize, fontName, null);
                    ts.setColors(textColour, backColour);
                }
                else {
                    ts = null;
                }
            }

            /*
             * C4 xx seems to indicate a control code.  C4 00 indicates the end
             * of a paragraph; C4 04 indicates a tab space.  Only these two
             * have been seen so far.
             */
            if (data[i] == (byte)0xC4) {
                /*
                 * Redundant nodes are sometimes added to the last paragraph
                 * because a new sequence is being processed when the flag is
                 * set.
                 *
                 * To avoid this, do nothing with the last paragraph unless no
                 * text has been added for it already.  In that case, add the
                 * empty text segment being processed to ensure that all
                 * paragraphs have at least one text segment.
                 *
                 * A C4 that is the very last byte is treated as the end of
                 * the paragraph as well.
                 */
                if (i + 1 >= totalLength || data[i + 1] == (byte)0x00) {
                    if (isLastParagraph && textSegments.size() > 0) {
                        return;
                    }
                    addTextSegment(sb.toString(), ts);
                    return;
                }
                sb.append("\t");
                sawText = true;
                i++;
                continue;
            }

            // Mask to avoid sign extension: a plain (char) cast turns bytes
            // >= 0x80 into U+FF80..U+FFFF.
            // NOTE(review): this maps the byte as ISO-8859-1; confirm the
            // real Pocket Word text encoding.
            sb.append((char)(data[i] & 0xFF));
            sawText = true;
        }

        // No terminator was found: keep the text gathered so far rather than
        // silently dropping it.
        if (sawText) {
            addTextSegment(sb.toString(), ts);
        }
    }


    /**
     * <p>Adds details of a new text block to the <code>Paragraph</code>
     * object and adds its length to the paragraph's text length.</p>
     *
     * @param   text    The text of the new block.
     * @param   style   Text style object describing the formatting attached
     *                  to this block of text.
     */
    public void addTextSegment(String text, TextStyle style) {
        textLength += text.length();
        textSegments.add(new ParagraphTextSegment(text, style));
    }


    /**
     * <p>This method alters the state of the <code>Paragraph</code> object to
     * indicate whether or not it is the final paragraph in the document.</p>
     *
     * <p>It is used during conversion from SXW format to Pocket Word format.
     * In Pocket Word files, the last paragraph finishes with a different byte
     * sequence to other paragraphs.</p>
     *
     * @param   isLast  true if the Paragraph is the last in the document,
     *                  false otherwise.
     */
    public void setLastParagraph(boolean isLast) {
        isLastParagraph = isLast;
    }


    /**
     * <p>Complementary method to {@link #setLastParagraph(boolean)
     * setLastParagraph}.  Returns the terminal status of this
     * <code>Paragraph</code> within the Pocket Word document.</p>
     *
     * @return  true if the Paragraph is the last in the document; false
     *          otherwise.
     */
    public boolean getLastParagraph () {
        return isLastParagraph;
    }


    /**
     * <p>This method returns the Pocket Word representation of this
     * <code>Paragraph</code> in Little Endian byte order.</p>
     *
     * <p>Used when converting from SXW format to Pocket Word format.  The
     * line descriptors are recalculated on every call.</p>
     *
     * @return  <code>byte</code> array containing the formatted representation
     *          of this Paragraph.
     */
    public byte[] getParagraphData() {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();

        postProcessText();

        /*
         * Need information about the paragraph segments in two places
         * so calculate them first.
         *
         * The stream contains the text wrapped in any formatting sequences
         * that are necessary.
         */
        ByteArrayOutputStream segs = new ByteArrayOutputStream();

        try {
            for (int i = 0; i < textSegments.size(); i++) {
                ParagraphTextSegment pts =
                        (ParagraphTextSegment)textSegments.elementAt(i);
                segs.write(pts.getData());
            }
        }
        catch (IOException ioe) {
            // Should never happen in a memory based stream
        }

        /*
         * Number of data words for this paragraph descriptor:
         *
         * 26 is the number of bytes prior to the start of the segment.
         * 3 comes from the C4 00 00 terminating sequence.
         */
        dataWords = (short)(26 + segs.size() + 3 + 4);
        if (isLastParagraph) {
            dataWords += 6;
        }
        if (dataWords % 4 != 0) {
            // Round up to a whole number of 4 byte words
            dataWords += (4 - (dataWords % 4));
        }
        dataWords /= 4;

        /*
         * The 8 bytes are made up of E6 ?0 00 and E5 ?0 00 at the start of the
         * text along with the C4 00 that terminates it.
         *
         * In the event that the paragraph is the last one E6 00 00 is also
         * present at the end of the text.  Also, as we currently use a font
         * other than the first in the index (Tahoma) E5 01 00 is also present.
         *
         * Make sure this is accurate when font specifications change
         */
        lengthWithFormatting = (short)(segs.size() + (isLastParagraph ? 14 : 8));

        try {
            bos.write(EndianConverter.writeShort(unknown1));
            bos.write(EndianConverter.writeShort(dataWords));
            bos.write(EndianConverter.writeShort((short)(textLength + 1)));
            bos.write(EndianConverter.writeShort(lengthWithFormatting));
            bos.write(EndianConverter.writeShort(lines));

            bos.write(EndianConverter.writeShort(marker));
            bos.write(EndianConverter.writeInt(unknown2));

            bos.write(EndianConverter.writeShort(specialIndentation));
            bos.write(EndianConverter.writeShort(leftIndentation));
            bos.write(EndianConverter.writeShort(rightIndentation));

            bos.write(bullets);

            if (pStyle != null && pStyle.isAttributeSet(ParaStyle.TEXT_ALIGN)) {
                switch (pStyle.getAttribute(ParaStyle.TEXT_ALIGN)) {

                    case ParaStyle.ALIGN_RIGHT:
                        bos.write(0x01);
                        break;

                    case ParaStyle.ALIGN_CENTER:
                        bos.write(0x02);
                        break;

                    default:
                        bos.write(0x00);    // Left align in all other circumstances
                        break;
                }
            }
            else {
                /*
                 * No style supplied alignment.  Fall back to the alignment
                 * field: it is 0 (left) unless this paragraph was read from a
                 * Pocket Word file, in which case the original alignment is
                 * preserved instead of being reset.
                 */
                bos.write(alignment);
            }

            bos.write(EndianConverter.writeInt(unknown3));


            /*
             * Write out font and size.
             *
             * If font support is added then this should change as the
             * information will have to be calculated from a Font table.
             */
            bos.write(FONT_TAG);
            bos.write(EndianConverter.writeShort(defaultFont));
            bos.write(FONT_SIZE_TAG);
            bos.write(EndianConverter.writeShort(defaultSize));

            // Write out the text segments
            bos.write(segs.toByteArray());

            /*
             * If this is the last paragraph in the document then we need to
             * make sure that the paragraph text is terminated correctly with
             * an E6 00 00 before the C4 00 00.
             */
            if (isLastParagraph) {
                if (defaultFont != 1) {
                    // Must always go back to the first font.
                    bos.write(FONT_TAG);
                    bos.write(EndianConverter.writeShort((short)0x01));
                }
                bos.write(FONT_SIZE_TAG);
                bos.write(EndianConverter.writeShort((short)0x00));
            }

            bos.write(new byte[] { (byte)0xC4, 0x00, 0x00 } );

            // Pad with zero bytes up to the next 4 byte boundary
            int padding = 0;
            if (bos.size() % 4 != 0) {
                padding = 4 - (bos.size() % 4);
            }
            for (int i = 0; i < padding; i++) {
                bos.write(0x00);
            }

            // Third byte should match first byte after 0xFF 0xFF
            bos.write(new byte[] { 0x42, 0x00, 0x22, 0x00} );

            /*
             * Meaning of last two bytes seems to be the number of words
             * describing lines.  This is calculated at 10 bytes per
             * descriptor.
             *
             * May have two extra padding bytes that need to be accounted for
             * too.  The division below may lose 2 bytes (integer result).
             */
            int wordsRemaining = (lineDescriptors.size() * 10) / 4;
            if ((lineDescriptors.size() * 10) % 4 != 0) {
                wordsRemaining++;
            }
            bos.write(EndianConverter.writeShort((short)wordsRemaining));


            // Now write out the line descriptors
            for (int i = 0; i < lineDescriptors.size(); i++) {
                LineDescriptor ld = (LineDescriptor)lineDescriptors.elementAt(i);

                bos.write(ld.getDescriptorInfo());
            }


            if (!isLastParagraph) {
                /*
                 * There may be a need to pad this.  Will be writing at
                 * either start of 4 byte block or 2 bytes into it.
                 */
                if (bos.size() % 4 != 2) {
                    bos.write(EndianConverter.writeShort((short)0));
                }
                bos.write(EndianConverter.writeShort((short)0x41));
            }
        }
        catch (IOException ioe) {
            // Should never occur for a memory based stream
        }

        return bos.toByteArray();
    }


    /*
     * This method handles the calculation of correct values for line lengths
     * in each individual descriptor and the number of lines in the document.
     *
     * The concatenated text of all segments is cut into lines: a window of
     * 30 characters is examined and the line is broken after the last
     * whitespace character in it, or after 29 characters when the window
     * holds no whitespace at all.  A final chunk shorter than 30 characters
     * becomes one line whose character count is one more than its length.
     *
     * TODO: Update to take account of different font metrics.
     */
    private void postProcessText() {
        /*
         * The post-processing ...
         *
         * For each line, we need to add a line descriptor and increment
         * the number of lines in the paragraph data structure.
         *
         * To do this, make sure that no sequence goes over the given screen
         * width unless the last char is a whitespace character.
         */

        // In courier, can have no more than 29 chars per line
        final int window = LINE_CAPACITY + 1;

        StringBuffer sb = new StringBuffer();

        // Line Descriptor info should be eliminated each time
        lineDescriptors = new Vector(1, 1);
        lines = 0;

        for (int i = 0; i < textSegments.size(); i++) {
            ParagraphTextSegment pts =
                    (ParagraphTextSegment)textSegments.elementAt(i);
            sb.append(pts.getText());
        }

        final int total = sb.length();

        if (total == 0) {
            lines = 1;
            lineDescriptors.add(new LineDescriptor((short)1, (short)0));
            return;
        }

        int chunkStart = 0;
        while (chunkStart < total) {
            int remaining = total - chunkStart;

            if (remaining < window) {
                // We have less than one line left so just add it
                lineDescriptors.add(new LineDescriptor(
                        (short)(remaining + 1),
                        (short)(remaining * CHAR_SCREEN_UNITS)));
                chunkStart += remaining;
                lines++;
                continue;
            }

            // Search backwards through the window for a place to break
            int lastWhitespace = -1;
            for (int offset = window - 1; offset >= 0; offset--) {
                if (Character.isWhitespace(sb.charAt(chunkStart + offset))) {
                    lastWhitespace = offset;
                    break;
                }
            }

            if (lastWhitespace != -1) {
                // The line can be split
                lineDescriptors.add(new LineDescriptor(
                        (short)(lastWhitespace + 1),
                        (short)(lastWhitespace * CHAR_SCREEN_UNITS)));
                chunkStart += lastWhitespace + 1;
            }
            else {
                // The line is completely occupied by a single word
                lineDescriptors.add(new LineDescriptor(
                        (short)LINE_CAPACITY,
                        (short)(LINE_CAPACITY * CHAR_SCREEN_UNITS)));
                chunkStart += LINE_CAPACITY;
            }
            lines++;
        }
    }


    /**
     * <p>Returns the number of lines in the <code>Paragraph</code>.  The line
     * breaks are recalculated on every call.</p>
     *
     * @return  The number of lines in the paragraph.
     */
    public short getLines() {
        postProcessText();

        return lines;
    }


    /**
     * <p>Toggles the flag indicating that the <code>Paragraph</code> is a
     * bulleted paragraph.</p>
     *
     * @param   isBulleted  true to enable bulleting for this paragraph, false
     *                      otherwise.
     */
    public void setBullets(boolean isBulleted) {
        bullets = isBulleted ? (byte)0xFF : (byte)0;
    }

    /**
     * <p>Returns the bulleting status of the <code>Paragraph</code>.</p>
     *
     * @return  true if the paragraph is bulleted, false otherwise.
     */
    public boolean isBulleted() {
        return bullets != 0;
    }


    /**
     * <p>Returns the number of text characters in the <code>Paragraph</code>,
     * excluding formatting.</p>
     *
     * @return  The length of the paragraph.
     */
    public int getTextLength () {
        return textLength;
    }


    /**
     * <p>Returns an <code>Enumeration</code> over the individual text segments
     * of the <code>Paragraph</code>.</p>
     *
     * @return  An <code>Enumeration</code> of the text segments.
     */
    public Enumeration getSegmentsEnumerator () {
        return textSegments.elements();
    }


    /**
     * <p>Returns a paragraph style object that describes any of the paragraph
     * level formatting used by this <code>Paragraph</code>.</p>
     *
     * <p>Only the text alignment is described; the margin attributes are
     * listed but left without a value.</p>
     *
     * @return  Paragraph style object describing the <code>Paragraph</code>,
     *          or <code>null</code> when the paragraph uses the default
     *          (left) alignment.
     */
    public ParaStyle makeStyle() {
        int attrs[] = new int[] { ParaStyle.MARGIN_LEFT, ParaStyle.MARGIN_RIGHT,
                                  ParaStyle.TEXT_ALIGN };
        String values[] = new String[attrs.length];

        /*
         * Not interested in left or right indents just yet.  Don't know
         * how to calculate them.
         */

        switch (alignment) {
            case 2:
                values[2] = "center";
                break;

            case 1:
                values[2] = "right";
                break;

            case 0:
            default:
                return null;        // Not interested if it's the default.
        }

        return new ParaStyle(null, PARAGRAPH_STYLE_FAMILY, null, attrs,
                              values, null);
    }


    /*
     * Class describing the data structures which appear following the text
     * of a Paragraph.  For each line on screen that the Paragraph uses, a
     * LineDescriptor details how many characters are on the line and how much
     * screen space they occupy.
     *
     * The screen space and character breaks are calculated during
     * post-processing of the paragraph.  See postProcessText().
     *
     * The unit of measurement used for screen space is currently unknown.
     *
     * Declared static because it never touches the enclosing Paragraph.
     */
    private static class LineDescriptor {
        private final short characters;
        private final int filler = 0;
        private final short screen_space;
        private final short marker;

        private LineDescriptor(short chars, short space) {
            characters = chars;
            screen_space = space;
            marker = (short)0x040C;     // Not a constant.  Depends on font used.
        }


        /*
         * Returns the 10 byte serialised form of this descriptor: character
         * count, filler, screen space and marker, in that order.
         */
        private byte[] getDescriptorInfo(){
            ByteArrayOutputStream bos = new ByteArrayOutputStream();

            try {
                bos.write(EndianConverter.writeShort(characters));
                bos.write(EndianConverter.writeInt(filler));
                bos.write(EndianConverter.writeShort(screen_space));
                bos.write(EndianConverter.writeShort(marker));
            }
            catch (IOException ioe) {
                // Should never happen in a memory based stream.
            }

            return bos.toByteArray();
        }
    }
}